Add BasicBlock::title() method.
Latin letters and spaces get converted to halfwidth utf-8 characters. Not ideal as this will break visualisation for Japanese users. Leading and trailing spaces are trimmed and repeated spaces in the names are replaced with a single space.
This commit is contained in:
parent
15e7edbc72
commit
cb37f21502
9 changed files with 9450 additions and 7 deletions
7097
data/SHIFTJIS.TXT
Normal file
7097
data/SHIFTJIS.TXT
Normal file
File diff suppressed because it is too large
Load diff
66
data/shiftjis_to_bin.cpp
Normal file
66
data/shiftjis_to_bin.cpp
Normal file
|
@ -0,0 +1,66 @@
|
|||
//see https://stackoverflow.com/questions/33165171/c-shiftjis-to-utf8-conversion
|
||||
//First download the "original" data from ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/SHIFTJIS.TXT
|
||||
//Then use this program while piping/redirecting above text file in, and redirecting the binary output to a new file.
|
||||
|
||||
#include<iostream>
|
||||
#include<string>
|
||||
#include<cstdio>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// pipe SHIFTJIS.txt in and pipe to (binary) file out
|
||||
// Reads the SHIFTJIS.TXT mapping from stdin and writes the binary lookup
// table to stdout. Diagnostics go to stderr so they never end up inside the
// binary output that is being redirected to a file.
// Returns 0 on success, 1 if the table could not be written completely.
int main()
{
    // Table layout (must match the converting function): 256 single-byte
    // entries followed by three sections of 16*256 entries, one per handled
    // double-byte lead nibble (0x8, 0x9, 0xE). Each entry is a 16-bit value
    // stored big-endian.
    constexpr std::size_t entry_count = 256 + 3*256*16;
    static uint8_t mapping[2 * entry_count];

    //initializing with space for invalid value, and then ASCII control chars
    for(std::size_t i = 32; i < entry_count; i++)
    {
        mapping[2 * i] = 0;
        mapping[2 * i + 1] = 0x20;
    }
    for(std::size_t i = 0; i < 32; i++)
    {
        mapping[2 * i] = 0;
        mapping[2 * i + 1] = static_cast<uint8_t>(i);
    }

    std::string line;
    while(std::getline(std::cin, line)) //pipe the file SHIFTJIS to stdin
    {
        if(line.compare(0, 2, "0x") != 0) continue; //comment lines

        // %hx expects unsigned short*, so use exactly that type
        unsigned short shiftJisValue = 0, unicodeValue = 0;
        if(2 != std::sscanf(line.c_str(), "%hx %hx", &shiftJisValue, &unicodeValue)) //getting hex values
        {
            std::fputs("Error hex reading\n", stderr);
            continue;
        }

        std::size_t offset; //array offset of the section this value belongs to
        if((shiftJisValue >> 8) == 0) offset = 0;
        else if((shiftJisValue >> 12) == 0x8) offset = 256;
        else if((shiftJisValue >> 12) == 0x9) offset = 256 + 16*256;
        else if((shiftJisValue >> 12) == 0xE) offset = 256 + 2*16*256;
        else
        {
            std::fputs("Error input values\n", stderr);
            continue;
        }

        offset = 2 * (offset + (shiftJisValue & 0xfff));
        // an entry that no longer holds the "invalid" placeholder was already assigned
        if(mapping[offset] != 0 || mapping[offset + 1] != 0x20)
        {
            std::fputs("Error mapping not 1:1\n", stderr);
            continue;
        }

        mapping[offset] = static_cast<uint8_t>(unicodeValue >> 8);
        mapping[offset + 1] = static_cast<uint8_t>(unicodeValue & 0xff);
    }

    if(std::fwrite(mapping, 1, sizeof(mapping), stdout) != sizeof(mapping))
    {
        std::fputs("Error writing output\n", stderr);
        return 1;
    }
    return 0;
}
|
|
@ -3,6 +3,7 @@
|
|||
#include <cstdint>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
//see: https://www.psdevwiki.com/ps3/PS1_Savedata#Memory_Card_Formats_PS1
|
||||
|
||||
|
@ -37,6 +38,7 @@ public:
|
|||
const std::vector<uint8_t>& palette() const { return m_icon_palette; }
|
||||
bool has_magic() const;
|
||||
IconDisplayFlag icon_display_flag() const;
|
||||
std::string title() const;
|
||||
|
||||
private:
|
||||
std::vector<uint8_t> m_icon_palette;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
project('memcard', 'cpp', default_options:['debug=true', 'cpp_std=c++17', 'b_ndebug=if-release'])
|
||||
project('memcard', 'cpp', 'c', default_options:['debug=true', 'cpp_std=c++17', 'b_ndebug=if-release'])
|
||||
add_project_link_arguments(['-lstdc++fs'], language: 'cpp')
|
||||
|
||||
private_incl = include_directories('src')
|
||||
|
@ -9,6 +9,8 @@ memcard = shared_library('memcard',
|
|||
'src/block.cpp',
|
||||
'src/icon_fetch.cpp',
|
||||
'src/resize_harris.cpp',
|
||||
'src/shiftjis.c',
|
||||
'src/shiftjis_to_utf8.cpp',
|
||||
install: true,
|
||||
include_directories: [private_incl, library_incl],
|
||||
)
|
||||
|
|
|
@ -1,8 +1,12 @@
|
|||
#include "memcard/block.hpp"
|
||||
#include "memcard/memorycard.hpp"
|
||||
#include "shiftjis_to_utf8.hpp"
|
||||
#include <cassert>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <string_view>
|
||||
|
||||
#define FLOAT_PALETTE_CONV
|
||||
|
||||
|
@ -19,13 +23,9 @@ namespace {
|
|||
retval.reserve(16 * 4);
|
||||
|
||||
{
|
||||
int advance = 96;
|
||||
while (b != e and advance) {
|
||||
--advance;
|
||||
++b;
|
||||
}
|
||||
if (advance)
|
||||
if (e - b < 96)
|
||||
throw std::runtime_error("Not enough data to advance and extract an icon palette");
|
||||
b += 96;
|
||||
}
|
||||
|
||||
int entries_left = 16;
|
||||
|
@ -88,6 +88,38 @@ IconDisplayFlag BasicBlock<Const>::icon_display_flag() const {
|
|||
return static_cast<IconDisplayFlag>(val);
|
||||
}
|
||||
|
||||
template <bool Const>
|
||||
std::string BasicBlock<Const>::title() const {
|
||||
char mem[64 + 1];
|
||||
std::strncpy(mem, reinterpret_cast<const char*>(m_begin + 4), sizeof(mem) / sizeof(mem[0]));
|
||||
std::string_view temp(mem, std::strlen(mem));
|
||||
auto name = full_to_halfwidth_ascii(shiftjis_to_utf8(temp));
|
||||
|
||||
//trim spaces at the front
|
||||
{
|
||||
const auto pos = name.find_first_not_of(' ');
|
||||
const auto non_space_it = (pos == name.npos ? name.end() : name.begin() + pos);
|
||||
name.erase(name.begin(), non_space_it);
|
||||
}
|
||||
//trim spaces at the end
|
||||
{
|
||||
auto non_space_it = std::find_if(name.rbegin(), name.rend(), [](char c){return c != ' ';});
|
||||
name.resize(name.size() - (non_space_it - name.rbegin()));
|
||||
}
|
||||
|
||||
//remove repeated spaces
|
||||
{
|
||||
std::size_t start = 0;
|
||||
while ((start = name.find(' ', start)) != name.npos) {
|
||||
const std::size_t end = name.find_first_not_of(' ', start);
|
||||
name.erase(start + 1, end - start - 1);
|
||||
++start;
|
||||
};
|
||||
}
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
template class BasicBlock<true>;
|
||||
template class BasicBlock<false>;
|
||||
} //namespace mc
|
||||
|
|
2094
subprojects/memcard/src/shiftjis.c
Normal file
2094
subprojects/memcard/src/shiftjis.c
Normal file
File diff suppressed because it is too large
Load diff
12
subprojects/memcard/src/shiftjis.h
Normal file
12
subprojects/memcard/src/shiftjis.h
Normal file
|
@ -0,0 +1,12 @@
|
|||
#pragma once
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern const unsigned char g_shiftjis[];
|
||||
extern const unsigned int g_shiftjis_len;
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} //extern C
|
||||
#endif
|
129
subprojects/memcard/src/shiftjis_to_utf8.cpp
Normal file
129
subprojects/memcard/src/shiftjis_to_utf8.cpp
Normal file
|
@ -0,0 +1,129 @@
|
|||
#include "shiftjis_to_utf8.hpp"
|
||||
#include "shiftjis.h"
|
||||
#include <cstdint>
|
||||
#if !defined(NDEBUG)
|
||||
# include <iostream>
|
||||
#endif
|
||||
|
||||
namespace mc {
|
||||
namespace {
|
||||
// Returns the number of bytes in the UTF-8 sequence introduced by the byte
// lead_it points at (1 to 4), or 0 if that byte is not a valid lead byte.
unsigned int sequence_length (std::string::const_iterator lead_it) {
    const auto first = static_cast<uint8_t>(*lead_it);

    if ((first & 0x80) == 0x00)
        return 1; // 0xxxxxxx
    if ((first & 0xe0) == 0xc0)
        return 2; // 110xxxxx
    if ((first & 0xf0) == 0xe0)
        return 3; // 1110xxxx
    if ((first & 0xf8) == 0xf0)
        return 4; // 11110xxx
    return 0;
}
|
||||
|
||||
// Decodes the UTF-8 sequence of the given length starting at it and returns
// its code point. The caller must guarantee that length bytes are readable.
// For any length other than 2, 3 or 4 the value of the first byte is
// returned unchanged.
uint32_t to_codepoint (std::string::const_iterator it, unsigned int length) {
    // Go through uint8_t so bytes >= 0x80 are never sign-extended, whatever
    // the signedness of char is on the target platform.
    const auto byte_at = [it](unsigned int idx) {
        return static_cast<uint32_t>(static_cast<uint8_t>(it[idx]));
    };

    switch (length) {
    case 2:
        return ((byte_at(0) & 0x1f) << 6)
            | (byte_at(1) & 0x3f);
    case 3:
        return ((byte_at(0) & 0x0f) << 12)
            | ((byte_at(1) & 0x3f) << 6)
            | (byte_at(2) & 0x3f);
    case 4:
        return ((byte_at(0) & 0x07) << 18)
            | ((byte_at(1) & 0x3f) << 12)
            | ((byte_at(2) & 0x3f) << 6)
            | (byte_at(3) & 0x3f);
    default:
        return byte_at(0);
    }
}
|
||||
|
||||
//see https://stackoverflow.com/questions/15631168/validate-japanese-character-in-active-record-callback/15651264#15651264
|
||||
// True if cp is a fullwidth variant of a printable ASCII character, i.e. in
// U+FF01 (fullwidth '!') .. U+FF5E (fullwidth '~'). Exactly these code
// points map onto 0x21..0x7E when 0x20 is added and the low byte is kept;
// U+FF5F and U+FF60 would end up as 0x7F and 0x80, so they are excluded.
[[gnu::const]] bool is_fullwidth_roman (uint32_t cp) {
    return (cp >= 0xff01 && cp <= 0xff5e);
}
|
||||
} //unnamed namespace
|
||||
|
||||
std::string shiftjis_to_utf8 (std::string_view in) {
|
||||
std::string output(3 * in.size(), ' '); //ShiftJis won't give 4byte UTF8, so max. 3 byte per input char are needed
|
||||
size_t indexInput = 0, indexOutput = 0;
|
||||
|
||||
while(indexInput < in.size()) {
|
||||
char arraySection = ((uint8_t)in[indexInput]) >> 4;
|
||||
|
||||
size_t arrayOffset;
|
||||
if(arraySection == 0x8) arrayOffset = 0x100; //these are two-byte shiftjis
|
||||
else if(arraySection == 0x9) arrayOffset = 0x1100;
|
||||
else if(arraySection == 0xE) arrayOffset = 0x2100;
|
||||
else arrayOffset = 0; //this is one byte shiftjis
|
||||
|
||||
//determining real array offset
|
||||
if(arrayOffset) {
|
||||
arrayOffset += (((uint8_t)in[indexInput]) & 0xf) << 8;
|
||||
indexInput++;
|
||||
if(indexInput >= in.size()) break;
|
||||
}
|
||||
arrayOffset += (uint8_t)in[indexInput++];
|
||||
arrayOffset <<= 1;
|
||||
|
||||
//unicode number is...
|
||||
uint16_t unicodeValue = (g_shiftjis[arrayOffset] << 8) | g_shiftjis[arrayOffset + 1];
|
||||
|
||||
//converting to UTF8
|
||||
if(unicodeValue < 0x80) {
|
||||
output[indexOutput++] = unicodeValue;
|
||||
}
|
||||
else if(unicodeValue < 0x800) {
|
||||
output[indexOutput++] = 0xC0 | (unicodeValue >> 6);
|
||||
output[indexOutput++] = 0x80 | (unicodeValue & 0x3f);
|
||||
}
|
||||
else {
|
||||
output[indexOutput++] = 0xE0 | (unicodeValue >> 12);
|
||||
output[indexOutput++] = 0x80 | ((unicodeValue & 0xfff) >> 6);
|
||||
output[indexOutput++] = 0x80 | (unicodeValue & 0x3f);
|
||||
}
|
||||
}
|
||||
|
||||
output.resize(indexOutput); //remove the unnecessary bytes
|
||||
return output;
|
||||
}
|
||||
|
||||
std::string full_to_halfwidth_ascii (const std::string& in) {
|
||||
if (in.empty())
|
||||
return std::string();
|
||||
|
||||
auto it = in.begin();
|
||||
std::string retval;
|
||||
do {
|
||||
const auto seq_len = sequence_length(it);
|
||||
const auto code = to_codepoint(it, seq_len);
|
||||
if (is_fullwidth_roman(code)) {
|
||||
retval.push_back(0xFF & (code + 0x20));
|
||||
}
|
||||
else if (12288 == code) {
|
||||
retval.push_back(' ');
|
||||
}
|
||||
else if (8722 == code) {
|
||||
retval.push_back('-');
|
||||
}
|
||||
else {
|
||||
#if !defined(NDEBUG)
|
||||
std::cout << "unknown codepoint " << code << '\n';
|
||||
#endif
|
||||
std::copy(it, it + seq_len, std::back_inserter(retval));
|
||||
}
|
||||
it += seq_len;
|
||||
} while (it != in.end());
|
||||
return retval;
|
||||
}
|
||||
} //namespace mc
|
9
subprojects/memcard/src/shiftjis_to_utf8.hpp
Normal file
9
subprojects/memcard/src/shiftjis_to_utf8.hpp
Normal file
|
@ -0,0 +1,9 @@
|
|||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
namespace mc {
|
||||
std::string shiftjis_to_utf8 (std::string_view in);
|
||||
std::string full_to_halfwidth_ascii (const std::string& in);
|
||||
} //namespace mc
|
Loading…
Add table
Reference in a new issue