Add BasicBlock::title() method.

Fullwidth Latin letters and spaces are converted to their halfwidth
(ASCII) UTF-8 equivalents. This is not ideal, as it will break
visualisation for Japanese users.
Leading and trailing spaces are trimmed, and repeated spaces
in the names are replaced with a single space.
This commit is contained in:
King_DuckZ 2019-08-20 01:39:51 +01:00
parent 15e7edbc72
commit cb37f21502
9 changed files with 9450 additions and 7 deletions

7097
data/SHIFTJIS.TXT Normal file

File diff suppressed because it is too large Load diff

66
data/shiftjis_to_bin.cpp Normal file
View file

@ -0,0 +1,66 @@
//see https://stackoverflow.com/questions/33165171/c-shiftjis-to-utf8-conversion
//First download the "original" data from ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/SHIFTJIS.TXT
//Then use this program while piping/redirecting above text file in, and redirecting the binary output to a new file.
#include<cstdint>
#include<cstdio>
#include<iostream>
#include<string>
#include<vector>
using namespace std;
// pipe SHIFTJIS.txt in and pipe to (binary) file out
// Reads the SHIFTJIS.TXT mapping from stdin and writes the binary lookup
// table to stdout.
//
// Table layout: one 2-byte big-endian Unicode value per entry.
//   entries [0, 256)                 single-byte Shift-JIS codes
//   entries [256, 256 + 4096)        two-byte codes whose top nibble is 0x8
//   entries [256 + 4096, 256 + 8192) two-byte codes whose top nibble is 0x9
//   entries [256 + 8192, 256 + 12288) two-byte codes whose top nibble is 0xE
//
// Diagnostics go to stderr: stdout carries the binary table, so printing
// messages there would corrupt the generated file.
// Returns 0 on success, 1 if the table could not be written completely.
int main()
{
    constexpr size_t single_byte_entries = 256;
    constexpr size_t entries_per_lead_nibble = 16 * 256;
    constexpr size_t entry_count = single_byte_entries + 3 * entries_per_lead_nibble;

    //same bigendian array as in converting function
    std::vector<uint8_t> mapping(2 * entry_count);

    //ASCII control chars map to themselves, everything else starts out
    //as a space, which doubles as the "invalid/unassigned" marker
    for(size_t i = 0; i < entry_count; i++)
    {
        mapping[2 * i] = 0;
        mapping[2 * i + 1] = (i < 32 ? static_cast<uint8_t>(i) : 0x20);
    }

    std::string s;
    while(std::getline(std::cin, s)) //pipe the file SHIFTJIS to stdin
    {
        if(s.compare(0, 2, "0x") != 0) continue; //comment lines

        //%hx stores into unsigned short, so use exactly that type
        unsigned short shiftJisValue = 0;
        unsigned short unicodeValue = 0;
        if(2 != std::sscanf(s.c_str(), "%hx %hx", &shiftJisValue, &unicodeValue)) //getting hex values
        {
            std::fputs("Error hex reading\n", stderr);
            continue;
        }

        size_t offset; //array offset, in entries
        if((shiftJisValue >> 8) == 0) offset = 0;
        else if((shiftJisValue >> 12) == 0x8) offset = single_byte_entries;
        else if((shiftJisValue >> 12) == 0x9) offset = single_byte_entries + entries_per_lead_nibble;
        else if((shiftJisValue >> 12) == 0xE) offset = single_byte_entries + 2 * entries_per_lead_nibble;
        else
        {
            std::fputs("Error input values\n", stderr);
            continue;
        }

        //from entry index to byte index
        offset = 2 * (offset + (shiftJisValue & 0xfff));

        //a slot that no longer holds the default value was already assigned
        if(mapping[offset] != 0 || mapping[offset + 1] != 0x20)
        {
            std::fputs("Error mapping not 1:1\n", stderr);
            continue;
        }
        mapping[offset] = static_cast<uint8_t>(unicodeValue >> 8);
        mapping[offset + 1] = static_cast<uint8_t>(unicodeValue & 0xff);
    }

    if(std::fwrite(mapping.data(), 1, mapping.size(), stdout) != mapping.size())
    {
        std::fputs("Error writing output\n", stderr);
        return 1;
    }
    return 0;
}

View file

@ -3,6 +3,7 @@
#include <cstdint> #include <cstdint>
#include <type_traits> #include <type_traits>
#include <vector> #include <vector>
#include <string>
//see: https://www.psdevwiki.com/ps3/PS1_Savedata#Memory_Card_Formats_PS1 //see: https://www.psdevwiki.com/ps3/PS1_Savedata#Memory_Card_Formats_PS1
@ -37,6 +38,7 @@ public:
const std::vector<uint8_t>& palette() const { return m_icon_palette; } const std::vector<uint8_t>& palette() const { return m_icon_palette; }
bool has_magic() const; bool has_magic() const;
IconDisplayFlag icon_display_flag() const; IconDisplayFlag icon_display_flag() const;
std::string title() const;
private: private:
std::vector<uint8_t> m_icon_palette; std::vector<uint8_t> m_icon_palette;

View file

@ -1,4 +1,4 @@
project('memcard', 'cpp', default_options:['debug=true', 'cpp_std=c++17', 'b_ndebug=if-release']) project('memcard', 'cpp', 'c', default_options:['debug=true', 'cpp_std=c++17', 'b_ndebug=if-release'])
add_project_link_arguments(['-lstdc++fs'], language: 'cpp') add_project_link_arguments(['-lstdc++fs'], language: 'cpp')
private_incl = include_directories('src') private_incl = include_directories('src')
@ -9,6 +9,8 @@ memcard = shared_library('memcard',
'src/block.cpp', 'src/block.cpp',
'src/icon_fetch.cpp', 'src/icon_fetch.cpp',
'src/resize_harris.cpp', 'src/resize_harris.cpp',
'src/shiftjis.c',
'src/shiftjis_to_utf8.cpp',
install: true, install: true,
include_directories: [private_incl, library_incl], include_directories: [private_incl, library_incl],
) )

View file

@ -1,8 +1,12 @@
#include "memcard/block.hpp" #include "memcard/block.hpp"
#include "memcard/memorycard.hpp" #include "memcard/memorycard.hpp"
#include "shiftjis_to_utf8.hpp"
#include <cassert> #include <cassert>
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <algorithm>
#include <cstring>
#include <string_view>
#define FLOAT_PALETTE_CONV #define FLOAT_PALETTE_CONV
@ -19,13 +23,9 @@ namespace {
retval.reserve(16 * 4); retval.reserve(16 * 4);
{ {
int advance = 96; if (e - b < 96)
while (b != e and advance) {
--advance;
++b;
}
if (advance)
throw std::runtime_error("Not enough data to advance and extract an icon palette"); throw std::runtime_error("Not enough data to advance and extract an icon palette");
b += 96;
} }
int entries_left = 16; int entries_left = 16;
@ -88,6 +88,38 @@ IconDisplayFlag BasicBlock<Const>::icon_display_flag() const {
return static_cast<IconDisplayFlag>(val); return static_cast<IconDisplayFlag>(val);
} }
//Returns the title stored in this block as UTF-8 text.
//The raw bytes starting at m_begin + 4 are decoded with shiftjis_to_utf8(),
//passed through full_to_halfwidth_ascii(), then leading/trailing spaces
//are removed and every run of spaces is collapsed to a single space.
template <bool Const>
std::string BasicBlock<Const>::title() const {
    //64 bytes of title data plus one byte reserved for the terminator
    char mem[64 + 1];

    //strncpy() leaves the destination unterminated when the source has no
    //NUL within the requested length, so copy at most 64 bytes and
    //terminate explicitly; otherwise strlen() could read past the buffer
    std::strncpy(mem, reinterpret_cast<const char*>(m_begin + 4), sizeof(mem) - 1);
    mem[sizeof(mem) - 1] = '\0';

    const std::string_view temp(mem, std::strlen(mem));
    auto name = full_to_halfwidth_ascii(shiftjis_to_utf8(temp));

    //trim spaces at the front (npos erases the whole string)
    name.erase(0, name.find_first_not_of(' '));

    //trim spaces at the end (npos + 1 wraps to 0 and erases everything)
    name.erase(name.find_last_not_of(' ') + 1);

    //remove repeated spaces
    name.erase(
        std::unique(
            name.begin(),
            name.end(),
            [](char lhs, char rhs) { return ' ' == lhs and ' ' == rhs; }
        ),
        name.end()
    );

    return name;
}
template class BasicBlock<true>; template class BasicBlock<true>;
template class BasicBlock<false>; template class BasicBlock<false>;
} //namespace mc } //namespace mc

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,12 @@
#pragma once
#if defined(__cplusplus)
extern "C" {
#endif
/* Shift-JIS to Unicode lookup table. shiftjis_to_utf8() reads it as 2-byte
   entries, most significant byte first. */
extern const unsigned char g_shiftjis[];
/* Presumably the size in bytes of g_shiftjis; the definition is not visible
   from this header - TODO confirm. */
extern const unsigned int g_shiftjis_len;
#if defined(__cplusplus)
} //extern C
#endif

View file

@ -0,0 +1,129 @@
#include "shiftjis_to_utf8.hpp"
#include "shiftjis.h"
#include <cstdint>
#if !defined(NDEBUG)
# include <iostream>
#endif
namespace mc {
namespace {
//Tells how many bytes the UTF-8 sequence starting at lead_it occupies,
//judging from the lead byte only. Returns 0 when the byte is not a
//valid lead byte.
unsigned int sequence_length (std::string::const_iterator lead_it) {
    const uint8_t first = static_cast<uint8_t>(*lead_it & 0xff);

    //0xxxxxxx
    if (first < 0x80)
        return 1;
    //110xxxxx
    if ((first & 0xE0) == 0xC0)
        return 2;
    //1110xxxx
    if ((first & 0xF0) == 0xE0)
        return 3;
    //11110xxx
    if ((first & 0xF8) == 0xF0)
        return 4;

    return 0;
}
//Decodes the UTF-8 sequence of the given length starting at it and
//returns its code point. For any length other than 2, 3 or 4 the first
//byte is returned as is.
uint32_t to_codepoint (std::string::const_iterator it, unsigned int length) {
    //byte at the given position of the sequence, as an unsigned value
    const auto octet = [it](unsigned int index) -> uint32_t {
        return static_cast<uint8_t>(it[index]);
    };

    const uint32_t first = static_cast<uint32_t>(*it);

    if (2 == length) {
        return ((first << 6) & 0x7ff)
            + (octet(1) & 0x3f);
    }
    if (3 == length) {
        return ((first << 12) & 0xffff)
            + ((octet(1) << 6) & 0xfff)
            + (octet(2) & 0x3f);
    }
    if (4 == length) {
        return ((first << 18) & 0x1fffff)
            + ((octet(1) << 12) & 0x3ffff)
            + ((octet(2) << 6) & 0xfff)
            + (octet(3) & 0x3f);
    }
    return first;
}
//see https://stackoverflow.com/questions/15631168/validate-japanese-character-in-active-record-callback/15651264#15651264
//True when cp is a fullwidth form with an ASCII counterpart, that is
//U+FF01..U+FF5E, which mirror ASCII 0x21..0x7E at a fixed distance.
//U+FF5F and U+FF60 (fullwidth white parentheses) are deliberately left
//out: they have no ASCII equivalent, and the "+ 0x20" conversion used by
//the caller would turn them into 0x7F and 0x80.
[[gnu::const]] bool is_fullwidth_roman (uint32_t cp) {
    return (cp >= 0xff01 and cp <= 0xff5e);
}
} //unnamed namespace
//Converts Shift-JIS text to UTF-8 by looking every character up in the
//g_shiftjis table (2 bytes per entry, high byte first).
//Bytes whose top nibble is 0x8, 0x9 or 0xE start a two-byte character;
//every other byte is looked up on its own. A two-byte character cut off
//by the end of the input is dropped.
std::string shiftjis_to_utf8 (std::string_view in) {
    std::string utf8;
    //ShiftJis won't give 4byte UTF8, so max. 3 byte per input char are needed
    utf8.reserve(3 * in.size());

    size_t pos = 0;
    while (pos < in.size()) {
        const uint8_t first = static_cast<uint8_t>(in[pos]);

        //start of the table section for this lead byte, 0 for one byte shiftjis
        size_t entry = 0;
        switch (first >> 4) {
        case 0x8: entry = 0x100; break;
        case 0x9: entry = 0x1100; break;
        case 0xE: entry = 0x2100; break;
        default: break;
        }

        if (entry != 0) {
            //two-byte shiftjis: the low nibble of the lead byte selects the row
            entry += static_cast<size_t>(first & 0xf) << 8;
            ++pos;
            if (pos >= in.size())
                break;
        }
        entry += static_cast<uint8_t>(in[pos]);
        ++pos;

        //from entry index to byte index, then read the big endian value
        const size_t byte_offset = entry << 1;
        const uint16_t code = static_cast<uint16_t>(
            (g_shiftjis[byte_offset] << 8) | g_shiftjis[byte_offset + 1]
        );

        //converting to UTF8
        if (code < 0x80) {
            utf8.push_back(static_cast<char>(code));
        }
        else if (code < 0x800) {
            utf8.push_back(static_cast<char>(0xC0 | (code >> 6)));
            utf8.push_back(static_cast<char>(0x80 | (code & 0x3f)));
        }
        else {
            utf8.push_back(static_cast<char>(0xE0 | (code >> 12)));
            utf8.push_back(static_cast<char>(0x80 | ((code & 0xfff) >> 6)));
            utf8.push_back(static_cast<char>(0x80 | (code & 0x3f)));
        }
    }
    return utf8;
}
//Returns a copy of the UTF-8 string in where:
// - code points accepted by is_fullwidth_roman() are replaced by the
//   single byte obtained from the low 8 bits of (code point + 0x20)
// - U+3000 (ideographic space) becomes ' '
// - U+2212 (minus sign) becomes '-'
// - everything else is copied through unchanged
//Bytes that do not start a valid sequence, and sequences cut short by the
//end of the string, are copied through one byte at a time. Without that
//a zero sequence length would never advance the iterator, and a truncated
//sequence would read past the end of the input.
std::string full_to_halfwidth_ascii (const std::string& in) {
    std::string retval;
    retval.reserve(in.size());

    const auto end = in.end();
    auto it = in.begin();
    while (it != end) {
        const auto remaining = static_cast<std::size_t>(end - it);
        const auto seq_len = sequence_length(it);

        if (0 == seq_len or seq_len > remaining) {
            //malformed input: keep the byte and resynchronise on the next one
            retval.push_back(*it);
            ++it;
            continue;
        }

        const auto code = to_codepoint(it, seq_len);
        if (is_fullwidth_roman(code)) {
            retval.push_back(0xFF & (code + 0x20));
        }
        else if (12288 == code) { //U+3000
            retval.push_back(' ');
        }
        else if (8722 == code) { //U+2212
            retval.push_back('-');
        }
        else {
#if !defined(NDEBUG)
            std::cout << "unknown codepoint " << code << '\n';
#endif
            retval.append(it, it + seq_len);
        }
        it += seq_len;
    }
    return retval;
}
} //namespace mc

View file

@ -0,0 +1,9 @@
#pragma once
#include <string>
#include <string_view>
namespace mc {
//Decodes the Shift-JIS encoded bytes in `in` and returns the text as UTF-8.
std::string shiftjis_to_utf8 (std::string_view in);
//Takes UTF-8 text and returns a copy in which fullwidth roman characters,
//the ideographic space (U+3000) and the minus sign (U+2212) are replaced
//by single-byte characters; all other characters are copied unchanged.
std::string full_to_halfwidth_ascii (const std::string& in);
} //namespace mc