Add BasicBlock::title() method.

Latin letters and spaces get converted to halfwidth utf-8 characters. Not ideal as this will break visualisation for Japanese users. Leading and trailing spaces are trimmed and repeated spaces in the names are replaced with a single space.
2019-08-20 01:39:51 +01:00 · 2019-08-20 01:39:51 +01:00 · cb37f21502
commit cb37f21502
parent 15e7edbc72
9 changed files with 9450 additions and 7 deletions
--- a/data/SHIFTJIS.TXT
+++ b/data/SHIFTJIS.TXT
--- a/data/shiftjis_to_bin.cpp
+++ b/data/shiftjis_to_bin.cpp
@ -0,0 +1,66 @@
 //see https://stackoverflow.com/questions/33165171/c-shiftjis-to-utf8-conversion
 //First download the "original" data from ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/SHIFTJIS.TXT
 //Then use this program while piping/redirecting above text file in, and redirecting the binary output to a new file.
 #include<cstdint>
 #include<cstdio>
 #include<iostream>
 #include<string>
 #include<vector>
 using namespace std;
 // pipe SHIFTJIS.txt in and pipe to (binary) file out
 // Reads the SHIFTJIS.TXT mapping from stdin and writes the binary lookup table
 // to stdout. Diagnostics go to stderr so they can never end up inside the
 // binary output. Returns 0 on success and 1 if the table could not be written.
 int main()
 {
    // Same layout as in the converting function: one big-endian 16-bit Unicode
    // value per slot; 256 slots for single-byte codes followed by three
    // sections of 16*256 slots for the lead nibbles 0x8, 0x9 and 0xE.
    constexpr std::size_t slotCount = 256 + 3*256*16;
    std::vector<std::uint8_t> mapping(2 * slotCount);

    //initializing with space for invalid value, and with the ASCII control
    //chars (which map to themselves) for the first 32 slots
    for(std::size_t i = 0; i < slotCount; i++)
    {
        mapping[2 * i] = 0;
        mapping[2 * i + 1] = static_cast<std::uint8_t>(i < 32 ? i : 0x20);
    }

    std::string s;
    while(std::getline(std::cin, s)) //pipe the file SHIFTJIS to stdin
    {
        if(s.compare(0, 2, "0x") != 0) continue; //comment lines

        // %hx expects unsigned short, so scan into exactly that type
        unsigned short shiftJisValue = 0;
        unsigned short unicodeValue = 0;
        if(2 != std::sscanf(s.c_str(), "%hx %hx", &shiftJisValue, &unicodeValue)) //getting hex values
        {
            std::fputs("Error hex reading\n", stderr);
            continue;
        }

        std::size_t offset; //array offset
        if((shiftJisValue >> 8) == 0) offset = 0;
        else if((shiftJisValue >> 12) == 0x8) offset = 256;
        else if((shiftJisValue >> 12) == 0x9) offset = 256 + 16*256;
        else if((shiftJisValue >> 12) == 0xE) offset = 256 + 2*16*256;
        else
        {
            std::fputs("Error input values\n", stderr);
            continue;
        }

        offset = 2 * (offset + (shiftJisValue & 0xfff));
        // a slot that no longer holds the "invalid" filler was already assigned
        if(mapping[offset] != 0 || mapping[offset + 1] != 0x20)
        {
            std::fputs("Error mapping not 1:1\n", stderr);
            continue;
        }
        mapping[offset] = static_cast<std::uint8_t>(unicodeValue >> 8);
        mapping[offset + 1] = static_cast<std::uint8_t>(unicodeValue & 0xff);
    }

    const std::size_t written = std::fwrite(mapping.data(), 1, mapping.size(), stdout);
    if(written != mapping.size())
    {
        std::fputs("Error writing mapping table\n", stderr);
        return 1;
    }
    return 0;
 }
--- a/subprojects/memcard/include/memcard/block.hpp
+++ b/subprojects/memcard/include/memcard/block.hpp
@ -3,6 +3,7 @@
 #include <cstdint>
 #include <type_traits>
 #include <vector>
 #include <string>
 //see: https://www.psdevwiki.com/ps3/PS1_Savedata#Memory_Card_Formats_PS1
@ -37,6 +38,7 @@ public:
 	const std::vector<uint8_t>& palette() const { return m_icon_palette; }
 	bool has_magic() const;
 	IconDisplayFlag icon_display_flag() const;
 	std::string title() const;
 private:
 	std::vector<uint8_t> m_icon_palette;
--- a/subprojects/memcard/meson.build
+++ b/subprojects/memcard/meson.build
@ -1,4 +1,4 @@
-project('memcard', 'cpp', default_options:['debug=true', 'cpp_std=c++17', 'b_ndebug=if-release'])
+project('memcard', 'cpp', 'c', default_options:['debug=true', 'cpp_std=c++17', 'b_ndebug=if-release'])
 add_project_link_arguments(['-lstdc++fs'], language: 'cpp')
 private_incl = include_directories('src')
@ -9,6 +9,8 @@ memcard = shared_library('memcard',
  'src/block.cpp',
  'src/icon_fetch.cpp',
  'src/resize_harris.cpp',
  'src/shiftjis.c',
  'src/shiftjis_to_utf8.cpp',
  install: true,
  include_directories: [private_incl, library_incl],
 )
--- a/subprojects/memcard/src/block.cpp
+++ b/subprojects/memcard/src/block.cpp
@ -1,8 +1,12 @@
 #include "memcard/block.hpp"
 #include "memcard/memorycard.hpp"
 #include "shiftjis_to_utf8.hpp"
 #include <cassert>
 #include <iomanip>
 #include <iostream>
 #include <algorithm>
 #include <cstring>
 #include <string_view>
 #define FLOAT_PALETTE_CONV
@ -19,13 +23,9 @@ namespace {
 		retval.reserve(16 * 4);
 		{
-			int advance = 96;
+			if (e - b < 96)
 			while (b != e and advance) {
 				--advance;
 				++b;
 			}
 			if (advance)
 				throw std::runtime_error("Not enough data to advance and extract an icon palette");
 			b += 96;
 		}
 		int entries_left = 16;
@ -88,6 +88,38 @@ IconDisplayFlag BasicBlock<Const>::icon_display_flag() const {
 	return static_cast<IconDisplayFlag>(val);
 }
 /// Returns the save title stored in this block as UTF-8.
 ///
 /// The raw title is read from offset 4 of the block, up to the first NUL
 /// byte or 64 bytes, whichever comes first. It is converted from Shift-JIS
 /// to UTF-8, fullwidth Latin characters and spaces are replaced with their
 /// halfwidth equivalents, leading and trailing spaces are trimmed and runs
 /// of spaces are collapsed to a single space.
 template <bool Const>
 std::string BasicBlock<Const>::title() const {
 	// The title field is 64 bytes long and is not guaranteed to be
 	// NUL-terminated, so the scan must never look past the field itself.
 	constexpr std::size_t title_field_size = 64;
 	const char* const raw = reinterpret_cast<const char*>(m_begin + 4);
 	const char* const raw_end = std::find(raw, raw + title_field_size, '\0');
 	const std::string_view temp(raw, static_cast<std::size_t>(raw_end - raw));

 	auto name = full_to_halfwidth_ascii(shiftjis_to_utf8(temp));

 	//trim spaces at the front (erases everything if there are only spaces)
 	name.erase(0, name.find_first_not_of(' '));

 	//trim spaces at the end
 	{
 		const auto last = name.find_last_not_of(' ');
 		name.erase(last == name.npos ? 0 : last + 1);
 	}

 	//remove repeated spaces: keep only the first space of every run
 	name.erase(
 		std::unique(name.begin(), name.end(), [](char a, char b) { return a == ' ' and b == ' '; }),
 		name.end()
 	);

 	return name;
 }
 template class BasicBlock<true>;
 template class BasicBlock<false>;
 } //namespace mc
--- a/subprojects/memcard/src/shiftjis.c
+++ b/subprojects/memcard/src/shiftjis.c
--- a/subprojects/memcard/src/shiftjis.h
+++ b/subprojects/memcard/src/shiftjis.h
@ -0,0 +1,12 @@
 #pragma once
 #if defined(__cplusplus)
 extern "C" {
 #endif
 /* Shift-JIS to Unicode lookup table. Each entry is two bytes, high byte
  * first, and holds the Unicode value for one Shift-JIS code. */
 extern const unsigned char g_shiftjis[];
 /* NOTE(review): presumably the size of g_shiftjis in bytes - confirm against
  * the definition in shiftjis.c. */
 extern const unsigned int g_shiftjis_len;
 #if defined(__cplusplus)
 } //extern C
 #endif
--- a/subprojects/memcard/src/shiftjis_to_utf8.cpp
+++ b/subprojects/memcard/src/shiftjis_to_utf8.cpp
@ -0,0 +1,129 @@
 #include "shiftjis_to_utf8.hpp"
 #include "shiftjis.h"
 #include <cstdint>
 #if !defined(NDEBUG)
 #	include <iostream>
 #endif
 namespace mc {
 namespace {
 	//Returns how many bytes the UTF-8 sequence starting at lead_it occupies,
 	//judging by the bit pattern of its first byte only. Returns 0 when that
 	//byte cannot start a sequence (continuation byte or invalid lead byte).
 	unsigned int sequence_length (std::string::const_iterator lead_it) {
 		const auto first = static_cast<unsigned char>(*lead_it);
 		if ((first & 0x80) == 0x00) //0xxxxxxx
 			return 1;
 		if ((first & 0xe0) == 0xc0) //110xxxxx
 			return 2;
 		if ((first & 0xf0) == 0xe0) //1110xxxx
 			return 3;
 		if ((first & 0xf8) == 0xf0) //11110xxx
 			return 4;
 		return 0;
 	}
 	//Decodes the UTF-8 sequence of `length` bytes starting at `it` into a
 	//Unicode code point. The caller must guarantee that `length` bytes are
 	//readable. For a length outside 2..4 the value of the first byte is
 	//returned unchanged. Continuation bytes are not validated.
 	uint32_t to_codepoint (std::string::const_iterator it, unsigned int length) {
 		//Go through uint8_t first: plain char may be signed, and a direct
 		//conversion would sign-extend bytes >= 0x80 into the upper bits.
 		const auto byte_at = [it](unsigned int index) {
 			return static_cast<uint32_t>(static_cast<uint8_t>(*(it + index)));
 		};

 		const uint32_t lead = byte_at(0);
 		switch (length) {
 		case 2:
 			return ((lead & 0x1f) << 6) | (byte_at(1) & 0x3f);
 		case 3:
 			return ((lead & 0x0f) << 12) | ((byte_at(1) & 0x3f) << 6) | (byte_at(2) & 0x3f);
 		case 4:
 			return ((lead & 0x07) << 18) | ((byte_at(1) & 0x3f) << 12) | ((byte_at(2) & 0x3f) << 6) | (byte_at(3) & 0x3f);
 		default:
 			return lead;
 		}
 	}
 	//see https://stackoverflow.com/questions/15631168/validate-japanese-character-in-active-record-callback/15651264#15651264
 	//True when cp is one of the fullwidth variants of the printable ASCII
 	//characters '!'..'~', i.e. U+FF01..U+FF5E. U+FF5F and U+FF60 (fullwidth
 	//white parentheses) are deliberately excluded: they have no ASCII
 	//counterpart, so folding them would produce DEL or an invalid byte.
 	[[gnu::const]] bool is_fullwidth_roman (uint32_t cp) {
 		return (cp >= 0xff01 and cp <= 0xff5e);
 	}
 } //unnamed namespace
 //Converts Shift-JIS encoded bytes to UTF-8 by looking every character up in
 //the g_shiftjis table. Bytes whose high nibble is 0x8, 0x9 or 0xE start a
 //two-byte character; everything else is treated as a single-byte character.
 //A two-byte character cut off at the end of the input is dropped.
 std::string shiftjis_to_utf8 (std::string_view in) {
 	//ShiftJis won't give 4byte UTF8, so max. 3 byte per input char are needed
 	std::string utf8(3 * in.size(), ' ');
 	size_t read_pos = 0;
 	size_t write_pos = 0;

 	const auto byte_at = [&in](size_t index) {
 		return static_cast<uint8_t>(in[index]);
 	};
 	const auto put = [&utf8, &write_pos](int value) {
 		utf8[write_pos++] = static_cast<char>(value);
 	};

 	while (read_pos < in.size()) {
 		const uint8_t lead = byte_at(read_pos);

 		//base index of the table section this character lives in;
 		//0 means one byte shiftjis
 		size_t table_index = 0;
 		switch (lead >> 4) {
 		case 0x8: table_index = 0x100; break;
 		case 0x9: table_index = 0x1100; break;
 		case 0xE: table_index = 0x2100; break;
 		default: break;
 		}

 		if (table_index != 0) {
 			//two-byte character: low nibble of the lead byte selects the row
 			table_index += static_cast<size_t>(lead & 0xf) << 8;
 			++read_pos;
 			if (read_pos >= in.size())
 				break;
 		}
 		table_index += byte_at(read_pos);
 		++read_pos;
 		table_index *= 2; //two table bytes per entry

 		//table entries are stored high byte first
 		const uint16_t code = static_cast<uint16_t>((g_shiftjis[table_index] << 8) | g_shiftjis[table_index + 1]);

 		//converting to UTF8
 		if (code < 0x80) {
 			put(code);
 		}
 		else if (code < 0x800) {
 			put(0xC0 | (code >> 6));
 			put(0x80 | (code & 0x3f));
 		}
 		else {
 			put(0xE0 | (code >> 12));
 			put(0x80 | ((code & 0xfff) >> 6));
 			put(0x80 | (code & 0x3f));
 		}
 	}

 	utf8.resize(write_pos); //remove the unnecessary bytes
 	return utf8;
 }
 //Returns a copy of the UTF-8 string `in` in which fullwidth Latin characters
 //are replaced by their ASCII counterparts, the ideographic space (U+3000) by
 //a plain space and the minus sign (U+2212) by '-'. All other characters are
 //copied unchanged. Bytes that do not form a complete UTF-8 sequence (stray
 //continuation bytes, or a sequence cut off by the end of the string) are
 //copied through one byte at a time.
 std::string full_to_halfwidth_ascii (const std::string& in) {
 	std::string retval;
 	retval.reserve(in.size()); //the result is never longer than the input

 	auto it = in.begin();
 	while (it != in.end()) {
 		const auto remaining = static_cast<std::string::size_type>(in.end() - it);
 		const auto seq_len = sequence_length(it);

 		//A length of 0 would never advance the iterator, and a length
 		//larger than what is left would read past the end of the string.
 		if (0 == seq_len or seq_len > remaining) {
 			retval.push_back(*it);
 			++it;
 			continue;
 		}

 		const auto code = to_codepoint(it, seq_len);
 		if (is_fullwidth_roman(code)) {
 			retval.push_back(0xFF & (code + 0x20));
 		}
 		else if (12288 == code) { //U+3000 ideographic space
 			retval.push_back(' ');
 		}
 		else if (8722 == code) { //U+2212 minus sign
 			retval.push_back('-');
 		}
 		else {
 #if !defined(NDEBUG)
 			std::cout << "unknown codepoint " << code << '\n';
 #endif
 			retval.append(it, it + seq_len);
 		}
 		it += seq_len;
 	}
 	return retval;
 }
 } //namespace mc
--- a/subprojects/memcard/src/shiftjis_to_utf8.hpp
+++ b/subprojects/memcard/src/shiftjis_to_utf8.hpp
@ -0,0 +1,9 @@
 #pragma once
 #include <string>
 #include <string_view>
 namespace mc {
 /// Converts the Shift-JIS encoded bytes in `in` to a UTF-8 string using a
 /// lookup table. A two-byte character that is cut off at the end of the
 /// input is dropped.
 std::string shiftjis_to_utf8 (std::string_view in);
 /// Returns a copy of the UTF-8 string `in` with fullwidth Latin characters
 /// replaced by single-byte equivalents, U+3000 (ideographic space) replaced
 /// by ' ' and U+2212 (minus sign) replaced by '-'. Other characters are
 /// copied unchanged.
 std::string full_to_halfwidth_ascii (const std::string& in);
 } //namespace mc