/*
 * Copyright 2010-2016 OpenXcom Developers.
 *
 * This file is part of OpenXcom.
 *
 * OpenXcom is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * OpenXcom is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with OpenXcom.  If not, see <http://www.gnu.org/licenses/>.
 */
#include "Unicode.h"
#include <sstream>
#include <locale>
#include <stdexcept>
#include "Logger.h"
#ifdef _WIN32
#ifndef NOMINMAX
#define NOMINMAX
#include <windows.h>
#include <shlwapi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
namespace OpenXcom
namespace Unicode
/// Locale used by the non-Windows code paths in this file for collation and
/// wide-character case conversion; assigned by getUtf8Locale().
std::locale utf8;
* Store a UTF-8 locale to use when dealing with character conversions.
* Windows doesn't have a UTF-8 locale so we just use its APIs directly.
void getUtf8Locale()
std::string loc;
#ifndef _WIN32
// Find any UTF-8 locale
FILE *fp = popen("locale -a", "r");
if (fp != NULL)
char buf[50];
while (fgets(buf, sizeof(buf), fp) != NULL)
if (strstr(buf, ".utf8") != NULL ||
strstr(buf, ".UTF-8") != NULL)
// Trim newline
size_t end = strlen(buf) - 1;
if (buf[end] == '\n')
buf[end] = '\0';
loc = buf;
// Try a UTF-8 locale (or default if none was found)
utf8 = std::locale(loc.c_str());
catch (const std::runtime_error &)
// Well we're stuck with the C locale, hope for the best
Log(LOG_INFO) << "Detected locale: " << utf8.name();
/**
 * Takes a 8-bit string encoded in UTF-8 and converts it
 * to a Unicode 32-bit string.
 * Used for rendering text.
 * @note Adapted from https://stackoverflow.com/a/148766/2683561
 * @param src UTF-8 string.
 * @return Unicode string.
 */
UString convUtf8ToUtf32(const std::string &src)
{
	if (src.empty())
		return UString();
	UString out;
	// There's never more codepoints than bytes
	out.reserve(src.size());
	UCode codepoint = 0;
	for (std::string::const_iterator i = src.begin(); i != src.end();)
	{
		unsigned char ch = static_cast<unsigned char>(*i);
		if (ch <= 0x7f)
			codepoint = ch; // single byte
		else if (ch <= 0xbf)
			codepoint = (codepoint << 6) | (ch & 0x3f); // continuation byte, add 6 more bits
		else if (ch <= 0xdf)
			codepoint = ch & 0x1f; // lead byte of a 2-byte sequence
		else if (ch <= 0xef)
			codepoint = ch & 0x0f; // lead byte of a 3-byte sequence
		else
			codepoint = ch & 0x07; // lead byte of a 4-byte sequence
		// Advance before peeking at the next byte
		++i;
		// The codepoint is complete once the next byte isn't a continuation byte
		if (i == src.end() || ((*i & 0xc0) != 0x80 && codepoint <= 0x10ffff))
		{
			out.append(1, codepoint);
		}
	}
	return out;
}
/**
 * Takes a Unicode 32-bit string and converts it
 * to a 8-bit string encoded in UTF-8.
 * Used for rendering text.
 * @note Adapted from https://stackoverflow.com/a/148766/2683561
 * @param src Unicode string.
 * @return UTF-8 string.
 */
std::string convUtf32ToUtf8(const UString &src)
{
	if (src.empty())
		return std::string();
	std::string out;
	// At least one byte is written per codepoint
	out.reserve(src.size());
	for (UString::const_iterator i = src.begin(); i != src.end(); ++i)
	{
		UCode codepoint = *i;
		if (codepoint <= 0x7f)
		{
			// 1 byte: 0xxxxxxx
			out.append(1, static_cast<char>(codepoint));
		}
		else if (codepoint <= 0x7ff)
		{
			// 2 bytes: 110xxxxx 10xxxxxx
			out.append(1, static_cast<char>(0xc0 | ((codepoint >> 6) & 0x1f)));
			out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
		}
		else if (codepoint <= 0xffff)
		{
			// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
			out.append(1, static_cast<char>(0xe0 | ((codepoint >> 12) & 0x0f)));
			out.append(1, static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
			out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
		}
		else
		{
			// 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			out.append(1, static_cast<char>(0xf0 | ((codepoint >> 18) & 0x07)));
			out.append(1, static_cast<char>(0x80 | ((codepoint >> 12) & 0x3f)));
			out.append(1, static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
			out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
		}
	}
	return out;
}
* Takes a wide-character string and converts it to a
* multibyte 8-bit string in a given encoding.
* Used for Win32 APIs.
* @param src Wide-character string.
* @param cp Codepage of the destination string.
* @return Multibyte string.
std::string convWcToMb(const std::wstring &src, unsigned int cp)
if (src.empty())
return std::string();
#ifdef _WIN32
int size = WideCharToMultiByte(cp, 0, &src[0], (int)src.size(), NULL, 0, NULL, NULL);
std::string str(size, 0);
WideCharToMultiByte(cp, 0, &src[0], (int)src.size(), &str[0], size, NULL, NULL);
return str;
assert(sizeof(wchar_t) == sizeof(UCode));
const UString *ustr = reinterpret_cast<const UString*>(&src);
return convUtf32ToUtf8(*ustr);
* Takes a multibyte 8-bit string in a given encoding
* and converts it to a wide-character string.
* Used for Win32 APIs.
* @param src Multibyte string.
* @param cp Codepage of the source string.
* @return Wide-character string.
std::wstring convMbToWc(const std::string &src, unsigned int cp)
if (src.empty())
return std::wstring();
#ifdef _WIN32
int size = MultiByteToWideChar(cp, 0, &src[0], (int)src.size(), NULL, 0);
std::wstring wstr(size, 0);
MultiByteToWideChar(cp, 0, &src[0], (int)src.size(), &wstr[0], size);
return wstr;
assert(sizeof(wchar_t) == sizeof(UCode));
UString ustr = convUtf8ToUtf32(src);
const std::wstring *wstr = reinterpret_cast<const std::wstring*>(&ustr);
return *wstr;
/**
 * Converts a filesystem path into a UTF-8 string.
 * Windows C paths are in the local ANSI codepage, so they are
 * re-encoded there; elsewhere the path is returned untouched.
 * Used for SDL APIs.
 * @param src Filesystem path.
 * @return UTF-8 string.
 */
std::string convPathToUtf8(const std::string &src)
{
#ifndef _WIN32
	return src;
#else
	const std::wstring wide = convMbToWc(src, CP_ACP);
	return convWcToMb(wide, CP_UTF8);
#endif
}
/**
 * Converts a UTF-8 string into a filesystem path.
 * Windows C paths are in the local ANSI codepage, so the string is
 * re-encoded there; elsewhere the string is returned untouched.
 * Used for SDL APIs.
 * @param src UTF-8 string.
 * @return Filesystem path.
 */
std::string convUtf8ToPath(const std::string &src)
{
#ifndef _WIN32
	return src;
#else
	const std::wstring wide = convMbToWc(src, CP_UTF8);
	return convWcToMb(wide, CP_ACP);
#endif
}
* Compares two UTF-8 strings using natural human ordering.
* @param a String A.
* @param b String B.
* @return String A comes before String B.
bool naturalCompare(const std::string &a, const std::string &b)
#ifdef _WIN32
typedef int (WINAPI *WinStrCmp)(PCWSTR, PCWSTR);
WinStrCmp pWinStrCmp = (WinStrCmp)GetProcAddress(GetModuleHandleA("shlwapi.dll"), "StrCmpLogicalW");
if (pWinStrCmp)
std::wstring wa = convMbToWc(a, CP_UTF8);
std::wstring wb = convMbToWc(b, CP_UTF8);
return (pWinStrCmp(wa.c_str(), wb.c_str()) < 0);
// fallback to lexical sort
return caseCompare(a, b);
* Compares two UTF-8 strings ignoring case.
* @param a String A.
* @param b String B.
* @return String A comes before String B.
bool caseCompare(const std::string &a, const std::string &b)
#ifdef _WIN32
std::wstring wa = convMbToWc(a, CP_UTF8);
std::wstring wb = convMbToWc(b, CP_UTF8);
return (StrCmpIW(wa.c_str(), wb.c_str()) < 0);
return (std::use_facet< std::collate<char> >(utf8).compare(&a[0], &a[0] + a.size(), &b[0], &b[0] + b.size()) < 0);
* Searches for a substring in another string ignoring case.
* @param haystack String to search.
* @param needle String to find.
* @return True if the needle is in the haystack.
bool caseFind(const std::string &haystack, const std::string &needle)
#ifdef _WIN32
std::wstring wa = convMbToWc(haystack, CP_UTF8);
std::wstring wb = convMbToWc(needle, CP_UTF8);
return (StrStrIW(wa.c_str(), wb.c_str()) != NULL);
std::wstring wa = convMbToWc(haystack, 0);
std::wstring wb = convMbToWc(needle, 0);
std::use_facet< std::ctype<wchar_t> >(utf8).toupper(&wa[0], &wa[0] + wa.size());
std::use_facet< std::ctype<wchar_t> >(utf8).toupper(&wb[0], &wb[0] + wb.size());
return (wa.find(wb) != std::wstring::npos);
* Uppercases a UTF-8 string, modified in place.
* Used for case-insensitive comparisons.
* @param s Source string.
void upperCase(std::string &s)
if (s.empty())
#ifdef _WIN32
std::wstring ws = convMbToWc(s, CP_UTF8);
s = convWcToMb(ws, CP_UTF8);
std::wstring ws = convMbToWc(s, 0);
std::use_facet< std::ctype<wchar_t> >(utf8).toupper(&ws[0], &ws[0] + ws.size());
s = convWcToMb(ws, 0);
* Lowercases a UTF-8 string, modified in place.
* Used for case-insensitive comparisons.
* @param s Source string.
void lowerCase(std::string &s)
if (s.empty())
#ifdef _WIN32
std::wstring ws = convMbToWc(s, CP_UTF8);
s = convWcToMb(ws, CP_UTF8);
std::wstring ws = convMbToWc(s, 0);
std::use_facet< std::ctype<wchar_t> >(utf8).tolower(&ws[0], &ws[0] + ws.size());
s = convWcToMb(ws, 0);
/**
 * Replaces every instance of a substring.
 * The search resumes after each inserted replacement, so text
 * introduced by a replacement is never matched again.
 * An empty search string leaves the string unchanged.
 * @param str The string to modify.
 * @param find The substring to find.
 * @param replace The substring to replace it with.
 */
void replace(std::string &str, const std::string &find, const std::string &replace)
{
	// An empty pattern matches at every position, so the loop would never end
	if (find.empty())
		return;
	for (size_t i = str.find(find); i != std::string::npos; i = str.find(find, i + replace.length()))
	{
		str.replace(i, find.length(), replace);
	}
}
/**
 * Takes an integer value and formats it as number with separators (spacing the thousands).
 * A non-breaking space is inserted between every group of three digits.
 * The currency symbol, if any, goes right before the digits and
 * the minus sign of a negative value goes in front of everything.
 * @param value The value.
 * @param currency Currency symbol.
 * @return The formatted string.
 */
std::string formatNumber(int64_t value, const std::string &currency)
{
	const std::string thousands_sep = "\xC2\xA0"; // TOK_NBSP

	const bool negative = (value < 0);
	// Negate in unsigned arithmetic, -value would overflow for the smallest int64_t
	const uint64_t magnitude = negative ? (0 - static_cast<uint64_t>(value)) : static_cast<uint64_t>(value);
	std::ostringstream ss;
	ss << magnitude;
	std::string s = ss.str();
	// Walk from the right so earlier insertions don't shift the remaining positions
	for (size_t pos = s.size(); pos > 3;)
	{
		pos -= 3;
		s.insert(pos, thousands_sep);
	}
	if (!currency.empty())
	{
		s.insert(0, currency);
	}
	if (negative)
	{
		s.insert(0, "-");
	}
	return s;
}
* Takes an integer value and formats it as currency,
* spacing the thousands and adding a $ sign to the front.
* @param funds The funding value.
* @return The formatted string.
std::string formatFunding(int64_t funds)
return formatNumber(funds, "$");
/**
 * Formats an integer value as a percentage
 * by appending a % sign to it.
 * @param value The percentage value.
 * @return The formatted string.
 */
std::string formatPercentage(int value)
{
	std::ostringstream text;
	text << value;
	text << "%";
	const std::string formatted = text.str();
	return formatted;
}
// Static analysis note (V565): An empty exception handler. Silent suppression of exceptions can hide the presence of bugs in source code during testing.