FS#90 - Improve Unicode string classes
Added the sf::String class to replace (and enhance) sf::Unicode::Text FS#138 - Rename sf::String to sf::Text git-svn-id: https://sfml.svn.sourceforge.net/svnroot/sfml/branches/sfml2@1286 4e206d99-4929-0410-ac5d-dfc041789085
This commit is contained in:
parent
9f063921c9
commit
78247bd386
46 changed files with 3003 additions and 1725 deletions
|
@ -40,7 +40,7 @@
|
|||
#include <SFML/Graphics/Shader.hpp>
|
||||
#include <SFML/Graphics/Shape.hpp>
|
||||
#include <SFML/Graphics/Sprite.hpp>
|
||||
#include <SFML/Graphics/String.hpp>
|
||||
#include <SFML/Graphics/Text.hpp>
|
||||
#include <SFML/Graphics/View.hpp>
|
||||
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
////////////////////////////////////////////////////////////
|
||||
#include <SFML/System/Resource.hpp>
|
||||
#include <SFML/System/Vector2.hpp>
|
||||
#include <SFML/System/Unicode.hpp>
|
||||
#include <SFML/System/String.hpp>
|
||||
#include <SFML/Graphics/Glyph.hpp>
|
||||
#include <SFML/Graphics/Image.hpp>
|
||||
#include <SFML/Graphics/Rect.hpp>
|
||||
|
@ -40,16 +40,15 @@
|
|||
|
||||
namespace sf
|
||||
{
|
||||
class String;
|
||||
|
||||
namespace priv
|
||||
{
|
||||
class FontLoader;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Font is the low-level class for loading and
|
||||
/// manipulating character fonts. This class is meant to
|
||||
/// be used by sf::String
|
||||
/// be used by sf::Text
|
||||
////////////////////////////////////////////////////////////
|
||||
class SFML_API Font : public Resource<Font>
|
||||
{
|
||||
|
@ -71,7 +70,7 @@ public :
|
|||
/// \return True if loading was successful
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
bool LoadFromFile(const std::string& filename, unsigned int charSize = 30, const Unicode::Text& charset = ourDefaultCharset);
|
||||
bool LoadFromFile(const std::string& filename, unsigned int charSize = 30, String charset = ourDefaultCharset);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Load the font from a file in memory
|
||||
|
@ -84,7 +83,7 @@ public :
|
|||
/// \return True if loading was successful
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
bool LoadFromMemory(const char* data, std::size_t sizeInBytes, unsigned int charSize = 30, const Unicode::Text& charset = ourDefaultCharset);
|
||||
bool LoadFromMemory(const char* data, std::size_t sizeInBytes, unsigned int charSize = 30, String charset = ourDefaultCharset);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Get the base size of characters in the font;
|
||||
|
|
|
@ -22,14 +22,14 @@
|
|||
//
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef SFML_STRING_HPP
|
||||
#define SFML_STRING_HPP
|
||||
#ifndef SFML_TEXT_HPP
|
||||
#define SFML_TEXT_HPP
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Headers
|
||||
////////////////////////////////////////////////////////////
|
||||
#include <SFML/System/Resource.hpp>
|
||||
#include <SFML/System/Unicode.hpp>
|
||||
#include <SFML/System/String.hpp>
|
||||
#include <SFML/Graphics/Drawable.hpp>
|
||||
#include <SFML/Graphics/Font.hpp>
|
||||
#include <SFML/Graphics/Rect.hpp>
|
||||
|
@ -39,9 +39,9 @@
|
|||
namespace sf
|
||||
{
|
||||
////////////////////////////////////////////////////////////
|
||||
/// String defines a graphical 2D text, that can be drawn on screen
|
||||
/// Text defines a graphical 2D text, that can be drawn on screen
|
||||
////////////////////////////////////////////////////////////
|
||||
class SFML_API String : public Drawable
|
||||
class SFML_API Text : public Drawable
|
||||
{
|
||||
public :
|
||||
|
||||
|
@ -60,25 +60,25 @@ public :
|
|||
/// Default constructor
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String();
|
||||
Text();
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Construct the string from any kind of text
|
||||
///
|
||||
/// \param text : Text assigned to the string
|
||||
/// \param font : Font used to draw the string
|
||||
/// \param size : Characters size
|
||||
/// \param string : Text assigned to the string
|
||||
/// \param font : Font used to draw the string
|
||||
/// \param size : Characters size
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
explicit String(const Unicode::Text& text, const Font& font = Font::GetDefaultFont(), float size = 30.f);
|
||||
explicit Text(const String& string, const Font& font = Font::GetDefaultFont(), float size = 30.f);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Set the text (from any kind of string)
|
||||
///
|
||||
/// \param text : New text
|
||||
/// \param string : New text
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
void SetText(const Unicode::Text& text);
|
||||
void SetString(const String& string);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Set the font of the string
|
||||
|
@ -112,7 +112,7 @@ public :
|
|||
/// \return String's text
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
const Unicode::Text& GetText() const;
|
||||
const String& GetString() const;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Get the font used by the string
|
||||
|
@ -177,7 +177,7 @@ private :
|
|||
////////////////////////////////////////////////////////////
|
||||
// Member data
|
||||
////////////////////////////////////////////////////////////
|
||||
Unicode::Text myText; ///< Text to display
|
||||
String myString; ///< String to display
|
||||
ResourcePtr<Font> myFont; ///< Font used to display the string
|
||||
float mySize; ///< Size of the characters
|
||||
unsigned long myStyle; ///< Text style (see Style enum)
|
||||
|
@ -188,4 +188,4 @@ private :
|
|||
} // namespace sf
|
||||
|
||||
|
||||
#endif // SFML_STRING_HPP
|
||||
#endif // SFML_TEXT_HPP
|
|
@ -35,10 +35,11 @@
|
|||
#include <SFML/System/Mutex.hpp>
|
||||
#include <SFML/System/Randomizer.hpp>
|
||||
#include <SFML/System/Sleep.hpp>
|
||||
#include <SFML/System/String.hpp>
|
||||
#include <SFML/System/Thread.hpp>
|
||||
#include <SFML/System/ThreadLocal.hpp>
|
||||
#include <SFML/System/ThreadLocalPtr.hpp>
|
||||
#include <SFML/System/Unicode.hpp>
|
||||
#include <SFML/System/Utf.hpp>
|
||||
#include <SFML/System/Vector2.hpp>
|
||||
#include <SFML/System/Vector3.hpp>
|
||||
|
||||
|
|
530
include/SFML/System/String.hpp
Normal file
530
include/SFML/System/String.hpp
Normal file
|
@ -0,0 +1,530 @@
|
|||
////////////////////////////////////////////////////////////
|
||||
//
|
||||
// SFML - Simple and Fast Multimedia Library
|
||||
// Copyright (C) 2007-2009 Laurent Gomila (laurent.gom@gmail.com)
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied warranty.
|
||||
// In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it freely,
|
||||
// subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented;
|
||||
// you must not claim that you wrote the original software.
|
||||
// If you use this software in a product, an acknowledgment
|
||||
// in the product documentation would be appreciated but is not required.
|
||||
//
|
||||
// 2. Altered source versions must be plainly marked as such,
|
||||
// and must not be misrepresented as being the original software.
|
||||
//
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
//
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef SFML_STRING_HPP
|
||||
#define SFML_STRING_HPP
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Headers
|
||||
////////////////////////////////////////////////////////////
|
||||
#include <SFML/Config.hpp>
|
||||
#include <locale>
|
||||
#include <string>
|
||||
|
||||
|
||||
namespace sf
|
||||
{
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Utility string class that automatically handles
|
||||
/// conversions between types and encodings
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
class SFML_API String
|
||||
{
|
||||
public :
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Types
|
||||
////////////////////////////////////////////////////////////
|
||||
typedef std::basic_string<Uint32>::iterator Iterator; ///< Iterator type
|
||||
typedef std::basic_string<Uint32>::const_iterator ConstIterator; ///< Constant iterator type
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Default constructor
|
||||
///
|
||||
/// This constructor creates an empty string.
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String();
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Construct from a null-terminated C-style ANSI string
|
||||
///
|
||||
/// The source string is converted to UTF-32 according
|
||||
/// to the current locale. See the other constructor for
|
||||
/// explicitly passing the locale to use.
|
||||
///
|
||||
/// \param ansiString ANSI string to convert
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String(const char* ansiString);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Construct from an ANSI string
|
||||
///
|
||||
/// The source string is converted to UTF-32 according
|
||||
/// to the current global locale. See the other constructor for
|
||||
/// explicitly passing the locale to use.
|
||||
///
|
||||
/// \param ansiString ANSI string to convert
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String(const std::string& ansiString);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Construct from a null-terminated C-style ANSI string and a locale
|
||||
///
|
||||
/// The source string is converted to UTF-32 according
|
||||
/// to the given locale. If you want to use the current global
|
||||
/// locale, use the other constructor instead.
|
||||
///
|
||||
/// \param ansiString ANSI string to convert
|
||||
/// \param locale Locale to use for conversion
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String(const char* ansiString, const std::locale& locale);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Construct from an ANSI string and a locale
|
||||
///
|
||||
/// The source string is converted to UTF-32 according
|
||||
/// to the given locale. If you want to use the current global
|
||||
/// locale, use the other constructor instead.
|
||||
///
|
||||
/// \param ansiString ANSI string to convert
|
||||
/// \param locale Locale to use for conversion
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String(const std::string& ansiString, const std::locale& locale);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Construct from a null-terminated C-style wide string
|
||||
///
|
||||
/// \param wideString Wide string to convert
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String(const wchar_t* wideString);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Construct from a wide string
|
||||
///
|
||||
/// \param wideString Wide string to convert
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String(const std::wstring& wideString);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Construct from a null-terminated C-style UTF-32 string
|
||||
///
|
||||
/// \param utf32String UTF-32 string to assign
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String(const Uint32* utf32String);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Construct from a UTF-32 string
|
||||
///
|
||||
/// \param utf32String UTF-32 string to assign
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String(const std::basic_string<Uint32>& utf32String);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Copy constructor
|
||||
///
|
||||
/// \param copy Instance to copy
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String(const String& copy);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Implicit cast operator to std::string (ANSI string)
|
||||
///
|
||||
/// The current global locale is used for conversion. If you
|
||||
/// want to explicitly specify a locale, see ToAnsiString.
|
||||
/// Characters that do not fit in the target encoding are
|
||||
/// discarded from the returned string.
|
||||
/// This operator is defined for convenience, and is equivalent
|
||||
/// to calling ToAnsiString().
|
||||
///
|
||||
/// \return Converted ANSI string
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
operator std::string() const;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Implicit cast operator to std::wstring (wide string)
|
||||
///
|
||||
/// Characters that do not fit in the target encoding are
|
||||
/// discarded from the returned string.
|
||||
/// This operator is defined for convenience, and is equivalent
|
||||
/// to calling ToWideString().
|
||||
///
|
||||
/// \return Converted wide string
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
operator std::wstring() const;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert the unicode string to an ANSI string
|
||||
///
|
||||
/// The current global locale is used for conversion. If you
|
||||
/// want to explicitly specify a locale, see the other overload
|
||||
/// of ToAnsiString.
|
||||
/// Characters that do not fit in the target encoding are
|
||||
/// discarded from the returned string.
|
||||
///
|
||||
/// \return Converted ANSI string
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
std::string ToAnsiString() const;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert the unicode string to an ANSI string
|
||||
///
|
||||
/// The UTF-32 string is converted to an ANSI string in
|
||||
/// the encoding defined by \a locale. If you want to use
|
||||
/// the current global locale, see the other overload
|
||||
/// of ToAnsiString.
|
||||
/// Characters that do not fit in the target encoding are
|
||||
/// discarded from the returned string.
|
||||
///
|
||||
/// \param locale Locale to use for conversion
|
||||
///
|
||||
/// \return Converted ANSI string
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
std::string ToAnsiString(const std::locale& locale) const;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert the unicode string to a wide string
|
||||
///
|
||||
/// Characters that do not fit in the target encoding are
|
||||
/// discarded from the returned string.
|
||||
///
|
||||
/// \return Converted wide string
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
std::wstring ToWideString() const;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of assignment operator
|
||||
///
|
||||
/// \param right Instance to assign
|
||||
///
|
||||
/// \return Reference to self
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String& operator =(const String& right);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of += operator to append an ANSI character
|
||||
///
|
||||
/// \param right Character to append
|
||||
///
|
||||
/// \return Reference to self
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String& operator +=(char right);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of += operator to append a wide character
|
||||
///
|
||||
/// \param right Character to append
|
||||
///
|
||||
/// \return Reference to self
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String& operator +=(wchar_t right);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of += operator to append a UTF-32 character
|
||||
///
|
||||
/// \param right Character to append
|
||||
///
|
||||
/// \return Reference to self
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String& operator +=(Uint32 right);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of += operator to append a UTF-32 string
|
||||
///
|
||||
/// \param right String to append
|
||||
///
|
||||
/// \return Reference to self
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
String& operator +=(const String& right);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of [] operator to access a character by its position
|
||||
///
|
||||
/// This function provides read-only access to characters.
|
||||
/// Note: this function doesn't throw if \a index is out of range.
|
||||
///
|
||||
/// \param index Index of the character to get
|
||||
///
|
||||
/// \return Character at position \a index
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
Uint32 operator [](std::size_t index) const;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of [] operator to access a character by its position
|
||||
///
|
||||
/// This function provides read and write access to characters.
|
||||
/// Note: this function doesn't throw if \a index is out of range.
|
||||
///
|
||||
/// \param index Index of the character to get
|
||||
///
|
||||
/// \return Reference to the character at position \a index
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
Uint32& operator [](std::size_t index);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Clear the string
|
||||
///
|
||||
/// This function removes all the characters from the string.
|
||||
///
|
||||
/// \see IsEmpty, Erase
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
void Clear();
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Get the size of the string
|
||||
///
|
||||
/// \return Number of characters in the string
|
||||
///
|
||||
/// \see IsEmpty
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
std::size_t GetSize() const;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Check whether the string is empty or not
|
||||
///
|
||||
/// \return True if the string is empty (i.e. contains no character)
|
||||
///
|
||||
/// \see Clear, GetSize
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
bool IsEmpty() const;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Erase one or more characters from the string
|
||||
///
|
||||
/// This function removes a sequence of \a count characters
|
||||
/// starting from \a position.
|
||||
///
|
||||
/// \param position Position of the first character to erase
|
||||
/// \param count Number of characters to erase
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
void Erase(std::size_t position, std::size_t count = 1);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Get a pointer to the C-style array of characters
|
||||
///
|
||||
/// This function provides read-only access to a
|
||||
/// null-terminated C-style representation of the string.
|
||||
/// The returned pointer is temporary and is meant only for
|
||||
/// immediate use, thus it is not recommended to store it.
|
||||
///
|
||||
/// \return Read-only pointer to the array of characters
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
const Uint32* GetData() const;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Return an iterator to the beginning of the string
|
||||
///
|
||||
/// \return Read-write iterator to the beginning of the string characters
|
||||
///
|
||||
/// \see End
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
Iterator Begin();
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Return an iterator to the beginning of the string
|
||||
///
|
||||
/// \return Read-only iterator to the beginning of the string characters
|
||||
///
|
||||
/// \see End
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
ConstIterator Begin() const;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Return an iterator to the end of the string
|
||||
///
|
||||
/// The end iterator refers to 1 position past the last character;
|
||||
/// thus it represents an invalid character and should never be
|
||||
/// accessed.
|
||||
///
|
||||
/// \return Read-write iterator to the end of the string characters
|
||||
///
|
||||
/// \see Begin
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
Iterator End();
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Return an iterator to the end of the string
|
||||
///
|
||||
/// The end iterator refers to 1 position past the last character;
|
||||
/// thus it represents an invalid character and should never be
|
||||
/// accessed.
|
||||
///
|
||||
/// \return Read-only iterator to the end of the string characters
|
||||
///
|
||||
/// \see Begin
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
ConstIterator End() const;
|
||||
|
||||
private :
|
||||
|
||||
friend SFML_API bool operator ==(const String& left, const String& right);
|
||||
friend SFML_API bool operator <(const String& left, const String& right);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Member data
|
||||
////////////////////////////////////////////////////////////
|
||||
std::basic_string<Uint32> myString; ///< Internal string of UTF-32 characters
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of == operator to compare two UTF-32 strings
|
||||
///
|
||||
/// \param left Left operand (a string)
|
||||
/// \param right Right operand (a string)
|
||||
///
|
||||
/// \return True if both strings are equal
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
SFML_API bool operator ==(const String& left, const String& right);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of != operator to compare two UTF-32 strings
|
||||
///
|
||||
/// \param left Left operand (a string)
|
||||
/// \param right Right operand (a string)
|
||||
///
|
||||
/// \return True if both strings are different
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
SFML_API bool operator !=(const String& left, const String& right);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of < operator to compare two UTF-32 strings
|
||||
///
|
||||
/// \param left Left operand (a string)
|
||||
/// \param right Right operand (a string)
|
||||
///
|
||||
/// \return True if \a left is alphabetically less than \a right
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
SFML_API bool operator <(const String& left, const String& right);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of > operator to compare two UTF-32 strings
|
||||
///
|
||||
/// \param left Left operand (a string)
|
||||
/// \param right Right operand (a string)
|
||||
///
|
||||
/// \return True if \a left is alphabetically greater than \a right
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
SFML_API bool operator >(const String& left, const String& right);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of <= operator to compare two UTF-32 strings
|
||||
///
|
||||
/// \param left Left operand (a string)
|
||||
/// \param right Right operand (a string)
|
||||
///
|
||||
/// \return True if \a left is alphabetically less than or equal to \a right
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
SFML_API bool operator <=(const String& left, const String& right);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of >= operator to compare two UTF-32 strings
|
||||
///
|
||||
/// \param left Left operand (a string)
|
||||
/// \param right Right operand (a string)
|
||||
///
|
||||
/// \return True if \a left is alphabetically greater than or equal to \a right
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
SFML_API bool operator >=(const String& left, const String& right);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of binary + operator to concatenate a string and an ANSI character
|
||||
///
|
||||
/// \param left Source string
|
||||
/// \param right Character to concatenate
|
||||
///
|
||||
/// \return Concatenated string
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
SFML_API String operator +(const String& left, char right);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of binary + operator to concatenate a string and a wide character
|
||||
///
|
||||
/// \param left Source string
|
||||
/// \param right Character to concatenate
|
||||
///
|
||||
/// \return Concatenated string
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
SFML_API String operator +(const String& left, wchar_t right);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of binary + operator to concatenate a string and a UTF-32 character
|
||||
///
|
||||
/// \param left Source string
|
||||
/// \param right Character to concatenate
|
||||
///
|
||||
/// \return Concatenated string
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
SFML_API String operator +(const String& left, Uint32 right);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Overload of binary + operator to concatenate two strings
|
||||
///
|
||||
/// \param left Left operand (a string)
|
||||
/// \param right Right operand (a string)
|
||||
///
|
||||
/// \return Concatenated string
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
SFML_API String operator +(const String& left, const String& right);
|
||||
|
||||
} // namespace sf
|
||||
|
||||
|
||||
#endif // SFML_STRING_HPP
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \class sf::String
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
|
@ -1,290 +0,0 @@
|
|||
////////////////////////////////////////////////////////////
|
||||
//
|
||||
// SFML - Simple and Fast Multimedia Library
|
||||
// Copyright (C) 2007-2009 Laurent Gomila (laurent.gom@gmail.com)
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied warranty.
|
||||
// In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it freely,
|
||||
// subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented;
|
||||
// you must not claim that you wrote the original software.
|
||||
// If you use this software in a product, an acknowledgment
|
||||
// in the product documentation would be appreciated but is not required.
|
||||
//
|
||||
// 2. Altered source versions must be plainly marked as such,
|
||||
// and must not be misrepresented as being the original software.
|
||||
//
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
//
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef SFML_UNICODE_HPP
|
||||
#define SFML_UNICODE_HPP
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Headers
|
||||
////////////////////////////////////////////////////////////
|
||||
#include <SFML/Config.hpp>
|
||||
#include <iterator>
|
||||
#include <locale>
|
||||
#include <string>
|
||||
#include <stdlib.h>
|
||||
|
||||
|
||||
namespace sf
|
||||
{
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Provides utility functions to convert from and to
|
||||
/// any unicode and ASCII encoding
|
||||
////////////////////////////////////////////////////////////
|
||||
class SFML_API Unicode
|
||||
{
|
||||
public :
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Define a string type for each encoding
|
||||
/// Warning : in UTF8 and UTF16 strings, one element doesn't
|
||||
/// necessarily map to one character; only a UTF32 element
|
||||
/// is wide enough to hold all possible unicode values
|
||||
////////////////////////////////////////////////////////////
|
||||
typedef std::basic_string<Uint8> UTF8String;
|
||||
typedef std::basic_string<Uint16> UTF16String;
|
||||
typedef std::basic_string<Uint32> UTF32String;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// This class is an abstract definition of a unicode text,
|
||||
/// it can be converted from and to any kind of string
|
||||
/// and encoding
|
||||
////////////////////////////////////////////////////////////
|
||||
class SFML_API Text
|
||||
{
|
||||
public :
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Default constructor (empty text)
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
Text();
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Construct the unicode text from any type of string
|
||||
///
|
||||
/// \param str : String to convert
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
Text(const char* str);
|
||||
Text(const wchar_t* str);
|
||||
Text(const Uint8* str);
|
||||
Text(const Uint16* str);
|
||||
Text(const Uint32* str);
|
||||
Text(const std::string& str);
|
||||
Text(const std::wstring& str);
|
||||
Text(const Unicode::UTF8String& str);
|
||||
Text(const Unicode::UTF16String& str);
|
||||
Text(const Unicode::UTF32String& str);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Operator to cast the text to any type of string
|
||||
///
|
||||
/// \return Converted string
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
operator std::string () const;
|
||||
operator std::wstring () const;
|
||||
operator Unicode::UTF8String () const;
|
||||
operator Unicode::UTF16String () const;
|
||||
operator const Unicode::UTF32String&() const;
|
||||
|
||||
private :
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Data member
|
||||
////////////////////////////////////////////////////////////
|
||||
sf::Unicode::UTF32String myUTF32String; ///< UTF-32 unicode text
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an UTF-32 characters range
|
||||
/// to an ANSI characters range, using the given locale
|
||||
///
|
||||
/// \param begin : Iterator pointing to the beginning of the input sequence
|
||||
/// \param end : Iterator pointing to the end of the input sequence
|
||||
/// \param output : Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement : Replacement character for characters not convertible to output encoding ('?' by default -- use 0 to use no replacement character)
|
||||
/// \param locale : Locale to use for conversion
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out UTF32ToANSI(In begin, In end, Out output, char replacement = '?', const std::locale& locale = GetDefaultLocale());
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an ANSI characters range
|
||||
/// to an UTF-32 characters range, using the given locale
|
||||
///
|
||||
/// \param begin : Iterator pointing to the beginning of the input sequence
|
||||
/// \param end : Iterator pointing to the end of the input sequence
|
||||
/// \param output : Iterator pointing to the beginning of the output sequence
|
||||
/// \param locale : Locale to use for conversion
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ANSIToUTF32(In begin, In end, Out output, const std::locale& locale = GetDefaultLocale());
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an UTF-8 characters range
|
||||
/// to an UTF-16 characters range, using the given locale
|
||||
///
|
||||
/// \param begin : Iterator pointing to the beginning of the input sequence
|
||||
/// \param end : Iterator pointing to the end of the input sequence
|
||||
/// \param output : Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement : Replacement character for characters not convertible to output encoding ('?' by default -- use 0 to use no replacement character)
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out UTF8ToUTF16(In begin, In end, Out output, Uint16 replacement = '?');
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an UTF-8 characters range
|
||||
/// to an UTF-32 characters range, using the given locale
|
||||
///
|
||||
/// \param begin : Iterator pointing to the beginning of the input sequence
|
||||
/// \param end : Iterator pointing to the end of the input sequence
|
||||
/// \param output : Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement : Replacement character for characters not convertible to output encoding ('?' by default -- use 0 to use no replacement character)
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out UTF8ToUTF32(In begin, In end, Out output, Uint32 replacement = '?');
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an UTF-16 characters range
|
||||
/// to an UTF-8 characters range, using the given locale
|
||||
///
|
||||
/// \param begin : Iterator pointing to the beginning of the input sequence
|
||||
/// \param end : Iterator pointing to the end of the input sequence
|
||||
/// \param output : Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement : Replacement character for characters not convertible to output encoding ('?' by default -- use 0 to use no replacement character)
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out UTF16ToUTF8(In begin, In end, Out output, Uint8 replacement = '?');
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an UTF-16 characters range
|
||||
/// to an UTF-32 characters range, using the given locale
|
||||
///
|
||||
/// \param begin : Iterator pointing to the beginning of the input sequence
|
||||
/// \param end : Iterator pointing to the end of the input sequence
|
||||
/// \param output : Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement : Replacement character for characters not convertible to output encoding ('?' by default -- use 0 to use no replacement character)
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out UTF16ToUTF32(In begin, In end, Out output, Uint32 replacement = '?');
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an UTF-32 characters range
|
||||
/// to an UTF-8 characters range, using the given locale
|
||||
///
|
||||
/// \param begin : Iterator pointing to the beginning of the input sequence
|
||||
/// \param end : Iterator pointing to the end of the input sequence
|
||||
/// \param output : Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement : Replacement character for characters not convertible to output encoding ('?' by default -- use 0 to use no replacement character)
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out UTF32ToUTF8(In begin, In end, Out output, Uint8 replacement = '?');
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an UTF-32 characters range
|
||||
/// to an UTF-16 characters range, using the given locale
|
||||
///
|
||||
/// \param begin : Iterator pointing to the beginning of the input sequence
|
||||
/// \param end : Iterator pointing to the end of the input sequence
|
||||
/// \param output : Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement : Replacement character for characters not convertible to output encoding ('?' by default -- use 0 to use no replacement character)
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out UTF32ToUTF16(In begin, In end, Out output, Uint16 replacement = '?');
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Get the number of characters composing an UTF-8 string
|
||||
///
|
||||
/// \param begin : Iterator pointing to the beginning of the input sequence
|
||||
/// \param end : Iterator pointing to the end of the input sequence
|
||||
///
|
||||
/// \return Count of the characters in the string
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
static std::size_t GetUTF8Length(In begin, In end);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Get the number of characters composing an UTF-16 string
|
||||
///
|
||||
/// \param begin : Iterator pointing to the beginning of the input sequence
|
||||
/// \param end : Iterator pointing to the end of the input sequence
|
||||
///
|
||||
/// \return Count of the characters in the string
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
static std::size_t GetUTF16Length(In begin, In end);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Get the number of characters composing an UTF-32 string
|
||||
///
|
||||
/// \param begin : Iterator pointing to the beginning of the input sequence
|
||||
/// \param end : Iterator pointing to the end of the input sequence
|
||||
///
|
||||
/// \return Count of the characters in the string
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
static std::size_t GetUTF32Length(In begin, In end);
|
||||
|
||||
private :
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Get the default system locale
|
||||
///
|
||||
/// \return Reference to the default system locale
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
static const std::locale& GetDefaultLocale();
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Static member data
|
||||
////////////////////////////////////////////////////////////
|
||||
static const int UTF8TrailingBytes[256]; ///< Lookup table to find the length of an UTF-8 sequence
|
||||
static const Uint32 UTF8Offsets[6]; ///< Magic values to subtract during UTF-8 conversions
|
||||
static const Uint8 UTF8FirstBytes[7]; ///< First bytes for UTF-8 sequences
|
||||
};
|
||||
|
||||
#include <SFML/System/Unicode.inl>
|
||||
|
||||
} // namespace sf
|
||||
|
||||
|
||||
#endif // SFML_UNICODE_HPP
|
|
@ -1,474 +0,0 @@
|
|||
////////////////////////////////////////////////////////////
|
||||
//
|
||||
// SFML - Simple and Fast Multimedia Library
|
||||
// Copyright (C) 2007-2009 Laurent Gomila (laurent.gom@gmail.com)
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied warranty.
|
||||
// In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it freely,
|
||||
// subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented;
|
||||
// you must not claim that you wrote the original software.
|
||||
// If you use this software in a product, an acknowledgment
|
||||
// in the product documentation would be appreciated but is not required.
|
||||
//
|
||||
// 2. Altered source versions must be plainly marked as such,
|
||||
// and must not be misrepresented as being the original software.
|
||||
//
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
//
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an UTF-32 characters range
|
||||
/// to an ANSI characters range, using the given locale
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
inline Out Unicode::UTF32ToANSI(In begin, In end, Out output, char replacement, const std::locale& locale)
|
||||
{
|
||||
#ifdef __MINGW32__
|
||||
|
||||
// MinGW has a almost no support for unicode stuff
|
||||
// As a consequence, the MinGW version of this function can only use the default locale
|
||||
// and ignores the one passed as parameter
|
||||
while (begin < end)
|
||||
{
|
||||
char character = 0;
|
||||
if (wctomb(&character, static_cast<wchar_t>(*begin++)) >= 0)
|
||||
*output++ = character;
|
||||
else if (replacement)
|
||||
*output++ = replacement;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Get the facet of the locale which deals with character conversion
|
||||
const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
|
||||
|
||||
// Use the facet to convert each character of the input string
|
||||
while (begin < end)
|
||||
*output++ = facet.narrow(static_cast<wchar_t>(*begin++), replacement);
|
||||
|
||||
#endif
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an ANSI characters range
|
||||
/// to an UTF-32 characters range, using the given locale
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
inline Out Unicode::ANSIToUTF32(In begin, In end, Out output, const std::locale& locale)
|
||||
{
|
||||
#ifdef __MINGW32__
|
||||
|
||||
// MinGW has a almost no support for unicode stuff
|
||||
// As a consequence, the MinGW version of this function can only use the default locale
|
||||
// and ignores the one passed as parameter
|
||||
while (begin < end)
|
||||
{
|
||||
wchar_t character = 0;
|
||||
mbtowc(&character, &*begin, 1);
|
||||
begin++;
|
||||
*output++ = static_cast<Uint32>(character);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Get the facet of the locale which deals with character conversion
|
||||
const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
|
||||
|
||||
// Use the facet to convert each character of the input string
|
||||
while (begin < end)
|
||||
*output++ = static_cast<Uint32>(facet.widen(*begin++));
|
||||
|
||||
#endif
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an UTF-8 characters range
|
||||
/// to an UTF-16 characters range, using the given locale
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
inline Out Unicode::UTF8ToUTF16(In begin, In end, Out output, Uint16 replacement)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 character = 0;
|
||||
int trailingBytes = UTF8TrailingBytes[static_cast<int>(*begin)];
|
||||
if (begin + trailingBytes < end)
|
||||
{
|
||||
// First decode the UTF-8 character
|
||||
switch (trailingBytes)
|
||||
{
|
||||
case 5 : character += *begin++; character <<= 6;
|
||||
case 4 : character += *begin++; character <<= 6;
|
||||
case 3 : character += *begin++; character <<= 6;
|
||||
case 2 : character += *begin++; character <<= 6;
|
||||
case 1 : character += *begin++; character <<= 6;
|
||||
case 0 : character += *begin++;
|
||||
}
|
||||
character -= UTF8Offsets[trailingBytes];
|
||||
|
||||
// Then encode it in UTF-16
|
||||
if (character < 0xFFFF)
|
||||
{
|
||||
// Character can be converted directly to 16 bits, just need to check it's in the valid range
|
||||
if ((character >= 0xD800) && (character <= 0xDFFF))
|
||||
{
|
||||
// Invalid character (this range is reserved)
|
||||
if (replacement)
|
||||
*output++ = replacement;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Valid character directly convertible to 16 bits
|
||||
*output++ = static_cast<Uint16>(character);
|
||||
}
|
||||
}
|
||||
else if (character > 0x0010FFFF)
|
||||
{
|
||||
// Invalid character (greater than the maximum unicode value)
|
||||
if (replacement)
|
||||
*output++ = replacement;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Character will be converted to 2 UTF-16 elements
|
||||
character -= 0x0010000;
|
||||
*output++ = static_cast<Uint16>((character >> 10) + 0xD800);
|
||||
*output++ = static_cast<Uint16>((character & 0x3FFUL) + 0xDC00);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an UTF-8 characters range
|
||||
/// to an UTF-32 characters range, using the given locale
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
inline Out Unicode::UTF8ToUTF32(In begin, In end, Out output, Uint32 replacement)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 character = 0;
|
||||
int trailingBytes = UTF8TrailingBytes[static_cast<int>(*begin)];
|
||||
if (begin + trailingBytes < end)
|
||||
{
|
||||
// First decode the UTF-8 character
|
||||
switch (trailingBytes)
|
||||
{
|
||||
case 5 : character += *begin++; character <<= 6;
|
||||
case 4 : character += *begin++; character <<= 6;
|
||||
case 3 : character += *begin++; character <<= 6;
|
||||
case 2 : character += *begin++; character <<= 6;
|
||||
case 1 : character += *begin++; character <<= 6;
|
||||
case 0 : character += *begin++;
|
||||
}
|
||||
character -= UTF8Offsets[trailingBytes];
|
||||
|
||||
// Then write it if valid
|
||||
if ((character < 0xD800) || (character > 0xDFFF))
|
||||
{
|
||||
// Valid UTF-32 character
|
||||
*output++ = character;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Invalid UTF-32 character
|
||||
if (replacement)
|
||||
*output++ = replacement;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an UTF-16 characters range
|
||||
/// to an UTF-8 characters range, using the given locale
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
inline Out Unicode::UTF16ToUTF8(In begin, In end, Out output, Uint8 replacement)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 character = *begin++;
|
||||
|
||||
// If it's a surrogate pair, first convert to a single UTF-32 character
|
||||
if ((character >= 0xD800) && (character <= 0xDBFF))
|
||||
{
|
||||
if (begin < end)
|
||||
{
|
||||
// The second element is valid : convert the two elements to a UTF-32 character
|
||||
Uint32 d = *begin++;
|
||||
if ((d >= 0xDC00) && (d <= 0xDFFF))
|
||||
character = static_cast<Uint32>(((character - 0xD800) << 10) + (d - 0xDC00) + 0x0010000);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Invalid second element
|
||||
if (replacement)
|
||||
*output++ = replacement;
|
||||
}
|
||||
}
|
||||
|
||||
// Then convert to UTF-8
|
||||
if (character > 0x0010FFFF)
|
||||
{
|
||||
// Invalid character (greater than the maximum unicode value)
|
||||
if (replacement)
|
||||
*output++ = replacement;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Valid character
|
||||
|
||||
// Get number of bytes to write
|
||||
int bytesToWrite = 1;
|
||||
if (character < 0x80) bytesToWrite = 1;
|
||||
else if (character < 0x800) bytesToWrite = 2;
|
||||
else if (character < 0x10000) bytesToWrite = 3;
|
||||
else if (character <= 0x0010FFFF) bytesToWrite = 4;
|
||||
|
||||
// Extract bytes to write
|
||||
Uint8 bytes[4];
|
||||
switch (bytesToWrite)
|
||||
{
|
||||
case 4 : bytes[3] = static_cast<Uint8>((character | 0x80) & 0xBF); character >>= 6;
|
||||
case 3 : bytes[2] = static_cast<Uint8>((character | 0x80) & 0xBF); character >>= 6;
|
||||
case 2 : bytes[1] = static_cast<Uint8>((character | 0x80) & 0xBF); character >>= 6;
|
||||
case 1 : bytes[0] = static_cast<Uint8> (character | UTF8FirstBytes[bytesToWrite]);
|
||||
}
|
||||
|
||||
// Add them to the output
|
||||
const Uint8* currentByte = bytes;
|
||||
switch (bytesToWrite)
|
||||
{
|
||||
case 4 : *output++ = *currentByte++;
|
||||
case 3 : *output++ = *currentByte++;
|
||||
case 2 : *output++ = *currentByte++;
|
||||
case 1 : *output++ = *currentByte++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an UTF-16 characters range
|
||||
/// to an UTF-32 characters range, using the given locale
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
inline Out Unicode::UTF16ToUTF32(In begin, In end, Out output, Uint32 replacement)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint16 character = *begin++;
|
||||
if ((character >= 0xD800) && (character <= 0xDBFF))
|
||||
{
|
||||
// We have a surrogate pair, ie. a character composed of two elements
|
||||
if (begin < end)
|
||||
{
|
||||
Uint16 d = *begin++;
|
||||
if ((d >= 0xDC00) && (d <= 0xDFFF))
|
||||
{
|
||||
// The second element is valid : convert the two elements to a UTF-32 character
|
||||
*output++ = static_cast<Uint32>(((character - 0xD800) << 10) + (d - 0xDC00) + 0x0010000);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Invalid second element
|
||||
if (replacement)
|
||||
*output++ = replacement;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if ((character >= 0xDC00) && (character <= 0xDFFF))
|
||||
{
|
||||
// Invalid character
|
||||
if (replacement)
|
||||
*output++ = replacement;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Valid character directly convertible to UTF-32
|
||||
*output++ = static_cast<Uint32>(character);
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an UTF-32 characters range
|
||||
/// to an UTF-8 characters range, using the given locale
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
inline Out Unicode::UTF32ToUTF8(In begin, In end, Out output, Uint8 replacement)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 character = *begin++;
|
||||
if (character > 0x0010FFFF)
|
||||
{
|
||||
// Invalid character (greater than the maximum unicode value)
|
||||
if (replacement)
|
||||
*output++ = replacement;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Valid character
|
||||
|
||||
// Get number of bytes to write
|
||||
int bytesToWrite = 1;
|
||||
if (character < 0x80) bytesToWrite = 1;
|
||||
else if (character < 0x800) bytesToWrite = 2;
|
||||
else if (character < 0x10000) bytesToWrite = 3;
|
||||
else if (character <= 0x0010FFFF) bytesToWrite = 4;
|
||||
|
||||
// Extract bytes to write
|
||||
Uint8 bytes[4];
|
||||
switch (bytesToWrite)
|
||||
{
|
||||
case 4 : bytes[3] = static_cast<Uint8>((character | 0x80) & 0xBF); character >>= 6;
|
||||
case 3 : bytes[2] = static_cast<Uint8>((character | 0x80) & 0xBF); character >>= 6;
|
||||
case 2 : bytes[1] = static_cast<Uint8>((character | 0x80) & 0xBF); character >>= 6;
|
||||
case 1 : bytes[0] = static_cast<Uint8> (character | UTF8FirstBytes[bytesToWrite]);
|
||||
}
|
||||
|
||||
// Add them to the output
|
||||
const Uint8* currentByte = bytes;
|
||||
switch (bytesToWrite)
|
||||
{
|
||||
case 4 : *output++ = *currentByte++;
|
||||
case 3 : *output++ = *currentByte++;
|
||||
case 2 : *output++ = *currentByte++;
|
||||
case 1 : *output++ = *currentByte++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Generic function to convert an UTF-32 characters range
|
||||
/// to an UTF-16 characters range, using the given locale
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
inline Out Unicode::UTF32ToUTF16(In begin, In end, Out output, Uint16 replacement)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 character = *begin++;
|
||||
if (character < 0xFFFF)
|
||||
{
|
||||
// Character can be converted directly to 16 bits, just need to check it's in the valid range
|
||||
if ((character >= 0xD800) && (character <= 0xDFFF))
|
||||
{
|
||||
// Invalid character (this range is reserved)
|
||||
if (replacement)
|
||||
*output++ = replacement;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Valid character directly convertible to 16 bits
|
||||
*output++ = static_cast<Uint16>(character);
|
||||
}
|
||||
}
|
||||
else if (character > 0x0010FFFF)
|
||||
{
|
||||
// Invalid character (greater than the maximum unicode value)
|
||||
if (replacement)
|
||||
*output++ = replacement;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Character will be converted to 2 UTF-16 elements
|
||||
character -= 0x0010000;
|
||||
*output++ = static_cast<Uint16>((character >> 10) + 0xD800);
|
||||
*output++ = static_cast<Uint16>((character & 0x3FFUL) + 0xDC00);
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Get the number of characters composing an UTF-8 string
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
inline std::size_t Unicode::GetUTF8Length(In begin, In end)
|
||||
{
|
||||
std::size_t length = 0;
|
||||
while (begin < end)
|
||||
{
|
||||
int nbBytes = UTF8TrailingBytes[static_cast<int>(*begin)];
|
||||
if (begin + nbBytes < end)
|
||||
++length;
|
||||
|
||||
begin += nbBytes + 1;
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Get the number of characters composing an UTF-16 string
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
inline std::size_t Unicode::GetUTF16Length(In begin, In end)
|
||||
{
|
||||
std::size_t length = 0;
|
||||
while (begin < end)
|
||||
{
|
||||
if ((*begin >= 0xD800) && (*begin <= 0xDBFF))
|
||||
{
|
||||
++begin;
|
||||
if ((begin < end) && ((*begin >= 0xDC00) && (*begin <= 0xDFFF)))
|
||||
{
|
||||
++length;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
++length;
|
||||
}
|
||||
|
||||
++begin;
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Get the number of characters composing an UTF-32 string
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
inline std::size_t Unicode::GetUTF32Length(In begin, In end)
|
||||
{
|
||||
return end - begin;
|
||||
}
|
672
include/SFML/System/Utf.hpp
Normal file
672
include/SFML/System/Utf.hpp
Normal file
|
@ -0,0 +1,672 @@
|
|||
////////////////////////////////////////////////////////////
|
||||
//
|
||||
// SFML - Simple and Fast Multimedia Library
|
||||
// Copyright (C) 2007-2009 Laurent Gomila (laurent.gom@gmail.com)
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied warranty.
|
||||
// In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it freely,
|
||||
// subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented;
|
||||
// you must not claim that you wrote the original software.
|
||||
// If you use this software in a product, an acknowledgment
|
||||
// in the product documentation would be appreciated but is not required.
|
||||
//
|
||||
// 2. Altered source versions must be plainly marked as such,
|
||||
// and must not be misrepresented as being the original software.
|
||||
//
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
//
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef SFML_UTF_HPP
|
||||
#define SFML_UTF_HPP
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Headers
|
||||
////////////////////////////////////////////////////////////
|
||||
#include <SFML/Config.hpp>
|
||||
#include <algorithm>
|
||||
#include <locale>
|
||||
#include <string>
|
||||
#include <stdlib.h>
|
||||
|
||||
|
||||
namespace sf
|
||||
{
|
||||
////////////////////////////////////////////////////////////
|
||||
/// Get the default system locale
|
||||
///
|
||||
/// \return Reference to the default system locale
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
// Declaration only : no function body is provided at this point of the header
SFML_API const std::locale& GetDefaultLocale();
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Utility class providing generic functions for UTF conversions
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
// The primary template is only declared : the usable interface is provided
// by explicit specializations (such as Utf<8> below)
template <unsigned int N> class Utf;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Specialization of the Utf template for UTF-8
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <>
|
||||
class Utf<8>
|
||||
{
|
||||
public :
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Decode a single UTF-8 character
|
||||
///
|
||||
/// Decoding a character means finding its unique 32-bits
|
||||
/// code (called the codepoint) in the Unicode standard.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Codepoint of the decoded UTF-8 character
|
||||
/// \param replacement Replacement character to use in case the UTF-8 sequence is invalid
|
||||
///
|
||||
/// \return Iterator pointing to one past the last read element of the input sequence
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
static In Decode(In begin, In end, Uint32& output, Uint32 replacement = 0);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Encode a single UTF-8 character
|
||||
///
|
||||
/// Encoding a character means converting a unique 32-bits
|
||||
/// code (called the codepoint) in the target encoding, UTF-8.
|
||||
///
|
||||
/// \param input Codepoint to encode as UTF-8
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement Replacement for characters not convertible to UTF-8 (use 0 to skip them)
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename Out>
|
||||
static Out Encode(Uint32 input, Out output, Uint8 replacement = 0);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Advance to the next UTF-8 character
|
||||
///
|
||||
/// This function is necessary for multi-elements encodings, as
|
||||
/// a single character may use more than 1 storage element.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
///
|
||||
/// \return Iterator pointing to one past the last read element of the input sequence
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
static In Next(In begin, In end);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Count the number of characters of a UTF-8 sequence
|
||||
///
|
||||
/// This function is necessary for multi-elements encodings, as
|
||||
/// a single character may use more than 1 storage element, thus the
|
||||
/// total size can be different from (end - begin).
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
///
|
||||
/// \return Number of characters contained in the input sequence
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
static std::size_t Count(In begin, In end);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert an ANSI characters range to UTF-8
|
||||
///
|
||||
/// The current global locale will be used by default, unless you
|
||||
/// pass a custom one in the \a locale parameter.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
/// \param locale Locale to use for conversion
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out FromAnsi(In begin, In end, Out output, const std::locale& locale = GetDefaultLocale());
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert a wide characters range to UTF-8
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out FromWide(In begin, In end, Out output);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert an UTF-8 characters range to ANSI characters
|
||||
///
|
||||
/// The current global locale will be used by default, unless you
|
||||
/// pass a custom one in the \a locale parameter.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them)
|
||||
/// \param locale Locale to use for conversion
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ToAnsi(In begin, In end, Out output, char replacement = 0, const std::locale& locale = GetDefaultLocale());
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert an UTF-8 characters range to wide characters
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement Replacement for characters not convertible to wide (use 0 to skip them)
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ToWide(In begin, In end, Out output, wchar_t replacement = 0);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert a UTF-8 characters range to UTF-8
|
||||
///
|
||||
/// This function does nothing more than a direct copy;
|
||||
/// it is defined only to provide the same interface as other
|
||||
/// specializations of the sf::Utf<> template, and allow
|
||||
/// generic code to be written on top of it.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ToUtf8(In begin, In end, Out output);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert a UTF-8 characters range to UTF-16
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ToUtf16(In begin, In end, Out output);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert a UTF-8 characters range to UTF-32
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ToUtf32(In begin, In end, Out output);
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Specialization of the Utf template for UTF-16
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <>
|
||||
class Utf<16>
|
||||
{
|
||||
public :
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Decode a single UTF-16 character
|
||||
///
|
||||
/// Decoding a character means finding its unique 32-bits
|
||||
/// code (called the codepoint) in the Unicode standard.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Codepoint of the decoded UTF-16 character
|
||||
/// \param replacement Replacement character to use in case the UTF-16 sequence is invalid
|
||||
///
|
||||
/// \return Iterator pointing to one past the last read element of the input sequence
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
static In Decode(In begin, In end, Uint32& output, Uint32 replacement = 0);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Encode a single UTF-16 character
|
||||
///
|
||||
/// Encoding a character means converting a unique 32-bits
|
||||
/// code (called the codepoint) in the target encoding, UTF-16.
|
||||
///
|
||||
/// \param input Codepoint to encode as UTF-16
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement Replacement for characters not convertible to UTF-16 (use 0 to skip them)
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename Out>
|
||||
static Out Encode(Uint32 input, Out output, Uint16 replacement = 0);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Advance to the next UTF-16 character
|
||||
///
|
||||
/// This function is necessary for multi-elements encodings, as
|
||||
/// a single character may use more than 1 storage element.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
///
|
||||
/// \return Iterator pointing to one past the last read element of the input sequence
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
static In Next(In begin, In end);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Count the number of characters of a UTF-16 sequence
|
||||
///
|
||||
/// This function is necessary for multi-elements encodings, as
|
||||
/// a single character may use more than 1 storage element, thus the
|
||||
/// total size can be different from (end - begin).
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
///
|
||||
/// \return Number of characters contained in the input sequence
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
static std::size_t Count(In begin, In end);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert an ANSI characters range to UTF-16
|
||||
///
|
||||
/// The current global locale will be used by default, unless you
|
||||
/// pass a custom one in the \a locale parameter.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
/// \param locale Locale to use for conversion
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out FromAnsi(In begin, In end, Out output, const std::locale& locale = GetDefaultLocale());
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert a wide characters range to UTF-16
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out FromWide(In begin, In end, Out output);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert an UTF-16 characters range to ANSI characters
|
||||
///
|
||||
/// The current global locale will be used by default, unless you
|
||||
/// pass a custom one in the \a locale parameter.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them)
|
||||
/// \param locale Locale to use for conversion
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ToAnsi(In begin, In end, Out output, char replacement = 0, const std::locale& locale = GetDefaultLocale());
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert an UTF-16 characters range to wide characters
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement Replacement for characters not convertible to wide (use 0 to skip them)
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ToWide(In begin, In end, Out output, wchar_t replacement = 0);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert a UTF-16 characters range to UTF-8
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ToUtf8(In begin, In end, Out output);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert a UTF-16 characters range to UTF-16
|
||||
///
|
||||
/// This function does nothing more than a direct copy;
|
||||
/// it is defined only to provide the same interface as other
|
||||
/// specializations of the sf::Utf<> template, and allow
|
||||
/// generic code to be written on top of it.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ToUtf16(In begin, In end, Out output);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert a UTF-16 characters range to UTF-32
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ToUtf32(In begin, In end, Out output);
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Specialization of the Utf template for UTF-32
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <>
|
||||
class Utf<32>
|
||||
{
|
||||
public :
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Decode a single UTF-32 character
|
||||
///
|
||||
/// Decoding a character means finding its unique 32-bits
|
||||
/// code (called the codepoint) in the Unicode standard.
|
||||
/// For UTF-32, the character value is the same as the codepoint.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Codepoint of the decoded UTF-32 character
|
||||
/// \param replacement Replacement character to use in case the UTF-32 sequence is invalid
|
||||
///
|
||||
/// \return Iterator pointing to one past the last read element of the input sequence
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
static In Decode(In begin, In end, Uint32& output, Uint32 replacement = 0);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Encode a single UTF-32 character
|
||||
///
|
||||
/// Encoding a character means converting a unique 32-bits
|
||||
/// code (called the codepoint) in the target encoding, UTF-32.
|
||||
/// For UTF-32, the codepoint is the same as the character value.
|
||||
///
|
||||
/// \param input Codepoint to encode as UTF-32
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement Replacement for characters not convertible to UTF-32 (use 0 to skip them)
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename Out>
|
||||
static Out Encode(Uint32 input, Out output, Uint32 replacement = 0);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Advance to the next UTF-32 character
|
||||
///
|
||||
/// This function is trivial for UTF-32, which can store
|
||||
/// every character in a single storage element.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
///
|
||||
/// \return Iterator pointing to one past the last read element of the input sequence
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
static In Next(In begin, In end);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Count the number of characters of a UTF-32 sequence
|
||||
///
|
||||
/// This function is trivial for UTF-32, which can store
|
||||
/// every character in a single storage element.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
///
|
||||
/// \return Number of characters contained in the input sequence
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
static std::size_t Count(In begin, In end);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert an ANSI characters range to UTF-32
|
||||
///
|
||||
/// The current global locale will be used by default, unless you
|
||||
/// pass a custom one in the \a locale parameter.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
/// \param locale Locale to use for conversion
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out FromAnsi(In begin, In end, Out output, const std::locale& locale = GetDefaultLocale());
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert a wide characters range to UTF-32
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out FromWide(In begin, In end, Out output);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert an UTF-32 characters range to ANSI characters
|
||||
///
|
||||
/// The current global locale will be used by default, unless you
|
||||
/// pass a custom one in the \a locale parameter.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them)
|
||||
/// \param locale Locale to use for conversion
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ToAnsi(In begin, In end, Out output, char replacement = 0, const std::locale& locale = GetDefaultLocale());
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert an UTF-32 characters range to wide characters
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement Replacement for characters not convertible to wide (use 0 to skip them)
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ToWide(In begin, In end, Out output, wchar_t replacement = 0);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert a UTF-32 characters range to UTF-8
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ToUtf8(In begin, In end, Out output);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert a UTF-32 characters range to UTF-16
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ToUtf16(In begin, In end, Out output);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Convert a UTF-32 characters range to UTF-32
|
||||
///
|
||||
/// This function does nothing more than a direct copy;
|
||||
/// it is defined only to provide the same interface as other
|
||||
/// specializations of the sf::Utf<> template, and allow
|
||||
/// generic code to be written on top of it.
|
||||
///
|
||||
/// \param begin Iterator pointing to the beginning of the input sequence
|
||||
/// \param end Iterator pointing to the end of the input sequence
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
static Out ToUtf32(In begin, In end, Out output);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Decode a single ANSI character to UTF-32
|
||||
///
|
||||
/// This function does not exist in other specializations
|
||||
/// of sf::Utf<>, it is defined for convenience (it is used by
|
||||
/// several other conversion functions).
|
||||
///
|
||||
/// \param input Input ANSI character
|
||||
/// \param locale Locale to use for conversion
|
||||
///
|
||||
/// \return Converted character
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
static Uint32 DecodeAnsi(In input, const std::locale& locale = GetDefaultLocale());
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Decode a single wide character to UTF-32
|
||||
///
|
||||
/// This function does not exist in other specializations
|
||||
/// of sf::Utf<>, it is defined for convenience (it is used by
|
||||
/// several other conversion functions).
|
||||
///
|
||||
/// \param input Input wide character
|
||||
///
|
||||
/// \return Converted character
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
static Uint32 DecodeWide(In input);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Encode a single UTF-32 character to ANSI
|
||||
///
|
||||
/// This function does not exist in other specializations
|
||||
/// of sf::Utf<>, it is defined for convenience (it is used by
|
||||
/// several other conversion functions).
|
||||
///
|
||||
/// \param codepoint UTF-32 codepoint to convert to an ANSI character
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement Replacement if the input character is not convertible to ANSI (use 0 to skip it)
|
||||
/// \param locale Locale to use for conversion
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename Out>
|
||||
static Out EncodeAnsi(Uint32 codepoint, Out output, char replacement = 0, const std::locale& locale = GetDefaultLocale());
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/// \brief Encode a single UTF-32 character to wide
|
||||
///
|
||||
/// This function does not exist in other specializations
|
||||
/// of sf::Utf<>, it is defined for convenience (it is used by
|
||||
/// several other conversion functions).
|
||||
///
|
||||
/// \param codepoint UTF-32 codepoint to convert to a wide character
|
||||
/// \param output Iterator pointing to the beginning of the output sequence
|
||||
/// \param replacement Replacement if the input character is not convertible to wide (use 0 to skip it)
|
||||
///
|
||||
/// \return Iterator to the end of the output sequence which has been written
|
||||
///
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename Out>
|
||||
static Out EncodeWide(Uint32 codepoint, Out output, wchar_t replacement = 0);
|
||||
};
|
||||
|
||||
#include <SFML/System/Utf.inl>
|
||||
|
||||
// Make typedefs to get rid of the template syntax:
// Utf8, Utf16 and Utf32 are shorthand names for the Utf<8>, Utf<16>
// and Utf<32> specializations declared above
|
||||
typedef Utf<8> Utf8;
|
||||
typedef Utf<16> Utf16;
|
||||
typedef Utf<32> Utf32;
|
||||
|
||||
} // namespace sf
|
||||
|
||||
|
||||
#endif // SFML_UTF_HPP
|
665
include/SFML/System/Utf.inl
Normal file
665
include/SFML/System/Utf.inl
Normal file
|
@ -0,0 +1,665 @@
|
|||
////////////////////////////////////////////////////////////
|
||||
//
|
||||
// SFML - Simple and Fast Multimedia Library
|
||||
// Copyright (C) 2007-2009 Laurent Gomila (laurent.gom@gmail.com)
|
||||
//
|
||||
// This software is provided 'as-is', without any express or implied warranty.
|
||||
// In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
//
|
||||
// Permission is granted to anyone to use this software for any purpose,
|
||||
// including commercial applications, and to alter it and redistribute it freely,
|
||||
// subject to the following restrictions:
|
||||
//
|
||||
// 1. The origin of this software must not be misrepresented;
|
||||
// you must not claim that you wrote the original software.
|
||||
// If you use this software in a product, an acknowledgment
|
||||
// in the product documentation would be appreciated but is not required.
|
||||
//
|
||||
// 2. Altered source versions must be plainly marked as such,
|
||||
// and must not be misrepresented as being the original software.
|
||||
//
|
||||
// 3. This notice may not be removed or altered from any source distribution.
|
||||
//
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
In Utf<8>::Decode(In begin, In end, Uint32& output, Uint32 replacement)
|
||||
{
|
||||
// Some useful precomputed data
|
||||
static const int trailing[256] =
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
|
||||
};
|
||||
static const Uint32 offsets[6] =
|
||||
{
|
||||
0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080
|
||||
};
|
||||
|
||||
// Decode the character
|
||||
int trailingBytes = trailing[static_cast<int>(*begin)];
|
||||
if (begin + trailingBytes < end)
|
||||
{
|
||||
output = 0;
|
||||
switch (trailingBytes)
|
||||
{
|
||||
case 5 : output += *begin++; output <<= 6;
|
||||
case 4 : output += *begin++; output <<= 6;
|
||||
case 3 : output += *begin++; output <<= 6;
|
||||
case 2 : output += *begin++; output <<= 6;
|
||||
case 1 : output += *begin++; output <<= 6;
|
||||
case 0 : output += *begin++;
|
||||
}
|
||||
output -= offsets[trailingBytes];
|
||||
}
|
||||
else
|
||||
{
|
||||
// Incomplete character
|
||||
begin = end;
|
||||
output = replacement;
|
||||
}
|
||||
|
||||
return begin;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename Out>
|
||||
Out Utf<8>::Encode(Uint32 input, Out output, Uint8 replacement)
|
||||
{
|
||||
// Some useful precomputed data
|
||||
static const Uint8 firstBytes[7] =
|
||||
{
|
||||
0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
|
||||
};
|
||||
|
||||
// Encode the character
|
||||
if ((input > 0x0010FFFF) || ((input >= 0xD800) && (input <= 0xDBFF)))
|
||||
{
|
||||
// Invalid character
|
||||
if (replacement)
|
||||
*output++ = replacement;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Valid character
|
||||
|
||||
// Get the number of bytes to write
|
||||
int bytesToWrite = 1;
|
||||
if (input < 0x80) bytesToWrite = 1;
|
||||
else if (input < 0x800) bytesToWrite = 2;
|
||||
else if (input < 0x10000) bytesToWrite = 3;
|
||||
else if (input <= 0x0010FFFF) bytesToWrite = 4;
|
||||
|
||||
// Extract the bytes to write
|
||||
Uint8 bytes[4];
|
||||
switch (bytesToWrite)
|
||||
{
|
||||
case 4 : bytes[3] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
|
||||
case 3 : bytes[2] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
|
||||
case 2 : bytes[1] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
|
||||
case 1 : bytes[0] = static_cast<Uint8> (input | firstBytes[bytesToWrite]);
|
||||
}
|
||||
|
||||
// Add them to the output
|
||||
const Uint8* currentByte = bytes;
|
||||
switch (bytesToWrite)
|
||||
{
|
||||
case 4 : *output++ = *currentByte++;
|
||||
case 3 : *output++ = *currentByte++;
|
||||
case 2 : *output++ = *currentByte++;
|
||||
case 1 : *output++ = *currentByte++;
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
In Utf<8>::Next(In begin, In end)
|
||||
{
|
||||
Uint32 codepoint;
|
||||
return Decode(begin, end, codepoint);
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
std::size_t Utf<8>::Count(In begin, In end)
|
||||
{
|
||||
std::size_t length = 0;
|
||||
while (begin < end)
|
||||
{
|
||||
begin = Next(begin, end);
|
||||
++length;
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<8>::FromAnsi(In begin, In end, Out output, const std::locale& locale)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 codepoint = Utf<32>::DecodeAnsi(*begin++, locale);
|
||||
output = Encode(codepoint, output);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<8>::FromWide(In begin, In end, Out output)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 codepoint = Utf<32>::DecodeWide(*begin++);
|
||||
output = Encode(codepoint, output);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<8>::ToAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 codepoint;
|
||||
begin = Decode(begin, end, codepoint);
|
||||
output = Utf<32>::EncodeAnsi(codepoint, output, replacement, locale);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<8>::ToWide(In begin, In end, Out output, wchar_t replacement)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 codepoint;
|
||||
begin = Decode(begin, end, codepoint);
|
||||
output = Utf<32>::EncodeWide(codepoint, output, replacement);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<8>::ToUtf8(In begin, In end, Out output)
|
||||
{
|
||||
while (begin < end)
|
||||
*output++ = *begin++;
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<8>::ToUtf16(In begin, In end, Out output)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 codepoint;
|
||||
begin = Decode(begin, end, codepoint);
|
||||
output = Utf<16>::Encode(codepoint, output);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<8>::ToUtf32(In begin, In end, Out output)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 codepoint;
|
||||
begin = Decode(begin, end, codepoint);
|
||||
*output++ = codepoint;
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
In Utf<16>::Decode(In begin, In end, Uint32& output, Uint32 replacement)
|
||||
{
|
||||
Uint16 first = *begin++;
|
||||
|
||||
// If it's a surrogate pair, first convert to a single UTF-32 character
|
||||
if ((first >= 0xD800) && (first <= 0xDBFF))
|
||||
{
|
||||
if (begin < end)
|
||||
{
|
||||
Uint32 second = *begin++;
|
||||
if ((second >= 0xDC00) && (second <= 0xDFFF))
|
||||
{
|
||||
// The second element is valid: convert the two elements to a UTF-32 character
|
||||
output = static_cast<Uint32>(((first - 0xD800) << 10) + (second - 0xDC00) + 0x0010000);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Invalid character
|
||||
output = replacement;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Invalid character
|
||||
begin = end;
|
||||
output = replacement;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// We can make a direct copy
|
||||
output = first;
|
||||
}
|
||||
|
||||
return begin;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename Out>
|
||||
Out Utf<16>::Encode(Uint32 input, Out output, Uint16 replacement)
|
||||
{
|
||||
if (input < 0xFFFF)
|
||||
{
|
||||
// The character can be copied directly, we just need to check if it's in the valid range
|
||||
if ((input >= 0xD800) && (input <= 0xDFFF))
|
||||
{
|
||||
// Invalid character (this range is reserved)
|
||||
if (replacement)
|
||||
*output++ = replacement;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Valid character directly convertible to a single UTF-16 character
|
||||
*output++ = static_cast<Uint16>(input);
|
||||
}
|
||||
}
|
||||
else if (input > 0x0010FFFF)
|
||||
{
|
||||
// Invalid character (greater than the maximum unicode value)
|
||||
if (replacement)
|
||||
*output++ = replacement;
|
||||
}
|
||||
else
|
||||
{
|
||||
// The input character will be converted to two UTF-16 elements
|
||||
input -= 0x0010000;
|
||||
*output++ = static_cast<Uint16>((input >> 10) + 0xD800);
|
||||
*output++ = static_cast<Uint16>((input & 0x3FFUL) + 0xDC00);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
In Utf<16>::Next(In begin, In end)
|
||||
{
|
||||
Uint32 codepoint;
|
||||
return Decode(begin, end, codepoint);
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
std::size_t Utf<16>::Count(In begin, In end)
|
||||
{
|
||||
std::size_t length = 0;
|
||||
while (begin < end)
|
||||
{
|
||||
begin = Next(begin, end);
|
||||
++length;
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<16>::FromAnsi(In begin, In end, Out output, const std::locale& locale)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 codepoint = Utf<32>::DecodeAnsi(*begin++, locale);
|
||||
output = Encode(codepoint, output);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<16>::FromWide(In begin, In end, Out output)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 codepoint = Utf<32>::DecodeWide(*begin++);
|
||||
output = Encode(codepoint, output);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<16>::ToAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 codepoint;
|
||||
begin = Decode(begin, end, codepoint);
|
||||
output = Utf<32>::EncodeAnsi(codepoint, output, replacement, locale);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<16>::ToWide(In begin, In end, Out output, wchar_t replacement)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 codepoint;
|
||||
begin = Decode(begin, end, codepoint);
|
||||
output = Utf<32>::EncodeWide(codepoint, output, replacement);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<16>::ToUtf8(In begin, In end, Out output)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 codepoint;
|
||||
begin = Decode(begin, end, codepoint);
|
||||
output = Utf<8>::Encode(codepoint, output);
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<16>::ToUtf16(In begin, In end, Out output)
|
||||
{
|
||||
while (begin < end)
|
||||
*output++ = *begin++;
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<16>::ToUtf32(In begin, In end, Out output)
|
||||
{
|
||||
while (begin < end)
|
||||
{
|
||||
Uint32 codepoint;
|
||||
begin = Decode(begin, end, codepoint);
|
||||
*output++ = codepoint;
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
In Utf<32>::Decode(In begin, In end, Uint32& output, Uint32)
|
||||
{
|
||||
output = *begin++;
|
||||
return begin;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename Out>
|
||||
Out Utf<32>::Encode(Uint32 input, Out output, Uint32 replacement)
|
||||
{
|
||||
*output++ = input;
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
In Utf<32>::Next(In begin, In end)
|
||||
{
|
||||
return ++begin;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
std::size_t Utf<32>::Count(In begin, In end)
|
||||
{
|
||||
return begin - end;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<32>::FromAnsi(In begin, In end, Out output, const std::locale& locale)
|
||||
{
|
||||
while (begin < end)
|
||||
*output++ = DecodeAnsi(*begin++, locale);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<32>::FromWide(In begin, In end, Out output)
|
||||
{
|
||||
while (begin < end)
|
||||
*output++ = DecodeWide(*begin++);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<32>::ToAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
|
||||
{
|
||||
while (begin < end)
|
||||
output = EncodeAnsi(*begin++, output, replacement, locale);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<32>::ToWide(In begin, In end, Out output, wchar_t replacement)
|
||||
{
|
||||
while (begin < end)
|
||||
output = EncodeWide(*begin++, output, replacement);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<32>::ToUtf8(In begin, In end, Out output)
|
||||
{
|
||||
while (begin < end)
|
||||
output = Utf<8>::Encode(*begin++, output);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<32>::ToUtf16(In begin, In end, Out output)
|
||||
{
|
||||
while (begin < end)
|
||||
output = Utf<16>::Encode(*begin++, output);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In, typename Out>
|
||||
Out Utf<32>::ToUtf32(In begin, In end, Out output)
|
||||
{
|
||||
while (begin < end)
|
||||
*output++ = *begin++;
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
Uint32 Utf<32>::DecodeAnsi(In input, const std::locale& locale)
|
||||
{
|
||||
#ifdef __MINGW32__
|
||||
|
||||
// MinGW has almost no support for unicode stuff
|
||||
// As a consequence, the MinGW version of this function can only use the default locale
|
||||
// and ignores the one passed as parameter
|
||||
|
||||
wchar_t character = 0;
|
||||
mbtowc(&character, &input, 1);
|
||||
return static_cast<Uint32>(character);
|
||||
|
||||
#else
|
||||
|
||||
// Get the facet of the locale which deals with character conversion
|
||||
const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
|
||||
|
||||
// Use the facet to convert each character of the input string
|
||||
return static_cast<Uint32>(facet.widen(input));
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename In>
|
||||
Uint32 Utf<32>::DecodeWide(In input)
|
||||
{
|
||||
// The encoding of wide characters is not well defined and is left to the system;
|
||||
// however we can safely assume that it is UCS-2 on Windows and
|
||||
// UCS-4 on Unix systems.
|
||||
// In both cases, a simple copy is enough (UCS-2 is a subset of UCS-4,
|
||||
// and UCS-4 *is* UTF-32).
|
||||
|
||||
return input;
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename Out>
|
||||
Out Utf<32>::EncodeAnsi(Uint32 codepoint, Out output, char replacement, const std::locale& locale)
|
||||
{
|
||||
#ifdef __MINGW32__
|
||||
|
||||
// MinGW has almost no support for unicode stuff
|
||||
// As a consequence, the MinGW version of this function can only use the default locale
|
||||
// and ignores the one passed as parameter
|
||||
|
||||
char character = 0;
|
||||
if (wctomb(&character, static_cast<wchar_t>(codepoint)) >= 0)
|
||||
*output++ = character;
|
||||
else if (replacement)
|
||||
*output++ = replacement;
|
||||
|
||||
return output;
|
||||
|
||||
#else
|
||||
|
||||
// Get the facet of the locale which deals with character conversion
|
||||
const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
|
||||
|
||||
// Use the facet to convert each character of the input string
|
||||
*output++ = facet.narrow(static_cast<wchar_t>(codepoint), replacement);
|
||||
|
||||
return output;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
template <typename Out>
|
||||
Out Utf<32>::EncodeWide(Uint32 codepoint, Out output, wchar_t replacement)
|
||||
{
|
||||
// The encoding of wide characters is not well defined and is left to the system;
|
||||
// however we can safely assume that it is UCS-2 on Windows and
|
||||
// UCS-4 on Unix systems.
|
||||
// For UCS-2 we need to check if the source characters fits in (UCS-2 is a subset of UCS-4).
|
||||
// For UCS-4 we can do a direct copy (UCS-4 *is* UTF-32).
|
||||
|
||||
switch (sizeof(wchar_t))
|
||||
{
|
||||
case 4:
|
||||
{
|
||||
*output++ = static_cast<wchar_t>(codepoint);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
if ((codepoint <= 0xFFFF) && ((codepoint < 0xD800) || (codepoint > 0xDFFF)))
|
||||
{
|
||||
*output++ = static_cast<wchar_t>(codepoint);
|
||||
}
|
||||
else if (replacement)
|
||||
{
|
||||
*output++ = replacement;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue