
53 changed files with 218 additions and 16602 deletions
@ -1,271 +0,0 @@
@@ -1,271 +0,0 @@
|
||||
// Tencent is pleased to support the open source community by making RapidJSON available.
|
||||
//
|
||||
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
|
||||
//
|
||||
// Licensed under the MIT License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://opensource.org/licenses/MIT
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#ifndef RAPIDJSON_ALLOCATORS_H_ |
||||
#define RAPIDJSON_ALLOCATORS_H_ |
||||
|
||||
#include "rapidjson.h" |
||||
|
||||
RAPIDJSON_NAMESPACE_BEGIN |
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Allocator
|
||||
|
||||
/*! \class rapidjson::Allocator
|
||||
\brief Concept for allocating, resizing and freeing memory block. |
||||
|
||||
Note that Malloc() and Realloc() are non-static but Free() is static. |
||||
|
||||
So if an allocator needs to support Free(), it needs to put its pointer in |
||||
the header of memory block. |
||||
|
||||
\code |
||||
concept Allocator { |
||||
static const bool kNeedFree; //!< Whether this allocator needs to call Free().
|
||||
|
||||
// Allocate a memory block.
|
||||
// \param size of the memory block in bytes.
|
||||
// \returns pointer to the memory block.
|
||||
void* Malloc(size_t size); |
||||
|
||||
// Resize a memory block.
|
||||
// \param originalPtr The pointer to current memory block. Null pointer is permitted.
|
||||
// \param originalSize The current size in bytes. (Design issue: since some allocator may not book-keep this, explicitly pass to it can save memory.)
|
||||
// \param newSize the new size in bytes.
|
||||
void* Realloc(void* originalPtr, size_t originalSize, size_t newSize); |
||||
|
||||
// Free a memory block.
|
||||
// \param pointer to the memory block. Null pointer is permitted.
|
||||
static void Free(void *ptr); |
||||
}; |
||||
\endcode |
||||
*/ |
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// CrtAllocator
|
||||
|
||||
//! C-runtime library allocator.
|
||||
/*! This class is just wrapper for standard C library memory routines.
|
||||
\note implements Allocator concept |
||||
*/ |
||||
class CrtAllocator {
public:
    //! Blocks obtained from this allocator must be released with Free().
    static const bool kNeedFree = true;

    //! Allocates a memory block with std::malloc().
    /*! \param size Size of the block in bytes.
        \return Pointer to the block. NULL when \c size is 0, because the
            behavior of malloc(0) is implementation defined and is therefore
            standardized here to returning NULL.
    */
    void* Malloc(size_t size) {
        if (size)
            return std::malloc(size);
        else
            return NULL;
    }

    //! Resizes a memory block with std::realloc().
    /*! \param originalPtr Pointer to the current block. Null pointer is permitted.
        \param originalSize Current size in bytes. Unused by this allocator.
        \param newSize New size in bytes. When 0, the block is freed and NULL is returned.
        \return Pointer to the resized block, or NULL.
    */
    void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) {
        (void)originalSize;
        if (newSize == 0) {
            std::free(originalPtr);
            return NULL;
        }
        return std::realloc(originalPtr, newSize);
    }

    //! Frees a memory block with std::free(). Null pointer is permitted.
    static void Free(void *ptr) { std::free(ptr); }
};
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// MemoryPoolAllocator
|
||||
|
||||
//! Default memory allocator used by the parser and DOM.
|
||||
/*! This allocator allocates memory blocks from pre-allocated memory chunks.
|
||||
|
||||
It does not free memory blocks. And Realloc() only allocates new memory. |
||||
|
||||
The memory chunks are allocated by BaseAllocator, which is CrtAllocator by default. |
||||
|
||||
User may also supply a buffer as the first chunk. |
||||
|
||||
If the user-buffer is full then additional chunks are allocated by BaseAllocator. |
||||
|
||||
The user-buffer is not deallocated by this allocator. |
||||
|
||||
\tparam BaseAllocator the allocator type for allocating memory chunks. Default is CrtAllocator. |
||||
\note implements Allocator concept |
||||
*/ |
||||
template <typename BaseAllocator = CrtAllocator> |
||||
class MemoryPoolAllocator { |
||||
public: |
||||
static const bool kNeedFree = false; //!< Tell users that no need to call Free() with this allocator. (concept Allocator)
|
||||
|
||||
//! Constructor with chunkSize.
|
||||
/*! \param chunkSize The size of memory chunk. The default is kDefaultChunkSize.
|
||||
\param baseAllocator The allocator for allocating memory chunks. |
||||
*/ |
||||
MemoryPoolAllocator(size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : |
||||
chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(0), baseAllocator_(baseAllocator), ownBaseAllocator_(0) |
||||
{ |
||||
} |
||||
|
||||
//! Constructor with user-supplied buffer.
|
||||
/*! The user buffer will be used firstly. When it is full, memory pool allocates new chunk with chunk size.
|
||||
|
||||
The user buffer will not be deallocated when this allocator is destructed. |
||||
|
||||
\param buffer User supplied buffer. |
||||
\param size Size of the buffer in bytes. It must at least larger than sizeof(ChunkHeader). |
||||
\param chunkSize The size of memory chunk. The default is kDefaultChunkSize. |
||||
\param baseAllocator The allocator for allocating memory chunks. |
||||
*/ |
||||
MemoryPoolAllocator(void *buffer, size_t size, size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : |
||||
chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(buffer), baseAllocator_(baseAllocator), ownBaseAllocator_(0) |
||||
{ |
||||
RAPIDJSON_ASSERT(buffer != 0); |
||||
RAPIDJSON_ASSERT(size > sizeof(ChunkHeader)); |
||||
chunkHead_ = reinterpret_cast<ChunkHeader*>(buffer); |
||||
chunkHead_->capacity = size - sizeof(ChunkHeader); |
||||
chunkHead_->size = 0; |
||||
chunkHead_->next = 0; |
||||
} |
||||
|
||||
//! Destructor.
|
||||
/*! This deallocates all memory chunks, excluding the user-supplied buffer.
|
||||
*/ |
||||
~MemoryPoolAllocator() { |
||||
Clear(); |
||||
RAPIDJSON_DELETE(ownBaseAllocator_); |
||||
} |
||||
|
||||
//! Deallocates all memory chunks, excluding the user-supplied buffer.
|
||||
void Clear() { |
||||
while (chunkHead_ && chunkHead_ != userBuffer_) { |
||||
ChunkHeader* next = chunkHead_->next; |
||||
baseAllocator_->Free(chunkHead_); |
||||
chunkHead_ = next; |
||||
} |
||||
if (chunkHead_ && chunkHead_ == userBuffer_) |
||||
chunkHead_->size = 0; // Clear user buffer
|
||||
} |
||||
|
||||
//! Computes the total capacity of allocated memory chunks.
|
||||
/*! \return total capacity in bytes.
|
||||
*/ |
||||
size_t Capacity() const { |
||||
size_t capacity = 0; |
||||
for (ChunkHeader* c = chunkHead_; c != 0; c = c->next) |
||||
capacity += c->capacity; |
||||
return capacity; |
||||
} |
||||
|
||||
//! Computes the memory blocks allocated.
|
||||
/*! \return total used bytes.
|
||||
*/ |
||||
size_t Size() const { |
||||
size_t size = 0; |
||||
for (ChunkHeader* c = chunkHead_; c != 0; c = c->next) |
||||
size += c->size; |
||||
return size; |
||||
} |
||||
|
||||
//! Allocates a memory block. (concept Allocator)
|
||||
void* Malloc(size_t size) { |
||||
if (!size) |
||||
return NULL; |
||||
|
||||
size = RAPIDJSON_ALIGN(size); |
||||
if (chunkHead_ == 0 || chunkHead_->size + size > chunkHead_->capacity) |
||||
if (!AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size)) |
||||
return NULL; |
||||
|
||||
void *buffer = reinterpret_cast<char *>(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size; |
||||
chunkHead_->size += size; |
||||
return buffer; |
||||
} |
||||
|
||||
//! Resizes a memory block (concept Allocator)
|
||||
void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) { |
||||
if (originalPtr == 0) |
||||
return Malloc(newSize); |
||||
|
||||
if (newSize == 0) |
||||
return NULL; |
||||
|
||||
originalSize = RAPIDJSON_ALIGN(originalSize); |
||||
newSize = RAPIDJSON_ALIGN(newSize); |
||||
|
||||
// Do not shrink if new size is smaller than original
|
||||
if (originalSize >= newSize) |
||||
return originalPtr; |
||||
|
||||
// Simply expand it if it is the last allocation and there is sufficient space
|
||||
if (originalPtr == reinterpret_cast<char *>(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size - originalSize) { |
||||
size_t increment = static_cast<size_t>(newSize - originalSize); |
||||
if (chunkHead_->size + increment <= chunkHead_->capacity) { |
||||
chunkHead_->size += increment; |
||||
return originalPtr; |
||||
} |
||||
} |
||||
|
||||
// Realloc process: allocate and copy memory, do not free original buffer.
|
||||
if (void* newBuffer = Malloc(newSize)) { |
||||
if (originalSize) |
||||
std::memcpy(newBuffer, originalPtr, originalSize); |
||||
return newBuffer; |
||||
} |
||||
else |
||||
return NULL; |
||||
} |
||||
|
||||
//! Frees a memory block (concept Allocator)
|
||||
static void Free(void *ptr) { (void)ptr; } // Do nothing
|
||||
|
||||
private: |
||||
//! Copy constructor is not permitted.
|
||||
MemoryPoolAllocator(const MemoryPoolAllocator& rhs) /* = delete */; |
||||
//! Copy assignment operator is not permitted.
|
||||
MemoryPoolAllocator& operator=(const MemoryPoolAllocator& rhs) /* = delete */; |
||||
|
||||
//! Creates a new chunk.
|
||||
/*! \param capacity Capacity of the chunk in bytes.
|
||||
\return true if success. |
||||
*/ |
||||
bool AddChunk(size_t capacity) { |
||||
if (!baseAllocator_) |
||||
ownBaseAllocator_ = baseAllocator_ = RAPIDJSON_NEW(BaseAllocator)(); |
||||
if (ChunkHeader* chunk = reinterpret_cast<ChunkHeader*>(baseAllocator_->Malloc(RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + capacity))) { |
||||
chunk->capacity = capacity; |
||||
chunk->size = 0; |
||||
chunk->next = chunkHead_; |
||||
chunkHead_ = chunk; |
||||
return true; |
||||
} |
||||
else |
||||
return false; |
||||
} |
||||
|
||||
static const int kDefaultChunkCapacity = 64 * 1024; //!< Default chunk capacity.
|
||||
|
||||
//! Chunk header for perpending to each chunk.
|
||||
/*! Chunks are stored as a singly linked list.
|
||||
*/ |
||||
struct ChunkHeader { |
||||
size_t capacity; //!< Capacity of the chunk in bytes (excluding the header itself).
|
||||
size_t size; //!< Current size of allocated memory in bytes.
|
||||
ChunkHeader *next; //!< Next chunk in the linked list.
|
||||
}; |
||||
|
||||
ChunkHeader *chunkHead_; //!< Head of the chunk linked-list. Only the head chunk serves allocation.
|
||||
size_t chunk_capacity_; //!< The minimum capacity of chunk when they are allocated.
|
||||
void *userBuffer_; //!< User supplied buffer.
|
||||
BaseAllocator* baseAllocator_; //!< base allocator for allocating memory chunks.
|
||||
BaseAllocator* ownBaseAllocator_; //!< base allocator created by this object.
|
||||
}; |
||||
|
||||
RAPIDJSON_NAMESPACE_END |
||||
|
||||
#endif // RAPIDJSON_ALLOCATORS_H_
|
@ -1,78 +0,0 @@
@@ -1,78 +0,0 @@
|
||||
// Tencent is pleased to support the open source community by making RapidJSON available.
|
||||
//
|
||||
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
|
||||
//
|
||||
// Licensed under the MIT License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://opensource.org/licenses/MIT
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#ifndef RAPIDJSON_CURSORSTREAMWRAPPER_H_ |
||||
#define RAPIDJSON_CURSORSTREAMWRAPPER_H_ |
||||
|
||||
#include "stream.h" |
||||
|
||||
#if defined(__GNUC__) |
||||
RAPIDJSON_DIAG_PUSH |
||||
RAPIDJSON_DIAG_OFF(effc++) |
||||
#endif |
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER <= 1800 |
||||
RAPIDJSON_DIAG_PUSH |
||||
RAPIDJSON_DIAG_OFF(4702) // unreachable code
|
||||
RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
|
||||
#endif |
||||
|
||||
RAPIDJSON_NAMESPACE_BEGIN |
||||
|
||||
|
||||
//! Cursor stream wrapper for counting line and column number if error exists.
|
||||
/*!
|
||||
\tparam InputStream Any stream that implements Stream Concept |
||||
*/ |
||||
template <typename InputStream, typename Encoding = UTF8<> > |
||||
class CursorStreamWrapper : public GenericStreamWrapper<InputStream, Encoding> { |
||||
public: |
||||
typedef typename Encoding::Ch Ch; |
||||
|
||||
CursorStreamWrapper(InputStream& is): |
||||
GenericStreamWrapper<InputStream, Encoding>(is), line_(1), col_(0) {} |
||||
|
||||
// counting line and column number
|
||||
Ch Take() { |
||||
Ch ch = this->is_.Take(); |
||||
if(ch == '\n') { |
||||
line_ ++; |
||||
col_ = 0; |
||||
} else { |
||||
col_ ++; |
||||
} |
||||
return ch; |
||||
} |
||||
|
||||
//! Get the error line number, if error exists.
|
||||
size_t GetLine() const { return line_; } |
||||
//! Get the error column number, if error exists.
|
||||
size_t GetColumn() const { return col_; } |
||||
|
||||
private: |
||||
size_t line_; //!< Current Line
|
||||
size_t col_; //!< Current Column
|
||||
}; |
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER <= 1800 |
||||
RAPIDJSON_DIAG_POP |
||||
#endif |
||||
|
||||
#if defined(__GNUC__) |
||||
RAPIDJSON_DIAG_POP |
||||
#endif |
||||
|
||||
RAPIDJSON_NAMESPACE_END |
||||
|
||||
#endif // RAPIDJSON_CURSORSTREAMWRAPPER_H_
|
@ -1,299 +0,0 @@
@@ -1,299 +0,0 @@
|
||||
// Tencent is pleased to support the open source community by making RapidJSON available.
|
||||
//
|
||||
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
|
||||
//
|
||||
// Licensed under the MIT License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://opensource.org/licenses/MIT
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#ifndef RAPIDJSON_ENCODEDSTREAM_H_ |
||||
#define RAPIDJSON_ENCODEDSTREAM_H_ |
||||
|
||||
#include "stream.h" |
||||
#include "memorystream.h" |
||||
|
||||
#ifdef __GNUC__ |
||||
RAPIDJSON_DIAG_PUSH |
||||
RAPIDJSON_DIAG_OFF(effc++) |
||||
#endif |
||||
|
||||
#ifdef __clang__ |
||||
RAPIDJSON_DIAG_PUSH |
||||
RAPIDJSON_DIAG_OFF(padded) |
||||
#endif |
||||
|
||||
RAPIDJSON_NAMESPACE_BEGIN |
||||
|
||||
//! Input byte stream wrapper with a statically bound encoding.
|
||||
/*!
|
||||
\tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. |
||||
\tparam InputByteStream Type of input byte stream. For example, FileReadStream. |
||||
*/ |
||||
template <typename Encoding, typename InputByteStream> |
||||
class EncodedInputStream { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
||||
public: |
||||
typedef typename Encoding::Ch Ch; |
||||
|
||||
EncodedInputStream(InputByteStream& is) : is_(is) { |
||||
current_ = Encoding::TakeBOM(is_); |
||||
} |
||||
|
||||
Ch Peek() const { return current_; } |
||||
Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; } |
||||
size_t Tell() const { return is_.Tell(); } |
||||
|
||||
// Not implemented
|
||||
void Put(Ch) { RAPIDJSON_ASSERT(false); } |
||||
void Flush() { RAPIDJSON_ASSERT(false); } |
||||
Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } |
||||
size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } |
||||
|
||||
private: |
||||
EncodedInputStream(const EncodedInputStream&); |
||||
EncodedInputStream& operator=(const EncodedInputStream&); |
||||
|
||||
InputByteStream& is_; |
||||
Ch current_; |
||||
}; |
||||
|
||||
//! Specialized for UTF8 MemoryStream.
|
||||
template <> |
||||
class EncodedInputStream<UTF8<>, MemoryStream> { |
||||
public: |
||||
typedef UTF8<>::Ch Ch; |
||||
|
||||
EncodedInputStream(MemoryStream& is) : is_(is) { |
||||
if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take(); |
||||
if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take(); |
||||
if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take(); |
||||
} |
||||
Ch Peek() const { return is_.Peek(); } |
||||
Ch Take() { return is_.Take(); } |
||||
size_t Tell() const { return is_.Tell(); } |
||||
|
||||
// Not implemented
|
||||
void Put(Ch) {} |
||||
void Flush() {} |
||||
Ch* PutBegin() { return 0; } |
||||
size_t PutEnd(Ch*) { return 0; } |
||||
|
||||
MemoryStream& is_; |
||||
|
||||
private: |
||||
EncodedInputStream(const EncodedInputStream&); |
||||
EncodedInputStream& operator=(const EncodedInputStream&); |
||||
}; |
||||
|
||||
//! Output byte stream wrapper with statically bound encoding.
|
||||
/*!
|
||||
\tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. |
||||
\tparam OutputByteStream Type of output byte stream. For example, FileWriteStream. |
||||
*/ |
||||
template <typename Encoding, typename OutputByteStream> |
||||
class EncodedOutputStream { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
||||
public: |
||||
typedef typename Encoding::Ch Ch; |
||||
|
||||
EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) { |
||||
if (putBOM) |
||||
Encoding::PutBOM(os_); |
||||
} |
||||
|
||||
void Put(Ch c) { Encoding::Put(os_, c); } |
||||
void Flush() { os_.Flush(); } |
||||
|
||||
// Not implemented
|
||||
Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;} |
||||
Ch Take() { RAPIDJSON_ASSERT(false); return 0;} |
||||
size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } |
||||
Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } |
||||
size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } |
||||
|
||||
private: |
||||
EncodedOutputStream(const EncodedOutputStream&); |
||||
EncodedOutputStream& operator=(const EncodedOutputStream&); |
||||
|
||||
OutputByteStream& os_; |
||||
}; |
||||
|
||||
#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x |
||||
|
||||
//! Input stream wrapper with dynamically bound encoding and automatic encoding detection.
|
||||
/*!
|
||||
\tparam CharType Type of character for reading. |
||||
\tparam InputByteStream type of input byte stream to be wrapped. |
||||
*/ |
||||
template <typename CharType, typename InputByteStream> |
||||
class AutoUTFInputStream { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
||||
public: |
||||
typedef CharType Ch; |
||||
|
||||
//! Constructor.
|
||||
/*!
|
||||
\param is input stream to be wrapped. |
||||
\param type UTF encoding type if it is not detected from the stream. |
||||
*/ |
||||
AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) { |
||||
RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE); |
||||
DetectType(); |
||||
static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) }; |
||||
takeFunc_ = f[type_]; |
||||
current_ = takeFunc_(*is_); |
||||
} |
||||
|
||||
UTFType GetType() const { return type_; } |
||||
bool HasBOM() const { return hasBOM_; } |
||||
|
||||
Ch Peek() const { return current_; } |
||||
Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; } |
||||
size_t Tell() const { return is_->Tell(); } |
||||
|
||||
// Not implemented
|
||||
void Put(Ch) { RAPIDJSON_ASSERT(false); } |
||||
void Flush() { RAPIDJSON_ASSERT(false); } |
||||
Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } |
||||
size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } |
||||
|
||||
private: |
||||
AutoUTFInputStream(const AutoUTFInputStream&); |
||||
AutoUTFInputStream& operator=(const AutoUTFInputStream&); |
||||
|
||||
// Detect encoding type with BOM or RFC 4627
|
||||
void DetectType() { |
||||
// BOM (Byte Order Mark):
|
||||
// 00 00 FE FF UTF-32BE
|
||||
// FF FE 00 00 UTF-32LE
|
||||
// FE FF UTF-16BE
|
||||
// FF FE UTF-16LE
|
||||
// EF BB BF UTF-8
|
||||
|
||||
const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4()); |
||||
if (!c) |
||||
return; |
||||
|
||||
unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24)); |
||||
hasBOM_ = false; |
||||
if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } |
||||
else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } |
||||
else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); } |
||||
else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); } |
||||
else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); } |
||||
|
||||
// RFC 4627: Section 3
|
||||
// "Since the first two characters of a JSON text will always be ASCII
|
||||
// characters [RFC0020], it is possible to determine whether an octet
|
||||
// stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
|
||||
// at the pattern of nulls in the first four octets."
|
||||
// 00 00 00 xx UTF-32BE
|
||||
// 00 xx 00 xx UTF-16BE
|
||||
// xx 00 00 00 UTF-32LE
|
||||
// xx 00 xx 00 UTF-16LE
|
||||
// xx xx xx xx UTF-8
|
||||
|
||||
if (!hasBOM_) { |
||||
int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0); |
||||
switch (pattern) { |
||||
case 0x08: type_ = kUTF32BE; break; |
||||
case 0x0A: type_ = kUTF16BE; break; |
||||
case 0x01: type_ = kUTF32LE; break; |
||||
case 0x05: type_ = kUTF16LE; break; |
||||
case 0x0F: type_ = kUTF8; break; |
||||
default: break; // Use type defined by user.
|
||||
} |
||||
} |
||||
|
||||
// Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
|
||||
if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); |
||||
if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); |
||||
} |
||||
|
||||
typedef Ch (*TakeFunc)(InputByteStream& is); |
||||
InputByteStream* is_; |
||||
UTFType type_; |
||||
Ch current_; |
||||
TakeFunc takeFunc_; |
||||
bool hasBOM_; |
||||
}; |
||||
|
||||
//! Output stream wrapper with dynamically bound encoding and automatic encoding detection.
|
||||
/*!
|
||||
\tparam CharType Type of character for writing. |
||||
\tparam OutputByteStream type of output byte stream to be wrapped. |
||||
*/ |
||||
template <typename CharType, typename OutputByteStream> |
||||
class AutoUTFOutputStream { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
||||
public: |
||||
typedef CharType Ch; |
||||
|
||||
//! Constructor.
|
||||
/*!
|
||||
\param os output stream to be wrapped. |
||||
\param type UTF encoding type. |
||||
\param putBOM Whether to write BOM at the beginning of the stream. |
||||
*/ |
||||
AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) { |
||||
RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE); |
||||
|
||||
// Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
|
||||
if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2); |
||||
if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4); |
||||
|
||||
static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) }; |
||||
putFunc_ = f[type_]; |
||||
|
||||
if (putBOM) |
||||
PutBOM(); |
||||
} |
||||
|
||||
UTFType GetType() const { return type_; } |
||||
|
||||
void Put(Ch c) { putFunc_(*os_, c); } |
||||
void Flush() { os_->Flush(); } |
||||
|
||||
// Not implemented
|
||||
Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;} |
||||
Ch Take() { RAPIDJSON_ASSERT(false); return 0;} |
||||
size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } |
||||
Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } |
||||
size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } |
||||
|
||||
private: |
||||
AutoUTFOutputStream(const AutoUTFOutputStream&); |
||||
AutoUTFOutputStream& operator=(const AutoUTFOutputStream&); |
||||
|
||||
void PutBOM() { |
||||
typedef void (*PutBOMFunc)(OutputByteStream&); |
||||
static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) }; |
||||
f[type_](*os_); |
||||
} |
||||
|
||||
typedef void (*PutFunc)(OutputByteStream&, Ch); |
||||
|
||||
OutputByteStream* os_; |
||||
UTFType type_; |
||||
PutFunc putFunc_; |
||||
}; |
||||
|
||||
#undef RAPIDJSON_ENCODINGS_FUNC |
||||
|
||||
RAPIDJSON_NAMESPACE_END |
||||
|
||||
#ifdef __clang__ |
||||
RAPIDJSON_DIAG_POP |
||||
#endif |
||||
|
||||
#ifdef __GNUC__ |
||||
RAPIDJSON_DIAG_POP |
||||
#endif |
||||
|
||||
#endif // RAPIDJSON_ENCODEDSTREAM_H_
|
@ -1,716 +0,0 @@
@@ -1,716 +0,0 @@
|
||||
// Tencent is pleased to support the open source community by making RapidJSON available.
|
||||
//
|
||||
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
|
||||
//
|
||||
// Licensed under the MIT License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://opensource.org/licenses/MIT
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#ifndef RAPIDJSON_ENCODINGS_H_ |
||||
#define RAPIDJSON_ENCODINGS_H_ |
||||
|
||||
#include "rapidjson.h" |
||||
|
||||
#ifdef _MSC_VER |
||||
RAPIDJSON_DIAG_PUSH |
||||
RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data
|
||||
RAPIDJSON_DIAG_OFF(4702) // unreachable code
|
||||
#elif defined(__GNUC__) |
||||
RAPIDJSON_DIAG_PUSH |
||||
RAPIDJSON_DIAG_OFF(effc++) |
||||
RAPIDJSON_DIAG_OFF(overflow) |
||||
#endif |
||||
|
||||
RAPIDJSON_NAMESPACE_BEGIN |
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Encoding
|
||||
|
||||
/*! \class rapidjson::Encoding
|
||||
\brief Concept for encoding of Unicode characters. |
||||
|
||||
\code |
||||
concept Encoding { |
||||
typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition.
|
||||
|
||||
enum { supportUnicode = 1 }; // or 0 if not supporting unicode
|
||||
|
||||
//! \brief Encode a Unicode codepoint to an output stream.
|
||||
//! \param os Output stream.
|
||||
//! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively.
|
||||
template<typename OutputStream> |
||||
static void Encode(OutputStream& os, unsigned codepoint); |
||||
|
||||
//! \brief Decode a Unicode codepoint from an input stream.
|
||||
//! \param is Input stream.
|
||||
//! \param codepoint Output of the unicode codepoint.
|
||||
//! \return true if a valid codepoint can be decoded from the stream.
|
||||
template <typename InputStream> |
||||
static bool Decode(InputStream& is, unsigned* codepoint); |
||||
|
||||
//! \brief Validate one Unicode codepoint from an encoded stream.
|
||||
//! \param is Input stream to obtain codepoint.
|
||||
//! \param os Output for copying one codepoint.
|
||||
//! \return true if it is valid.
|
||||
//! \note This function just validating and copying the codepoint without actually decode it.
|
||||
template <typename InputStream, typename OutputStream> |
||||
static bool Validate(InputStream& is, OutputStream& os); |
||||
|
||||
// The following functions deal with byte streams.
|
||||
|
||||
//! Take a character from input byte stream, skip BOM if exist.
|
||||
template <typename InputByteStream> |
||||
static Ch TakeBOM(InputByteStream& is); |
||||
|
||||
//! Take a character from input byte stream.
|
||||
template <typename InputByteStream> |
||||
static Ch Take(InputByteStream& is); |
||||
|
||||
//! Put BOM to output byte stream.
|
||||
template <typename OutputByteStream> |
||||
static void PutBOM(OutputByteStream& os); |
||||
|
||||
//! Put a character to output byte stream.
|
||||
template <typename OutputByteStream> |
||||
static void Put(OutputByteStream& os, Ch c); |
||||
}; |
||||
\endcode |
||||
*/ |
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// UTF8
|
||||
|
||||
//! UTF-8 encoding.
|
||||
/*! http://en.wikipedia.org/wiki/UTF-8
|
||||
http://tools.ietf.org/html/rfc3629
|
||||
\tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char. |
||||
\note implements Encoding concept |
||||
*/ |
||||
template<typename CharType = char> |
||||
struct UTF8 { |
||||
typedef CharType Ch; |
||||
|
||||
enum { supportUnicode = 1 }; |
||||
|
||||
template<typename OutputStream> |
||||
static void Encode(OutputStream& os, unsigned codepoint) { |
||||
if (codepoint <= 0x7F) |
||||
os.Put(static_cast<Ch>(codepoint & 0xFF)); |
||||
else if (codepoint <= 0x7FF) { |
||||
os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF))); |
||||
os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F)))); |
||||
} |
||||
else if (codepoint <= 0xFFFF) { |
||||
os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF))); |
||||
os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); |
||||
os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F))); |
||||
} |
||||
else { |
||||
RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); |
||||
os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF))); |
||||
os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F))); |
||||
os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); |
||||
os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F))); |
||||
} |
||||
} |
||||
|
||||
template<typename OutputStream> |
||||
static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { |
||||
if (codepoint <= 0x7F) |
||||
PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF)); |
||||
else if (codepoint <= 0x7FF) { |
||||
PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF))); |
||||
PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F)))); |
||||
} |
||||
else if (codepoint <= 0xFFFF) { |
||||
PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF))); |
||||
PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); |
||||
PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F))); |
||||
} |
||||
else { |
||||
RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); |
||||
PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF))); |
||||
PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F))); |
||||
PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); |
||||
PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F))); |
||||
} |
||||
} |
||||
|
||||
template <typename InputStream> |
||||
static bool Decode(InputStream& is, unsigned* codepoint) { |
||||
#define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu) |
||||
#define TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) |
||||
#define TAIL() COPY(); TRANS(0x70) |
||||
typename InputStream::Ch c = is.Take(); |
||||
if (!(c & 0x80)) { |
||||
*codepoint = static_cast<unsigned char>(c); |
||||
return true; |
||||
} |
||||
|
||||
unsigned char type = GetRange(static_cast<unsigned char>(c)); |
||||
if (type >= 32) { |
||||
*codepoint = 0; |
||||
} else { |
||||
*codepoint = (0xFFu >> type) & static_cast<unsigned char>(c); |
||||
} |
||||
bool result = true; |
||||
switch (type) { |
||||
case 2: TAIL(); return result; |
||||
case 3: TAIL(); TAIL(); return result; |
||||
case 4: COPY(); TRANS(0x50); TAIL(); return result; |
||||
case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result; |
||||
case 6: TAIL(); TAIL(); TAIL(); return result; |
||||
case 10: COPY(); TRANS(0x20); TAIL(); return result; |
||||
case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result; |
||||
default: return false; |
||||
} |
||||
#undef COPY |
||||
#undef TRANS |
||||
#undef TAIL |
||||
} |
||||
|
||||
template <typename InputStream, typename OutputStream> |
||||
static bool Validate(InputStream& is, OutputStream& os) { |
||||
#define COPY() os.Put(c = is.Take()) |
||||
#define TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) |
||||
#define TAIL() COPY(); TRANS(0x70) |
||||
Ch c; |
||||
COPY(); |
||||
if (!(c & 0x80)) |
||||
return true; |
||||
|
||||
bool result = true; |
||||
switch (GetRange(static_cast<unsigned char>(c))) { |
||||
case 2: TAIL(); return result; |
||||
case 3: TAIL(); TAIL(); return result; |
||||
case 4: COPY(); TRANS(0x50); TAIL(); return result; |
||||
case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result; |
||||
case 6: TAIL(); TAIL(); TAIL(); return result; |
||||
case 10: COPY(); TRANS(0x20); TAIL(); return result; |
||||
case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result; |
||||
default: return false; |
||||
} |
||||
#undef COPY |
||||
#undef TRANS |
||||
#undef TAIL |
||||
} |
||||
|
||||
//! Look up the range class of a byte.
/*! Based on the DFA described at http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
    with classes 1, 7 and 9 remapped to 0x10, 0x20 and 0x40 so that a single
    AND against a mask can test for several classes at once.
    \param c Byte to classify.
    \return The table entry for \c c.
*/
static unsigned char GetRange(unsigned char c) {
    static const unsigned char kRangeOfByte[] = {
        // 0x00 - 0x7F
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        // 0x80 - 0x8F
        0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
        // 0x90 - 0x9F
        0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
        // 0xA0 - 0xBF
        0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
        0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
        // 0xC0 - 0xDF
        8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
        // 0xE0 - 0xFF
        10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
    };
    return kRangeOfByte[c];
}
||||
|
||||
template <typename InputByteStream> |
||||
static CharType TakeBOM(InputByteStream& is) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
||||
typename InputByteStream::Ch c = Take(is); |
||||
if (static_cast<unsigned char>(c) != 0xEFu) return c; |
||||
c = is.Take(); |
||||
if (static_cast<unsigned char>(c) != 0xBBu) return c; |
||||
c = is.Take(); |
||||
if (static_cast<unsigned char>(c) != 0xBFu) return c; |
||||
c = is.Take(); |
||||
return c; |
||||
} |
||||
|
||||
template <typename InputByteStream> |
||||
static Ch Take(InputByteStream& is) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
||||
return static_cast<Ch>(is.Take()); |
||||
} |
||||
|
||||
template <typename OutputByteStream> |
||||
static void PutBOM(OutputByteStream& os) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu)); |
||||
} |
||||
|
||||
template <typename OutputByteStream> |
||||
static void Put(OutputByteStream& os, Ch c) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(c)); |
||||
} |
||||
}; |
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// UTF16
|
||||
|
||||
//! UTF-16 encoding.
|
||||
/*! http://en.wikipedia.org/wiki/UTF-16
|
||||
http://tools.ietf.org/html/rfc2781
|
||||
\tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead. |
||||
\note implements Encoding concept |
||||
|
||||
\note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. |
||||
For streaming, use UTF16LE and UTF16BE, which handle endianness. |
||||
*/ |
||||
template<typename CharType = wchar_t> |
||||
struct UTF16 { |
||||
typedef CharType Ch; |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2); |
||||
|
||||
enum { supportUnicode = 1 }; |
||||
|
||||
template<typename OutputStream> |
||||
static void Encode(OutputStream& os, unsigned codepoint) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); |
||||
if (codepoint <= 0xFFFF) { |
||||
RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
|
||||
os.Put(static_cast<typename OutputStream::Ch>(codepoint)); |
||||
} |
||||
else { |
||||
RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); |
||||
unsigned v = codepoint - 0x10000; |
||||
os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800)); |
||||
os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00)); |
||||
} |
||||
} |
||||
|
||||
|
||||
template<typename OutputStream> |
||||
static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); |
||||
if (codepoint <= 0xFFFF) { |
||||
RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
|
||||
PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint)); |
||||
} |
||||
else { |
||||
RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); |
||||
unsigned v = codepoint - 0x10000; |
||||
PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800)); |
||||
PutUnsafe(os, static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00)); |
||||
} |
||||
} |
||||
|
||||
template <typename InputStream> |
||||
static bool Decode(InputStream& is, unsigned* codepoint) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); |
||||
typename InputStream::Ch c = is.Take(); |
||||
if (c < 0xD800 || c > 0xDFFF) { |
||||
*codepoint = static_cast<unsigned>(c); |
||||
return true; |
||||
} |
||||
else if (c <= 0xDBFF) { |
||||
*codepoint = (static_cast<unsigned>(c) & 0x3FF) << 10; |
||||
c = is.Take(); |
||||
*codepoint |= (static_cast<unsigned>(c) & 0x3FF); |
||||
*codepoint += 0x10000; |
||||
return c >= 0xDC00 && c <= 0xDFFF; |
||||
} |
||||
return false; |
||||
} |
||||
|
||||
template <typename InputStream, typename OutputStream> |
||||
static bool Validate(InputStream& is, OutputStream& os) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); |
||||
typename InputStream::Ch c; |
||||
os.Put(static_cast<typename OutputStream::Ch>(c = is.Take())); |
||||
if (c < 0xD800 || c > 0xDFFF) |
||||
return true; |
||||
else if (c <= 0xDBFF) { |
||||
os.Put(c = is.Take()); |
||||
return c >= 0xDC00 && c <= 0xDFFF; |
||||
} |
||||
return false; |
||||
} |
||||
}; |
||||
|
||||
//! UTF-16 little endian encoding.
|
||||
template<typename CharType = wchar_t> |
||||
struct UTF16LE : UTF16<CharType> { |
||||
template <typename InputByteStream> |
||||
static CharType TakeBOM(InputByteStream& is) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
||||
CharType c = Take(is); |
||||
return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c; |
||||
} |
||||
|
||||
template <typename InputByteStream> |
||||
static CharType Take(InputByteStream& is) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
||||
unsigned c = static_cast<uint8_t>(is.Take()); |
||||
c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; |
||||
return static_cast<CharType>(c); |
||||
} |
||||
|
||||
template <typename OutputByteStream> |
||||
static void PutBOM(OutputByteStream& os) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); |
||||
} |
||||
|
||||
template <typename OutputByteStream> |
||||
static void Put(OutputByteStream& os, CharType c) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu)); |
||||
} |
||||
}; |
||||
|
||||
//! UTF-16 big endian encoding.
|
||||
template<typename CharType = wchar_t> |
||||
struct UTF16BE : UTF16<CharType> { |
||||
template <typename InputByteStream> |
||||
static CharType TakeBOM(InputByteStream& is) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
||||
CharType c = Take(is); |
||||
return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c; |
||||
} |
||||
|
||||
template <typename InputByteStream> |
||||
static CharType Take(InputByteStream& is) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
||||
unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; |
||||
c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())); |
||||
return static_cast<CharType>(c); |
||||
} |
||||
|
||||
template <typename OutputByteStream> |
||||
static void PutBOM(OutputByteStream& os) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); |
||||
} |
||||
|
||||
template <typename OutputByteStream> |
||||
static void Put(OutputByteStream& os, CharType c) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu)); |
||||
} |
||||
}; |
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// UTF32
|
||||
|
||||
//! UTF-32 encoding.
|
||||
/*! http://en.wikipedia.org/wiki/UTF-32
|
||||
\tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead. |
||||
\note implements Encoding concept |
||||
|
||||
\note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. |
||||
For streaming, use UTF32LE and UTF32BE, which handle endianness. |
||||
*/ |
||||
template<typename CharType = unsigned> |
||||
struct UTF32 { |
||||
typedef CharType Ch; |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4); |
||||
|
||||
enum { supportUnicode = 1 }; |
||||
|
||||
template<typename OutputStream> |
||||
static void Encode(OutputStream& os, unsigned codepoint) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); |
||||
RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); |
||||
os.Put(codepoint); |
||||
} |
||||
|
||||
template<typename OutputStream> |
||||
static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); |
||||
RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); |
||||
PutUnsafe(os, codepoint); |
||||
} |
||||
|
||||
template <typename InputStream> |
||||
static bool Decode(InputStream& is, unsigned* codepoint) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); |
||||
Ch c = is.Take(); |
||||
*codepoint = c; |
||||
return c <= 0x10FFFF; |
||||
} |
||||
|
||||
template <typename InputStream, typename OutputStream> |
||||
static bool Validate(InputStream& is, OutputStream& os) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); |
||||
Ch c; |
||||
os.Put(c = is.Take()); |
||||
return c <= 0x10FFFF; |
||||
} |
||||
}; |
||||
|
||||
//! UTF-32 little endian enocoding.
|
||||
template<typename CharType = unsigned> |
||||
struct UTF32LE : UTF32<CharType> { |
||||
template <typename InputByteStream> |
||||
static CharType TakeBOM(InputByteStream& is) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
||||
CharType c = Take(is); |
||||
return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c; |
||||
} |
||||
|
||||
template <typename InputByteStream> |
||||
static CharType Take(InputByteStream& is) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
||||
unsigned c = static_cast<uint8_t>(is.Take()); |
||||
c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; |
||||
c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16; |
||||
c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24; |
||||
return static_cast<CharType>(c); |
||||
} |
||||
|
||||
template <typename OutputByteStream> |
||||
static void PutBOM(OutputByteStream& os) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); |
||||
} |
||||
|
||||
template <typename OutputByteStream> |
||||
static void Put(OutputByteStream& os, CharType c) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu)); |
||||
} |
||||
}; |
||||
|
||||
//! UTF-32 big endian encoding.
|
||||
template<typename CharType = unsigned> |
||||
struct UTF32BE : UTF32<CharType> { |
||||
template <typename InputByteStream> |
||||
static CharType TakeBOM(InputByteStream& is) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
||||
CharType c = Take(is); |
||||
return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c; |
||||
} |
||||
|
||||
template <typename InputByteStream> |
||||
static CharType Take(InputByteStream& is) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
||||
unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24; |
||||
c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16; |
||||
c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8; |
||||
c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())); |
||||
return static_cast<CharType>(c); |
||||
} |
||||
|
||||
template <typename OutputByteStream> |
||||
static void PutBOM(OutputByteStream& os) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(0x00u)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu)); |
||||
} |
||||
|
||||
template <typename OutputByteStream> |
||||
static void Put(OutputByteStream& os, CharType c) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu)); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu)); |
||||
} |
||||
}; |
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// ASCII
|
||||
|
||||
//! ASCII encoding.
|
||||
/*! http://en.wikipedia.org/wiki/ASCII
|
||||
\tparam CharType Code unit for storing 7-bit ASCII data. Default is char. |
||||
\note implements Encoding concept |
||||
*/ |
||||
template<typename CharType = char> |
||||
struct ASCII { |
||||
typedef CharType Ch; |
||||
|
||||
enum { supportUnicode = 0 }; |
||||
|
||||
template<typename OutputStream> |
||||
static void Encode(OutputStream& os, unsigned codepoint) { |
||||
RAPIDJSON_ASSERT(codepoint <= 0x7F); |
||||
os.Put(static_cast<Ch>(codepoint & 0xFF)); |
||||
} |
||||
|
||||
template<typename OutputStream> |
||||
static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { |
||||
RAPIDJSON_ASSERT(codepoint <= 0x7F); |
||||
PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF)); |
||||
} |
||||
|
||||
template <typename InputStream> |
||||
static bool Decode(InputStream& is, unsigned* codepoint) { |
||||
uint8_t c = static_cast<uint8_t>(is.Take()); |
||||
*codepoint = c; |
||||
return c <= 0X7F; |
||||
} |
||||
|
||||
template <typename InputStream, typename OutputStream> |
||||
static bool Validate(InputStream& is, OutputStream& os) { |
||||
uint8_t c = static_cast<uint8_t>(is.Take()); |
||||
os.Put(static_cast<typename OutputStream::Ch>(c)); |
||||
return c <= 0x7F; |
||||
} |
||||
|
||||
template <typename InputByteStream> |
||||
static CharType TakeBOM(InputByteStream& is) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
||||
uint8_t c = static_cast<uint8_t>(Take(is)); |
||||
return static_cast<Ch>(c); |
||||
} |
||||
|
||||
template <typename InputByteStream> |
||||
static Ch Take(InputByteStream& is) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); |
||||
return static_cast<Ch>(is.Take()); |
||||
} |
||||
|
||||
template <typename OutputByteStream> |
||||
static void PutBOM(OutputByteStream& os) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
||||
(void)os; |
||||
} |
||||
|
||||
template <typename OutputByteStream> |
||||
static void Put(OutputByteStream& os, Ch c) { |
||||
RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); |
||||
os.Put(static_cast<typename OutputByteStream::Ch>(c)); |
||||
} |
||||
}; |
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// AutoUTF
|
||||
|
||||
//! Runtime-specified UTF encoding type of a stream.
/*! The numeric values are spelled out because AutoUTF uses them to index its
    per-encoding function tables.
*/
enum UTFType {
    kUTF8    = 0,   //!< UTF-8.
    kUTF16LE = 1,   //!< UTF-16 little endian.
    kUTF16BE = 2,   //!< UTF-16 big endian.
    kUTF32LE = 3,   //!< UTF-32 little endian.
    kUTF32BE = 4    //!< UTF-32 big endian.
};
||||
|
||||
//! Dynamically select encoding according to stream's runtime-specified UTF encoding type.
|
||||
/*! \note This class can be used with AutoUTFInputtStream and AutoUTFOutputStream, which provides GetType().
|
||||
*/ |
||||
template<typename CharType> |
||||
struct AutoUTF { |
||||
typedef CharType Ch; |
||||
|
||||
enum { supportUnicode = 1 }; |
||||
|
||||
#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x |
||||
|
||||
template<typename OutputStream> |
||||
static RAPIDJSON_FORCEINLINE void Encode(OutputStream& os, unsigned codepoint) { |
||||
typedef void (*EncodeFunc)(OutputStream&, unsigned); |
||||
static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) }; |
||||
(*f[os.GetType()])(os, codepoint); |
||||
} |
||||
|
||||
template<typename OutputStream> |
||||
static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream& os, unsigned codepoint) { |
||||
typedef void (*EncodeFunc)(OutputStream&, unsigned); |
||||
static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe) }; |
||||
(*f[os.GetType()])(os, codepoint); |
||||
} |
||||
|
||||
template <typename InputStream> |
||||
static RAPIDJSON_FORCEINLINE bool Decode(InputStream& is, unsigned* codepoint) { |
||||
typedef bool (*DecodeFunc)(InputStream&, unsigned*); |
||||
static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) }; |
||||
return (*f[is.GetType()])(is, codepoint); |
||||
} |
||||
|
||||
template <typename InputStream, typename OutputStream> |
||||
static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) { |
||||
typedef bool (*ValidateFunc)(InputStream&, OutputStream&); |
||||
static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) }; |
||||
return (*f[is.GetType()])(is, os); |
||||
} |
||||
|
||||
#undef RAPIDJSON_ENCODINGS_FUNC |
||||
}; |
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Transcoder
|
||||
|
||||
//! Encoding conversion.
|
||||
template<typename SourceEncoding, typename TargetEncoding> |
||||
struct Transcoder { |
||||
//! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
|
||||
template<typename InputStream, typename OutputStream> |
||||
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) { |
||||
unsigned codepoint; |
||||
if (!SourceEncoding::Decode(is, &codepoint)) |
||||
return false; |
||||
TargetEncoding::Encode(os, codepoint); |
||||
return true; |
||||
} |
||||
|
||||
template<typename InputStream, typename OutputStream> |
||||
static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) { |
||||
unsigned codepoint; |
||||
if (!SourceEncoding::Decode(is, &codepoint)) |
||||
return false; |
||||
TargetEncoding::EncodeUnsafe(os, codepoint); |
||||
return true; |
||||
} |
||||
|
||||
//! Validate one Unicode codepoint from an encoded stream.
|
||||
template<typename InputStream, typename OutputStream> |
||||
static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) { |
||||
return Transcode(is, os); // Since source/target encoding is different, must transcode.
|
||||
} |
||||
}; |
||||
|
||||
// Forward declaration.
|
||||
template<typename Stream> |
||||
inline void PutUnsafe(Stream& stream, typename Stream::Ch c); |
||||
|
||||
//! Specialization of Transcoder with same source and target encoding.
|
||||
template<typename Encoding> |
||||
struct Transcoder<Encoding, Encoding> { |
||||
template<typename InputStream, typename OutputStream> |
||||
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) { |
||||
os.Put(is.Take()); // Just copy one code unit. This semantic is different from primary template class.
|
||||
return true; |
||||
} |
||||
|
||||
template<typename InputStream, typename OutputStream> |
||||
static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) { |
||||
PutUnsafe(os, is.Take()); // Just copy one code unit. This semantic is different from primary template class.
|
||||
return true; |
||||
} |
||||
|
||||
template<typename InputStream, typename OutputStream> |
||||
static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) { |
||||
return Encoding::Validate(is, os); // source/target encoding are the same
|
||||
} |
||||
}; |
||||
|
||||
RAPIDJSON_NAMESPACE_END |
||||
|
||||