This reference page is linked to from the following overview topics: New Classes and Methods, Unicode.
#include <maxtextfile.h>
Reads and interprets text files.
This class was designed to perform file and stream I/O in a code page neutral way.
It was designed to resolve the following problems: * Correctly reads and interprets the BOM (an invisible character at the beginning of Unicode files). * Correctly detects UTF-8 and UTF-16 files (even if they are not signed). * Detects encoding cookies. XML files usually begin with "<?xml encoding='????'>". The detection algorithm will interpret this directive correctly. * Prevents splitting a character. In UTF-16, UTF-8 and some ANSI code pages, characters can be stored on 1 to 6 bytes. All the operations of this object are designed to avoid returning a partial character.
Plugin developers should consider using this class to perform file I/O to ensure that the files they generate remain compatible with previous versions of Max.
Public Types |
|
enum | TextFileReaderEncoding { FAVOR_UTF8 = 0x10000000, FOUND_BOM = 0x20000000, FOUND_COOKIE = 0x40000000, FLIPPED = 0x80000000 } |
Text file reading encoding.
More... |
|
enum | EOFCharacterHandling { DEFAULT_EOF_HANDLING, STOP_READING_AT_EOF, FILTEROUT_EOF_CHARACTER, IGNORE_EOF_CHARACTER } |
EOF character handling.
More... |
|
Public Member Functions |
|
Reader () | |
Default Constructor. |
|
virtual | ~Reader () |
Destructor. |
|
bool | Open (FILE *file, unsigned int encoding=0, LineEndMode mode=Text) |
Wrap an ANSI C FILE pointer. This service is
used to allow a developer to access a file opened with fopen but with
the advantage of using the SDK API. |
|
bool | Open (HANDLE fileHandle, unsigned int encoding=0, LineEndMode mode=Text) |
Wrap a Win32 file handle. |
|
bool | Open (const MCHAR *fileName, unsigned int encoding=0, LineEndMode mode=Text) |
Open a file using a file name. |
|
bool | Open (const MaxSDK::Util::MaxString &fileName, unsigned int encoding=0, LineEndMode mode=Text) |
Open a file using a file name. |
|
void | Close () |
Close the underlying stream and free any
intermediate data. |
|
MaxString | LastError () const |
Returns the last error returned by the
BinaryStream.
|
|
unsigned int | Encoding () const |
Returns the current encoding of this file.
|
|
LineEndMode | Mode () const |
Determine how this reader handles line
ending. |
|
void | SetReadBufferSize (size_t readSize) |
Set the read buffer size. |
|
size_t | GetReadBufferSize () const |
Returns the read buffer size. |
|
void | SetDetectSize (size_t detectSize) |
Set the default size of the buffer used to
validate encoding. |
|
size_t | GetDetectSize () const |
Returns the detect buffer size. |
|
size_t | NumberOfChars () const |
Return the number of characters in the file.
|
|
size_t | NumberOfLines () const |
Calculate the total number of lines in the
file. |
|
void | SetEOFCharacterHandling (EOFCharacterHandling) |
Set the EOF character handling method and
refilter the buffer. |
|
EOFCharacterHandling | GetEOFCharacterHandling () const |
Get the EOF character handling method.
|
|
virtual Char | ReadChar (bool peek=false) const |
Reads a single char. |
|
virtual unsigned int | ReadCharUTF32 (bool peek=false) const |
Reads a single char and returns its UTF32
representation. |
|
virtual MaxString | ReadChars (size_t nchars) const |
Reads characters from the file. |
|
virtual MaxString | ReadLine (size_t nchars=(size_t)-1, bool dontReturnEOL=false) const |
Reads a line from the file (or nchars,
depending on which one comes first.) |
|
virtual MaxString | ReadChunk (size_t len, bool dontReturnLastEOL=false) const |
Reads up to "len" bytes from the file and
converts them to a Unicode-compliant string. |
|
virtual MaxString | ReadFull () const |
Reads the file in a single operation and
returns it in a single string object. |
|
virtual size_t | PositionBytes () const |
Get the number of bytes read so far.
|
|
size_t | Position () const |
Get the number of characters read so far.
|
|
size_t | LineNumber () const |
Get the current line number. |
|
virtual size_t | Seek (long offset, int origin) |
Seek inside the stream. |
|
virtual bool | IsEndOfFile () const |
Returns true if the file is at the end. |
|
virtual bool | IsFileOpen () const |
Returns true if file is open. |
|
virtual size_t | UnreadChar (const Char &c) |
Unread a character. |
|
Static Public Member Functions |
|
static bool | Detect (void *data, size_t len, unsigned int &encoding, size_t *ignoreBytes) |
Detect the encoding of the passed buffer.
|
|
static size_t | NumberOfChars (const void *data, size_t len, unsigned int encoding=CP_ACP) |
Determine the number of chars inside a
buffer. |
|
static size_t | NumberOfLines (const void *data, size_t length, unsigned int encoding=CP_ACP) |
Determine the number of line feeds inside a
buffer. |
|
template<typename ChType , ChType ch> | |
static size_t | RemoveCharacter (ChType *data, size_t len) |
Remove ch character from the passed buffer.
|
|
template<typename ChType , ChType ch> | |
static size_t | TruncateAtCharacter (ChType *data, size_t len) |
Truncate buffer at ch character. |
|
Protected Types |
|
enum |
TextFileReaderError { ALL_OK, STREAM_INVALID_ARGUMENT, STREAM_ALREADY_OPEN, STREAM_NOT_OPEN, ERR_INVALID_FORMAT, STREAM_ERROR } |
Internal processing error code.
More... |
|
Protected Member Functions |
|
void | Detect (size_t len) const |
Detect the opened file encoding by analyzing
the first "len" bytes of the file. |
|
size_t | FillBuffer (size_t len, bool force=false, bool binary=false, bool detecting_encoding=false) const |
Read and cache len bytes from stream.
|
|
size_t | Filter (size_t pos) const |
Apply the selected open mode on the internal
cache buffer up to position. |
|
size_t | Filter (size_t pos, bool processBufferBoundaryCRLF) const |
Apply the selected open mode on the internal
cache buffer up to position. |
|
size_t | EnsureBufferContains (size_t len) const |
Ensure buffer size can contain the passed
length. |
|
size_t | NumberOfChars (const void *, size_t) const |
Return the number of characters in the
passed buffer depending on the current encoding. |
|
size_t | ConvertNumUTF8CharsToNumBytes (const char *data, size_t num) |
Return the number of bytes corresponding to
the num of UTF8 chars in passed buffer. |
|
size_t | ConvertNumUTF16CharsToNumBytes (const MCHAR *data, size_t num) |
Return the number of bytes corresponding to
the num of UTF16 chars in passed buffer. |
|
size_t | ConvertNumCharsToNumBytes (const char *data, size_t num, unsigned int encoding) |
Return the number of bytes corresponding to
the num of chars in passed buffer depending on the encoding.
|
|
size_t | NumberOfLines (const void *, size_t) const |
Calculate the total number of lines in the
passed buffer. |
|
template<typename ChType , typename CharLengthFunctor , int maxCharLength> | |
ChType * | ReadChar (size_t &charLengthT, bool peek, const CharLengthFunctor &CharLengthFunction) const |
INTERNAL FUNCTION. |
|
template<typename ChType > | |
MaxString | MakeString (const ChType *data, size_t length, bool dontReturnEndingCRLF) const |
size_t | Unread (const MaxString &string) |
Unread String. |
|
size_t | SeekToEnd (long offset=0) |
INTERNAL FUNCTION. |
|
size_t | SeekToAbsolute (long offset) |
Seek to an absolute point inside the text
stream. |
|
size_t | SeekFromCurrent (long offset) |
Advance "offset" characters. |
|
bool | Open (BinaryStream *stream, unsigned int encoding=0, LineEndMode mode=Text, bool closeOnDelete=false) |
Open an abstract BinaryStream.
|
|
Protected Attributes |
|
BinaryStream * | _stream |
bool | _streamDelete |
bool | _readCR |
bool | _readLF |
LineEndMode | _endOfLineMode |
size_t | _detectSize |
size_t | _readSize |
EOFCharacterHandling | _eofCharacterHandling |
TextFileReaderError | _error |
unsigned int | _encoding |
bool | _encodingDetected |
BinaryStreamMemory * | _backbuffer |
size_t | _ignoreBytes |
size_t | _positionBytes |
size_t | _positionChars |
size_t | _line |
Friends |
|
class | ReaderWriter |
class | CharBinaryStream |
class | BinaryStreamMemory |
Text file reading encoding.
{ /* If the file's encoding cannot be detected, favor UTF-8. By default, we favor ACP encoding. */ FAVOR_UTF8 = 0x10000000, /* Found a BOM at the beginning of the file. */ FOUND_BOM = 0x20000000, /* Found a cookie at the beginning of the file. */ FOUND_COOKIE = 0x40000000, /* Found flipped UTF-16 data. */ FLIPPED = 0x80000000 };
enum EOFCharacterHandling |
EOF character handling.
DEFAULT_EOF_HANDLING |
Used by ReaderWriter to override default value. |
STOP_READING_AT_EOF |
Reading of the file terminates at the EOF character - this is Reader in text mode. |
FILTEROUT_EOF_CHARACTER |
EOF characters are filtered out - this is the ReaderWriter preferred mode. |
IGNORE_EOF_CHARACTER |
EOF characters are read as regular characters - this is Reader in binary mode. |
enum
TextFileReaderError [protected] |
Internal processing error code.
Used by LastError to determine the proper message to generate.
Reader | ( | ) |
Default Constructor.
virtual ~Reader | ( | ) | [virtual] |
Destructor.
void Detect | ( | size_t | len | ) | const [protected] |
Detect the opened file encoding by analyzing the first "len" bytes of the file.
len | Size of the buffer to use to detect the encoding |
size_t FillBuffer | ( | size_t | len, |
bool | force = false , |
||
bool | binary = false , |
||
bool | detecting_encoding =
false |
||
) | const [protected] |
size_t Filter | ( | size_t | pos | ) | const [protected] |
Apply the selected open mode on the internal cache buffer up to position.
pos | Position to stop filtering |
size_t Filter | ( | size_t | pos, |
bool | processBufferBoundaryCRLF | ||
) | const [protected] |
Apply the selected open mode on the internal cache buffer up to position.
pos | Position to stop filtering |
processBufferBoundaryCRLF | If true, take care of the CR or LF which was read in the last FillBuffer call |
size_t EnsureBufferContains | ( | size_t | len | ) | const [protected] |
Ensure buffer size can contain the passed length.
len | The minimum size of the cache buffer |
size_t NumberOfChars | ( | const void * | , |
size_t | |||
) | const [protected] |
Return the number of characters in the passed buffer depending on the current encoding.
Buffer | to evaluate |
Size | of the passed buffer |
size_t ConvertNumUTF8CharsToNumBytes | ( | const char * | data, |
size_t | num | ||
) | [protected] |
Return the number of bytes corresponding to the num of UTF8 chars in passed buffer.
data | Buffer to evaluate |
num | Number of chars in the passed buffer |
size_t ConvertNumUTF16CharsToNumBytes | ( | const MCHAR * | data, |
size_t | num | ||
) | [protected] |
Return the number of bytes corresponding to the num of UTF16 chars in passed buffer.
data | Buffer to evaluate |
num | Number of chars in the passed buffer |
size_t ConvertNumCharsToNumBytes | ( | const char * | data, |
size_t | num, | ||
unsigned int | encoding | ||
) | [protected] |
Return the number of bytes corresponding to the num of chars in passed buffer depending on the encoding.
data | Buffer to evaluate |
num | Number of chars in the passed buffer |
size_t NumberOfLines | ( | const void * | , |
size_t | |||
) | const [protected] |
Calculate the total number of lines in the passed buffer.
The | buffer to evaluate the number of line |
The | size of the buffer |
ChType* ReadChar | ( | size_t & | charLengthT, |
bool | peek, | ||
const CharLengthFunctor & | CharLengthFunction | ||
) | const [protected] |
INTERNAL FUNCTION.
Used in the implementation of ReadChar() and ReadCharUTF32().
MaxString MakeString | ( | const ChType * | data, |
size_t | length, | ||
bool | dontReturnEndingCRLF | ||
) | const [protected] |
size_t Unread | ( | const MaxString & | string | ) | [protected] |
Unread String.
Put back a sequence of characters inside the buffer. The data will be re-read next time you call read. This is used internally when parsing max scripts.
string | String to put back in the buffer |
size_t SeekToEnd | ( | long | offset = 0 |
) | [protected] |
INTERNAL FUNCTION.
Used by Seek. Seeks inside this text stream with the end as the reference point.
offset | Offset characters from end of file to seek to |
size_t SeekToAbsolute | ( | long | offset | ) | [protected] |
Seek to an absolute point inside the text stream.
offset | Offset, in characters, from the beginning of the file to seek to |
size_t SeekFromCurrent | ( | long | offset | ) | [protected] |
Advance "offset" characters.
offset | Number of characters to advance from the current position |
bool Open | ( | BinaryStream * | stream, |
unsigned int | encoding = 0 , |
||
LineEndMode | mode = Text , |
||
bool | closeOnDelete =
false |
||
) | [protected] |
Open an abstract BinaryStream.
stream | Opened stream the Reader uses |
encoding | This parameter can contain a hint for the detection algorithm. Acceptable values are all code page numbers that are recognized by Windows. |
In addition to that, you can also specify FAVOR_UTF8. It can be used to cascade the detection of the codepage. For example, if you specify "CP_ACP | FAVOR_UTF8", the detection algorithm will treat any non-UTF8 data as ACP.
mode |
closeOnDelete | Delete the "stream" at the same time as this object. |
bool Open | ( | FILE * | file, |
unsigned int | encoding = 0 , |
||
LineEndMode | mode = Text |
||
) |
Wrap an ANSI C FILE pointer. This service is used to allow a developer to access a file opened with fopen but with the advantage of using the SDK API.
Using this service allows the developer to not worry about character encoding. The developer is responsible for closing the file once done.
file | ANSI C FILE pointer |
encoding | This parameter can contain a hint for the detection algorithm. Acceptable values are all code page numbers that are recognized by Windows. |
In addition to that, you can also specify FAVOR_UTF8. It can be used to cascade the detection of the codepage. For example, if you specify "CP_ACP | FAVOR_UTF8", the detection algorithm will treat any non-UTF8 data as ACP.
mode |
bool Open | ( | HANDLE | fileHandle, |
unsigned int | encoding = 0 , |
||
LineEndMode | mode = Text |
||
) |
Wrap a Win32 file handle.
fileHandle | File Handle |
encoding | This parameter can contain a hint for the detection algorithm. Acceptable values are all code page numbers that are recognized by Windows. |
In addition to that, you can also specify FAVOR_UTF8. It can be used to cascade the detection of the codepage. For example, if you specify "CP_ACP | FAVOR_UTF8", the detection algorithm will treat any non-UTF8 data as ACP.
mode |
bool Open | ( | const MCHAR * | fileName, |
unsigned int | encoding = 0 , |
||
LineEndMode | mode = Text |
||
) |
Open a file using a file name.
fileName | File name to open. If file does not exist, it will be created. |
encoding | This parameter can contain a hint for the detection algorithm. Acceptable values are all code page numbers that are recognized by Windows. |
In addition to that, you can also specify FAVOR_UTF8. It can be used to cascade the detection of the codepage. For example, if you specify "CP_ACP | FAVOR_UTF8", the detection algorithm will treat any non-UTF8 data as ACP.
mode |
bool Open | ( | const MaxSDK::Util::MaxString & | fileName, |
unsigned int | encoding = 0 , |
||
LineEndMode | mode = Text |
||
) |
Open a file using a file name.
fileName | File name to open. If file does not exist, it will be created. |
encoding | This parameter can contain a hint for the detection algorithm. Acceptable values are all code page numbers that are recognized by Windows. |
In addition to that, you can also specify FAVOR_UTF8. It can be used to cascade the detection of the codepage. For example, if you specify "CP_ACP | FAVOR_UTF8", the detection algorithm will treat any non-UTF8 data as ACP.
mode |
void Close | ( | ) |
Close the underlying stream and free any intermediate data.
MaxString LastError | ( | ) | const |
unsigned int Encoding | ( | ) | const |
Returns the current encoding of this file.
LineEndMode Mode | ( | ) | const |
void SetReadBufferSize | ( | size_t | readSize | ) |
Set the read buffer size.
The larger the buffer is, the better read performance is.
readSize | Size of the buffer to read. Default 4096 |
size_t GetReadBufferSize | ( | ) | const |
Returns the read buffer size.
void SetDetectSize | ( | size_t | detectSize | ) |
Set the default size of the buffer used to validate encoding.
This parameter is used internally when calling Detect.
detectSize | Size of the buffer used when detecting the current character type. Default 65536 |
size_t GetDetectSize | ( | ) | const |
Returns the detect buffer size.
size_t NumberOfChars | ( | ) | const |
Return the number of characters in the file.
size_t NumberOfLines | ( | ) | const |
Calculate the total number of lines in the file.
void SetEOFCharacterHandling | ( | EOFCharacterHandling | ) |
Set the EOF character handling method and refilter the buffer.
EOFCharacterHandling GetEOFCharacterHandling | ( | ) | const |
virtual Char ReadChar | ( | bool | peek = false |
) | const [virtual] |
Reads a single char.
peek | Read a char but does not move the internal pointer to next char. Default is false so we move to next character |
Implements BaseTextReader.
virtual unsigned int ReadCharUTF32 | ( | bool | peek = false |
) | const [virtual] |
Reads a single char and returns its UTF32 representation.
peek | Read a char but does not move the internal pointer to next char. Default is false so we move to next character |
Implements BaseTextReader.
virtual MaxString ReadChars | ( | size_t | nchars | ) | const [virtual] |
Reads characters from the file.
nchars | Stop reading after 'nchars' characters. |
Implements BaseTextReader.
virtual MaxString ReadLine | ( | size_t | nchars =
(size_t)-1 , |
bool | dontReturnEOL =
false |
||
) | const [virtual] |
Reads a line from the file (or nchars, depending on which one comes first.)
nchars | Stop reading after 'nchars' characters even if the EOL was not found. |
dontReturnEOL | By default, this function returns the line including its end-of-line character(s) unless you set "dontReturnEOL" to true. |
Implements BaseTextReader.
virtual MaxString ReadChunk | ( | size_t | len, |
bool | dontReturnLastEOL =
false |
||
) | const [virtual] |
Reads up to "len" bytes from the file and converts them to a Unicode-compliant string.
len | Number of bytes to take out of the underlying stream. |
dontReturnLastEOL | Determine if this function will trim the last EOL sequence. |
virtual MaxString ReadFull | ( | ) | const [virtual] |
Reads the file in a single operation and returns it in a single string object.
virtual size_t PositionBytes | ( | ) | const [virtual] |
Get the number of bytes read so far.
size_t Position | ( | ) | const [virtual] |
size_t LineNumber | ( | ) | const [virtual] |
virtual size_t Seek | ( | long | offset, |
int | origin | ||
) | [virtual] |
Seek inside the stream.
offset | Seek operations are done in number of characters (not bytes). |
origin | The direction to move. Origin can be one of the following * SEEK_CUR Current position of file pointer. * SEEK_END End of file. * SEEK_SET Beginning of file. |
Implements BaseTextReader.
virtual bool IsEndOfFile | ( | ) | const [virtual] |
virtual bool IsFileOpen | ( | ) | const [virtual] |
virtual size_t UnreadChar | ( | const Char & | c | ) | [virtual] |
Unread a character.
Put back a character inside the buffer. The data will be re-read next time you call read.
c | Char to put back in the buffer |
Implements BaseTextReader.
static bool Detect | ( | void * | data, |
size_t | len, | ||
unsigned int & | encoding, | ||
size_t * | ignoreBytes | ||
) | [static] |
Detect the encoding of the passed buffer.
data | Buffer to detect the encoding |
len | Size of the passed buffer |
encoding | (in/out) On input, tells the detector what to expect. On output, it contains what the detector found. |
ignoreBytes | (out) On output, tells the caller how many bytes it must ignore at the beginning of the file because of the BOM. |
static size_t NumberOfChars | ( | const void * | data, |
size_t | len, | ||
unsigned int | encoding =
CP_ACP |
||
) | [static] |
Determine the number of chars inside a buffer.
It's more complex than just strlen or wcslen. Those two functions return the number of char or WCHAR entries. This function returns the number of characters (or symbols).
data | Buffer containing a string to count the number of symbols |
len | Size of the buffer to check |
encoding | Encoding to use to count the number of symbols |
static size_t NumberOfLines | ( | const void * | data, |
size_t | length, | ||
unsigned int | encoding =
CP_ACP |
||
) | [static] |
Determine the number of line feeds inside a buffer.
data | Buffer in which '\n' characters are counted. |
length | Length of data (in MCHAR) |
encoding | Encoding of "data". Can be any valid encoding. ie. MSDE_CP_UTF16, CP_UTF8, CP_ACP, etc. |
static size_t RemoveCharacter | ( | ChType * | data, |
size_t | len | ||
) | [static] |
Remove ch character from the passed buffer.
data | Buffer in which ch characters are to be removed. |
len | Length of data (in MCHAR) |
static size_t TruncateAtCharacter | ( | ChType * | data, |
size_t | len | ||
) | [static] |
Truncate buffer at ch character.
data | Buffer to validate. |
len | Length of data (in MCHAR) |
friend class ReaderWriter
[friend] |
friend class CharBinaryStream
[friend] |
friend class BinaryStreamMemory
[friend] |
BinaryStream*
_stream [protected] |
bool
_streamDelete [protected] |
bool
_readCR [mutable, protected] |
bool
_readLF [mutable, protected] |
LineEndMode
_endOfLineMode [protected] |
size_t
_detectSize [protected] |
size_t
_readSize [protected] |
EOFCharacterHandling
_eofCharacterHandling [mutable, protected] |
TextFileReaderError
_error [mutable, protected] |
unsigned int
_encoding [mutable, protected] |
bool
_encodingDetected [mutable, protected] |
BinaryStreamMemory*
_backbuffer [mutable, protected] |
size_t
_ignoreBytes [mutable, protected] |
size_t
_positionBytes [mutable, protected] |
size_t
_positionChars [mutable, protected] |
size_t
_line [mutable, protected] |