mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 09:40:21 +00:00
2349 lines
84 KiB
C
2349 lines
84 KiB
C
/**
|
|
** Copyright (c) 2011-2012, Karapetsas Eleftherios
|
|
** All rights reserved.
|
|
**
|
|
** Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
|
** 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
|
** 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in
|
|
** the documentation and/or other materials provided with the distribution.
|
|
** 3. Neither the name of the Original Author of Refu nor the names of its contributors may be used to endorse or promote products derived from
|
|
**
|
|
** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
|
** INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
** SERVICES;LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
** WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
** OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
**/
|
|
#include <errno.h>
|
|
|
|
#include <String/rfc_string.h>
|
|
// include bitwise operations
|
|
#include <rf_utils.h>
|
|
// include the private functions and macros
|
|
#include "string_private.h"
|
|
// include io_private only for the write check
|
|
#include "../IO/io_private.h"
|
|
// include the extended string
|
|
#include <String/rfc_stringx.h>
|
|
// for HUGE_VAL definition
|
|
#include <math.h>
|
|
|
|
#include <rf_localmem.h> // for the local stack memory
|
|
|
|
/*********************************************************************** Start of the RF_String functions *****************************************************************************************/
|
|
|
|
/*-------------------------------------------------------------------------Methods to create an RF_String-------------------------------------------------------------------------------*/
|
|
|
|
// Allocates and returns a Refu string holding the given characters. The given characters have to be in UTF-8. A check for a valid sequence of bytes is performed.
|
|
#ifndef RF_OPTION_DEFAULT_ARGUMENTS
|
|
RF_String* rfString_Create(const char* s,...)
|
|
#else
|
|
RF_String* i_rfString_Create(const char* s,...)
|
|
#endif
|
|
{
|
|
READ_VSNPRINTF_ARGS(s,s,0)
|
|
|
|
// check for validity of the given sequence and get the character length
|
|
uint32_t byteLength;
|
|
if( rfUTF8_VerifySequence(buff,&byteLength) == RF_FAILURE)
|
|
{
|
|
LOG_ERROR("Error at String Allocation due to invalid UTF-8 byte sequence",RE_STRING_INIT_FAILURE);
|
|
if(buffAllocated == true)
|
|
free(buff);
|
|
return 0;
|
|
}
|
|
|
|
RF_String* ret;
|
|
RF_MALLOC(ret,sizeof(RF_String));
|
|
// get length
|
|
ret->byteLength = byteLength;
|
|
|
|
// now that we know the length we can allocate the buffer and copy the bytes
|
|
RF_MALLOC(ret->bytes,ret->byteLength+1);
|
|
memcpy(ret->bytes,buff,ret->byteLength+1);
|
|
if(buffAllocated==true)
|
|
free(buff);
|
|
return ret;
|
|
}
|
|
#ifdef RF_OPTION_DEFAULT_ARGUMENTS
/*
 * Non-variadic counterpart of the string creation function: heap-allocates an
 * RF_String holding a copy of the UTF-8 bytes in s.
 * Returns the new string, or 0 if s is not a valid UTF-8 sequence.
 */
RF_String* i_NVrfString_Create(const char* s)
{
    uint32_t verifiedBytes;
    RF_String* created;

    // reject invalid input before allocating anything
    if(rfUTF8_VerifySequence(s,&verifiedBytes) == RF_FAILURE)
    {
        LOG_ERROR("Error at String Allocation due to invalid UTF-8 byte sequence",RE_STRING_INIT_FAILURE);
        return 0;
    }

    RF_MALLOC(created,sizeof(RF_String));
    created->byteLength = verifiedBytes;

    // copy the bytes together with the terminating null
    RF_MALLOC(created->bytes,created->byteLength+1);
    memcpy(created->bytes,s,created->byteLength+1);

    return created;
}
#endif
|
|
|
|
|
|
// Allocates and returns a temporary Refu string on the local memory stack holding the given characters. The given characters have to be in the configured source encoding. A check for a valid sequence of bytes is performed.
|
|
RF_String* i_rfString_CreateLocal1(const char* s,...)
|
|
{
|
|
#if RF_OPTION_SOURCE_ENCODING != RF_UTF8
|
|
uint32_t characterLength,*codepoints,i=0,j;
|
|
#endif
|
|
// remember the stack pointer before this macro evaluation
|
|
rfLMS_MacroEvalPtr(RF_LMS);
|
|
// read the var args
|
|
READ_VSNPRINTF_ARGS(s,s,0)
|
|
// /===Start of Non-UTF-8 code===// /
|
|
#if (RF_OPTION_SOURCE_ENCODING == RF_UTF16_LE) || (RF_OPTION_SOURCE_ENCODING == RF_UTF16_BE)
|
|
// find the bytelength of the UTF-16 buffer
|
|
while(buff[i] != '\0' && buff[i+1]!= '\0')
|
|
i++;
|
|
i+=2;
|
|
// allocate the codepoint buffer
|
|
RF_MALLOC(codepoints,i/2)
|
|
#elif (RF_OPTION_SOURCE_ENCODING == RF_UTF32_LE) || (RF_OPTION_SOURCE_ENCODING == RF_UTF32_BE)
|
|
// find the bytelength of the UTF-32 buffer
|
|
while(buff[i] != '\0' && buff[i+1]!= '\0' && buff[i+2]!= '\0' && buff[i+3]!= '\0')
|
|
i++;
|
|
i+=4;
|
|
// allocate the codepoint buffer
|
|
RF_MALLOC(codepoints,i)
|
|
#endif
|
|
#if (RF_OPTION_SOURCE_ENCODING == RF_UTF16_LE)// decode the UTF16
|
|
if(rfUTILS_Endianess() == RF_LITTLE_ENDIAN)
|
|
if(rfUTF16_Decode(buff,&characterLength,codepoints) == false)
|
|
goto cleanup;
|
|
else
|
|
if(rfUTF16_Decode_swap(buff,&characterLength,codepoints)==false)
|
|
goto cleanup;
|
|
|
|
#elif RF_OPTION_SOURCE_ENCODING == RF_UTF16_BE// decode the UTF16
|
|
if(rfUTILS_Endianess() == RF_LITTLE_ENDIAN)
|
|
if(rfUTF16_Decode_swap(buff,&characterLength,codepoints) == false)
|
|
goto cleanup;
|
|
else
|
|
if(rfUTF16_Decode(buff,&characterLength,codepoints)==false)
|
|
goto cleanup;
|
|
#elif RF_OPTION_SOURCE_ENCODING == RF_UTF32_LE// copy the UTF32 into the codepoint
|
|
memcpy(codepoints,buff,i);
|
|
if(rfUTILS_Endianess != RF_LITTLE_ENDIAN)
|
|
{
|
|
for(j=0;j<i;j+=4)
|
|
{
|
|
rfUTILS_SwapEndianUI((uint32_t*)(codepoints+j))
|
|
}
|
|
}
|
|
#elif RF_OPTION_SOURCE_ENCODING == RF_UTF32_BE// copy the UTF32 into the codepoint
|
|
memcpy(codepoints,buff,i);
|
|
if(rfUTILS_Endianess !RF_BIG_ENDIAN RF_LITTLE_ENDIAN)
|
|
{
|
|
for(j=0;j<i;j+=4)
|
|
{
|
|
rfUTILS_SwapEndianUI((uint32_t*)(codepoints+j))
|
|
}
|
|
}
|
|
#endif
|
|
#if RF_OPTION_SOURCE_ENCODING != RF_UTF8 // in any case other than UTF-8 encode the codepoints into UTF-8 , and free them
|
|
if(buffAllocated == true)
|
|
free(buff);
|
|
buffAllocated = true;
|
|
if((buff = rfUTF8_Encode(codepoints,characterLength,&byteLength)) == 0)
|
|
{
|
|
LOG_ERROR("While attempting to create a temporary RF_String the given byte sequence could not be properly encoded into UTF-8",RE_UTF8_ENCODING);
|
|
free(codepoints);
|
|
return 0;
|
|
}
|
|
free(codepoints);
|
|
#endif
|
|
// /===End of Non-UTF-8 code===// /
|
|
// /progress normally since here we have a UTF-8 buffer
|
|
// check for validity of the given sequence and get the character length
|
|
uint32_t byteLength;
|
|
if( rfUTF8_VerifySequence(buff,&byteLength) == RF_FAILURE)
|
|
{
|
|
LOG_ERROR("Error at String Allocation due to invalid UTF-8 byte sequence",RE_STRING_INIT_FAILURE);
|
|
if(buffAllocated == true)
|
|
free(buff);
|
|
return 0;
|
|
}
|
|
|
|
RF_String* ret;
|
|
ret = rfLMS_Push(RF_LMS,sizeof(RF_String));
|
|
if(ret == 0)
|
|
{
|
|
LOG_ERROR("Memory allocation from the Local Memory Stack failed. Insufficient local memory stack space. Consider compiling the library with bigger stack space. Quitting proccess...",
|
|
RE_LOCALMEMSTACK_INSUFFICIENT);
|
|
exit(RE_LOCALMEMSTACK_INSUFFICIENT);
|
|
}
|
|
// get length
|
|
ret->byteLength = byteLength;
|
|
|
|
// now that we know the length we can allocate the buffer and copy the bytes
|
|
ret->bytes = rfLMS_Push(RF_LMS,ret->byteLength+1);
|
|
if(ret->bytes == 0)
|
|
{
|
|
LOG_ERROR("Memory allocation from the Local Memory Stack failed. Insufficient local memory stack space. Consider compiling the library with bigger stack space. Quitting proccess...",
|
|
RE_LOCALMEMSTACK_INSUFFICIENT);
|
|
exit(RE_LOCALMEMSTACK_INSUFFICIENT);
|
|
}
|
|
memcpy(ret->bytes,buff,ret->byteLength+1);
|
|
// finally free stuff if needed
|
|
if(buffAllocated == true)
|
|
free(buff);
|
|
return ret;
|
|
|
|
// /cleanup code for non-UTF-8 cases
|
|
#if (RF_OPTION_SOURCE_ENCODING == RF_UTF16_LE) || (RF_OPTION_SOURCE_ENCODING == RF_UTF16_BE)
|
|
cleanup:
|
|
#if RF_OPTION_SOURCE_ENCODING == RF_UTF16_LE
|
|
LOG_ERROR("Temporary RF_String creation from a UTF-16 Little Endian buffer failed due to UTF-16 decoding failure",RE_UTF16_INVALID_SEQUENCE);
|
|
#elif RF_OPTION_SOURCE_ENCODING == RF_UTF16_BE
|
|
LOG_ERROR("Temporary RF_String creation from a UTF-16 Big Endian buffer failed due to UTF-16 decoding failure",RE_UTF16_INVALID_SEQUENCE);
|
|
#endif
|
|
free(codepoints);
|
|
if(buffAllocated == true)
|
|
free(buff);
|
|
return 0;
|
|
#endif
|
|
}
|
|
RF_String* i_NVrfString_CreateLocal(const char* s)
|
|
{
|
|
#if RF_OPTION_SOURCE_ENCODING != RF_UTF8
|
|
uint32_t characterLength,*codepoints,i=0,j;
|
|
char* buff;
|
|
#endif
|
|
// remember the stack pointer before this macro evaluation
|
|
rfLMS_MacroEvalPtr(RF_LMS);
|
|
// /===Start of Non-UTF-8 code===// /
|
|
#if (RF_OPTION_SOURCE_ENCODING == RF_UTF16_LE) || (RF_OPTION_SOURCE_ENCODING == RF_UTF16_BE)
|
|
// find the bytelength of the UTF-16 buffer
|
|
while(s[i] != '\0' &&s[i+1]!= '\0')
|
|
i++;
|
|
i+=2;
|
|
// allocate the codepoint buffer
|
|
RF_MALLOC(codepoints,i/2)
|
|
#elif (RF_OPTION_SOURCE_ENCODING == RF_UTF32_LE) || (RF_OPTION_SOURCE_ENCODING == RF_UTF32_BE)
|
|
// find the bytelength of the UTF-32 buffer
|
|
while(s[i] != '\0' && s[i+1]!= '\0' && s[i+2]!= '\0' && s[i+3]!= '\0')
|
|
i++;
|
|
i+=4;
|
|
// allocate the codepoint buffer
|
|
RF_MALLOC(codepoints,i)
|
|
#endif
|
|
#if (RF_OPTION_SOURCE_ENCODING == RF_UTF16_LE)// decode the UTF16
|
|
if(rfUTILS_Endianess() == RF_LITTLE_ENDIAN)
|
|
if(rfUTF16_Decode(s,&characterLength,codepoints) == false)
|
|
goto cleanup;
|
|
else
|
|
if(rfUTF16_Decode_swap(s,&characterLength,codepoints)==false)
|
|
goto cleanup;
|
|
|
|
#elif RF_OPTION_SOURCE_ENCODING == RF_UTF16_BE// decode the UTF16
|
|
if(rfUTILS_Endianess() == RF_LITTLE_ENDIAN)
|
|
if(rfUTF16_Decode_swap(s,&characterLength,codepoints) == false)
|
|
goto cleanup;
|
|
else
|
|
if(rfUTF16_Decode(s,&characterLength,codepoints)==false)
|
|
goto cleanup;
|
|
#elif RF_OPTION_SOURCE_ENCODING == RF_UTF32_LE// copy the UTF32 into the codepoint
|
|
memcpy(codepoints,s,i);
|
|
if(rfUTILS_Endianess != RF_LITTLE_ENDIAN)
|
|
{
|
|
for(j=0;j<i;j+=4)
|
|
{
|
|
rfUTILS_SwapEndianUI((uint32_t*)(codepoints+j))
|
|
}
|
|
}
|
|
#elif RF_OPTION_SOURCE_ENCODING == RF_UTF32_BE// copy the UTF32 into the codepoint
|
|
memcpy(codepoints,s,i);
|
|
if(rfUTILS_Endianess !RF_BIG_ENDIAN RF_LITTLE_ENDIAN)
|
|
{
|
|
for(j=0;j<i;j+=4)
|
|
{
|
|
rfUTILS_SwapEndianUI((uint32_t*)(codepoints+j))
|
|
}
|
|
}
|
|
#endif
|
|
#if RF_OPTION_SOURCE_ENCODING != RF_UTF8 // in any case other than UTF-8 encode the codepoints into UTF-8 , and free them
|
|
if((buff = rfUTF8_Encode(codepoints,characterLength,&byteLength)) == 0)
|
|
{
|
|
LOG_ERROR("While attempting to create a temporary RF_String the given byte sequence could not be properly encoded into UTF-8",RE_UTF8_ENCODING);
|
|
free(codepoints);
|
|
return 0;
|
|
}
|
|
free(codepoints);
|
|
#endif
|
|
// /===End of Non-UTF-8 code===// /
|
|
// check for validity of the given sequence and get the character length
|
|
uint32_t byteLength;
|
|
#if RF_OPTION_SOURCE_ENCODING == RF_UTF8
|
|
if( rfUTF8_VerifySequence(s,&byteLength) == RF_FAILURE)
|
|
#else
|
|
if( rfUTF8_VerifySequence(buff,&byteLength) == RF_FAILURE)
|
|
#endif
|
|
{
|
|
LOG_ERROR("Error at String Allocation due to invalid UTF-8 byte sequence",RE_STRING_INIT_FAILURE);
|
|
return 0;
|
|
}
|
|
|
|
RF_String* ret;
|
|
ret = rfLMS_Push(RF_LMS,sizeof(RF_String));
|
|
if(ret == 0)
|
|
{
|
|
LOG_ERROR("Memory allocation from the Local Memory Stack failed during string allocation. Insufficient local memory stack space. Consider compiling the library with bigger stack space. Quitting proccess...",
|
|
RE_LOCALMEMSTACK_INSUFFICIENT);
|
|
exit(RE_LOCALMEMSTACK_INSUFFICIENT);
|
|
}
|
|
// get length
|
|
ret->byteLength = byteLength;
|
|
|
|
ret->bytes = rfLMS_Push(RF_LMS,ret->byteLength+1);
|
|
if(ret->bytes == 0)
|
|
{
|
|
LOG_ERROR("Memory allocation from the Local Memory Stack failed during string allocation. Insufficient local memory stack space. Consider compiling the library with bigger stack space. Quitting proccess...",
|
|
RE_LOCALMEMSTACK_INSUFFICIENT);
|
|
exit(RE_LOCALMEMSTACK_INSUFFICIENT);
|
|
}
|
|
#if RF_OPTION_SOURCE_ENCODING == RF_UTF8
|
|
memcpy(ret->bytes,s,ret->byteLength+1);
|
|
#else
|
|
memcpy(ret->bytes,buff,ret->byteLength+1);
|
|
#endif
|
|
return ret;
|
|
|
|
// /cleanup code for non-UTF-8 cases
|
|
#if (RF_OPTION_SOURCE_ENCODING == RF_UTF16_LE) || (RF_OPTION_SOURCE_ENCODING == RF_UTF16_BE)
|
|
cleanup:
|
|
#if RF_OPTION_SOURCE_ENCODING == RF_UTF16_LE
|
|
LOG_ERROR("Temporary RF_String creation from a UTF-16 Little Endian buffer failed due to UTF-16 decoding failure",RE_UTF16_INVALID_SEQUENCE);
|
|
#elif RF_OPTION_SOURCE_ENCODING == RF_UTF16_BE
|
|
LOG_ERROR("Temporary RF_String creation from a UTF-16 Big Endian buffer failed due to UTF-16 decoding failure",RE_UTF16_INVALID_SEQUENCE);
|
|
#endif
|
|
free(codepoints);
|
|
return 0;
|
|
#endif
|
|
}
|
|
|
|
|
|
|
|
// Initializes a string with the given characters. The given characters have to be in UTF-8. A check for a valid sequence of bytes is performed. <b>Can't be used with RF_StringX</b>
|
|
#ifndef RF_OPTION_DEFAULT_ARGUMENTS
|
|
char rfString_Init(RF_String* str,const char* s,...)
|
|
#else
|
|
char i_rfString_Init(RF_String* str,const char* s,...)
|
|
#endif
|
|
{
|
|
READ_VSNPRINTF_ARGS(s,s,false)
|
|
// check for validity of the given sequence and get the character length
|
|
uint32_t byteLength;
|
|
if( rfUTF8_VerifySequence(buff,&byteLength) == RF_FAILURE)
|
|
{
|
|
LOG_ERROR("Error at String Initialization due to invalid UTF-8 byte sequence",RE_STRING_INIT_FAILURE);
|
|
if(buffAllocated==true)
|
|
free(buff);
|
|
return false;
|
|
}
|
|
|
|
// get length
|
|
str->byteLength = byteLength;
|
|
|
|
// now that we know the length we can allocate the buffer and copy the bytes
|
|
RF_MALLOC(str->bytes,str->byteLength+1);
|
|
memcpy(str->bytes,buff,str->byteLength+1);
|
|
if(buffAllocated == true)
|
|
free(buff);
|
|
return true;
|
|
}
|
|
#ifdef RF_OPTION_DEFAULT_ARGUMENTS
/*
 * Non-variadic counterpart of the string initialization function: makes str hold a
 * copy of the UTF-8 bytes in s.
 * Returns true on success, false if s is not a valid UTF-8 sequence (str is then untouched).
 */
char i_NVrfString_Init(RF_String* str,const char* s)
{
    uint32_t verifiedBytes;

    // reject invalid input before allocating anything
    if(rfUTF8_VerifySequence(s,&verifiedBytes) == RF_FAILURE)
    {
        LOG_ERROR("Error at String Initialization due to invalid UTF-8 byte sequence",RE_STRING_INIT_FAILURE);
        return false;
    }

    str->byteLength = verifiedBytes;

    // copy the bytes together with the terminating null
    RF_MALLOC(str->bytes,str->byteLength+1);
    memcpy(str->bytes,s,str->byteLength+1);

    return true;
}
#endif
|
|
|
|
// Allocates a String by turning a unicode code point in a String (encoded in UTF-8).
|
|
RF_String* rfString_Create_cp(uint32_t codepoint)
|
|
{
|
|
RF_String* ret;
|
|
RF_MALLOC(ret,sizeof(RF_String));
|
|
if(rfString_Init_cp(ret,codepoint) == true)
|
|
{
|
|
return ret;
|
|
}
|
|
// failure
|
|
free(ret);
|
|
return 0;
|
|
}
|
|
|
|
// Initializes a string by turning a unicode code point in a String (encoded in UTF-8).
|
|
/*
 * Initializes a string with the UTF-8 encoding of a single unicode code point.
 *
 * str       the string to initialize; its bytes pointer is overwritten with a new
 *           5 byte allocation (room for the longest encoding plus the terminator)
 * codepoint the unicode code point to encode
 *
 * Returns true on success. Returns false, without allocating anything, when the
 * code point is above U+10FFFF or is a UTF-16 surrogate (U+D800..U+DFFF), since
 * neither can be represented in well-formed UTF-8.
 */
char rfString_Init_cp(RF_String* str, uint32_t codepoint)
{
    // surrogates and values beyond the unicode range are not encodable; reject them up front
    if( (RF_HEXGE_UI(codepoint,0xD800) && RF_HEXLE_UI(codepoint,0xDFFF)) || RF_HEXGE_UI(codepoint,0x110000))
    {
        LOG_ERROR("Attempted to encode an invalid unicode code point into a string",RE_UTF8_INVALID_CODE_POINT);
        return false;
    }

    // alloc enough for a character
    RF_MALLOC(str->bytes,5);

    // if we only need a byte to encode it
    if(RF_HEXLE_UI(codepoint,0x007f))
    {
        str->bytes[0] = codepoint;
        str->bytes[1] = '\0';
        str->byteLength = 1;
    }
    // if we need 2 bytes to encode it
    else if(RF_HEXLE_UI(codepoint,0x07ff))
    {
        // low 6 bits go into the continuation byte (10xxxxxx)
        str->bytes[1] = (codepoint & 0x3F)|(0x02<<6);
        // next 5 bits go into the lead byte (110xxxxx)
        str->bytes[0] = ((codepoint & 0x7C0) >> 6) | (0x6<<5);
        str->bytes[2] = '\0';
        str->byteLength = 2;
    }
    // if we need 3 bytes to encode it
    else if(RF_HEXLE_UI(codepoint,0x0ffff))
    {
        // low 6 bits go into the last continuation byte
        str->bytes[2] = (codepoint & 0x3F)|(0x02<<6);
        // next 6 bits go into the first continuation byte
        str->bytes[1] = ((codepoint & 0xFC0) >> 6) | (0x02<<6);
        // top 4 bits go into the lead byte (1110xxxx)
        str->bytes[0] = (((codepoint & 0xF000))>>12) | (0xE<<4);
        str->bytes[3] = '\0';
        str->byteLength = 3;
    }
    // otherwise we need 4 bytes (the range check above guarantees <= U+10FFFF)
    else
    {
        // low 6 bits go into the last continuation byte
        str->bytes[3] = (codepoint & 0x3F)|(0x02<<6);
        // next 6 bits go into the second continuation byte
        str->bytes[2] = ((codepoint & 0xFC0) >> 6) | (0x02<<6);
        // next 6 bits go into the first continuation byte
        str->bytes[1] = (((codepoint & 0x3F000))>>12) | (0x02<<6);
        // top 3 bits go into the lead byte (11110xxx)
        str->bytes[0] = (((codepoint & 0x1C0000))>>18) | (0x1E<<3);
        str->bytes[4] = '\0';
        str->byteLength = 4;
    }

    return true;
}
|
|
|
|
|
|
// Allocates and returns a string with the given integer
|
|
RF_String* rfString_Create_i(int32_t i)
|
|
{
|
|
// the size of the int32_t buffer
|
|
int32_t numLen;
|
|
// put the int32_t into a buffer and turn it in a char*
|
|
char buff[12];// max uint32_t is 4,294,967,295 in most environment so 12 chars will certainly fit it
|
|
sprintf(buff,"%d",i);
|
|
numLen = strlen(buff);
|
|
|
|
// initialize the string and return it
|
|
RF_String* ret;
|
|
RF_MALLOC(ret,sizeof(RF_String));
|
|
ret->byteLength = numLen;
|
|
RF_MALLOC(ret->bytes,numLen+1);
|
|
strcpy(ret->bytes,buff);
|
|
return ret;
|
|
}
|
|
// Initializes a string with the given integer.
|
|
/*
 * Initializes a string with the decimal text of the given integer.
 * str's bytes pointer is overwritten with a new allocation. Always returns true.
 */
char rfString_Init_i(RF_String* str, int32_t i)
{
    // the longest int32_t text is "-2147483648": 11 characters plus the terminator
    char digits[12];
    // sprintf reports how many characters it produced, which is the text length
    int32_t digitCount = sprintf(digits,"%d",i);

    str->byteLength = digitCount;

    // copy the text, terminator included, into the string's own buffer
    RF_MALLOC(str->bytes,digitCount+1);
    strcpy(str->bytes,digits);

    return true;
}
|
|
|
|
// Allocates and returns a string with the given float
|
|
RF_String* rfString_Create_f(float f)
|
|
{
|
|
// allocate a buffer for the float in characters
|
|
char* buff;
|
|
RF_MALLOC(buff,128);
|
|
sprintf(buff,"%f",f);
|
|
uint32_t len = strlen(buff);
|
|
|
|
// initialize and return the string
|
|
RF_String* ret;
|
|
RF_MALLOC(ret,sizeof(RF_String));
|
|
ret->byteLength = len;
|
|
RF_MALLOC(ret->bytes,len+1);
|
|
strcpy(ret->bytes,buff);
|
|
|
|
free(buff);
|
|
|
|
return ret;
|
|
}
|
|
// Initializes a string with the given float
|
|
/*
 * Initializes a string with the "%f" text of the given float.
 * str's bytes pointer is overwritten with a new allocation. Always returns true.
 */
char rfString_Init_f(RF_String* str,float f)
{
    // scratch space for the formatted number; a fixed-size stack buffer needs no
    // allocation and snprintf keeps the write inside it
    char buff[128];
    uint32_t len;

    buff[0] = '\0';
    snprintf(buff,sizeof(buff),"%f",f);
    len = (uint32_t)strlen(buff);

    str->byteLength = len;
    RF_MALLOC(str->bytes,len+1);
    memcpy(str->bytes,buff,len+1);

    // success
    return true;
}
|
|
|
|
// Allocates and returns a string with the given UTF-16 byte sequence. Given characters have to be in UTF-16. A check for valid sequence of bytes is performed.<b>Can't be used with RF_StringX</b>
|
|
RF_String* rfString_Create_UTF16(const char* s,char endianess)
|
|
{
|
|
RF_String* ret;
|
|
RF_MALLOC(ret,sizeof(RF_String));
|
|
if(rfString_Init_UTF16(ret,s,endianess)==false)
|
|
{
|
|
free(ret);
|
|
return 0;
|
|
}
|
|
return ret;
|
|
}
|
|
// Initializes a string with the given UTF-16 byte sequence. Given characters have to be in UTF-16. A check for valid sequence of bytes is performed.<b>Can't be used with RF_StringX</b>
|
|
/*
 * Initializes a string from a UTF-16 byte sequence terminated by a zero 16-bit unit.
 *
 * str       the string to initialize; on success it takes ownership of the UTF-8
 *           buffer returned by rfUTF8_Encode()
 * s         the UTF-16 bytes
 * endianess RF_LITTLE_ENDIAN or RF_BIG_ENDIAN, the byte order of s
 *
 * Returns true on success; false for an illegal endianess value, a UTF-16 decoding
 * failure, or a UTF-8 encoding failure.
 */
char rfString_Init_UTF16(RF_String* str,const char* s,char endianess)
{
    uint32_t* codepoints;
    uint32_t byteLength,characterLength,utf8ByteLength;
    char* utf8;

    // find the terminator by walking whole 16-bit units; stepping a single byte at a
    // time could stop on the high byte of one unit followed by the low byte of the next
    byteLength = 0;
    while(s[byteLength] != 0 || s[byteLength+1] != 0)
    {
        byteLength += 2;
    }
    byteLength += 3; // for the last utf-16 null termination character
    RF_MALLOC(codepoints,byteLength*2); // allocate the codepoints

    // parse the given byte stream depending on the endianess parameter
    switch(endianess)
    {
        case RF_LITTLE_ENDIAN:
        case RF_BIG_ENDIAN:
            if(rfUTILS_Endianess() == endianess) // same endianess as the local
            {
                if(rfUTF16_Decode(s,&characterLength,codepoints) == false)
                {
                    free(codepoints);
                    LOG_ERROR("String initialization failed due to invalide UTF-16 sequence",RE_STRING_INIT_FAILURE);
                    return false;
                }
            }
            else // different, the units have to be byte swapped while decoding
            {
                if(rfUTF16_Decode_swap(s,&characterLength,codepoints) == false)
                {
                    free(codepoints);
                    LOG_ERROR("String initialization failed due to invalide UTF-16 sequence",RE_STRING_INIT_FAILURE);
                    return false;
                }
            }
            break;
        default:
            LOG_ERROR("Illegal endianess value provided",RE_INPUT);
            free(codepoints);
            return false;
    } // switch ends

    // now encode these codepoints into UTF8; the codepoints are not needed afterwards
    utf8 = rfUTF8_Encode(codepoints,characterLength,&utf8ByteLength);
    free(codepoints);
    if(utf8 == 0)
    {
        return false;
    }

    // success
    str->bytes = utf8;
    str->byteLength = utf8ByteLength;
    return true;
}
|
|
|
|
// Allocates and returns a string with the given UTF-32 byte sequence. Given characters have to be in UTF-32.
|
|
RF_String* rfString_Create_UTF32(const char* s)
|
|
{
|
|
RF_String* ret;
|
|
RF_MALLOC(ret,sizeof(RF_String));
|
|
if(rfString_Init_UTF32(ret,s)==false)
|
|
{
|
|
free(ret);
|
|
return 0;
|
|
}
|
|
return ret;
|
|
}
|
|
// Initializes a string with the given UTF-32 byte sequence. Given characters have to be in UTF-32.
|
|
/*
 * Initializes a string from a UTF-32 sequence terminated by a zero 32-bit unit.
 *
 * str the string to initialize; on success it takes ownership of the UTF-8 buffer
 *     returned by rfUTF8_Encode()
 * s   the UTF-32 bytes; they are only read, the byte order fix-up happens on a copy
 *
 * The byte order is taken from a leading byte order mark when there is one; without
 * a mark the data is treated as big endian.
 *
 * Returns true on success, false if the UTF-8 encoding fails.
 *
 * NOTE(review): a leading byte order mark is passed on to the encoder like any other
 * code point, as before - confirm whether it should be skipped instead.
 */
char rfString_Init_UTF32(RF_String* str,const char* s)
{
    char swapE = false;
    uint32_t unit,first,units = 0,k;
    uint32_t* codeBuffer;
    uint32_t length;
    char* utf8;
    uint32_t utf8ByteLength;

    // count the units before the terminator (zero reads as zero in either byte order);
    // memcpy is used so that s does not have to be 4-byte aligned
    for(;;)
    {
        memcpy(&unit,s+units*4,sizeof(unit));
        if(unit == 0)
            break;
        units++;
    }

    // decide whether the units must be byte swapped
    memcpy(&first,s,sizeof(first));
    if(RF_HEXEQ_UI(first,0xFEFF))
    {
        // the mark reads correctly in host order: the data already is in host order
        swapE = false;
    }
    else if(RF_HEXEQ_UI(first,0xFFFE0000))
    {
        // the mark reads byte-reversed: the data is in the opposite order
        swapE = true;
    }
    else // according to the standard no BOM means big endian
    {
        if(rfUTILS_Endianess() == RF_LITTLE_ENDIAN)
            swapE = true;
    }

    // work on a private copy (terminator included) so the caller's buffer stays intact
    RF_MALLOC(codeBuffer,(units+1)*sizeof(uint32_t));
    memcpy(codeBuffer,s,(units+1)*sizeof(uint32_t));
    if(swapE == true)
    {
        for(k=0;k<units;k++)
        {
            rfUTILS_SwapEndianUI(codeBuffer+k);
        }
    }

    // find the length of the utf32 buffer in characters
    rfUTF32_Length(codeBuffer,length);

    // turn the codepoints into a utf-8 encoded buffer; the copy is not needed afterwards
    utf8 = rfUTF8_Encode(codeBuffer,length,&utf8ByteLength);
    free(codeBuffer);
    if(utf8 == 0)
    {
        return false; // error
    }

    // the string owns the encoded UTF-8 bytes
    str->bytes = utf8;
    str->byteLength = utf8ByteLength;
    return true;
}
|
|
|
|
// Assigns the value of the source string to the destination.Both strings should already be initialized and hold a value. It is an error to give null parameters.
|
|
/*
 * Copies the value of the source string into dest. Both strings must already be
 * initialized; passing null pointers is an error.
 * sourceP is treated as a pointer to an RF_String.
 */
void i_rfString_Assign(RF_String* dest,void* sourceP)
{
    RF_String* from = (RF_String*)sourceP;

    // grow the destination buffer only when the new value is longer than the current one
    if(dest->byteLength < from->byteLength)
    {
        RF_REALLOC(dest->bytes,char,from->byteLength+1);
    }

    // copy the bytes (terminator included) and take over the length
    memcpy(dest->bytes,from->bytes,from->byteLength+1);
    dest->byteLength = from->byteLength;
}
|
|
|
|
// Assigns the value of a unicode character to the string
|
|
/*
 * Replaces the value of an initialized string with the UTF-8 encoding of a single
 * unicode code point.
 *
 * Returns true on success. Returns false, leaving the string unchanged, when the
 * code point is above U+10FFFF or is a UTF-16 surrogate (U+D800..U+DFFF), since
 * neither can be represented in well-formed UTF-8.
 */
char rfString_Assign_char(RF_String* str,uint32_t codepoint)
{
    // surrogates and values beyond the unicode range are not encodable; reject them
    // before touching the string
    if( (RF_HEXGE_UI(codepoint,0xD800) && RF_HEXLE_UI(codepoint,0xDFFF)) || RF_HEXGE_UI(codepoint,0x110000))
    {
        LOG_ERROR("Attempted to encode an invalid unicode code point into a string",RE_UTF8_INVALID_CODE_POINT);
        return false;
    }

    // realloc if needed so that the longest encoding plus the terminator fits
    if(str->byteLength <5)
    {
        RF_REALLOC(str->bytes,char,5);
    }

    // if we only need a byte to encode it
    if(RF_HEXLE_UI(codepoint,0x007f))
    {
        str->bytes[0] = codepoint;
        str->bytes[1] = '\0';
        str->byteLength = 1;
    }
    // if we need 2 bytes to encode it
    else if(RF_HEXLE_UI(codepoint,0x07ff))
    {
        // low 6 bits go into the continuation byte (10xxxxxx)
        str->bytes[1] = (codepoint & 0x3F)|(0x02<<6);
        // next 5 bits go into the lead byte (110xxxxx)
        str->bytes[0] = ((codepoint & 0x7C0) >> 6) | (0x6<<5);
        str->bytes[2] = '\0';
        str->byteLength = 2;
    }
    // if we need 3 bytes to encode it
    else if(RF_HEXLE_UI(codepoint,0x0ffff))
    {
        // low 6 bits go into the last continuation byte
        str->bytes[2] = (codepoint & 0x3F)|(0x02<<6);
        // next 6 bits go into the first continuation byte
        str->bytes[1] = ((codepoint & 0xFC0) >> 6) | (0x02<<6);
        // top 4 bits go into the lead byte (1110xxxx)
        str->bytes[0] = (((codepoint & 0xF000))>>12) | (0xE<<4);
        str->bytes[3] = '\0';
        str->byteLength = 3;
    }
    // otherwise we need 4 bytes (the range check above guarantees <= U+10FFFF)
    else
    {
        // low 6 bits go into the last continuation byte
        str->bytes[3] = (codepoint & 0x3F)|(0x02<<6);
        // next 6 bits go into the second continuation byte
        str->bytes[2] = ((codepoint & 0xFC0) >> 6) | (0x02<<6);
        // next 6 bits go into the first continuation byte
        str->bytes[1] = (((codepoint & 0x3F000))>>12) | (0x02<<6);
        // top 3 bits go into the lead byte (11110xxx)
        str->bytes[0] = (((codepoint & 0x1C0000))>>18) | (0x1E<<3);
        str->bytes[4] = '\0';
        str->byteLength = 4;
    }

    return true;
}
|
|
|
|
// Allocates and returns a string with the given characters. NO VALID-UTF8 check is performed
|
|
#ifndef RF_OPTION_DEFAULT_ARGUMENTS
|
|
RF_String* rfString_Create_nc(const char* s,...)
|
|
#else
|
|
RF_String* i_rfString_Create_nc(const char* s,...)
|
|
#endif
|
|
{
|
|
RF_String* ret;
|
|
RF_MALLOC(ret,sizeof(RF_String));
|
|
// get the formatted string
|
|
READ_VSNPRINTF_ARGS(s,s,0);
|
|
// get the lengt of the byte buffer
|
|
ret->byteLength = bytesWritten;
|
|
|
|
// now that we know the length we can allocate the buffer and copy the bytes
|
|
RF_MALLOC(ret->bytes,ret->byteLength+1);
|
|
memcpy(ret->bytes,buff,ret->byteLength+1);
|
|
if(buffAllocated)
|
|
free(buff);
|
|
return ret;
|
|
}
|
|
#ifdef RF_OPTION_DEFAULT_ARGUMENTS
/*
 * Non-variadic, unchecked string creation: heap-allocates an RF_String holding a
 * copy of the null terminated bytes in s. NO check for valid UTF-8 is performed.
 * Returns the new string.
 */
RF_String* i_NVrfString_Create_nc(const char* s)
{
    RF_String* created;
    RF_MALLOC(created,sizeof(RF_String));

    // the byte length is simply the C string length
    created->byteLength = strlen(s);

    // copy the bytes together with the terminating null
    RF_MALLOC(created->bytes,created->byteLength+1);
    memcpy(created->bytes,s,created->byteLength+1);
    return created;
}
#endif
|
|
|
|
// Initializes a string with the given characters. NO VALID-UTF8 check is performed
|
|
#ifndef RF_OPTION_DEFAULT_ARGUMENTS
|
|
char rfString_Init_nc(struct RF_String* str,const char* s,...)
|
|
#else
|
|
char i_rfString_Init_nc(struct RF_String* str,const char* s,...)
|
|
#endif
|
|
{
|
|
// get the formatted string
|
|
READ_VSNPRINTF_ARGS(s,s,false)
|
|
// get its length
|
|
str->byteLength = bytesWritten;
|
|
|
|
// now that we know the length we can allocate the buffer and copy the bytes
|
|
RF_MALLOC(str->bytes,str->byteLength+1);
|
|
memcpy(str->bytes,buff,str->byteLength+1);
|
|
if(buffAllocated == true)
|
|
free(buff);
|
|
return true;
|
|
}
|
|
#ifdef RF_OPTION_DEFAULT_ARGUMENTS
/*
 * Non-variadic, unchecked string initialization: makes str hold a copy of the null
 * terminated bytes in s. NO check for valid UTF-8 is performed. Returns true.
 */
char i_NVrfString_Init_nc(struct RF_String* str,const char* s)
{
    // the byte length is simply the C string length
    str->byteLength = strlen(s);

    // copy the bytes together with the terminating null
    RF_MALLOC(str->bytes,str->byteLength+1);
    memcpy(str->bytes,s,str->byteLength+1);
    return true;
}
#endif
|
|
|
|
/*-------------------------------------------------------------------------Methods to get rid of an RF_String-------------------------------------------------------------------------------*/
|
|
|
|
// Deletes a string object: frees its byte buffer and then the object itself. It is an error to pass a NULL (0x0) string; doing so will most probably lead to a segmentation fault.
|
|
/*
 * Releases a heap-allocated string: first its byte buffer, then the object itself.
 * s must not be null, it is dereferenced without a check.
 */
void rfString_Destroy(RF_String* s)
{
    // the buffer has to go first, while the object that points to it still exists
    free(s->bytes);
    free(s);
}
|
|
// Frees only the string's internal byte buffer, not the RF_String object itself. It is an error to pass a NULL (0x0) string; doing so will most probably lead to a segmentation fault.
|
|
/*
 * Releases the byte buffer of a string but leaves the RF_String object itself alone.
 * s must not be null, it is dereferenced without a check.
 */
void rfString_Deinit(RF_String* s)
{
    free(s->bytes);
}
|
|
/*------------------------------------------------------------------------ RF_String unicode conversion functions-------------------------------------------------------------------------------*/
|
|
|
|
// Returns the strings contents as a UTF-16 buffer
|
|
/*
 * Returns the contents of the string as a UTF-16 buffer produced by rfUTF16_Encode();
 * length is handed to the encoder as its output parameter.
 *
 * The results of the decoding and encoding steps are not checked.
 *
 * NOTE(review): the code point buffer returned by rfUTF8_Decode() is not released
 * here; unless rfUTF16_Encode() takes ownership of it this looks like a leak - confirm.
 */
uint16_t* rfString_ToUTF16(RF_String* s,uint32_t* length)
{
    uint32_t decodedCount;
    // step 1: UTF-8 bytes to unicode code points
    uint32_t* decoded = rfUTF8_Decode(s->bytes,s->byteLength,&decodedCount);
    // step 2: code points to UTF-16
    return rfUTF16_Encode(decoded,decodedCount,length);
}
|
|
|
|
// Returns the strings contents as a UTF-32 buffer
|
|
/*
 * Returns the contents of the string as a buffer of unicode code points, i.e. the
 * result of rfUTF8_Decode() on the string's bytes; length is handed to the decoder
 * as its output parameter. The result of the decoding is not checked.
 */
uint32_t* rfString_ToUTF32(RF_String* s,uint32_t* length)
{
    uint32_t* decoded = rfUTF8_Decode(s->bytes,s->byteLength,length);
    return decoded;
}
|
|
|
|
/*------------------------------------------------------------------------ RF_String retrieval functions-------------------------------------------------------------------------------*/
|
|
// Finds the length of the string in characters
|
|
uint32_t rfString_Length(void* str)
|
|
{
|
|
RF_String* s = (RF_String*)str;
|
|
uint32_t length,i;
|
|
RF_STRING_ITERATE_START(s,length,i)
|
|
RF_STRING_ITERATE_END(length,i);
|
|
return length;
|
|
}
|
|
|
|
// Retrieves the unicode code point of the parameter character.
|
|
uint32_t rfString_GetChar(void* str,uint32_t c)
|
|
{
|
|
RF_String* thisstr = (RF_String*)str;
|
|
uint32_t length,i;
|
|
uint32_t codePoint = RF_STRING_INDEX_OUT_OF_BOUNDS;
|
|
RF_STRING_ITERATE_START(thisstr,length,i)
|
|
// if we found the character,inspect the 4 different cases
|
|
if(length == c)
|
|
{
|
|
// take the codepoint from the byte position and break from the loop
|
|
codePoint = rfString_BytePosToCodePoint(thisstr,i);
|
|
break;
|
|
}
|
|
RF_STRING_ITERATE_END(length,i)
|
|
|
|
// and return the code point. Notice that if the character was not found this will return RF_STRING_INDEX_OUT_OF_BOUNDS
|
|
return codePoint;
|
|
}
|
|
|
|
// Retrieves the unicode code point of the parameter bytepos of the string. If the byte position is not the start of a character 0 is returned. This is an internal function, there is no need to use it. <i>Can be used with StringX</i>
|
|
uint32_t rfString_BytePosToCodePoint(void* str,uint32_t i)
|
|
{
|
|
uint32_t codePoint=0;
|
|
RF_String* thisstr = (RF_String*)str;
|
|
// /Here I am not checking if byte position 'i' is withing bounds and especially if it is a start of a character
|
|
// / This is assumed to have been checked or to be known beforehand by the programmer. That's one of the reasons
|
|
// / why this is an internal function and should not be used unless you know what you are doing
|
|
// if the lead bit of the byte is 0 then range is : U+0000 to U+0007F (1 byte)
|
|
if( ((thisstr->bytes[i] & 0x80)>>7) == 0 )
|
|
{
|
|
// and the code point is this whole byte only
|
|
codePoint = thisstr->bytes[i];
|
|
}
|
|
// if the leading bits are in the form of 0b110xxxxx then range is: U+0080 to U+07FF (2 bytes)
|
|
else if ( RF_HEXEQ_C( ( (~(thisstr->bytes[i] ^ 0xC0))>>5),0x7) )
|
|
{
|
|
codePoint =0;
|
|
// from the second byte take the first 6 bits
|
|
codePoint = (thisstr->bytes[i+1] & 0x3F) ;
|
|
// from the first byte take the first 5 bits and put them in the start
|
|
codePoint |= ((thisstr->bytes[i] & 0x1F) << 6);
|
|
}
|
|
// if the leading bits are in the form of 0b1110xxxx then range is U+0800 to U+FFFF (3 bytes)
|
|
else if( RF_HEXEQ_C( ( (~(thisstr->bytes[i] ^ 0xE0))>>4),0xF) )
|
|
{
|
|
codePoint = 0;
|
|
// from the third byte take the first 6 bits
|
|
codePoint = (thisstr->bytes[i+2] & 0x3F) ;
|
|
// from the second byte take the first 6 bits and put them to the left of the previous 6 bits
|
|
codePoint |= ((thisstr->bytes[i+1] & 0x3F) << 6);
|
|
// from the first byte take the first 4 bits and put them to the left of the previous 6 bits
|
|
codePoint |= ((thisstr->bytes[i] & 0xF) << 12);
|
|
}
|
|
// if the leading bits are in the form of 0b11110xxx then range is U+010000 to U+10FFFF (4 bytes)
|
|
else if( RF_HEXEQ_C( ( (~(thisstr->bytes[i] ^ 0xF0))>>3), 0x1F))
|
|
{
|
|
codePoint = 0;
|
|
// from the fourth byte take the first 6 bits
|
|
codePoint = (thisstr->bytes[i+3] & 0x3F) ;
|
|
// from the third byte take the first 6 bits and put them to the left of the previous 6 bits
|
|
codePoint |= ((thisstr->bytes[i+2] & 0x3F) << 6);
|
|
// from the second byte take the first 6 bits and put them to the left of the previous 6 bits
|
|
codePoint |= ((thisstr->bytes[i+1] & 0x3F) << 12);
|
|
// from the first byte take the first 3 bits and put them to the left of the previous 6 bits
|
|
codePoint |= ((thisstr->bytes[i] & 0x7) << 18);
|
|
}
|
|
|
|
return codePoint;
|
|
}
|
|
|
|
|
|
// Retrieves character position of a byte position
|
|
uint32_t rfString_BytePosToCharPos(void* thisstrP,uint32_t bytepos,char before)
|
|
{
|
|
// /here there is no check if this is actually a byte pos inside the string's
|
|
// /byte buffer. The programmer should have made sure it is before hand. This is why it is
|
|
// / an internal function and should only be used if you know what you are doing
|
|
RF_String* thisstr = (RF_String*)thisstrP;
|
|
uint32_t charPos = 0;
|
|
uint32_t byteI = 0;
|
|
// iterate the string's bytes until you get to the required byte
|
|
// if it is not a continuation byte, return the position
|
|
if(rfUTF8_IsContinuationByte(thisstr->bytes[bytepos])==false)
|
|
{
|
|
RF_STRING_ITERATE_START(thisstr,charPos,byteI)
|
|
if(byteI == bytepos)
|
|
return charPos;
|
|
RF_STRING_ITERATE_END(charPos,byteI)
|
|
}
|
|
// else iterate the string's bytes until you get anything bigger than the required byte
|
|
RF_STRING_ITERATE_START(thisstr,charPos,byteI)
|
|
if(byteI > bytepos)
|
|
break;
|
|
RF_STRING_ITERATE_END(charPos,byteI)
|
|
// if we need the previous one return it
|
|
if(before == true)
|
|
return charPos-1;
|
|
// else return this
|
|
return charPos;
|
|
}
|
|
|
|
// Compares two Strings and returns true if they are equal and false otherwise
|
|
char i_rfString_Equal(void* s1P,void* s2P)
|
|
{
|
|
RF_String* s1 = (RF_String*)s1P;
|
|
RF_String* s2 = (RF_String*)s2P;
|
|
if( strcmp(s1->bytes,s2->bytes)==0)
|
|
{
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Finds the existence of String sstr inside this string, either matching case or not
|
|
int32_t i_rfString_Find(const void* str,const void* sstrP,const char* optionsP)
|
|
{
|
|
// / @note TO SELF: If I make any changes to this function do not forget to change the private version that returns byte position too
|
|
// / located at string_private.c and called rfString_FindByte and rfString_FindByte_s
|
|
RF_String* thisstr = (RF_String*)str;
|
|
RF_String* sstr = (RF_String*)sstrP;
|
|
char options = *optionsP;
|
|
|
|
char* found = 0;
|
|
// if we want to match the case of the string then it's a simple search of matching characters
|
|
if( (RF_BITFLAG_ON( options,RF_CASE_IGNORE)) == false)
|
|
{
|
|
// if it is not found
|
|
if( (found = strstr(thisstr->bytes,sstr->bytes)) == 0)
|
|
{
|
|
return RF_FAILURE;
|
|
}
|
|
// get the byte position
|
|
uint32_t bytepos = found-thisstr->bytes;
|
|
// if we need the exact string as it is given
|
|
if(RF_BITFLAG_ON( options,RF_MATCH_WORD))
|
|
{
|
|
// check before the found string
|
|
if(bytepos != 0)
|
|
{
|
|
// if is is not a character
|
|
switch(thisstr->bytes[bytepos-1])
|
|
{
|
|
case ' ':case '\t':case '\n':
|
|
break;
|
|
default:
|
|
return RF_FAILURE;
|
|
break;
|
|
}
|
|
}
|
|
// check after the found string
|
|
if(bytepos+sstr->byteLength != thisstr->byteLength)
|
|
{
|
|
// if is is not a character
|
|
switch(thisstr->bytes[bytepos+sstr->byteLength])
|
|
{
|
|
case ' ':case '\t':case '\n':
|
|
break;
|
|
default:
|
|
return RF_FAILURE;
|
|
break;
|
|
}
|
|
}
|
|
}// end of the exact string option
|
|
// success
|
|
return rfString_BytePosToCharPos(thisstr,bytepos,false);
|
|
}
|
|
|
|
// else ignore case matching
|
|
uint32_t i,j;
|
|
// if(cstr[0] >= 0x41 && cstr[0] <= 0x5a)
|
|
for(i=0;i<thisstr->byteLength; i ++)
|
|
{
|
|
// if i matches the start of the substring
|
|
for(j = 0; j < sstr->byteLength; j++)
|
|
{
|
|
// if the jth char is a big letter
|
|
if(sstr->bytes[j] >= 0x41 && sstr->bytes[j] <= 0x5a)
|
|
{
|
|
// no match
|
|
if(sstr->bytes[j] != thisstr->bytes[i+j] && sstr->bytes[j]+32 != thisstr->bytes[i+j])
|
|
break;
|
|
// there is a match in the jth character so let's perform additional checks if needed
|
|
if(RF_BITFLAG_ON( options,RF_MATCH_WORD))
|
|
{
|
|
// if it's the first substring character and if the string we search is not in it's beginning, check for EXACT string before
|
|
if(j == 0 && i != 0)
|
|
{
|
|
switch(thisstr->bytes[i-1])
|
|
{
|
|
case ' ':case '\t':case '\n':
|
|
break;
|
|
default:
|
|
return RF_FAILURE;
|
|
break;
|
|
}
|
|
}
|
|
}// exact string check if ends
|
|
}
|
|
// small letter
|
|
else if(sstr->bytes[j] >= 0x61 && sstr->bytes[j] <= 0x7a)
|
|
{
|
|
// no match
|
|
if(sstr->bytes[j] != thisstr->bytes[i+j] && sstr->bytes[j]-32 != thisstr->bytes[i+j])
|
|
break;
|
|
// there is a match in the jth character so let's perform additional checks if needed
|
|
if(RF_BITFLAG_ON(options,RF_MATCH_WORD))
|
|
{
|
|
// if it's the first substring character and if the string we search is not in it's beginning, check for EXACT string before
|
|
if(j == 0 && i != 0)
|
|
{
|
|
switch(thisstr->bytes[i-1])
|
|
{
|
|
case ' ':case '\t':case '\n':
|
|
break;
|
|
default:
|
|
return RF_FAILURE;
|
|
break;
|
|
}
|
|
}
|
|
}// exact string check if ends
|
|
}
|
|
// not a letter and no match
|
|
else if(sstr->bytes[j] != thisstr->bytes[i+j])
|
|
break;// break off the substring search loop
|
|
|
|
// if we get here and it's the last char of the substring we either found it or need to perform one last check for exact string
|
|
if(j == sstr->byteLength-1)
|
|
{
|
|
// only if the end of the string is not right after the substring
|
|
if( RF_BITFLAG_ON(options,RF_MATCH_WORD) && i+sstr->byteLength < thisstr->byteLength)
|
|
{
|
|
switch(thisstr->bytes[i+sstr->byteLength])
|
|
{
|
|
case ' ':case '\t':case '\n':
|
|
break;
|
|
default:
|
|
return RF_FAILURE;
|
|
break;
|
|
}
|
|
}// end of the exact string check
|
|
// succes
|
|
return rfString_BytePosToCharPos(thisstr,i,false);
|
|
}// end of it's the last char of the substring check
|
|
}// substring iteration ends
|
|
}// this string iteration ends
|
|
return RF_FAILURE;
|
|
}
|
|
|
|
// Returns the integer value of the string if and only if it contains only numbers. If it contains anything else the function fails.
|
|
char rfString_ToInt(void* str,int32_t* v)
|
|
{
|
|
RF_String* thisstr = (RF_String*)str;
|
|
char* end;
|
|
// get the integer
|
|
*v = strtol ( thisstr->bytes, &end,10);
|
|
|
|
// /This is the non-strict case. Takes the number out of the string no matter what else it has inside
|
|
/* // if we did get something
|
|
if(strlen(end) < this->length())
|
|
return true;
|
|
*/
|
|
// /This is the strict case, and the one we will go with. The non-strict case might be moved to its own function, if ever in the future
|
|
if(end[0] == '\0')
|
|
return true;
|
|
|
|
// else false
|
|
return false;
|
|
}
|
|
|
|
// Returns the float value of a String
|
|
int rfString_ToDouble(void* thisstrP,double* f)
|
|
{
|
|
RF_String* str = (RF_String*)thisstrP;
|
|
*f = strtod(str->bytes,NULL);
|
|
// check the result
|
|
if(*f == 0.0)
|
|
{
|
|
// if it's zero and the string equals to zero then we are okay
|
|
if(rfString_Equal(str,RFS_("0")) || rfString_Equal(str,RFS_("0.0")))
|
|
return RF_SUCCESS;
|
|
// underflow error
|
|
if(errno == ERANGE)
|
|
return RE_STRING_TOFLOAT_UNDERFLOW;
|
|
// in any other case it's a conversion error
|
|
return RE_STRING_TOFLOAT;
|
|
}
|
|
// if the result is a HUGE_VAL and errno is set,the number is not representable by a double
|
|
if(*f == HUGE_VAL && errno == ERANGE)
|
|
return RE_STRING_TOFLOAT_RANGE;
|
|
|
|
// any other case success
|
|
return RF_SUCCESS;
|
|
}
|
|
|
|
// Returns a cstring version of the string.
|
|
const char* rfString_ToCstr(const void* str)
|
|
{
|
|
RF_String* thisstr = (RF_String*)str;
|
|
return thisstr->bytes;
|
|
}
|
|
|
|
// Creates and returns an allocated copy of the given string
|
|
RF_String* rfString_Copy_OUT(void* srcP)
|
|
{
|
|
RF_String* src = (RF_String*)srcP;
|
|
// create the new string
|
|
RF_String* ret;
|
|
RF_MALLOC(ret,sizeof(RF_String));
|
|
// get the length
|
|
ret->byteLength = src->byteLength;
|
|
// copy the bytes
|
|
RF_MALLOC(ret->bytes,ret->byteLength+1);
|
|
memcpy(ret->bytes,src->bytes,ret->byteLength+1);
|
|
return ret;
|
|
|
|
}
|
|
// Copies all the contents of a string to another
|
|
void rfString_Copy_IN(RF_String* dst,void* srcP)
|
|
{
|
|
RF_String* src = (RF_String*)srcP;
|
|
// get the length
|
|
dst->byteLength = src->byteLength;
|
|
// copy the bytes
|
|
RF_MALLOC(dst->bytes,src->byteLength+1);
|
|
memcpy(dst->bytes,src->bytes,dst->byteLength+1);
|
|
return;
|
|
|
|
}
|
|
// Copies a certain number of characters from a string
|
|
void rfString_Copy_chars(RF_String* dst,void* srcP,uint32_t charsN)
|
|
{
|
|
uint32_t i = 0,bytePos;
|
|
RF_String* src = (RF_String*)srcP;
|
|
|
|
// find the byte position until which we need to copy
|
|
RF_STRING_ITERATE_START(src,i,bytePos)
|
|
if(i == charsN)
|
|
break;
|
|
RF_STRING_ITERATE_END(i,bytePos)
|
|
dst->byteLength = bytePos;
|
|
RF_MALLOC(dst->bytes,dst->byteLength+1);
|
|
memcpy(dst->bytes,src->bytes,dst->byteLength+1);
|
|
dst->bytes[dst->byteLength] = '\0';// null terminate it
|
|
}
|
|
|
|
|
|
// Applies a limited version of sscanf after the specified substring
|
|
char i_rfString_ScanfAfter(void* str,void* afterstrP,const char* format,void* var)
|
|
{
|
|
RF_String* thisstr = (RF_String*)str;
|
|
RF_String* afterstr = (RF_String*)afterstrP;
|
|
// return false if the substring is not found
|
|
char* found,*s;
|
|
if( (found = strstr(thisstr->bytes,afterstr->bytes)) ==0 )
|
|
{
|
|
return false;
|
|
}
|
|
// get a pointer to the start of the position where sscanf will be used
|
|
s = thisstr->bytes + (found-thisstr->bytes+afterstr->byteLength);
|
|
|
|
// use sscanf
|
|
if(sscanf(s,format,var) <=0)
|
|
{
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Counts how many times a substring s occurs inside the string
|
|
int32_t i_rfString_Count(void* str,void* sstr2,const char* optionsP)
|
|
{
|
|
RF_String* thisstr = (RF_String*)str;
|
|
RF_String* sstr = (RF_String*)sstr2;
|
|
char options = *optionsP;
|
|
int32_t index = 0;
|
|
int32_t move;
|
|
int32_t n = 0;
|
|
|
|
// as long as the substring is found in the string
|
|
while ((move = rfString_FindBytePos(thisstr,sstr,options)) != RF_FAILURE)
|
|
{
|
|
move+= sstr->byteLength;
|
|
// proceed searching inside the string and also increase the counter
|
|
n++;
|
|
thisstr->bytes+=move;
|
|
index +=move;
|
|
thisstr->byteLength -=move;
|
|
}
|
|
|
|
// return string to its original state and return the number of occurences, also returns 0 if not found
|
|
thisstr->bytes-=index;
|
|
thisstr->byteLength += index;
|
|
// success
|
|
return n;
|
|
}
|
|
|
|
// Tokenizes the given string. Separates it into @c tokensN depending on how many substrings can be created from the @c sep separatior and stores them
|
|
// into the Array of RF_String* that should be passed to the function
|
|
i_DECLIMEX_ char rfString_Tokenize(void* str,char* sep,uint32_t* tokensN,RF_String** tokens)
|
|
{
|
|
RF_String* thisstr = (RF_String*)str;
|
|
uint32_t i;
|
|
// first find the occurences of the separator, and then the number of tokens
|
|
*tokensN = rfString_Count(thisstr,RFS_(sep),0)+1;
|
|
// error checking
|
|
if(*tokensN == 0)
|
|
return false;
|
|
|
|
// allocate the tokens
|
|
RF_MALLOC(*tokens,sizeof(RF_String) *(*tokensN));
|
|
// find the length of the separator
|
|
uint32_t sepLen = strlen(sep);
|
|
char* s,*e;
|
|
s = thisstr->bytes;
|
|
for(i = 0; i < (*tokensN)-1; i ++)
|
|
{
|
|
// find each substring
|
|
e = strstr(s,sep);
|
|
(*tokens)[i].byteLength = e-s;
|
|
RF_MALLOC((*tokens)[i].bytes,(*tokens)[i].byteLength+1);
|
|
// put in the data
|
|
strncpy((*tokens)[i].bytes,s,(*tokens)[i].byteLength);
|
|
// null terminate
|
|
(*tokens)[i].bytes[(*tokens)[i].byteLength] = '\0';
|
|
|
|
// prepare for next sub-string
|
|
s = e+sepLen;
|
|
|
|
}
|
|
// /make sure that if it's the last substring we change strategy
|
|
(*tokens)[i].byteLength = strlen(s);
|
|
RF_MALLOC((*tokens)[i].bytes,(*tokens)[i].byteLength+1);
|
|
// put in the data
|
|
strncpy((*tokens)[i].bytes,s,(*tokens)[i].byteLength);
|
|
// null terminate
|
|
(*tokens)[i].bytes[(*tokens)[i].byteLength] = '\0';
|
|
|
|
// success
|
|
return true;
|
|
}
|
|
// Initializes the given string as the first substring existing between the left and right parameter substrings.
|
|
char i_rfString_Between(void* thisstrP,void* lstrP,void* rstrP,RF_String* result,const char* optionsP)
|
|
{
|
|
int32_t start,end;
|
|
RF_String* thisstr = (RF_String*)thisstrP;
|
|
RF_String* lstr = (RF_String*)lstrP;
|
|
RF_String* rstr = (RF_String*)rstrP;
|
|
char options = *optionsP;
|
|
RF_String temp;
|
|
// find the left substring
|
|
if( (start = rfString_FindBytePos(thisstr,lstr,options))== RF_FAILURE)
|
|
{
|
|
return false;
|
|
}
|
|
// get what is after it
|
|
rfString_After(thisstr,lstr,&temp,options);
|
|
// find the right substring in the remaining part
|
|
if( (end = rfString_FindBytePos(&temp,rstr,options))== RF_FAILURE)
|
|
{
|
|
return false;
|
|
}
|
|
// free temp string
|
|
rfString_Deinit(&temp);
|
|
// initialize the string to return
|
|
result->byteLength = end;
|
|
RF_MALLOC(result->bytes,result->byteLength+1);
|
|
memcpy(result->bytes,thisstr->bytes+start+lstr->byteLength,result->byteLength+1);
|
|
result->bytes[end]= '\0';
|
|
// success
|
|
return true;
|
|
}
|
|
|
|
// Initializes the given string as the substring from the start until any of the given Strings are found.
|
|
#ifndef RF_OPTION_DEFAULT_ARGUMENTS
|
|
char rfString_Beforev(void* thisstrP,RF_String* result,const char* optionsP,const unsigned char* parNP, ...)
|
|
#else
|
|
char i_rfString_Beforev(void* thisstrP,RF_String* result,const char* optionsP,const unsigned char* parNP, ...)
|
|
#endif
|
|
{
|
|
RF_String* thisstr = (RF_String*)thisstrP;
|
|
RF_String* s;
|
|
char options = *optionsP;
|
|
unsigned char parN = *parNP;
|
|
int32_t i,minPos,thisPos;
|
|
// will keep the argument list
|
|
va_list argList;
|
|
// get the parameter characters
|
|
va_start(argList,parNP);
|
|
|
|
minPos = 9999999;
|
|
for(i = 0; i < parN; i++)
|
|
{
|
|
s = (RF_String*) va_arg(argList,RF_String*);
|
|
if( (thisPos= rfString_FindBytePos(thisstr,s,options))!= RF_FAILURE)
|
|
{
|
|
if(thisPos < minPos)
|
|
minPos = thisPos;
|
|
}
|
|
}
|
|
va_end(argList);
|
|
|
|
// if it is not found
|
|
if(minPos == 9999999)
|
|
{
|
|
return false;
|
|
}
|
|
// if it is found initialize the substring
|
|
result->byteLength = minPos;
|
|
RF_MALLOC(result->bytes,minPos+1);
|
|
memcpy(result->bytes,thisstr->bytes,minPos);
|
|
result->bytes[minPos] = '\0';
|
|
// success
|
|
return true;
|
|
}
|
|
|
|
// Initializes the given string as the substring from the start until the given string is found
|
|
char i_rfString_Before(void* thisstrP,void* sstrP,RF_String* result,const char* optionsP)
|
|
{
|
|
RF_String* thisstr = (RF_String*)thisstrP;
|
|
RF_String* sstr = (RF_String*) sstrP;
|
|
char options = *optionsP;
|
|
int32_t ret;
|
|
// find the substring
|
|
if( (ret = rfString_FindBytePos(thisstr,sstr,options)) == RF_FAILURE)
|
|
{
|
|
return false;
|
|
}
|
|
// if it is found get the result initialize the substring
|
|
result->byteLength = ret;
|
|
RF_MALLOC(result->bytes,result->byteLength+1);
|
|
memcpy(result->bytes,thisstr->bytes,result->byteLength);
|
|
result->bytes[result->byteLength] = '\0';
|
|
// success
|
|
return true;
|
|
}
|
|
|
|
|
|
// Initializes the given String with the substring located after (and not including) the after substring inside the parameter string. If the substring is not located the function returns false.
|
|
char i_rfString_After(void* thisstrP,void* afterP,RF_String* out,const char* optionsP)
|
|
{
|
|
RF_String* thisstr = (RF_String*)thisstrP;
|
|
RF_String* after = (RF_String*)afterP;
|
|
char options = *optionsP;
|
|
int32_t bytePos;
|
|
// check for substring existence
|
|
if( (bytePos = rfString_FindBytePos(thisstr,after,options)) == RF_FAILURE)
|
|
{
|
|
return false;
|
|
}
|
|
// done so let's get it. Notice the use of the non-checking initialization
|
|
rfString_Init_nc(out,thisstr->bytes+bytePos+after->byteLength);
|
|
// success
|
|
return true;
|
|
}
|
|
|
|
|
|
// Initialize a string after the first of the given substrings found
|
|
#ifndef RF_OPTION_DEFAULT_ARGUMENTS
|
|
char rfString_Afterv(void* thisstrP,RF_String* result,const char* optionsP,const unsigned char* parNP,...)
|
|
#else
|
|
char i_rfString_Afterv(void* thisstrP,RF_String* result,const char* optionsP,const unsigned char* parNP,...)
|
|
#endif
|
|
{
|
|
RF_String* thisstr = (RF_String*)thisstrP;
|
|
RF_String* s;
|
|
char options = *optionsP;
|
|
unsigned char parN = *parNP;
|
|
int32_t i,minPos,thisPos;
|
|
uint32_t minPosLength;
|
|
// will keep the argument list
|
|
va_list argList;
|
|
// get the parameter characters
|
|
va_start(argList,parNP);
|
|
|
|
minPos = 9999999;
|
|
for(i = 0; i < parN; i++)
|
|
{
|
|
s = (RF_String*) va_arg(argList,RF_String*);
|
|
if( (thisPos= rfString_FindBytePos(thisstr,s,options))!= RF_FAILURE)
|
|
{
|
|
if(thisPos < minPos)
|
|
{
|
|
minPos = thisPos;
|
|
minPosLength = s->byteLength;
|
|
}
|
|
}
|
|
}
|
|
va_end(argList);
|
|
// if it is not found
|
|
if(minPos == 9999999)
|
|
{
|
|
return false;
|
|
}
|
|
// if it is found initialize the substring
|
|
minPos += minPosLength;// go after the found substring
|
|
result->byteLength = thisstr->byteLength-minPos;
|
|
RF_MALLOC(result->bytes,result->byteLength);
|
|
memcpy(result->bytes,thisstr->bytes+minPos,result->byteLength);
|
|
result->bytes[result->byteLength] = '\0';
|
|
// success
|
|
return true;
|
|
}
|
|
|
|
/*------------------------------------------------------------------------ RF_String manipulation functions-------------------------------------------------------------------------------*/
|
|
|
|
|
|
// Appends the parameter String to this one
|
|
void i_rfString_Append(RF_String* thisstr,void* otherP)
|
|
{
|
|
RF_String* other = (RF_String*)otherP;
|
|
// /@note Here if a null addition is given lots of actions are done but the result is safe and the same string as the one entered.
|
|
// /A check here would result in an additional check for every appending so I decided against it
|
|
// calculate the new length
|
|
thisstr->byteLength +=other->byteLength;
|
|
// reallocate this string to fit the new addition
|
|
RF_REALLOC(thisstr->bytes,char,thisstr->byteLength+1);
|
|
// add the string to this one
|
|
strncat(thisstr->bytes,other->bytes,other->byteLength);
|
|
}
|
|
|
|
// Appends an integer to the string
|
|
void rfString_Append_i(RF_String* thisstr,const int32_t i)
|
|
{
|
|
// create a new buffer for the string big enough to fit any number plus the original string
|
|
char* buff;
|
|
RF_MALLOC(buff,thisstr->byteLength+15);// max uint32_t is 4,294,967,295 in most environment so 12 chars will certainly fit it
|
|
// put the int32_t inside the string
|
|
sprintf(buff,"%s%i",thisstr->bytes,i);
|
|
// free the previous c string
|
|
free(thisstr->bytes);
|
|
// point the string pointer to the new string
|
|
thisstr->bytes = buff;
|
|
thisstr->byteLength = strlen(thisstr->bytes);
|
|
}
|
|
// Appends a float to the string. <b>Can't be used with RF_StringX</b>
|
|
void rfString_Append_f(RF_String* thisstr,const float f)
|
|
{
|
|
// a temporary buffer to hold the float and the string
|
|
char* buff;
|
|
RF_MALLOC(buff,thisstr->byteLength+64);
|
|
// put the float inside the string
|
|
sprintf(buff,"%s%f",thisstr->bytes,f);
|
|
// free the previous c string
|
|
free(thisstr->bytes);
|
|
// point the string pointer to the new string
|
|
thisstr->bytes = buff;
|
|
thisstr->byteLength = strlen(thisstr->bytes);
|
|
}
|
|
|
|
// Prepends the parameter String to this string
|
|
void i_rfString_Prepend(RF_String* thisstr,void* otherP)
|
|
{
|
|
RF_String* other = (RF_String*)otherP;
|
|
uint32_t size;
|
|
int32_t i;// is not unsigned since it goes to -1 in the loop
|
|
// keeep the original byte size of the string
|
|
size = thisstr->byteLength;
|
|
// calculate the new lengths
|
|
thisstr->byteLength += other->byteLength;
|
|
// reallocate this string to fit the new addition
|
|
RF_REALLOC(thisstr->bytes,char,thisstr->byteLength+1);
|
|
// move the pre-existing string to the end of the buffer, by dislocating each byte by cstrlen
|
|
for(i =size; i >=0 ; i--)
|
|
thisstr->bytes[i+other->byteLength] = thisstr->bytes[i];
|
|
// and now add the new string to the start
|
|
memcpy(thisstr->bytes,other->bytes,other->byteLength);
|
|
}
|
|
|
|
// Removes all of the specifed string occurences from this String matching case or not, DOES NOT reallocate buffer size.
|
|
char i_rfString_Remove(void* thisstrP,void* rstrP,uint32_t* numberP,const char* optionsP)
|
|
{
|
|
RF_String* thisstr = (RF_String*)thisstrP;
|
|
RF_String* rstr = (RF_String*)rstrP;
|
|
char options = *optionsP;
|
|
uint32_t number = *numberP;
|
|
uint32_t i,count,occurences=0;
|
|
int32_t bytePos;
|
|
char found = false;
|
|
// as long as we keep finding rstr in the string keep removing it
|
|
do
|
|
{ // if the substring is not found
|
|
if( (bytePos = rfString_FindBytePos(thisstr,rstr,options)) == RF_FAILURE)
|
|
{
|
|
// if we have not even found it once , we fail
|
|
if(found == false)
|
|
{
|
|
return false;
|
|
}
|
|
else // else we are done
|
|
break;
|
|
}
|
|
|
|
// substring found
|
|
found = true;
|
|
// move all of the string a position back
|
|
count = 0;
|
|
for(i = bytePos; i <=thisstr->byteLength; i ++)
|
|
{
|
|
thisstr->bytes[i] = thisstr->bytes[i+rstr->byteLength];
|
|
count++;
|
|
}
|
|
// now change the byte length
|
|
thisstr->byteLength -= rstr->byteLength;
|
|
// count the number of occurences and if we reached the required amount, stop
|
|
occurences++;
|
|
if(occurences == number)
|
|
break;
|
|
}while(bytePos != RF_FAILURE);
|
|
// succcess
|
|
return true;
|
|
}
|
|
|
|
// Removes all of the characters of the string except those specified
|
|
void i_rfString_KeepOnly(void* thisstrP,void* keepstrP)
|
|
{
|
|
uint32_t keepLength,i,j,charValue,temp;
|
|
uint32_t *keepChars;
|
|
RF_String* thisstr = (RF_String*)thisstrP;
|
|
RF_String* keepstr = (RF_String*)keepstrP;
|
|
char exists,charBLength;
|
|
// first let's get all of the characters of the keep string in an array
|
|
i=0;
|
|
keepLength = rfString_Length(keepstr);
|
|
RF_MALLOC(keepChars,4*keepLength);
|
|
rfString_Iterate_Start(keepstr,i,charValue)
|
|
keepChars[i] = charValue;
|
|
rfString_Iterate_End(i)
|
|
// now iterate every character of this string
|
|
i=0;
|
|
rfString_Iterate_Start(thisstr,i,charValue)
|
|
// for every character check if it exists in the keep str
|
|
exists = false;
|
|
for(j=0;j<keepLength; j++)
|
|
{
|
|
if(keepChars[j] == charValue)
|
|
exists = true;
|
|
}
|
|
// if it does not exist, move the string back to cover it so that it effectively gets deleted
|
|
if(exists == false)
|
|
{
|
|
charBLength = rfUTF8_FromCodepoint(charValue,&temp);
|
|
// this is kind of a non-clean way to do it. the rfString_Iterate_Start macro internally uses a byteIndex_ variable
|
|
// we use that here to determine the current byteIndex_ of the string in the iteration and move the string backs
|
|
memmove(thisstr->bytes+byteIndex_,thisstr->bytes+byteIndex_+charBLength,thisstr->byteLength-byteIndex_+charBLength);
|
|
thisstr->byteLength-=charBLength;
|
|
continue;// by contiuing here we make sure that the current string position won't be moved to assure that we also check the newly move characters
|
|
}
|
|
rfString_Iterate_End(i)
|
|
// before returning free the keep string's character array
|
|
free(keepChars);
|
|
}
|
|
|
|
// Removes the first n characters from the start of the string
|
|
char rfString_PruneStart(void* thisstrP,uint32_t n)
|
|
{
|
|
RF_String* thisstr = (RF_String*)thisstrP;
|
|
// iterate the characters of the string
|
|
uint32_t i;
|
|
uint32_t length = 0;
|
|
unsigned nBytePos = 0;
|
|
char found = false;
|
|
RF_STRING_ITERATE_START(thisstr,length,i);
|
|
// if we reach the number of characters passed as a parameter, note it
|
|
if(length == n)
|
|
{
|
|
// remember that now i is the byte position we need
|
|
nBytePos = i;
|
|
found = true;
|
|
break;
|
|
}
|
|
RF_STRING_ITERATE_END(length,i)
|
|
|
|
// if the string does not have n chars to remove it becomes an empty string and we return failure
|
|
if(found == false)
|
|
{
|
|
thisstr->bytes[0] = '\0';
|
|
thisstr->byteLength = 0;
|
|
return false;
|
|
}
|
|
|
|
// move the string back to cover the empty places.reallocation here would be an overkill, everything will be freed together when the string gets freed
|
|
for(i =0; i < thisstr->byteLength-nBytePos+1;i++ )
|
|
thisstr->bytes[i] = thisstr->bytes[i+nBytePos];
|
|
|
|
// get the new bytelength
|
|
thisstr->byteLength -= nBytePos;
|
|
|
|
return true;
|
|
}
|
|
|
|
// Removes the last n characters from the end of the string
|
|
char rfString_PruneEnd(void* thisstrP,uint32_t n)
|
|
{
|
|
RF_String* thisstr = (RF_String*)thisstrP;
|
|
// start the iteration of the characters from the end of the string
|
|
int32_t nBytePos = -1;
|
|
uint32_t length,i;
|
|
RF_STRING_ITERATEB_START(thisstr,length,i)
|
|
// if we found the requested number of characters from the end of the string
|
|
if(length == n)
|
|
{
|
|
// remember that now i is the byte position we need
|
|
nBytePos = i;
|
|
break;
|
|
}
|
|
RF_STRING_ITERATEB_END(length,i)
|
|
|
|
// if the string does not have n chars to remove it becomes an empty string and we return failure
|
|
if(nBytePos == -1)
|
|
{
|
|
thisstr->bytes[0] = '\0';
|
|
return false;
|
|
}
|
|
|
|
// just set the end of string character characters back, reallocation here would be an overkill, everything will be freed together when the string gets freed
|
|
thisstr->bytes[nBytePos] = '\0';
|
|
// and also set the new byte length
|
|
thisstr->byteLength -= (thisstr->byteLength - nBytePos);
|
|
// success
|
|
return true;
|
|
}
|
|
|
|
// Removes n characters from the position p of the string counting backwards. If there is no space to do so, nothing is done and returns false.
|
|
char rfString_PruneMiddleB(void* thisstrP,uint32_t p,uint32_t n)
|
|
{
|
|
RF_String* thisstr = (RF_String*)thisstrP;
|
|
// if we ask to remove more characters from the position that it would be possible do nothign and return false
|
|
if(n>p+1)
|
|
return false;
|
|
|
|
// iterate the characters of the string
|
|
uint32_t j,i,length;
|
|
int32_t pBytePos,nBytePos;
|
|
pBytePos = nBytePos = -1;
|
|
RF_STRING_ITERATE_START(thisstr,length,i)
|
|
// if we reach the number of characters passed as a parameter, note it
|
|
if(length == p+1)
|
|
{
|
|
// we search for p+1 because we want to include all of the p character
|
|
pBytePos = i;
|
|
// also break since we don't care after position p
|
|
break;
|
|
}
|
|
if(length == p-n+1)// +1 is to make sure that indexing works from 0
|
|
nBytePos = i;
|
|
|
|
RF_STRING_ITERATE_END(length,i)
|
|
|
|
// if the position was not found in the string do nothing
|
|
if(pBytePos == -1 || nBytePos == -1)
|
|
return false;
|
|
|
|
// move the bytes in the buffer to remove the requested characters
|
|
for(i=nBytePos,j=0;j<= thisstr->byteLength-pBytePos+1; i ++,j++) // here +2 is for (+1 for pbytePos to go to the start of pth character) (+1 for the byteLength to include the null termination character)
|
|
{
|
|
thisstr->bytes[i] = thisstr->bytes[pBytePos+j];
|
|
}
|
|
|
|
// find the new byte length
|
|
thisstr->byteLength -= (nBytePos - pBytePos);
|
|
|
|
return true;
|
|
}
|
|
|
|
// Removes n characters from the position p of the string counting forwards. If there is no space, nothing is done and returns false.
|
|
char rfString_PruneMiddleF(void* thisstrP,uint32_t p,uint32_t n)
|
|
{
|
|
RF_String* thisstr = (RF_String*)thisstrP;
|
|
// iterate the characters of the string
|
|
uint32_t j,i,length;
|
|
int32_t pBytePos,nBytePos;
|
|
pBytePos = nBytePos = -1;
|
|
RF_STRING_ITERATE_START(thisstr,length,i)
|
|
// if we reach the number of characters passed as a parameter, note it
|
|
if(length == p)
|
|
pBytePos = i;
|
|
|
|
if(length == p+n)
|
|
{
|
|
nBytePos = i;
|
|
break;// since we got all the data we needed
|
|
}
|
|
|
|
RF_STRING_ITERATE_END(length,i)
|
|
|
|
// if the position was not found in the string do nothing
|
|
if(pBytePos == -1 )
|
|
return false;
|
|
|
|
// if we did not find the byte position of p+n then we remove everything from pBytePos until the end of the string
|
|
if(nBytePos == -1)
|
|
{
|
|
thisstr->bytes[pBytePos] = '\0';
|
|
thisstr->byteLength -= (thisstr->byteLength-pBytePos);
|
|
return true;
|
|
}
|
|
|
|
// move the bytes in the buffer to remove the requested characters
|
|
for(i=pBytePos,j=0;j<= thisstr->byteLength-nBytePos+1; i ++,j++) // here +2 is for (+1 for pbytePos to go to the start of pth character) (+1 for the byteLength to include the null termination character)
|
|
{
|
|
thisstr->bytes[i] = thisstr->bytes[nBytePos+j];
|
|
}
|
|
|
|
// find the new byte length
|
|
thisstr->byteLength -= (nBytePos - pBytePos);
|
|
return true;
|
|
}
|
|
|
|
// Replaces all of the specified sstr substring from the String with rstr and reallocates size, unless the new size is smaller
|
|
char i_rfString_Replace(RF_String* thisstr,void* sstrP,void* rstrP,const uint32_t* numP,const char* optionsP)
|
|
{
|
|
RF_String* sstr = (RF_String*)sstrP;
|
|
RF_String* rstr = (RF_String*)rstrP;
|
|
char options = *optionsP;
|
|
uint32_t num = *numP;
|
|
RF_StringX temp;// just a temporary string for finding the occurences
|
|
// will keep the number of found instances of the substring
|
|
uint32_t foundN = 0;
|
|
// will keep the number of given instances to find
|
|
uint32_t number = num;
|
|
uint32_t diff,i,j;
|
|
// if the substring string is not even found return false
|
|
if(rfString_FindBytePos(thisstr,sstr,options) == RF_FAILURE)
|
|
{
|
|
return false;
|
|
}
|
|
// create a buffer that will keep the byte positions
|
|
uint32_t bSize = 50;
|
|
int32_t * bytePositions;
|
|
RF_MALLOC(bytePositions,bSize*sizeof(int32_t));
|
|
// if the given num is 0 just make sure we replace all
|
|
if(number == 0)
|
|
number = 999999;// max number of occurences
|
|
|
|
// find how many occurences exist
|
|
rfStringX_FromString_IN(&temp,thisstr);
|
|
while( (bytePositions[foundN] = rfString_FindBytePos(&temp,sstr,options)) != RF_FAILURE)
|
|
{
|
|
int32_t move = bytePositions[foundN] + sstr->byteLength;
|
|
bytePositions[foundN] = bytePositions[foundN]+temp.bIndex;
|
|
temp.bIndex += move;
|
|
temp.bytes += move;
|
|
temp.byteLength -= move;
|
|
foundN++;
|
|
// if buffer is in danger of overflow realloc it
|
|
if(foundN > bSize)
|
|
{
|
|
bSize *=2;
|
|
RF_REALLOC(bytePositions,int32_t,bSize);
|
|
}
|
|
// if we found the required number of occurences break;
|
|
if(foundN >= number)
|
|
break;
|
|
}
|
|
rfStringX_Deinit(&temp);
|
|
// make sure that the number of occurence to replace do not exceed the actual number of occurences
|
|
if(number > foundN)
|
|
number = foundN;
|
|
// act depending on the size difference of rstr and sstr
|
|
if(rstr->byteLength > sstr->byteLength) // replace string is bigger than the removed one
|
|
{
|
|
int32_t orSize,nSize;
|
|
|
|
diff = rstr->byteLength - sstr->byteLength;
|
|
// will keep the original size in bytes
|
|
orSize = thisstr->byteLength +1;
|
|
// reallocate the string to fit the new bigger size
|
|
nSize= orSize + number*diff;
|
|
RF_REALLOC(thisstr->bytes,char,nSize)
|
|
// now replace all the substrings one by one
|
|
for(i = 0; i < number; i ++)
|
|
{
|
|
// move all of the contents of the string to fit the replacement
|
|
for(j =orSize+diff-1; j > bytePositions[i]+sstr->byteLength; j -- )
|
|
thisstr->bytes[j] = thisstr->bytes[j-diff];
|
|
// copy in the replacement
|
|
strncpy(thisstr->bytes+bytePositions[i],rstr->bytes,rstr->byteLength);
|
|
// also increase the original size (since now we moved the whole string by one replacement)
|
|
orSize += diff;
|
|
// also increase all the subsequent found byte positions since there is a change of string size
|
|
for(j = i+1; j < number; j ++)
|
|
bytePositions[j] = bytePositions[j]+diff;
|
|
|
|
}
|
|
// finally let's keep the new byte length
|
|
thisstr->byteLength = nSize-1;
|
|
}
|
|
else if( rstr->byteLength < sstr->byteLength) // replace string is smaller than the removed one
|
|
{
|
|
// get the differenc in byte length of removed substring and replace string
|
|
diff = sstr->byteLength-rstr->byteLength;
|
|
|
|
// now replace all the substrings one by one
|
|
for(i =0; i < number; i ++)
|
|
{
|
|
// copy in the replacement
|
|
strncpy(thisstr->bytes+bytePositions[i],rstr->bytes,rstr->byteLength);
|
|
// move all of the contents of the string to fit the replacement
|
|
for(j =bytePositions[i]+rstr->byteLength; j < thisstr->byteLength; j ++ )
|
|
thisstr->bytes[j] = thisstr->bytes[j+diff];
|
|
// also decrease all the subsequent found byte positions since there is a change of string size
|
|
for(j = i+1; j < number; j ++)
|
|
bytePositions[j] = bytePositions[j]-diff;
|
|
}
|
|
// finally let's keep the new byte length
|
|
thisstr->byteLength -= diff*number;
|
|
// just note that reallocating downwards is not necessary
|
|
}
|
|
else // replace and remove strings are equal
|
|
{
|
|
for(i = 0; i < number; i ++)
|
|
strncpy(thisstr->bytes+bytePositions[i],rstr->bytes,rstr->byteLength);
|
|
}
|
|
free(bytePositions);
|
|
// success
|
|
return true;
|
|
}
|
|
|
|
// Removes all characters of a substring only from the start of the String
|
|
char i_rfString_StripStart(void* thisstrP,void* subP)
|
|
{
|
|
RF_String* thisstr = (RF_String*) thisstrP;
|
|
RF_String*sub = (RF_String*) subP;
|
|
char ret = false,noMatch;
|
|
uint32_t charValue,i = 0,*subValues,j,subLength,bytePos;
|
|
|
|
// firstly get all of the characters of the substring in an array
|
|
subLength = rfString_Length(sub);
|
|
RF_MALLOC(subValues,4*subLength)
|
|
rfString_Iterate_Start(sub,i,charValue)
|
|
subValues[i] = charValue;
|
|
rfString_Iterate_End(i)
|
|
|
|
// iterate thisstring from the beginning
|
|
i = 0;
|
|
RF_STRING_ITERATE_START(thisstr,i,bytePos)
|
|
noMatch = true;
|
|
// for every substring character
|
|
for(j = 0;j < subLength; j++)
|
|
{
|
|
// if we got a match
|
|
if(rfString_BytePosToCodePoint(thisstr,bytePos) == subValues[j])
|
|
{
|
|
ret = true;
|
|
noMatch = false;
|
|
break;
|
|
}
|
|
}
|
|
// if we get out of iterating the substring without having found a match, we get out of the iteration in general
|
|
if(noMatch)
|
|
break;
|
|
RF_STRING_ITERATE_END(i,bytePos)
|
|
|
|
// if we had any match
|
|
if(ret == true)
|
|
{
|
|
// remove the characters
|
|
for(i =0; i < thisstr->byteLength-bytePos+1;i++ )
|
|
thisstr->bytes[i] = thisstr->bytes[i+bytePos];
|
|
// also change bytelength
|
|
thisstr->byteLength -= bytePos;
|
|
}
|
|
// free stuff and return
|
|
free(subValues);
|
|
return ret;
|
|
}
|
|
|
|
// Removes all characters of a substring starting from the end of the String
|
|
char i_rfString_StripEnd(void* thisstrP,void* subP)
|
|
{
|
|
RF_String* thisstr = (RF_String*) thisstrP;
|
|
RF_String*sub = (RF_String*) subP;
|
|
char ret = false,noMatch;
|
|
uint32_t charValue,i = 0,*subValues,j,subLength,bytePos,lastBytePos,testity;
|
|
|
|
// firstly get all of the characters of the substring in an array
|
|
subLength = rfString_Length(sub);
|
|
RF_MALLOC(subValues,4*subLength)
|
|
rfString_Iterate_Start(sub,i,charValue)
|
|
subValues[i] = charValue;
|
|
rfString_Iterate_End(i)
|
|
|
|
// iterate thisstring from the end
|
|
i = 0;
|
|
RF_STRING_ITERATEB_START(thisstr,i,bytePos)
|
|
noMatch = true;
|
|
// for every substring character
|
|
for(j = 0;j < subLength; j++)
|
|
{
|
|
// if we got a match
|
|
if((testity=rfString_BytePosToCodePoint(thisstr,bytePos)) == subValues[j])
|
|
{
|
|
ret = true;
|
|
noMatch = false;
|
|
lastBytePos = bytePos;
|
|
break;
|
|
}
|
|
}
|
|
// if we get out of iterating the substring without having found a match, we get out of the iteration in general
|
|
if(noMatch)
|
|
break;
|
|
RF_STRING_ITERATEB_END(i,bytePos)
|
|
|
|
// if we had any match
|
|
if(ret == true)
|
|
{
|
|
// just set the end of string there
|
|
thisstr->bytes[lastBytePos] = '\0';
|
|
// and also set the new byte length
|
|
thisstr->byteLength -= (thisstr->byteLength - lastBytePos);
|
|
}
|
|
|
|
// free stuff and return
|
|
free(subValues);
|
|
return ret;
|
|
}
|
|
|
|
// Strips the characters of the given substring from both ends of the string, first from the
// start and then from the end.
//
// Returns the bitwise OR of the two results, so it is non-zero if either end was stripped.
char i_rfString_Strip(void* thisstrP,void* subP)
{
    // bitwise OR on purpose: both ends are always processed, neither call is short-circuited
    char strippedAny = rfString_StripStart(thisstrP,subP);
    strippedAny |= rfString_StripEnd(thisstrP,subP);
    return strippedAny;
}
|
|
|
|
|
|
/*------------------------------------------------------------------------ RF_String File I/O functions-------------------------------------------------------------------------------*/
|
|
|
|
// Allocates and returns a string from file parsing. The file's encoding must be UTF-8.If for some reason (like EOF reached) no string can be read then null is returned
|
|
RF_String* rfString_Create_fUTF8(FILE* f, char* eof)
|
|
{
|
|
RF_String* ret;
|
|
RF_MALLOC(ret,sizeof(RF_String));
|
|
if(rfString_Init_fUTF8(ret,f,eof) < 0)
|
|
{
|
|
free(ret);
|
|
return 0;
|
|
}
|
|
return ret;
|
|
}
|
|
// Initializes a string from file parsing. The file's encoding must be UTF-8.If for some reason (like EOF reached) no string can be read then null is returned
|
|
int32_t rfString_Init_fUTF8(RF_String* str,FILE* f,char* eof)
|
|
{
|
|
int32_t bytesN;
|
|
uint32_t bufferSize;// unused
|
|
if((bytesN=rfFReadLine_UTF8(f,&str->bytes,&str->byteLength,&bufferSize,eof)) < 0)
|
|
{
|
|
LOG_ERROR("Failed to initialize String from a UTF-8 file",bytesN);
|
|
return bytesN;
|
|
}
|
|
// success
|
|
return bytesN;
|
|
}
|
|
// Assigns to a String from UTF-8 file parsing
|
|
int32_t rfString_Assign_fUTF8(RF_String* str,FILE*f,char* eof)
|
|
{
|
|
int32_t bytesN;
|
|
uint32_t utf8ByteLength,utf8BufferSize;// bufferSize unused in this function
|
|
char* utf8 = 0;
|
|
if((bytesN=rfFReadLine_UTF8(f,&utf8,&utf8ByteLength,&utf8BufferSize,eof)) < 0)
|
|
{
|
|
LOG_ERROR("Failed to assign the contents of a UTF-8 file to a String",bytesN);
|
|
return bytesN;
|
|
}
|
|
// success
|
|
// assign it to the string
|
|
if(str->byteLength <= utf8ByteLength)
|
|
{
|
|
RF_REALLOC(str->bytes,char,utf8ByteLength+1);
|
|
}
|
|
memcpy(str->bytes,utf8,utf8ByteLength+1);
|
|
str->byteLength = utf8ByteLength;
|
|
// free the file's utf8 buffer
|
|
free(utf8);
|
|
return bytesN;
|
|
}
|
|
// Appends to a String from UTF-8 file parsing
|
|
int32_t rfString_Append_fUTF8(RF_String* str,FILE*f,char* eof)
|
|
{
|
|
int32_t bytesN;
|
|
uint32_t utf8ByteLength,utf8BufferSize;// bufferSize unused in this function
|
|
char* utf8 = 0;
|
|
if((bytesN=rfFReadLine_UTF8(f,&utf8,&utf8ByteLength,&utf8BufferSize,eof)) < 0)
|
|
{
|
|
LOG_ERROR("Failed to assign the contents of a UTF-8 file to a String",bytesN);
|
|
return bytesN;
|
|
}
|
|
// append the utf8 to the given string
|
|
rfString_Append(str,RFS_(utf8));
|
|
// free the file's utf8 buffer
|
|
free(utf8);
|
|
return bytesN;
|
|
}
|
|
|
|
// Allocates and returns a string from file parsing. The file's encoding must be UTF-16.If for some reason (like EOF reached) no string can be read then null is returned. A check for a valid sequence of bytes is performed.
|
|
RF_String* rfString_Create_fUTF16(FILE* f,char endianess,char* eof)
|
|
{
|
|
RF_String* ret;
|
|
RF_MALLOC(ret,sizeof(RF_String));
|
|
if(rfString_Init_fUTF16(ret,f,endianess,eof) < 0)
|
|
return 0;
|
|
return ret;
|
|
}
|
|
// Initializes a string from file parsing. The file's encoding must be UTF-16.If for some reason (like EOF reached) no string can be read then null is returned. A check for a valid sequence of bytes is performed.
|
|
int32_t rfString_Init_fUTF16(RF_String* str,FILE* f, char endianess,char* eof)
|
|
{
|
|
int32_t bytesN;
|
|
// depending on the file's endianess
|
|
if(endianess == RF_LITTLE_ENDIAN)
|
|
{
|
|
if((bytesN=rfFReadLine_UTF16LE(f,&str->bytes,&str->byteLength,eof)) < 0)
|
|
{
|
|
LOG_ERROR("Failure to initialize a String from reading a UTF-16 file",bytesN);
|
|
return bytesN;
|
|
}
|
|
}// end of little endian
|
|
else// big endian
|
|
{
|
|
if((bytesN=rfFReadLine_UTF16BE(f,&str->bytes,&str->byteLength,eof)) < 0)
|
|
{
|
|
LOG_ERROR("Failure to initialize a String from reading a UTF-16 file",bytesN);
|
|
return bytesN;
|
|
}
|
|
}// end of big endian case
|
|
// success
|
|
return bytesN;
|
|
}
|
|
|
|
// Assigns to an already initialized String from File parsing
|
|
int32_t rfString_Assign_fUTF16(RF_String* str,FILE* f, char endianess,char* eof)
|
|
{
|
|
|
|
uint32_t utf8ByteLength;
|
|
int32_t bytesN;
|
|
char* utf8 = 0;
|
|
// depending on the file's endianess
|
|
if(endianess == RF_LITTLE_ENDIAN)
|
|
{
|
|
if((bytesN=rfFReadLine_UTF16LE(f,&utf8,&utf8ByteLength,eof)) < 0)
|
|
{
|
|
LOG_ERROR("Failure to assign the contents of a Little Endian UTF-16 file to a String",bytesN);
|
|
return bytesN;
|
|
}
|
|
}// end of little endian
|
|
else// big endian
|
|
{
|
|
if((bytesN=rfFReadLine_UTF16BE(f,&utf8,&utf8ByteLength,eof)) < 0)
|
|
{
|
|
LOG_ERROR("Failure to assign the contents of a Big Endian UTF-16 file to a String",bytesN);
|
|
return bytesN;
|
|
}
|
|
}// end of big endian case
|
|
// success
|
|
// assign it to the string
|
|
if(str->byteLength <= utf8ByteLength)
|
|
{
|
|
RF_REALLOC(str->bytes,char,utf8ByteLength+1);
|
|
}
|
|
memcpy(str->bytes,utf8,utf8ByteLength+1);
|
|
str->byteLength = utf8ByteLength;
|
|
// free the file's utf8 buffer
|
|
free(utf8);
|
|
return bytesN;
|
|
}
|
|
|
|
// Appends to an already initialized String from File parsing
|
|
int32_t rfString_Append_fUTF16(RF_String* str,FILE* f, char endianess,char* eof)
|
|
{
|
|
char*utf8;
|
|
uint32_t utf8ByteLength;
|
|
int32_t bytesN;
|
|
// depending on the file's endianess
|
|
if(endianess == RF_LITTLE_ENDIAN)
|
|
{
|
|
if((bytesN=rfFReadLine_UTF16LE(f,&utf8,&utf8ByteLength,eof)) < 0)
|
|
{
|
|
LOG_ERROR("Failure to append the contents of a Little Endian UTF-16 file to a String",bytesN);
|
|
return bytesN;
|
|
}
|
|
}// end of little endian
|
|
else// big endian
|
|
{
|
|
if((bytesN=rfFReadLine_UTF16BE(f,&utf8,&utf8ByteLength,eof)) < 0)
|
|
{
|
|
LOG_ERROR("Failure to append the contents of a Big Endian UTF-16 file to a String",bytesN);
|
|
return bytesN;
|
|
}
|
|
}// end of big endian case
|
|
// success
|
|
rfString_Append(str,RFS_(utf8));
|
|
free(utf8);
|
|
return bytesN;
|
|
}
|
|
|
|
// Allocates and returns a string from file parsing. The file's encoding must be UTF-32.If for some reason (like EOF reached) no string can be read then null is returned. A check for a valid sequence of bytes is performed.
|
|
RF_String* rfString_Create_fUTF32(FILE* f,char endianess,char* eof)
|
|
{
|
|
RF_String* ret;
|
|
RF_MALLOC(ret,sizeof(RF_String));
|
|
if(rfString_Init_fUTF32(ret,f,endianess,eof) < 0)
|
|
{
|
|
free(ret);
|
|
return 0;
|
|
}
|
|
return ret;
|
|
}
|
|
// Initializes a string from file parsing. The file's encoding must be UTF-32.If for some reason (like EOF reached) no string can be read then null is returned. A check for a valid sequence of bytes is performed.
|
|
int32_t rfString_Init_fUTF32(RF_String* str,FILE* f,char endianess,char* eof)
|
|
{
|
|
int32_t bytesN;
|
|
// depending on the file's endianess
|
|
if(endianess == RF_LITTLE_ENDIAN)
|
|
{
|
|
if((bytesN=rfFReadLine_UTF32LE(f,&str->bytes,&str->byteLength,eof)) <0)
|
|
{
|
|
LOG_ERROR("Failure to initialize a String from reading a Little Endian UTF-32 file",bytesN);
|
|
return bytesN;
|
|
}
|
|
}// end of little endian
|
|
else// big endian
|
|
{
|
|
if((bytesN=rfFReadLine_UTF16BE(f,&str->bytes,&str->byteLength,eof)) < 0)
|
|
{
|
|
LOG_ERROR("Failure to initialize a String from reading a Big Endian UTF-32 file",bytesN);
|
|
return bytesN;
|
|
}
|
|
}// end of big endian case
|
|
// success
|
|
return bytesN;
|
|
}
|
|
// Assigns the contents of a UTF-32 file to a string
|
|
int32_t rfString_Assign_fUTF32(RF_String* str,FILE* f,char endianess, char* eof)
|
|
{
|
|
int32_t bytesN;
|
|
char*utf8;
|
|
uint32_t utf8ByteLength;
|
|
// depending on the file's endianess
|
|
if(endianess == RF_LITTLE_ENDIAN)
|
|
{
|
|
if((bytesN=rfFReadLine_UTF32LE(f,&utf8,&utf8ByteLength,eof)) < 0)
|
|
{
|
|
LOG_ERROR("Failure to assign to a String from reading a Little Endian UTF-32 file",bytesN);
|
|
return bytesN;
|
|
}
|
|
}// end of little endian
|
|
else// big endian
|
|
{
|
|
if((bytesN=rfFReadLine_UTF16BE(f,&utf8,&utf8ByteLength,eof)) < 0)
|
|
{
|
|
LOG_ERROR("Failure to assign to a String from reading a Big Endian UTF-32 file",bytesN);
|
|
return bytesN;
|
|
}
|
|
}// end of big endian case
|
|
// success
|
|
// assign it to the string
|
|
if(str->byteLength <= utf8ByteLength)
|
|
{
|
|
RF_REALLOC(str->bytes,char,utf8ByteLength+1);
|
|
}
|
|
memcpy(str->bytes,utf8,utf8ByteLength+1);
|
|
str->byteLength = utf8ByteLength;
|
|
// free the file's utf8 buffer
|
|
free(utf8);
|
|
return bytesN;
|
|
}
|
|
// Appends the contents of a UTF-32 file to a string
|
|
int32_t rfString_Append_fUTF32(RF_String* str,FILE* f,char endianess, char* eof)
|
|
{
|
|
int32_t bytesN;
|
|
char*utf8;
|
|
uint32_t utf8ByteLength;
|
|
// depending on the file's endianess
|
|
if(endianess == RF_LITTLE_ENDIAN)
|
|
{
|
|
if((bytesN=rfFReadLine_UTF32LE(f,&utf8,&utf8ByteLength,eof)) < 0)
|
|
{
|
|
LOG_ERROR("Failure to append to a String from reading a Little Endian UTF-32 file",bytesN);
|
|
return bytesN;
|
|
}
|
|
}// end of little endian
|
|
else// big endian
|
|
{
|
|
if((bytesN=rfFReadLine_UTF16BE(f,&utf8,&utf8ByteLength,eof)) < 0)
|
|
{
|
|
LOG_ERROR("Failure to append to a String from reading a Big Endian UTF-32 file",bytesN);
|
|
return bytesN;
|
|
}
|
|
}// end of big endian case
|
|
// success
|
|
// append it
|
|
rfString_Append(str,RFS_(utf8));
|
|
// free the file'sutf8 buffer
|
|
free(utf8);
|
|
return bytesN;
|
|
}
|
|
|
|
// Writes a string to a file in UTF-8 encoding.
|
|
int32_t i_rfString_Fwrite(void* sP,FILE* f,char* encodingP)
|
|
{
|
|
uint32_t *utf32,length,i;
|
|
uint16_t* utf16;
|
|
RF_String* s = (RF_String*)sP;
|
|
char encoding = *encodingP;
|
|
// depending on the encoding
|
|
switch(encoding)
|
|
{
|
|
case RF_UTF8:
|
|
if(fwrite(s->bytes,1,s->byteLength,f) != s->byteLength)
|
|
break;// and go to error logging
|
|
return RF_SUCCESS;
|
|
break;
|
|
case RF_UTF16_LE:
|
|
utf16 = rfString_ToUTF16(s,&length);
|
|
if(rfUTILS_Endianess() != RF_LITTLE_ENDIAN)
|
|
{
|
|
for(i=0;i<length;i++)
|
|
{
|
|
rfUTILS_SwapEndianUS(&utf16[i]);
|
|
}
|
|
}
|
|
if(fwrite(utf16,2,length,f) != length)
|
|
{
|
|
free(utf16);
|
|
break;// and go to error logging
|
|
}
|
|
free(utf16);
|
|
return RF_SUCCESS;
|
|
break;
|
|
case RF_UTF16_BE:
|
|
utf16 = rfString_ToUTF16(s,&length);
|
|
if(rfUTILS_Endianess() != RF_BIG_ENDIAN)
|
|
{
|
|
for(i=0;i<length;i++)
|
|
{
|
|
rfUTILS_SwapEndianUS(&utf16[i]);
|
|
}
|
|
}
|
|
if(fwrite(utf16,2,length,f) != length)
|
|
{
|
|
free(utf16);
|
|
break;// and go to error logging
|
|
}
|
|
free(utf16);
|
|
return RF_SUCCESS;
|
|
break;
|
|
case RF_UTF32_LE:
|
|
utf32 = rfString_ToUTF32(s,&length);
|
|
if(rfUTILS_Endianess() != RF_LITTLE_ENDIAN)
|
|
{
|
|
for(i=0;i<length;i++)
|
|
{
|
|
rfUTILS_SwapEndianUI(&utf32[i]);
|
|
}
|
|
}
|
|
if(fwrite(utf32,4,length,f) != length)
|
|
{
|
|
free(utf32);
|
|
break;// and go to error logging
|
|
}
|
|
free(utf32);
|
|
return RF_SUCCESS;
|
|
break;
|
|
case RF_UTF32_BE:
|
|
utf32 = rfString_ToUTF32(s,&length);
|
|
if(rfUTILS_Endianess() != RF_BIG_ENDIAN)
|
|
{
|
|
for(i=0;i<length;i++)
|
|
{
|
|
rfUTILS_SwapEndianUI(&utf32[i]);
|
|
}
|
|
}
|
|
if(fwrite(utf32,4,length,f) != length)
|
|
{
|
|
free(utf32);
|
|
break;// and go to error logging
|
|
}
|
|
free(utf32);
|
|
return RF_SUCCESS;
|
|
break;
|
|
}
|
|
// if we get here it means an error, and we log it with the macro
|
|
i_WRITE_CHECK(f,"Writting a string to a file")
|
|
return RE_FILE_WRITE;
|
|
}
|
|
|
|
|