mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			2349 lines
		
	
	
		
			84 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			2349 lines
		
	
	
		
			84 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /**
 | |
| ** Copyright (c) 2011-2012, Karapetsas Eleftherios
 | |
| ** All rights reserved.
 | |
| **
 | |
| ** Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 | |
| **  1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 | |
| **  2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in
 | |
| **     the documentation and/or other materials provided with the distribution.
 | |
| **  3. Neither the name of the Original Author of Refu nor the names of its contributors may be used to endorse or promote products derived from
 | |
| **
 | |
| **  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 | |
| **  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 | |
| **  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 | |
| **  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | |
| **  SERVICES;LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 | |
| **  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | |
| **  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
| **/
 | |
| #include <errno.h>
 | |
| 
 | |
| #include <String/rfc_string.h>
 | |
| // include bitwise operations
 | |
| #include <rf_utils.h>
 | |
| // include the private functions and macros
 | |
| #include "string_private.h"
 | |
| // include io_private only for the write check
 | |
| #include "../IO/io_private.h"
 | |
| // include the extended strin
 | |
| #include <String/rfc_stringx.h>
 | |
| // for HUGE_VAL definition
 | |
| #include <math.h>
 | |
| 
 | |
| #include <rf_localmem.h> // for the local stack memory
 | |
| 
 | |
| /*********************************************************************** Start of the RF_String functions *****************************************************************************************/
 | |
| 
 | |
| /*-------------------------------------------------------------------------Methods to create an RF_String-------------------------------------------------------------------------------*/
 | |
| 
 | |
| // Allocates and returns a string with the given characters a refu string with the given characters. Given characters have to be in UTF-8. A check for valide sequence of bytes is performed.
 | |
| #ifndef RF_OPTION_DEFAULT_ARGUMENTS
 | |
| RF_String* rfString_Create(const char* s,...)
 | |
| #else
 | |
| RF_String* i_rfString_Create(const char* s,...)
 | |
| #endif
 | |
| {
 | |
|     READ_VSNPRINTF_ARGS(s,s,0)
 | |
| 
 | |
|     // check for validity of the given sequence and get the character length
 | |
|     uint32_t byteLength;
 | |
|     if( rfUTF8_VerifySequence(buff,&byteLength) == RF_FAILURE)
 | |
|     {
 | |
|         LOG_ERROR("Error at String Allocation due to invalid UTF-8 byte sequence",RE_STRING_INIT_FAILURE);
 | |
|         if(buffAllocated == true)
 | |
|             free(buff);
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     RF_String* ret;
 | |
|     RF_MALLOC(ret,sizeof(RF_String));
 | |
|     // get length
 | |
|     ret->byteLength = byteLength;
 | |
| 
 | |
|     // now that we know the length we can allocate the buffer and copy the bytes
 | |
|     RF_MALLOC(ret->bytes,ret->byteLength+1);
 | |
|     memcpy(ret->bytes,buff,ret->byteLength+1);
 | |
|     if(buffAllocated==true)
 | |
|         free(buff);
 | |
|     return ret;
 | |
| }
 | |
| #ifdef RF_OPTION_DEFAULT_ARGUMENTS
 | |
| RF_String* i_NVrfString_Create(const char* s)
 | |
| {
 | |
|     // check for validity of the given sequence and get the character length
 | |
|     uint32_t byteLength;
 | |
|     if( rfUTF8_VerifySequence(s,&byteLength) == RF_FAILURE)
 | |
|     {
 | |
|         LOG_ERROR("Error at String Allocation due to invalid UTF-8 byte sequence",RE_STRING_INIT_FAILURE);
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     RF_String* ret;
 | |
|     RF_MALLOC(ret,sizeof(RF_String));
 | |
|     // get length
 | |
|     ret->byteLength = byteLength;
 | |
| 
 | |
|     // now that we know the length we can allocate the buffer and copy the bytes
 | |
|     RF_MALLOC(ret->bytes,ret->byteLength+1);
 | |
|     memcpy(ret->bytes,s,ret->byteLength+1);
 | |
| 
 | |
|     return ret;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| 
 | |
| // Allocates and returns a string with the given characters a refu string with the given characters. Given characters have to be in UTF-8. A check for valid sequence of bytes is performed.
 | |
| RF_String* i_rfString_CreateLocal1(const char* s,...)
 | |
| {
 | |
| #if RF_OPTION_SOURCE_ENCODING != RF_UTF8
 | |
|     uint32_t characterLength,*codepoints,i=0,j;
 | |
| #endif
 | |
|     // remember the stack pointer before this macro evaluation
 | |
|     rfLMS_MacroEvalPtr(RF_LMS);
 | |
|     // read the var args
 | |
|     READ_VSNPRINTF_ARGS(s,s,0)
 | |
| // /===Start of Non-UTF-8 code===// /
 | |
| #if (RF_OPTION_SOURCE_ENCODING  ==  RF_UTF16_LE) || (RF_OPTION_SOURCE_ENCODING  ==  RF_UTF16_BE)
 | |
|     // find the bytelength of the UTF-16 buffer
 | |
|     while(buff[i] != '\0' && buff[i+1]!= '\0')
 | |
|         i++;
 | |
|     i+=2;
 | |
|     // allocate the codepoint buffer
 | |
|     RF_MALLOC(codepoints,i/2)
 | |
| #elif (RF_OPTION_SOURCE_ENCODING  ==  RF_UTF32_LE) || (RF_OPTION_SOURCE_ENCODING  ==  RF_UTF32_BE)
 | |
|     // find the bytelength of the UTF-32 buffer
 | |
|     while(buff[i] != '\0' && buff[i+1]!= '\0' && buff[i+2]!= '\0' && buff[i+3]!= '\0')
 | |
|         i++;
 | |
|     i+=4;
 | |
|     // allocate the codepoint buffer
 | |
|     RF_MALLOC(codepoints,i)
 | |
| #endif
 | |
| #if (RF_OPTION_SOURCE_ENCODING  ==  RF_UTF16_LE)// decode the UTF16
 | |
|     if(rfUTILS_Endianess() == RF_LITTLE_ENDIAN)
 | |
|         if(rfUTF16_Decode(buff,&characterLength,codepoints) == false)
 | |
|             goto cleanup;
 | |
|     else
 | |
|         if(rfUTF16_Decode_swap(buff,&characterLength,codepoints)==false)
 | |
|             goto cleanup;
 | |
| 
 | |
| #elif RF_OPTION_SOURCE_ENCODING == RF_UTF16_BE// decode the UTF16
 | |
|     if(rfUTILS_Endianess() == RF_LITTLE_ENDIAN)
 | |
|         if(rfUTF16_Decode_swap(buff,&characterLength,codepoints) == false)
 | |
|             goto cleanup;
 | |
|     else
 | |
|         if(rfUTF16_Decode(buff,&characterLength,codepoints)==false)
 | |
|             goto cleanup;
 | |
| #elif RF_OPTION_SOURCE_ENCODING == RF_UTF32_LE// copy the UTF32 into the codepoint
 | |
|     memcpy(codepoints,buff,i);
 | |
|     if(rfUTILS_Endianess != RF_LITTLE_ENDIAN)
 | |
|     {
 | |
|         for(j=0;j<i;j+=4)
 | |
|         {
 | |
|             rfUTILS_SwapEndianUI((uint32_t*)(codepoints+j))
 | |
|         }
 | |
|     }
 | |
| #elif RF_OPTION_SOURCE_ENCODING == RF_UTF32_BE// copy the UTF32 into the codepoint
 | |
|     memcpy(codepoints,buff,i);
 | |
|     if(rfUTILS_Endianess !RF_BIG_ENDIAN RF_LITTLE_ENDIAN)
 | |
|     {
 | |
|         for(j=0;j<i;j+=4)
 | |
|         {
 | |
|             rfUTILS_SwapEndianUI((uint32_t*)(codepoints+j))
 | |
|         }
 | |
|     }
 | |
| #endif
 | |
| #if RF_OPTION_SOURCE_ENCODING != RF_UTF8 // in any case other than UTF-8 encode the codepoints into UTF-8 , and free them
 | |
|     if(buffAllocated == true)
 | |
|         free(buff);
 | |
|     buffAllocated = true;
 | |
|     if((buff =  rfUTF8_Encode(codepoints,characterLength,&byteLength)) == 0)
 | |
|     {
 | |
|         LOG_ERROR("While attempting to create a temporary RF_String the given byte sequence could not be properly encoded into UTF-8",RE_UTF8_ENCODING);
 | |
|         free(codepoints);
 | |
|         return 0;
 | |
|     }
 | |
|     free(codepoints);
 | |
| #endif
 | |
| // /===End of Non-UTF-8 code===// /
 | |
|     // /progress normally since here we have a UTF-8 buffer
 | |
|     // check for validity of the given sequence and get the character length
 | |
|     uint32_t byteLength;
 | |
|     if( rfUTF8_VerifySequence(buff,&byteLength) == RF_FAILURE)
 | |
|     {
 | |
|         LOG_ERROR("Error at String Allocation due to invalid UTF-8 byte sequence",RE_STRING_INIT_FAILURE);
 | |
|         if(buffAllocated == true)
 | |
|             free(buff);
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     RF_String* ret;
 | |
|     ret = rfLMS_Push(RF_LMS,sizeof(RF_String));
 | |
|     if(ret == 0)
 | |
|     {
 | |
|         LOG_ERROR("Memory allocation from the Local Memory Stack failed. Insufficient local memory stack space. Consider compiling the library with bigger stack space. Quitting proccess...",
 | |
|                   RE_LOCALMEMSTACK_INSUFFICIENT);
 | |
|         exit(RE_LOCALMEMSTACK_INSUFFICIENT);
 | |
|     }
 | |
|     // get length
 | |
|     ret->byteLength = byteLength;
 | |
| 
 | |
|     // now that we know the length we can allocate the buffer and copy the bytes
 | |
|     ret->bytes = rfLMS_Push(RF_LMS,ret->byteLength+1);
 | |
|     if(ret->bytes == 0)
 | |
|     {
 | |
|         LOG_ERROR("Memory allocation from the Local Memory Stack failed. Insufficient local memory stack space. Consider compiling the library with bigger stack space. Quitting proccess...",
 | |
|                   RE_LOCALMEMSTACK_INSUFFICIENT);
 | |
|         exit(RE_LOCALMEMSTACK_INSUFFICIENT);
 | |
|     }
 | |
|     memcpy(ret->bytes,buff,ret->byteLength+1);
 | |
|     // finally free stuff if needed
 | |
|     if(buffAllocated == true)
 | |
|         free(buff);
 | |
|     return ret;
 | |
| 
 | |
| // /cleanup code for non-UTF-8 cases
 | |
| #if (RF_OPTION_SOURCE_ENCODING == RF_UTF16_LE) || (RF_OPTION_SOURCE_ENCODING == RF_UTF16_BE)
 | |
| cleanup:
 | |
| #if RF_OPTION_SOURCE_ENCODING == RF_UTF16_LE
 | |
|     LOG_ERROR("Temporary RF_String creation from a UTF-16 Little Endian buffer failed due to UTF-16 decoding failure",RE_UTF16_INVALID_SEQUENCE);
 | |
| #elif RF_OPTION_SOURCE_ENCODING == RF_UTF16_BE
 | |
|     LOG_ERROR("Temporary RF_String creation from a UTF-16 Big Endian buffer failed due to UTF-16 decoding failure",RE_UTF16_INVALID_SEQUENCE);
 | |
| #endif
 | |
|     free(codepoints);
 | |
|     if(buffAllocated == true)
 | |
|         free(buff);
 | |
|     return 0;
 | |
| #endif
 | |
| }
 | |
| RF_String* i_NVrfString_CreateLocal(const char* s)
 | |
| {
 | |
| #if RF_OPTION_SOURCE_ENCODING != RF_UTF8
 | |
|     uint32_t characterLength,*codepoints,i=0,j;
 | |
|     char* buff;
 | |
| #endif
 | |
|     // remember the stack pointer before this macro evaluation
 | |
|     rfLMS_MacroEvalPtr(RF_LMS);
 | |
| // /===Start of Non-UTF-8 code===// /
 | |
| #if (RF_OPTION_SOURCE_ENCODING  ==  RF_UTF16_LE) || (RF_OPTION_SOURCE_ENCODING  ==  RF_UTF16_BE)
 | |
|     // find the bytelength of the UTF-16 buffer
 | |
|     while(s[i] != '\0' &&s[i+1]!= '\0')
 | |
|         i++;
 | |
|     i+=2;
 | |
|     // allocate the codepoint buffer
 | |
|     RF_MALLOC(codepoints,i/2)
 | |
| #elif (RF_OPTION_SOURCE_ENCODING  ==  RF_UTF32_LE) || (RF_OPTION_SOURCE_ENCODING  ==  RF_UTF32_BE)
 | |
|     // find the bytelength of the UTF-32 buffer
 | |
|     while(s[i] != '\0' && s[i+1]!= '\0' && s[i+2]!= '\0' && s[i+3]!= '\0')
 | |
|         i++;
 | |
|     i+=4;
 | |
|     // allocate the codepoint buffer
 | |
|     RF_MALLOC(codepoints,i)
 | |
| #endif
 | |
| #if (RF_OPTION_SOURCE_ENCODING  ==  RF_UTF16_LE)// decode the UTF16
 | |
|     if(rfUTILS_Endianess() == RF_LITTLE_ENDIAN)
 | |
|         if(rfUTF16_Decode(s,&characterLength,codepoints) == false)
 | |
|             goto cleanup;
 | |
|     else
 | |
|         if(rfUTF16_Decode_swap(s,&characterLength,codepoints)==false)
 | |
|             goto cleanup;
 | |
| 
 | |
| #elif RF_OPTION_SOURCE_ENCODING == RF_UTF16_BE// decode the UTF16
 | |
|     if(rfUTILS_Endianess() == RF_LITTLE_ENDIAN)
 | |
|         if(rfUTF16_Decode_swap(s,&characterLength,codepoints) == false)
 | |
|             goto cleanup;
 | |
|     else
 | |
|         if(rfUTF16_Decode(s,&characterLength,codepoints)==false)
 | |
|             goto cleanup;
 | |
| #elif RF_OPTION_SOURCE_ENCODING == RF_UTF32_LE// copy the UTF32 into the codepoint
 | |
|     memcpy(codepoints,s,i);
 | |
|     if(rfUTILS_Endianess != RF_LITTLE_ENDIAN)
 | |
|     {
 | |
|         for(j=0;j<i;j+=4)
 | |
|         {
 | |
|             rfUTILS_SwapEndianUI((uint32_t*)(codepoints+j))
 | |
|         }
 | |
|     }
 | |
| #elif RF_OPTION_SOURCE_ENCODING == RF_UTF32_BE// copy the UTF32 into the codepoint
 | |
|     memcpy(codepoints,s,i);
 | |
|     if(rfUTILS_Endianess !RF_BIG_ENDIAN RF_LITTLE_ENDIAN)
 | |
|     {
 | |
|         for(j=0;j<i;j+=4)
 | |
|         {
 | |
|             rfUTILS_SwapEndianUI((uint32_t*)(codepoints+j))
 | |
|         }
 | |
|     }
 | |
| #endif
 | |
| #if RF_OPTION_SOURCE_ENCODING != RF_UTF8 // in any case other than UTF-8 encode the codepoints into UTF-8 , and free them
 | |
|     if((buff =  rfUTF8_Encode(codepoints,characterLength,&byteLength)) == 0)
 | |
|     {
 | |
|         LOG_ERROR("While attempting to create a temporary RF_String the given byte sequence could not be properly encoded into UTF-8",RE_UTF8_ENCODING);
 | |
|         free(codepoints);
 | |
|         return 0;
 | |
|     }
 | |
|     free(codepoints);
 | |
| #endif
 | |
| // /===End of Non-UTF-8 code===// /
 | |
|     // check for validity of the given sequence and get the character length
 | |
|     uint32_t byteLength;
 | |
| #if RF_OPTION_SOURCE_ENCODING == RF_UTF8
 | |
|     if( rfUTF8_VerifySequence(s,&byteLength) == RF_FAILURE)
 | |
| #else
 | |
|     if( rfUTF8_VerifySequence(buff,&byteLength) == RF_FAILURE)
 | |
| #endif
 | |
|     {
 | |
|         LOG_ERROR("Error at String Allocation due to invalid UTF-8 byte sequence",RE_STRING_INIT_FAILURE);
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     RF_String* ret;
 | |
|     ret = rfLMS_Push(RF_LMS,sizeof(RF_String));
 | |
|     if(ret == 0)
 | |
|     {
 | |
|         LOG_ERROR("Memory allocation from the Local Memory Stack failed during string allocation. Insufficient local memory stack space. Consider compiling the library with bigger stack space. Quitting proccess...",
 | |
|                   RE_LOCALMEMSTACK_INSUFFICIENT);
 | |
|         exit(RE_LOCALMEMSTACK_INSUFFICIENT);
 | |
|     }
 | |
|     // get length
 | |
|     ret->byteLength = byteLength;
 | |
| 
 | |
|     ret->bytes = rfLMS_Push(RF_LMS,ret->byteLength+1);
 | |
|     if(ret->bytes == 0)
 | |
|     {
 | |
|         LOG_ERROR("Memory allocation from the Local Memory Stack failed during string allocation. Insufficient local memory stack space. Consider compiling the library with bigger stack space. Quitting proccess...",
 | |
|                   RE_LOCALMEMSTACK_INSUFFICIENT);
 | |
|         exit(RE_LOCALMEMSTACK_INSUFFICIENT);
 | |
|     }
 | |
| #if RF_OPTION_SOURCE_ENCODING == RF_UTF8
 | |
|     memcpy(ret->bytes,s,ret->byteLength+1);
 | |
| #else
 | |
|     memcpy(ret->bytes,buff,ret->byteLength+1);
 | |
| #endif
 | |
|     return ret;
 | |
| 
 | |
| // /cleanup code for non-UTF-8 cases
 | |
| #if (RF_OPTION_SOURCE_ENCODING == RF_UTF16_LE) || (RF_OPTION_SOURCE_ENCODING == RF_UTF16_BE)
 | |
| cleanup:
 | |
| #if RF_OPTION_SOURCE_ENCODING == RF_UTF16_LE
 | |
|     LOG_ERROR("Temporary RF_String creation from a UTF-16 Little Endian buffer failed due to UTF-16 decoding failure",RE_UTF16_INVALID_SEQUENCE);
 | |
| #elif RF_OPTION_SOURCE_ENCODING == RF_UTF16_BE
 | |
|     LOG_ERROR("Temporary RF_String creation from a UTF-16 Big Endian buffer failed due to UTF-16 decoding failure",RE_UTF16_INVALID_SEQUENCE);
 | |
| #endif
 | |
|     free(codepoints);
 | |
|     return 0;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| // Initializes a string with the given characters. Given characters have to be in UTF-8. A check for valide sequence of bytes is performed.<b>Can't be used with RF_StringX</b>
 | |
| #ifndef RF_OPTION_DEFAULT_ARGUMENTS
 | |
| char rfString_Init(RF_String* str,const char* s,...)
 | |
| #else
 | |
| char i_rfString_Init(RF_String* str,const char* s,...)
 | |
| #endif
 | |
| {
 | |
|     READ_VSNPRINTF_ARGS(s,s,false)
 | |
|     // check for validity of the given sequence and get the character length
 | |
|     uint32_t byteLength;
 | |
|     if( rfUTF8_VerifySequence(buff,&byteLength) == RF_FAILURE)
 | |
|     {
 | |
|         LOG_ERROR("Error at String Initialization due to invalid UTF-8 byte sequence",RE_STRING_INIT_FAILURE);
 | |
|         if(buffAllocated==true)
 | |
|             free(buff);
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     // get length
 | |
|     str->byteLength = byteLength;
 | |
| 
 | |
|     // now that we know the length we can allocate the buffer and copy the bytes
 | |
|     RF_MALLOC(str->bytes,str->byteLength+1);
 | |
|     memcpy(str->bytes,buff,str->byteLength+1);
 | |
|     if(buffAllocated == true)
 | |
|         free(buff);
 | |
|     return true;
 | |
| }
 | |
| #ifdef RF_OPTION_DEFAULT_ARGUMENTS
 | |
| char i_NVrfString_Init(RF_String* str,const char* s)
 | |
| {
 | |
|     // check for validity of the given sequence and get the character length
 | |
|     uint32_t byteLength;
 | |
|     if( rfUTF8_VerifySequence(s,&byteLength) == RF_FAILURE)
 | |
|     {
 | |
|         LOG_ERROR("Error at String Initialization due to invalid UTF-8 byte sequence",RE_STRING_INIT_FAILURE);
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     // get length
 | |
|     str->byteLength = byteLength;
 | |
| 
 | |
|     // now that we know the length we can allocate the buffer and copy the bytes
 | |
|     RF_MALLOC(str->bytes,str->byteLength+1);
 | |
|     memcpy(str->bytes,s,str->byteLength+1);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| // Allocates a String by turning a unicode code point in a String (encoded in UTF-8).
 | |
| RF_String* rfString_Create_cp(uint32_t codepoint)
 | |
| {
 | |
|     RF_String* ret;
 | |
|     RF_MALLOC(ret,sizeof(RF_String));
 | |
|     if(rfString_Init_cp(ret,codepoint) == true)
 | |
|     {
 | |
|         return ret;
 | |
|     }
 | |
|     // failure
 | |
|     free(ret);
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| // Initializes a string by turning a unicode code point in a String (encoded in UTF-8).
 | |
| char rfString_Init_cp(RF_String* str, uint32_t codepoint)
 | |
| {
 | |
|     // alloc enough for a character
 | |
|     RF_MALLOC(str->bytes,5)
 | |
| 
 | |
|     // if we only need a byte to encode it
 | |
|     if(RF_HEXLE_UI(codepoint,0x007f))
 | |
|     {
 | |
|         str->bytes[0] = codepoint;
 | |
|         str->bytes[1] = '\0';
 | |
|         str->byteLength = 1;
 | |
|     }
 | |
|     // if we need 2 bytes to encode it
 | |
|     else if( RF_HEXGE_UI(codepoint,0x0080) && RF_HEXLE_UI(codepoint,0x07ff))
 | |
|     {
 | |
|         // get the first bits of the first byte and encode them to the first byte
 | |
|         str->bytes[1] = (codepoint & 0x3F)|(0x02<<6);
 | |
|         // get the 5 following bits and encode them in the second byte
 | |
|         str->bytes[0] = ((codepoint & 0x7C0) >> 6)  | (0x6<<5);
 | |
|         str->bytes[2] = '\0';
 | |
|         str->byteLength = 2;
 | |
|     }
 | |
|     // if we need 3 bytes to encode it
 | |
|     else if( RF_HEXGE_UI(codepoint,0x0800) && RF_HEXLE_UI(codepoint,0x0ffff))
 | |
|     {
 | |
|         // get the first bits of the first byte and encode them to the first byte
 | |
|         str->bytes[2] = (codepoint & 0x3F)|(0x02<<6);
 | |
|         // get the 6 following bits and encode them in the second byte
 | |
|         str->bytes[1] = ((codepoint & 0xFC0) >> 6)  | (0x02<<6);
 | |
|         // get the 4 following bits and encode them in the third byte
 | |
|         str->bytes[0] = (((codepoint & 0xF000))>>12) | (0xE<<4);
 | |
|         str->bytes[3] = '\0';
 | |
|         str->byteLength = 3;
 | |
|     }
 | |
|     // if we need 4 bytes to encode it
 | |
|     else if( RF_HEXGE_UI(codepoint,0x10000) && RF_HEXLE_UI(codepoint,0x10ffff))
 | |
|     {
 | |
|         // get the first bits of the first byte and encode them to the first byte
 | |
|         str->bytes[3] = (codepoint & 0x3F)|(0x02<<6);
 | |
|         // get the 6 following bits and encode them in the second byte
 | |
|         str->bytes[2] = ((codepoint & 0xFC0) >> 6)  | (0x02<<6);
 | |
|         // get the 6 following bits and encode them in the third byte
 | |
|         str->bytes[1] = (((codepoint & 0x3F000))>>12) | (0x02<<6);
 | |
|         // get the 3 following bits and encode them in the fourth byte
 | |
|         str->bytes[0] = (((codepoint & 0x1C0000))>>18) | (0x1E<<3);
 | |
|         str->bytes[4] = '\0';
 | |
|         str->byteLength = 4;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         LOG_ERROR("Attempted to encode an invalid unicode code point into a string",RE_UTF8_INVALID_CODE_POINT);
 | |
|         free(str->bytes);
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| 
 | |
| // Allocates and returns a string with the given integer
 | |
| RF_String* rfString_Create_i(int32_t i)
 | |
| {
 | |
|     // the size of the int32_t buffer
 | |
|     int32_t numLen;
 | |
|     // put the int32_t into a buffer and turn it in a char*
 | |
|     char buff[12];// max uint32_t is 4,294,967,295 in most environment so 12 chars will certainly fit it
 | |
|     sprintf(buff,"%d",i);
 | |
|     numLen = strlen(buff);
 | |
| 
 | |
|     // initialize the string and return it
 | |
|     RF_String* ret;
 | |
|     RF_MALLOC(ret,sizeof(RF_String));
 | |
|     ret->byteLength = numLen;
 | |
|     RF_MALLOC(ret->bytes,numLen+1);
 | |
|     strcpy(ret->bytes,buff);
 | |
|     return ret;
 | |
| }
 | |
| // Initializes a string with the given integer.
 | |
| char rfString_Init_i(RF_String* str, int32_t i)
 | |
| {
 | |
|     // the size of the int32_t buffer
 | |
|     int32_t numLen;
 | |
|     // put the int32_t into a buffer and turn it in a char*
 | |
|     char buff[12];// max uint32_t is 4,294,967,295 in most environment so 12 chars will certainly fit it
 | |
|     sprintf(buff,"%d",i);
 | |
|     numLen = strlen(buff);
 | |
| 
 | |
| 
 | |
|     str->byteLength = numLen;
 | |
|     RF_MALLOC(str->bytes,numLen+1);
 | |
|     strcpy(str->bytes,buff);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| // Allocates and returns a string with the given float
 | |
| RF_String* rfString_Create_f(float f)
 | |
| {
 | |
|     // allocate a buffer for the float in characters
 | |
|     char* buff;
 | |
|     RF_MALLOC(buff,128);
 | |
|     sprintf(buff,"%f",f);
 | |
|     uint32_t len = strlen(buff);
 | |
| 
 | |
|     // initialize and return the string
 | |
|     RF_String* ret;
 | |
|     RF_MALLOC(ret,sizeof(RF_String));
 | |
|     ret->byteLength = len;
 | |
|     RF_MALLOC(ret->bytes,len+1);
 | |
|     strcpy(ret->bytes,buff);
 | |
| 
 | |
|     free(buff);
 | |
| 
 | |
|     return ret;
 | |
| }
 | |
| // Initializes a string with the given float
 | |
| char rfString_Init_f(RF_String* str,float f)
 | |
| {
 | |
|     // allocate a buffer for the float in characters
 | |
|     char* buff;
 | |
|     RF_MALLOC(buff,128);
 | |
|     sprintf(buff,"%f",f);
 | |
|     uint32_t len = strlen(buff);
 | |
| 
 | |
| 
 | |
|     str->byteLength = len;
 | |
|     RF_MALLOC(str->bytes,len+1);
 | |
|     strcpy(str->bytes,buff);
 | |
|     free(buff);
 | |
| 
 | |
|     // success
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| // Allocates and returns a string with the given UTF-16 byte sequence. Given characters have to be in UTF-16. A check for valid sequence of bytes is performed.<b>Can't be used with RF_StringX</b>
 | |
| RF_String* rfString_Create_UTF16(const char* s,char endianess)
 | |
| {
 | |
|     RF_String* ret;
 | |
|     RF_MALLOC(ret,sizeof(RF_String));
 | |
|     if(rfString_Init_UTF16(ret,s,endianess)==false)
 | |
|     {
 | |
|         free(ret);
 | |
|         return 0;
 | |
|     }
 | |
|     return ret;
 | |
| }
 | |
| // Initializes a string with the given UTF-16 byte sequence. Given characters have to be in UTF-16. A check for valid sequence of bytes is performed.<b>Can't be used with RF_StringX</b>
 | |
| char rfString_Init_UTF16(RF_String* str,const char* s,char endianess)
 | |
| {
 | |
|     // decode the utf-16 and get the code points
 | |
|     uint32_t* codepoints;
 | |
|     uint32_t byteLength,characterLength,utf8ByteLength;
 | |
|     char* utf8;
 | |
|     byteLength = 0;
 | |
|     while(s[byteLength]!= 0 || s[byteLength+1]!=0)
 | |
|     {
 | |
|         byteLength++;
 | |
|     }
 | |
|     byteLength+=3;// for the last utf-16 null termination character
 | |
|     RF_MALLOC(codepoints,byteLength*2) // allocate the codepoints
 | |
|     // parse the given byte stream depending on the endianess parameter
 | |
|     switch(endianess)
 | |
|     {
 | |
|         case RF_LITTLE_ENDIAN:
 | |
|         case RF_BIG_ENDIAN:
 | |
|             if(rfUTILS_Endianess() == endianess)// same endianess as the local
 | |
|             {
 | |
|                 if(rfUTF16_Decode(s,&characterLength,codepoints) == false)
 | |
|                 {
 | |
|                     free(codepoints);
 | |
|                     LOG_ERROR("String initialization failed due to invalide UTF-16 sequence",RE_STRING_INIT_FAILURE);
 | |
|                     return false;
 | |
|                 }
 | |
|             }
 | |
|             else// different
 | |
|             {
 | |
|                 if(rfUTF16_Decode_swap(s,&characterLength,codepoints) == false)
 | |
|                 {
 | |
|                     free(codepoints);
 | |
|                     LOG_ERROR("String initialization failed due to invalide UTF-16 sequence",RE_STRING_INIT_FAILURE);
 | |
|                     return false;
 | |
|                 }
 | |
|             }
 | |
|         break;
 | |
|         default:
 | |
|             LOG_ERROR("Illegal endianess value provided",RE_INPUT);
 | |
|             free(codepoints);
 | |
|             return false;
 | |
|         break;
 | |
|     }// switch ends
 | |
|     // now encode these codepoints into UTF8
 | |
|     if( (utf8 = rfUTF8_Encode(codepoints,characterLength,&utf8ByteLength))==0)
 | |
|     {
 | |
|         free(codepoints);
 | |
|         return false;
 | |
|     }
 | |
|     // success
 | |
|     free(codepoints);
 | |
|     str->bytes = utf8;
 | |
|     str->byteLength = utf8ByteLength;
 | |
|     return true;
 | |
| 
 | |
| }
 | |
| 
 | |
| // Allocates and returns a string with the given UTF-32 byte sequence. Given characters have to be in UTF-32.
 | |
| RF_String* rfString_Create_UTF32(const char* s)
 | |
| {
 | |
|     RF_String* ret;
 | |
|     RF_MALLOC(ret,sizeof(RF_String));
 | |
|     if(rfString_Init_UTF32(ret,s)==false)
 | |
|     {
 | |
|         free(ret);
 | |
|         return 0;
 | |
|     }
 | |
|     return ret;
 | |
| }
 | |
| // Initializes a string with the given UTF-32 byte sequence. Given characters have to be in UTF-32.
 | |
| char rfString_Init_UTF32(RF_String* str,const char* s)
 | |
| {
 | |
|     char swapE = false;
 | |
|     uint32_t off = 0;
 | |
|     int32_t i = 0;
 | |
| 
 | |
|     // get the buffer and if swapping is needed do it for all character
 | |
|     uint32_t* codeBuffer = (uint32_t*)(s+off);
 | |
| 
 | |
|     // first of all check for existence of BOM in the beginning of the sequence
 | |
|     if(RF_HEXEQ_UI(codeBuffer[0],0xFEFF))// big endian
 | |
|     {
 | |
|         if(rfUTILS_Endianess()==RF_LITTLE_ENDIAN)
 | |
|             swapE = true;
 | |
|     }
 | |
|     if(RF_HEXEQ_UI(codeBuffer[0],0xFFFE0000))// little
 | |
|     {
 | |
|         if(rfUTILS_Endianess()==RF_BIG_ENDIAN)
 | |
|             swapE = true;
 | |
|     }
 | |
|     else// according to the standard no BOM means big endian
 | |
|     {
 | |
|         if(rfUTILS_Endianess() == RF_LITTLE_ENDIAN)
 | |
|             swapE = true;
 | |
|     }
 | |
| 
 | |
|     // if we need to have endianess swapped do it
 | |
|     if(swapE==true)
 | |
|     {
 | |
|         while(codeBuffer[i] != 0)
 | |
|         {
 | |
|             rfUTILS_SwapEndianUI(codeBuffer+i);
 | |
|             i++;
 | |
|         }
 | |
|     }
 | |
|     // find the length of the utf32 buffer in characters
 | |
|     uint32_t length;
 | |
|     rfUTF32_Length(codeBuffer,length);
 | |
| 
 | |
|     // turn the codepoints into a utf-8 encoded buffer
 | |
|     char* utf8;uint32_t utf8ByteLength;
 | |
|     if((utf8=rfUTF8_Encode(codeBuffer,length,&utf8ByteLength)) == 0)
 | |
|     {
 | |
|         return false;// error
 | |
|     }
 | |
|     // if the encoding happened correctly
 | |
|     if(codeBuffer != 0)
 | |
|     {
 | |
|         str->bytes = (char*)codeBuffer;
 | |
|         str->byteLength = utf8ByteLength;
 | |
|         return true;
 | |
|     }
 | |
|     // else return failure
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| // Assigns the value of the source string to the destination.Both strings should already be initialized and hold a value. It is an error to give null parameters.
 | |
| void i_rfString_Assign(RF_String* dest,void* sourceP)
 | |
| {
 | |
|     RF_String* source = (RF_String*)sourceP;
 | |
|     // only if the new string value won't fit in the buffer reallocate the buffer (let's avoid unecessary reallocs)
 | |
|     if(source->byteLength > dest->byteLength)
 | |
|     {
 | |
|         RF_REALLOC(dest->bytes,char,source->byteLength+1);
 | |
|     }
 | |
|     // now copy the value
 | |
|     memcpy(dest->bytes,source->bytes,source->byteLength+1);
 | |
|     // and fix the lengths
 | |
|     dest->byteLength = source->byteLength;
 | |
| }
 | |
| 
 | |
| // Assigns the value of a unicode character to the string
 | |
| char rfString_Assign_char(RF_String* str,uint32_t codepoint)
 | |
| {
 | |
|     // realloc if needed
 | |
|     if(str->byteLength <5)
 | |
|     {
 | |
|         RF_REALLOC(str->bytes,char,5);
 | |
|     }
 | |
|     // if we only need a byte to encode it
 | |
|     if(RF_HEXLE_UI(codepoint,0x007f))
 | |
|     {
 | |
|         str->bytes[0] = codepoint;
 | |
|         str->bytes[1] = '\0';
 | |
|         str->byteLength = 1;
 | |
|     }
 | |
|     // if we need 2 bytes to encode it
 | |
|     else if( RF_HEXGE_UI(codepoint,0x0080) && RF_HEXLE_UI(codepoint,0x07ff))
 | |
|     {
 | |
|         // get the first bits of the first byte and encode them to the first byte
 | |
|         str->bytes[1] = (codepoint & 0x3F)|(0x02<<6);
 | |
|         // get the 5 following bits and encode them in the second byte
 | |
|         str->bytes[0] = ((codepoint & 0x7C0) >> 6)  | (0x6<<5);
 | |
|         str->bytes[2] = '\0';
 | |
|         str->byteLength = 2;
 | |
|     }
 | |
|     // if we need 3 bytes to encode it
 | |
|     else if( RF_HEXGE_UI(codepoint,0x0800) && RF_HEXLE_UI(codepoint,0x0ffff))
 | |
|     {
 | |
|         // get the first bits of the first byte and encode them to the first byte
 | |
|         str->bytes[2] = (codepoint & 0x3F)|(0x02<<6);
 | |
|         // get the 6 following bits and encode them in the second byte
 | |
|         str->bytes[1] = ((codepoint & 0xFC0) >> 6)  | (0x02<<6);
 | |
|         // get the 4 following bits and encode them in the third byte
 | |
|         str->bytes[0] = (((codepoint & 0xF000))>>12) | (0xE<<4);
 | |
|         str->bytes[3] = '\0';
 | |
|         str->byteLength = 3;
 | |
|     }
 | |
|     // if we need 4 bytes to encode it
 | |
|     else if( RF_HEXGE_UI(codepoint,0x10000) && RF_HEXLE_UI(codepoint,0x10ffff))
 | |
|     {
 | |
|         // get the first bits of the first byte and encode them to the first byte
 | |
|         str->bytes[3] = (codepoint & 0x3F)|(0x02<<6);
 | |
|         // get the 6 following bits and encode them in the second byte
 | |
|         str->bytes[2] = ((codepoint & 0xFC0) >> 6)  | (0x02<<6);
 | |
|         // get the 6 following bits and encode them in the third byte
 | |
|         str->bytes[1] = (((codepoint & 0x3F000))>>12) | (0x02<<6);
 | |
|         // get the 3 following bits and encode them in the fourth byte
 | |
|         str->bytes[0] = (((codepoint & 0x1C0000))>>18) | (0x1E<<3);
 | |
|         str->bytes[4] = '\0';
 | |
|         str->byteLength = 4;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         LOG_ERROR("Attempted to encode an invalid unicode code point into a string",RE_UTF8_INVALID_CODE_POINT);
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| // Allocates and returns a string with the given characters. NO VALID-UTF8 check is performed
 | |
| #ifndef RF_OPTION_DEFAULT_ARGUMENTS
 | |
| RF_String* rfString_Create_nc(const char* s,...)
 | |
| #else
 | |
| RF_String* i_rfString_Create_nc(const char* s,...)
 | |
| #endif
 | |
| {
 | |
|     RF_String* ret;
 | |
|     RF_MALLOC(ret,sizeof(RF_String));
 | |
|     // get  the formatted string
 | |
|     READ_VSNPRINTF_ARGS(s,s,0);
 | |
|     // get the lengt of the byte buffer
 | |
|     ret->byteLength = bytesWritten;
 | |
| 
 | |
|     // now that we know the length we can allocate the buffer and copy the bytes
 | |
|     RF_MALLOC(ret->bytes,ret->byteLength+1);
 | |
|     memcpy(ret->bytes,buff,ret->byteLength+1);
 | |
|     if(buffAllocated)
 | |
|         free(buff);
 | |
|     return ret;
 | |
| }
 | |
| #ifdef RF_OPTION_DEFAULT_ARGUMENTS
 | |
| RF_String* i_NVrfString_Create_nc(const char* s)
 | |
| {
 | |
|     RF_String* ret;
 | |
|     RF_MALLOC(ret,sizeof(RF_String));
 | |
|     // get length
 | |
|     ret->byteLength = strlen(s);
 | |
| 
 | |
|     // now that we know the length we can allocate the buffer and copy the bytes
 | |
|     RF_MALLOC(ret->bytes,ret->byteLength+1);
 | |
|     memcpy(ret->bytes,s,ret->byteLength+1);
 | |
|     return ret;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| // Initializes a string with the given characters. NO VALID-UTF8 check is performed
 | |
| #ifndef RF_OPTION_DEFAULT_ARGUMENTS
 | |
| char rfString_Init_nc(struct RF_String* str,const char* s,...)
 | |
| #else
 | |
| char i_rfString_Init_nc(struct RF_String* str,const char* s,...)
 | |
| #endif
 | |
| {
 | |
|     // get the formatted string
 | |
|     READ_VSNPRINTF_ARGS(s,s,false)
 | |
|     // get its length
 | |
|     str->byteLength = bytesWritten;
 | |
| 
 | |
|     // now that we know the length we can allocate the buffer and copy the bytes
 | |
|     RF_MALLOC(str->bytes,str->byteLength+1);
 | |
|     memcpy(str->bytes,buff,str->byteLength+1);
 | |
|     if(buffAllocated == true)
 | |
|         free(buff);
 | |
|     return true;
 | |
| }
 | |
| #ifdef RF_OPTION_DEFAULT_ARGUMENTS
 | |
| char i_NVrfString_Init_nc(struct RF_String* str,const char* s)
 | |
| {
 | |
|     // get its length
 | |
|     str->byteLength = strlen(s);
 | |
| 
 | |
|     // now that we know the length we can allocate the buffer and copy the bytes
 | |
|     RF_MALLOC(str->bytes,str->byteLength+1);
 | |
|     memcpy(str->bytes,s,str->byteLength+1);
 | |
|     return true;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| /*-------------------------------------------------------------------------Methods to get rid of an RF_String-------------------------------------------------------------------------------*/
 | |
| 
 | |
| // Deletes a string object and also frees its pointer.It is an error to give a NULL(0x0) string for deleting. Will most probably lead to a segmentation fault
 | |
| void rfString_Destroy(RF_String* s)
 | |
| {
 | |
|     free(s->bytes);
 | |
|     free(s);
 | |
| }
 | |
| // Deletes a string object only, not its memory.It is an error to give a NULL(0x0) string for deleting. Will most probably lead to a segmentation fault
 | |
| void rfString_Deinit(RF_String* s)
 | |
| {
 | |
|     free(s->bytes);
 | |
| }
 | |
| /*------------------------------------------------------------------------ RF_String unicode conversio functions-------------------------------------------------------------------------------*/
 | |
| 
 | |
| // Returns the strings contents as a UTF-16 buffer
 | |
| uint16_t* rfString_ToUTF16(RF_String* s,uint32_t* length)
 | |
| {
 | |
|     uint32_t* codepoints,charsN;
 | |
|     // get the unicode codepoints, no check here since RF_String is always guaranteed to have valid UTF=8 and as such valid codepoints
 | |
|     codepoints = rfUTF8_Decode(s->bytes,s->byteLength,&charsN);
 | |
|     // encode them in UTF-16, no check here since it comes from an RF_String which is always guaranteed to have valid UTF-8 and as such valid codepoints
 | |
|     return rfUTF16_Encode(codepoints,charsN,length);
 | |
| }
 | |
| 
 | |
| // Returns the strings contents as a UTF-32 buffer
 | |
| uint32_t* rfString_ToUTF32(RF_String* s,uint32_t* length)
 | |
| {
 | |
|     // get the unicode codepoints, no check here since RF_String is always guaranteed to have valid UTF=8 and as such valid codepoints
 | |
|     return rfUTF8_Decode(s->bytes,s->byteLength,length);
 | |
| }
 | |
| 
 | |
| /*------------------------------------------------------------------------ RF_String retrieval functions-------------------------------------------------------------------------------*/
 | |
| // Finds the length of the string in characters
 | |
| uint32_t rfString_Length(void* str)
 | |
| {
 | |
|     RF_String* s = (RF_String*)str;
 | |
|     uint32_t length,i;
 | |
|     RF_STRING_ITERATE_START(s,length,i)
 | |
|     RF_STRING_ITERATE_END(length,i);
 | |
|     return length;
 | |
| }
 | |
| 
 | |
| // Retrieves the unicode code point of the parameter character.
 | |
| uint32_t rfString_GetChar(void* str,uint32_t c)
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*)str;
 | |
|     uint32_t length,i;
 | |
|     uint32_t codePoint = RF_STRING_INDEX_OUT_OF_BOUNDS;
 | |
|     RF_STRING_ITERATE_START(thisstr,length,i)
 | |
|         // if we found the character,inspect the 4 different cases
 | |
|         if(length == c)
 | |
|         {
 | |
|             // take the codepoint from the byte position and break from the loop
 | |
|             codePoint = rfString_BytePosToCodePoint(thisstr,i);
 | |
|             break;
 | |
|         }
 | |
|     RF_STRING_ITERATE_END(length,i)
 | |
| 
 | |
|     // and return the code point. Notice that if the character was not found this will return RF_STRING_INDEX_OUT_OF_BOUNDS
 | |
|     return codePoint;
 | |
| }
 | |
| 
 | |
| // Retrieves the unicode code point of the parameter bytepos of the string. If the byte position is not the start of a character 0 is returned. This is an internal function, there is no need to use it. <i>Can be used with StringX</i>
 | |
| uint32_t rfString_BytePosToCodePoint(void* str,uint32_t i)
 | |
| {
 | |
|     uint32_t codePoint=0;
 | |
|     RF_String* thisstr = (RF_String*)str;
 | |
|     // /Here I am not checking if byte position 'i' is withing bounds and especially if it is a start of a character
 | |
|     // / This is assumed to have been checked or to be known beforehand by the programmer. That's one of the reasons
 | |
|     // / why this is an internal function and should not be used unless you know what you are doing
 | |
|     // if the lead bit of the byte is 0 then range is : U+0000 to U+0007F (1 byte)
 | |
|     if( ((thisstr->bytes[i] & 0x80)>>7) == 0 )
 | |
|     {
 | |
|         // and the code point is this whole byte only
 | |
|         codePoint = thisstr->bytes[i];
 | |
|     }
 | |
|     // if the leading bits are in the form of 0b110xxxxx then range is: U+0080 to U+07FF (2 bytes)
 | |
|     else if ( RF_HEXEQ_C( ( (~(thisstr->bytes[i] ^  0xC0))>>5),0x7) )
 | |
|     {
 | |
|         codePoint =0;
 | |
|         // from the second byte take the first 6 bits
 | |
|         codePoint = (thisstr->bytes[i+1] & 0x3F) ;
 | |
|         // from the first byte take the first 5 bits and put them in the start
 | |
|         codePoint |= ((thisstr->bytes[i] & 0x1F) << 6);
 | |
|     }
 | |
|     // if the leading bits are in the form of 0b1110xxxx then range is U+0800 to U+FFFF  (3 bytes)
 | |
|     else if( RF_HEXEQ_C( ( (~(thisstr->bytes[i] ^ 0xE0))>>4),0xF) )
 | |
|     {
 | |
|         codePoint = 0;
 | |
|         // from the third byte take the first 6 bits
 | |
|         codePoint = (thisstr->bytes[i+2] & 0x3F) ;
 | |
|         // from the second byte take the first 6 bits and put them to the left of the previous 6 bits
 | |
|         codePoint |= ((thisstr->bytes[i+1] & 0x3F) << 6);
 | |
|         // from the first byte take the first 4 bits and put them to the left of the previous 6 bits
 | |
|         codePoint |= ((thisstr->bytes[i] & 0xF) << 12);
 | |
|     }
 | |
|     // if the leading bits are in the form of 0b11110xxx then range is U+010000 to U+10FFFF (4 bytes)
 | |
|     else if( RF_HEXEQ_C( ( (~(thisstr->bytes[i] ^ 0xF0))>>3), 0x1F))
 | |
|     {
 | |
|         codePoint = 0;
 | |
|         // from the fourth byte take the first 6 bits
 | |
|         codePoint = (thisstr->bytes[i+3] & 0x3F) ;
 | |
|         // from the third byte take the first 6 bits and put them to the left of the previous 6 bits
 | |
|         codePoint |= ((thisstr->bytes[i+2] & 0x3F) << 6);
 | |
|         // from the second byte take the first 6 bits and put them to the left of the previous 6 bits
 | |
|         codePoint |= ((thisstr->bytes[i+1] & 0x3F) << 12);
 | |
|         // from the first byte take the first 3 bits and put them to the left of the previous 6 bits
 | |
|         codePoint |= ((thisstr->bytes[i] & 0x7) << 18);
 | |
|     }
 | |
| 
 | |
|     return codePoint;
 | |
| }
 | |
| 
 | |
| 
 | |
| // Retrieves character position of a byte position
 | |
| uint32_t rfString_BytePosToCharPos(void* thisstrP,uint32_t bytepos,char before)
 | |
| {
 | |
|     // /here there is no check if this is actually a byte pos inside the string's
 | |
|     // /byte buffer. The programmer should have made sure it is before hand. This is why it is
 | |
|     // / an internal function and should only be used if you know what you are doing
 | |
|     RF_String* thisstr = (RF_String*)thisstrP;
 | |
|     uint32_t charPos = 0;
 | |
|     uint32_t byteI = 0;
 | |
|     // iterate the string's bytes until you get to the required byte
 | |
|     // if it is not a continuation byte, return the position
 | |
|     if(rfUTF8_IsContinuationByte(thisstr->bytes[bytepos])==false)
 | |
|     {
 | |
|         RF_STRING_ITERATE_START(thisstr,charPos,byteI)
 | |
|             if(byteI == bytepos)
 | |
|                 return charPos;
 | |
|         RF_STRING_ITERATE_END(charPos,byteI)
 | |
|     }
 | |
|     // else  iterate the string's bytes until you get anything bigger than the required byte
 | |
|     RF_STRING_ITERATE_START(thisstr,charPos,byteI)
 | |
|         if(byteI > bytepos)
 | |
|             break;
 | |
|     RF_STRING_ITERATE_END(charPos,byteI)
 | |
|     // if we need the previous one return it
 | |
|     if(before == true)
 | |
|         return charPos-1;
 | |
|     // else return this
 | |
|     return charPos;
 | |
| }
 | |
| 
 | |
| // Compares two Strings and returns true if they are equal and false otherwise
 | |
| char i_rfString_Equal(void* s1P,void* s2P)
 | |
| {
 | |
|     RF_String* s1 = (RF_String*)s1P;
 | |
|     RF_String* s2 = (RF_String*)s2P;
 | |
|     if( strcmp(s1->bytes,s2->bytes)==0)
 | |
|     {
 | |
|         return true;
 | |
|     }
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| // Finds the existence of String sstr inside this string, either matching case or not
 | |
| int32_t i_rfString_Find(const void* str,const void* sstrP,const char* optionsP)
 | |
| {
 | |
|     // / @note TO SELF: If I make any changes to this function do not forget to change the private version that returns byte position too
 | |
|     // / located at string_private.c and called rfString_FindByte and rfString_FindByte_s
 | |
|     RF_String* thisstr = (RF_String*)str;
 | |
|     RF_String* sstr = (RF_String*)sstrP;
 | |
|     char options = *optionsP;
 | |
| 
 | |
|     char* found = 0;
 | |
|     // if we want to match the case of the string then it's a simple search of matching characters
 | |
|     if( (RF_BITFLAG_ON( options,RF_CASE_IGNORE)) == false)
 | |
|     {
 | |
|         // if it is not found
 | |
|         if( (found = strstr(thisstr->bytes,sstr->bytes)) == 0)
 | |
|         {
 | |
|             return RF_FAILURE;
 | |
|         }
 | |
|         // get the byte position
 | |
|         uint32_t bytepos = found-thisstr->bytes;
 | |
|         // if we need the exact string as it is given
 | |
|         if(RF_BITFLAG_ON( options,RF_MATCH_WORD))
 | |
|         {
 | |
|             // check before the found string
 | |
|             if(bytepos != 0)
 | |
|             {
 | |
|                 // if is is not a character
 | |
|                 switch(thisstr->bytes[bytepos-1])
 | |
|                 {
 | |
|                     case ' ':case '\t':case '\n':
 | |
|                     break;
 | |
|                     default:
 | |
|                         return RF_FAILURE;
 | |
|                     break;
 | |
|                 }
 | |
|             }
 | |
|             // check after the found string
 | |
|             if(bytepos+sstr->byteLength != thisstr->byteLength)
 | |
|             {
 | |
|                 // if is is not a character
 | |
|                 switch(thisstr->bytes[bytepos+sstr->byteLength])
 | |
|                 {
 | |
|                     case ' ':case '\t':case '\n':
 | |
|                     break;
 | |
|                     default:
 | |
|                         return RF_FAILURE;
 | |
|                     break;
 | |
|                 }
 | |
|             }
 | |
|         }// end of the exact string option
 | |
|         // success
 | |
|         return rfString_BytePosToCharPos(thisstr,bytepos,false);
 | |
|     }
 | |
| 
 | |
|     // else ignore case matching
 | |
|     uint32_t i,j;
 | |
|     // if(cstr[0] >= 0x41 && cstr[0] <= 0x5a)
 | |
|     for(i=0;i<thisstr->byteLength; i ++)
 | |
|     {
 | |
|         // if i matches the start of the substring
 | |
|         for(j = 0; j < sstr->byteLength; j++)
 | |
|         {
 | |
|             // if the jth char is a big letter
 | |
|             if(sstr->bytes[j] >= 0x41 && sstr->bytes[j] <= 0x5a)
 | |
|             {
 | |
|                 // no match
 | |
|                 if(sstr->bytes[j] != thisstr->bytes[i+j] && sstr->bytes[j]+32 != thisstr->bytes[i+j])
 | |
|                     break;
 | |
|                 // there is a match in the jth character so let's perform additional checks if needed
 | |
|                 if(RF_BITFLAG_ON( options,RF_MATCH_WORD))
 | |
|                 {
 | |
|                     // if it's the first substring character and if the string we search is not in it's beginning, check for EXACT string before
 | |
|                     if(j == 0 && i != 0)
 | |
|                     {
 | |
|                         switch(thisstr->bytes[i-1])
 | |
|                         {
 | |
|                             case ' ':case '\t':case '\n':
 | |
|                             break;
 | |
|                             default:
 | |
|                                 return RF_FAILURE;
 | |
|                             break;
 | |
|                         }
 | |
|                     }
 | |
|                 }// exact string check if ends
 | |
|             }
 | |
|             // small letter
 | |
|             else if(sstr->bytes[j] >= 0x61 && sstr->bytes[j] <= 0x7a)
 | |
|             {
 | |
|                 // no match
 | |
|                 if(sstr->bytes[j] != thisstr->bytes[i+j] && sstr->bytes[j]-32 != thisstr->bytes[i+j])
 | |
|                     break;
 | |
|                 // there is a match in the jth character so let's perform additional checks if needed
 | |
|                 if(RF_BITFLAG_ON(options,RF_MATCH_WORD))
 | |
|                 {
 | |
|                     // if it's the first substring character and if the string we search is not in it's beginning, check for EXACT string before
 | |
|                     if(j == 0 && i != 0)
 | |
|                     {
 | |
|                         switch(thisstr->bytes[i-1])
 | |
|                         {
 | |
|                             case ' ':case '\t':case '\n':
 | |
|                             break;
 | |
|                             default:
 | |
|                                 return RF_FAILURE;
 | |
|                             break;
 | |
|                         }
 | |
|                     }
 | |
|                 }// exact string check if ends
 | |
|             }
 | |
|             // not a letter and no match
 | |
|             else if(sstr->bytes[j] != thisstr->bytes[i+j])
 | |
|                 break;// break off the substring search loop
 | |
| 
 | |
|             // if we get here and it's the last char of the substring we either found it or need to perform one last check for exact string
 | |
|             if(j == sstr->byteLength-1)
 | |
|             {
 | |
|                 // only if the end of the string is not right after the substring
 | |
|                 if( RF_BITFLAG_ON(options,RF_MATCH_WORD) && i+sstr->byteLength < thisstr->byteLength)
 | |
|                 {
 | |
|                     switch(thisstr->bytes[i+sstr->byteLength])
 | |
|                     {
 | |
|                         case ' ':case '\t':case '\n':
 | |
|                         break;
 | |
|                         default:
 | |
|                             return RF_FAILURE;
 | |
|                         break;
 | |
|                     }
 | |
|                 }// end of the exact string check
 | |
|                 // succes
 | |
|                 return rfString_BytePosToCharPos(thisstr,i,false);
 | |
|             }// end of it's the last char of the substring check
 | |
|         }// substring iteration ends
 | |
|     }// this string iteration ends
 | |
|     return RF_FAILURE;
 | |
| }
 | |
| 
 | |
| // Returns the integer value of the string if and only if it contains only numbers. If it contains anything else the function fails.
 | |
| char rfString_ToInt(void* str,int32_t* v)
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*)str;
 | |
|     char* end;
 | |
|     // get the integer
 | |
|     *v = strtol ( thisstr->bytes, &end,10);
 | |
| 
 | |
| // /This is the non-strict case. Takes the number out of the string no matter what else it has inside
 | |
| /*    // if we did get something
 | |
|     if(strlen(end) < this->length())
 | |
|         return true;
 | |
| */
 | |
| // /This is the strict case, and the one we will go with. The non-strict case might be moved to its own function, if ever in the future
 | |
|     if(end[0] == '\0')
 | |
|         return true;
 | |
| 
 | |
|     // else false
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| // Returns the float value of a String
 | |
| int rfString_ToDouble(void* thisstrP,double* f)
 | |
| {
 | |
|     RF_String* str = (RF_String*)thisstrP;
 | |
|     *f = strtod(str->bytes,NULL);
 | |
|     // check the result
 | |
|     if(*f == 0.0)
 | |
|     {
 | |
|         // if it's zero and the string equals to zero then we are okay
 | |
|         if(rfString_Equal(str,RFS_("0")) || rfString_Equal(str,RFS_("0.0")))
 | |
|             return RF_SUCCESS;
 | |
|         // underflow error
 | |
|         if(errno == ERANGE)
 | |
|             return RE_STRING_TOFLOAT_UNDERFLOW;
 | |
|         // in any other case it's a conversion error
 | |
|         return RE_STRING_TOFLOAT;
 | |
|     }
 | |
|     // if the result is a HUGE_VAL and errno is set,the number is not representable by a double
 | |
|     if(*f == HUGE_VAL && errno == ERANGE)
 | |
|         return RE_STRING_TOFLOAT_RANGE;
 | |
| 
 | |
|     // any other case success
 | |
|     return RF_SUCCESS;
 | |
| }
 | |
| 
 | |
| // Returns a cstring version of the string.
 | |
| const char* rfString_ToCstr(const void* str)
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*)str;
 | |
|     return thisstr->bytes;
 | |
| }
 | |
| 
 | |
| // Creates and returns an allocated copy of the given string
 | |
| RF_String* rfString_Copy_OUT(void* srcP)
 | |
| {
 | |
|     RF_String* src = (RF_String*)srcP;
 | |
|     // create the new string
 | |
|     RF_String* ret;
 | |
|     RF_MALLOC(ret,sizeof(RF_String));
 | |
|     // get the length
 | |
|     ret->byteLength = src->byteLength;
 | |
|     // copy the bytes
 | |
|     RF_MALLOC(ret->bytes,ret->byteLength+1);
 | |
|     memcpy(ret->bytes,src->bytes,ret->byteLength+1);
 | |
|     return ret;
 | |
| 
 | |
| }
 | |
| // Copies all the contents of a string to another
 | |
| void rfString_Copy_IN(RF_String* dst,void* srcP)
 | |
| {
 | |
|     RF_String* src = (RF_String*)srcP;
 | |
|     // get the length
 | |
|     dst->byteLength = src->byteLength;
 | |
|     // copy the bytes
 | |
|     RF_MALLOC(dst->bytes,src->byteLength+1);
 | |
|     memcpy(dst->bytes,src->bytes,dst->byteLength+1);
 | |
|     return;
 | |
| 
 | |
| }
 | |
| // Copies a certain number of characters from a string
 | |
| void rfString_Copy_chars(RF_String* dst,void* srcP,uint32_t charsN)
 | |
| {
 | |
|     uint32_t i = 0,bytePos;
 | |
|     RF_String* src = (RF_String*)srcP;
 | |
| 
 | |
|     // find the byte position until which we need to copy
 | |
|     RF_STRING_ITERATE_START(src,i,bytePos)
 | |
|         if(i == charsN)
 | |
|             break;
 | |
|     RF_STRING_ITERATE_END(i,bytePos)
 | |
|     dst->byteLength = bytePos;
 | |
|     RF_MALLOC(dst->bytes,dst->byteLength+1);
 | |
|     memcpy(dst->bytes,src->bytes,dst->byteLength+1);
 | |
|     dst->bytes[dst->byteLength] = '\0';// null terminate it
 | |
| }
 | |
| 
 | |
| 
 | |
| // Applies a limited version of sscanf after the specified substring
 | |
| char i_rfString_ScanfAfter(void* str,void* afterstrP,const char* format,void* var)
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*)str;
 | |
|     RF_String* afterstr = (RF_String*)afterstrP;
 | |
|     // return false if the substring is not found
 | |
|     char* found,*s;
 | |
|     if( (found = strstr(thisstr->bytes,afterstr->bytes)) ==0 )
 | |
|     {
 | |
|         return false;
 | |
|     }
 | |
|     // get a pointer to the start of the position where sscanf will be used
 | |
|     s = thisstr->bytes + (found-thisstr->bytes+afterstr->byteLength);
 | |
| 
 | |
|     // use sscanf
 | |
|     if(sscanf(s,format,var) <=0)
 | |
|     {
 | |
|         return false;
 | |
|     }
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| // Counts how many times a substring s occurs inside the string
 | |
| int32_t i_rfString_Count(void* str,void* sstr2,const char* optionsP)
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*)str;
 | |
|     RF_String* sstr = (RF_String*)sstr2;
 | |
|     char options = *optionsP;
 | |
|     int32_t index = 0;
 | |
|     int32_t move;
 | |
|     int32_t n = 0;
 | |
| 
 | |
|     // as long as the substring is found in the string
 | |
|     while ((move = rfString_FindBytePos(thisstr,sstr,options)) != RF_FAILURE)
 | |
|     {
 | |
|         move+= sstr->byteLength;
 | |
|         // proceed searching inside the string and also increase the counter
 | |
|         n++;
 | |
|         thisstr->bytes+=move;
 | |
|         index +=move;
 | |
|         thisstr->byteLength -=move;
 | |
|     }
 | |
| 
 | |
|     // return string to its original state and return the number of occurences, also returns 0 if not found
 | |
|     thisstr->bytes-=index;
 | |
|     thisstr->byteLength += index;
 | |
|     // success
 | |
|     return n;
 | |
| }
 | |
| 
 | |
| // Tokenizes the given string. Separates it into @c tokensN depending on how many substrings can be created from the @c sep separatior and stores them
 | |
| // into the Array of RF_String* that should be passed to the function
 | |
| i_DECLIMEX_ char rfString_Tokenize(void* str,char* sep,uint32_t* tokensN,RF_String** tokens)
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*)str;
 | |
|     uint32_t i;
 | |
|     // first find the occurences of the separator, and then the number of tokens
 | |
|     *tokensN = rfString_Count(thisstr,RFS_(sep),0)+1;
 | |
|     // error checking
 | |
|     if(*tokensN == 0)
 | |
|         return false;
 | |
| 
 | |
|     // allocate the tokens
 | |
|     RF_MALLOC(*tokens,sizeof(RF_String) *(*tokensN));
 | |
|     // find the length of the separator
 | |
|     uint32_t sepLen = strlen(sep);
 | |
|     char* s,*e;
 | |
|     s = thisstr->bytes;
 | |
|     for(i = 0; i < (*tokensN)-1; i ++)
 | |
|     {
 | |
|         // find each substring
 | |
|         e = strstr(s,sep);
 | |
|         (*tokens)[i].byteLength = e-s;
 | |
|         RF_MALLOC((*tokens)[i].bytes,(*tokens)[i].byteLength+1);
 | |
|         // put in the data
 | |
|         strncpy((*tokens)[i].bytes,s,(*tokens)[i].byteLength);
 | |
|         // null terminate
 | |
|         (*tokens)[i].bytes[(*tokens)[i].byteLength] = '\0';
 | |
| 
 | |
|         // prepare for next sub-string
 | |
|         s = e+sepLen;
 | |
| 
 | |
|     }
 | |
|     // /make sure that if it's the last substring we change strategy
 | |
|     (*tokens)[i].byteLength = strlen(s);
 | |
|     RF_MALLOC((*tokens)[i].bytes,(*tokens)[i].byteLength+1);
 | |
|     // put in the data
 | |
|     strncpy((*tokens)[i].bytes,s,(*tokens)[i].byteLength);
 | |
|     // null terminate
 | |
|     (*tokens)[i].bytes[(*tokens)[i].byteLength] = '\0';
 | |
| 
 | |
|     // success
 | |
|     return true;
 | |
| }
 | |
| // Initializes the given string as the first substring existing between the left and right parameter substrings.
 | |
| char i_rfString_Between(void* thisstrP,void* lstrP,void* rstrP,RF_String* result,const char* optionsP)
 | |
| {
 | |
|     int32_t start,end;
 | |
|     RF_String* thisstr = (RF_String*)thisstrP;
 | |
|     RF_String* lstr = (RF_String*)lstrP;
 | |
|     RF_String* rstr = (RF_String*)rstrP;
 | |
|     char options = *optionsP;
 | |
|     RF_String temp;
 | |
|     // find the left substring
 | |
|     if( (start = rfString_FindBytePos(thisstr,lstr,options))== RF_FAILURE)
 | |
|     {
 | |
|         return false;
 | |
|     }
 | |
|     // get what is after it
 | |
|     rfString_After(thisstr,lstr,&temp,options);
 | |
|     // find the right substring in the remaining part
 | |
|     if( (end = rfString_FindBytePos(&temp,rstr,options))== RF_FAILURE)
 | |
|     {
 | |
|         return false;
 | |
|     }
 | |
|     // free temp string
 | |
|     rfString_Deinit(&temp);
 | |
|     // initialize the string to return
 | |
|     result->byteLength = end;
 | |
|     RF_MALLOC(result->bytes,result->byteLength+1);
 | |
|     memcpy(result->bytes,thisstr->bytes+start+lstr->byteLength,result->byteLength+1);
 | |
|     result->bytes[end]= '\0';
 | |
|     // success
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| // Initializes the given string as the substring from the start until any of the given Strings are found.
 | |
| #ifndef RF_OPTION_DEFAULT_ARGUMENTS
 | |
| char rfString_Beforev(void* thisstrP,RF_String* result,const char* optionsP,const unsigned char* parNP, ...)
 | |
| #else
 | |
| char i_rfString_Beforev(void* thisstrP,RF_String* result,const char* optionsP,const unsigned char* parNP, ...)
 | |
| #endif
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*)thisstrP;
 | |
|     RF_String* s;
 | |
|     char options = *optionsP;
 | |
|     unsigned char parN = *parNP;
 | |
|     int32_t i,minPos,thisPos;
 | |
|     // will keep the argument list
 | |
|     va_list argList;
 | |
|     // get the parameter characters
 | |
|     va_start(argList,parNP);
 | |
| 
 | |
|     minPos = 9999999;
 | |
|     for(i = 0; i < parN; i++)
 | |
|     {
 | |
|         s = (RF_String*) va_arg(argList,RF_String*);
 | |
|         if( (thisPos= rfString_FindBytePos(thisstr,s,options))!= RF_FAILURE)
 | |
|         {
 | |
|             if(thisPos < minPos)
 | |
|                 minPos = thisPos;
 | |
|         }
 | |
|     }
 | |
|     va_end(argList);
 | |
| 
 | |
|     // if it is not found
 | |
|     if(minPos == 9999999)
 | |
|     {
 | |
|         return false;
 | |
|     }
 | |
|     // if it is found initialize the substring
 | |
|     result->byteLength = minPos;
 | |
|     RF_MALLOC(result->bytes,minPos+1);
 | |
|     memcpy(result->bytes,thisstr->bytes,minPos);
 | |
|     result->bytes[minPos] = '\0';
 | |
|     // success
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| // Initializes the given string as the substring from the start until the given string is found
 | |
| char i_rfString_Before(void* thisstrP,void* sstrP,RF_String* result,const char* optionsP)
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*)thisstrP;
 | |
|     RF_String* sstr = (RF_String*) sstrP;
 | |
|     char options = *optionsP;
 | |
|     int32_t ret;
 | |
|     // find the substring
 | |
|     if( (ret = rfString_FindBytePos(thisstr,sstr,options)) == RF_FAILURE)
 | |
|     {
 | |
|         return false;
 | |
|     }
 | |
|     // if it is found get the result initialize the substring
 | |
|     result->byteLength = ret;
 | |
|     RF_MALLOC(result->bytes,result->byteLength+1);
 | |
|     memcpy(result->bytes,thisstr->bytes,result->byteLength);
 | |
|     result->bytes[result->byteLength] = '\0';
 | |
|     // success
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| 
 | |
| // Initializes the given String with the substring located after (and not including) the after substring inside the parameter string. If the substring is not located the function returns false.
 | |
| char i_rfString_After(void* thisstrP,void* afterP,RF_String* out,const char* optionsP)
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*)thisstrP;
 | |
|     RF_String* after = (RF_String*)afterP;
 | |
|     char options = *optionsP;
 | |
|     int32_t bytePos;
 | |
|     // check for substring existence
 | |
|     if( (bytePos = rfString_FindBytePos(thisstr,after,options)) == RF_FAILURE)
 | |
|     {
 | |
|         return false;
 | |
|     }
 | |
|     // done so let's get it. Notice the use of the non-checking initialization
 | |
|     rfString_Init_nc(out,thisstr->bytes+bytePos+after->byteLength);
 | |
|     // success
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| 
 | |
| // Initialize a string after the first of the given substrings found
 | |
| #ifndef RF_OPTION_DEFAULT_ARGUMENTS
 | |
| char rfString_Afterv(void* thisstrP,RF_String* result,const char* optionsP,const unsigned char* parNP,...)
 | |
| #else
 | |
| char i_rfString_Afterv(void* thisstrP,RF_String* result,const char* optionsP,const unsigned char* parNP,...)
 | |
| #endif
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*)thisstrP;
 | |
|     RF_String* s;
 | |
|     char options = *optionsP;
 | |
|     unsigned char parN = *parNP;
 | |
|     int32_t i,minPos,thisPos;
 | |
|     uint32_t minPosLength;
 | |
|     // will keep the argument list
 | |
|     va_list argList;
 | |
|     // get the parameter characters
 | |
|     va_start(argList,parNP);
 | |
| 
 | |
|     minPos = 9999999;
 | |
|     for(i = 0; i < parN; i++)
 | |
|     {
 | |
|         s = (RF_String*) va_arg(argList,RF_String*);
 | |
|         if( (thisPos= rfString_FindBytePos(thisstr,s,options))!= RF_FAILURE)
 | |
|         {
 | |
|             if(thisPos < minPos)
 | |
|             {
 | |
|                 minPos = thisPos;
 | |
|                 minPosLength = s->byteLength;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     va_end(argList);
 | |
|     // if it is not found
 | |
|     if(minPos == 9999999)
 | |
|     {
 | |
|         return false;
 | |
|     }
 | |
|     // if it is found initialize the substring
 | |
|     minPos += minPosLength;// go after the found substring
 | |
|     result->byteLength = thisstr->byteLength-minPos;
 | |
|     RF_MALLOC(result->bytes,result->byteLength);
 | |
|     memcpy(result->bytes,thisstr->bytes+minPos,result->byteLength);
 | |
|     result->bytes[result->byteLength] = '\0';
 | |
|     // success
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /*------------------------------------------------------------------------ RF_String manipulation functions-------------------------------------------------------------------------------*/
 | |
| 
 | |
| 
 | |
| // Appends the parameter String to this one
 | |
| void i_rfString_Append(RF_String* thisstr,void* otherP)
 | |
| {
 | |
|     RF_String* other = (RF_String*)otherP;
 | |
|     // /@note Here if a null addition is given lots of actions are done but the result is safe and the same string as the one entered.
 | |
|     // /A check here would result in an additional check for every appending so I decided against it
 | |
|     // calculate the new length
 | |
|     thisstr->byteLength +=other->byteLength;
 | |
|     // reallocate this string to fit the new addition
 | |
|     RF_REALLOC(thisstr->bytes,char,thisstr->byteLength+1);
 | |
|     // add the string to this one
 | |
|     strncat(thisstr->bytes,other->bytes,other->byteLength);
 | |
| }
 | |
| 
 | |
| // Appends an integer to the string
 | |
| void rfString_Append_i(RF_String* thisstr,const int32_t i)
 | |
| {
 | |
|     // create a new buffer for the string big enough to fit any number plus the original string
 | |
|     char* buff;
 | |
|     RF_MALLOC(buff,thisstr->byteLength+15);// max uint32_t is 4,294,967,295 in most environment so 12 chars will certainly fit it
 | |
|     // put the int32_t inside the string
 | |
|     sprintf(buff,"%s%i",thisstr->bytes,i);
 | |
|     // free the previous c string
 | |
|     free(thisstr->bytes);
 | |
|     // point the string pointer to the new string
 | |
|     thisstr->bytes = buff;
 | |
|     thisstr->byteLength = strlen(thisstr->bytes);
 | |
| }
 | |
| // Appends a float to the string. <b>Can't be used with RF_StringX</b>
 | |
| void rfString_Append_f(RF_String* thisstr,const float f)
 | |
| {
 | |
|     // a temporary buffer to hold the float and the string
 | |
|     char* buff;
 | |
|     RF_MALLOC(buff,thisstr->byteLength+64);
 | |
|     // put the float inside the string
 | |
|     sprintf(buff,"%s%f",thisstr->bytes,f);
 | |
|     // free the previous c string
 | |
|     free(thisstr->bytes);
 | |
|     // point the string pointer to the new string
 | |
|     thisstr->bytes = buff;
 | |
|     thisstr->byteLength = strlen(thisstr->bytes);
 | |
| }
 | |
| 
 | |
| // Prepends the parameter String to this string
 | |
| void i_rfString_Prepend(RF_String* thisstr,void* otherP)
 | |
| {
 | |
|     RF_String* other = (RF_String*)otherP;
 | |
|     uint32_t size;
 | |
|     int32_t i;// is not unsigned since it goes to -1 in the loop
 | |
|     // keeep the original byte size of the string
 | |
|     size = thisstr->byteLength;
 | |
|     // calculate the new lengths
 | |
|     thisstr->byteLength += other->byteLength;
 | |
|     // reallocate this string to fit the new addition
 | |
|     RF_REALLOC(thisstr->bytes,char,thisstr->byteLength+1);
 | |
|     // move the pre-existing string to the end of the buffer, by dislocating each byte by cstrlen
 | |
|     for(i =size; i >=0 ; i--)
 | |
|         thisstr->bytes[i+other->byteLength] = thisstr->bytes[i];
 | |
|     // and now add the new string to the start
 | |
|     memcpy(thisstr->bytes,other->bytes,other->byteLength);
 | |
| }
 | |
| 
 | |
| // Removes all of the specifed string occurences from this String matching case or not, DOES NOT reallocate buffer size.
 | |
| char i_rfString_Remove(void* thisstrP,void* rstrP,uint32_t* numberP,const char* optionsP)
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*)thisstrP;
 | |
|     RF_String* rstr = (RF_String*)rstrP;
 | |
|     char options = *optionsP;
 | |
|     uint32_t number = *numberP;
 | |
|     uint32_t i,count,occurences=0;
 | |
|     int32_t bytePos;
 | |
|     char found = false;
 | |
|     // as long as we keep finding rstr in the string keep removing it
 | |
|     do
 | |
|     {   // if the substring is not found
 | |
|         if( (bytePos = rfString_FindBytePos(thisstr,rstr,options)) == RF_FAILURE)
 | |
|         {
 | |
|             // if we have not even found it once , we fail
 | |
|             if(found == false)
 | |
|             {
 | |
|                 return false;
 | |
|             }
 | |
|             else // else we are done
 | |
|                 break;
 | |
|         }
 | |
| 
 | |
|         // substring found
 | |
|         found = true;
 | |
|         // move all of the string a position back
 | |
|         count = 0;
 | |
|         for(i = bytePos; i <=thisstr->byteLength; i ++)
 | |
|         {
 | |
|             thisstr->bytes[i] = thisstr->bytes[i+rstr->byteLength];
 | |
|             count++;
 | |
|         }
 | |
|         // now change the byte length
 | |
|         thisstr->byteLength -= rstr->byteLength;
 | |
|         // count the number of occurences and if we reached the required amount, stop
 | |
|         occurences++;
 | |
|         if(occurences == number)
 | |
|             break;
 | |
|     }while(bytePos != RF_FAILURE);
 | |
|     // succcess
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| // Removes all of the characters of the string except those specified
 | |
| void i_rfString_KeepOnly(void* thisstrP,void* keepstrP)
 | |
| {
 | |
|     uint32_t keepLength,i,j,charValue,temp;
 | |
|     uint32_t *keepChars;
 | |
|     RF_String* thisstr = (RF_String*)thisstrP;
 | |
|     RF_String* keepstr = (RF_String*)keepstrP;
 | |
|     char exists,charBLength;
 | |
|     // first let's get all of the characters of the keep string in an array
 | |
|     i=0;
 | |
|     keepLength = rfString_Length(keepstr);
 | |
|     RF_MALLOC(keepChars,4*keepLength);
 | |
|     rfString_Iterate_Start(keepstr,i,charValue)
 | |
|         keepChars[i] = charValue;
 | |
|     rfString_Iterate_End(i)
 | |
|     // now iterate every character of this string
 | |
|     i=0;
 | |
|     rfString_Iterate_Start(thisstr,i,charValue)
 | |
|         // for every character check if it exists in the keep str
 | |
|         exists = false;
 | |
|         for(j=0;j<keepLength; j++)
 | |
|         {
 | |
|             if(keepChars[j] == charValue)
 | |
|                 exists = true;
 | |
|         }
 | |
|         // if it does not exist, move the string back to cover it so that it effectively gets deleted
 | |
|         if(exists == false)
 | |
|         {
 | |
|             charBLength = rfUTF8_FromCodepoint(charValue,&temp);
 | |
|             // this is kind of a non-clean way to do it. the rfString_Iterate_Start macro internally uses a byteIndex_ variable
 | |
|             // we use that here to determine the current byteIndex_ of the string in the iteration and move the string backs
 | |
|             memmove(thisstr->bytes+byteIndex_,thisstr->bytes+byteIndex_+charBLength,thisstr->byteLength-byteIndex_+charBLength);
 | |
|             thisstr->byteLength-=charBLength;
 | |
|             continue;// by contiuing here we make sure that the current string position won't be moved to assure that we also check the newly move characters
 | |
|         }
 | |
|     rfString_Iterate_End(i)
 | |
|     // before returning free the keep string's character array
 | |
|     free(keepChars);
 | |
| }
 | |
| 
 | |
| // Removes the first n characters from the start of the string
 | |
| char rfString_PruneStart(void* thisstrP,uint32_t n)
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*)thisstrP;
 | |
|     // iterate the characters of the string
 | |
|     uint32_t i;
 | |
|     uint32_t length = 0;
 | |
|     unsigned nBytePos = 0;
 | |
|     char found = false;
 | |
|     RF_STRING_ITERATE_START(thisstr,length,i);
 | |
|         // if we reach the number of characters passed as a parameter, note it
 | |
|         if(length == n)
 | |
|         {
 | |
|             // remember that now i is the byte position we need
 | |
|             nBytePos = i;
 | |
|             found = true;
 | |
|             break;
 | |
|         }
 | |
|     RF_STRING_ITERATE_END(length,i)
 | |
| 
 | |
|     // if the string does not have n chars to remove it becomes an empty string and we return failure
 | |
|     if(found == false)
 | |
|     {
 | |
|         thisstr->bytes[0] = '\0';
 | |
|         thisstr->byteLength = 0;
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     // move the string back to cover the empty places.reallocation here would be an overkill, everything will be freed together when the string gets freed
 | |
|     for(i =0; i < thisstr->byteLength-nBytePos+1;i++ )
 | |
|         thisstr->bytes[i] = thisstr->bytes[i+nBytePos];
 | |
| 
 | |
|     // get the new bytelength
 | |
|     thisstr->byteLength -= nBytePos;
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| // Removes the last n characters from the end of the string
 | |
| char rfString_PruneEnd(void* thisstrP,uint32_t n)
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*)thisstrP;
 | |
|     // start the iteration of the characters from the end of the string
 | |
|     int32_t nBytePos = -1;
 | |
|     uint32_t length,i;
 | |
|     RF_STRING_ITERATEB_START(thisstr,length,i)
 | |
|         // if we found the requested number of characters from the end of the string
 | |
|         if(length == n)
 | |
|         {
 | |
|             // remember that now i is the byte position we need
 | |
|             nBytePos = i;
 | |
|             break;
 | |
|         }
 | |
|     RF_STRING_ITERATEB_END(length,i)
 | |
| 
 | |
|     // if the string does not have n chars to remove it becomes an empty string and we return failure
 | |
|     if(nBytePos == -1)
 | |
|     {
 | |
|         thisstr->bytes[0] = '\0';
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     // just set the end of string character characters back, reallocation here would be an overkill, everything will be freed together when the string gets freed
 | |
|     thisstr->bytes[nBytePos] = '\0';
 | |
|     // and also set the new byte length
 | |
|     thisstr->byteLength -= (thisstr->byteLength - nBytePos);
 | |
|     // success
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| // Removes n characters from the position p of the string counting backwards. If there is no space to do so, nothing is done and returns false.
 | |
| char rfString_PruneMiddleB(void* thisstrP,uint32_t p,uint32_t n)
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*)thisstrP;
 | |
|     // if we ask to remove more characters from the position that it would be possible do nothign and return false
 | |
|     if(n>p+1)
 | |
|         return false;
 | |
| 
 | |
|     // iterate the characters of the string
 | |
|     uint32_t j,i,length;
 | |
|     int32_t pBytePos,nBytePos;
 | |
|     pBytePos = nBytePos = -1;
 | |
|     RF_STRING_ITERATE_START(thisstr,length,i)
 | |
|         // if we reach the number of characters passed as a parameter, note it
 | |
|         if(length == p+1)
 | |
|         {
 | |
|             // we search for p+1  because we want to include all of the p character
 | |
|             pBytePos = i;
 | |
|             // also break since we don't care after position p
 | |
|             break;
 | |
|         }
 | |
|         if(length == p-n+1)// +1 is to make sure that indexing works from 0
 | |
|             nBytePos = i;
 | |
| 
 | |
|     RF_STRING_ITERATE_END(length,i)
 | |
| 
 | |
|     // if the position was not found in the string do nothing
 | |
|     if(pBytePos == -1 || nBytePos == -1)
 | |
|         return false;
 | |
| 
 | |
|     // move the bytes in the buffer to remove the requested characters
 | |
|     for(i=nBytePos,j=0;j<= thisstr->byteLength-pBytePos+1; i ++,j++) // here +2 is for (+1 for pbytePos to go to the start of pth character) (+1 for the byteLength to include the null termination character)
 | |
|     {
 | |
|         thisstr->bytes[i] = thisstr->bytes[pBytePos+j];
 | |
|     }
 | |
| 
 | |
|     // find the new byte length
 | |
|     thisstr->byteLength -= (nBytePos - pBytePos);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| // Removes n characters from the position p of the string counting forwards. If there is no space, nothing is done and returns false.
 | |
| char rfString_PruneMiddleF(void* thisstrP,uint32_t p,uint32_t n)
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*)thisstrP;
 | |
|     // iterate the characters of the string
 | |
|     uint32_t j,i,length;
 | |
|     int32_t pBytePos,nBytePos;
 | |
|     pBytePos = nBytePos = -1;
 | |
|     RF_STRING_ITERATE_START(thisstr,length,i)
 | |
|         // if we reach the number of characters passed as a parameter, note it
 | |
|         if(length == p)
 | |
|             pBytePos = i;
 | |
| 
 | |
|         if(length == p+n)
 | |
|         {
 | |
|             nBytePos = i;
 | |
|             break;// since we got all the data we needed
 | |
|         }
 | |
| 
 | |
|     RF_STRING_ITERATE_END(length,i)
 | |
| 
 | |
|     // if the position was not found in the string do nothing
 | |
|     if(pBytePos == -1 )
 | |
|         return false;
 | |
| 
 | |
|     // if we did not find the byte position of p+n then we remove everything from pBytePos until the end of the string
 | |
|     if(nBytePos == -1)
 | |
|     {
 | |
|         thisstr->bytes[pBytePos] = '\0';
 | |
|         thisstr->byteLength -= (thisstr->byteLength-pBytePos);
 | |
|         return true;
 | |
|     }
 | |
| 
 | |
|     // move the bytes in the buffer to remove the requested characters
 | |
|     for(i=pBytePos,j=0;j<= thisstr->byteLength-nBytePos+1; i ++,j++) // here +2 is for (+1 for pbytePos to go to the start of pth character) (+1 for the byteLength to include the null termination character)
 | |
|     {
 | |
|         thisstr->bytes[i] = thisstr->bytes[nBytePos+j];
 | |
|     }
 | |
| 
 | |
|     // find the new byte length
 | |
|     thisstr->byteLength -= (nBytePos - pBytePos);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| // Replaces all of the specified sstr substring from the String with rstr and reallocates size, unless the new size is smaller
 | |
| char i_rfString_Replace(RF_String* thisstr,void* sstrP,void* rstrP,const uint32_t* numP,const char* optionsP)
 | |
| {
 | |
|     RF_String* sstr = (RF_String*)sstrP;
 | |
|     RF_String* rstr = (RF_String*)rstrP;
 | |
|     char options = *optionsP;
 | |
|     uint32_t num = *numP;
 | |
|     RF_StringX temp;// just a temporary string for finding the occurences
 | |
|     // will keep the number of found instances of the substring
 | |
|     uint32_t foundN = 0;
 | |
|     // will keep the number of given instances to find
 | |
|     uint32_t number = num;
 | |
|     uint32_t diff,i,j;
 | |
|     // if the substring string is not even found return false
 | |
|     if(rfString_FindBytePos(thisstr,sstr,options) == RF_FAILURE)
 | |
|     {
 | |
|         return false;
 | |
|     }
 | |
|     // create a buffer that will keep the byte positions
 | |
|     uint32_t bSize = 50;
 | |
|     int32_t * bytePositions;
 | |
|     RF_MALLOC(bytePositions,bSize*sizeof(int32_t));
 | |
|     // if the given num is 0 just make sure we replace all
 | |
|     if(number == 0)
 | |
|         number = 999999;// max number of occurences
 | |
| 
 | |
|     // find how many occurences exist
 | |
|     rfStringX_FromString_IN(&temp,thisstr);
 | |
|     while( (bytePositions[foundN] = rfString_FindBytePos(&temp,sstr,options))  != RF_FAILURE)
 | |
|     {
 | |
|         int32_t move = bytePositions[foundN] + sstr->byteLength;
 | |
|         bytePositions[foundN] = bytePositions[foundN]+temp.bIndex;
 | |
|         temp.bIndex += move;
 | |
|         temp.bytes += move;
 | |
|         temp.byteLength -= move;
 | |
|         foundN++;
 | |
|         // if buffer is in danger of overflow realloc it
 | |
|         if(foundN > bSize)
 | |
|         {
 | |
|             bSize *=2;
 | |
|             RF_REALLOC(bytePositions,int32_t,bSize);
 | |
|         }
 | |
|         // if we found the required number of occurences break;
 | |
|         if(foundN >= number)
 | |
|             break;
 | |
|     }
 | |
|     rfStringX_Deinit(&temp);
 | |
|     // make sure that the number of occurence to replace do not exceed the actual number of occurences
 | |
|     if(number > foundN)
 | |
|         number = foundN;
 | |
|     // act depending on the size difference of rstr and sstr
 | |
|     if(rstr->byteLength > sstr->byteLength) // replace string is bigger than the removed one
 | |
|     {
 | |
|         int32_t orSize,nSize;
 | |
| 
 | |
|         diff = rstr->byteLength - sstr->byteLength;
 | |
|         // will keep the original size in bytes
 | |
|         orSize = thisstr->byteLength +1;
 | |
|         // reallocate the string to fit the new bigger size
 | |
|         nSize= orSize + number*diff;
 | |
|         RF_REALLOC(thisstr->bytes,char,nSize)
 | |
|         // now replace all the substrings one by one
 | |
|         for(i = 0; i < number; i ++)
 | |
|         {
 | |
|             // move all of the contents of the string to fit the replacement
 | |
|             for(j =orSize+diff-1; j > bytePositions[i]+sstr->byteLength; j -- )
 | |
|                 thisstr->bytes[j] = thisstr->bytes[j-diff];
 | |
|             // copy in the replacement
 | |
|             strncpy(thisstr->bytes+bytePositions[i],rstr->bytes,rstr->byteLength);
 | |
|             // also increase the original size (since now we moved the whole string by one replacement)
 | |
|             orSize += diff;
 | |
|             // also increase all the subsequent found byte positions since there is a change of string size
 | |
|             for(j = i+1; j < number; j ++)
 | |
|                 bytePositions[j] = bytePositions[j]+diff;
 | |
| 
 | |
|         }
 | |
|         // finally let's keep the new byte length
 | |
|         thisstr->byteLength = nSize-1;
 | |
|     }
 | |
|     else if( rstr->byteLength < sstr->byteLength) // replace string is smaller than the removed one
 | |
|     {
 | |
|         // get the differenc in byte length of removed substring and replace string
 | |
|         diff = sstr->byteLength-rstr->byteLength;
 | |
| 
 | |
|         // now replace all the substrings one by one
 | |
|         for(i =0; i < number; i ++)
 | |
|         {
 | |
|             // copy in the replacement
 | |
|             strncpy(thisstr->bytes+bytePositions[i],rstr->bytes,rstr->byteLength);
 | |
|             // move all of the contents of the string to fit the replacement
 | |
|             for(j =bytePositions[i]+rstr->byteLength; j < thisstr->byteLength; j ++ )
 | |
|                 thisstr->bytes[j] = thisstr->bytes[j+diff];
 | |
|             // also decrease all the subsequent found byte positions since there is a change of string size
 | |
|             for(j = i+1; j < number; j ++)
 | |
|                 bytePositions[j] = bytePositions[j]-diff;
 | |
|         }
 | |
|         // finally let's keep the new byte length
 | |
|         thisstr->byteLength -= diff*number;
 | |
|         // just note that reallocating downwards is not necessary
 | |
|     }
 | |
|     else // replace and remove strings are equal
 | |
|     {
 | |
|         for(i = 0; i < number; i ++)
 | |
|             strncpy(thisstr->bytes+bytePositions[i],rstr->bytes,rstr->byteLength);
 | |
|     }
 | |
|     free(bytePositions);
 | |
|     // success
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| // Removes all characters of a substring only from the start of the String
 | |
| char i_rfString_StripStart(void* thisstrP,void* subP)
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*) thisstrP;
 | |
|     RF_String*sub = (RF_String*) subP;
 | |
|     char ret = false,noMatch;
 | |
|     uint32_t charValue,i = 0,*subValues,j,subLength,bytePos;
 | |
| 
 | |
|     // firstly get all of the characters of the substring in an array
 | |
|     subLength = rfString_Length(sub);
 | |
|     RF_MALLOC(subValues,4*subLength)
 | |
|     rfString_Iterate_Start(sub,i,charValue)
 | |
|     subValues[i] = charValue;
 | |
|     rfString_Iterate_End(i)
 | |
| 
 | |
|     // iterate thisstring from the beginning
 | |
|     i = 0;
 | |
|     RF_STRING_ITERATE_START(thisstr,i,bytePos)
 | |
|         noMatch = true;
 | |
|         // for every substring character
 | |
|         for(j = 0;j < subLength; j++)
 | |
|         {
 | |
|             // if we got a match
 | |
|             if(rfString_BytePosToCodePoint(thisstr,bytePos) == subValues[j])
 | |
|             {
 | |
|                 ret = true;
 | |
|                 noMatch = false;
 | |
|                 break;
 | |
|             }
 | |
|         }
 | |
|         // if we get out of iterating the substring without having found a match, we get out of the iteration in general
 | |
|         if(noMatch)
 | |
|             break;
 | |
|     RF_STRING_ITERATE_END(i,bytePos)
 | |
| 
 | |
|     // if we had any match
 | |
|     if(ret == true)
 | |
|     {
 | |
|         // remove the characters
 | |
|         for(i =0; i < thisstr->byteLength-bytePos+1;i++ )
 | |
|             thisstr->bytes[i] = thisstr->bytes[i+bytePos];
 | |
|         // also change bytelength
 | |
|         thisstr->byteLength -= bytePos;
 | |
|     }
 | |
|     // free stuff and return
 | |
|     free(subValues);
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| // Removes all characters of a substring starting from the end of the String
 | |
| char i_rfString_StripEnd(void* thisstrP,void* subP)
 | |
| {
 | |
|     RF_String* thisstr = (RF_String*) thisstrP;
 | |
|     RF_String*sub = (RF_String*) subP;
 | |
|     char ret = false,noMatch;
 | |
|     uint32_t charValue,i = 0,*subValues,j,subLength,bytePos,lastBytePos,testity;
 | |
| 
 | |
|     // firstly get all of the characters of the substring in an array
 | |
|     subLength = rfString_Length(sub);
 | |
|     RF_MALLOC(subValues,4*subLength)
 | |
|     rfString_Iterate_Start(sub,i,charValue)
 | |
|     subValues[i] = charValue;
 | |
|     rfString_Iterate_End(i)
 | |
| 
 | |
|     // iterate thisstring from the end
 | |
|     i = 0;
 | |
|     RF_STRING_ITERATEB_START(thisstr,i,bytePos)
 | |
|         noMatch = true;
 | |
|         // for every substring character
 | |
|         for(j = 0;j < subLength; j++)
 | |
|         {
 | |
|             // if we got a match
 | |
|             if((testity=rfString_BytePosToCodePoint(thisstr,bytePos)) == subValues[j])
 | |
|             {
 | |
|                 ret = true;
 | |
|                 noMatch = false;
 | |
|                 lastBytePos = bytePos;
 | |
|                 break;
 | |
|             }
 | |
|         }
 | |
|         // if we get out of iterating the substring without having found a match, we get out of the iteration in general
 | |
|         if(noMatch)
 | |
|             break;
 | |
|     RF_STRING_ITERATEB_END(i,bytePos)
 | |
| 
 | |
|     // if we had any match
 | |
|     if(ret == true)
 | |
|     {
 | |
|         // just set the end of string there
 | |
|         thisstr->bytes[lastBytePos] = '\0';
 | |
|         // and also set the new byte length
 | |
|         thisstr->byteLength -= (thisstr->byteLength - lastBytePos);
 | |
|     }
 | |
| 
 | |
|     // free stuff and return
 | |
|     free(subValues);
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| // Removes all characters of a substring from both ends of the given String
 | |
| char i_rfString_Strip(void* thisstrP,void* subP)
 | |
| {
 | |
|     char res1 = rfString_StripStart(thisstrP,subP);
 | |
|     char res2 = rfString_StripEnd(thisstrP,subP);
 | |
|     return res1|res2;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*------------------------------------------------------------------------ RF_String File I/O functions-------------------------------------------------------------------------------*/
 | |
| 
 | |
| // Allocates and returns a string from file parsing. The file's encoding must be UTF-8.If for some reason (like EOF reached) no string can be read then null is returned
 | |
| RF_String* rfString_Create_fUTF8(FILE* f, char* eof)
 | |
| {
 | |
|     RF_String* ret;
 | |
|     RF_MALLOC(ret,sizeof(RF_String));
 | |
|     if(rfString_Init_fUTF8(ret,f,eof) < 0)
 | |
|     {
 | |
|         free(ret);
 | |
|         return 0;
 | |
|     }
 | |
|     return ret;
 | |
| }
 | |
| // Initializes a string from file parsing. The file's encoding must be UTF-8.If for some reason (like EOF reached) no string can be read then null is returned
 | |
| int32_t rfString_Init_fUTF8(RF_String* str,FILE* f,char* eof)
 | |
| {
 | |
|     int32_t bytesN;
 | |
|     uint32_t bufferSize;// unused
 | |
|     if((bytesN=rfFReadLine_UTF8(f,&str->bytes,&str->byteLength,&bufferSize,eof)) < 0)
 | |
|     {
 | |
|         LOG_ERROR("Failed to initialize String from a UTF-8 file",bytesN);
 | |
|         return bytesN;
 | |
|     }
 | |
|     // success
 | |
|     return bytesN;
 | |
| }
 | |
| // Assigns to a String from UTF-8 file parsing
 | |
| int32_t rfString_Assign_fUTF8(RF_String* str,FILE*f,char* eof)
 | |
| {
 | |
|     int32_t bytesN;
 | |
|     uint32_t utf8ByteLength,utf8BufferSize;// bufferSize unused in this function
 | |
|     char* utf8 = 0;
 | |
|     if((bytesN=rfFReadLine_UTF8(f,&utf8,&utf8ByteLength,&utf8BufferSize,eof)) < 0)
 | |
|     {
 | |
|         LOG_ERROR("Failed to assign the contents of a UTF-8 file to a String",bytesN);
 | |
|         return bytesN;
 | |
|     }
 | |
|     // success
 | |
|     // assign it to the string
 | |
|     if(str->byteLength <= utf8ByteLength)
 | |
|     {
 | |
|         RF_REALLOC(str->bytes,char,utf8ByteLength+1);
 | |
|     }
 | |
|     memcpy(str->bytes,utf8,utf8ByteLength+1);
 | |
|     str->byteLength = utf8ByteLength;
 | |
|     // free the file's utf8 buffer
 | |
|     free(utf8);
 | |
|     return bytesN;
 | |
| }
 | |
| // Appends to a String from UTF-8 file parsing
 | |
| int32_t rfString_Append_fUTF8(RF_String* str,FILE*f,char* eof)
 | |
| {
 | |
|     int32_t bytesN;
 | |
|     uint32_t utf8ByteLength,utf8BufferSize;// bufferSize unused in this function
 | |
|     char* utf8 = 0;
 | |
|     if((bytesN=rfFReadLine_UTF8(f,&utf8,&utf8ByteLength,&utf8BufferSize,eof)) < 0)
 | |
|     {
 | |
|         LOG_ERROR("Failed to assign the contents of a UTF-8 file to a String",bytesN);
 | |
|         return bytesN;
 | |
|     }
 | |
|     // append the utf8 to the given string
 | |
|     rfString_Append(str,RFS_(utf8));
 | |
|     // free the file's utf8 buffer
 | |
|     free(utf8);
 | |
|     return bytesN;
 | |
| }
 | |
| 
 | |
| // Allocates and returns a string from file parsing. The file's encoding must be UTF-16.If for some reason (like EOF reached) no string can be read then null is returned. A check for a valid sequence of bytes is performed.
 | |
| RF_String* rfString_Create_fUTF16(FILE* f,char endianess,char* eof)
 | |
| {
 | |
|     RF_String* ret;
 | |
|     RF_MALLOC(ret,sizeof(RF_String));
 | |
|     if(rfString_Init_fUTF16(ret,f,endianess,eof) < 0)
 | |
|         return 0;
 | |
|     return ret;
 | |
| }
 | |
| // Initializes a string from file parsing. The file's encoding must be UTF-16.If for some reason (like EOF reached) no string can be read then null is returned. A check for a valid sequence of bytes is performed.
 | |
| int32_t rfString_Init_fUTF16(RF_String* str,FILE* f, char endianess,char* eof)
 | |
| {
 | |
|     int32_t bytesN;
 | |
|     // depending on the file's endianess
 | |
|     if(endianess == RF_LITTLE_ENDIAN)
 | |
|     {
 | |
|         if((bytesN=rfFReadLine_UTF16LE(f,&str->bytes,&str->byteLength,eof)) < 0)
 | |
|         {
 | |
|             LOG_ERROR("Failure to initialize a String from reading a UTF-16 file",bytesN);
 | |
|             return bytesN;
 | |
|         }
 | |
|     }// end of little endian
 | |
|     else// big endian
 | |
|     {
 | |
|         if((bytesN=rfFReadLine_UTF16BE(f,&str->bytes,&str->byteLength,eof)) < 0)
 | |
|         {
 | |
|             LOG_ERROR("Failure to initialize a String from reading a UTF-16 file",bytesN);
 | |
|             return bytesN;
 | |
|         }
 | |
|     }// end of big endian case
 | |
|     // success
 | |
|     return bytesN;
 | |
| }
 | |
| 
 | |
| // Assigns to an already initialized String from File parsing
 | |
| int32_t rfString_Assign_fUTF16(RF_String* str,FILE* f, char endianess,char* eof)
 | |
| {
 | |
| 
 | |
|     uint32_t utf8ByteLength;
 | |
|     int32_t bytesN;
 | |
|     char* utf8 = 0;
 | |
|     // depending on the file's endianess
 | |
|     if(endianess == RF_LITTLE_ENDIAN)
 | |
|     {
 | |
|         if((bytesN=rfFReadLine_UTF16LE(f,&utf8,&utf8ByteLength,eof)) < 0)
 | |
|         {
 | |
|             LOG_ERROR("Failure to assign the contents of a Little Endian UTF-16 file to a String",bytesN);
 | |
|             return bytesN;
 | |
|         }
 | |
|     }// end of little endian
 | |
|     else// big endian
 | |
|     {
 | |
|         if((bytesN=rfFReadLine_UTF16BE(f,&utf8,&utf8ByteLength,eof)) < 0)
 | |
|         {
 | |
|             LOG_ERROR("Failure to assign the contents of a Big Endian UTF-16 file to a String",bytesN);
 | |
|             return bytesN;
 | |
|         }
 | |
|     }// end of big endian case
 | |
|     // success
 | |
|     // assign it to the string
 | |
|     if(str->byteLength <= utf8ByteLength)
 | |
|     {
 | |
|         RF_REALLOC(str->bytes,char,utf8ByteLength+1);
 | |
|     }
 | |
|     memcpy(str->bytes,utf8,utf8ByteLength+1);
 | |
|     str->byteLength = utf8ByteLength;
 | |
|     // free the file's utf8 buffer
 | |
|     free(utf8);
 | |
|     return bytesN;
 | |
| }
 | |
| 
 | |
| // Appends to an already initialized String from File parsing
 | |
| int32_t rfString_Append_fUTF16(RF_String* str,FILE* f, char endianess,char* eof)
 | |
| {
 | |
|     char*utf8;
 | |
|     uint32_t utf8ByteLength;
 | |
|     int32_t bytesN;
 | |
|     // depending on the file's endianess
 | |
|     if(endianess == RF_LITTLE_ENDIAN)
 | |
|     {
 | |
|         if((bytesN=rfFReadLine_UTF16LE(f,&utf8,&utf8ByteLength,eof)) < 0)
 | |
|         {
 | |
|             LOG_ERROR("Failure to append the contents of a Little Endian UTF-16 file to a String",bytesN);
 | |
|             return bytesN;
 | |
|         }
 | |
|     }// end of little endian
 | |
|     else// big endian
 | |
|     {
 | |
|         if((bytesN=rfFReadLine_UTF16BE(f,&utf8,&utf8ByteLength,eof)) < 0)
 | |
|         {
 | |
|             LOG_ERROR("Failure to append the contents of a Big Endian UTF-16 file to a String",bytesN);
 | |
|             return bytesN;
 | |
|         }
 | |
|     }// end of big endian case
 | |
|     // success
 | |
|     rfString_Append(str,RFS_(utf8));
 | |
|     free(utf8);
 | |
|     return bytesN;
 | |
| }
 | |
| 
 | |
| // Allocates and returns a string from file parsing. The file's encoding must be UTF-32.If for some reason (like EOF reached) no string can be read then null is returned. A check for a valid sequence of bytes is performed.
 | |
| RF_String* rfString_Create_fUTF32(FILE* f,char endianess,char* eof)
 | |
| {
 | |
|     RF_String* ret;
 | |
|     RF_MALLOC(ret,sizeof(RF_String));
 | |
|     if(rfString_Init_fUTF32(ret,f,endianess,eof) < 0)
 | |
|     {
 | |
|         free(ret);
 | |
|         return 0;
 | |
|     }
 | |
|     return ret;
 | |
| }
 | |
| // Initializes a string from file parsing. The file's encoding must be UTF-32.If for some reason (like EOF reached) no string can be read then null is returned. A check for a valid sequence of bytes is performed.
 | |
| int32_t rfString_Init_fUTF32(RF_String* str,FILE* f,char endianess,char* eof)
 | |
| {
 | |
|     int32_t bytesN;
 | |
|     // depending on the file's endianess
 | |
|     if(endianess == RF_LITTLE_ENDIAN)
 | |
|     {
 | |
|         if((bytesN=rfFReadLine_UTF32LE(f,&str->bytes,&str->byteLength,eof)) <0)
 | |
|         {
 | |
|             LOG_ERROR("Failure to initialize a String from reading a Little Endian UTF-32 file",bytesN);
 | |
|             return bytesN;
 | |
|         }
 | |
|     }// end of little endian
 | |
|     else// big endian
 | |
|     {
 | |
|         if((bytesN=rfFReadLine_UTF16BE(f,&str->bytes,&str->byteLength,eof)) < 0)
 | |
|         {
 | |
|             LOG_ERROR("Failure to initialize a String from reading a Big Endian UTF-32 file",bytesN);
 | |
|             return bytesN;
 | |
|         }
 | |
|     }// end of big endian case
 | |
|     // success
 | |
|     return bytesN;
 | |
| }
 | |
| // Assigns the contents of a UTF-32 file to a string
 | |
| int32_t rfString_Assign_fUTF32(RF_String* str,FILE* f,char endianess, char* eof)
 | |
| {
 | |
|     int32_t bytesN;
 | |
|     char*utf8;
 | |
|     uint32_t utf8ByteLength;
 | |
|     // depending on the file's endianess
 | |
|     if(endianess == RF_LITTLE_ENDIAN)
 | |
|     {
 | |
|         if((bytesN=rfFReadLine_UTF32LE(f,&utf8,&utf8ByteLength,eof)) < 0)
 | |
|         {
 | |
|             LOG_ERROR("Failure to assign to a String from reading a Little Endian UTF-32 file",bytesN);
 | |
|             return bytesN;
 | |
|         }
 | |
|     }// end of little endian
 | |
|     else// big endian
 | |
|     {
 | |
|         if((bytesN=rfFReadLine_UTF16BE(f,&utf8,&utf8ByteLength,eof)) < 0)
 | |
|         {
 | |
|             LOG_ERROR("Failure to assign to a String from reading a Big Endian UTF-32 file",bytesN);
 | |
|             return bytesN;
 | |
|         }
 | |
|     }// end of big endian case
 | |
|     // success
 | |
|     // assign it to the string
 | |
|     if(str->byteLength <= utf8ByteLength)
 | |
|     {
 | |
|         RF_REALLOC(str->bytes,char,utf8ByteLength+1);
 | |
|     }
 | |
|     memcpy(str->bytes,utf8,utf8ByteLength+1);
 | |
|     str->byteLength = utf8ByteLength;
 | |
|     // free the file's utf8 buffer
 | |
|     free(utf8);
 | |
|     return bytesN;
 | |
| }
 | |
| // Appends the contents of a UTF-32 file to a string
 | |
| int32_t rfString_Append_fUTF32(RF_String* str,FILE* f,char endianess, char* eof)
 | |
| {
 | |
|     int32_t bytesN;
 | |
|     char*utf8;
 | |
|     uint32_t utf8ByteLength;
 | |
|     // depending on the file's endianess
 | |
|     if(endianess == RF_LITTLE_ENDIAN)
 | |
|     {
 | |
|         if((bytesN=rfFReadLine_UTF32LE(f,&utf8,&utf8ByteLength,eof)) < 0)
 | |
|         {
 | |
|             LOG_ERROR("Failure to append to a String from reading a Little Endian UTF-32 file",bytesN);
 | |
|             return bytesN;
 | |
|         }
 | |
|     }// end of little endian
 | |
|     else// big endian
 | |
|     {
 | |
|         if((bytesN=rfFReadLine_UTF16BE(f,&utf8,&utf8ByteLength,eof)) < 0)
 | |
|         {
 | |
|             LOG_ERROR("Failure to append to a String from reading a Big Endian UTF-32 file",bytesN);
 | |
|             return bytesN;
 | |
|         }
 | |
|     }// end of big endian case
 | |
|     // success
 | |
|     // append it
 | |
|     rfString_Append(str,RFS_(utf8));
 | |
|     // free the file'sutf8 buffer
 | |
|     free(utf8);
 | |
|     return bytesN;
 | |
| }
 | |
| 
 | |
| // Writes a string to a file in UTF-8 encoding.
 | |
| int32_t i_rfString_Fwrite(void* sP,FILE* f,char* encodingP)
 | |
| {
 | |
|     uint32_t *utf32,length,i;
 | |
|     uint16_t* utf16;
 | |
|     RF_String* s = (RF_String*)sP;
 | |
|     char encoding = *encodingP;
 | |
|     // depending on the encoding
 | |
|     switch(encoding)
 | |
|     {
 | |
|         case RF_UTF8:
 | |
|             if(fwrite(s->bytes,1,s->byteLength,f) != s->byteLength)
 | |
|                 break;// and go to error logging
 | |
|             return RF_SUCCESS;
 | |
|         break;
 | |
|         case RF_UTF16_LE:
 | |
|             utf16 = rfString_ToUTF16(s,&length);
 | |
|             if(rfUTILS_Endianess() != RF_LITTLE_ENDIAN)
 | |
|             {
 | |
|                 for(i=0;i<length;i++)
 | |
|                 {
 | |
|                     rfUTILS_SwapEndianUS(&utf16[i]);
 | |
|                 }
 | |
|             }
 | |
|             if(fwrite(utf16,2,length,f) != length)
 | |
|             {
 | |
|                 free(utf16);
 | |
|                 break;// and go to error logging
 | |
|             }
 | |
|             free(utf16);
 | |
|             return RF_SUCCESS;
 | |
|         break;
 | |
|         case RF_UTF16_BE:
 | |
|             utf16 = rfString_ToUTF16(s,&length);
 | |
|             if(rfUTILS_Endianess() != RF_BIG_ENDIAN)
 | |
|             {
 | |
|                 for(i=0;i<length;i++)
 | |
|                 {
 | |
|                     rfUTILS_SwapEndianUS(&utf16[i]);
 | |
|                 }
 | |
|             }
 | |
|             if(fwrite(utf16,2,length,f) != length)
 | |
|             {
 | |
|                 free(utf16);
 | |
|                 break;// and go to error logging
 | |
|             }
 | |
|             free(utf16);
 | |
|             return RF_SUCCESS;
 | |
|         break;
 | |
|         case RF_UTF32_LE:
 | |
|             utf32 = rfString_ToUTF32(s,&length);
 | |
|             if(rfUTILS_Endianess() != RF_LITTLE_ENDIAN)
 | |
|             {
 | |
|                 for(i=0;i<length;i++)
 | |
|                 {
 | |
|                     rfUTILS_SwapEndianUI(&utf32[i]);
 | |
|                 }
 | |
|             }
 | |
|             if(fwrite(utf32,4,length,f) != length)
 | |
|             {
 | |
|                 free(utf32);
 | |
|                 break;// and go to error logging
 | |
|             }
 | |
|             free(utf32);
 | |
|             return RF_SUCCESS;
 | |
|         break;
 | |
|         case RF_UTF32_BE:
 | |
|             utf32 = rfString_ToUTF32(s,&length);
 | |
|             if(rfUTILS_Endianess() != RF_BIG_ENDIAN)
 | |
|             {
 | |
|                 for(i=0;i<length;i++)
 | |
|                 {
 | |
|                     rfUTILS_SwapEndianUI(&utf32[i]);
 | |
|                 }
 | |
|             }
 | |
|             if(fwrite(utf32,4,length,f) != length)
 | |
|             {
 | |
|                 free(utf32);
 | |
|                 break;// and go to error logging
 | |
|             }
 | |
|             free(utf32);
 | |
|             return RF_SUCCESS;
 | |
|         break;
 | |
|     }
 | |
|     // if we get here it means an error, and we log it with the macro
 | |
|     i_WRITE_CHECK(f,"Writting a string to a file")
 | |
|     return RE_FILE_WRITE;
 | |
| }
 | |
| 
 | |
| 
 |