Linguist 2.3.4

Rebuild samples db
Ensure lang is skipped on any binary file
2025-10-29 17:50:22 +00:00 · 2012-09-24 10:54:17 -05:00 · 2012-09-24 10:52:05 -05:00 · 2012-09-24 10:51:39 -05:00 · 2012-09-24 10:50:49 -05:00 · 2012-09-24 10:48:22 -05:00
23 changed files with 12325 additions and 5134 deletions
--- a/github-linguist.gemspec
+++ b/github-linguist.gemspec
@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
  s.name    = 'github-linguist'
-  s.version = '2.3.0'
+  s.version = '2.3.4'
  s.summary = "GitHub Language detection"
  s.authors = "GitHub"
@@ -12,6 +12,7 @@ Gem::Specification.new do |s|
  s.add_dependency 'escape_utils',    '~> 0.2.3'
  s.add_dependency 'mime-types',      '~> 1.19'
  s.add_dependency 'pygments.rb',     '>= 0.2.13'
  s.add_development_dependency 'mocha'
  s.add_development_dependency 'json'
  s.add_development_dependency 'rake'
  s.add_development_dependency 'yajl-ruby'
--- a/lib/linguist/blob_helper.rb
+++ b/lib/linguist/blob_helper.rb
@@ -160,7 +160,7 @@ module Linguist
    #
    # Return true or false
    def safe_to_colorize?
-      text? && !large? && !high_ratio_of_long_lines?
+      !large? && text? && !high_ratio_of_long_lines?
    end
    # Internal: Does the blob have a ratio of long lines?
@@ -204,7 +204,31 @@ module Linguist
    #
    # Returns an Array of lines
    def lines
-      @lines ||= (viewable? && data) ? data.split("\n", -1) : []
+      @lines ||=
        if viewable? && data
          data.split(line_split_character, -1)
        else
          []
        end
    end
    # Character used to split lines. This is almost always "\n" except when Mac
    # Format is detected in which case it's "\r".
    #
    # Returns a split pattern string.
    def line_split_character
      # Memoized: the separator is decided once, on first use.
      @line_split_character ||=
        if mac_format?
          "\r"
        else
          "\n"
        end
    end
    # Public: Is the data in ** Mac Format **. This format uses \r (0x0d) characters
    # for line ends and does not include a \n (0x0a).
    #
    # Returns true when mac format is detected.
    def mac_format?
      # Returns true when the first "\r" is not followed by "\n", false when it
      # is (CRLF), and nil when the blob is not viewable, has no data, or has no
      # "\r" within the searched prefix.
      return if !viewable?

      # Read the blob once. `lines` treats a nil `data` as "no content", so do
      # the same here instead of raising NoMethodError on nil.
      content = data
      return if content.nil?

      # Only the first 4096 characters are searched for a carriage return.
      if pos = content[0, 4096].index("\r")
        # The follow-up character is looked up in the full content, so a "\r"
        # at the very end of the searched prefix is still compared with the
        # character that comes after it.
        content[pos + 1] != ?\n
      end
    end
    # Public: Get number of lines of code
@@ -250,7 +274,9 @@ module Linguist
    #
    # Return true or false
    def indexable?
-      if binary?
+      if size > 100 * 1024
        false
      elsif binary?
        false
      elsif extname == '.txt'
        true
@@ -260,8 +286,6 @@ module Linguist
        false
      elsif generated?
        false
      elsif size > 100 * 1024
        false
      else
        true
      end
@@ -278,7 +302,7 @@ module Linguist
      if defined?(@data) && @data.is_a?(String)
        data = @data
      else
-        data = lambda { binary_mime_type? ? "" : self.data }
+        data = lambda { (binary_mime_type? || binary?) ? "" : self.data }
      end
      @language = Language.detect(name.to_s, data, mode)
--- a/lib/linguist/language.rb
+++ b/lib/linguist/language.rb
@@ -84,7 +84,9 @@ module Linguist
      if possible_languages.length > 1
        data = data.call() if data.respond_to?(:call)
-        if result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
+        if data.nil? || data == ""
          nil
        elsif result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
          Language[result[0]]
        end
      else
--- a/lib/linguist/languages.yml
+++ b/lib/linguist/languages.yml
@@ -367,6 +367,14 @@ Ecere Projects:
  extensions:
  - .epj
 Ecl:
  type: programming
  color: "#8a1267"
  primary_extension: .ecl
  lexer: ECL
  extensions:
  - .eclxml
 Eiffel:
  type: programming
  lexer: Text only
--- a/lib/linguist/samples.json
+++ b/lib/linguist/samples.json
--- a/lib/linguist/samples.rb
+++ b/lib/linguist/samples.rb
@@ -76,12 +76,14 @@ module Linguist
          db['extnames'][language_name] ||= []
          if !db['extnames'][language_name].include?(sample[:extname])
            db['extnames'][language_name] << sample[:extname]
            db['extnames'][language_name].sort!
          end
        end
        if sample[:filename]
          db['filenames'][language_name] ||= []
          db['filenames'][language_name] << sample[:filename]
          db['filenames'][language_name].sort!
        end
        data = File.read(sample[:path])
--- a/lib/linguist/tokenizer.rb
+++ b/lib/linguist/tokenizer.rb
@@ -16,6 +16,9 @@ module Linguist
      new.extract_tokens(data)
    end
    # Read up to 100KB
    BYTE_LIMIT = 100_000
    # Start state on token, ignore anything till the next newline
    SINGLE_LINE_COMMENTS = [
      '//', # C
@@ -55,6 +58,8 @@ module Linguist
      tokens = []
      until s.eos?
        break if s.pos >= BYTE_LIMIT
        if token = s.scan(/^#!.+$/)
          if name = extract_shebang(token)
            tokens << "SHEBANG#!#{name}"
--- a/samples/C++/gdsdbreader.h
+++ b/samples/C++/gdsdbreader.h
@@ -0,0 +1,69 @@
 #ifndef GDSDBREADER_H
 #define GDSDBREADER_H
 // This file contains core structures, classes and types for the entire gds app
 // WARNING: DO NOT MODIFY UNTIL IT'S STRICTLY NECESSARY
 #include <QDir>
 #include "diagramwidget/qgldiagramwidget.h"
 #define GDS_DIR "gdsdata"
 enum level {LEVEL_ONE, LEVEL_TWO, LEVEL_THREE};
 // The internal structure of the db to store information about each node (each level)
 // this will be serialized before being written to file
 class dbDataStructure
 {
 public:
    // Basic node attributes; all three are written by operator<< and read back by operator>>
    QString label;
    quint32 depth;
    quint32 userIndex;
    QByteArray data;    // Written through qCompress() by operator<< and restored through qUncompress()
                        // by operator>> (compression saves disk space).
                        // NOTE(review): the original comment called this COMPRESSED data held in RAM,
                        // but operator>> stores the uncompressed bytes here - confirm the intent
    // The following ID is used to create second-third level files
    quint64 uniqueID;
    // All the next items linked to this one (pointers, not written to the stream)
    QVector<dbDataStructure*> nextItems;
    // Indices corresponding to nextItems; these are what gets written to the stream
    QVector<quint32> nextItemsIndices;
    // The father element (or NULL if it's root); pointer, not written to the stream
    dbDataStructure* father;
    // Index of the father element; this is what gets written to the stream
    quint32 fatherIndex;
    bool noFatherRoot; // Used to tell if this node is the root (and so has no father)
    // These fields will be useful for levels 2 and 3
    QString fileName; // Relative filename for the associated code file
    QByteArray firstLineData; // First line data, used with the line number to retrieve info;
                              // compressed on write and uncompressed on read like `data`
    QVector<quint32> linesNumbers; // First and next lines (next are relative to the first) numbers
    // -- Generic system data not to be stored on disk
    void *glPointer; // GL pointer
    // The stream operators below skip glPointer and the other pointer members,
    // so only disk-relevant data is serialized
    friend QDataStream& operator<<(QDataStream& stream, const dbDataStructure& myclass)
    // Note: this function has to be a "friend" because it cannot be a member function: member functions
    // have an additional parameter "this" which isn't in the argument list of an operator overload. A friend
    // function has full access to private data of the class without having the "this" argument
    {
        // Don't write glPointer or any pointer-dependent structure; data and firstLineData
        // are compressed on the way out
        return stream << myclass.label << myclass.depth << myclass.userIndex << qCompress(myclass.data)
                         << myclass.uniqueID << myclass.nextItemsIndices << myclass.fatherIndex << myclass.noFatherRoot
                            << myclass.fileName << qCompress(myclass.firstLineData) << myclass.linesNumbers;
    }
    friend QDataStream& operator>>(QDataStream& stream, dbDataStructure& myclass)
    {
        // Don't read glPointer or the pointer members either; the field order mirrors operator<<
        stream >> myclass.label >> myclass.depth >> myclass.userIndex >> myclass.data
                      >> myclass.uniqueID >> myclass.nextItemsIndices >> myclass.fatherIndex >> myclass.noFatherRoot
                         >> myclass.fileName >> myclass.firstLineData >> myclass.linesNumbers;
        // Undo the qCompress() applied by operator<<
        myclass.data = qUncompress(myclass.data);
        myclass.firstLineData = qUncompress(myclass.firstLineData);
        return stream;
    }
 };
 #endif // GDSDBREADER_H
--- a/samples/C/rf_io.c
+++ b/samples/C/rf_io.c
--- a/samples/C/rf_io.h
+++ b/samples/C/rf_io.h
@@ -0,0 +1,682 @@
 /**
 ** Copyright (c) 2011-2012, Karapetsas Eleftherios
 ** All rights reserved.
 **
 ** Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 **  1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 **  2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in
 **     the documentation and/or other materials provided with the distribution.
 **  3. Neither the name of the Original Author of Refu nor the names of its contributors may be used to endorse or promote products derived from
 **
 **  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 **  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 **  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 **  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 **  SERVICES;LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 **  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 **  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 **/
 #ifndef REFU_IO_H
 #define REFU_IO_H
 #include <rf_setup.h>
 #include <stdio.h>
 #ifdef __cplusplus
 extern "C"
 {// opening bracket for calling from C++
 #endif
 // New line feed
 #define RF_LF   0xA
 // Carriage Return
 #define RF_CR   0xD
 #ifdef REFU_WIN32_VERSION
    #define i_PLUSB_WIN32   "b"
 #else
    #define i_PLUSB_WIN32   ""
 #endif
 // This is the type that represents the file offset
 #ifdef _MSC_VER
 typedef __int64 foff_rft;
 #else
 #include <sys/types.h>
 typedef off64_t foff_rft;
 #endif
 ///Fseek and Ftell definitions
 #ifdef _MSC_VER
    #define rfFseek(i_FILE_,i_OFFSET_,i_WHENCE_)    _fseeki64(i_FILE_,i_OFFSET_,i_WHENCE_)
    #define rfFtell(i_FILE_)                        _ftelli64(i_FILE_)
 #else
    #define rfFseek(i_FILE_,i_OFFSET_,i_WHENCE_)    fseeko64(i_FILE_,i_OFFSET_,i_WHENCE_)
    #define rfFtell(i_FILE_)                        ftello64(i_FILE_)
 #endif
 /**
 ** @defgroup RF_IOGRP I/O
 ** @addtogroup RF_IOGRP
 ** @{
 **/
 // @brief Reads a UTF-8 file descriptor until end of line or EOF is found and returns a UTF-8 byte buffer
 //
 // The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
 // Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
 // default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
 // _set_fmode(). For more information take a look at the msdn pages here:
 // http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
 //
 // When the compile flag @c RF_NEWLINE_CRLF is defined (the default case at Windows) then this function
 // shall not be adding any CR character that is found in the file behind a newline character since this is
 // the Windows line ending scheme. Beware though that the returned  read bytes value shall still count the CR character inside.
 //
 // @param[in] f The file descriptor to read
 // @param[out] utf8 Give here a reference to an uninitialized char* that will be allocated inside the function
 // and contain the utf8 byte buffer. Needs to be freed by the caller explicitly later
 // @param[out] byteLength Give an @c uint32_t here to receive the length of the @c utf8 buffer in bytes
 // @param[out] bufferSize Give an @c uint32_t here to receive the capacity of the @c utf8 buffer in bytes
 // @param[out] eof Pass a pointer to a char to receive a true or false value in case the end of file
 // was reached while reading this line
 // @return Returns either a positive number for success that represents the number of bytes read from @c f or an error in case something goes wrong.
 // The possible errors to return are the same as rfFgets_UTF8()
 i_DECLIMEX_ int32_t rfFReadLine_UTF8(FILE* f,char** utf8,uint32_t* byteLength,uint32_t* bufferSize,char* eof);
 // @brief Reads a Big Endian UTF-16 file descriptor until end of line or EOF is found and returns a UTF-8 byte buffer
 //
 // The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
 // Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
 // default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
 // _set_fmode(). For more information take a look at the msdn pages here:
 // http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
 //
 // When the compile flag @c RF_NEWLINE_CRLF is defined (the default case at Windows) then this function
 // shall not be adding any CR character that is found in the file behind a newline character since this is
 // the Windows line ending scheme. Beware though that the returned  read bytes value shall still count the CR character inside.
 //
 // @param[in] f The file descriptor to read
 // @param[out] utf8 Give here a reference to an uninitialized char* that will be allocated inside the function
 // and contain the utf8 byte buffer. Needs to be freed by the caller explicitly later
 // @param[out] byteLength Give an @c uint32_t here to receive the length of the @c utf8 buffer in bytes
 // @param[out] eof Pass a pointer to a char to receive a true or false value in case the end of file
 // was reached while reading this line
 // @return Returns either a positive number for success that represents the number of bytes read from @c f or an error in case something goes wrong.
 // + Any error that can be returned by @ref rfFgets_UTF16BE()
 // + @c RE_UTF16_INVALID_SEQUENCE: Failed to decode the UTF-16 byte stream of the file descriptor
 // + @c RE_UTF8_ENCODING: Failed to encode the UTF-16 of the file descriptor into UTF-8
 i_DECLIMEX_ int32_t rfFReadLine_UTF16BE(FILE* f,char** utf8,uint32_t* byteLength,char* eof);
 // @brief Reads a Little Endian UTF-16 file descriptor until end of line or EOF is found and returns a UTF-8 byte buffer
 //
 // The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
 // Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
 // default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
 // _set_fmode(). For more information take a look at the msdn pages here:
 // http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
 //
 // When the compile flag @c RF_NEWLINE_CRLF is defined (the default case at Windows) then this function
 // shall not be adding any CR character that is found in the file behind a newline character since this is
 // the Windows line ending scheme. Beware though that the returned read bytes value shall still count the CR character inside.
 //
 // @param[in] f The file descriptor to read
 // @param[out] utf8 Give here a reference to an uninitialized char* that will be allocated inside the function
 // and contain the utf8 byte buffer. Needs to be freed by the caller explicitly later
 // @param[out] byteLength Give an @c uint32_t here to receive the length of the @c utf8 buffer in bytes
 // @param[out] eof Pass a pointer to a char to receive a true or false value in case the end of file
 // was reached while reading this line
 // @return Returns either a positive number for success that represents the number of bytes read from @c f or an error in case something goes wrong.
 // + Any error that can be returned by @ref rfFgets_UTF16LE()
 // + @c RE_UTF16_INVALID_SEQUENCE: Failed to decode the UTF-16 byte stream of the file descriptor
 // + @c RE_UTF8_ENCODING: Failed to encode the UTF-16 of the file descriptor into UTF-8
 i_DECLIMEX_ int32_t rfFReadLine_UTF16LE(FILE* f,char** utf8,uint32_t* byteLength,char* eof);
 // @brief Reads a Big Endian UTF-32 file descriptor until end of line or EOF is found and returns a UTF-8 byte buffer
 //
 // The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
 // Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
 // default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
 // _set_fmode(). For more information take a look at the msdn pages here:
 // http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
 //
 // When the compile flag @c RF_NEWLINE_CRLF is defined (the default case at Windows) then this function
 // shall not be adding any CR character that is found in the file behind a newline character since this is
 // the Windows line ending scheme. Beware though that the returned read bytes value shall still count the CR character inside.
 //
 // @param[in] f The file descriptor to read
 // @param[out] utf8 Give here a reference to an uninitialized char* that will be allocated inside the function
 // and contain the utf8 byte buffer. Needs to be freed by the caller explicitly later
 // @param[out] byteLength Give an @c uint32_t here to receive the length of the @c utf8 buffer in bytes
 // @param[out] eof Pass a pointer to a char to receive a true or false value in case the end of file
 // was reached while reading this line
 // @return Returns either a positive number for success that represents the number of bytes read from @c f or an error in case something goes wrong.
 // + Any error that can be returned by @ref rfFgets_UTF32BE()
 // + @c RE_UTF8_ENCODING: Failed to encode the UTF-16 of the file descriptor into UTF-8
 i_DECLIMEX_ int32_t rfFReadLine_UTF32BE(FILE* f,char** utf8,uint32_t* byteLength,char* eof);
 // @brief Reads a Little Endian UTF-32 file descriptor until end of line or EOF is found and returns a UTF-8 byte buffer
 //
 // The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
 // Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
 // default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
 // _set_fmode(). For more information take a look at the msdn pages here:
 // http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
 //
 // When the compile flag @c RF_NEWLINE_CRLF is defined (the default case at Windows) then this function
 // shall not be adding any CR character that is found in the file behind a newline character since this is
 // the Windows line ending scheme. Beware though that the returned read bytes value shall still count the CR character inside.
 //
 // @param[in] f The file descriptor to read
 // @param[out] utf8 Give here a reference to an uninitialized char* that will be allocated inside the function
 // and contain the utf8 byte buffer. Needs to be freed by the caller explicitly later
 // @param[out] byteLength Give an @c uint32_t here to receive the length of the @c utf8 buffer in bytes
 // @param[out] eof Pass a pointer to a char to receive a true or false value in case the end of file
 // was reached while reading this line
 // @return Returns either a positive number for success that represents the number of bytes read from @c f or an error in case something goes wrong.
 // + Any error that can be returned by @ref rfFgets_UTF32LE()
 // + @c RE_UTF8_ENCODING: Failed to encode the UTF-16 of the file descriptor into UTF-8
 i_DECLIMEX_ int32_t rfFReadLine_UTF32LE(FILE* f,char** utf8,uint32_t* byteLength,char* eof);
 // @brief Gets a number of bytes from a BIG endian UTF-32 file descriptor
 //
 // This is a function that's similar to c library fgets but it also returns the number of bytes read. Reads in from the file until @c num bytes
 // have been read or new line or EOF character has been encountered.
 //
 // The function will read until @c num characters are read and if @c num
 // would take us to the middle of a UTF32 character then the next character shall also be read
 // and the function will return the number of bytes read.
 // Since the function null terminates the buffer the given @c buff needs to be of at least
 // @c num+7 size to cater for the worst case.
 //
 // The final bytestream stored inside @c buff is in the endianess of the system.
 //
 // If right after the last character read comes the EOF, the function
 // shall detect so and assign @c true to @c eof.
 //
 // In Windows where file endings are in the form of 2 bytes CR-LF (Carriage return - NewLine) this function
 // shall just ignore the carriage returns and not return it inside the return buffer at @c buff.
 //
 // The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
 // Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
 // default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
 // _set_fmode(). For more information take a look at the msdn pages here:
 // http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
 //
 // @param[in] buff A buffer to be filled with the contents of the file. Should be of size at least @c num+7
 // @param[in] num The maximum number of bytes to read from within the file NOT including the null terminating character (which in itself is 4 bytes). Should be a multiple of 4
 // @param[in] f A valid FILE descriptor from which to read the bytes
 // @param[out] eof Pass a reference to a char to receive a true/false value for whether EOF has been reached.
 // @return Returns the actual number of bytes read or an error if there was a problem.
 // The possible errors are:
 // + @c RE_FILE_READ: If during reading the file there was an unknown read error
 // + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
 // + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
 // + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
 // + @c RE_INTERRUPT: If during reading, there was a system interrupt
 // + @c RE_FILE_IO: If there was a physical I/O error
 // + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
 i_DECLIMEX_ int32_t rfFgets_UTF32BE(char* buff,uint32_t num,FILE* f,char* eof);
 // @brief Gets a number of bytes from a Little endian UTF-32 file descriptor
 //
 // This is a function that's similar to c library fgets but it also returns the number of bytes read. Reads in from the file until @c num bytes
 // have been read or new line or EOF character has been encountered.
 //
 // The function will read until @c num characters are read and if @c num
 // would take us to the middle of a UTF32 character then the next character shall also be read
 // and the function will return the number of bytes read.
 // Since the function null terminates the buffer the given @c buff needs to be of at least
 // @c num+7 size to cater for the worst case.
 //
 // The final bytestream stored inside @c buff is in the endianess of the system.
 //
 // If right after the last character read comes the EOF, the function
 // shall detect so and assign @c true to @c eof.
 //
 // In Windows where file endings are in the form of 2 bytes CR-LF (Carriage return - NewLine) this function
 // shall just ignore the carriage returns and not return it inside the return buffer at @c buff.
 //
 // The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
 // Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
 // default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
 // _set_fmode(). For more information take a look at the msdn pages here:
 // http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
 //
 // @param[in] buff A buffer to be filled with the contents of the file. Should be of size at least @c num+7
 // @param[in] num The maximum number of bytes to read from within the file NOT including the null terminating character (which in itself is 4 bytes). Should be a multiple of 4
 // @param[in] f A valid FILE descriptor from which to read the bytes
 // @param[out] eof Pass a reference to a char to receive a true/false value for whether EOF has been reached.
 // @return Returns the actual number of bytes read or an error if there was a problem.
 // The possible errors are:
 // + @c RE_FILE_READ: If during reading the file there was an unknown read error
 // + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
 // + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
 // + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
 // + @c RE_INTERRUPT: If during reading, there was a system interrupt
 // + @c RE_FILE_IO: If there was a physical I/O error
 // + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
 i_DECLIMEX_ int32_t rfFgets_UTF32LE(char* buff,uint32_t num,FILE* f,char* eof);
 // @brief Gets a number of bytes from a BIG endian UTF-16 file descriptor
 //
 // This is a function that's similar to c library fgets but it also returns the number of bytes read. Reads in from the file until @c num bytes
 // have been read or new line or EOF character has been encountered.
 //
 // The function will read until @c num characters are read and if @c num
 // would take us to the middle of a UTF16 character then the next character shall also be read
 // and the function will return the number of bytes read.
 // Since the function null terminates the buffer the given @c buff needs to be of at least
 // @c num+5 size to cater for the worst case.
 //
 // The final bytestream stored inside @c buff is in the endianess of the system.
 //
 // If right after the last character read comes the EOF, the function
 // shall detect so and assign @c true to @c eof.
 //
 // In Windows where file endings are in the form of 2 bytes CR-LF (Carriage return - NewLine) this function
 // shall just ignore the carriage returns and not return it inside the return buffer at @c buff.
 //
 // The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
 // Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
 // default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
 // _set_fmode(). For more information take a look at the msdn pages here:
 // http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
 //
 // @param[in] buff A buffer to be filled with the contents of the file. Should be of size at least @c num+5
 // @param[in] num The maximum number of bytes to read from within the file NOT including the null terminating character (which in itself is 2 bytes). Should be a multiple of 2
 // @param[in] f A valid FILE descriptor from which to read the bytes
 // @param[out] eof Pass a reference to a char to receive a true/false value for whether EOF has been reached.
 // @return Returns the actual number of bytes read or an error if there was a problem.
 // The possible errors are:
 // + @c RE_FILE_READ: If during reading the file there was an unknown read error
 // + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
 // + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
 // + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
 // + @c RE_INTERRUPT: If during reading, there was a system interrupt
 // + @c RE_FILE_IO: If there was a physical I/O error
 // + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
 i_DECLIMEX_ int32_t rfFgets_UTF16BE(char* buff,uint32_t num,FILE* f,char* eof);
 // @brief Gets a number of bytes from a Little endian UTF-16 file descriptor
 //
 // This is a function that's similar to c library fgets but it also returns the number of bytes read. Reads in from the file until @c num bytes
 // have been read or new line or EOF character has been encountered.
 //
 // The function will read until @c num characters are read and if @c num
 // would take us to the middle of a UTF16 character then the next character shall also be read
 // and the function will return the number of bytes read.
 // Since the function null terminates the buffer the given @c buff needs to be of at least
 // @c num+5 size to cater for the worst case.
 //
 // The final bytestream stored inside @c buff is in the endianess of the system.
 //
 // If right after the last character read comes the EOF, the function
 // shall detect so and assign @c true to @c eof.
 //
 // In Windows where file endings are in the form of 2 bytes CR-LF (Carriage return - NewLine) this function
 // shall just ignore the carriage returns and not return it inside the return buffer at @c buff.
 //
 // The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
 // Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
 // default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
 // _set_fmode(). For more information take a look at the msdn pages here:
 // http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
 //
 // @param[in] buff A buffer to be filled with the contents of the file. Should be of size at least @c num+2
 // @param[in] num The maximum number of bytes to read from within the file NOT including the null terminating character (which in itself is 2 bytes). Should be a multiple of 2
 // @param[in] f A valid FILE descriptor from which to read the bytes
 // @param[out] eof Pass a reference to a char to receive a true/false value for whether EOF has been reached.
 // @return Returns the actual number of bytes read or an error if there was a problem.
 // The possible errors are:
 // + @c RE_FILE_READ: If during reading the file there was an unknown read error
 // + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
 // + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
 // + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
 // + @c RE_INTERRUPT: If during reading, there was a system interrupt
 // + @c RE_FILE_IO: If there was a physical I/O error
 // + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
 i_DECLIMEX_ int32_t rfFgets_UTF16LE(char* buff,uint32_t num,FILE* f,char* eof);
 // @brief Gets a number of bytes from a UTF-8 file descriptor
 //
 // This is a function that's similar to c library fgets but it also returns the number of bytes read. Reads in from the file until @c num characters
 // have been read or new line or EOF character has been encountered.
 //
 // The function  automatically adds a null termination character at the end of
 // @c buff but this character is not included in the returned actual number of bytes.
 //
 // The function will read until @c num characters are read and if @c num
 // would take us to the middle of a UTF8 character then the next character shall also be read
 // and the function will return the number of bytes read.
 // Since the function null terminates the buffer the given @c buff needs to be of at least
 // @c num+4 size to cater for the worst case.
 //
 // If right after the last character read comes the EOF, the function
 // shall detect so and assign @c true to @c eof.
 //
 // In Windows where file endings are in the form of 2 bytes CR-LF (Carriage return - NewLine) this function
 // shall just ignore the carriage returns and not return it inside the return buffer at @c buff.
 //
 // The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
 // Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
 // default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
 // _set_fmode(). For more information take a look at the msdn pages here:
 // http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
 //
 // @param[in] buff A buffer to be filled with the contents of the file. Should be of size at least @c num+4
 // @param[in] num The maximum number of bytes to read from within the file NOT including the null terminating character (which in itself is 1 byte)
 // @param[in] f A valid FILE descriptor from which to read the bytes
 // @param[out] eof Pass a reference to a char to receive a true/false value for whether EOF has been reached.
 // @return Returns the actual number of bytes read or an error if there was a problem.
 // The possible errors are:
 // + @c RE_UTF8_INVALID_SEQUENCE_INVALID_BYTE: If an invalid UTF-8 byte has been found
 // + @c RE_UTF8_INVALID_SEQUENCE_CONBYTE: If during parsing the file we were expecting a continuation
 // byte and did not find it
 // + @c RE_UTF8_INVALID_SEQUENCE_END: If the null character is encountered in between bytes that should
 // have been continuation bytes
 // + @c RE_FILE_READ: If during reading the file there was an unknown read error
 // + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
 // + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
 // + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
 // + @c RE_INTERRUPT: If during reading, there was a system interrupt
 // + @c RE_FILE_IO: If there was a physical I/O error
 // + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
 i_DECLIMEX_ int32_t rfFgets_UTF8(char* buff,uint32_t num,FILE* f,char* eof);
 // @brief  Gets a unicode character from a UTF-8 file descriptor
 //
 // This function attempts to assume a more modern fgetc() role for UTF-8 encoded files.
 // Reads bytes from the File descriptor @c f until a full UTF-8 unicode character has been read
 //
 // After this function the file pointer will have moved either by @c 1, @c 2, @c 3 or @c 4
 // bytes if the return value is positive. You can see how much by checking the return value.
 //
 // You shall need to provide an integer at @c c to contain either the decoded Unicode
 // codepoint or the UTF-8 encoded byte depending on the value of the @c cp argument.
 //
 // @param f A valid FILE descriptor from which to read the bytes
 // @param c Pass an int that will receive either the unicode code point value or
 // the UTF8 bytes depending on the value of the @c cp flag
 // @param cp A boolean flag. If @c true then the int passed at @c c will contain the unicode code point
 // of the read character, so the UTF-8 will be decoded.
 // If @c false the int passed at @c c will contain the value of the read bytes in UTF-8 without any decoding
 // @return Returns the number of bytes read (either @c 1, @c 2, @c 3 or @c 4) or an error if the function
 // fails for some reason. Possible error values are:
 // + @c RE_FILE_EOF: The end of file has been found while reading. If the end of file is encountered
 // in the middle of a UTF-8 encoded character where we would be expecting something different,
 // an @c RE_UTF8_INVALID_SEQUENCE_END error is also logged
 // + @c RE_UTF8_INVALID_SEQUENCE_INVALID_BYTE: If an invalid UTF-8 byte has been found
 // + @c RE_UTF8_INVALID_SEQUENCE_CONBYTE: If during parsing the file we were expecting a continuation
 // byte and did not find it
 // + @c RE_UTF8_INVALID_SEQUENCE_END: If the null character is encountered in between bytes that should
 // have been continuation bytes
 // + @c RE_FILE_READ: If during reading the file there was an unknown read error
 // + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
 // + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
 // + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
 // + @c RE_INTERRUPT: If during reading, there was a system interrupt
 // + @c RE_FILE_IO: If there was a physical I/O error
 // + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
 i_DECLIMEX_ int32_t rfFgetc_UTF8(FILE* f,uint32_t *c,char cp);
 // @brief  Gets a unicode character from a UTF-16 Big Endian file descriptor
 //
 // This function attempts to assume a more modern fgetc() role for UTF-16 encoded files.
 // Reads bytes from the File descriptor @c f until a full UTF-16 unicode character has been read
 //
 // After this function the file pointer will have moved either by @c 2 or @c 4
 // bytes if the return value is positive. You can see how much by checking the return value.
 //
 // You shall need to provide an integer at @c c to contain either the decoded Unicode
 // codepoint or the Big Endian encoded UTF-16 bytes depending on the value of the @c cp argument.
 //
 // @param f A valid FILE descriptor from which to read the bytes
 // @param c Pass an int that will receive either the unicode code point value or
 // the UTF16 bytes depending on the value of the @c cp flag
 // @param cp A boolean flag. If @c true then the int passed at @c c will contain the unicode code point
 // of the read character, so the UTF-16 will be decoded.
 // If @c false the int passed at @c c will contain the value of the read bytes in UTF-16 without any decoding
 // @return Returns the number of bytes read (either @c 2 or @c 4) or an error if the function
 // fails for some reason. Possible error values are:
 // + @c RE_UTF16_INVALID_SEQUENCE: Either the read word or its surrogate pair if 4 bytes were read held illegal values
 // + @c RE_UTF16_NO_SURRPAIR: According to the first read word a surrogate pair was expected but none was found
 // + @c RE_FILE_EOF: The end of file has been found while reading. If the end of file is encountered
 // while we expect a UTF-16 surrogate pair an appropriate error is logged
 // + @c RE_FILE_READ: If during reading the file there was an unknown read error
 // + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
 // + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
 // + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
 // + @c RE_INTERRUPT: If during reading, there was a system interrupt
 // + @c RE_FILE_IO: If there was a physical I/O error
 // + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
 i_DECLIMEX_ int32_t rfFgetc_UTF16BE(FILE* f,uint32_t *c,char cp);
 // @brief  Gets a unicode character from a UTF-16 Little Endian file descriptor
 //
 // This function attempts to assume a more modern fgetc() role for UTF-16 encoded files.
 // Reads bytes from the File descriptor @c f until a full UTF-16 unicode character has been read
 //
 // After this function the file pointer will have moved either by @c 2 or @c 4
 // bytes if the return value is positive. You can see how much by checking the return value.
 //
 // You shall need to provide an integer at @c c to contain either the decoded Unicode
 // codepoint or the Little Endian encoded UTF-16 bytes depending on the value of the @c cp argument.
 //
 // @param f A valid FILE descriptor from which to read the bytes
 // @param c Pass an int that will receive either the unicode code point value or
 // the UTF16 bytes depending on the value of the @c cp flag
 // @param cp A boolean flag. If @c true then the int passed at @c c will contain the unicode code point
 // of the read character, so the UTF-16 will be decoded.
 // If @c false the int passed at @c c will contain the value of the read bytes in UTF-16 without any decoding
 // @return Returns the number of bytes read (either @c 2 or @c 4) or an error if the function
 // fails for some reason. Possible error values are:
 // + @c RE_UTF16_INVALID_SEQUENCE: Either the read word or its surrogate pair if 4 bytes were read held illegal values
 // + @c RE_UTF16_NO_SURRPAIR: According to the first read word a surrogate pair was expected but none was found
 // + @c RE_FILE_EOF: The end of file has been found while reading. If the end of file is encountered
 // while we expect a UTF-16 surrogate pair an appropriate error is logged
 // + @c RE_FILE_READ: If during reading the file there was an unknown read error
 // + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
 // + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
 // + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
 // + @c RE_INTERRUPT: If during reading, there was a system interrupt
 // + @c RE_FILE_IO: If there was a physical I/O error
 // + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
 i_DECLIMEX_ int32_t rfFgetc_UTF16LE(FILE* f,uint32_t *c,char cp);
 // @brief  Gets a unicode character from a UTF-32 Little Endian file descriptor
 //
 // This function attempts to assume a more modern fgetc() role for UTF-32 encoded files.
 // Reads bytes from the File descriptor @c f until a full UTF-32 unicode character has been read
 //
 // After this function the file pointer will have moved by @c 4
 // bytes if the return value is positive.
 //
 // You shall need to provide an integer at @c c to contain the UTF-32 codepoint.
 //
 // @param f A valid FILE descriptor from which to read the bytes
 // @param c Pass an int that will receive the unicode code point value of the read character
 // @return Returns either @c RF_SUCCESS for successful reading or one of the following errors:
 // + @c RE_FILE_EOF: The end of file has been found while reading.
 // + @c RE_FILE_READ: If during reading the file there was an unknown read error
 // + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
 // + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
 // + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
 // + @c RE_INTERRUPT: If during reading, there was a system interrupt
 // + @c RE_FILE_IO: If there was a physical I/O error
 // + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
 i_DECLIMEX_ int32_t rfFgetc_UTF32LE(FILE* f,uint32_t *c);
 // @brief  Gets a unicode character from a UTF-32 Big Endian file descriptor
 //
 // This function attempts to assume a more modern fgetc() role for UTF-32 encoded files.
 // Reads bytes from the File descriptor @c f until a full UTF-32 unicode character has been read
 //
 // After this function the file pointer will have moved by @c 4
 // bytes if the return value is positive.
 //
 // You shall need to provide an integer at @c c to contain the UTF-32 codepoint.
 //
 // @param f A valid FILE descriptor from which to read the bytes
 // @param c Pass an int that will receive the unicode code point value of the read character
 // @return Returns either @c RF_SUCCESS for successful reading or one of the following errors:
 // + @c RE_FILE_EOF: The end of file has been found while reading.
 // + @c RE_FILE_READ: If during reading the file there was an unknown read error
 // + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
 // + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
 // + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
 // + @c RE_INTERRUPT: If during reading, there was a system interrupt
 // + @c RE_FILE_IO: If there was a physical I/O error
 // + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
 i_DECLIMEX_ int32_t rfFgetc_UTF32BE(FILE* f,uint32_t *c);
 // @brief Moves a unicode character backwards in a big endian UTF-32 file stream
 //
 // @param f The file stream
 // @param c Returns the character we moved back to as a unicode codepoint
 // @return Returns either @c RF_SUCCESS for success or one of the following errors:
 // + @c RE_FILE_POS_OVERFLOW: If during trying to read the current file's position it can't be represented by the system
 // + @c RE_FILE_BAD: If The file descriptor is corrupt/illegal
 // + @c RE_FILE_NOTFILE: If the file descriptor is not a file but something else. e.g. socket.
 // + @c RE_FILE_GETFILEPOS: If the file's position could not be retrieved for some unknown reason
 // + @c RE_FILE_WRITE_BLOCK: While attempting to move the file pointer, it was occupied by another thread, and the no block flag was set
 // + @c RE_INTERRUPT: Operating on the file failed due to a system interrupt
 // + @c RE_FILE_IO: There was a physical I/O error
 // + @c RE_FILE_NOSPACE: There was no space on the device holding the file
 // + @c RE_FILE_NOTFILE: The device we attempted to manipulate is non-existent
 // + @c RE_FILE_READ: If during reading the file there was an error
 // + @c RE_FILE_READ_BLOCK: If during reading the file the read operation failed due to the file being occupied by another thread
 // + @c RE_FILE_MODE: If during reading the file the underlying file descriptor's mode was not correctly set for reading
 i_DECLIMEX_ int32_t rfFback_UTF32BE(FILE* f,uint32_t *c);
 // @brief Moves a unicode character backwards in a little endian UTF-32 file stream
 //
 // The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
 // Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
 // default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
 // _set_fmode(). For more information take a look at the msdn pages here:
 // http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
 //
 // @param f The file stream
 // @param c Returns the character we moved back to as a unicode codepoint
 // @return Returns either @c RF_SUCCESS for success or one of the following errors:
 // + @c RE_FILE_POS_OVERFLOW: If during trying to read the current file's position it can't be represented by the system
 // + @c RE_FILE_BAD: If The file descriptor is corrupt/illegal
 // + @c RE_FILE_NOTFILE: If the file descriptor is not a file but something else. e.g. socket.
 // + @c RE_FILE_GETFILEPOS: If the file's position could not be retrieved for some unknown reason
 // + @c RE_FILE_WRITE_BLOCK: While attempting to move the file pointer, it was occupied by another thread, and the no block flag was set
 // + @c RE_INTERRUPT: Operating on the file failed due to a system interrupt
 // + @c RE_FILE_IO: There was a physical I/O error
 // + @c RE_FILE_NOSPACE: There was no space on the device holding the file
 // + @c RE_FILE_NOTFILE: The device we attempted to manipulate is non-existent
 // + @c RE_FILE_READ: If during reading the file there was an error
 // + @c RE_FILE_READ_BLOCK: If during reading the file the read operation failed due to the file being occupied by another thread
 // + @c RE_FILE_MODE: If during reading the file the underlying file descriptor's mode was not correctly set for reading
 i_DECLIMEX_ int32_t rfFback_UTF32LE(FILE* f,uint32_t *c);
 // @brief Moves a unicode character backwards in a big endian UTF-16 file stream
 //
 // The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
 // Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
 // default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
 // _set_fmode(). For more information take a look at the msdn pages here:
 // http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
 //
 // @param f The file stream
 // @param c Returns the character we moved back to as a unicode codepoint
 // @return Returns either the number of bytes moved backwards (either @c 4 or @c 2) for success or one of the following errors:
 // + @c RE_UTF16_INVALID_SEQUENCE: Either the read word or its surrogate pair if 4 bytes were read held illegal values
 // + @c RE_FILE_POS_OVERFLOW: If during trying to read the current file's position it can't be represented by the system
 // + @c RE_FILE_BAD: If The file descriptor is corrupt/illegal
 // + @c RE_FILE_NOTFILE: If the file descriptor is not a file but something else. e.g. socket.
 // + @c RE_FILE_GETFILEPOS: If the file's position could not be retrieved for some unknown reason
 // + @c RE_FILE_WRITE_BLOCK: While attempting to move the file pointer, it was occupied by another thread, and the no block flag was set
 // + @c RE_INTERRUPT: Operating on the file failed due to a system interrupt
 // + @c RE_FILE_IO: There was a physical I/O error
 // + @c RE_FILE_NOSPACE: There was no space on the device holding the file
 // + @c RE_FILE_NOTFILE: The device we attempted to manipulate is non-existent
 // + @c RE_FILE_READ: If during reading the file there was an error
 // + @c RE_FILE_READ_BLOCK: If during reading the file the read operation failed due to the file being occupied by another thread
 // + @c RE_FILE_MODE: If during reading the file the underlying file descriptor's mode was not correctly set for reading
 i_DECLIMEX_ int32_t rfFback_UTF16BE(FILE* f,uint32_t *c);
 // @brief Moves a unicode character backwards in a little endian UTF-16 file stream
 //
 // The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
 // Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
 // default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
 // _set_fmode(). For more information take a look at the msdn pages here:
 // http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
 //
 // @param f The file stream
 // @param c Returns the character we moved back to as a unicode codepoint
 // @return Returns either the number of bytes moved backwards (either @c 4 or @c 2) for success or one of the following errors:
 // + @c RE_UTF16_INVALID_SEQUENCE: Either the read word or its surrogate pair if 4 bytes were read held illegal values
 // + @c RE_FILE_POS_OVERFLOW: If during trying to read the current file's position it can't be represented by the system
 // + @c RE_FILE_BAD: If The file descriptor is corrupt/illegal
 // + @c RE_FILE_NOTFILE: If the file descriptor is not a file but something else. e.g. socket.
 // + @c RE_FILE_GETFILEPOS: If the file's position could not be retrieved for some unknown reason
 // + @c RE_FILE_WRITE_BLOCK: While attempting to move the file pointer, it was occupied by another thread, and the no block flag was set
 // + @c RE_INTERRUPT: Operating on the file failed due to a system interrupt
 // + @c RE_FILE_IO: There was a physical I/O error
 // + @c RE_FILE_NOSPACE: There was no space on the device holding the file
 // + @c RE_FILE_NOTFILE: The device we attempted to manipulate is non-existent
 // + @c RE_FILE_READ: If during reading the file there was an error
 // + @c RE_FILE_READ_BLOCK: If during reading the file the read operation failed due to the file being occupied by another thread
 // + @c RE_FILE_MODE: If during reading the file the underlying file descriptor's mode was not correctly set for reading
 i_DECLIMEX_ int32_t rfFback_UTF16LE(FILE* f,uint32_t *c);
 // @brief Moves a unicode character backwards in a UTF-8 file stream
 //
 // The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
 // Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
 // default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
 // _set_fmode(). For more information take a look at the msdn pages here:
 // http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
 //
 // @param f The file stream
 // @param c Returns the character we moved back to as a unicode codepoint
 // @return Returns either the number of bytes moved backwards for success (either @c 4, @c 3, @c 2 or @c 1) or one of the following errors:
 // + @c RE_UTF8_INVALID_SEQUENCE: If during moving backwards in the file unexpected UTF-8 bytes were found
 // + @c RE_FILE_POS_OVERFLOW: If during trying to read the current file's position it can't be represented by the system
 // + @c RE_FILE_BAD: If The file descriptor is corrupt/illegal
 // + @c RE_FILE_NOTFILE: If the file descriptor is not a file but something else. e.g. socket.
 // + @c RE_FILE_GETFILEPOS: If the file's position could not be retrieved for some unknown reason
 // + @c RE_FILE_WRITE_BLOCK: While attempting to move the file pointer, it was occupied by another thread, and the no block flag was set
 // + @c RE_INTERRUPT: Operating on the file failed due to a system interrupt
 // + @c RE_FILE_IO: There was a physical I/O error
 // + @c RE_FILE_NOSPACE: There was no space on the device holding the file
 // + @c RE_FILE_NOTFILE: The device we attempted to manipulate is non-existent
 // + @c RE_FILE_READ: If during reading the file there was an error
 // + @c RE_FILE_READ_BLOCK: If during reading the file the read operation failed due to the file being occupied by another thread
 // + @c RE_FILE_MODE: If during reading the file the underlying file descriptor's mode was not correctly set for reading
 i_DECLIMEX_ int32_t rfFback_UTF8(FILE* f,uint32_t *c);
 // @brief Opens another process as a pipe
 //
 // This function is a cross-platform popen wrapper. In linux it uses popen and in Windows it uses
 // _popen.
 // @lmsFunction
 // @param command The string with the command to execute. Is basically the name of the program/process you want to spawn
 // with its full path and its parameters. @inhtype{String,StringX} @tmpSTR
 // @param mode The mode you want the pipe to work in. There are two possible values:
 // + @c "r" The calling process can read the spawned command's standard output via the returned stream.
 // + @c "w" The calling process can write to the spawned command's standard input via the returned stream.
 //
 // Anything else will result in an error
 // @return For success popen will return a FILE descriptor that can be used to either read or write from the pipe.
 // If there was an error @c 0 is returned and an error is logged.
 #ifdef RF_IAMHERE_FOR_DOXYGEN
 i_DECLIMEX_ FILE* rfPopen(void* command,const char* mode);
 #else
 i_DECLIMEX_ FILE* i_rfPopen(void* command,const char* mode);
 #define rfPopen(i_CMD_,i_MODE_) i_rfLMS_WRAP2(FILE*,i_rfPopen,i_CMD_,i_MODE_)
 #endif
 // @brief Closes a pipe
 //
 // This function is a cross-platform wrapper for pclose. It closes a file descriptor opened with @ref rfPopen() and
 // returns the exit code of the process that was running
 // @param stream The file descriptor of the pipe returned by @ref rfPopen() that we want to close
 // @return Returns the exit code of the process or -1 if there was an error
 i_DECLIMEX_ int rfPclose(FILE* stream);
 // @} End of I/O group
 #ifdef __cplusplus
 }///closing bracket for calling from C++
 #endif
 #endif//include guards end
--- a/samples/C/rfc_string.c
+++ b/samples/C/rfc_string.c
--- a/samples/C/rfc_string.h
+++ b/samples/C/rfc_string.h
--- a/samples/C/wglew.h
+++ b/samples/C/wglew.h
--- a/samples/Coq/Heap.v
+++ b/samples/Coq/Heap.v
@@ -1,13 +1,3 @@
 (************************************************************************)
 (*  v      *   The Coq Proof Assistant  /  The Coq Development Team     *)
 (* <O___,, *   INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2010     *)
 (*   \VV/  **************************************************************)
 (*    //   *      This file is distributed under the terms of the       *)
 (*         *       GNU Lesser General Public License Version 2.1        *)
 (************************************************************************)
 (** This file is deprecated, for a tree on list, use [Mergesort.v]. *)
 (** A development of Treesort on Heap trees. It has an average
    complexity of O(n.log n) but of O(n²) in the worst case (e.g. if
    the list is already sorted) *)
@@ -88,9 +78,9 @@ Section defs.
    forall P:Tree -> Type,
      P Tree_Leaf ->
      (forall (a:A) (T1 T2:Tree),
-	leA_Tree a T1 ->
+        leA_Tree a T1 ->
-	leA_Tree a T2 ->
+        leA_Tree a T2 ->
-	is_heap T1 -> P T1 -> is_heap T2 -> P T2 -> P (Tree_Node a T1 T2)) ->
+        is_heap T1 -> P T1 -> is_heap T2 -> P T2 -> P (Tree_Node a T1 T2)) ->
      forall T:Tree, is_heap T -> P T.
  Proof.
    simple induction T; auto with datatypes.
@@ -105,9 +95,9 @@ Section defs.
    forall P:Tree -> Set,
      P Tree_Leaf ->
      (forall (a:A) (T1 T2:Tree),
-	leA_Tree a T1 ->
+        leA_Tree a T1 ->
-	leA_Tree a T2 ->
+        leA_Tree a T2 ->
-	is_heap T1 -> P T1 -> is_heap T2 -> P T2 -> P (Tree_Node a T1 T2)) ->
+        is_heap T1 -> P T1 -> is_heap T2 -> P T2 -> P (Tree_Node a T1 T2)) ->
      forall T:Tree, is_heap T -> P T.
  Proof.
    simple induction T; auto with datatypes.
@@ -186,7 +176,7 @@ Section defs.
    match t with
      | Tree_Leaf => emptyBag
      | Tree_Node a t1 t2 =>
-	munion (contents t1) (munion (contents t2) (singletonBag a))
+        munion (contents t1) (munion (contents t2) (singletonBag a))
    end.
@@ -272,11 +262,11 @@ Section defs.
    apply flat_exist with (a :: l); simpl; auto with datatypes.
    apply meq_trans with
      (munion (list_contents _ eqA_dec l1)
-	(munion (list_contents _ eqA_dec l2) (singletonBag a))).
+        (munion (list_contents _ eqA_dec l2) (singletonBag a))).
    apply meq_congr; auto with datatypes.
    apply meq_trans with
      (munion (singletonBag a)
-	(munion (list_contents _ eqA_dec l1) (list_contents _ eqA_dec l2))).
+        (munion (list_contents _ eqA_dec l1) (list_contents _ eqA_dec l2))).
    apply munion_rotate.
    apply meq_right; apply meq_sym; trivial with datatypes.
  Qed.
--- a/samples/Coq/PermutSetoid.v
+++ b/samples/Coq/PermutSetoid.v
@@ -1,11 +1,3 @@
 (************************************************************************)
 (*  v      *   The Coq Proof Assistant  /  The Coq Development Team     *)
 (* <O___,, *   INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2010     *)
 (*   \VV/  **************************************************************)
 (*    //   *      This file is distributed under the terms of the       *)
 (*         *       GNU Lesser General Public License Version 2.1        *)
 (************************************************************************)
 Require Import Omega Relations Multiset SetoidList.
 (** This file is deprecated, use [Permutation.v] instead.
@@ -154,7 +146,7 @@ Lemma permut_add_cons_inside :
 Proof.
  intros;
    replace (a :: l) with ([] ++ a :: l); trivial;
-	apply permut_add_inside; trivial.
+        apply permut_add_inside; trivial.
 Qed.
 Lemma permut_middle :
@@ -168,8 +160,8 @@ Lemma permut_sym_app :
 Proof.
  intros l1 l2;
    unfold permutation, meq;
-	intro a; do 2 rewrite list_contents_app; simpl;
+        intro a; do 2 rewrite list_contents_app; simpl;
-	  auto with arith.
+          auto with arith.
 Qed.
 Lemma permut_rev :
--- a/samples/Coq/Permutation.v
+++ b/samples/Coq/Permutation.v
@@ -1,17 +1,5 @@
 (************************************************************************)
 (*  v      *   The Coq Proof Assistant  /  The Coq Development Team     *)
 (* <O___,, *   INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2010     *)
 (*   \VV/  **************************************************************)
 (*    //   *      This file is distributed under the terms of the       *)
 (*         *       GNU Lesser General Public License Version 2.1        *)
 (************************************************************************)
 (*********************************************************************)
 (** * List permutations as a composition of adjacent transpositions  *)
 (*********************************************************************)
 (* Adapted in May 2006 by Jean-Marc Notin from initial contents by
-   Laurent Théry (Huffmann contribution, October 2003) *)
+   Laurent Thery (Huffmann contribution, October 2003) *)
 Require Import List Setoid Compare_dec Morphisms.
 Import ListNotations. (* For notations [] and [a;b;c] *)
--- a/samples/Coq/Sorting.v
+++ b/samples/Coq/Sorting.v
@@ -1,10 +1,2 @@
 (************************************************************************)
 (*  v      *   The Coq Proof Assistant  /  The Coq Development Team     *)
 (* <O___,, *   INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2010     *)
 (*   \VV/  **************************************************************)
 (*    //   *      This file is distributed under the terms of the       *)
 (*         *       GNU Lesser General Public License Version 2.1        *)
 (************************************************************************)
 Require Export Sorted.
 Require Export Mergesort.
--- a/samples/Ecl/sample.ecl
+++ b/samples/Ecl/sample.ecl
@@ -0,0 +1,42 @@
 /*
 * Multi-line comment
 */
 #option ('slidingJoins', true);
 // Record layout holding surname, forename and three age fields.
 namesRecord :=
            RECORD
 string20        surname;
 string10        forename;
 integer2        age;
 integer2        dadAge;
 integer2        mumAge;
            END;
 // Second layout: an extra string field followed by namesRecord
 // (presumably pulling in all of its fields -- confirm against the ECL reference).
 namesRecord2 :=
            record
 string10        extra;
 namesRecord;
            end;
 // One dataset per record layout.
 namesTable := dataset('x',namesRecord,FLAT);
 namesTable2 := dataset('y',namesRecord2,FLAT);
 // Helpers returning (dadAge + mumAge) / 2 for a row of each layout.
 integer2 aveAgeL(namesRecord l) := (l.dadAge+l.mumAge)/2;
 integer2 aveAgeR(namesRecord2 r) := (r.dadAge+r.mumAge)/2;
 // Standard join on a function of left and right
 output(join(namesTable, namesTable2, aveAgeL(left) = aveAgeR(right)));
 //Several simple examples of sliding join syntax
 output(join(namesTable, namesTable2, left.age >= right.age - 10 and left.age <= right.age +10));
 output(join(namesTable, namesTable2, left.age between right.age - 10 and right.age +10));
 output(join(namesTable, namesTable2, left.age between right.age + 10 and right.age +30));
 output(join(namesTable, namesTable2, left.age between (right.age + 20) - 10 and (right.age +20) + 10));
 output(join(namesTable, namesTable2, aveAgeL(left) between aveAgeR(right)+10 and aveAgeR(right)+40));
 //Same, but on strings.  Also includes age to ensure sort is done by non-sliding before sliding.
 output(join(namesTable, namesTable2, left.surname between right.surname[1..10]+'AAAAAAAAAA' and right.surname[1..10]+'ZZZZZZZZZZ' and left.age=right.age));
 output(join(namesTable, namesTable2, left.surname between right.surname[1..10]+'AAAAAAAAAA' and right.surname[1..10]+'ZZZZZZZZZZ' and left.age=right.age,all));
 //This should not generate a self join
 output(join(namesTable, namesTable, left.age between right.age - 10 and right.age +10));
--- a/samples/Objective-C/empty.m
+++ b/samples/Objective-C/empty.m
--- a/samples/Shell/sbt.script!
+++ b/samples/Shell/sbt.script!
@@ -0,0 +1,432 @@
 #!/usr/bin/env bash
 #
 # A more capable sbt runner, coincidentally also called sbt.
 # Author: Paul Phillips <paulp@typesafe.com>
 # todo - make this dynamic
 # Built-in sbt versions: the release is the fallback returned by sbt_version,
 # the snapshot is what -sbt-snapshot selects.
 declare -r sbt_release_version=0.11.3
 declare -r sbt_snapshot_version=0.13.0-SNAPSHOT
 # Drop any values for these names inherited from the caller's environment
 # (presumably so option parsing starts from a clean state).
 unset sbt_jar sbt_dir sbt_create sbt_snapshot sbt_launch_dir
 unset scala_version java_home sbt_explicit_version
 unset verbose debug quiet
 # Print the sbt.version value recorded in project/build.properties.
 # Prints nothing when that file does not exist.
 build_props_sbt () {
  # No properties file: nothing to report, still a success.
  [[ -f project/build.properties ]] || return 0
  versionLine=$(grep ^sbt.version project/build.properties)
  versionString=${versionLine##sbt.version=}
  echo "$versionString"
 }
 update_build_props_sbt () {
  local ver="$1"
  local old=$(build_props_sbt)
  if [[ $ver == $old ]]; then
    return
  elif [[ -f project/build.properties ]]; then
    perl -pi -e "s/^sbt\.version=.*\$/sbt.version=${ver}/" project/build.properties
    grep -q '^sbt.version=' project/build.properties || echo "sbt.version=${ver}" >> project/build.properties
    echo !!!
    echo !!! Updated file project/build.properties setting sbt.version to: $ver
    echo !!! Previous value was: $old
    echo !!!
  fi
 }
 # Print the sbt version to use: the explicitly requested one if set,
 # otherwise the one from build.properties, otherwise the built-in release.
 sbt_version () {
  if [[ -n $sbt_explicit_version ]]; then
    echo $sbt_explicit_version
    return
  fi
  local v=$(build_props_sbt)
  # Fall back to the release version when build.properties gave nothing.
  echo ${v:-$sbt_release_version}
 }
 # echo the arguments to standard error instead of standard output.
 echoerr () {
  echo "$@" >&2
 }
 # Log the arguments to stderr when verbose or debug is set.
 # Returns non-zero when logging is off.
 vlog () {
  if [[ -n $verbose || -n $debug ]]; then
    echoerr "$@"
  else
    return 1
  fi
 }
 # Log the arguments to stderr only when debug is set.
 # Returns non-zero when debug is off.
 dlog () {
  if [[ -n $debug ]]; then
    echoerr "$@"
  else
    return 1
  fi
 }
 # Print the location of the script given as $1, following one level of
 # symlink: a non-link path is printed unchanged, an absolute link target is
 # printed as is, and a relative target is resolved against the link's
 # directory.
 # this seems to cover the bases on OSX, and someone will
 # have to tell me about the others.
 get_script_path () {
  local path="$1"
  [[ -L "$path" ]] || { echo "$path" ; return; }
  local target=$(readlink "$path")
  if [[ "${target:0:1}" == "/" ]]; then
    echo "$target"
  else
    # Quote $path so a directory name containing whitespace reaches dirname
    # as a single argument.
    echo "$(dirname "$path")/$target"
  fi
 }
 # Derive a consistent set of JVM memory flags from one heap size.
 # $1 is the heap size in megabytes (default 1536).  PermGen is a quarter of
 # the heap clamped to 256..1024, and the code cache is half of PermGen.
 get_mem_opts () {
  local heap_mb=${1:-1536}
  local perm_mb=$(( $heap_mb / 4 ))
  if ! (( $perm_mb > 256 )); then
    perm_mb=256
  fi
  if ! (( $perm_mb < 1024 )); then
    perm_mb=1024
  fi
  local cache_mb=$(( $perm_mb / 2 ))
  echo "-Xms${heap_mb}m -Xmx${heap_mb}m -XX:MaxPermSize=${perm_mb}m -XX:ReservedCodeCacheSize=${cache_mb}m"
 }
 # Print "Aborting: <message>" and terminate the script with status 1.
 die() {
  printf 'Aborting: %s\n' "$*"
  exit 1
 }
 # Print the download URL of an sbt launcher jar.
 # $1 = group id, $2 = repository category (the suffix after "ivy-"), $3 = version.
 # The three values are stored in the non-local variables groupid, category
 # and version.
 make_url () {
  groupid="$1" category="$2" version="$3"
  printf '%s\n' "http://typesafe.artifactoryonline.com/typesafe/ivy-$category/$groupid/sbt-launch/$version/sbt-launch.jar"
 }
 # Read-only defaults: option strings, memory size, per-project option file
 # names and the Scala versions behind the -28/-29/-210 shortcuts.
 declare -r default_jvm_opts="-Dfile.encoding=UTF8"
 declare -r default_sbt_opts="-XX:+CMSClassUnloadingEnabled"
 declare -r default_sbt_mem=1536
 declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
 declare -r sbt_opts_file=".sbtopts"
 declare -r jvm_opts_file=".jvmopts"
 declare -r latest_28="2.8.2"
 declare -r latest_29="2.9.1"
 declare -r latest_210="2.10.0-SNAPSHOT"
 # Where this script lives.  $script_path is quoted below so an install
 # location containing whitespace is passed to dirname/basename intact.
 declare -r script_path=$(get_script_path "$BASH_SOURCE")
 declare -r script_dir="$(dirname "$script_path")"
 declare -r script_name="$(basename "$script_path")"
 # some non-read-onlies set with defaults
 declare java_cmd=java
 declare sbt_launch_dir="$script_dir/.lib"
 declare sbt_mem=$default_sbt_mem
 # pull -J and -D options to give to java.
 # Arrays filled in by the add* helpers while the arguments are processed.
 declare -a residual_args
 declare -a java_args
 declare -a scalac_args
 declare -a sbt_commands
 # Print the build.scala.versions value from project/build.properties with
 # any trailing " .<rest>" removed.  Prints nothing when the file is absent.
 build_props_scala () {
  # No properties file: nothing to report, still a success.
  [[ -f project/build.properties ]] || return 0
  versionLine=$(grep ^build.scala.versions project/build.properties)
  versionString=${versionLine##build.scala.versions=}
  echo ${versionString%% .*}
 }
 # Replace the current process with the given command line.  In verbose or
 # debug mode the command is first printed one argument per line, with
 # arguments that contain a space wrapped in double quotes.
 execRunner () {
  if [[ -n $verbose || -n $debug ]]; then
    echo "# Executing command line:"
    for arg; do
      case "$arg" in
        *' '*) printf "\"%s\"\n" "$arg" ;;
            *) printf "%s\n" "$arg" ;;
      esac
    done
    echo ""
  fi
  exec "$@"
 }
 # Print the group id under which the selected sbt version is published:
 # 0.7.x, 0.10.x, 0.11.1 and 0.11.2 use org.scala-tools.sbt, everything else
 # uses org.scala-sbt.
 sbt_groupid () {
  local v=$(sbt_version)
  case "$v" in
    0.7.*|0.10.*|0.11.[12]) echo org.scala-tools.sbt ;;
    *) echo org.scala-sbt ;;
  esac
 }
 # Print the snapshot directory names listed at the ivy-snapshots repository
 # for the current sbt version, in the reverse of the order they were listed.
 sbt_artifactory_list () {
  local version0=$(sbt_version)
  # Drop a trailing "-SNAPSHOT" so the grep below matches on the base version.
  local version=${version0%-SNAPSHOT}
  local url="http://typesafe.artifactoryonline.com/typesafe/ivy-snapshots/$(sbt_groupid)/sbt-launch/"
  dlog "Looking for snapshot list at: $url "
  # Fetch the listing, keep lines containing the version (fixed-string match),
  # reverse the line order, then reduce each line starting with an <a href>
  # to the href value up to its first '/' or closing quote.
  curl -s --list-only "$url" | \
    grep -F $version | \
    perl -e 'print reverse <>' | \
    perl -pe 's#^<a href="([^"/]+).*#$1#;'
 }
 # Print the releases-repository launcher URL for the current sbt version.
 make_release_url () {
  local gid=$(sbt_groupid)
  local ver=$(sbt_version)
  make_url $gid releases $ver
 }
 # argument is e.g. 0.13.0-SNAPSHOT
 # finds the actual version (with the build id) at artifactory
 # Prints the snapshots URL built from the first entry returned by
 # sbt_artifactory_list; prints nothing when that list is empty.
 make_snapshot_url () {
  for ver in $(sbt_artifactory_list); do
    local url=$(make_url $(sbt_groupid) snapshots $ver)
    dlog "Testing $url"
    # The HEAD request's exit status is only logged; it does not affect
    # which URL is chosen.
    curl -s --head "$url" >/dev/null
    dlog "curl returned: $?"
    echo "$url"
    # Unconditional return: only the first listed version is ever used.
    return
  done
 }
 # Print the launcher jar URL for the current sbt version: a fixed URL for
 # 0.7.x, the snapshot URL for *-SNAPSHOT versions, the release URL otherwise.
 jar_url () {
  local v=$(sbt_version)
  if [[ $v == 0.7.* ]]; then
    echo "http://simple-build-tool.googlecode.com/files/sbt-launch-0.7.7.jar"
  elif [[ $v == *-SNAPSHOT ]]; then
    make_snapshot_url
  else
    make_release_url
  fi
 }
 # Print the path of the launcher jar for version $1 inside $sbt_launch_dir.
 jar_file () {
  printf '%s\n' "$sbt_launch_dir/$1/sbt-launch.jar"
 }
 # Download the sbt launcher from URL $1 to file $2, creating the target
 # directory first.  Uses curl when available, otherwise wget.
 # Succeeds only if the target file exists afterwards.
 download_url () {
  local url="$1"
  local jar="$2"
  echo "Downloading sbt launcher $(sbt_version):"
  echo "  From  $url"
  echo "    To  $jar"
  # Quote the dirname result so a target path containing whitespace creates
  # one directory rather than several.
  mkdir -p "$(dirname "$jar")" && {
    if which curl >/dev/null; then
      curl --fail --silent "$url" --output "$jar"
    elif which wget >/dev/null; then
      wget --quiet -O "$jar" "$url"
    fi
  } && [[ -f "$jar" ]]
 }
 # Set sbt_url and sbt_jar for the current sbt version and download the jar
 # unless it is already present on disk.
 acquire_sbt_jar () {
  sbt_url="$(jar_url)"
  sbt_jar="$(jar_file $(sbt_version))"
  if [[ ! -f "$sbt_jar" ]]; then
    download_url "$sbt_url" "$sbt_jar"
  fi
 }
 # Print the help text.  The here-document is expanded when usage runs, so it
 # shows the current values of $script_name, $sbt_mem, $sbt_launch_dir and the
 # latest_* versions, and it runs get_mem_opts and `java -version`.
 # `java -version` writes to stderr; it is merged with `2>&1 |` rather than
 # `|&` because `|&` needs bash 4 or newer.
 usage () {
  cat <<EOM
 Usage: $script_name [options]
  -h | -help         print this message
  -v | -verbose      this runner is chattier
  -d | -debug        set sbt log level to Debug
  -q | -quiet        set sbt log level to Error
  -no-colors         disable ANSI color codes
  -sbt-create        start sbt even if current directory contains no sbt project
  -sbt-dir   <path>  path to global settings/plugins directory (default: ~/.sbt/<version>)
  -sbt-boot  <path>  path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
  -ivy       <path>  path to local Ivy repository (default: ~/.ivy2)
  -mem    <integer>  set memory options (default: $sbt_mem, which is
                       $(get_mem_opts $sbt_mem) )
  -no-share          use all local caches; no sharing
  -offline           put sbt in offline mode
  -jvm-debug <port>  Turn on JVM debugging, open at the given port.
  -batch             Disable interactive mode
  # sbt version (default: from project/build.properties if present, else latest release)
  !!! The only way to accomplish this pre-0.12.0 if there is a build.properties file which
  !!! contains an sbt.version property is to update the file on disk.  That's what this does.
  -sbt-version  <version>   use the specified version of sbt 
  -sbt-jar      <path>      use the specified jar as the sbt launcher
  -sbt-snapshot             use a snapshot version of sbt
  -sbt-launch-dir <path>    directory to hold sbt launchers (default: $sbt_launch_dir)
  # scala version (default: as chosen by sbt)
  -28                       use $latest_28
  -29                       use $latest_29
  -210                      use $latest_210
  -scala-home <path>        use the scala build at the specified directory
  -scala-version <version>  use the specified version of scala
  # java version (default: java from PATH, currently $(java -version 2>&1 | grep version))
  -java-home <path>         alternate JAVA_HOME
  # jvm options and output control
  JAVA_OPTS     environment variable holding jvm args, if unset uses "$default_jvm_opts"
  SBT_OPTS      environment variable holding jvm args, if unset uses "$default_sbt_opts"
  .jvmopts      if file is in sbt root, it is prepended to the args given to the jvm
  .sbtopts      if file is in sbt root, it is prepended to the args given to **sbt**
  -Dkey=val     pass -Dkey=val directly to the jvm
  -J-X          pass option -X directly to the jvm (-J is stripped)
  -S-X          add -X to sbt's scalacOptions (-S is stripped)
 In the case of duplicated or conflicting options, the order above
 shows precedence: JAVA_OPTS lowest, command line options highest.
 EOM
 }
 # Append $1 as one element to the java_args array.
 addJava () {
  dlog "[addJava] arg = '$1'"
  java_args+=( "$1" )
 }
 # Append $1 as one element to the sbt_commands array.
 addSbt () {
  dlog "[addSbt] arg = '$1'"
  sbt_commands+=( "$1" )
 }
 # Append $1 as one element to the scalac_args array.
 addScalac () {
  dlog "[addScalac] arg = '$1'"
  scalac_args+=( "$1" )
 }
 # Append $1 as one element to the residual_args array.
 addResidual () {
  dlog "[residual] arg = '$1'"
  residual_args+=( "$1" )
 }
 # Queue an sbt command that adds resolver expression $1 to the build.
 addResolver () {
  local setting="set resolvers in ThisBuild += $1"
  addSbt "$setting"
 }
 # Queue the JVM flags that open a JDWP debug socket at address $1.
 # Both flags are passed to addJava as a single argument.
 addDebugger () {
  local jdwp="-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=$1"
  addJava "$jdwp"
 }
 # Print the contents of $jvm_opts_file when that file exists.
 # Returns non-zero when it does not.
 get_jvm_opts () {
  # echo "${JAVA_OPTS:-$default_jvm_opts}"
  # echo "${SBT_OPTS:-$default_sbt_opts}"
  if [[ -f "$jvm_opts_file" ]]; then
    cat "$jvm_opts_file"
  else
    return 1
  fi
 }
 # Parse the runner's command line.  Recognised options set script variables
 # or queue JVM arguments, scalac options and sbt commands through the add*
 # helpers; anything unrecognised is collected with addResidual.  Once all
 # arguments are consumed, -debug and -quiet are turned into sbt log-level
 # commands.
 process_args ()
 {
  # Abort via die unless the option's value ($3) is non-empty and does not
  # start with "-".  $1 names the expected kind of value, $2 is the option.
  require_arg () {
    local type="$1"
    local opt="$2"
    local arg="$3"
    if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then
      die "$opt requires <$type> argument"
    fi
  }
  # Each branch shifts away what it consumed: one word for flags, two for
  # options that take a value.
  while [[ $# -gt 0 ]]; do
    case "$1" in
       -h|-help) usage; exit 1 ;;
    -v|-verbose) verbose=1 && shift ;;
      -d|-debug) debug=1 && shift ;;
      -q|-quiet) quiet=1 && shift ;;
           -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;;
           -mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;;
     -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;;
      -no-share) addJava "$noshare_opts" && shift ;;
      -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;;
       -sbt-dir) require_arg path "$1" "$2" && sbt_dir="$2" && shift 2 ;;
     -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;;
       -offline) addSbt "set offline := true" && shift ;;
     -jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;;
         -batch) exec </dev/null && shift ;;
    -sbt-create) sbt_create=true && shift ;;
  -sbt-snapshot) sbt_explicit_version=$sbt_snapshot_version && shift ;;
       -sbt-jar) require_arg path "$1" "$2" && sbt_jar="$2" && shift 2 ;;
   -sbt-version) require_arg version "$1" "$2" && sbt_explicit_version="$2" && shift 2 ;;
 -sbt-launch-dir) require_arg path "$1" "$2" && sbt_launch_dir="$2" && shift 2 ;;
 -scala-version) require_arg version "$1" "$2" && addSbt "set scalaVersion := \"$2\"" && shift 2 ;;
    -scala-home) require_arg path "$1" "$2" && addSbt "set scalaHome in ThisBuild := Some(file(\"$2\"))" && shift 2 ;;
     -java-home) require_arg path "$1" "$2" && java_cmd="$2/bin/java" && shift 2 ;;
            -D*) addJava "$1" && shift ;;
            -J*) addJava "${1:2}" && shift ;;
            -S*) addScalac "${1:2}" && shift ;;
            -28) addSbt "++ $latest_28" && shift ;;
            -29) addSbt "++ $latest_29" && shift ;;
           -210) addSbt "++ $latest_210" && shift ;;
              *) addResidual "$1" && shift ;;
    esac
  done
  # Translate -debug into an sbt command; 0.7.x uses a different command
  # from later versions.
  [[ $debug ]] && {
    case $(sbt_version) in
     0.7.*) addSbt "debug" ;; 
         *) addSbt "set logLevel in Global := Level.Debug" ;;
    esac
  }
  # Translate -quiet likewise; nothing is queued for 0.7.x.
  [[ $quiet ]] && {
    case $(sbt_version) in
     0.7.*) ;; 
         *) addSbt "set logLevel in Global := Level.Error" ;;
    esac
  }
 }
 # if .sbtopts exists, prepend its contents to $@ so it can be processed by this runner
 # (each line of the file becomes exactly one argument)
 [[ -f "$sbt_opts_file" ]] && {
  sbtargs=()
  while IFS= read -r arg; do
    sbtargs=( "${sbtargs[@]}" "$arg" )
  done <"$sbt_opts_file"
  set -- "${sbtargs[@]}" "$@"
 }
 # process the combined args, then reset "$@" to the residuals
 process_args "$@"
 set -- "${residual_args[@]}"
 argumentCount=$#
 # set scalacOptions if we were given any -S opts
 # NOTE(review): "${scalac_args[@]}" inside the quoted string expands to one
 # word per element and addSbt only uses $1, so with more than one -S option
 # this looks like it passes a truncated command - confirm.
 [[ ${#scalac_args[@]} -eq 0 ]] || addSbt "set scalacOptions in ThisBuild += \"${scalac_args[@]}\""
 # Update build.properties on disk to set explicit version - sbt gives us no choice
 [[ -n "$sbt_explicit_version" ]] && update_build_props_sbt "$sbt_explicit_version"
 echo "Detected sbt version $(sbt_version)"
 [[ -n "$scala_version" ]] && echo "Overriding scala version to $scala_version"
 # no args - alert them there's stuff in here
 (( $argumentCount > 0 )) || echo "Starting $script_name: invoke with -help for other options"
 # verify this is an sbt dir or -create was given
 [[ -f ./build.sbt || -d ./project || -n "$sbt_create" ]] || {
  cat <<EOM
 $(pwd) doesn't appear to be an sbt project.
 If you want to start sbt anyway, run:
  $0 -sbt-create
 EOM
  exit 1
 }
 # pick up completion if present; todo
 [[ -f .sbt_completion.sh ]] && source .sbt_completion.sh
 # no jar? download it.
 [[ -f "$sbt_jar" ]] || acquire_sbt_jar || {
  # still no jar? uh-oh.
  echo "Download failed. Obtain the jar manually and place it at $sbt_jar"
  exit 1
 }
 # default the global sbt directory to a per-version one unless -sbt-dir set it
 [[ -n "$sbt_dir" ]] || {
  sbt_dir=~/.sbt/$(sbt_version)
  addJava "-Dsbt.global.base=$sbt_dir"
  echo "Using $sbt_dir as sbt dir, -sbt-dir to override."
 }
 # since sbt 0.7 doesn't understand iflast
 (( ${#residual_args[@]} == 0 )) && residual_args=( "shell" )
 # run sbt
 # The memory flags, .jvmopts contents and java_args are expanded unquoted,
 # so each is split on whitespace into separate JVM arguments.
 execRunner "$java_cmd" \
  $(get_mem_opts $sbt_mem) \
  $(get_jvm_opts) \
  ${java_args[@]} \
  -jar "$sbt_jar" \
  "${sbt_commands[@]}" \
  "${residual_args[@]}"
--- a/samples/Text/mac.txt
+++ b/samples/Text/mac.txt
@@ -0,0 +1 @@
 line 1
--- a/test/test_blob.rb
+++ b/test/test_blob.rb
@@ -2,6 +2,7 @@ require 'linguist/file_blob'
 require 'linguist/samples'
 require 'test/unit'
 require 'mocha'
 require 'mime/types'
 require 'pygments'
@@ -64,6 +65,14 @@ class TestBlob < Test::Unit::TestCase
    assert_equal ["module Foo", "end", ""], blob("Ruby/foo.rb").lines
  end
  # The Mac-format sample must be detected as Mac format.
  def test_mac_format
    mac_blob = blob("Text/mac.txt")
    assert mac_blob.mac_format?
  end
  # Lines of the Mac-format sample, including the trailing empty entry.
  def test_lines_mac_format
    expected_lines = ["line 1", "line 2", ""]
    assert_equal expected_lines, blob("Text/mac.txt").lines
  end
  # The Ruby sample blob reports a size of 15.
  def test_size
    ruby_blob = blob("Ruby/foo.rb")
    assert_equal 15, ruby_blob.size
  end
@@ -261,6 +270,12 @@ class TestBlob < Test::Unit::TestCase
    assert !blob("Text/dump.sql").indexable?
    assert !blob("Binary/github.po").indexable?
    assert !blob("Binary/linguist.gem").indexable?
    # large binary blobs should fail on size check first, not call 
    # into charlock_holmes and alloc big buffers for testing encoding
    b = blob("Binary/octocat.ai")
    b.expects(:binary?).never
    assert !b.indexable?
  end
  def test_language
--- a/test/test_samples.rb
+++ b/test/test_samples.rb
@@ -1,4 +1,6 @@
 require 'linguist/samples'
 require 'tempfile'
 require 'yajl'
 require 'test/unit'
@@ -12,6 +14,19 @@ class TestSamples < Test::Unit::TestCase
    # Just warn, it shouldn't scare people off by breaking the build.
    if serialized['md5'] != latest['md5']
      warn "Samples database is out of date. Run `bundle exec rake samples`."
      expected = Tempfile.new('expected.json')
      expected.write Yajl::Encoder.encode(serialized, :pretty => true)
      expected.close
      actual = Tempfile.new('actual.json')
      actual.write Yajl::Encoder.encode(latest, :pretty => true)
      actual.close
      warn `diff #{expected.path} #{actual.path}`
      expected.unlink
      actual.unlink
    end
  end
Author	SHA1	Message	Date
Joshua Peek	9e9500dfa9	Linguist 2.3.4	2012-09-24 10:54:17 -05:00
Joshua Peek	04cc100fba	Rebuild samples db	2012-09-24 10:52:05 -05:00
Joshua Peek	31e33f99f2	Ensure lang is skipped on any binary file	2012-09-24 10:51:39 -05:00
Joshua Peek	7c51b90586	Skip empty sample	2012-09-24 10:50:49 -05:00
Joshua Peek	2b36f73da6	Some comments are triggering charlock binary	2012-09-24 10:48:22 -05:00
Joshua Peek	d96dd473b8	Rebuild samples db	2012-09-24 10:12:18 -05:00
Joshua Peek	f9066ffb7b	Sort exts and filenames	2012-09-24 10:12:05 -05:00
Joshua Peek	945941d529	Update samples db	2012-09-24 10:07:58 -05:00
Joshua Peek	10e875e899	Print out samples db diffs	2012-09-24 10:07:08 -05:00
Justin Palmer	d24e5c938e	sample directory needs uppercase E	2012-09-20 15:23:58 -07:00
Justin Palmer	aa069a336f	add color to ecl language	2012-09-20 15:16:06 -07:00
Justin Palmer	662fc2ee9d	Merge remote-tracking branch 'rengolin/ecl'	2012-09-20 15:07:41 -07:00
Ryan Tomayko	567cd6ef68	Merge pull request #250 from github/mac-format Handle Mac Format when splitting lines	2012-09-11 14:17:21 -07:00
Ryan Tomayko	887a050db9	Only search the first 4K chars for \r	2012-09-10 01:56:08 -07:00
Ryan Tomayko	bda895eaae	Test Mac Format detection and line splitting	2012-09-10 01:52:30 -07:00
Ryan Tomayko	2e49c06f47	Handle ✨Mac Format✨ when splitting lines	2012-09-10 01:05:48 -07:00
Joshua Peek	ae137847b4	Linguist 2.3.3	2012-09-04 09:32:21 -05:00
Scott J. Goldman	5443dc50a3	Merge pull request #247 from github/check-size-first When testing if a blob is indexable or safe to colorize, check size first	2012-09-02 00:09:51 -07:00
Scott J. Goldman	fc435a2541	Linguist 2.3.2	2012-09-02 00:08:37 -07:00
Scott J. Goldman	04394750e7	When testing if a blob is safe to colorize, check size first Similar to `e415a13`	2012-09-02 00:08:37 -07:00
Scott J. Goldman	e415a1351b	When testing if a blob is indexable, check size first Otherwise, charlock_holmes will allocate another large binary buffer for testing the encoding, which is a problem if the binary blob is many hundreds of MB large. It'll just fail and crash ruby.	2012-08-31 22:47:19 -07:00
Joshua Peek	6ec907a915	Merge pull request #245 from jcazevedo/master Add Shell sample	2012-08-28 10:55:11 -07:00
Joao Azevedo	1f55f01fa9	Add Shell sample	2012-08-28 18:01:46 +01:00
Joshua Peek	5d79b88875	Linguist 2.3.1	2012-08-27 11:34:55 -05:00
Joshua Peek	458890b4b9	Add C++ sample	2012-08-27 11:33:28 -05:00
Joshua Peek	89267f792d	Rebuild samples db	2012-08-27 11:30:44 -05:00
Joshua Peek	b183fcca05	Only read up to 100KB	2012-08-27 11:30:38 -05:00
Joshua Peek	684a57dbc0	Add another C sample	2012-08-27 11:21:57 -05:00
Joshua Peek	400086a5c8	Add more C samples Closes #237	2012-08-23 13:38:16 -05:00
Renato Golin	da6cf8dbb4	Add ECL programming language and test	2012-07-12 09:09:32 +01:00