mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 09:40:21 +00:00
Several people mentioned in #467 that they were seeing their C++ projects erroneously showing up as containing Objective-C. I've added a file from each of the problematic repositories: - https://github.com/mp13on11/dwarf_mine - https://github.com/miguelishawt/anax - https://github.com/mholt/cppcsv - https://github.com/coder543/libcanister They all seem to be triggering on different aspects, since adding one sample wasn't sufficient to correctly classify the others. The discussion in #467 makes me think that perhaps Linguist might need to take the rest of the repository into account when classifying ambiguous files.
138 lines
5.0 KiB
C++
138 lines
5.0 KiB
C++
#ifndef LIBCANIH
|
|
#define LIBCANIH
|
|
#include <iostream>
|
|
#include <fstream>
|
|
#include <stdlib.h>
|
|
#include <cstring>
|
|
|
|
// int64: integer type used by this header for on-disk sizes.
// NOTE: despite its name this expands to an *unsigned* type.
#define int64 unsigned long long

// Uncomment to send `dout` diagnostics to standard output.
//#define DEBUG

// dout: stream-like sink for debug messages, used as `dout << a << b;`.
//   DEBUG defined     -> writes to std::cout
//   DEBUG not defined -> the whole statement is dead code and prints nothing
#ifdef DEBUG
#define dout std::cout
#else
// The `if (true) {} else` form (rather than a bare `if (0)`) gives the hidden
// `if` its own `else`, so that in
//     if (cond) dout << "x"; else other();
// the caller's `else` still pairs with the caller's `if`.
#define dout if (true) {} else std::cerr
#endif
|
|
|
|
using namespace std;
|
|
|
|
namespace libcanister
|
|
{
|
|
|
|
// canmem: a generic memory container used commonly throughout the canister
// framework to hold memory of uncertain length which may or may not contain
// null bytes.
//
// NOTE(review): a destructor is declared but no copy constructor or copy
// assignment operator, while null() returns a canmem by value. Confirm in the
// implementation that copies cannot end up releasing the same `data` twice.
class canmem
{
public:
    char* data; // the raw memory block
    int size;   // the absolute length of the block

    canmem();              // creates an unallocated canmem
    canmem(int allocsize); // creates an allocated, blank canmem of size allocsize
    canmem(char* strdata); // automates the creation of zero-limited canmems
    ~canmem();             // cleans up the canmem

    void zeromem();  // overwrites this canmem
    void fragmem();  // overwrites this canmem with fragment notation
    void countlen(); // counts length of zero-limited strings and stores it in size
    void trim();     // removes any nulls from the end of the string

    static canmem null(); // returns a singleton null canmem
};
|
|
|
|
//contains information about the canister
|
|
class caninfo
|
|
{
|
|
public:
|
|
canmem path; //physical path
|
|
canmem internalname; //a name for the canister
|
|
int numfiles; //the number of files in the canister
|
|
};
|
|
|
|
//necessary for the use of this class as a type in canfile
|
|
class canister;
|
|
|
|
//this object holds the definition of a 'file' within the
|
|
//canister 'filesystem.'
|
|
class canfile
|
|
{
|
|
public:
|
|
libcanister::canister* parent; //the canister that holds this file
|
|
canmem path; //internal path ('filename')
|
|
canmem data; //the file's decompressed contents
|
|
int isfrag; //0 = probably not fragment, 1 = definitely a fragment (ignore)
|
|
int cfid; //'canfile id' -- a unique ID for this file
|
|
int64 dsize; //ondisk size (compressed form size)
|
|
int cachestate; //0 = not in memory, 1 = in memory, 2 = in memory and needs flush
|
|
//-1 = error, check the data for the message
|
|
void cache(); //pull the file from disk and cache it in memory
|
|
void cachedump(); //deletes the contents of this file from the memory cache after assuring the on disk copy is up to date
|
|
void cachedumpfinal(fstream& infile); //same as cachedump, but more efficient during closing procedures
|
|
void flush(); //updates the on disk copy, but retains the memory cache
|
|
};
|
|
|
|
//the primary class
|
|
//this defines and controls a single canister
|
|
class canister
|
|
{
|
|
//table of contents
|
|
//absolutely worthless to the control code in the canister
|
|
//but quite useful to programs using the API, as they may
|
|
//desire to enumerate the files in a canister for a user's
|
|
//use or for their own.
|
|
//contains a newline-delimited list of files in the container.
|
|
canfile TOC;
|
|
public:
|
|
caninfo info; //the general info about this canister
|
|
|
|
//the raw canfiles -- recommended that programs do not modify
|
|
//these files directly, but not enforced.
|
|
canfile* files;
|
|
bool readonly; //if true then no write routines will do anything
|
|
|
|
//maximum number of files to have in memory at any given
|
|
//time, change this to whatever suits your application.
|
|
int cachemax;
|
|
int cachecnt; //number of files in the cache (should not be modified)
|
|
|
|
//both initialize the canister from a physical location
|
|
canister (canmem fspath);
|
|
canister (char* fspath);
|
|
|
|
//destroys the canister (after flushing the modded buffers, of course)
|
|
~canister();
|
|
|
|
//open the fspath
|
|
//does it exist?
|
|
// | --- yes --- opening it (return 1)
|
|
// | --- yes --- file is corrupted, halting (return -1)
|
|
// | --- no --- making a new one (return 0)
|
|
int open();
|
|
|
|
//close the canister, flush all buffers, clean up
|
|
int close();
|
|
|
|
//deletes the file at path inside this canister
|
|
int delFile(canmem path);
|
|
|
|
//pulls the contents of the file from disk or memory and returns it as a file
|
|
canfile getFile(canmem path);
|
|
|
|
//creates a file if it does not exist, otherwise overwrites
|
|
//returns whether operation succeeded
|
|
bool writeFile(canmem path, canmem data);
|
|
bool writeFile(canfile file);
|
|
|
|
//get the 'table of contents', a file containing a newline delimited
|
|
//list of the file paths in the container which have contents
|
|
canfile getTOC();
|
|
|
|
//brings the cache back within the cachemax limit
|
|
//important: sCFID is the safe CFID
|
|
//(the CFID of the file we want to avoid uncaching)
|
|
//really just used internally, but it can't do any harm.
|
|
void cacheclean(int sCFID, bool dFlush = false);
|
|
};
|
|
|
|
}
|
|
|
|
#endif |