mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	Add a few samples of misclassified C++ headers
Several people mentioned in #467 that they were seeing their C++ projects erroneously showing up as containing Objective-C. I've added a file from each of the problematic repositories: - https://github.com/mp13on11/dwarf_mine - https://github.com/miguelishawt/anax - https://github.com/mholt/cppcsv - https://github.com/coder543/libcanister They all seem to be triggering on different aspects, since adding one sample wasn't sufficient to correctly classify the others. The discussion in #467 makes me think that perhaps Linguist might need to take the rest of the repository into account when classifying ambiguous files.
This commit is contained in:
		
							
								
								
									
										42
									
								
								samples/C++/CsvStreamer.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								samples/C++/CsvStreamer.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| #pragma once | ||||
| #include <string> | ||||
| #include <vector> | ||||
| #include <fstream> | ||||
| #include "util.h" | ||||
|  | ||||
| using namespace std; | ||||
|  | ||||
|  | ||||
| #define DEFAULT_DELIMITER	',' | ||||
|  | ||||
| /* CsvStreamer writes CSV data to an output file: fields are collected in a row buffer and written out one row at a time. */ | ||||
| class CsvStreamer | ||||
| { | ||||
| 	private: | ||||
| 		ofstream file;				// Output stream for the CSV file being written | ||||
| 		vector<string> row_buffer;	// Holds the current row's data until it is flushed/written | ||||
| 		int fields;					// Number of fields (columns) | ||||
| 		long rows;					// Number of rows (records), including the header row | ||||
| 		char delimiter;				// Delimiter character; comma by default | ||||
| 		string sanitize(string);	// Returns a string ready for output into the file | ||||
| 		 | ||||
| 	public: | ||||
| 		CsvStreamer();				// Creates an empty CSV streamer... be sure to open the file before writing! | ||||
| 		CsvStreamer(string, char);	// Constructs and opens the file; same as open(string, char) | ||||
| 		CsvStreamer(string);		// Constructs and opens an output CSV file given a file path/name | ||||
| 		~CsvStreamer();				// Ensures the output file is closed and saved | ||||
| 		void open(string);			// Opens an output CSV file given a file path/name (default delimiter) | ||||
| 		void open(string, char);	// Opens an output CSV file given a file path/name and a delimiting character (comma is the default) | ||||
| 		void add_field(string);		// If still on the first line, adds a new field to the header row | ||||
| 		void save_fields();			// Call this to save the header row; all new writes should then go through append() | ||||
| 		void append(string);		// Appends this data to the current row as the next field; quoted only if needed (leading/trailing spaces are trimmed) | ||||
| 		void append(string, bool);	// Like append(string) but lets the caller choose whether to trim spaces at either end of the data (false to keep spaces) | ||||
| 		void append(float);			// Appends this number to the current row | ||||
| 		void append(double);		// Appends this number to the current row | ||||
| 		void append(long);			// Appends this number to the current row | ||||
| 		void append(int);			// Appends this number to the current row | ||||
| 		void writeln();				// Flushes the row buffer into the file (writes the row) | ||||
| 		void close();				// Saves and closes the file | ||||
| 		int field_count();			// Gets the number of fields (columns) | ||||
| 		long row_count();			// Gets the number of records (rows) -- NOT including the header row (unlike the rows member above) | ||||
| }; | ||||
							
								
								
									
										32
									
								
								samples/C++/Field.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								samples/C++/Field.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,32 @@ | ||||
| /***************************************************************************** | ||||
| * Dwarf Mine - The 13-11 Benchmark | ||||
| * | ||||
| * Copyright (c) 2013 Bünger, Thomas; Kieschnick, Christian; Kusber, | ||||
| * Michael; Lohse, Henning; Wuttke, Nikolai; Xylander, Oliver; Yao, Gary; | ||||
| * Zimmermann, Florian | ||||
| * | ||||
| * Permission is hereby granted, free of charge, to any person obtaining | ||||
| * a copy of this software and associated documentation files (the | ||||
| * "Software"), to deal in the Software without restriction, including | ||||
| * without limitation the rights to use, copy, modify, merge, publish, | ||||
| * distribute, sublicense, and/or sell copies of the Software, and to | ||||
| * permit persons to whom the Software is furnished to do so, subject to | ||||
| * the following conditions: | ||||
| * | ||||
| * The above copyright notice and this permission notice shall be | ||||
| * included in all copies or substantial portions of the Software. | ||||
| * | ||||
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||||
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||||
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | ||||
| * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||||
| * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||||
| * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||||
| * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||||
| *****************************************************************************/ | ||||
|  | ||||
| #pragma once | ||||
| /* Field lists four named states: Free, Black, White and Illegal (presumably the states of one board cell -- TODO confirm). */ | ||||
| enum Field { Free, Black, White, Illegal }; | ||||
| /* Player is an alias for Field, so a Player value is one of the Field enumerators (presumably Black or White -- TODO confirm). */ | ||||
| typedef Field Player; | ||||
							
								
								
									
										138
									
								
								samples/C++/libcanister.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										138
									
								
								samples/C++/libcanister.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,138 @@ | ||||
| #ifndef LIBCANIH | ||||
| #define LIBCANIH | ||||
| #include <iostream> | ||||
| #include <fstream> | ||||
| #include <stdlib.h> | ||||
| #include <cstring> | ||||
|  | ||||
| #define int64 unsigned long long | ||||
| //#define DEBUG | ||||
|  | ||||
| #ifdef DEBUG | ||||
| #define dout cout | ||||
| #else | ||||
| #define dout if (0) cerr | ||||
| #endif | ||||
|  | ||||
| using namespace std; | ||||
|  | ||||
| namespace libcanister | ||||
| { | ||||
|  | ||||
|     //the canmem object is a generic memory container used commonly | ||||
|     //throughout the canister framework to hold a block of memory of | ||||
|     //uncertain length which may or may not contain null bytes. | ||||
|     class canmem | ||||
|     { | ||||
|     public: | ||||
|         char* data; //the raw memory block | ||||
|         int size; //the absolute length of the block | ||||
|         canmem(); //creates an unallocated canmem | ||||
|         canmem(int allocsize); //creates an allocated, blank canmem of size allocsize | ||||
|         canmem(char* strdata); //automates the creation of zero-limited (presumably NUL-terminated) canmems from a string | ||||
|         ~canmem(); //cleans up the canmem | ||||
|         void zeromem(); //overwrites this canmem (presumably with zeros, going by the name -- TODO confirm) | ||||
|         void fragmem(); //overwrites this canmem with fragment notation | ||||
|         void countlen(); //counts length of zero-limited strings and stores it in size | ||||
|         void trim(); //removes any nulls from the end of the string | ||||
|         static canmem null(); //returns a null canmem by value (described by the author as a singleton) | ||||
|         /* NOTE(review): no copy constructor or assignment operator is declared here, yet canmem is passed by value elsewhere in this header; if ~canmem() frees data, copies would share one buffer -- verify. */ | ||||
|     }; | ||||
|      | ||||
|     //holds descriptive information about a canister: its path, its name and its file count | ||||
|     class caninfo | ||||
|     { | ||||
|     public: | ||||
|         canmem path; //physical path | ||||
|         canmem internalname; //a name for the canister | ||||
|         int numfiles; //the number of files in the canister | ||||
|     }; | ||||
|      | ||||
|     //forward declaration: canfile holds a canister* (its parent) before canister itself is defined | ||||
|     class canister; | ||||
|      | ||||
|     //this object holds the definition of a single 'file' within the | ||||
|     //canister 'filesystem', together with its cache bookkeeping. | ||||
|     class canfile | ||||
|     { | ||||
|     public: | ||||
|         libcanister::canister* parent; //the canister that holds this file | ||||
|         canmem path; //internal path ('filename') | ||||
|         canmem data; //the file's decompressed contents | ||||
|         int isfrag; //0 = probably not fragment, 1 = definitely a fragment (ignore) | ||||
|         int cfid; //'canfile id' -- a unique ID for this file | ||||
|         int64 dsize; //ondisk size (compressed form size); note int64 is #defined in this header as unsigned long long | ||||
|         int cachestate; //0 = not in memory, 1 = in memory, 2 = in memory and needs flush | ||||
|                         //-1 = error, check the data member for the message | ||||
|         void cache(); //pull the file from disk and cache it in memory | ||||
|         void cachedump(); //deletes the contents of this file from the memory cache after assuring the on disk copy is up to date | ||||
|         void cachedumpfinal(fstream& infile); //same as cachedump, but more efficient during closing procedures | ||||
|         void flush(); //updates the on disk copy, but retains the memory cache | ||||
|     }; | ||||
|  | ||||
|     //the primary class of the library | ||||
|     //each instance defines and controls a single canister | ||||
|     class canister | ||||
|     { | ||||
|         //table of contents (private: declared before the public: label) | ||||
|         //absolutely worthless to the control code in the canister | ||||
|         //but quite useful to programs using the API, as they may | ||||
|         //desire to enumerate the files in a canister for a user's | ||||
|         //use or for their own. | ||||
|         //contains a newline-delimited list of files in the container. | ||||
|         canfile TOC; | ||||
|     public: | ||||
|         caninfo info; //the general info about this canister | ||||
|  | ||||
|         //the raw canfiles -- it is recommended that programs do not | ||||
|         //modify these files directly, but this is not enforced. | ||||
|         canfile* files; | ||||
|         bool readonly; //if true then no write routines will do anything | ||||
|          | ||||
|         //maximum number of files to have in memory at any given | ||||
|         //time, change this to whatever suits your application. | ||||
|         int cachemax; | ||||
|         int cachecnt; //number of files in the cache (should not be modified by callers) | ||||
|  | ||||
|         //both constructors initialize the canister from a physical location | ||||
|         canister (canmem fspath); | ||||
|         canister (char* fspath); | ||||
|  | ||||
|         //destroys the canister (after flushing the modded buffers, of course) | ||||
|         ~canister(); | ||||
|          | ||||
|         //open the fspath | ||||
|         //does it exist? | ||||
|         // | --- yes --- opening it (return 1) | ||||
|         // | --- yes --- but the file is corrupted, halting (return -1) | ||||
|         // | --- no  --- making a new one (return 0) | ||||
|         int open(); | ||||
|          | ||||
|         //close the canister, flush all buffers, clean up | ||||
|         int close(); | ||||
|          | ||||
|         //deletes the file at path inside this canister | ||||
|         int delFile(canmem path); | ||||
|          | ||||
|         //pulls the contents of the file from disk or memory and returns it as a canfile | ||||
|         canfile getFile(canmem path); | ||||
|          | ||||
|         //creates a file if it does not exist, otherwise overwrites | ||||
|         //returns whether the operation succeeded | ||||
|         bool writeFile(canmem path, canmem data); | ||||
|         bool writeFile(canfile file); | ||||
|          | ||||
|         //get the 'table of contents', a file containing a newline delimited | ||||
|         //list of the file paths in the container which have contents | ||||
|         canfile getTOC(); | ||||
|   | ||||
|         //brings the cache back within the cachemax limit | ||||
|         //important: sCFID is the safe CFID | ||||
|         //(the CFID of the file we want to avoid uncaching) | ||||
|         //really just used internally, but it can't do any harm. dFlush defaults to false and is not described here (presumably forces a flush -- TODO confirm) | ||||
|         void cacheclean(int sCFID, bool dFlush = false); | ||||
|     }; | ||||
|  | ||||
| } | ||||
|  | ||||
| #endif | ||||
							
								
								
									
										92
									
								
								samples/C++/metrics.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										92
									
								
								samples/C++/metrics.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,92 @@ | ||||
| // Copyright 2011 Google Inc. All Rights Reserved. | ||||
| // | ||||
| // Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| // you may not use this file except in compliance with the License. | ||||
| // You may obtain a copy of the License at | ||||
| // | ||||
| //     http://www.apache.org/licenses/LICENSE-2.0 | ||||
| // | ||||
| // Unless required by applicable law or agreed to in writing, software | ||||
| // distributed under the License is distributed on an "AS IS" BASIS, | ||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| // See the License for the specific language governing permissions and | ||||
| // limitations under the License. | ||||
|  | ||||
| #ifndef NINJA_METRICS_H_ | ||||
| #define NINJA_METRICS_H_ | ||||
|  | ||||
| #include <string> | ||||
| #include <vector> | ||||
| using namespace std; | ||||
|  | ||||
| #include "util.h"  // For int64_t. | ||||
|  | ||||
| /// The Metrics module is used for the debug mode that dumps timing stats of | ||||
| /// various actions.  To use, see METRIC_RECORD below. | ||||
|  | ||||
| /// A single metric we're tracking, like "depfile load time". | ||||
| struct Metric { | ||||
|   string name; | ||||
|   /// Number of times we've hit the code path. | ||||
|   int count; | ||||
|   /// Total time (in micros) we've spent on the code path. | ||||
|   int64_t sum; | ||||
| }; | ||||
|  | ||||
|  | ||||
| /// A scoped object for recording a metric across the body of a function. | ||||
| /// Used by the METRIC_RECORD macro, which passes NULL when g_metrics is unset. | ||||
| struct ScopedMetric { | ||||
|   explicit ScopedMetric(Metric* metric); | ||||
|   ~ScopedMetric(); | ||||
|  | ||||
| private: | ||||
|   Metric* metric_; /* presumably the Metric given to the constructor; may be NULL -- TODO confirm */ | ||||
|   /// Timestamp when the measurement started. | ||||
|   /// Value is platform-dependent. | ||||
|   int64_t start_; | ||||
| }; | ||||
|  | ||||
| /// The singleton that stores metrics and prints the report; reached through the global g_metrics pointer. | ||||
| struct Metrics { | ||||
|   Metric* NewMetric(const string& name); /* returns a Metric for name; presumably also recorded in metrics_ -- TODO confirm */ | ||||
|  | ||||
|   /// Print a summary report to stdout. | ||||
|   void Report(); | ||||
|  | ||||
| private: | ||||
|   vector<Metric*> metrics_; | ||||
| }; | ||||
|  | ||||
| /// Get the current time relative to some epoch (in milliseconds, going by the name -- TODO confirm). | ||||
| /// The epoch varies between platforms, so this is only useful for measuring elapsed time. | ||||
| int64_t GetTimeMillis(); | ||||
|  | ||||
| /// A simple stopwatch which returns the time | ||||
| /// in seconds since Restart() was called. | ||||
| struct Stopwatch { | ||||
|  public: | ||||
|   Stopwatch() : started_(0) {} | ||||
| /* started_ begins at 0, so Elapsed() called before any Restart() is measured from 0, not from construction. */ | ||||
|   /// Seconds since the last Restart() call: Now() - started_ scaled by 1e-6 (so Now() is presumably in microseconds). | ||||
|   double Elapsed() const { | ||||
|     return 1e-6 * static_cast<double>(Now() - started_); | ||||
|   } | ||||
| /* Restart() records the current Now() value as the new starting point. */ | ||||
|   void Restart() { started_ = Now(); } | ||||
|  | ||||
|  private: | ||||
|   uint64_t started_; /* value of Now() at the last Restart(); 0 until then */ | ||||
|   uint64_t Now() const; /* declared only; the definition is not in this header */ | ||||
| }; | ||||
|  | ||||
| /// The primary interface to metrics.  Use METRIC_RECORD("foobar") at the top | ||||
| /// of a function to get timing stats recorded for each call of the function.  Expands to a function-local static Metric* (NewMetric(name) when g_metrics is non-null, otherwise NULL) plus a ScopedMetric local built from it. | ||||
| #define METRIC_RECORD(name)                                             \ | ||||
|   static Metric* metrics_h_metric =                                     \ | ||||
|       g_metrics ? g_metrics->NewMetric(name) : NULL;                    \ | ||||
|   ScopedMetric metrics_h_scoped(metrics_h_metric); | ||||
| /* Global pointer to the Metrics instance; METRIC_RECORD checks it for null before calling NewMetric(). Defined elsewhere. */ | ||||
| extern Metrics* g_metrics; | ||||
|  | ||||
| #endif // NINJA_METRICS_H_ | ||||
		Reference in New Issue
	
	Block a user