mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	Compare commits
	
		
			37 Commits
		
	
	
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | f100dc91c2 | ||
|  | fd9d63d605 | ||
|  | 5c21c35875 | ||
|  | 370d55fd74 | ||
|  | 8dd2ddcbf7 | ||
|  | 037857623d | ||
|  | d7b19d577b | ||
|  | c70048a3e2 | ||
|  | 6d51117a91 | ||
|  | 848a1cc1e5 | ||
|  | 9092dfdc7f | ||
|  | d7fe0cc5c7 | ||
|  | 15ec37d4bc | ||
|  | 43cc701ac3 | ||
|  | 7cb8357f73 | ||
|  | 4b46bcf649 | ||
|  | a954a6465e | ||
|  | afb6041104 | ||
|  | 4b28fdbc4d | ||
|  | b8a5e8505a | ||
|  | 3087d640a3 | ||
|  | e87b89ab5b | ||
|  | 7aabc6a5ad | ||
|  | 5cc053694a | ||
|  | 653314448c | ||
|  | 4f14db10ea | ||
|  | 98e348ba5f | ||
|  | a69b20c1a4 | ||
|  | 9275e5240f | ||
|  | 7dcc3b3edf | ||
|  | 6e872c11b6 | ||
|  | e5b6001759 | ||
|  | 769f1b8658 | ||
|  | 5814b61356 | ||
|  | f59cf24a82 | ||
|  | d94bffb198 | ||
|  | 2beb450df6 | 
							
								
								
									
										58
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										58
									
								
								README.md
									
									
									
									
									
								
							| @@ -32,33 +32,57 @@ The Language stats bar that you see on every repository is built by aggregating | ||||
|  | ||||
| The repository stats API, accessed through `#languages`, can be used on a directory: | ||||
|  | ||||
| ***API UPDATE*** | ||||
|  | ||||
| Since [Version 3.0.0](https://github.com/github/linguist/releases/tag/v3.0.0), Linguist expects a git repository (in the form of a [Rugged::Repository](https://github.com/libgit2/rugged#repositories)) to be passed when initializing `Linguist::Repository`. | ||||
|  | ||||
|  | ||||
| ```ruby | ||||
| project = Linguist::Repository.from_directory(".") | ||||
| project.language.name  #=> "Ruby" | ||||
| project.languages      #=> { "Ruby" => 0.98, "Shell" => 0.02 } | ||||
| require 'rugged' | ||||
| require 'linguist' | ||||
|  | ||||
| repo = Rugged::Repository.new('.') | ||||
| project = Linguist::Repository.new(repo, repo.head.target_id) | ||||
| project.language       #=> "Ruby" | ||||
| project.languages      #=> { "Ruby" => 119387 } | ||||
| ``` | ||||
|  | ||||
| These stats are also printed out by the `linguist` binary. You can use the | ||||
| `--breakdown` flag, and the binary will also output the breakdown of files by language. | ||||
|  | ||||
| You can try running `linguist` on the `lib/` directory in this repository itself: | ||||
| You can try running `linguist` on the root directory in this repository itself: | ||||
|  | ||||
|     $ bundle exec linguist lib/ --breakdown | ||||
|     $ bundle exec linguist --breakdown | ||||
|  | ||||
|     100.00% Ruby | ||||
|  | ||||
|     Ruby: | ||||
|     linguist/blob_helper.rb | ||||
|     linguist/classifier.rb | ||||
|     linguist/file_blob.rb | ||||
|     linguist/generated.rb | ||||
|     linguist/heuristics.rb | ||||
|     linguist/language.rb | ||||
|     linguist/md5.rb | ||||
|     linguist/repository.rb | ||||
|     linguist/samples.rb | ||||
|     linguist/tokenizer.rb | ||||
|     linguist.rb | ||||
|     Gemfile | ||||
|     Rakefile | ||||
|     bin/linguist | ||||
|     github-linguist.gemspec | ||||
|     lib/linguist.rb | ||||
|     lib/linguist/blob_helper.rb | ||||
|     lib/linguist/classifier.rb | ||||
|     lib/linguist/file_blob.rb | ||||
|     lib/linguist/generated.rb | ||||
|     lib/linguist/heuristics.rb | ||||
|     lib/linguist/language.rb | ||||
|     lib/linguist/lazy_blob.rb | ||||
|     lib/linguist/md5.rb | ||||
|     lib/linguist/repository.rb | ||||
|     lib/linguist/samples.rb | ||||
|     lib/linguist/tokenizer.rb | ||||
|     lib/linguist/version.rb | ||||
|     test/test_blob.rb | ||||
|     test/test_classifier.rb | ||||
|     test/test_heuristics.rb | ||||
|     test/test_language.rb | ||||
|     test/test_md5.rb | ||||
|     test/test_pedantic.rb | ||||
|     test/test_repository.rb | ||||
|     test/test_samples.rb | ||||
|     test/test_tokenizer.rb | ||||
|  | ||||
| #### Ignore vendored files | ||||
|  | ||||
| @@ -141,7 +165,7 @@ If you are the current maintainer of this gem: | ||||
|  0. Ensure that tests are green: `bundle exec rake test` | ||||
|  0. Bump gem version in `lib/linguist/version.rb`.  For example, [like this](https://github.com/github/linguist/commit/8d2ea90a5ba3b2fe6e1508b7155aa4632eea2985). | ||||
|  0. Make a PR to github/linguist.  For example, [#1238](https://github.com/github/linguist/pull/1238). | ||||
|  0. Build a local gem: `gem build github-linguist.gemspec` | ||||
|  0. Build a local gem: `bundle exec rake build_gem` | ||||
|  0. Testing: | ||||
|    0. Bump the Gemfile and Gemfile.lock versions for an app which relies on this gem | ||||
|    0. Install the new gem locally | ||||
|   | ||||
| @@ -321,6 +321,11 @@ module Linguist | ||||
|       language ? language.lexer : Pygments::Lexer.find_by_name('Text only') | ||||
|     end | ||||
|  | ||||
|     # Internal: Get the TextMate compatible scope for the blob | ||||
|     def tm_scope | ||||
|       language && language.tm_scope | ||||
|     end | ||||
|  | ||||
|     # Public: Highlight syntax of blob | ||||
|     # | ||||
|     # options - A Hash of options (defaults to {}) | ||||
|   | ||||
| @@ -19,6 +19,9 @@ module Linguist | ||||
|         if languages.all? { |l| ["ECL", "Prolog"].include?(l) } | ||||
|           result = disambiguate_ecl(data, languages) | ||||
|         end | ||||
|         if languages.all? { |l| ["IDL", "Prolog"].include?(l) } | ||||
|           result = disambiguate_pro(data, languages) | ||||
|         end | ||||
|         if languages.all? { |l| ["Common Lisp", "OpenCL"].include?(l) } | ||||
|           result = disambiguate_cl(data, languages) | ||||
|         end | ||||
| @@ -51,6 +54,16 @@ module Linguist | ||||
|       matches | ||||
|     end | ||||
|  | ||||
|     def self.disambiguate_pro(data, languages) | ||||
|       matches = [] | ||||
|       if (data.include?(":-")) | ||||
|         matches << Language["Prolog"] | ||||
|       else | ||||
|         matches << Language["IDL"] | ||||
|       end | ||||
|       matches | ||||
|     end | ||||
|  | ||||
|     def self.disambiguate_ts(data, languages) | ||||
|       matches = [] | ||||
|       if (data.include?("</translation>")) | ||||
|   | ||||
| @@ -290,6 +290,16 @@ module Linguist | ||||
|       @lexer = Pygments::Lexer.find_by_name(attributes[:lexer] || name) || | ||||
|         raise(ArgumentError, "#{@name} is missing lexer") | ||||
|  | ||||
|       @tm_scope = attributes[:tm_scope] || begin | ||||
|         context = case @type | ||||
|                   when :data, :markup, :prose | ||||
|                     'text' | ||||
|                   when :programming, nil | ||||
|                     'source' | ||||
|                   end | ||||
|         "#{context}.#{@name.downcase}" | ||||
|       end | ||||
|  | ||||
|       @ace_mode = attributes[:ace_mode] | ||||
|       @wrap = attributes[:wrap] || false | ||||
|  | ||||
| @@ -363,6 +373,11 @@ module Linguist | ||||
|     # Returns the Lexer | ||||
|     attr_reader :lexer | ||||
|  | ||||
|     # Public: Get the name of a TextMate-compatible scope | ||||
|     # | ||||
|     # Returns the scope | ||||
|     attr_reader :tm_scope | ||||
|  | ||||
|     # Public: Get Ace mode | ||||
|     # | ||||
|     # Examples | ||||
| @@ -564,6 +579,7 @@ module Linguist | ||||
|       :type              => options['type'], | ||||
|       :aliases           => options['aliases'], | ||||
|       :lexer             => options['lexer'], | ||||
|       :tm_scope          => options['tm_scope'], | ||||
|       :ace_mode          => options['ace_mode'], | ||||
|       :wrap              => options['wrap'], | ||||
|       :group_name        => options['group'], | ||||
|   | ||||
| @@ -83,6 +83,7 @@ ATS: | ||||
| ActionScript: | ||||
|   type: programming | ||||
|   lexer: ActionScript 3 | ||||
|   tm_scope: source.actionscript.3 | ||||
|   color: "#e3491a" | ||||
|   search_term: as3 | ||||
|   aliases: | ||||
| @@ -282,6 +283,7 @@ C: | ||||
| C#: | ||||
|   type: programming | ||||
|   ace_mode: csharp | ||||
|   tm_scope: source.cs | ||||
|   search_term: csharp | ||||
|   color: "#178600" | ||||
|   aliases: | ||||
| @@ -411,6 +413,7 @@ Clojure: | ||||
|  | ||||
| CoffeeScript: | ||||
|   type: programming | ||||
|   tm_scope: source.coffee | ||||
|   ace_mode: coffee | ||||
|   color: "#244776" | ||||
|   aliases: | ||||
| @@ -453,6 +456,7 @@ ColdFusion CFC: | ||||
|  | ||||
| Common Lisp: | ||||
|   type: programming | ||||
|   tm_scope: source.lisp | ||||
|   color: "#3fb68b" | ||||
|   aliases: | ||||
|   - lisp | ||||
| @@ -648,6 +652,7 @@ Elm: | ||||
| Emacs Lisp: | ||||
|   type: programming | ||||
|   lexer: Common Lisp | ||||
|   tm_scope: source.lisp | ||||
|   color: "#c065db" | ||||
|   aliases: | ||||
|   - elisp | ||||
| @@ -932,6 +937,7 @@ Groovy Server Pages: | ||||
|  | ||||
| HTML: | ||||
|   type: markup | ||||
|   tm_scope: text.html.basic | ||||
|   ace_mode: html | ||||
|   aliases: | ||||
|   - xhtml | ||||
| @@ -943,6 +949,7 @@ HTML: | ||||
|  | ||||
| HTML+Django: | ||||
|   type: markup | ||||
|   tm_scope: text.html.django | ||||
|   group: HTML | ||||
|   lexer: HTML+Django/Jinja | ||||
|   extensions: | ||||
| @@ -951,6 +958,7 @@ HTML+Django: | ||||
|  | ||||
| HTML+ERB: | ||||
|   type: markup | ||||
|   tm_scope: text.html.ruby | ||||
|   group: HTML | ||||
|   lexer: RHTML | ||||
|   aliases: | ||||
| @@ -961,6 +969,7 @@ HTML+ERB: | ||||
|  | ||||
| HTML+PHP: | ||||
|   type: markup | ||||
|   tm_scope: text.html.php | ||||
|   group: HTML | ||||
|   extensions: | ||||
|   - .phtml | ||||
| @@ -1096,6 +1105,7 @@ J: | ||||
|  | ||||
| JSON: | ||||
|   type: data | ||||
|   tm_scope: source.json | ||||
|   group: JavaScript | ||||
|   ace_mode: json | ||||
|   searchable: false | ||||
| @@ -1158,6 +1168,7 @@ Java Server Pages: | ||||
|  | ||||
| JavaScript: | ||||
|   type: programming | ||||
|   tm_scope: source.js | ||||
|   ace_mode: javascript | ||||
|   color: "#f1e05a" | ||||
|   aliases: | ||||
| @@ -1286,6 +1297,7 @@ Literate Agda: | ||||
|  | ||||
| Literate CoffeeScript: | ||||
|   type: programming | ||||
|   tm_scope: source.litcoffee | ||||
|   group: CoffeeScript | ||||
|   lexer: Text only | ||||
|   ace_mode: markdown | ||||
| @@ -1569,6 +1581,7 @@ ObjDump: | ||||
|  | ||||
| Objective-C: | ||||
|   type: programming | ||||
|   tm_scope: source.objc | ||||
|   color: "#438eff" | ||||
|   aliases: | ||||
|   - obj-c | ||||
| @@ -1579,6 +1592,7 @@ Objective-C: | ||||
|  | ||||
| Objective-C++: | ||||
|   type: programming | ||||
|   tm_scope: source.objc++ | ||||
|   color: "#4886FC" | ||||
|   aliases: | ||||
|   - obj-c++ | ||||
| @@ -1669,6 +1683,7 @@ PAWN: | ||||
|  | ||||
| PHP: | ||||
|   type: programming | ||||
|   tm_scope: text.html.php | ||||
|   ace_mode: php | ||||
|   color: "#4F5D95" | ||||
|   extensions: | ||||
| @@ -1820,6 +1835,7 @@ Prolog: | ||||
|   extensions: | ||||
|   - .pl | ||||
|   - .ecl | ||||
|   - .pro | ||||
|   - .prolog | ||||
|  | ||||
| Propeller Spin: | ||||
| @@ -2074,6 +2090,7 @@ SAS: | ||||
|  | ||||
| SCSS: | ||||
|   type: markup | ||||
|   tm_scope: source.scss | ||||
|   group: CSS | ||||
|   ace_mode: scss | ||||
|   extensions: | ||||
| @@ -2089,6 +2106,7 @@ SQF: | ||||
|  | ||||
| SQL: | ||||
|   type: data | ||||
|   tm_scope: source.sql | ||||
|   ace_mode: sql | ||||
|   extensions: | ||||
|   - .sql | ||||
| @@ -2113,6 +2131,7 @@ Sage: | ||||
|  | ||||
| Sass: | ||||
|   type: markup | ||||
|   tm_scope: source.sass | ||||
|   group: CSS | ||||
|   extensions: | ||||
|   - .sass | ||||
| @@ -2587,6 +2606,7 @@ Xtend: | ||||
|  | ||||
| YAML: | ||||
|   type: data | ||||
|   tm_scope: source.yaml | ||||
|   aliases: | ||||
|   - yml | ||||
|   extensions: | ||||
|   | ||||
| @@ -128,13 +128,20 @@ module Linguist | ||||
|     protected | ||||
|  | ||||
|     def compute_stats(old_commit_oid, cache = nil) | ||||
|       file_map = cache ? cache.dup : {} | ||||
|       old_tree = old_commit_oid && Rugged::Commit.lookup(repository, old_commit_oid).tree | ||||
|  | ||||
|       read_index | ||||
|  | ||||
|       diff = Rugged::Tree.diff(repository, old_tree, current_tree) | ||||
|  | ||||
|       # Clear file map and fetch full diff if any .gitattributes files are changed | ||||
|       if cache && diff.each_delta.any? { |delta| File.basename(delta.new_file[:path]) == ".gitattributes" } | ||||
|         diff = Rugged::Tree.diff(repository, old_tree = nil, current_tree) | ||||
|         file_map = {} | ||||
|       else | ||||
|         file_map = cache ? cache.dup : {} | ||||
|       end | ||||
|  | ||||
|       diff.each_delta do |delta| | ||||
|         old = delta.old_file[:path] | ||||
|         new = delta.new_file[:path] | ||||
|   | ||||
| @@ -1,3 +1,3 @@ | ||||
| module Linguist | ||||
|   VERSION = "3.2.0" | ||||
|   VERSION = "3.4.0" | ||||
| end | ||||
|   | ||||
							
								
								
									
										68
									
								
								samples/Prolog/logic-problem.pro
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								samples/Prolog/logic-problem.pro
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,68 @@ | ||||
| /** | ||||
|  * Question 1.1 | ||||
|  * combiner(+Buddies, -Pairs) | ||||
|  */ | ||||
| combiner([], []). | ||||
| combiner([First|Buddies], Pairs):- | ||||
| 	make_pairs(First, Buddies, Pairs1), | ||||
| 	combiner(Buddies, Pairs2), | ||||
| 	concat(Pairs1, Pairs2, Pairs). | ||||
|  | ||||
| /** | ||||
|  * make_pairs(+Buddy, +Buddies, -Pairs) | ||||
|  */ | ||||
| make_pairs(Buddy, [], []). | ||||
| make_pairs(Buddy, [First|Buddies], [(Buddy, First)|Pairs]):- | ||||
| 	make_pairs(Buddy, Buddies, Pairs). | ||||
|  | ||||
| /** | ||||
|  * concat(+X, +Y, ?T) | ||||
|  */ | ||||
| concat([], Y, Y). | ||||
| concat([P|R], Y, [P|T]):- | ||||
| 	concat(R, Y, T). | ||||
|  | ||||
|  | ||||
| /** | ||||
|  * Question 1.2 | ||||
|  * extraire(+AllPossiblePairs, +NbPairs, -Tp, -RemainingPairs) | ||||
|  */ | ||||
| extraire(AllPossiblePairs, 0, [], AllPossiblePairs). | ||||
| extraire([PossiblePair|AllPossiblePairs], NbPairs, [PossiblePair|Tp], NewRemainingPairs):- | ||||
| 	NbPairs > 0, | ||||
| 	NewNbPairs is NbPairs - 1, | ||||
| 	extraire(AllPossiblePairs, NewNbPairs, Tp, RemainingPairs), | ||||
| 	not(pair_in_array(PossiblePair, Tp)), | ||||
| 	delete_pair(RemainingPairs, PossiblePair, NewRemainingPairs). | ||||
| extraire([PossiblePair|AllPossiblePairs], NbPairs, Tp, [PossiblePair|RemainingPairs]):- | ||||
| 	NbPairs > 0, | ||||
| 	extraire(AllPossiblePairs, NbPairs, Tp, RemainingPairs), | ||||
| 	pair_in_array(PossiblePair, Tp). | ||||
|  | ||||
| /** | ||||
|  * delete_pair(+Pairs, +Pair, -PairsWithoutPair) | ||||
|  */ | ||||
| delete_pair([], _, []). | ||||
| delete_pair([Pair|Pairs], Pair, Pairs):-!. | ||||
| delete_pair([FirstPair|Pairs], Pair, [FirstPair|PairsWithoutPair]):- | ||||
| 	delete_pair(Pairs, Pair, PairsWithoutPair). | ||||
|  | ||||
| /** | ||||
|  * pair_in_array(+Pair, +Pairs) | ||||
|  */ | ||||
| pair_in_array((A, B), [(C, D)|Pairs]):- | ||||
| 	(A == C ; B == D ; A == D ; B == C), | ||||
| 	!. | ||||
| pair_in_array(Pair, [FirstPair|Pairs]):- | ||||
| 	pair_in_array(Pair, Pairs). | ||||
|  | ||||
|  | ||||
| /** | ||||
|  * Question 1.3 | ||||
|  * les_tps(+Buddies, -Tps) | ||||
|  */ | ||||
| les_tps(Buddies, Tps):- | ||||
| 	combiner(Buddies, PossiblePairs), | ||||
| 	length(Buddies, NbBuddies), | ||||
| 	NbPairs is integer(NbBuddies / 2), | ||||
| 	findall(Tp, extraire(PossiblePairs, NbPairs, Tp, _), Tps). | ||||
| @@ -65,6 +65,18 @@ class TestHeuristcs < Test::Unit::TestCase | ||||
|     assert_equal Language["ECL"], results.first | ||||
|   end | ||||
|  | ||||
|   def test_pro_prolog_by_heuristics | ||||
|     languages = ["IDL", "Prolog"] | ||||
|     results = Heuristics.disambiguate_pro(fixture("Prolog/logic-problem.pro"), languages) | ||||
|     assert_equal Language["Prolog"], results.first | ||||
|   end | ||||
|  | ||||
|   def test_pro_idl_by_heuristics | ||||
|     languages = ["IDL", "Prolog"] | ||||
|     results = Heuristics.disambiguate_pro(fixture("IDL/mg_acosh.pro"), languages) | ||||
|     assert_equal Language["IDL"], results.first | ||||
|   end | ||||
|  | ||||
|   def test_ts_typescript_by_heuristics | ||||
|     languages = ["TypeScript", "XML"] | ||||
|     results = Heuristics.disambiguate_ts(fixture("TypeScript/classes.ts"), languages) | ||||
|   | ||||
| @@ -68,6 +68,19 @@ class TestRepository < Test::Unit::TestCase | ||||
|     assert !repo.breakdown_by_file["Ruby"].empty? | ||||
|   end | ||||
|  | ||||
|   def test_commit_with_git_attributes_data | ||||
|     # Before we had any .gitattributes data | ||||
|     old_commit = '4a017d9033f91b2776eb85275463f9613cc371ef' | ||||
|     old_repo = linguist_repo(old_commit) | ||||
|  | ||||
|     # With some .gitattributes data | ||||
|     attr_commit = '7ee006cbcb2d7261f9e648510a684ee9ac64126b' | ||||
|     # It's incremental but should bust the cache | ||||
|     new_repo = Linguist::Repository.incremental(rugged_repository, attr_commit, old_commit, old_repo.cache) | ||||
|  | ||||
|     assert new_repo.breakdown_by_file["Java"].include?("lib/linguist.rb") | ||||
|   end | ||||
|  | ||||
|   def test_linguist_override_vendored? | ||||
|     attr_commit = '351c1cc8fd57340839bdb400d7812332af80e9bd' | ||||
|     repo = linguist_repo(attr_commit).read_index | ||||
|   | ||||
		Reference in New Issue
	
	Block a user