From 7fb62de4d79ac85dc9878c43b769466fbb73e39b Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Sat, 6 Jun 2015 15:37:41 +0200 Subject: [PATCH] Associate each heuristic rule to a file extension --- lib/linguist/heuristics.rb | 69 ++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/lib/linguist/heuristics.rb b/lib/linguist/heuristics.rb index 39e286bb..736c4e89 100644 --- a/lib/linguist/heuristics.rb +++ b/lib/linguist/heuristics.rb @@ -13,11 +13,14 @@ module Linguist # ]) # # Returns an Array of languages, or empty if none matched or were inconclusive. - def self.call(blob, languages) + def self.call(blob, candidates) data = blob.data @heuristics.each do |heuristic| - return Array(heuristic.call(data)) if heuristic.matches?(languages) + if heuristic.matches?(blob.name) + languages = Array(heuristic.call(data)) + return languages if languages.any? || languages.all? { |l| candidates.include?(l) } + end end [] # No heuristics matched @@ -38,22 +41,22 @@ module Linguist # end # end # - def self.disambiguate(*languages, &heuristic) - @heuristics << new(languages, &heuristic) + def self.disambiguate(extension, &heuristic) + @heuristics << new(extension, &heuristic) end # Internal: Array of defined heuristics @heuristics = [] # Internal - def initialize(languages, &heuristic) - @languages = languages + def initialize(extension, &heuristic) + @extension = extension @heuristic = heuristic end # Internal: Check if this heuristic matches the candidate languages. - def matches?(candidates) - candidates.any? && candidates.all? { |l| @languages.include?(l.name) } + def matches?(filename) + filename.end_with?(@extension) end # Internal: Perform the heuristic @@ -64,7 +67,7 @@ module Linguist # Common heuristics ObjectiveCRegex = /^[ \t]*@(interface|class|protocol|property|end|synchronised|selector|implementation)\b/ - disambiguate "BitBake", "BlitzBasic" do |data| + disambiguate ".bb" do |data| if /^\s*; /.match(data) || data.include?("End Function") Language["BlitzBasic"] elsif /^\s*(# |include|require)\b/.match(data) @@ -72,7 +75,7 @@ module Linguist end end - disambiguate "C#", "Smalltalk" do |data| + disambiguate ".cs" do |data| if /![\w\s]+methodsFor: /.match(data) Language["Smalltalk"] elsif /^\s*namespace\s*[\w\.]+\s*{/.match(data) || /^\s*\/\//.match(data) @@ -80,7 +83,7 @@ module Linguist end end - disambiguate "Objective-C", "C++", "C" do |data| + disambiguate ".h" do |data| if ObjectiveCRegex.match(data) Language["Objective-C"] elsif (/^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>/.match(data) || @@ -89,7 +92,7 @@ module Linguist end end - disambiguate "Perl", "Perl6", "Prolog" do |data| + disambiguate ".pl" do |data| if data.include?("use v6") Language["Perl6"] elsif data.match(/use strict|use\s+v?5\./) @@ -99,7 +102,7 @@ module Linguist end end - disambiguate "ECL", "Prolog" do |data| + disambiguate ".ecl" do |data| if /^[^#]+:-/.match(data) Language["Prolog"] elsif data.include?(":=") @@ -107,7 +110,7 @@ module Linguist end end - disambiguate "IDL", "Prolog", "INI", "QMake" do |data| + disambiguate ".pro" do |data| if /^[^#]+:-/.match(data) Language["Prolog"] elsif data.include?("last_client=") @@ -119,7 +122,7 @@ module Linguist end end - disambiguate "GAP", "Scilab" do |data| + disambiguate ".tst" do |data| if (data.include?("gap> ")) Language["GAP"] # Heads up - we don't usually write heuristics like this (with no regex match) @@ -128,7 +131,7 @@ module Linguist end end - disambiguate "Common Lisp", "OpenCL", "Cool" do |data| + disambiguate ".cl" do |data| if /^\s*\((defun|in-package|defpackage) /i.match(data) Language["Common Lisp"] elsif /^class/x.match(data) @@ -138,7 +141,7 @@ module Linguist end end - disambiguate "Hack", "PHP" do |data| + disambiguate ".php" do |data| if data.include?(" |case\s+(\S+\s)+of/.match(data) @@ -274,7 +277,7 @@ module Linguist end end - disambiguate "NL", "NewLisp" do |data| + disambiguate ".nl" do |data| if /^(b|g)[0-9]+ /.match(data) Language["NL"] else @@ -282,7 +285,7 @@ module Linguist end end - disambiguate "Rust", "RenderScript" do |data| + disambiguate ".rs" do |data| if /^(use |fn |mod |pub |macro_rules|impl|#!?\[)/.match(data) Language["Rust"] elsif /#include|#pragma\s+(rs|version)|__attribute__/.match(data) @@ -290,7 +293,7 @@ module Linguist end end - disambiguate "Common Lisp", "Lex", "Groff" do |data| + disambiguate ".l" do |data| if data.include?("(def(un|macro)\s") Language["Common Lisp"] elsif /^(%[%{}]xs|<.*>)/.match(data)