mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Merge branch 'refactor-heuristics' into 1036-local
* refactor-heuristics: (43 commits) update docs Clean up heuristic logic Allow disambiguate to return an Array Rename .create to .disambiguate docs Remove inactive heuristics Refactor heuristics Not going back docs Move call method into existing Classifier class Try strategies until one language is returned Remove unneded empty blob check Add F# and GLSL samples. Add Forth and GLSL extension .fs. Add heuristic to disambiguate between F#, Forth, and GLSL. byebug requires ruby 2.0 Remove test for removed extension Fix typo in test add rake interpreter add python3 interpreter Remove old wrong_shebang.rb sample Add byebug ... Conflicts: lib/linguist/heuristics.rb test/test_heuristics.rb
This commit is contained in:
@@ -12,7 +12,7 @@ This can usually be solved either by adding a new filename or file name extensio
|
||||
|
||||
Assuming your code is being detected as the right language (see above), in most cases this is due to a bug in the language grammar rather than a bug in Linguist. [`grammars.yml`][grammars] lists all the grammars we use for syntax highlighting on github.com. Find the one corresponding to your code's programming language and submit a bug report upstream.
|
||||
|
||||
You can also try to fix the bug yourself and submit a Pull Request. [This piece from TextMate's documentation](http://manual.macromates.com/en/language_grammars) offers a good introduction on how to work with TextMate-compatible grammars.
|
||||
You can also try to fix the bug yourself and submit a Pull Request. [This piece from TextMate's documentation](http://manual.macromates.com/en/language_grammars) offers a good introduction on how to work with TextMate-compatible grammars. You can test grammars using [Lightshow](https://lightshow.githubapp.com).
|
||||
|
||||
Once the bug has been fixed upstream, please let us know and we'll pick it up for GitHub.
|
||||
|
||||
|
||||
1
Gemfile
1
Gemfile
@@ -2,3 +2,4 @@ source 'https://rubygems.org'
|
||||
gemspec :name => "github-linguist"
|
||||
gemspec :name => "github-linguist-grammars"
|
||||
gem 'test-unit', require: false if RUBY_VERSION >= '2.2'
|
||||
gem 'byebug' if RUBY_VERSION >= '2.0'
|
||||
|
||||
@@ -5,8 +5,6 @@ http://svn.textmate.org/trunk/Review/Bundles/BlitzMax.tmbundle:
|
||||
- source.blitzmax
|
||||
http://svn.textmate.org/trunk/Review/Bundles/Cython.tmbundle:
|
||||
- source.cython
|
||||
http://svn.textmate.org/trunk/Review/Bundles/F%20Sharp.tmbundle:
|
||||
- source.fsharp
|
||||
http://svn.textmate.org/trunk/Review/Bundles/Forth.tmbundle:
|
||||
- source.forth
|
||||
http://svn.textmate.org/trunk/Review/Bundles/Parrot.tmbundle:
|
||||
@@ -135,6 +133,8 @@ https://github.com/euler0/sublime-glsl/raw/master/GLSL.tmLanguage:
|
||||
- source.glsl
|
||||
https://github.com/fancy-lang/fancy-tmbundle:
|
||||
- source.fancy
|
||||
https://github.com/fsharp/fsharpbinding:
|
||||
- source.fsharp
|
||||
https://github.com/gingerbeardman/monkey.tmbundle:
|
||||
- source.monkey
|
||||
https://github.com/guillermooo/dart-sublime-bundle/raw/master/Dart.tmLanguage:
|
||||
|
||||
@@ -3,6 +3,25 @@ require 'linguist/tokenizer'
|
||||
module Linguist
|
||||
# Language bayesian classifier.
|
||||
class Classifier
|
||||
# Public: Use the classifier to detect language of the blob.
|
||||
#
|
||||
# blob - An object that quacks like a blob.
|
||||
# possible_languages - Array of Language objects
|
||||
#
|
||||
# Examples
|
||||
#
|
||||
# Classifier.call(FileBlob.new("path/to/file"), [
|
||||
# Language["Ruby"], Language["Python"]
|
||||
# ])
|
||||
#
|
||||
# Returns an Array of Language objects, most probable first.
|
||||
def self.call(blob, possible_languages)
|
||||
language_names = possible_languages.map(&:name)
|
||||
classify(Samples.cache, blob.data, language_names).map do |name, _|
|
||||
Language[name] # Return the actual Language objects
|
||||
end
|
||||
end
|
||||
|
||||
# Public: Train classifier that data is a certain language.
|
||||
#
|
||||
# db - Hash classifier database object
|
||||
|
||||
@@ -57,14 +57,20 @@ module Linguist
|
||||
#
|
||||
# Returns a String.
|
||||
def extension
|
||||
# File.extname returns an empty string if the filename is itself an extension (e.g. a dotfile).
|
||||
extension = File.extname(name)
|
||||
basename = File.basename(name)
|
||||
# Checks if the filename is an extension.
|
||||
if extension.empty? && basename[0] == "."
|
||||
basename
|
||||
else
|
||||
extension
|
||||
extensions.last || ""
|
||||
end
|
||||
|
||||
# Public: Return an array of the file extensions
|
||||
#
|
||||
# >> Linguist::FileBlob.new("app/views/things/index.html.erb").extensions
|
||||
# => [".html.erb", ".erb"]
|
||||
#
|
||||
# Returns an Array
|
||||
def extensions
|
||||
basename, *segments = File.basename(name).split(".")
|
||||
|
||||
segments.map.with_index do |segment, index|
|
||||
"." + segments[index..-1].join(".")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -1,162 +1,143 @@
|
||||
module Linguist
|
||||
# A collection of simple heuristics that can be used to better analyze languages.
|
||||
class Heuristics
|
||||
ACTIVE = true
|
||||
|
||||
# Public: Given an array of String language names,
|
||||
# apply heuristics against the given data and return an array
|
||||
# of matching languages, or nil.
|
||||
# Public: Use heuristics to detect language of the blob.
|
||||
#
|
||||
# data - Array of tokens or String data to analyze.
|
||||
# languages - Array of language name Strings to restrict to.
|
||||
# blob - An object that quacks like a blob.
|
||||
# possible_languages - Array of Language objects
|
||||
#
|
||||
# Returns an array of Languages or []
|
||||
def self.find_by_heuristics(data, languages)
|
||||
if active?
|
||||
result = []
|
||||
# Examples
|
||||
#
|
||||
# Heuristics.call(FileBlob.new("path/to/file"), [
|
||||
# Language["Ruby"], Language["Python"]
|
||||
# ])
|
||||
#
|
||||
# Returns an Array of languages, or empty if none matched or were inconclusive.
|
||||
def self.call(blob, languages)
|
||||
data = blob.data
|
||||
|
||||
if languages.all? { |l| ["Objective-C", "C++", "C"].include?(l) }
|
||||
result = disambiguate_c(data)
|
||||
end
|
||||
if languages.all? { |l| ["Perl", "Prolog"].include?(l) }
|
||||
result = disambiguate_pl(data)
|
||||
end
|
||||
if languages.all? { |l| ["ECL", "Prolog"].include?(l) }
|
||||
result = disambiguate_ecl(data)
|
||||
end
|
||||
if languages.all? { |l| ["IDL", "Prolog"].include?(l) }
|
||||
result = disambiguate_pro(data)
|
||||
end
|
||||
if languages.all? { |l| ["Common Lisp", "OpenCL"].include?(l) }
|
||||
result = disambiguate_cl(data)
|
||||
end
|
||||
if languages.all? { |l| ["Hack", "PHP"].include?(l) }
|
||||
result = disambiguate_hack(data)
|
||||
end
|
||||
if languages.all? { |l| ["Scala", "SuperCollider"].include?(l) }
|
||||
result = disambiguate_sc(data)
|
||||
end
|
||||
if languages.all? { |l| ["AsciiDoc", "AGS Script"].include?(l) }
|
||||
result = disambiguate_asc(data)
|
||||
end
|
||||
if languages.all? { |l| ["FORTRAN", "Forth"].include?(l) }
|
||||
result = disambiguate_f(data)
|
||||
end
|
||||
return result
|
||||
@heuristics.each do |heuristic|
|
||||
return Array(heuristic.call(data)) if heuristic.matches?(languages)
|
||||
end
|
||||
|
||||
[] # No heuristics matched
|
||||
end
|
||||
|
||||
# .h extensions are ambiguous between C, C++, and Objective-C.
|
||||
# We want a shortcut to look for Objective-C _and_ now C++ too!
|
||||
# Internal: Define a new heuristic.
|
||||
#
|
||||
# Returns an array of Languages or []
|
||||
def self.disambiguate_c(data)
|
||||
matches = []
|
||||
# languages - String names of languages to disambiguate.
|
||||
# heuristic - Block which takes data as an argument and returns a Language or nil.
|
||||
#
|
||||
# Examples
|
||||
#
|
||||
# disambiguate "Perl", "Prolog" do |data|
|
||||
# if data.include?("use strict")
|
||||
# Language["Perl"]
|
||||
# elsif data.include?(":-")
|
||||
# Language["Prolog"]
|
||||
# end
|
||||
# end
|
||||
#
|
||||
def self.disambiguate(*languages, &heuristic)
|
||||
@heuristics << new(languages, &heuristic)
|
||||
end
|
||||
|
||||
# Internal: Array of defined heuristics
|
||||
@heuristics = []
|
||||
|
||||
# Internal
|
||||
def initialize(languages, &heuristic)
|
||||
@languages = languages
|
||||
@heuristic = heuristic
|
||||
end
|
||||
|
||||
# Internal: Check if this heuristic matches the candidate languages.
|
||||
def matches?(candidates)
|
||||
candidates.all? { |l| @languages.include?(l.name) }
|
||||
end
|
||||
|
||||
# Internal: Perform the heuristic
|
||||
def call(data)
|
||||
@heuristic.call(data)
|
||||
end
|
||||
|
||||
disambiguate "Objective-C", "C++", "C" do |data|
|
||||
if (/@(interface|class|protocol|property|end|synchronised|selector|implementation)\b/.match(data))
|
||||
matches << Language["Objective-C"]
|
||||
Language["Objective-C"]
|
||||
elsif (/^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>/.match(data) ||
|
||||
/^\s*template\s*</.match(data) || /^[^@]class\s+\w+/.match(data) || /^[^@](private|public|protected):$/.match(data) || /std::.+$/.match(data))
|
||||
matches << Language["C++"]
|
||||
/^\s*template\s*</.match(data) || /^[^@]class\s+\w+/.match(data) || /^[^@](private|public|protected):$/.match(data) || /std::.+$/.match(data))
|
||||
Language["C++"]
|
||||
end
|
||||
matches
|
||||
end
|
||||
|
||||
def self.disambiguate_pl(data)
|
||||
matches = []
|
||||
disambiguate "Perl", "Prolog" do |data|
|
||||
if data.include?("use strict")
|
||||
matches << Language["Perl"]
|
||||
Language["Perl"]
|
||||
elsif data.include?(":-")
|
||||
matches << Language["Prolog"]
|
||||
Language["Prolog"]
|
||||
end
|
||||
matches
|
||||
end
|
||||
|
||||
def self.disambiguate_ecl(data)
|
||||
matches = []
|
||||
disambiguate "ECL", "Prolog" do |data|
|
||||
if data.include?(":-")
|
||||
matches << Language["Prolog"]
|
||||
Language["Prolog"]
|
||||
elsif data.include?(":=")
|
||||
matches << Language["ECL"]
|
||||
Language["ECL"]
|
||||
end
|
||||
matches
|
||||
end
|
||||
|
||||
def self.disambiguate_pro(data)
|
||||
matches = []
|
||||
if (data.include?(":-"))
|
||||
matches << Language["Prolog"]
|
||||
disambiguate "IDL", "Prolog" do |data|
|
||||
if data.include?(":-")
|
||||
Language["Prolog"]
|
||||
else
|
||||
matches << Language["IDL"]
|
||||
Language["IDL"]
|
||||
end
|
||||
matches
|
||||
end
|
||||
|
||||
def self.disambiguate_ts(data)
|
||||
matches = []
|
||||
if (data.include?("</translation>"))
|
||||
matches << Language["XML"]
|
||||
else
|
||||
matches << Language["TypeScript"]
|
||||
end
|
||||
matches
|
||||
end
|
||||
|
||||
def self.disambiguate_cl(data)
|
||||
matches = []
|
||||
disambiguate "Common Lisp", "OpenCL" do |data|
|
||||
if data.include?("(defun ")
|
||||
matches << Language["Common Lisp"]
|
||||
Language["Common Lisp"]
|
||||
elsif /\/\* |\/\/ |^\}/.match(data)
|
||||
matches << Language["OpenCL"]
|
||||
Language["OpenCL"]
|
||||
end
|
||||
matches
|
||||
end
|
||||
|
||||
def self.disambiguate_r(data)
|
||||
matches = []
|
||||
matches << Language["Rebol"] if /\bRebol\b/i.match(data)
|
||||
matches << Language["R"] if data.include?("<-")
|
||||
matches
|
||||
end
|
||||
|
||||
def self.disambiguate_hack(data)
|
||||
matches = []
|
||||
disambiguate "Hack", "PHP" do |data|
|
||||
if data.include?("<?hh")
|
||||
matches << Language["Hack"]
|
||||
Language["Hack"]
|
||||
elsif /<?[^h]/.match(data)
|
||||
matches << Language["PHP"]
|
||||
Language["PHP"]
|
||||
end
|
||||
matches
|
||||
end
|
||||
|
||||
def self.disambiguate_sc(data)
|
||||
matches = []
|
||||
if (/\^(this|super)\./.match(data) || /^\s*(\+|\*)\s*\w+\s*{/.match(data) || /^\s*~\w+\s*=\./.match(data))
|
||||
matches << Language["SuperCollider"]
|
||||
disambiguate "Scala", "SuperCollider" do |data|
|
||||
if /\^(this|super)\./.match(data) || /^\s*(\+|\*)\s*\w+\s*{/.match(data) || /^\s*~\w+\s*=\./.match(data)
|
||||
Language["SuperCollider"]
|
||||
elsif /^\s*import (scala|java)\./.match(data) || /^\s*val\s+\w+\s*=/.match(data) || /^\s*class\b/.match(data)
|
||||
Language["Scala"]
|
||||
end
|
||||
if (/^\s*import (scala|java)\./.match(data) || /^\s*val\s+\w+\s*=/.match(data) || /^\s*class\b/.match(data))
|
||||
matches << Language["Scala"]
|
||||
end
|
||||
matches
|
||||
end
|
||||
|
||||
def self.disambiguate_asc(data)
|
||||
matches = []
|
||||
matches << Language["AsciiDoc"] if /^=+(\s|\n)/.match(data)
|
||||
matches
|
||||
disambiguate "AsciiDoc", "AGS Script" do |data|
|
||||
Language["AsciiDoc"] if /^=+(\s|\n)/.match(data)
|
||||
end
|
||||
|
||||
def self.disambiguate_f(data)
|
||||
matches = []
|
||||
disambiguate "FORTRAN", "Forth" do |data|
|
||||
if /^: /.match(data)
|
||||
matches << Language["Forth"]
|
||||
Language["Forth"]
|
||||
elsif /^([c*][^a-z]| subroutine\s)/i.match(data)
|
||||
matches << Language["FORTRAN"]
|
||||
Language["FORTRAN"]
|
||||
end
|
||||
matches
|
||||
end
|
||||
|
||||
def self.active?
|
||||
!!ACTIVE
|
||||
disambiguate "F#", "Forth", "GLSL" do |data|
|
||||
if /^(: |new-device)/.match(data)
|
||||
Language["Forth"]
|
||||
elsif /^(#light|import|let|module|namespace|open|type)/.match(data)
|
||||
Language["F#"]
|
||||
elsif /^(#include|#pragma|precision|uniform|varying|void)/.match(data)
|
||||
Language["GLSL"]
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -10,6 +10,8 @@ require 'linguist/heuristics'
|
||||
require 'linguist/samples'
|
||||
require 'linguist/file_blob'
|
||||
require 'linguist/blob_helper'
|
||||
require 'linguist/strategy/filename'
|
||||
require 'linguist/strategy/shebang'
|
||||
|
||||
module Linguist
|
||||
# Language names that are recognizable by GitHub. Defined languages
|
||||
@@ -91,6 +93,13 @@ module Linguist
|
||||
language
|
||||
end
|
||||
|
||||
STRATEGIES = [
|
||||
Linguist::Strategy::Filename,
|
||||
Linguist::Strategy::Shebang,
|
||||
Linguist::Heuristics,
|
||||
Linguist::Classifier
|
||||
]
|
||||
|
||||
# Public: Detects the Language of the blob.
|
||||
#
|
||||
# blob - an object that includes the Linguist `BlobHelper` interface;
|
||||
@@ -98,49 +107,22 @@ module Linguist
|
||||
#
|
||||
# Returns Language or nil.
|
||||
def self.detect(blob)
|
||||
name = blob.name.to_s
|
||||
|
||||
# Bail early if the blob is binary or empty.
|
||||
return nil if blob.likely_binary? || blob.binary? || blob.empty?
|
||||
|
||||
# A bit of an elegant hack. If the file is executable but extensionless,
|
||||
# append a "magic" extension so it can be classified with other
|
||||
# languages that have shebang scripts.
|
||||
extension = FileBlob.new(name).extension
|
||||
if extension.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
|
||||
name += ".script!"
|
||||
end
|
||||
|
||||
# First try to find languages that match based on filename.
|
||||
possible_languages = find_by_filename(name)
|
||||
|
||||
# If there is more than one possible language with that extension (or no
|
||||
# extension at all, in the case of extensionless scripts), we need to continue
|
||||
# our detection work
|
||||
if possible_languages.length > 1
|
||||
data = blob.data
|
||||
possible_language_names = possible_languages.map(&:name)
|
||||
heuristic_languages = Heuristics.find_by_heuristics(data, possible_language_names)
|
||||
|
||||
if heuristic_languages.size > 1
|
||||
possible_language_names = heuristic_languages.map(&:name)
|
||||
# Call each strategy until one candidate is returned
|
||||
STRATEGIES.reduce([]) do |languages, strategy|
|
||||
candidates = strategy.call(blob, languages)
|
||||
if candidates.size == 1
|
||||
return candidates.first
|
||||
elsif candidates.size > 1
|
||||
# More than one candidate was found, pass them to the next strategy
|
||||
candidates
|
||||
else
|
||||
# Strategy couldn't find any candidates, so pass on the original list
|
||||
languages
|
||||
end
|
||||
|
||||
# Check if there's a shebang line and use that as authoritative
|
||||
if (result = find_by_shebang(data)) && !result.empty?
|
||||
result.first
|
||||
# No shebang. Still more work to do. Try to find it with our heuristics.
|
||||
elsif heuristic_languages.size == 1
|
||||
heuristic_languages.first
|
||||
# Lastly, fall back to the probabilistic classifier.
|
||||
elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
|
||||
# Return the actual Language object based on the string language name (i.e., first element of `#classify`)
|
||||
Language[classified[0]]
|
||||
end
|
||||
else
|
||||
# Simplest and most common case, we can just return the one match based on extension
|
||||
possible_languages.first
|
||||
end
|
||||
end.first
|
||||
end
|
||||
|
||||
# Public: Get all Languages
|
||||
@@ -190,8 +172,13 @@ module Linguist
|
||||
# Returns all matching Languages or [] if none were found.
|
||||
def self.find_by_filename(filename)
|
||||
basename = File.basename(filename)
|
||||
extname = FileBlob.new(filename).extension
|
||||
(@filename_index[basename] + find_by_extension(extname)).compact.uniq
|
||||
|
||||
# find the first extension with language definitions
|
||||
extname = FileBlob.new(filename).extensions.detect do |e|
|
||||
!@extension_index[e].empty?
|
||||
end
|
||||
|
||||
(@filename_index[basename] + @extension_index[extname]).compact.uniq
|
||||
end
|
||||
|
||||
# Public: Look up Languages by file extension.
|
||||
|
||||
@@ -558,6 +558,8 @@ Crystal:
|
||||
- .cr
|
||||
ace_mode: ruby
|
||||
tm_scope: source.ruby
|
||||
interpreters:
|
||||
- crystal
|
||||
|
||||
Cucumber:
|
||||
extensions:
|
||||
@@ -735,6 +737,8 @@ Erlang:
|
||||
- .es
|
||||
- .escript
|
||||
- .hrl
|
||||
interpreters:
|
||||
- escript
|
||||
|
||||
F#:
|
||||
type: programming
|
||||
@@ -814,6 +818,7 @@ Forth:
|
||||
- .for
|
||||
- .forth
|
||||
- .frt
|
||||
- .fs
|
||||
|
||||
Frege:
|
||||
type: programming
|
||||
@@ -867,6 +872,7 @@ GLSL:
|
||||
- .fp
|
||||
- .frag
|
||||
- .frg
|
||||
- .fs
|
||||
- .fshader
|
||||
- .geo
|
||||
- .geom
|
||||
@@ -930,6 +936,8 @@ Gnuplot:
|
||||
- .gnuplot
|
||||
- .plot
|
||||
- .plt
|
||||
interpreters:
|
||||
- gnuplot
|
||||
|
||||
Go:
|
||||
type: programming
|
||||
@@ -1195,6 +1203,8 @@ Ioke:
|
||||
color: "#078193"
|
||||
extensions:
|
||||
- .ik
|
||||
interpreters:
|
||||
- ioke
|
||||
|
||||
Isabelle:
|
||||
type: programming
|
||||
@@ -1702,6 +1712,8 @@ Nu:
|
||||
filenames:
|
||||
- Nukefile
|
||||
tm_scope: source.scheme
|
||||
interpreters:
|
||||
- nush
|
||||
|
||||
NumPy:
|
||||
group: Python
|
||||
@@ -1888,6 +1900,8 @@ Parrot Assembly:
|
||||
- pasm
|
||||
extensions:
|
||||
- .pasm
|
||||
interpreters:
|
||||
- parrot
|
||||
tm_scope: none
|
||||
|
||||
Parrot Internal Representation:
|
||||
@@ -1898,6 +1912,8 @@ Parrot Internal Representation:
|
||||
- pir
|
||||
extensions:
|
||||
- .pir
|
||||
interpreters:
|
||||
- parrot
|
||||
|
||||
Pascal:
|
||||
type: programming
|
||||
@@ -1940,6 +1956,8 @@ Perl6:
|
||||
- .p6m
|
||||
- .pl6
|
||||
- .pm6
|
||||
interpreters:
|
||||
- perl6
|
||||
tm_scope: none
|
||||
|
||||
PigLatin:
|
||||
@@ -2004,6 +2022,8 @@ Prolog:
|
||||
- .ecl
|
||||
- .pro
|
||||
- .prolog
|
||||
interpreters:
|
||||
- swipl
|
||||
|
||||
Propeller Spin:
|
||||
type: programming
|
||||
@@ -2067,6 +2087,8 @@ Python:
|
||||
- wscript
|
||||
interpreters:
|
||||
- python
|
||||
- python2
|
||||
- python3
|
||||
|
||||
Python traceback:
|
||||
type: data
|
||||
@@ -2087,6 +2109,8 @@ QMake:
|
||||
extensions:
|
||||
- .pro
|
||||
- .pri
|
||||
interpreters:
|
||||
- qmake
|
||||
|
||||
R:
|
||||
type: programming
|
||||
@@ -2241,6 +2265,8 @@ Ruby:
|
||||
- .watchr
|
||||
interpreters:
|
||||
- ruby
|
||||
- macruby
|
||||
- rake
|
||||
filenames:
|
||||
- .pryrc
|
||||
- Appraisals
|
||||
@@ -2327,6 +2353,8 @@ Scala:
|
||||
- .scala
|
||||
- .sbt
|
||||
- .sc
|
||||
interpreters:
|
||||
- scala
|
||||
|
||||
Scaml:
|
||||
group: HTML
|
||||
|
||||
@@ -52,14 +52,16 @@ module Linguist
|
||||
})
|
||||
end
|
||||
else
|
||||
path = File.join(dirname, filename)
|
||||
|
||||
if File.extname(filename) == ""
|
||||
raise "#{File.join(dirname, filename)} is missing an extension, maybe it belongs in filenames/ subdir"
|
||||
raise "#{path} is missing an extension, maybe it belongs in filenames/ subdir"
|
||||
end
|
||||
|
||||
yield({
|
||||
:path => File.join(dirname, filename),
|
||||
:path => path,
|
||||
:language => category,
|
||||
:interpreter => File.exist?(filename) ? Linguist.interpreter_from_shebang(File.read(filename)) : nil,
|
||||
:interpreter => Linguist.interpreter_from_shebang(File.read(path)),
|
||||
:extname => File.extname(filename)
|
||||
})
|
||||
end
|
||||
@@ -131,18 +133,19 @@ module Linguist
|
||||
|
||||
script = script == 'env' ? tokens[1] : script
|
||||
|
||||
# "python2.6" -> "python"
|
||||
if script =~ /((?:\d+\.?)+)/
|
||||
script.sub! $1, ''
|
||||
end
|
||||
# If script has an invalid shebang, we might get here
|
||||
return unless script
|
||||
|
||||
# "python2.6" -> "python2"
|
||||
script.sub! $1, '' if script =~ /(\.\d+)$/
|
||||
|
||||
# Check for multiline shebang hacks that call `exec`
|
||||
if script == 'sh' &&
|
||||
lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
|
||||
script = $1
|
||||
end
|
||||
|
||||
script
|
||||
|
||||
File.basename(script)
|
||||
else
|
||||
nil
|
||||
end
|
||||
|
||||
20
lib/linguist/strategy/filename.rb
Normal file
20
lib/linguist/strategy/filename.rb
Normal file
@@ -0,0 +1,20 @@
|
||||
module Linguist
|
||||
module Strategy
|
||||
# Detects language based on filename and/or extension
|
||||
class Filename
|
||||
def self.call(blob, _)
|
||||
name = blob.name.to_s
|
||||
|
||||
# A bit of an elegant hack. If the file is executable but extensionless,
|
||||
# append a "magic" extension so it can be classified with other
|
||||
# languages that have shebang scripts.
|
||||
extensions = FileBlob.new(name).extensions
|
||||
if extensions.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
|
||||
name += ".script!"
|
||||
end
|
||||
|
||||
Language.find_by_filename(name)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
10
lib/linguist/strategy/shebang.rb
Normal file
10
lib/linguist/strategy/shebang.rb
Normal file
@@ -0,0 +1,10 @@
|
||||
module Linguist
|
||||
module Strategy
|
||||
# Check if there's a shebang line and use that as authoritative
|
||||
class Shebang
|
||||
def self.call(blob, _)
|
||||
Language.find_by_shebang(blob.data)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
15
samples/F#/sample.fs
Normal file
15
samples/F#/sample.fs
Normal file
@@ -0,0 +1,15 @@
|
||||
module Sample
|
||||
|
||||
open System
|
||||
|
||||
type Foo =
|
||||
{
|
||||
Bar : string
|
||||
}
|
||||
|
||||
type Baz = interface end
|
||||
|
||||
let Sample1(xs : int list) : string =
|
||||
xs
|
||||
|> List.map (fun x -> string x)
|
||||
|> String.concat ","
|
||||
252
samples/Forth/core.fs
Normal file
252
samples/Forth/core.fs
Normal file
@@ -0,0 +1,252 @@
|
||||
: immediate lastxt @ dup c@ negate swap c! ;
|
||||
|
||||
: \ source nip >in ! ; immediate \ Copyright 2004, 2012 Lars Brinkhoff
|
||||
|
||||
: char \ ( "word" -- char )
|
||||
bl-word here 1+ c@ ;
|
||||
|
||||
: ahead here 0 , ;
|
||||
|
||||
: resolve here swap ! ;
|
||||
|
||||
: ' bl-word here find 0branch [ ahead ] exit [ resolve ] 0 ;
|
||||
|
||||
: postpone-nonimmediate [ ' literal , ' compile, ] literal , ;
|
||||
|
||||
: create dovariable_code header, reveal ;
|
||||
|
||||
create postponers
|
||||
' postpone-nonimmediate ,
|
||||
' abort ,
|
||||
' , ,
|
||||
|
||||
: word \ ( char "<chars>string<char>" -- caddr )
|
||||
drop bl-word here ;
|
||||
|
||||
: postpone \ ( C: "word" -- )
|
||||
bl word find 1+ cells postponers + @ execute ; immediate
|
||||
|
||||
: unresolved \ ( C: "word" -- orig )
|
||||
postpone postpone postpone ahead ; immediate
|
||||
|
||||
: chars \ ( n1 -- n2 )
|
||||
;
|
||||
|
||||
: else \ ( -- ) ( C: orig1 -- orig2 )
|
||||
unresolved branch swap resolve ; immediate
|
||||
|
||||
: if \ ( flag -- ) ( C: -- orig )
|
||||
unresolved 0branch ; immediate
|
||||
|
||||
: then \ ( -- ) ( C: orig -- )
|
||||
resolve ; immediate
|
||||
|
||||
: [char] \ ( "word" -- )
|
||||
char postpone literal ; immediate
|
||||
|
||||
: (does>) lastxt @ dodoes_code over >code ! r> swap >does ! ;
|
||||
|
||||
: does> postpone (does>) ; immediate
|
||||
|
||||
: begin \ ( -- ) ( C: -- dest )
|
||||
here ; immediate
|
||||
|
||||
: while \ ( x -- ) ( C: dest -- orig dest )
|
||||
unresolved 0branch swap ; immediate
|
||||
|
||||
: repeat \ ( -- ) ( C: orig dest -- )
|
||||
postpone branch , resolve ; immediate
|
||||
|
||||
: until \ ( x -- ) ( C: dest -- )
|
||||
postpone 0branch , ; immediate
|
||||
|
||||
: recurse lastxt @ compile, ; immediate
|
||||
|
||||
: pad \ ( -- addr )
|
||||
here 1024 + ;
|
||||
|
||||
: parse \ ( char "string<char>" -- addr n )
|
||||
pad >r begin
|
||||
source? if <source 2dup <> else 0 0 then
|
||||
while
|
||||
r@ c! r> 1+ >r
|
||||
repeat 2drop pad r> over - ;
|
||||
|
||||
: ( \ ( "string<paren>" -- )
|
||||
[ char ) ] literal parse 2drop ; immediate
|
||||
\ TODO: If necessary, refill and keep parsing.
|
||||
|
||||
: string, ( addr n -- )
|
||||
here over allot align swap cmove ;
|
||||
|
||||
: (s") ( -- addr n ) ( R: ret1 -- ret2 )
|
||||
r> dup @ swap cell+ 2dup + aligned >r swap ;
|
||||
|
||||
create squote 128 allot
|
||||
|
||||
: s" ( "string<quote>" -- addr n )
|
||||
state @ if
|
||||
postpone (s") [char] " parse dup , string,
|
||||
else
|
||||
[char] " parse >r squote r@ cmove squote r>
|
||||
then ; immediate
|
||||
|
||||
: (abort") ( ... addr n -- ) ( R: ... -- )
|
||||
cr type cr abort ;
|
||||
|
||||
: abort" ( ... x "string<quote>" -- ) ( R: ... -- )
|
||||
postpone if postpone s" postpone (abort") postpone then ; immediate
|
||||
|
||||
\ ----------------------------------------------------------------------
|
||||
|
||||
( Core words. )
|
||||
|
||||
\ TODO: #
|
||||
\ TODO: #>
|
||||
\ TODO: #s
|
||||
|
||||
: and ( x y -- x&y ) nand invert ;
|
||||
|
||||
: * 1 2>r 0 swap begin r@ while
|
||||
r> r> swap 2dup dup + 2>r and if swap over + swap then dup +
|
||||
repeat r> r> 2drop drop ;
|
||||
|
||||
\ TODO: */mod
|
||||
|
||||
: +loop ( -- ) ( C: nest-sys -- )
|
||||
postpone (+loop) postpone 0branch , postpone unloop ; immediate
|
||||
|
||||
: space bl emit ;
|
||||
|
||||
: ?.- dup 0 < if [char] - emit negate then ;
|
||||
|
||||
: digit [char] 0 + emit ;
|
||||
|
||||
: (.) base @ /mod ?dup if recurse then digit ;
|
||||
|
||||
: ." ( "string<quote>" -- ) postpone s" postpone type ; immediate
|
||||
|
||||
: . ( x -- ) ?.- (.) space ;
|
||||
|
||||
: postpone-number ( caddr -- )
|
||||
0 0 rot count >number dup 0= if
|
||||
2drop nip
|
||||
postpone (literal) postpone (literal) postpone ,
|
||||
postpone literal postpone ,
|
||||
else
|
||||
." Undefined: " type cr abort
|
||||
then ;
|
||||
|
||||
' postpone-number postponers cell+ !
|
||||
|
||||
: / ( x y -- x/y ) /mod nip ;
|
||||
|
||||
: 0< ( n -- flag ) 0 < ;
|
||||
|
||||
: 1- ( n -- n-1 ) -1 + ;
|
||||
|
||||
: 2! ( x1 x2 addr -- ) swap over ! cell+ ! ;
|
||||
|
||||
: 2* ( n -- 2n ) dup + ;
|
||||
|
||||
\ Kernel: 2/
|
||||
|
||||
: 2@ ( addr -- x1 x2 ) dup cell+ @ swap @ ;
|
||||
|
||||
\ Kernel: 2drop
|
||||
\ Kernel: 2dup
|
||||
|
||||
\ TODO: 2over ( x1 x2 x3 x4 -- x1 x2 x3 x4 x1 x2 )
|
||||
\ 3 pick 3 pick ;
|
||||
|
||||
\ TODO: 2swap
|
||||
|
||||
\ TODO: <#
|
||||
|
||||
: abs ( n -- |n| )
|
||||
dup 0< if negate then ;
|
||||
|
||||
\ TODO: accept
|
||||
|
||||
: c, ( n -- )
|
||||
here c! 1 chars allot ;
|
||||
|
||||
: char+ ( n1 -- n2 )
|
||||
1+ ;
|
||||
|
||||
: constant create , does> @ ;
|
||||
|
||||
: decimal ( -- )
|
||||
10 base ! ;
|
||||
|
||||
: depth ( -- n )
|
||||
data_stack 100 cells + 'SP @ - /cell / 2 - ;
|
||||
|
||||
: do ( n1 n2 -- ) ( R: -- loop-sys ) ( C: -- do-sys )
|
||||
postpone 2>r here ; immediate
|
||||
|
||||
\ TODO: environment?
|
||||
\ TODO: evaluate
|
||||
\ TODO: fill
|
||||
\ TODO: fm/mod )
|
||||
\ TODO: hold
|
||||
|
||||
: j ( -- x1 ) ( R: x1 x2 x3 -- x1 x2 x3 )
|
||||
'RP @ 3 cells + @ ;
|
||||
|
||||
\ TODO: leave
|
||||
|
||||
: loop ( -- ) ( C: nest-sys -- )
|
||||
postpone 1 postpone (+loop)
|
||||
postpone 0branch ,
|
||||
postpone unloop ; immediate
|
||||
|
||||
: lshift begin ?dup while 1- swap dup + swap repeat ;
|
||||
|
||||
: rshift 1 begin over while dup + swap 1- swap repeat nip
|
||||
2>r 0 1 begin r@ while
|
||||
r> r> 2dup swap dup + 2>r and if swap over + swap then dup +
|
||||
repeat r> r> 2drop drop ;
|
||||
|
||||
: max ( x y -- max[x,y] )
|
||||
2dup > if drop else nip then ;
|
||||
|
||||
\ Kernel: min
|
||||
\ TODO: mod
|
||||
\ TODO: move
|
||||
|
||||
: (quit) ( R: ... -- )
|
||||
return_stack 100 cells + 'RP !
|
||||
0 'source-id ! tib ''source ! #tib ''#source !
|
||||
postpone [
|
||||
begin
|
||||
refill
|
||||
while
|
||||
interpret state @ 0= if ." ok" cr then
|
||||
repeat
|
||||
bye ;
|
||||
|
||||
' (quit) ' quit >body cell+ !
|
||||
|
||||
\ TODO: s>d
|
||||
\ TODO: sign
|
||||
\ TODO: sm/rem
|
||||
|
||||
: spaces ( n -- )
|
||||
0 do space loop ;
|
||||
|
||||
\ TODO: u.
|
||||
|
||||
: signbit ( -- n ) -1 1 rshift invert ;
|
||||
|
||||
: xor ( x y -- x^y ) 2dup nand >r r@ nand swap r> nand nand ;
|
||||
|
||||
: u< ( x y -- flag ) signbit xor swap signbit xor > ;
|
||||
|
||||
\ TODO: um/mod
|
||||
|
||||
: variable ( "word" -- )
|
||||
create /cell allot ;
|
||||
|
||||
: ['] \ ( C: "word" -- )
|
||||
' postpone literal ; immediate
|
||||
48
samples/GLSL/recurse1.fs
Normal file
48
samples/GLSL/recurse1.fs
Normal file
@@ -0,0 +1,48 @@
|
||||
#version 330 core
|
||||
|
||||
// cross-unit recursion
|
||||
|
||||
void main() {}
|
||||
|
||||
// two-level recursion
|
||||
|
||||
float cbar(int);
|
||||
|
||||
void cfoo(float)
|
||||
{
|
||||
cbar(2);
|
||||
}
|
||||
|
||||
// four-level, out of order
|
||||
|
||||
void CB();
|
||||
void CD();
|
||||
void CA() { CB(); }
|
||||
void CC() { CD(); }
|
||||
|
||||
// high degree
|
||||
|
||||
void CBT();
|
||||
void CDT();
|
||||
void CAT() { CBT(); CBT(); CBT(); }
|
||||
void CCT() { CDT(); CDT(); CBT(); }
|
||||
|
||||
// not recursive
|
||||
|
||||
void norA() {}
|
||||
void norB() { norA(); }
|
||||
void norC() { norA(); }
|
||||
void norD() { norA(); }
|
||||
void norE() { norB(); }
|
||||
void norF() { norB(); }
|
||||
void norG() { norE(); }
|
||||
void norH() { norE(); }
|
||||
void norI() { norE(); }
|
||||
|
||||
// not recursive, but with a call leading into a cycle if ignoring direction
|
||||
|
||||
void norcA() { }
|
||||
void norcB() { norcA(); }
|
||||
void norcC() { norcB(); }
|
||||
void norcD() { norcC(); norcB(); } // head of cycle
|
||||
void norcE() { norcD(); } // lead into cycle
|
||||
@@ -1,2 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
puts "Not Python"
|
||||
22
test/fixtures/Python/run_tests.module
vendored
Normal file
22
test/fixtures/Python/run_tests.module
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env python
# Test-runner entry point: locates the MOOSE checkout, puts its python
# directory on sys.path and hands the command line to TestHarness.
import sys, os

# Set the current working directory to the directory where this script is located
os.chdir(os.path.abspath(os.path.dirname(sys.argv[0])))

#### Set the name of the application here and moose directory relative to the application
app_name = 'stork'

MODULE_DIR = os.path.abspath('..')
MOOSE_DIR = os.path.abspath(os.path.join(MODULE_DIR, '..'))
#### See if MOOSE_DIR is already in the environment instead
# `in` works on both Python 2 and 3; dict.has_key() exists only on Python 2.
if "MOOSE_DIR" in os.environ:
    MOOSE_DIR = os.environ['MOOSE_DIR']

# path_tool lives under MOOSE_DIR/python, so it can only be imported after
# that directory has been appended to sys.path.
sys.path.append(os.path.join(MOOSE_DIR, 'python'))
import path_tool
path_tool.activate_module('TestHarness')

from TestHarness import TestHarness
# Run the tests!
TestHarness.buildAndRun(sys.argv, app_name, MOOSE_DIR)
|
||||
1505
test/fixtures/Shell/mintleaf.module
vendored
Normal file
1505
test/fixtures/Shell/mintleaf.module
vendored
Normal file
File diff suppressed because it is too large
Load Diff
4
test/helper.rb
Normal file
4
test/helper.rb
Normal file
@@ -0,0 +1,4 @@
|
||||
require "bundler/setup"
|
||||
require "test/unit"
|
||||
require "mocha/setup"
|
||||
require "linguist"
|
||||
@@ -1,9 +1,4 @@
|
||||
require 'linguist/file_blob'
|
||||
require 'linguist/samples'
|
||||
|
||||
require 'test/unit'
|
||||
require 'mocha/setup'
|
||||
require 'mime/types'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestBlob < Test::Unit::TestCase
|
||||
include Linguist
|
||||
@@ -470,6 +465,25 @@ class TestBlob < Test::Unit::TestCase
|
||||
assert blob.language, "No language for #{sample[:path]}"
|
||||
assert_equal sample[:language], blob.language.name, blob.name
|
||||
end
|
||||
|
||||
# Test language detection for files which shouldn't be used as samples.
#
# Dir.entries returns bare entry names (plus "." and ".."), so every name is
# joined onto its parent directory before it is checked; testing the bare
# name would resolve it against the current working directory instead and
# skip every fixture without asserting anything.
root = File.expand_path('../fixtures', __FILE__)
Dir.entries(root).each do |language|
  next if language == '.' || language == '..'

  # Each directory contains test files of a language
  dirname = File.join(root, language)
  next unless File.directory?(dirname)

  Dir.entries(dirname).each do |filename|
    # By default blob searches for the file in the samples;
    # thus, we need to give it the absolute path
    filepath = File.join(dirname, filename)
    next unless File.file?(filepath)

    fixture = blob(filepath)
    assert fixture.language, "No language for #{filepath}"
    # The directory name doubles as the expected language name.
    assert_equal language, fixture.language.name, fixture.name
  end
end
|
||||
end
|
||||
|
||||
def test_minified_files_not_safe_to_highlight
|
||||
|
||||
@@ -1,9 +1,4 @@
|
||||
require 'linguist/classifier'
|
||||
require 'linguist/language'
|
||||
require 'linguist/samples'
|
||||
require 'linguist/tokenizer'
|
||||
|
||||
require 'test/unit'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestClassifier < Test::Unit::TestCase
|
||||
include Linguist
|
||||
|
||||
10
test/test_file_blob.rb
Normal file
10
test/test_file_blob.rb
Normal file
@@ -0,0 +1,10 @@
|
||||
require 'linguist/file_blob'
|
||||
require 'test/unit'
|
||||
|
||||
# Exercises Linguist::FileBlob#extensions for a dotfile, a plain file name and
# a multi-part extension below a directory whose name contains a dot.
class TestFileBlob < Test::Unit::TestCase
  def test_extensions
    # path handed to FileBlob.new => extensions it is expected to report
    expectations = {
      ".gitignore" => [".gitignore"],
      "build.xml" => [".xml"],
      "dotted.dir/index.html.erb" => [".html.erb", ".erb"]
    }

    expectations.each do |path, extensions|
      assert_equal extensions, Linguist::FileBlob.new(path).extensions
    end
  end
end
|
||||
@@ -1,9 +1,4 @@
|
||||
require 'linguist/heuristics'
|
||||
require 'linguist/language'
|
||||
require 'linguist/samples'
|
||||
require 'linguist/file_blob'
|
||||
|
||||
require 'test/unit'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestHeuristcs < Test::Unit::TestCase
|
||||
include Linguist
|
||||
@@ -16,6 +11,11 @@ class TestHeuristcs < Test::Unit::TestCase
|
||||
File.read(File.join(samples_path, name))
|
||||
end
|
||||
|
||||
# Builds a FileBlob for +name+. A name that File.exist? finds is used as
# given; any other name is resolved below samples_path.
def file_blob(name)
  return FileBlob.new(name) if File.exist?(name)

  FileBlob.new(File.join(samples_path, name))
end
|
||||
|
||||
# Lists the paths below samples_path/<language_name> whose file name matches
# the glob +file+ (every entry by default).
def all_fixtures(language_name, file="*")
  pattern = [samples_path, language_name, file].join("/")
  Dir[pattern]
end
|
||||
@@ -23,24 +23,17 @@ class TestHeuristcs < Test::Unit::TestCase
|
||||
# Candidate languages = ["C++", "Objective-C"]
|
||||
def test_obj_c_by_heuristics
|
||||
# Only calling out '.h' filenames as these are the ones causing issues
|
||||
all_fixtures("Objective-C", "*.h").each do |fixture|
|
||||
results = Heuristics.disambiguate_c(fixture("Objective-C/#{File.basename(fixture)}"))
|
||||
assert_equal Language["Objective-C"], results.first, "Failed for #{File.basename(fixture)}"
|
||||
end
|
||||
end
|
||||
|
||||
# Candidate languages = ["C++", "Objective-C"]
|
||||
def test_cpp_by_heuristics
|
||||
results = Heuristics.disambiguate_c(fixture("C++/render_adapter.cpp"))
|
||||
assert_equal Language["C++"], results.first
|
||||
results = Heuristics.disambiguate_c(fixture("C++/ThreadedQueue.h"))
|
||||
assert_equal Language["C++"], results.first
|
||||
assert_heuristics({
|
||||
"Objective-C" => all_fixtures("Objective-C", "*.h"),
|
||||
"C++" => ["C++/render_adapter.cpp", "C++/ThreadedQueue.h"],
|
||||
"C" => nil
|
||||
})
|
||||
end
|
||||
|
||||
def test_c_by_heuristics
|
||||
languages = ["C++", "Objective-C", "C"]
|
||||
results = Heuristics.disambiguate_c(fixture("C/ArrowLeft.h"))
|
||||
assert_equal nil, results.first
|
||||
languages = [Language["C++"], Language["Objective-C"], Language["C"]]
|
||||
results = Heuristics.call(file_blob("C/ArrowLeft.h"), languages)
|
||||
assert_equal [], results
|
||||
end
|
||||
|
||||
def test_detect_still_works_if_nothing_matches
|
||||
@@ -50,94 +43,89 @@ class TestHeuristcs < Test::Unit::TestCase
|
||||
end
|
||||
|
||||
# Candidate languages = ["Perl", "Prolog"]
|
||||
def test_pl_prolog_by_heuristics
|
||||
results = Heuristics.disambiguate_pl(fixture("Prolog/turing.pl"))
|
||||
assert_equal Language["Prolog"], results.first
|
||||
end
|
||||
|
||||
# Candidate languages = ["Perl", "Prolog"]
|
||||
def test_pl_perl_by_heuristics
|
||||
results = Heuristics.disambiguate_pl(fixture("Perl/perl-test.t"))
|
||||
assert_equal Language["Perl"], results.first
|
||||
def test_pl_prolog_perl_by_heuristics
|
||||
assert_heuristics({
|
||||
"Prolog" => "Prolog/turing.pl",
|
||||
"Perl" => "Perl/perl-test.t",
|
||||
})
|
||||
end
|
||||
|
||||
# Candidate languages = ["ECL", "Prolog"]
|
||||
def test_ecl_prolog_by_heuristics
|
||||
results = Heuristics.disambiguate_ecl(fixture("Prolog/or-constraint.ecl"))
|
||||
assert_equal Language["Prolog"], results.first
|
||||
results = Heuristics.call(file_blob("Prolog/or-constraint.ecl"), [Language["ECL"], Language["Prolog"]])
|
||||
assert_equal [Language["Prolog"]], results
|
||||
end
|
||||
|
||||
# Candidate languages = ["ECL", "Prolog"]
|
||||
def test_ecl_ecl_by_heuristics
|
||||
results = Heuristics.disambiguate_ecl(fixture("ECL/sample.ecl"))
|
||||
assert_equal Language["ECL"], results.first
|
||||
def test_ecl_prolog_by_heuristics
|
||||
assert_heuristics({
|
||||
"ECL" => "ECL/sample.ecl",
|
||||
"Prolog" => "Prolog/or-constraint.ecl"
|
||||
})
|
||||
end
|
||||
|
||||
# Candidate languages = ["IDL", "Prolog"]
|
||||
def test_pro_prolog_by_heuristics
|
||||
results = Heuristics.disambiguate_pro(fixture("Prolog/logic-problem.pro"))
|
||||
assert_equal Language["Prolog"], results.first
|
||||
end
|
||||
|
||||
# Candidate languages = ["IDL", "Prolog"]
|
||||
def test_pro_idl_by_heuristics
|
||||
results = Heuristics.disambiguate_pro(fixture("IDL/mg_acosh.pro"))
|
||||
assert_equal Language["IDL"], results.first
|
||||
def test_pro_prolog_idl_by_heuristics
|
||||
assert_heuristics({
|
||||
"Prolog" => "Prolog/logic-problem.pro",
|
||||
"IDL" => "IDL/mg_acosh.pro"
|
||||
})
|
||||
end
|
||||
|
||||
# Candidate languages = ["AGS Script", "AsciiDoc"]
|
||||
def test_asc_asciidoc_by_heuristics
|
||||
results = Heuristics.disambiguate_asc(fixture("AsciiDoc/list.asc"))
|
||||
assert_equal Language["AsciiDoc"], results.first
|
||||
end
|
||||
|
||||
# Candidate languages = ["TypeScript", "XML"]
|
||||
def test_ts_typescript_by_heuristics
|
||||
results = Heuristics.disambiguate_ts(fixture("TypeScript/classes.ts"))
|
||||
assert_equal Language["TypeScript"], results.first
|
||||
end
|
||||
|
||||
# Candidate languages = ["TypeScript", "XML"]
|
||||
def test_ts_xml_by_heuristics
|
||||
results = Heuristics.disambiguate_ts(fixture("XML/pt_BR.xml"))
|
||||
assert_equal Language["XML"], results.first
|
||||
assert_heuristics({
|
||||
"AsciiDoc" => "AsciiDoc/list.asc",
|
||||
"AGS Script" => nil
|
||||
})
|
||||
end
|
||||
|
||||
def test_cl_by_heuristics
|
||||
languages = ["Common Lisp", "OpenCL"]
|
||||
languages.each do |language|
|
||||
all_fixtures(language).each do |fixture|
|
||||
results = Heuristics.disambiguate_cl(fixture("#{language}/#{File.basename(fixture)}"))
|
||||
assert_equal Language[language], results.first
|
||||
end
|
||||
end
|
||||
assert_heuristics({
|
||||
"Common Lisp" => all_fixtures("Common Lisp"),
|
||||
"OpenCL" => all_fixtures("OpenCL")
|
||||
})
|
||||
end
|
||||
|
||||
def test_f_by_heuristics
|
||||
languages = ["FORTRAN", "Forth"]
|
||||
languages.each do |language|
|
||||
all_fixtures(language).each do |fixture|
|
||||
results = Heuristics.disambiguate_f(fixture("#{language}/#{File.basename(fixture)}"))
|
||||
assert_equal Language[language], results.first
|
||||
end
|
||||
end
|
||||
assert_heuristics({
|
||||
"FORTRAN" => all_fixtures("FORTRAN"),
|
||||
"Forth" => all_fixtures("Forth")
|
||||
})
|
||||
end
|
||||
|
||||
# Candidate languages = ["Hack", "PHP"]
|
||||
def test_hack_by_heuristics
|
||||
results = Heuristics.disambiguate_hack(fixture("Hack/funs.php"))
|
||||
assert_equal Language["Hack"], results.first
|
||||
assert_heuristics({
|
||||
"Hack" => "Hack/funs.php",
|
||||
"PHP" => "PHP/Model.php"
|
||||
})
|
||||
end
|
||||
|
||||
# Candidate languages = ["Scala", "SuperCollider"]
|
||||
def test_sc_supercollider_by_heuristics
|
||||
results = Heuristics.disambiguate_sc(fixture("SuperCollider/WarpPreset.sc"))
|
||||
assert_equal Language["SuperCollider"], results.first
|
||||
def test_sc_supercollider_scala_by_heuristics
|
||||
assert_heuristics({
|
||||
"SuperCollider" => "SuperCollider/WarpPreset.sc",
|
||||
"Scala" => "Scala/node11.sc"
|
||||
})
|
||||
end
|
||||
|
||||
# Candidate languages = ["Scala", "SuperCollider"]
|
||||
def test_sc_scala_by_heuristics
|
||||
results = Heuristics.disambiguate_sc(fixture("Scala/node11.sc"))
|
||||
assert_equal Language["Scala"], results.first
|
||||
def test_fs_by_heuristics
|
||||
assert_heuristics({
|
||||
"F#" => all_fixtures("F#"),
|
||||
"Forth" => all_fixtures("Forth"),
|
||||
"GLSL" => all_fixtures("GLSL")
|
||||
})
|
||||
end
|
||||
|
||||
# Asserts that Heuristics.call picks exactly the expected language for each
# listed file.
#
# hash - maps a language name to the file(s) expected to be detected as that
#        language: a single path, an Array of paths, or nil when the language
#        is only offered as a candidate and has no file to check. Every key
#        is passed to Heuristics.call as a candidate.
def assert_heuristics(hash)
  candidates = hash.keys.map { |name| Language[name] }

  hash.each do |language, blobs|
    # Array() turns nil into [] and wraps a lone path, so all three value
    # shapes share one loop.
    Array(blobs).each do |blob|
      result = Heuristics.call(file_blob(blob), candidates)
      # Name the file in the failure message: a single call can cover many
      # fixtures, and the bare language mismatch does not say which one broke.
      assert_equal [Language[language]], result,
                   "Expected #{blob} to be detected as #{language}"
    end
  end
end
|
||||
end
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
require 'linguist/language'
|
||||
require 'test/unit'
|
||||
require 'yaml'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestLanguage < Test::Unit::TestCase
|
||||
include Linguist
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
require 'linguist/md5'
|
||||
|
||||
require 'test/unit'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestMD5 < Test::Unit::TestCase
|
||||
include Linguist
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
require 'test/unit'
|
||||
require 'yaml'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestPedantic < Test::Unit::TestCase
|
||||
filename = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
require 'linguist/repository'
|
||||
require 'linguist/lazy_blob'
|
||||
require 'test/unit'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestRepository < Test::Unit::TestCase
|
||||
def rugged_repository
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
require 'linguist/samples'
|
||||
require 'linguist/language'
|
||||
require 'tempfile'
|
||||
require 'yajl'
|
||||
require 'test/unit'
|
||||
require_relative "./helper"
|
||||
require "tempfile"
|
||||
|
||||
class TestSamples < Test::Unit::TestCase
|
||||
include Linguist
|
||||
@@ -34,23 +31,29 @@ class TestSamples < Test::Unit::TestCase
|
||||
assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c }
|
||||
assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c }
|
||||
assert_equal data['tokens_total'], data['tokens'].inject(0) { |n, (_, ts)| n += ts.inject(0) { |m, (_, c)| m += c } }
|
||||
assert !data["interpreters"].empty?
|
||||
end
|
||||
|
||||
# Check that there aren't samples with extensions that aren't explicitly defined in languages.yml
|
||||
def test_parity
|
||||
extensions = Samples.cache['extnames']
|
||||
languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
|
||||
languages = YAML.load_file(languages_yml)
|
||||
|
||||
languages.each do |name, options|
|
||||
# Check that there aren't samples with extensions or interpreters that
|
||||
# aren't explicitly defined in languages.yml
|
||||
languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
|
||||
YAML.load_file(languages_yml).each do |name, options|
|
||||
define_method "test_samples_have_parity_with_languages_yml_for_#{name}" do
|
||||
options['extensions'] ||= []
|
||||
|
||||
if extnames = extensions[name]
|
||||
if extnames = Samples.cache['extnames'][name]
|
||||
extnames.each do |extname|
|
||||
next if extname == '.script!'
|
||||
assert options['extensions'].include?(extname), "#{name} has a sample with extension (#{extname}) that isn't explicitly defined in languages.yml"
|
||||
end
|
||||
end
|
||||
|
||||
options['interpreters'] ||= []
|
||||
if interpreters = Samples.cache['interpreters'][name]
|
||||
interpreters.each do |interpreter|
|
||||
# next if extname == '.script!'
|
||||
assert options['interpreters'].include?(interpreter), "#{name} has a sample with an interpreter (#{interpreter}) that isn't explicitly defined in languages.yml"
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -79,4 +82,9 @@ class TestSamples < Test::Unit::TestCase
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Checks the interpreter name Linguist.interpreter_from_shebang reports for
# an env-launched script given by relative path and for a versioned binary.
def test_shebang
  {
    "#!/usr/bin/env bin/crystal" => "crystal",
    "#!/usr/bin/python2.4" => "python2"
  }.each do |shebang, interpreter|
    assert_equal interpreter, Linguist.interpreter_from_shebang(shebang)
  end
end
|
||||
end
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
require 'linguist/tokenizer'
|
||||
|
||||
require 'test/unit'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestTokenizer < Test::Unit::TestCase
|
||||
include Linguist
|
||||
|
||||
BIN
vendor/cache/byebug-3.5.1.gem
vendored
Normal file
BIN
vendor/cache/byebug-3.5.1.gem
vendored
Normal file
Binary file not shown.
BIN
vendor/cache/columnize-0.8.9.gem
vendored
Normal file
BIN
vendor/cache/columnize-0.8.9.gem
vendored
Normal file
Binary file not shown.
BIN
vendor/cache/debugger-linecache-1.2.0.gem
vendored
Normal file
BIN
vendor/cache/debugger-linecache-1.2.0.gem
vendored
Normal file
Binary file not shown.
Reference in New Issue
Block a user