diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ee13ae71..d0884dab 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -12,7 +12,7 @@ This can usually be solved either by adding a new filename or file name extensio Assuming your code is being detected as the right language (see above), in most cases this is due to a bug in the language grammar rather than a bug in Linguist. [`grammars.yml`][grammars] lists all the grammars we use for syntax highlighting on github.com. Find the one corresponding to your code's programming language and submit a bug report upstream. -You can also try to fix the bug yourself and submit a Pull Request. [This piece from TextMate's documentation](http://manual.macromates.com/en/language_grammars) offers a good introduction on how to work with TextMate-compatible grammars. +You can also try to fix the bug yourself and submit a Pull Request. [This piece from TextMate's documentation](http://manual.macromates.com/en/language_grammars) offers a good introduction on how to work with TextMate-compatible grammars. You can test grammars using [Lightshow](https://lightshow.githubapp.com). Once the bug has been fixed upstream, please let us know and we'll pick it up for GitHub. diff --git a/Gemfile b/Gemfile index 481a4c6e..95769569 100644 --- a/Gemfile +++ b/Gemfile @@ -2,3 +2,4 @@ source 'https://rubygems.org' gemspec :name => "github-linguist" gemspec :name => "github-linguist-grammars" gem 'test-unit', require: false if RUBY_VERSION >= '2.2' +gem 'byebug' if RUBY_VERSION >= '2.0' diff --git a/grammars.yml b/grammars.yml index fbf087d3..10361eef 100644 --- a/grammars.yml +++ b/grammars.yml @@ -5,8 +5,6 @@ http://svn.textmate.org/trunk/Review/Bundles/BlitzMax.tmbundle: - source.blitzmax http://svn.textmate.org/trunk/Review/Bundles/Cython.tmbundle: - source.cython -http://svn.textmate.org/trunk/Review/Bundles/F%20Sharp.tmbundle: -- source.fsharp http://svn.textmate.org/trunk/Review/Bundles/Forth.tmbundle: - source.forth http://svn.textmate.org/trunk/Review/Bundles/Parrot.tmbundle: @@ -135,6 +133,8 @@ https://github.com/euler0/sublime-glsl/raw/master/GLSL.tmLanguage: - source.glsl https://github.com/fancy-lang/fancy-tmbundle: - source.fancy +https://github.com/fsharp/fsharpbinding: +- source.fsharp https://github.com/gingerbeardman/monkey.tmbundle: - source.monkey https://github.com/guillermooo/dart-sublime-bundle/raw/master/Dart.tmLanguage: diff --git a/lib/linguist/classifier.rb b/lib/linguist/classifier.rb index 5370bdd8..89a0df2f 100644 --- a/lib/linguist/classifier.rb +++ b/lib/linguist/classifier.rb @@ -3,6 +3,25 @@ require 'linguist/tokenizer' module Linguist # Language bayesian classifier. class Classifier + # Public: Use the classifier to detect language of the blob. + # + # blob - An object that quacks like a blob. + # possible_languages - Array of Language objects + # + # Examples + # + # Classifier.call(FileBlob.new("path/to/file"), [ + # Language["Ruby"], Language["Python"] + # ]) + # + # Returns an Array of Language objects, most probable first. + def self.call(blob, possible_languages) + language_names = possible_languages.map(&:name) + classify(Samples.cache, blob.data, language_names).map do |name, _| + Language[name] # Return the actual Language objects + end + end + # Public: Train classifier that data is a certain language. # # db - Hash classifier database object diff --git a/lib/linguist/file_blob.rb b/lib/linguist/file_blob.rb index bc475023..04441935 100644 --- a/lib/linguist/file_blob.rb +++ b/lib/linguist/file_blob.rb @@ -57,14 +57,20 @@ module Linguist # # Returns a String. def extension - # File.extname returns nil if the filename is an extension. - extension = File.extname(name) - basename = File.basename(name) - # Checks if the filename is an extension. - if extension.empty? && basename[0] == "." - basename - else - extension + extensions.last || "" + end + + # Public: Return an array of the file extensions + # + # >> Linguist::FileBlob.new("app/views/things/index.html.erb").extensions + # => [".html.erb", ".erb"] + # + # Returns an Array + def extensions + basename, *segments = File.basename(name).split(".") + + segments.map.with_index do |segment, index| + "." + segments[index..-1].join(".") end end end diff --git a/lib/linguist/heuristics.rb b/lib/linguist/heuristics.rb index f9f31a69..8fa52c57 100644 --- a/lib/linguist/heuristics.rb +++ b/lib/linguist/heuristics.rb @@ -1,162 +1,143 @@ module Linguist # A collection of simple heuristics that can be used to better analyze languages. class Heuristics - ACTIVE = true - - # Public: Given an array of String language names, - # apply heuristics against the given data and return an array - # of matching languages, or nil. + # Public: Use heuristics to detect language of the blob. # - # data - Array of tokens or String data to analyze. - # languages - Array of language name Strings to restrict to. + # blob - An object that quacks like a blob. + # possible_languages - Array of Language objects # - # Returns an array of Languages or [] - def self.find_by_heuristics(data, languages) - if active? - result = [] + # Examples + # + # Heuristics.call(FileBlob.new("path/to/file"), [ + # Language["Ruby"], Language["Python"] + # ]) + # + # Returns an Array of languages, or empty if none matched or were inconclusive. + def self.call(blob, languages) + data = blob.data - if languages.all? { |l| ["Objective-C", "C++", "C"].include?(l) } - result = disambiguate_c(data) - end - if languages.all? { |l| ["Perl", "Prolog"].include?(l) } - result = disambiguate_pl(data) - end - if languages.all? { |l| ["ECL", "Prolog"].include?(l) } - result = disambiguate_ecl(data) - end - if languages.all? { |l| ["IDL", "Prolog"].include?(l) } - result = disambiguate_pro(data) - end - if languages.all? { |l| ["Common Lisp", "OpenCL"].include?(l) } - result = disambiguate_cl(data) - end - if languages.all? { |l| ["Hack", "PHP"].include?(l) } - result = disambiguate_hack(data) - end - if languages.all? { |l| ["Scala", "SuperCollider"].include?(l) } - result = disambiguate_sc(data) - end - if languages.all? { |l| ["AsciiDoc", "AGS Script"].include?(l) } - result = disambiguate_asc(data) - end - if languages.all? { |l| ["FORTRAN", "Forth"].include?(l) } - result = disambiguate_f(data) - end - return result + @heuristics.each do |heuristic| + return Array(heuristic.call(data)) if heuristic.matches?(languages) end + + [] # No heuristics matched end - # .h extensions are ambiguous between C, C++, and Objective-C. - # We want to shortcut look for Objective-C _and_ now C++ too! + # Internal: Define a new heuristic. # - # Returns an array of Languages or [] - def self.disambiguate_c(data) - matches = [] + # languages - String names of languages to disambiguate. + # heuristic - Block which takes data as an argument and returns a Language or nil. + # + # Examples + # + # disambiguate "Perl", "Prolog" do |data| + # if data.include?("use strict") + # Language["Perl"] + # elsif data.include?(":-") + # Language["Prolog"] + # end + # end + # + def self.disambiguate(*languages, &heuristic) + @heuristics << new(languages, &heuristic) + end + + # Internal: Array of defined heuristics + @heuristics = [] + + # Internal + def initialize(languages, &heuristic) + @languages = languages + @heuristic = heuristic + end + + # Internal: Check if this heuristic matches the candidate languages. + def matches?(candidates) + candidates.all? { |l| @languages.include?(l.name) } + end + + # Internal: Perform the heuristic + def call(data) + @heuristic.call(data) + end + + disambiguate "Objective-C", "C++", "C" do |data| if (/@(interface|class|protocol|property|end|synchronised|selector|implementation)\b/.match(data)) - matches << Language["Objective-C"] + Language["Objective-C"] elsif (/^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>/.match(data) || - /^\s*template\s*")) - matches << Language["XML"] - else - matches << Language["TypeScript"] - end - matches - end - - def self.disambiguate_cl(data) - matches = [] + disambiguate "Common Lisp", "OpenCL" do |data| if data.include?("(defun ") - matches << Language["Common Lisp"] + Language["Common Lisp"] elsif /\/\* |\/\/ |^\}/.match(data) - matches << Language["OpenCL"] + Language["OpenCL"] end - matches end - def self.disambiguate_r(data) - matches = [] - matches << Language["Rebol"] if /\bRebol\b/i.match(data) - matches << Language["R"] if data.include?("<-") - matches - end - - def self.disambiguate_hack(data) - matches = [] + disambiguate "Hack", "PHP" do |data| if data.include?(" 1 - data = blob.data - possible_language_names = possible_languages.map(&:name) - heuristic_languages = Heuristics.find_by_heuristics(data, possible_language_names) - - if heuristic_languages.size > 1 - possible_language_names = heuristic_languages.map(&:name) + # Call each strategy until one candidate is returned + STRATEGIES.reduce([]) do |languages, strategy| + candidates = strategy.call(blob, languages) + if candidates.size == 1 + return candidates.first + elsif candidates.size > 1 + # More than one candidate was found, pass them to the next strategy + candidates + else + # Strategy couldn't find any candidates, so pass on the original list + languages end - - # Check if there's a shebang line and use that as authoritative - if (result = find_by_shebang(data)) && !result.empty? - result.first - # No shebang. Still more work to do. Try to find it with our heuristics. - elsif heuristic_languages.size == 1 - heuristic_languages.first - # Lastly, fall back to the probabilistic classifier. - elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first - # Return the actual Language object based of the string language name (i.e., first element of `#classify`) - Language[classified[0]] - end - else - # Simplest and most common case, we can just return the one match based on extension - possible_languages.first - end + end.first end # Public: Get all Languages @@ -190,8 +172,13 @@ module Linguist # Returns all matching Languages or [] if none were found. def self.find_by_filename(filename) basename = File.basename(filename) - extname = FileBlob.new(filename).extension - (@filename_index[basename] + find_by_extension(extname)).compact.uniq + + # find the first extension with language definitions + extname = FileBlob.new(filename).extensions.detect do |e| + !@extension_index[e].empty? + end + + (@filename_index[basename] + @extension_index[extname]).compact.uniq end # Public: Look up Languages by file extension. diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 71caf5ee..cc124e04 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -558,6 +558,8 @@ Crystal: - .cr ace_mode: ruby tm_scope: source.ruby + interpreters: + - crystal Cucumber: extensions: @@ -735,6 +737,8 @@ Erlang: - .es - .escript - .hrl + interpreters: + - escript F#: type: programming @@ -814,6 +818,7 @@ Forth: - .for - .forth - .frt + - .fs Frege: type: programming @@ -867,6 +872,7 @@ GLSL: - .fp - .frag - .frg + - .fs - .fshader - .geo - .geom @@ -930,6 +936,8 @@ Gnuplot: - .gnuplot - .plot - .plt + interpreters: + - gnuplot Go: type: programming @@ -1195,6 +1203,8 @@ Ioke: color: "#078193" extensions: - .ik + interpreters: + - ioke Isabelle: type: programming @@ -1702,6 +1712,8 @@ Nu: filenames: - Nukefile tm_scope: source.scheme + interpreters: + - nush NumPy: group: Python @@ -1888,6 +1900,8 @@ Parrot Assembly: - pasm extensions: - .pasm + interpreters: + - parrot tm_scope: none Parrot Internal Representation: @@ -1898,6 +1912,8 @@ Parrot Internal Representation: - pir extensions: - .pir + interpreters: + - parrot Pascal: type: programming @@ -1940,6 +1956,8 @@ Perl6: - .p6m - .pl6 - .pm6 + interpreters: + - perl6 tm_scope: none PigLatin: @@ -2004,6 +2022,8 @@ Prolog: - .ecl - .pro - .prolog + interpreters: + - swipl Propeller Spin: type: programming @@ -2067,6 +2087,8 @@ Python: - wscript interpreters: - python + - python2 + - python3 Python traceback: type: data @@ -2087,6 +2109,8 @@ QMake: extensions: - .pro - .pri + interpreters: + - qmake R: type: programming @@ -2241,6 +2265,8 @@ Ruby: - .watchr interpreters: - ruby + - macruby + - rake filenames: - .pryrc - Appraisals @@ -2327,6 +2353,8 @@ Scala: - .scala - .sbt - .sc + interpreters: + - scala Scaml: group: HTML diff --git a/lib/linguist/samples.rb b/lib/linguist/samples.rb index 82c011b1..001204b5 100644 --- a/lib/linguist/samples.rb +++ b/lib/linguist/samples.rb @@ -52,14 +52,16 @@ module Linguist }) end else + path = File.join(dirname, filename) + if File.extname(filename) == "" - raise "#{File.join(dirname, filename)} is missing an extension, maybe it belongs in filenames/ subdir" + raise "#{path} is missing an extension, maybe it belongs in filenames/ subdir" end yield({ - :path => File.join(dirname, filename), + :path => path, :language => category, - :interpreter => File.exist?(filename) ? Linguist.interpreter_from_shebang(File.read(filename)) : nil, + :interpreter => Linguist.interpreter_from_shebang(File.read(path)), :extname => File.extname(filename) }) end @@ -131,18 +133,19 @@ module Linguist script = script == 'env' ? tokens[1] : script - # "python2.6" -> "python" - if script =~ /((?:\d+\.?)+)/ - script.sub! $1, '' - end + # If script has an invalid shebang, we might get here + return unless script + + # "python2.6" -> "python2" + script.sub! $1, '' if script =~ /(\.\d+)$/ # Check for multiline shebang hacks that call `exec` if script == 'sh' && lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) } script = $1 end - - script + + File.basename(script) else nil end diff --git a/lib/linguist/strategy/filename.rb b/lib/linguist/strategy/filename.rb new file mode 100644 index 00000000..e682863b --- /dev/null +++ b/lib/linguist/strategy/filename.rb @@ -0,0 +1,20 @@ +module Linguist + module Strategy + # Detects language based on filename and/or extension + class Filename + def self.call(blob, _) + name = blob.name.to_s + + # A bit of an elegant hack. If the file is executable but extensionless, + # append a "magic" extension so it can be classified with other + # languages that have shebang scripts. + extensions = FileBlob.new(name).extensions + if extensions.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05 + name += ".script!" + end + + Language.find_by_filename(name) + end + end + end +end diff --git a/lib/linguist/strategy/shebang.rb b/lib/linguist/strategy/shebang.rb new file mode 100644 index 00000000..dd5bc38b --- /dev/null +++ b/lib/linguist/strategy/shebang.rb @@ -0,0 +1,10 @@ +module Linguist + module Strategy + # Check if there's a shebang line and use that as authoritative + class Shebang + def self.call(blob, _) + Language.find_by_shebang(blob.data) + end + end + end +end diff --git a/samples/F#/sample.fs b/samples/F#/sample.fs new file mode 100644 index 00000000..2b690f10 --- /dev/null +++ b/samples/F#/sample.fs @@ -0,0 +1,15 @@ +module Sample + +open System + +type Foo = + { + Bar : string + } + +type Baz = interface end + +let Sample1(xs : int list) : string = + xs + |> List.map (fun x -> string x) + |> String.concat "," diff --git a/samples/Forth/core.fs b/samples/Forth/core.fs new file mode 100644 index 00000000..4a13e217 --- /dev/null +++ b/samples/Forth/core.fs @@ -0,0 +1,252 @@ +: immediate lastxt @ dup c@ negate swap c! ; + +: \ source nip >in ! ; immediate \ Copyright 2004, 2012 Lars Brinkhoff + +: char \ ( "word" -- char ) + bl-word here 1+ c@ ; + +: ahead here 0 , ; + +: resolve here swap ! ; + +: ' bl-word here find 0branch [ ahead ] exit [ resolve ] 0 ; + +: postpone-nonimmediate [ ' literal , ' compile, ] literal , ; + +: create dovariable_code header, reveal ; + +create postponers + ' postpone-nonimmediate , + ' abort , + ' , , + +: word \ ( char "string" -- caddr ) + drop bl-word here ; + +: postpone \ ( C: "word" -- ) + bl word find 1+ cells postponers + @ execute ; immediate + +: unresolved \ ( C: "word" -- orig ) + postpone postpone postpone ahead ; immediate + +: chars \ ( n1 -- n2 ) + ; + +: else \ ( -- ) ( C: orig1 -- orig2 ) + unresolved branch swap resolve ; immediate + +: if \ ( flag -- ) ( C: -- orig ) + unresolved 0branch ; immediate + +: then \ ( -- ) ( C: orig -- ) + resolve ; immediate + +: [char] \ ( "word" -- ) + char postpone literal ; immediate + +: (does>) lastxt @ dodoes_code over >code ! r> swap >does ! ; + +: does> postpone (does>) ; immediate + +: begin \ ( -- ) ( C: -- dest ) + here ; immediate + +: while \ ( x -- ) ( C: dest -- orig dest ) + unresolved 0branch swap ; immediate + +: repeat \ ( -- ) ( C: orig dest -- ) + postpone branch , resolve ; immediate + +: until \ ( x -- ) ( C: dest -- ) + postpone 0branch , ; immediate + +: recurse lastxt @ compile, ; immediate + +: pad \ ( -- addr ) + here 1024 + ; + +: parse \ ( char "string" -- addr n ) + pad >r begin + source? if else 0 0 then + while + r@ c! r> 1+ >r + repeat 2drop pad r> over - ; + +: ( \ ( "string" -- ) + [ char ) ] literal parse 2drop ; immediate + \ TODO: If necessary, refill and keep parsing. + +: string, ( addr n -- ) + here over allot align swap cmove ; + +: (s") ( -- addr n ) ( R: ret1 -- ret2 ) + r> dup @ swap cell+ 2dup + aligned >r swap ; + +create squote 128 allot + +: s" ( "string" -- addr n ) + state @ if + postpone (s") [char] " parse dup , string, + else + [char] " parse >r squote r@ cmove squote r> + then ; immediate + +: (abort") ( ... addr n -- ) ( R: ... -- ) + cr type cr abort ; + +: abort" ( ... x "string" -- ) ( R: ... -- ) + postpone if postpone s" postpone (abort") postpone then ; immediate + +\ ---------------------------------------------------------------------- + +( Core words. ) + +\ TODO: # +\ TODO: #> +\ TODO: #s + +: and ( x y -- x&y ) nand invert ; + +: * 1 2>r 0 swap begin r@ while + r> r> swap 2dup dup + 2>r and if swap over + swap then dup + + repeat r> r> 2drop drop ; + +\ TODO: */mod + +: +loop ( -- ) ( C: nest-sys -- ) + postpone (+loop) postpone 0branch , postpone unloop ; immediate + +: space bl emit ; + +: ?.- dup 0 < if [char] - emit negate then ; + +: digit [char] 0 + emit ; + +: (.) base @ /mod ?dup if recurse then digit ; + +: ." ( "string" -- ) postpone s" postpone type ; immediate + +: . ( x -- ) ?.- (.) space ; + +: postpone-number ( caddr -- ) + 0 0 rot count >number dup 0= if + 2drop nip + postpone (literal) postpone (literal) postpone , + postpone literal postpone , + else + ." Undefined: " type cr abort + then ; + +' postpone-number postponers cell+ ! + +: / ( x y -- x/y ) /mod nip ; + +: 0< ( n -- flag ) 0 < ; + +: 1- ( n -- n-1 ) -1 + ; + +: 2! ( x1 x2 addr -- ) swap over ! cell+ ! ; + +: 2* ( n -- 2n ) dup + ; + +\ Kernel: 2/ + +: 2@ ( addr -- x1 x2 ) dup cell+ @ swap @ ; + +\ Kernel: 2drop +\ Kernel: 2dup + +\ TODO: 2over ( x1 x2 x3 x4 -- x1 x2 x3 x4 x1 x2 ) +\ 3 pick 3 pick ; + +\ TODO: 2swap + +\ TODO: <# + +: abs ( n -- |n| ) + dup 0< if negate then ; + +\ TODO: accept + +: c, ( n -- ) + here c! 1 chars allot ; + +: char+ ( n1 -- n2 ) + 1+ ; + +: constant create , does> @ ; + +: decimal ( -- ) + 10 base ! ; + +: depth ( -- n ) + data_stack 100 cells + 'SP @ - /cell / 2 - ; + +: do ( n1 n2 -- ) ( R: -- loop-sys ) ( C: -- do-sys ) + postpone 2>r here ; immediate + +\ TODO: environment? +\ TODO: evaluate +\ TODO: fill +\ TODO: fm/mod ) +\ TODO: hold + +: j ( -- x1 ) ( R: x1 x2 x3 -- x1 x2 x3 ) + 'RP @ 3 cells + @ ; + +\ TODO: leave + +: loop ( -- ) ( C: nest-sys -- ) + postpone 1 postpone (+loop) + postpone 0branch , + postpone unloop ; immediate + +: lshift begin ?dup while 1- swap dup + swap repeat ; + +: rshift 1 begin over while dup + swap 1- swap repeat nip + 2>r 0 1 begin r@ while + r> r> 2dup swap dup + 2>r and if swap over + swap then dup + + repeat r> r> 2drop drop ; + +: max ( x y -- max[x,y] ) + 2dup > if drop else nip then ; + +\ Kernel: min +\ TODO: mod +\ TODO: move + +: (quit) ( R: ... -- ) + return_stack 100 cells + 'RP ! + 0 'source-id ! tib ''source ! #tib ''#source ! + postpone [ + begin + refill + while + interpret state @ 0= if ." ok" cr then + repeat + bye ; + +' (quit) ' quit >body cell+ ! + +\ TODO: s>d +\ TODO: sign +\ TODO: sm/rem + +: spaces ( n -- ) + 0 do space loop ; + +\ TODO: u. + +: signbit ( -- n ) -1 1 rshift invert ; + +: xor ( x y -- x^y ) 2dup nand >r r@ nand swap r> nand nand ; + +: u< ( x y -- flag ) signbit xor swap signbit xor > ; + +\ TODO: um/mod + +: variable ( "word" -- ) + create /cell allot ; + +: ['] \ ( C: "word" -- ) + ' postpone literal ; immediate diff --git a/samples/GLSL/recurse1.fs b/samples/GLSL/recurse1.fs new file mode 100644 index 00000000..66b4c3fe --- /dev/null +++ b/samples/GLSL/recurse1.fs @@ -0,0 +1,48 @@ +#version 330 core + +// cross-unit recursion + +void main() {} + +// two-level recursion + +float cbar(int); + +void cfoo(float) +{ + cbar(2); +} + +// four-level, out of order + +void CB(); +void CD(); +void CA() { CB(); } +void CC() { CD(); } + +// high degree + +void CBT(); +void CDT(); +void CAT() { CBT(); CBT(); CBT(); } +void CCT() { CDT(); CDT(); CBT(); } + +// not recursive + +void norA() {} +void norB() { norA(); } +void norC() { norA(); } +void norD() { norA(); } +void norE() { norB(); } +void norF() { norB(); } +void norG() { norE(); } +void norH() { norE(); } +void norI() { norE(); } + +// not recursive, but with a call leading into a cycle if ignoring direction + +void norcA() { } +void norcB() { norcA(); } +void norcC() { norcB(); } +void norcD() { norcC(); norcB(); } // head of cycle +void norcE() { norcD(); } // lead into cycle diff --git a/samples/PHP/drupal.module b/samples/PHP/drupal.script! similarity index 100% rename from samples/PHP/drupal.module rename to samples/PHP/drupal.script! diff --git a/samples/Ruby/wrong_shebang.rb b/samples/Ruby/wrong_shebang.rb deleted file mode 100644 index 22b4804a..00000000 --- a/samples/Ruby/wrong_shebang.rb +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env python -puts "Not Python" diff --git a/test/fixtures/Python/run_tests.module b/test/fixtures/Python/run_tests.module new file mode 100644 index 00000000..b3d004e6 --- /dev/null +++ b/test/fixtures/Python/run_tests.module @@ -0,0 +1,22 @@ +#!/usr/bin/env python +import sys, os + +# Set the current working directory to the directory where this script is located +os.chdir(os.path.abspath(os.path.dirname(sys.argv[0]))) + +#### Set the name of the application here and moose directory relative to the application +app_name = 'stork' + +MODULE_DIR = os.path.abspath('..') +MOOSE_DIR = os.path.abspath(os.path.join(MODULE_DIR, '..')) +#### See if MOOSE_DIR is already in the environment instead +if os.environ.has_key("MOOSE_DIR"): + MOOSE_DIR = os.environ['MOOSE_DIR'] + +sys.path.append(os.path.join(MOOSE_DIR, 'python')) +import path_tool +path_tool.activate_module('TestHarness') + +from TestHarness import TestHarness +# Run the tests! +TestHarness.buildAndRun(sys.argv, app_name, MOOSE_DIR) diff --git a/test/fixtures/Shell/mintleaf.module b/test/fixtures/Shell/mintleaf.module new file mode 100644 index 00000000..2f80f259 --- /dev/null +++ b/test/fixtures/Shell/mintleaf.module @@ -0,0 +1,1505 @@ +#!/bin/bash + +################################################################################ +## base routines +## + +function list_modules() { + + # define help + local help=$(cat < /dev/null + printf "\n" + fi + else + echo "Module does not exist" + fi +} + +function list_functions() { + + # define help + local help=$(cat <> $MINTLEAF_HOME/modules/$module/$module.md +$module +======= + +TODO +EOF + [ ! -f $MINTLEAF_HOME/modules/$module/$module.config ] && cat << EOF >> $MINTLEAF_HOME/modules/$module/$module.config +EOF + [ ! -f $MINTLEAF_HOME/modules/$module/$module.install ] && cat << EOF >> $MINTLEAF_HOME/modules/$module/$module.install +#!/bin/bash + +function install_module() { + + echo "TODO" +} +EOF + [ ! -f $MINTLEAF_HOME/modules/$module/$module.module ] && cat << EOF >> $MINTLEAF_HOME/modules/$module/$module.module +#!/bin/bash +EOF + [ ! -f $MINTLEAF_HOME/modules/$module/$module.test ] && cat << EOF >> $MINTLEAF_HOME/modules/$module/$module.test +#!/bin/bash + +function test_prerequisites() { + + echo "TODO" +} + +function test_module() { + + assert_prerequisites + + echo "TODO" +} +EOF + [ ! -f $MINTLEAF_HOME/modules/$module/$module.groovy ] && cat << EOF >> $MINTLEAF_HOME/modules/$module/$module.groovy +EOF +} + +################################################################################ +## general routines +## + +function func_exists() { + + # define help + local help=$(cat < /dev/null + if [ "$?" == "0" ]; then + echo $result_pos + else + echo $result_neg + fi +} + +function usleep() { + + # define help + local help=$(cat < /dev/null 2>&1 || head -c $len) + echo $str +} + +function trim() { + + # define help + local help=$(cat < + --max-length +HEREDOC +) + + # check parameters + if [ "$1" == "--help" ] || [ $# -lt 1 ]; then + echo -e "${help}\n" + return + fi + + # get parameters + local str=$1 + local char=$2 + + # get optional parameters + local allowed_characters= + local max_length=255 + if [ "$3" != "" ] && [ "$3" != "--allowed-characters" ]; then + len=$3 + fi + while [ "$1" != "" ]; do + case $1 in + --allowed-characters) shift; allowed_characters=$1 + ;; + --max-length) shift; max_length=$1 + ;; + esac + shift + done + + # remove unwanted characters + local sanitised=$(echo $str | sed "s/[^A-Za-z0-9$allowed_characters]/$char/g") + # remove multiple instances of the replacement character + sanitised=$(echo $sanitised | sed -r "s/($char)+/$char/g") + # limit the length + sanitised=$(echo $sanitised | cut -c1-${max_length}) + # make it lower case + echo $sanitised | tr '[:upper:]' '[:lower:]' +} + +function str_substring() { + + # define help + local help=$(cat < 10#${ver2[i]})); then + echo 1 + return + fi + if ((10#${ver1[i]} < 10#${ver2[i]})); then + echo -1 + return + fi + done + + # test the 2nd part + if [ "$ver1b" \< "$ver2b" ]; then + echo -1 + return + elif [ "$ver1b" \> "$ver2b" ]; then + echo 1 + return + fi + + echo 0 +} + +################################################################################ +## file routines +## + +function file_escape_name() { + + # define help + local help=$(cat < $tmp_file + else + sed "s/$str1/$str2/g" $file > $tmp_file + fi + mv $tmp_file $file +} + +function file_remove_str() { + + # define help + local help=$(cat < $tmp_file + else + str='1h;1!H;${;g;s/' + sed -n "$str$1//g;p;}" $file > $tmp_file + fi + mv $tmp_file $file +} + +function file_download() { + + + # define help + local help=$(cat < URL address of file to download (required). + --file Name of output file. + --cache-dir Cache directory; file name in that directory + must match the file name given as the parameter. + --donwload-directory Destination directory where file + should be placed after download. + --size Check file size after download. + --hash Check file hash after download. + --hash-algorithm Hash algorithm used to check file. + --do-not-cache Do not cache file locally. + --force Force to download from given URL address not + using cached file or an alternative location. +HEREDOC +) + + # check parameters + if [ "$1" == "--help" ] || [ $# -lt 4 ]; then + echo -e "${help}\n" + return + fi + + # variables + local url= + local file= + local cache_dir=$mintleaf_tmp_dir + local download_dir=./ + local expected_size=0 + local expected_hash= + local hash_algorithm="md5" + local do_not_cache=$result_neg + local force=$result_neg + local current_dir=$(pwd) + + # get parameters + while [ "$1" != "" ]; do + case $1 in + --url) shift; url=$1 + ;; + --file) shift; file=$1 + ;; + --cache-dir) shift; cache_dir=$1 + ;; + --download-directory) shift; download_dir=$1 + ;; + --size) shift; expected_size=$1 + ;; + --hash) shift; expected_hash=$1 + ;; + --hash-algorithm) shift; hash_algorithm=$1 + ;; + --do-not-cache) do_not_cache=$result_pos + ;; + --force) force=$result_pos + ;; + esac + shift + done + + # file may have already been downloaded + if [ $force == $result_neg ] && [ -s $cache_dir/$file ] && [ ! -s $download_dir/$file ]; then + + cp -f $cache_dir/$file $download_dir + + else + + # download from local network + # TODO + + # download from custom location + # TODO + + # download from given url address + if ([ -n $url ] && ([ ! -s $cache_dir/$file ] || [ $force == $result_pos ])); then + # try to download + wget --tries=1 --connect-timeout=10 $url -O $file + # cache file + if [ -s $file ]; then + mv -f $file $cache_dir + fi + fi + + # copy file to the download directory + if [ -s $cache_dir/$file ] && [ $cache_dir != $download_dir ]; then + cp -f $cache_dir/$file $download_dir + fi + + # do not cache + if [ $do_not_cache == $result_pos ]; then + rm -f $cache_dir/$file + fi + + fi + + # check file size + if [ $expected_size -ne 0 ] && [ -s $download_dir/$file ]; then + local size=$(ls -l $download_dir/$file | awk '{ print $5 }') + if [ $expected_size -gt $size ]; then + rm -f $download_dir/$file + fi + fi + + # return value + if [ -s $download_dir/$file ]; then + # check file hash + if [ -n "$expected_hash" ]; then + file_valid_hash $download_dir/$file $expected_hash $hash_algorithm + else + echo $result_pos + fi + else + echo $result_neg + fi + + cd $current_dir +} + +function file_valid_hash() { + + # define help + local help=$(cat < /dev/null | grep "^/" | sort | uniq +} + +function chroot_dependency_list_all() { + + # define help + local help=$(cat < current depth of recursion (1 by default) + --max-depth maximum depth of recursion (3 by default) +HEREDOC +) + + # check parameters + if [ "$1" == "--help" ] || [ $# -lt 1 ]; then + echo -e "${help}\n" + return + fi + + # get parameters + local bin=$1 + + # get optional parameters + local cur_depth=1 + local max_depth=3 + while [ "$1" != "" ]; do + case $1 in + --cur-depth) shift; cur_depth=$1 + ;; + --max-depth) shift; max_depth=$1 + ;; + esac + shift + done + + ( + local output=$(chroot_dependency_list $bin) + for file in $output; do + echo "$file" + if [ $cur_depth -lt $max_depth ]; then + chroot_dependency_list_all $file --cur-depth $(expr $cur_depth + 1) --max-depth $max_depth + fi + done + ) 2> /dev/null | grep "^/" | sort | uniq +} + +function chroot_dependency_copy() { + + # define help + local help=$(cat < /dev/null + fi + fi +} + +function chroot_create_env() { + + # define help + local help=$(cat < user name to mount their home directory + --home-read-only whether user home directory should be mounted as read-only +HEREDOC +) + + # check parameters + if [ "$1" == "--help" ] || [ $# -lt 1 ]; then + echo -e "${help}\n" + return + fi + + # get parameters + local dir=$1 + + # get additional parameters + local user= + local home_read_only= + while [ "$1" != "" ]; do + case $1 in + --user) shift; user=$1 + ;; + --home-read-only) home_read_only="--read-only" + ;; + esac + shift + done + + [ ! -d $dir ] && mkdir $dir + mkdir -p $dir/{bin,dev/pts,etc,home,lib,lib64,proc,root,sbin,tmp,usr/bin,usr/include,usr/lib,usr/lib64,usr/sbin} + + chmod 1777 $dir/tmp + + # /bin + chroot_mount_dir /bin $dir/bin --read-only + # /dev/pts + chroot_mount_dir /dev/pts $dir/dev/pts + # /etc + chroot_mount_dir /etc $dir/etc --read-only + # /lib + chroot_mount_dir /lib $dir/lib --read-only + # /lib64 + chroot_mount_dir /lib64 $dir/lib64 --read-only + # /proc + chroot_mount_dir /proc $dir/proc + # /sbin + chroot_mount_dir /sbin $dir/sbin --read-only + # /usr/bin + chroot_mount_dir /usr/bin $dir/usr/bin --read-only + # /usr/include + chroot_mount_dir /usr/include $dir/usr/include --read-only + # /usr/lib + chroot_mount_dir /usr/lib $dir/usr/lib --read-only + # /usr/lib64 + chroot_mount_dir /usr/lib64 $dir/usr/lib64 --read-only + # /usr/sbin + chroot_mount_dir /usr/sbin $dir/usr/sbin --read-only + + rm -f $dir/dev/null + mknod -m 666 $dir/dev/null c 1 3 + rm -f $dir/dev/zero + mknod -m 666 $dir/dev/zero c 1 5 + rm -f $dir/dev/random + mknod -m 444 $dir/dev/random c 1 8 + rm -f $dir/dev/urandom + mknod -m 444 $dir/dev/urandom c 1 9 + + # user home directory + if [ -n "$user" ]; then + local home_dir=/home/$user + if [ "$user" == "root" ]; then + home_dir=/root + fi + if [ -d $home_dir ]; then + chroot_mount_dir $home_dir ${dir}${home_dir} $home_read_only + fi + fi +} + +function chroot_remove_env() { + + # define help + local help=$(cat < + --gid + --groups + --home + --shell +HEREDOC +) + + # check parameters + if [ "$1" == "--help" ] || [ $# -lt 2 ]; then + echo -e "${help}\n" + return + fi + + # get parameters + local user=$1 + local group=$2 + + # get optional parameters + local uid="-K UID_MIN=${uid_min} -K UID_MAX=${uid_max}" + local gid="-K GID_MIN=${gid_min} -K GID_MAX=${gid_max}" + local groups= + local home="-d /dev/null" + local shell="-s /usr/sbin/nologin" + while [ "$1" != "" ]; do + case $1 in + --uid) shift; uid="-u ${1}" + ;; + --gid) shift; gid="-g ${1}" + ;; + --groups) shift; groups="-G ${1}" + ;; + --home) shift; home="-d ${1}" + ;; + --shell) shift; shell="-s ${1}" + ;; + esac + shift + done + + groupadd $group $gid + useradd $user $uid -g $group $groups $home $shell +} + +function user_delete() { + + # define help + local help=$(cat < /dev/null 2>&1 +} + +################################################################################ +## security routines +## + +function security_gen_cert() { + + # define help + local help=$(cat < size of certificate (default is 2048) + --days for how many days certificate remains valid (default is 3650) + --dir output dirctory +HEREDOC +) + + # check parameters + if [ "$1" == "--help" ] || [ $# -lt 1 ]; then + echo -e "${help}\n" + return + fi + + # get parameters + local name=$1 + local size=2048 + local days=3650 + local dir=. + while [ "$1" != "" ]; do + case $1 in + --size) shift; size=$1 + ;; + --days) shift; days=$1 + ;; + --dir) shift; dir=$1 + ;; + esac + shift + done + + $cmd_openssl req \ + -new -x509 -nodes -sha1 -newkey rsa:$size -days $days -subj "/O=unknown/OU=unknown/CN=$name" \ + -keyout $dir/$name.key \ + -out $dir/$name.crt + cat $dir/$name.crt $dir/$name.key > $dir/$name.pem + chmod 400 $dir/$name.{crt,key,pem} +} diff --git a/test/helper.rb b/test/helper.rb new file mode 100644 index 00000000..780819dc --- /dev/null +++ b/test/helper.rb @@ -0,0 +1,4 @@ +require "bundler/setup" +require "test/unit" +require "mocha/setup" +require "linguist" diff --git a/test/test_blob.rb b/test/test_blob.rb index eb758534..4fe6b152 100644 --- a/test/test_blob.rb +++ b/test/test_blob.rb @@ -1,9 +1,4 @@ -require 'linguist/file_blob' -require 'linguist/samples' - -require 'test/unit' -require 'mocha/setup' -require 'mime/types' +require_relative "./helper" class TestBlob < Test::Unit::TestCase include Linguist @@ -470,6 +465,25 @@ class TestBlob < Test::Unit::TestCase assert blob.language, "No language for #{sample[:path]}" assert_equal sample[:language], blob.language.name, blob.name end + + # Test language detection for files which shouldn't be used as samples + root = File.expand_path('../fixtures', __FILE__) + Dir.entries(root).each do |language| + next unless File.file?(language) + + # Each directory contains test files of a language + dirname = File.join(root, language) + Dir.entries(dirname).each do |filename| + next unless File.file?(filename) + + # By default blob search the file in the samples; + # thus, we need to give it the absolute path + filepath = File.join(dirname, filename) + blob = blob(filepath) + assert blob.language, "No language for #{filepath}" + assert_equal language, blob.language.name, blob.name + end + end end def test_minified_files_not_safe_to_highlight diff --git a/test/test_classifier.rb b/test/test_classifier.rb index 87c6feb2..8e1d8355 100644 --- a/test/test_classifier.rb +++ b/test/test_classifier.rb @@ -1,9 +1,4 @@ -require 'linguist/classifier' -require 'linguist/language' -require 'linguist/samples' -require 'linguist/tokenizer' - -require 'test/unit' +require_relative "./helper" class TestClassifier < Test::Unit::TestCase include Linguist diff --git a/test/test_file_blob.rb b/test/test_file_blob.rb new file mode 100644 index 00000000..9371dce7 --- /dev/null +++ b/test/test_file_blob.rb @@ -0,0 +1,10 @@ +require 'linguist/file_blob' +require 'test/unit' + +class TestFileBlob < Test::Unit::TestCase + def test_extensions + assert_equal [".gitignore"], Linguist::FileBlob.new(".gitignore").extensions + assert_equal [".xml"], Linguist::FileBlob.new("build.xml").extensions + assert_equal [".html.erb", ".erb"], Linguist::FileBlob.new("dotted.dir/index.html.erb").extensions + end +end diff --git a/test/test_heuristics.rb b/test/test_heuristics.rb index 27e2a39d..0883d7b7 100644 --- a/test/test_heuristics.rb +++ b/test/test_heuristics.rb @@ -1,9 +1,4 @@ -require 'linguist/heuristics' -require 'linguist/language' -require 'linguist/samples' -require 'linguist/file_blob' - -require 'test/unit' +require_relative "./helper" class TestHeuristcs < Test::Unit::TestCase include Linguist @@ -16,6 +11,11 @@ class TestHeuristcs < Test::Unit::TestCase File.read(File.join(samples_path, name)) end + def file_blob(name) + path = File.exist?(name) ? name : File.join(samples_path, name) + FileBlob.new(path) + end + def all_fixtures(language_name, file="*") Dir.glob("#{samples_path}/#{language_name}/#{file}") end @@ -23,24 +23,17 @@ class TestHeuristcs < Test::Unit::TestCase # Candidate languages = ["C++", "Objective-C"] def test_obj_c_by_heuristics # Only calling out '.h' filenames as these are the ones causing issues - all_fixtures("Objective-C", "*.h").each do |fixture| - results = Heuristics.disambiguate_c(fixture("Objective-C/#{File.basename(fixture)}")) - assert_equal Language["Objective-C"], results.first, "Failed for #{File.basename(fixture)}" - end - end - - # Candidate languages = ["C++", "Objective-C"] - def test_cpp_by_heuristics - results = Heuristics.disambiguate_c(fixture("C++/render_adapter.cpp")) - assert_equal Language["C++"], results.first - results = Heuristics.disambiguate_c(fixture("C++/ThreadedQueue.h")) - assert_equal Language["C++"], results.first + assert_heuristics({ + "Objective-C" => all_fixtures("Objective-C", "*.h"), + "C++" => ["C++/render_adapter.cpp", "C++/ThreadedQueue.h"], + "C" => nil + }) end def test_c_by_heuristics - languages = ["C++", "Objective-C", "C"] - results = Heuristics.disambiguate_c(fixture("C/ArrowLeft.h")) - assert_equal nil, results.first + languages = [Language["C++"], Language["Objective-C"], Language["C"]] + results = Heuristics.call(file_blob("C/ArrowLeft.h"), languages) + assert_equal [], results end def test_detect_still_works_if_nothing_matches @@ -50,94 +43,89 @@ class TestHeuristcs < Test::Unit::TestCase end # Candidate languages = ["Perl", "Prolog"] - def test_pl_prolog_by_heuristics - results = Heuristics.disambiguate_pl(fixture("Prolog/turing.pl")) - assert_equal Language["Prolog"], results.first - end - - # Candidate languages = ["Perl", "Prolog"] - def test_pl_perl_by_heuristics - results = Heuristics.disambiguate_pl(fixture("Perl/perl-test.t")) - assert_equal Language["Perl"], results.first + def test_pl_prolog_perl_by_heuristics + assert_heuristics({ + "Prolog" => "Prolog/turing.pl", + "Perl" => "Perl/perl-test.t", + }) end # Candidate languages = ["ECL", "Prolog"] def test_ecl_prolog_by_heuristics - results = Heuristics.disambiguate_ecl(fixture("Prolog/or-constraint.ecl")) - assert_equal Language["Prolog"], results.first + results = Heuristics.call(file_blob("Prolog/or-constraint.ecl"), [Language["ECL"], Language["Prolog"]]) + assert_equal [Language["Prolog"]], results end # Candidate languages = ["ECL", "Prolog"] - def test_ecl_ecl_by_heuristics - results = Heuristics.disambiguate_ecl(fixture("ECL/sample.ecl")) - assert_equal Language["ECL"], results.first + def test_ecl_prolog_by_heuristics + assert_heuristics({ + "ECL" => "ECL/sample.ecl", + "Prolog" => "Prolog/or-constraint.ecl" + }) end # Candidate languages = ["IDL", "Prolog"] - def test_pro_prolog_by_heuristics - results = Heuristics.disambiguate_pro(fixture("Prolog/logic-problem.pro")) - assert_equal Language["Prolog"], results.first - end - - # Candidate languages = ["IDL", "Prolog"] - def test_pro_idl_by_heuristics - results = Heuristics.disambiguate_pro(fixture("IDL/mg_acosh.pro")) - assert_equal Language["IDL"], results.first + def test_pro_prolog_idl_by_heuristics + assert_heuristics({ + "Prolog" => "Prolog/logic-problem.pro", + "IDL" => "IDL/mg_acosh.pro" + }) end # Candidate languages = ["AGS Script", "AsciiDoc"] def test_asc_asciidoc_by_heuristics - results = Heuristics.disambiguate_asc(fixture("AsciiDoc/list.asc")) - assert_equal Language["AsciiDoc"], results.first - end - - # Candidate languages = ["TypeScript", "XML"] - def test_ts_typescript_by_heuristics - results = Heuristics.disambiguate_ts(fixture("TypeScript/classes.ts")) - assert_equal Language["TypeScript"], results.first - end - - # Candidate languages = ["TypeScript", "XML"] - def test_ts_xml_by_heuristics - results = Heuristics.disambiguate_ts(fixture("XML/pt_BR.xml")) - assert_equal Language["XML"], results.first + assert_heuristics({ + "AsciiDoc" => "AsciiDoc/list.asc", + "AGS Script" => nil + }) end def test_cl_by_heuristics - languages = ["Common Lisp", "OpenCL"] - languages.each do |language| - all_fixtures(language).each do |fixture| - results = Heuristics.disambiguate_cl(fixture("#{language}/#{File.basename(fixture)}")) - assert_equal Language[language], results.first - end - end + assert_heuristics({ + "Common Lisp" => all_fixtures("Common Lisp"), + "OpenCL" => all_fixtures("OpenCL") + }) end def test_f_by_heuristics - languages = ["FORTRAN", "Forth"] - languages.each do |language| - all_fixtures(language).each do |fixture| - results = Heuristics.disambiguate_f(fixture("#{language}/#{File.basename(fixture)}")) - assert_equal Language[language], results.first - end - end + assert_heuristics({ + "FORTRAN" => all_fixtures("FORTRAN"), + "Forth" => all_fixtures("Forth") + }) end # Candidate languages = ["Hack", "PHP"] def test_hack_by_heuristics - results = Heuristics.disambiguate_hack(fixture("Hack/funs.php")) - assert_equal Language["Hack"], results.first + assert_heuristics({ + "Hack" => "Hack/funs.php", + "PHP" => "PHP/Model.php" + }) end # Candidate languages = ["Scala", "SuperCollider"] - def test_sc_supercollider_by_heuristics - results = Heuristics.disambiguate_sc(fixture("SuperCollider/WarpPreset.sc")) - assert_equal Language["SuperCollider"], results.first + def test_sc_supercollider_scala_by_heuristics + assert_heuristics({ + "SuperCollider" => "SuperCollider/WarpPreset.sc", + "Scala" => "Scala/node11.sc" + }) end - # Candidate languages = ["Scala", "SuperCollider"] - def test_sc_scala_by_heuristics - results = Heuristics.disambiguate_sc(fixture("Scala/node11.sc")) - assert_equal Language["Scala"], results.first + def test_fs_by_heuristics + assert_heuristics({ + "F#" => all_fixtures("F#"), + "Forth" => all_fixtures("Forth"), + "GLSL" => all_fixtures("GLSL") + }) + end + + def assert_heuristics(hash) + candidates = hash.keys.map { |l| Language[l] } + + hash.each do |language, blobs| + Array(blobs).each do |blob| + result = Heuristics.call(file_blob(blob), candidates) + assert_equal [Language[language]], result + end + end end end diff --git a/test/test_language.rb b/test/test_language.rb index 1ad47e33..c5c5255f 100644 --- a/test/test_language.rb +++ b/test/test_language.rb @@ -1,6 +1,4 @@ -require 'linguist/language' -require 'test/unit' -require 'yaml' +require_relative "./helper" class TestLanguage < Test::Unit::TestCase include Linguist diff --git a/test/test_md5.rb b/test/test_md5.rb index 6019fcf5..17006e2e 100644 --- a/test/test_md5.rb +++ b/test/test_md5.rb @@ -1,6 +1,4 @@ -require 'linguist/md5' - -require 'test/unit' +require_relative "./helper" class TestMD5 < Test::Unit::TestCase include Linguist diff --git a/test/test_pedantic.rb b/test/test_pedantic.rb index c1819fb6..be8ce063 100644 --- a/test/test_pedantic.rb +++ b/test/test_pedantic.rb @@ -1,5 +1,4 @@ -require 'test/unit' -require 'yaml' +require_relative "./helper" class TestPedantic < Test::Unit::TestCase filename = File.expand_path("../../lib/linguist/languages.yml", __FILE__) diff --git a/test/test_repository.rb b/test/test_repository.rb index 1fba9b57..d07d86da 100644 --- a/test/test_repository.rb +++ b/test/test_repository.rb @@ -1,6 +1,4 @@ -require 'linguist/repository' -require 'linguist/lazy_blob' -require 'test/unit' +require_relative "./helper" class TestRepository < Test::Unit::TestCase def rugged_repository diff --git a/test/test_samples.rb b/test/test_samples.rb index 929a7a00..06ede379 100644 --- a/test/test_samples.rb +++ b/test/test_samples.rb @@ -1,8 +1,5 @@ -require 'linguist/samples' -require 'linguist/language' -require 'tempfile' -require 'yajl' -require 'test/unit' +require_relative "./helper" +require "tempfile" class TestSamples < Test::Unit::TestCase include Linguist @@ -34,23 +31,29 @@ class TestSamples < Test::Unit::TestCase assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c } assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c } assert_equal data['tokens_total'], data['tokens'].inject(0) { |n, (_, ts)| n += ts.inject(0) { |m, (_, c)| m += c } } + assert !data["interpreters"].empty? end - # Check that there aren't samples with extensions that aren't explicitly defined in languages.yml - def test_parity - extensions = Samples.cache['extnames'] - languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__) - languages = YAML.load_file(languages_yml) - - languages.each do |name, options| + # Check that there aren't samples with extensions or interpreters that + # aren't explicitly defined in languages.yml + languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__) + YAML.load_file(languages_yml).each do |name, options| + define_method "test_samples_have_parity_with_languages_yml_for_#{name}" do options['extensions'] ||= [] - - if extnames = extensions[name] + if extnames = Samples.cache['extnames'][name] extnames.each do |extname| next if extname == '.script!' assert options['extensions'].include?(extname), "#{name} has a sample with extension (#{extname}) that isn't explicitly defined in languages.yml" end end + + options['interpreters'] ||= [] + if interpreters = Samples.cache['interpreters'][name] + interpreters.each do |interpreter| + # next if extname == '.script!' + assert options['interpreters'].include?(interpreter), "#{name} has a sample with an interpreter (#{interpreter}) that isn't explicitly defined in languages.yml" + end + end end end @@ -79,4 +82,9 @@ class TestSamples < Test::Unit::TestCase end end end + + def test_shebang + assert_equal "crystal", Linguist.interpreter_from_shebang("#!/usr/bin/env bin/crystal") + assert_equal "python2", Linguist.interpreter_from_shebang("#!/usr/bin/python2.4") + end end diff --git a/test/test_tokenizer.rb b/test/test_tokenizer.rb index 0521f4da..5dab023e 100644 --- a/test/test_tokenizer.rb +++ b/test/test_tokenizer.rb @@ -1,6 +1,4 @@ -require 'linguist/tokenizer' - -require 'test/unit' +require_relative "./helper" class TestTokenizer < Test::Unit::TestCase include Linguist diff --git a/vendor/cache/byebug-3.5.1.gem b/vendor/cache/byebug-3.5.1.gem new file mode 100644 index 00000000..4a2f7840 Binary files /dev/null and b/vendor/cache/byebug-3.5.1.gem differ diff --git a/vendor/cache/columnize-0.8.9.gem b/vendor/cache/columnize-0.8.9.gem new file mode 100644 index 00000000..bc23af26 Binary files /dev/null and b/vendor/cache/columnize-0.8.9.gem differ diff --git a/vendor/cache/debugger-linecache-1.2.0.gem b/vendor/cache/debugger-linecache-1.2.0.gem new file mode 100644 index 00000000..723619d3 Binary files /dev/null and b/vendor/cache/debugger-linecache-1.2.0.gem differ