From 16a67cb852342b7c26cdde272458eb48cd016f4b Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Fri, 3 Aug 2012 15:07:36 -0500 Subject: [PATCH] Move shebang detection into classifier Fixes #203 --- lib/linguist/blob_helper.rb | 88 +------ lib/linguist/language.rb | 27 --- lib/linguist/languages.yml | 17 -- lib/linguist/samples.json | 214 +++++++++--------- lib/linguist/tokenizer.rb | 36 ++- .../Groovy/{script.groovy => groovy.script!} | 0 samples/JavaScript/{script.js => js.script!} | 0 samples/JavaScript/js2.script! | 7 + samples/Nu/{script.nu => nu.script!} | 0 samples/Perl/perl.script! | 2 + samples/Python/{script.py => python.script!} | 0 samples/Racket/{script.rkt => rkt.script!} | 0 .../Ruby/{macruby-script => macruby.script!} | 0 samples/Ruby/{script.rb => ruby.script!} | 0 samples/Ruby/{script2.rb => ruby2.script!} | 2 +- samples/Scala/{script.scala => scala.script!} | 0 samples/Shell/bash.script! | 2 + samples/Shell/sh.script! | 2 + samples/Shell/zsh.script! | 2 + samples/Text/script.foo | 2 - test/test_blob.rb | 23 -- test/test_classifier.rb | 7 +- test/test_language.rb | 10 - test/test_tokenizer.rb | 12 +- 24 files changed, 178 insertions(+), 275 deletions(-) rename samples/Groovy/{script.groovy => groovy.script!} (100%) rename samples/JavaScript/{script.js => js.script!} (100%) create mode 100644 samples/JavaScript/js2.script! rename samples/Nu/{script.nu => nu.script!} (100%) create mode 100755 samples/Perl/perl.script! rename samples/Python/{script.py => python.script!} (100%) rename samples/Racket/{script.rkt => rkt.script!} (100%) rename samples/Ruby/{macruby-script => macruby.script!} (100%) rename samples/Ruby/{script.rb => ruby.script!} (100%) rename samples/Ruby/{script2.rb => ruby2.script!} (74%) rename samples/Scala/{script.scala => scala.script!} (100%) create mode 100755 samples/Shell/bash.script! create mode 100755 samples/Shell/sh.script! create mode 100755 samples/Shell/zsh.script! 
delete mode 100755 samples/Text/script.foo diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index ee9f32fa..af0150ca 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -129,15 +129,6 @@ module Linguist ['.png', '.jpg', '.jpeg', '.gif'].include?(extname) end - # Public: Is the blob likely to have a shebang? - # - # Return true or false - def shebang_extname? - extname.empty? && - mode && - (mode.to_i(8) & 05) == 05 - end - MEGABYTE = 1024 * 1024 # Public: Is the blob too big to load? @@ -410,14 +401,23 @@ module Linguist def guess_language return if binary_mime_type? - possible_languages = Language.find_by_filename(name.to_s) + name = self.name.to_s + + # A bit of an elegant hack. If the file is executable but extensionless, + # append a "magic" extension so it can be classified with other + # languages that have shebang scripts. + if extname.empty? && mode && (mode.to_i(8) & 05) == 05 + name += ".script!" + end + + possible_languages = Language.find_by_filename(name) if possible_languages.length > 1 if result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first Language[result[0]] end else - possible_languages.first || shebang_language + possible_languages.first end end @@ -428,72 +428,6 @@ module Linguist language ? language.lexer : Pygments::Lexer.find_by_name('Text only') end - # Internal: Extract the script name from the shebang line - # - # Requires Blob#data - # - # Examples - # - # '#!/usr/bin/ruby' - # # => 'ruby' - # - # '#!/usr/bin/env ruby' - # # => 'ruby' - # - # '#!/usr/bash/python2.4' - # # => 'python' - # - # Please add additional test coverage to - # `test/test_blob.rb#test_shebang_script` if you make any changes. - # - # Returns a script name String or nil - def shebang_script - # Fail fast if blob isn't viewable? - return unless viewable? - - if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/ - bang.sub!(/^#! 
/, '#!') - tokens = bang.split(' ') - pieces = tokens.first.split('/') - if pieces.size > 1 - script = pieces.last - else - script = pieces.first.sub('#!', '') - end - - script = script == 'env' ? tokens[1] : script - - # python2.4 => python - if script =~ /((?:\d+\.?)+)/ - script.sub! $1, '' - end - - # Check for multiline shebang hacks that exec themselves - # - # #!/bin/sh - # exec foo "$0" "$@" - # - if script == 'sh' && - lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) } - script = $1 - end - - script - end - end - - # Internal: Get Language for shebang script - # - # Returns the Language or nil - def shebang_language - # Skip file extensions unlikely to have shebangs - return unless shebang_extname? - - if script = shebang_script - Language[script] - end - end - # Public: Highlight syntax of blob # # options - A Hash of options (defaults to {}) diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb index 8586fa7e..643a5239 100644 --- a/lib/linguist/language.rb +++ b/lib/linguist/language.rb @@ -11,7 +11,6 @@ module Linguist # Languages are defined in `lib/linguist/languages.yml`. class Language @languages = [] - @overrides = {} @index = {} @name_index = {} @alias_index = {} @@ -21,13 +20,6 @@ module Linguist # Valid Languages types TYPES = [:data, :markup, :programming] - # Internal: Test if extension maps to multiple Languages. - # - # Returns true or false. 
- def self.ambiguous?(extension) - @overrides.include?(extension) - end - # Internal: Create a new Language object # # attributes - A hash of attributes @@ -63,18 +55,6 @@ module Linguist @extension_index[extension] << language end - language.overrides.each do |extension| - if extension !~ /^\./ - raise ArgumentError, "Extension is missing a '.': #{extension.inspect}" - end - - if l = @overrides[extension] - raise ArgumentError, "#{extension} is already overridden by #{l.name}" - end - - @overrides[extension] = language - end - language.filenames.each do |filename| @filename_index[filename] << language end @@ -216,7 +196,6 @@ module Linguist # Set extensions or default to []. @extensions = attributes[:extensions] || [] - @overrides = attributes[:overrides] || [] @filenames = attributes[:filenames] || [] unless @primary_extension = attributes[:primary_extension] @@ -324,11 +303,6 @@ module Linguist # Returns the extension String. attr_reader :primary_extension - # Internal: Get overridden extensions. - # - # Returns the extensions Array. - attr_reader :overrides - # Public: Get filenames # # Examples @@ -461,7 +435,6 @@ module Linguist :search_term => options['search_term'], :extensions => options['extensions'].sort, :primary_extension => options['primary_extension'], - :overrides => options['overrides'], :filenames => options['filenames'], :popular => popular.include?(name) ) diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index a9bdeef0..2a1bdeff 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -15,7 +15,6 @@ # the language. Must be unique. Used when a Language is picked # from a dropdown and we need to automatically choose an # extension. -# overrides - An Array of extensions that takes precedence over conflicts # searchable - Boolean flag to enable searching (defaults to true) # search_term - Deprecated: Some languages maybe indexed under a # different alias. Avoid defining new exceptions. 
@@ -67,8 +66,6 @@ Apex: type: programming lexer: Text only primary_extension: .cls - overrides: - - .cls AppleScript: aliases: @@ -157,8 +154,6 @@ Bro: C: type: programming color: "#555" - overrides: - - .h primary_extension: .c extensions: - .w @@ -533,8 +528,6 @@ Groovy: Groovy Server Pages: group: Groovy lexer: Java Server Page - overrides: - - .gsp aliases: - gsp primary_extension: .gsp @@ -841,8 +834,6 @@ ObjDump: Objective-C: type: programming color: "#438eff" - overrides: - - .m primary_extension: .m extensions: - .mm @@ -915,8 +906,6 @@ Perl: ace_mode: perl color: "#0298c3" primary_extension: .pl - overrides: - - .pl extensions: - .PL - .perl @@ -983,8 +972,6 @@ R: type: programming color: "#198ce7" lexer: S - overrides: - - .r primary_extension: .r extensions: - .r @@ -1208,8 +1195,6 @@ Turing: color: "#45f715" lexer: Text only primary_extension: .t - overrides: - - .t extensions: - .tu @@ -1241,8 +1226,6 @@ Verilog: type: programming lexer: verilog color: "#848bf3" - overrides: - - .v primary_extension: .v VimL: diff --git a/lib/linguist/samples.json b/lib/linguist/samples.json index 471917e7..8318b7dc 100644 --- a/lib/linguist/samples.json +++ b/lib/linguist/samples.json @@ -57,7 +57,7 @@ ], "Groovy": [ ".gradle", - ".groovy" + ".script!" ], "Groovy Server Pages": [ ".gsp" @@ -72,7 +72,8 @@ ".java" ], "JavaScript": [ - ".js" + ".js", + ".script!" ], "JSON": [ ".maxhelp", @@ -104,7 +105,7 @@ ".nim" ], "Nu": [ - ".nu" + ".script!" ], "Objective-C": [ ".h", @@ -132,7 +133,8 @@ "Perl": [ ".pm", ".pl", - ".t" + ".t", + ".script!" ], "PHP": [ ".php", @@ -146,20 +148,22 @@ ".pl" ], "Python": [ - ".py" + ".py", + ".script!" ], "R": [ ".R" ], "Racket": [ - ".scrbl", - ".rkt" + ".script!", + ".scrbl" ], "Rebol": [ ".r" ], "Ruby": [ ".rb", + ".script!", ".rabl", ".rake" ], @@ -171,7 +175,7 @@ ], "Scala": [ ".sbt", - ".scala" + ".script!" 
], "Scheme": [ ".sps" @@ -185,6 +189,7 @@ ".scss" ], "Shell": [ + ".script!", ".bash", ".sh", ".zsh" @@ -235,8 +240,8 @@ "PKGBUILD" ] }, - "tokens_total": 331511, - "languages_total": 250, + "tokens_total": 331500, + "languages_total": 255, "tokens": { "Apex": { "/*": 15, @@ -6682,9 +6687,7 @@ "println": 2, "it.toString": 1, "-": 1, - "#": 1, - "/usr/bin/env": 1, - "groovy": 1 + "SHEBANG#!groovy": 1 }, "Groovy Server Pages": { "": 4, @@ -6744,9 +6747,7 @@ "josh@github.com": 1 }, "Ioke": { - "#": 1, - "/usr/bin/env": 1, - "ioke": 1, + "SHEBANG#!ioke": 1, "println": 1 }, "Java": { @@ -7528,13 +7529,13 @@ "JavaScript": { "/*": 134, "*/": 138, - "function": 2319, - "(": 18005, - ")": 18021, - "{": 6174, - ";": 8412, + "function": 2320, + "(": 18012, + ")": 18028, + "{": 6176, + ";": 8417, "//": 2853, - "var": 1528, + "var": 1529, "Modal": 2, "content": 5, "options": 112, @@ -7545,7 +7546,7 @@ ".proxy": 1, "this.hide": 1, "this": 1045, - "}": 6180, + "}": 6182, "Modal.prototype": 1, "constructor": 4, "toggle": 16, @@ -7643,9 +7644,9 @@ "_super": 4, "Snake.name": 1, "Horse.name": 1, - "console.log": 2, + "console.log": 3, "util": 1, - "require": 8, + "require": 9, "net": 1, "stream": 1, "url": 77, @@ -7924,7 +7925,7 @@ "OutgoingMessage.prototype._flush": 1, "this.socket.writable": 2, "this.socket.write": 1, - "req": 36, + "req": 37, "OutgoingMessage.call": 2, "req.method": 5, "req.httpVersionMajor": 2, @@ -8057,7 +8058,7 @@ "req.res": 9, "req.res.readable": 1, "req.res.emit": 1, - "res": 16, + "res": 17, "req.res._emitPending": 1, "res._emitEnd": 1, "res.emit": 1, @@ -8318,7 +8319,7 @@ "Ta": 1, "<[\\w\\W]+>": 4, "|": 343, - "#": 23, + "#": 22, "Ua": 1, ".": 67, "Va": 1, @@ -9761,7 +9762,7 @@ "results.splice": 2, "Sizzle.matches": 2, "Sizzle.matchesSelector": 2, - "node": 46, + "node": 45, "isXML": 34, "Expr.order.length": 2, "Expr.order": 2, @@ -11635,7 +11636,7 @@ "url=": 1, "dataTypes=": 1, "crossDomain=": 2, - "http": 2, + "http": 3, "80": 2, "443": 2, "s=": 14, @@ 
-12250,6 +12251,11 @@ "define": 2, "define.amd": 1, "define.amd.jQuery": 1, + "SHEBANG#!node": 2, + "http.createServer": 1, + "res.writeHead": 1, + "res.end": 1, + ".listen": 1, "JSON": 3, "Date.prototype.toJSON": 2, "this.valueOf": 2, @@ -12807,7 +12813,6 @@ "this.column": 1, "result.SyntaxError.prototype": 1, "Error.prototype": 1, - "/usr/bin/env": 1, "steelseries": 13, "n.charAt": 1, "n.substring": 1, @@ -16148,9 +16153,7 @@ "echo": 1 }, "Nu": { - "#": 1, - "/usr/bin/env": 1, - "nush": 1, + "SHEBANG#!nush": 1, "(": 1, "puts": 1, ")": 1 @@ -19406,27 +19409,22 @@ "lcPostBase64Data.": 1 }, "Parrot Assembly": { - "#": 1, - "/usr/bin/env": 1, - "parrot": 1, + "SHEBANG#!parrot": 1, ".pcc_sub": 1, "main": 2, "say": 1, "end": 1 }, "Parrot Internal Representation": { - "#": 1, - "/usr/bin/env": 1, - "parrot": 1, + "SHEBANG#!parrot": 1, ".sub": 1, "main": 1, "say": 1, ".end": 1 }, "Perl": { - "#": 258, - "/usr/bin/env": 1, - "perl": 13, + "SHEBANG#!perl": 4, + "#": 249, "use": 70, "warnings": 15, ";": 1152, @@ -19434,14 +19432,14 @@ "our": 34, "VERSION": 15, "MAIN": 1, - "{": 1102, + "{": 1100, "if": 267, "(": 895, "App": 129, "Ack": 134, "ne": 11, "main": 3, - ")": 895, + ")": 893, "die": 37, "}": 1113, "my": 395, @@ -19451,7 +19449,7 @@ "last": 15, "_": 100, "eq": 31, - "/": 70, + "/": 68, "-": 843, "th": 1, "[": 154, @@ -19734,7 +19732,7 @@ "unless": 34, "explicitly": 1, "": 2, - "print": 29, + "print": 30, "file.": 2, "Multiple": 1, "with": 25, @@ -19867,6 +19865,7 @@ "skipped": 2, "make": 3, "binary": 3, + "perl": 8, "ruby": 3, "php": 2, "python": 1, @@ -20390,6 +20389,7 @@ "lc": 5, "r": 10, "header": 17, + "SHEBANG#!#!": 2, "lua": 2, "erl": 2, "hp": 2, @@ -20669,6 +20669,7 @@ "number": 1, "handed": 1, "argument.": 1, + "SHEBANG#!#! 
perl": 4, "examples/benchmarks/fib.pl": 1, "Fibonacci": 2, "Benchmark": 1, @@ -21066,9 +21067,7 @@ "formats": 1, "<+3M>": 1, "reference.": 1, - "AUTHOR": 1, - "/usr/local/bin/perl": 1, - "/usr/bin/perl": 1 + "AUTHOR": 1 }, "PHP": { "<": 9, @@ -22178,7 +22177,7 @@ "future_builtins": 1, "zip": 3, "django.db.models.manager": 1, - "#": 177, + "#": 175, "django.conf": 1, "settings": 1, "django.core.exceptions": 1, @@ -22707,10 +22706,8 @@ "meth": 5, "request.method.lower": 1, "request.method": 1, - "/usr/bin/env": 2, - "python2.4": 1, + "SHEBANG#!python": 2, "print": 1, - "python": 1, "absolute_import": 1, "division": 1, "with_statement": 1, @@ -22899,13 +22896,29 @@ "}": 1 }, "Racket": { + "SHEBANG#!sh": 1, + "#": 2, + "|": 2, + "-": 95, + "*": 2, + "scheme": 1, + "exec": 1, + "racket": 1, + "um": 1, + "(": 7, + "require": 2, + "racket/file": 1, + "racket/path": 1, + "racket/list": 1, + "racket/string": 1, + "for": 2, + "syntax": 1, + "racket/base": 1, + ")": 7, "#lang": 1, "scribble/manual": 1, "@": 3, - "(": 7, - "require": 2, "scribble/bnf": 1, - ")": 7, "@title": 1, "{": 2, "Scribble": 3, @@ -22922,11 +22935,9 @@ "collection": 1, "of": 3, "tools": 1, - "for": 2, "creating": 1, "prose": 2, "documents": 1, - "-": 95, "papers": 1, "books": 1, "library": 1, @@ -22986,21 +22997,7 @@ ";": 1, "@include": 8, "section": 9, - "@index": 1, - "#": 3, - "/bin/sh": 1, - "|": 2, - "*": 2, - "scheme": 1, - "exec": 1, - "racket": 1, - "um": 1, - "racket/file": 1, - "racket/path": 1, - "racket/list": 1, - "racket/string": 1, - "syntax": 1, - "racket/base": 1 + "@index": 1 }, "Rebol": { "REBOL": 1, @@ -23025,12 +23022,12 @@ "task": 2, "default": 2, "do": 36, - "puts": 20, + "puts": 21, "end": 248, "module": 8, "Foo": 1, "require": 58, - "#": 481, + "#": 476, "class": 7, "Formula": 2, "include": 3, @@ -23173,7 +23170,7 @@ "e": 8, "ARGV.debug": 1, "%": 11, - "w": 8, + "w": 7, "config.log": 1, "CMakeCache.txt": 1, ".select": 1, @@ -23197,7 +23194,7 @@ "to_s": 2, "std_cmake_args": 1, 
"W": 1, - "-": 33, + "-": 31, "DCMAKE_INSTALL_PREFIX": 1, "DCMAKE_BUILD_TYPE": 1, "None": 1, @@ -23643,8 +23640,7 @@ "err.to_s": 1, "DEFAULTS.deep_merge": 1, ".deep_merge": 1, - "/usr/bin/env": 5, - "macruby": 1, + "SHEBANG#!macruby": 1, "object": 2, "@user": 1, "person": 1, @@ -23784,11 +23780,8 @@ "keys": 6, "redis.keys": 1, "key.sub": 1, - "rake": 1, - "ruby": 2, - "Ilib": 1, - "test": 6, - "echo": 1, + "SHEBANG#!ruby": 2, + "SHEBANG#!rake": 1, "Sinatra": 2, "Request": 2, "<": 2, @@ -23824,6 +23817,7 @@ ".to_sym": 1, "raise_errors": 1, "Proc.new": 11, + "test": 5, "dump_errors": 1, "show_exceptions": 1, "sessions": 1, @@ -23974,7 +23968,7 @@ "Delegator.target.helpers": 1, "self.use": 1, "Delegator.target.use": 1, - "python": 1 + "SHEBANG#!python": 1 }, "Rust": { "fn": 1, @@ -24122,10 +24116,10 @@ "Credentials": 2, "Path.userHome": 1, "/": 2, - "#": 2, - "/bin/sh": 1, + "SHEBANG#!sh": 1, "exec": 1, "scala": 1, + "#": 1, "object": 1, "HelloWorld": 1, "def": 1, @@ -24385,7 +24379,9 @@ "Shell": { "export": 6, "PATH": 5, - "#": 10, + "SHEBANG#!bash": 4, + "echo": 14, + "#": 5, "pkgname": 1, "stud": 4, "-": 23, @@ -24439,13 +24435,10 @@ "init.stud": 1, "mkdir": 1, "p": 1, - "/usr/bin/env": 2, - "bash": 2, "set": 2, "e": 1, "n": 2, "x": 1, - "echo": 11, "unset": 3, "system": 1, "exec": 1, @@ -24479,9 +24472,8 @@ "rvm_is_not_a_shell_function": 2, "rvm_path/scripts": 1, "rvm": 1, - "/bin/bash": 1, - "/bin/sh": 1, - "/bin/zsh": 1 + "SHEBANG#!sh": 2, + "SHEBANG#!zsh": 2 }, "Standard ML": { "signature": 2, @@ -26469,13 +26461,13 @@ "Emacs Lisp": 3, "GAS": 133, "Gosu": 422, - "Groovy": 71, + "Groovy": 69, "Groovy Server Pages": 91, "Haml": 4, "INI": 8, - "Ioke": 4, + "Ioke": 2, "Java": 7515, - "JavaScript": 150260, + "JavaScript": 150293, "JSON": 619, "Julia": 202, "Kotlin": 155, @@ -26485,30 +26477,30 @@ "Max": 58, "Nemerle": 17, "Nimrod": 2, - "Nu": 6, + "Nu": 4, "Objective-C": 38749, "OCaml": 273, "Opa": 32, "OpenCL": 88, "OpenEdge ABL": 3072, - "Parrot Assembly": 8, - 
"Parrot Internal Representation": 7, - "Perl": 17087, + "Parrot Assembly": 6, + "Parrot Internal Representation": 5, + "Perl": 17075, "PHP": 23550, "PowerShell": 14, "Prolog": 61, - "Python": 4084, + "Python": 4080, "R": 14, - "Racket": 270, + "Racket": 269, "Rebol": 11, - "Ruby": 4339, + "Ruby": 4324, "Rust": 8, "Sass": 28, - "Scala": 319, + "Scala": 318, "Scheme": 3484, "Scilab": 72, "SCSS": 39, - "Shell": 315, + "Shell": 314, "Standard ML": 247, "SuperCollider": 141, "Tea": 3, @@ -26545,7 +26537,7 @@ "INI": 1, "Ioke": 1, "Java": 5, - "JavaScript": 19, + "JavaScript": 20, "JSON": 5, "Julia": 1, "Kotlin": 1, @@ -26563,7 +26555,7 @@ "OpenEdge ABL": 5, "Parrot Assembly": 1, "Parrot Internal Representation": 1, - "Perl": 12, + "Perl": 13, "PHP": 6, "PowerShell": 2, "Prolog": 1, @@ -26578,7 +26570,7 @@ "Scheme": 1, "Scilab": 3, "SCSS": 1, - "Shell": 11, + "Shell": 14, "Standard ML": 2, "SuperCollider": 1, "Tea": 1, @@ -26593,5 +26585,5 @@ "XSLT": 1, "YAML": 1 }, - "md5": "34a5b1ab9d3cf845a0603cef9e9f6509" + "md5": "c1a765b2d321e1a0fe84a6f1624b6663" } \ No newline at end of file diff --git a/lib/linguist/tokenizer.rb b/lib/linguist/tokenizer.rb index ed06a9fe..5682173b 100644 --- a/lib/linguist/tokenizer.rb +++ b/lib/linguist/tokenizer.rb @@ -1,3 +1,5 @@ +require 'strscan' + module Linguist # Generic programming language tokenizer. # @@ -50,8 +52,13 @@ module Linguist tokens = [] until s.eos? + if token = s.scan(/^#!.+$/) + if name = extract_shebang(token) + tokens << "SHEBANG#!#{name}" + end + # Single line comment - if token = s.scan(START_SINGLE_LINE_COMMENT) + elsif token = s.scan(START_SINGLE_LINE_COMMENT) tokens << token.strip s.skip_until(/\n|\Z/) @@ -103,6 +110,33 @@ module Linguist tokens end + # Internal: Extract normalized shebang command token. + # + # Examples + # + # extract_shebang("#!/usr/bin/ruby") + # # => "ruby" + # + # extract_shebang("#!/usr/bin/env node") + # # => "node" + # + # Returns String token or nil it couldn't be parsed. 
+    def extract_shebang(data)
+      s = StringScanner.new(data)
+
+      if path = s.scan(/^#!\s*\S+/)
+        script = path.split('/').last
+        if script == 'env' # interpreter is the next word after env
+          s.scan(/\s+/)
+          script = s.scan(/\S+/) # nil when nothing follows env
+        end
+        script = script[/[^\d]+/, 0] if script # first non-digit run: python2.4 => python
+        return script
+      end
+
+      nil
+    end
+
     # Internal: Extract tokens from inside SGML tag.
     #
     # data - SGML tag String.
diff --git a/samples/Groovy/script.groovy b/samples/Groovy/groovy.script! similarity index 100% rename from samples/Groovy/script.groovy rename to samples/Groovy/groovy.script! diff --git a/samples/JavaScript/script.js b/samples/JavaScript/js.script! similarity index 100% rename from samples/JavaScript/script.js rename to samples/JavaScript/js.script! diff --git a/samples/JavaScript/js2.script! b/samples/JavaScript/js2.script! new file mode 100644 index 00000000..d4d290a5 --- /dev/null +++ b/samples/JavaScript/js2.script! @@ -0,0 +1,7 @@ +#!/usr/bin/env node +var http = require('http'); +http.createServer(function (req, res) { + res.writeHead(200, {'Content-Type': 'text/plain'}); + res.end('Hello World\n'); +}).listen(1337, '127.0.0.1'); +console.log('Server running at http://127.0.0.1:1337/'); diff --git a/samples/Nu/script.nu b/samples/Nu/nu.script! similarity index 100% rename from samples/Nu/script.nu rename to samples/Nu/nu.script! diff --git a/samples/Perl/perl.script! b/samples/Perl/perl.script! new file mode 100755 index 00000000..bb20fbc2 --- /dev/null +++ b/samples/Perl/perl.script! @@ -0,0 +1,2 @@ +#!/usr/local/bin/perl +print "Perl\n" diff --git a/samples/Python/script.py b/samples/Python/python.script! similarity index 100% rename from samples/Python/script.py rename to samples/Python/python.script! diff --git a/samples/Racket/script.rkt b/samples/Racket/rkt.script! similarity index 100% rename from samples/Racket/script.rkt rename to samples/Racket/rkt.script! diff --git a/samples/Ruby/macruby-script b/samples/Ruby/macruby.script! 
similarity index 100% rename from samples/Ruby/macruby-script rename to samples/Ruby/macruby.script! diff --git a/samples/Ruby/script.rb b/samples/Ruby/ruby.script! similarity index 100% rename from samples/Ruby/script.rb rename to samples/Ruby/ruby.script! diff --git a/samples/Ruby/script2.rb b/samples/Ruby/ruby2.script! similarity index 74% rename from samples/Ruby/script2.rb rename to samples/Ruby/ruby2.script! index 40aeb7cb..4b860648 100644 --- a/samples/Ruby/script2.rb +++ b/samples/Ruby/ruby2.script! @@ -1,2 +1,2 @@ #! /usr/bin/env ruby -w -Ilib:test -echo "Ruby" +puts "Ruby" diff --git a/samples/Scala/script.scala b/samples/Scala/scala.script! similarity index 100% rename from samples/Scala/script.scala rename to samples/Scala/scala.script! diff --git a/samples/Shell/bash.script! b/samples/Shell/bash.script! new file mode 100755 index 00000000..0c2172c2 --- /dev/null +++ b/samples/Shell/bash.script! @@ -0,0 +1,2 @@ +#!/bin/bash +echo "bash" diff --git a/samples/Shell/sh.script! b/samples/Shell/sh.script! new file mode 100755 index 00000000..e5ed467e --- /dev/null +++ b/samples/Shell/sh.script! @@ -0,0 +1,2 @@ +#!/bin/sh +echo "sh" diff --git a/samples/Shell/zsh.script! b/samples/Shell/zsh.script! new file mode 100755 index 00000000..ed274ab4 --- /dev/null +++ b/samples/Shell/zsh.script! @@ -0,0 +1,2 @@ +#!/bin/zsh +echo "zsh" diff --git a/samples/Text/script.foo b/samples/Text/script.foo deleted file mode 100755 index f94d7407..00000000 --- a/samples/Text/script.foo +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/foo -??? 
diff --git a/test/test_blob.rb b/test/test_blob.rb index 079554a4..c8f103ee 100644 --- a/test/test_blob.rb +++ b/test/test_blob.rb @@ -45,10 +45,6 @@ class TestBlob < Test::Unit::TestCase assert_equal "application/pdf", blob("Binary/foo.pdf").content_type assert_equal "image/png", blob("Binary/foo.png").content_type assert_equal "text/plain; charset=iso-8859-2", blob("Text/README").content_type - assert_equal "text/plain; charset=iso-8859-1", blob("Perl/script.pl").content_type - assert_equal "text/plain; charset=iso-8859-1", blob("Python/script.py").content_type - assert_equal "text/plain; charset=iso-8859-1", blob("Ruby/script.rb").content_type - assert_equal "text/plain; charset=iso-8859-1", blob("Shell/script.sh").content_type end def test_disposition @@ -280,25 +276,6 @@ class TestBlob < Test::Unit::TestCase assert_equal Lexer['Ruby'], blob("Ruby/foo.rb").lexer end - def test_shebang_script - assert_equal 'sh', script_blob("Shell/script.sh").shebang_script - assert_equal 'bash', script_blob("Shell/script.bash").shebang_script - assert_equal 'zsh', script_blob("Shell/script.zsh").shebang_script - assert_equal 'perl', script_blob("Perl/script.pl").shebang_script - assert_equal 'ruby', script_blob("Ruby/script.rb").shebang_script - assert_equal 'ruby', script_blob("Ruby/script2.rb").shebang_script - assert_equal 'python', script_blob("Python/script.py").shebang_script - assert_equal 'node', script_blob("JavaScript/script.js").shebang_script - assert_equal 'groovy', script_blob("Groovy/script.groovy").shebang_script - assert_equal 'macruby', script_blob("Ruby/macruby-script").shebang_script - assert_equal 'rake', script_blob("Ruby/script.rake").shebang_script - assert_equal 'foo', script_blob("Text/script.foo").shebang_script - assert_equal 'nush', script_blob("Nu/script.nu").shebang_script - assert_equal 'scala', script_blob("Scala/script.scala").shebang_script - assert_equal 'racket', script_blob("Racket/script.rkt").shebang_script - assert_equal nil, 
script_blob("Ruby/foo.rb").shebang_script - end - def test_colorize assert_equal <<-HTML, blob("Ruby/foo.rb").colorize
module Foo
diff --git a/test/test_classifier.rb b/test/test_classifier.rb
index a24c5ba1..0a477831 100644
--- a/test/test_classifier.rb
+++ b/test/test_classifier.rb
@@ -54,11 +54,8 @@ class TestClassifier < Test::Unit::TestCase
 
   def test_classify_ambiguous_languages
     Samples.each do |sample|
-      language = Linguist::Language.find_by_name(sample[:language])
-      next unless language.overrides.any?
-
-      extname   = File.extname(sample[:path])
-      languages = Language.all.select { |l| l.extensions.include?(extname) }.map(&:name)
+      language  = Linguist::Language.find_by_name(sample[:language])
+      languages = Language.find_by_filename(sample[:path]).map(&:name)
       next unless languages.length > 1
 
       results = Classifier.classify(Samples::DATA, File.read(sample[:path]), languages)
diff --git a/test/test_language.rb b/test/test_language.rb
index 757b9177..47483b0c 100644
--- a/test/test_language.rb
+++ b/test/test_language.rb
@@ -8,16 +8,6 @@ class TestLanguage < Test::Unit::TestCase
 
   Lexer = Pygments::Lexer
 
-  def test_ambiguous_extensions
-    assert Language.ambiguous?('.cls')
-    assert Language.ambiguous?('.h')
-    assert Language.ambiguous?('.m')
-    assert Language.ambiguous?('.pl')
-    assert Language.ambiguous?('.r')
-    assert Language.ambiguous?('.t')
-    assert Language.ambiguous?('.v')
-  end
-
   def test_lexer
     assert_equal Lexer['ActionScript 3'], Language['ActionScript'].lexer
     assert_equal Lexer['Bash'], Language['Gentoo Ebuild'].lexer
diff --git a/test/test_tokenizer.rb b/test/test_tokenizer.rb
index d57726c3..4fb49a4a 100644
--- a/test/test_tokenizer.rb
+++ b/test/test_tokenizer.rb
@@ -85,6 +85,17 @@ class TestTokenizer < Test::Unit::TestCase
     assert_equal %w(#import  int main \( int argc char *argv [ ] \) { NSLog \( @ \) ; return ; }), tokenize(:"Objective-C/hello.m")
   end
 
+  def test_shebang
+    # Each pair: sample path and the token expected first in its token stream.
+    [[:"Shell/sh.script!", "SHEBANG#!sh"], [:"Shell/bash.script!", "SHEBANG#!bash"],
+     [:"Shell/zsh.script!", "SHEBANG#!zsh"], [:"Perl/perl.script!", "SHEBANG#!perl"],
+     [:"Python/python.script!", "SHEBANG#!python"], [:"Ruby/ruby.script!", "SHEBANG#!ruby"],
+     [:"Ruby/ruby2.script!", "SHEBANG#!ruby"], [:"JavaScript/js.script!", "SHEBANG#!node"]
+    ].each do |sample, expected|
+      assert_equal expected, tokenize(sample)[0]
+    end
+  end
+
   def test_javascript_tokens
     assert_equal %w( \( function \( \) { console.log \( \) ; } \) .call \( this \) ;), tokenize(:"JavaScript/hello.js")
   end
@@ -95,7 +106,6 @@ class TestTokenizer < Test::Unit::TestCase
 
   def test_ruby_tokens
     assert_equal %w(module Foo end), tokenize(:"Ruby/foo.rb")
-    assert_equal %w(# /usr/bin/env ruby puts), tokenize(:"Ruby/script.rb")
     assert_equal %w(task default do puts end), tokenize(:"Ruby/filenames/Rakefile")
   end
 end