diff --git a/lib/linguist/file_blob.rb b/lib/linguist/file_blob.rb index 7e7f1acd..bc475023 100644 --- a/lib/linguist/file_blob.rb +++ b/lib/linguist/file_blob.rb @@ -52,5 +52,20 @@ module Linguist def size File.size(@path) end + + # Public: Get file extension. + # + # Returns a String. + def extension + # File.extname returns nil if the filename is an extension. + extension = File.extname(name) + basename = File.basename(name) + # Checks if the filename is an extension. + if extension.empty? && basename[0] == "." + basename + else + extension + end + end end end diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb index 81e70361..b2245b87 100644 --- a/lib/linguist/language.rb +++ b/lib/linguist/language.rb @@ -104,12 +104,13 @@ module Linguist # # We'll perform a more comprehensive test later which actually involves # looking for binary characters in the blob - return nil if blob.likely_binary? + return nil if blob.likely_binary? || blob.binary? # A bit of an elegant hack. If the file is executable but extensionless, # append a "magic" extension so it can be classified with other # languages that have shebang scripts. - if File.extname(name).empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05 + extension = FileBlob.new(name).extension + if extension.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05 name += ".script!" end @@ -124,7 +125,7 @@ module Linguist possible_language_names = possible_languages.map(&:name) # Don't bother with binary contents or an empty file - if blob.binary? || data.nil? || data == "" + if data.nil? || data == "" nil # Check if there's a shebang line and use that as authoritative elsif (result = find_by_shebang(data)) && !result.empty? @@ -189,7 +190,8 @@ module Linguist # # Returns all matching Languages or [] if none were found. def self.find_by_filename(filename) - basename, extname = File.basename(filename), File.extname(filename) + basename = File.basename(filename) + extname = FileBlob.new(filename).extension langs = @filename_index[basename] + @extension_index[extname] langs.compact.uniq @@ -401,7 +403,7 @@ module Linguist # # Returns the extensions Array attr_reader :filenames - + # Public: Return all possible extensions for language def all_extensions (extensions + [primary_extension]).uniq diff --git a/lib/linguist/samples.json b/lib/linguist/samples.json index 0438a55f..46357516 100644 --- a/lib/linguist/samples.json +++ b/lib/linguist/samples.json @@ -743,6 +743,9 @@ "Nginx": [ "nginx.conf" ], + "PHP": [ + ".php" + ], "Perl": [ "ack" ], @@ -785,6 +788,9 @@ ".gvimrc", ".vimrc" ], + "XML": [ + ".cproject" + ], "YAML": [ ".gemrc" ], @@ -794,8 +800,8 @@ "exception.zep.php" ] }, - "tokens_total": 638833, - "languages_total": 872, + "tokens_total": 640042, + "languages_total": 874, "tokens": { "ABAP": { "*/**": 1, @@ -50730,7 +50736,7 @@ }, "PHP": { "<": 11, - "php": 12, + "php": 14, "namespace": 28, "Symfony": 24, "Component": 24, @@ -51454,6 +51460,19 @@ "base_url": 1, "php_filter_info": 1, "filters": 2, + "SHEBANG#!php": 4, + "": 1, + "aMenuLinks": 1, + "Array": 13, + "Blog": 1, + "SITE_DIR": 4, + "Photos": 1, + "photo": 1, + "About": 1, + "me": 1, + "about": 1, + "Contact": 1, + "contacts": 1, "Field": 9, "FormField": 3, "ArrayAccess": 1, @@ -51748,7 +51767,6 @@ "isUnique": 1, "is_bool": 1, "sql": 1, - "SHEBANG#!php": 3, "echo": 2, "Yii": 3, "console": 3, @@ -67843,9 +67861,9 @@ "return": 1 }, "XML": { - "": 11, - "version=": 17, - "encoding=": 7, + "": 12, + "version=": 21, + "encoding=": 8, "": 7, "ToolsVersion=": 6, "DefaultTargets=": 5, @@ -67922,6 +67940,94 @@ "": 10, "": 5, "": 7, + "standalone=": 1, + "": 1, + "4": 1, + "0": 2, + "": 1, + "storage_type_id=": 1, + "": 14, + "moduleId=": 14, + "": 2, + "id=": 141, + "buildSystemId=": 2, + "name=": 270, + "": 2, + "": 2, + "": 12, + "point=": 12, + "": 2, + "": 7, + "": 2, + "artifactName=": 2, + "buildArtefactType=": 2, + "buildProperties=": 2, + "cleanCommand=": 2, + "description=": 4, + "cdt": 2, + "managedbuild": 2, + "config": 2, + "gnu": 2, + "exe": 2, + "debug": 1, + "1803931088": 1, + "parent=": 2, + "": 2, + "resourcePath=": 2, + "": 2, + "superClass=": 42, + "": 2, + "": 2, + "buildPath=": 2, + "keepEnvironmentInBuildfile=": 2, + "managedBuildOn=": 2, + "": 12, + "": 4, + "": 8, + "": 8, + "defaultValue=": 2, + "": 4, + "kind=": 6, + "paths=": 4, + "": 2, + "": 2, + "": 2, + "": 2, + "": 2, + "flags=": 2, + "": 2, + "": 2, + "": 2, + "release": 1, + "32754498": 1, + "": 2, + "projectType=": 1, + "": 5, + "enabled=": 125, + "problemReportingEnabled=": 5, + "selectedProfileId=": 5, + "": 40, + "": 40, + "": 40, + "filePath=": 40, + "": 80, + "": 40, + "": 40, + "": 40, + "arguments=": 40, + "command=": 40, + "useDefault=": 40, + "": 40, + "": 40, + "": 4, + "instanceId=": 4, + "": 4, + "": 1, "": 2, "": 2, "cfa7a11": 1, @@ -67969,8 +68075,6 @@ "FSharp": 1, "": 1, "": 1, - "": 1, - "name=": 227, "xmlns": 2, "ea=": 2, "": 4, @@ -68025,14 +68129,12 @@ "application": 2, "": 1, "": 1, - "value=": 1, "": 1, "": 1, "": 1, "": 1, "": 2, "visibility=": 2, - "description=": 2, "": 1, "": 1, "": 4, @@ -70552,7 +70654,7 @@ "Ox": 1006, "Oxygene": 157, "PAWN": 3263, - "PHP": 20724, + "PHP": 20754, "Pan": 130, "Parrot Assembly": 6, "Parrot Internal Representation": 5, @@ -70618,7 +70720,7 @@ "Visual Basic": 581, "Volt": 388, "XC": 24, - "XML": 7057, + "XML": 8236, "XProc": 22, "XQuery": 801, "XSLT": 44, @@ -70753,7 +70855,7 @@ "Ox": 3, "Oxygene": 1, "PAWN": 1, - "PHP": 9, + "PHP": 10, "Pan": 1, "Parrot Assembly": 1, "Parrot Internal Representation": 1, @@ -70819,7 +70921,7 @@ "Visual Basic": 3, "Volt": 1, "XC": 1, - "XML": 13, + "XML": 14, "XProc": 1, "XQuery": 1, "XSLT": 1, @@ -70832,5 +70934,5 @@ "fish": 3, "wisp": 1 }, - "md5": "bd012ec237a6d47ded2203578618abfc" + "md5": "e1a3dc40ad721b18cc222fa645d65304" } \ No newline at end of file diff --git a/lib/linguist/vendor.yml b/lib/linguist/vendor.yml index 68e60eae..c497960c 100644 --- a/lib/linguist/vendor.yml +++ b/lib/linguist/vendor.yml @@ -40,6 +40,9 @@ - foundation.min.css - foundation.css +# Normalize.css +- normalize.css + # Vendored dependencies - thirdparty/ - vendors?/ diff --git a/lib/linguist/version.rb b/lib/linguist/version.rb index b7ed14c9..89151b75 100644 --- a/lib/linguist/version.rb +++ b/lib/linguist/version.rb @@ -1,3 +1,3 @@ module Linguist - VERSION = "3.0.2" + VERSION = "3.0.3" end diff --git a/samples/PHP/filenames/.php b/samples/PHP/filenames/.php new file mode 100755 index 00000000..be170195 --- /dev/null +++ b/samples/PHP/filenames/.php @@ -0,0 +1,34 @@ +#!/usr/bin/env php + diff --git a/samples/XML/filenames/.cproject b/samples/XML/filenames/.cproject new file mode 100755 index 00000000..5fbff7b7 --- /dev/null +++ b/samples/XML/filenames/.cproject @@ -0,0 +1,542 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test/test_blob.rb b/test/test_blob.rb index 2977bb90..fc9e13c5 100644 --- a/test/test_blob.rb +++ b/test/test_blob.rb @@ -140,6 +140,13 @@ class TestBlob < Test::Unit::TestCase assert !blob("Perl/script.pl").binary? end + def test_all_binary + Samples.each do |sample| + blob = blob(sample[:path]) + assert ! (blob.likely_binary? || blob.binary?), "#{sample[:path]} is a binary file" + end + end + def test_text assert blob("Text/README").text? assert blob("Text/dump.sql").text? @@ -277,7 +284,7 @@ class TestBlob < Test::Unit::TestCase # 'thirdparty' directory assert blob("thirdparty/lib/main.c").vendored? - + # 'extern(al)' directory assert blob("extern/util/__init__.py").vendored? assert blob("external/jquery.min.js").vendored? @@ -385,7 +392,10 @@ class TestBlob < Test::Unit::TestCase # NuGet Packages assert blob("packages/Modernizr.2.0.6/Content/Scripts/modernizr-2.0.6-development-only.js").vendored? - + + # Normalize + assert blob("some/asset/path/normalize.css").vendored? + # Cocoapods assert blob('Pods/blah').vendored? diff --git a/test/test_language.rb b/test/test_language.rb index 28caabf4..52f0ddf9 100644 --- a/test/test_language.rb +++ b/test/test_language.rb @@ -251,8 +251,7 @@ class TestLanguage < Test::Unit::TestCase assert_equal Language['Nginx'], Language.find_by_filename('nginx.conf').first assert_equal ['C', 'C++', 'Objective-C'], Language.find_by_filename('foo.h').map(&:name).sort assert_equal [], Language.find_by_filename('rb') - assert_equal [], Language.find_by_filename('.rb') - assert_equal [], Language.find_by_filename('.nkt') + assert_equal [], Language.find_by_filename('.null') assert_equal [Language['Shell']], Language.find_by_filename('.bashrc') assert_equal [Language['Shell']], Language.find_by_filename('bash_profile') assert_equal [Language['Shell']], Language.find_by_filename('.zshrc')