diff --git a/lib/linguist/language.rb b/lib/linguist/language.rb index de1d36f6..88ccb9fb 100644 --- a/lib/linguist/language.rb +++ b/lib/linguist/language.rb @@ -442,11 +442,13 @@ module Linguist end extensions = Sample.extensions + filenames = Sample.filenames popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__)) YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options| aliases = [name.downcase.gsub(/\s/, '-') ] + (options[:aliases] || []) options['extensions'] ||= [] + options['filenames'] ||= [] aliases.each do |name| if extnames = extensions[name] extnames.each do |extname| @@ -457,6 +459,16 @@ module Linguist end end end + + if fns = filenames[name] + fns.each do |filename| + if !options['filenames'].include?(filename) + options['filenames'] << filename + else + warn "#{name} #{filename.inspect} is already defined in samples/. Remove from languages.yml." + end + end + end end lang = Language.create( diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index c9d6dd5f..ddfd5e90 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -1064,11 +1064,9 @@ Ruby: - .thor - .watchr filenames: - - Capfile - Gemfile - Guardfile - Podfile - - Rakefile - Thorfile - Vagrantfile @@ -1151,7 +1149,6 @@ Shell: - .zshrc - bashrc - zshrc - - PKGBUILD Smalltalk: type: programming diff --git a/lib/linguist/sample.rb b/lib/linguist/sample.rb index 096a9cba..a200a2c5 100644 --- a/lib/linguist/sample.rb +++ b/lib/linguist/sample.rb @@ -22,7 +22,24 @@ module Linguist dirname = File.join(PATH, category) Dir.entries(dirname).each do |filename| next if filename == '.' || filename == '..' - yield({ :path => File.join(dirname, filename), :language => category }) + + if filename == 'filenames' + Dir.entries(File.join(dirname, filename)).each do |subfilename| + next if subfilename == '.' || subfilename == '..' 
+ + yield({ + :path => File.join(dirname, filename, subfilename), + :language => category, + :filename => subfilename + }) + end + else + yield({ + :path => File.join(dirname, filename), + :language => category, + :extname => File.extname(filename) + }) + end end end @@ -36,15 +53,29 @@ module Linguist def self.extensions extensions = {} each do |sample| - extname = File.extname(sample[:path]) # TODO: For now skip empty extnames - next if extname == "" + next if sample[:extname].nil? || sample[:extname] == "" extensions[sample[:language]] ||= Set.new - extensions[sample[:language]] << extname + extensions[sample[:language]] << sample[:extname] end extensions end + # Get all filenames listed in samples/ + # + # Returns Hash of sample language keys with a Set of filename + # Strings. + def self.filenames + filenames = {} + each do |sample| + # Skip samples that are not under a filenames/ directory + next if sample[:filename].nil? + filenames[sample[:language]] ||= Set.new + filenames[sample[:language]] << sample[:filename] + end + filenames + end + # Public: Build Classifier from all samples. # # Returns trained Classifier. 
diff --git a/samples/ruby/Capfile b/samples/ruby/filenames/Capfile similarity index 100% rename from samples/ruby/Capfile rename to samples/ruby/filenames/Capfile diff --git a/samples/ruby/Rakefile b/samples/ruby/filenames/Rakefile similarity index 100% rename from samples/ruby/Rakefile rename to samples/ruby/filenames/Rakefile diff --git a/samples/shell/PKGBUILD b/samples/shell/filenames/PKGBUILD similarity index 100% rename from samples/shell/PKGBUILD rename to samples/shell/filenames/PKGBUILD diff --git a/test/test_tokenizer.rb b/test/test_tokenizer.rb index 00142b5c..420c7768 100644 --- a/test/test_tokenizer.rb +++ b/test/test_tokenizer.rb @@ -87,6 +87,6 @@ class TestTokenizer < Test::Unit::TestCase def test_ruby_tokens assert_equal %w(module Foo end), tokenize(:"ruby/foo.rb") assert_equal %w(# /usr/bin/env ruby puts), tokenize(:"ruby/script.rb") - assert_equal %w(task default do puts end), tokenize(:"ruby/Rakefile") + assert_equal %w(task default do puts end), tokenize(:"ruby/filenames/Rakefile") end end