Merge remote-tracking branch 'origin/master' into filename-matches-multiple-langages

* origin/master:
  Allow mime-types 2.x to be used with Linguist
  Upgrade to rugged 0.22.0b1
  Mention that languages need to be quite popular
  fix vendor/cache
  Gemfile.lock is no longer considered generated
  Tests for BlobHelper#empty?
  remove reference to empty.js
  Remove more empty samples
  Bail earlier if the file is empty.
  Moving comments
  Use heuristics earlier to inform the rest of the classification process
  Removing inconsistency of `find_by_heuristics` (was sometimes returning nil and sometimes returning an empty array)
  Removing unused array of candidate languages.
  Reworking most heuristics to only return one match
This commit is contained in:
Brandon Keepers
2014-11-18 14:09:15 -05:00
14 changed files with 114 additions and 92 deletions

View File

@@ -193,8 +193,8 @@ class TestBlob < Test::Unit::TestCase
assert blob("Binary/MainMenu.nib").generated?
assert !blob("XML/project.pbxproj").generated?
# Gemfile.locks
assert blob("Gemfile.lock").generated?
# Gemfile.lock is NOT generated
assert !blob("Gemfile.lock").generated?
# Generated .NET Docfiles
assert blob("XML/net_docfile.xml").generated?
@@ -226,7 +226,6 @@ class TestBlob < Test::Unit::TestCase
assert !blob("PostScript/sierpinski.ps").generated?
# These examples are too basic to tell
assert !blob("JavaScript/empty.js").generated?
assert !blob("JavaScript/hello.js").generated?
assert blob("JavaScript/intro-old.js").generated?
@@ -469,4 +468,13 @@ class TestBlob < Test::Unit::TestCase
# Asserts that the minified jQuery fixture is reported as not safe to colorize.
def test_minified_files_not_safe_to_highlight
assert !blob("JavaScript/jquery-1.6.1.min.js").safe_to_colorize?
end
# Exercises BlobHelper#empty? through a minimal Struct that only supplies `data`.
def test_empty
# Anonymous Struct with a single `data` member that mixes in Linguist::BlobHelper.
blob = Struct.new(:data) { include Linguist::BlobHelper }
# Empty-string and nil data are both expected to count as empty.
assert blob.new("").empty?
assert blob.new(nil).empty?
# Whitespace-only and non-empty data are expected not to.
refute blob.new(" ").empty?
refute blob.new("nope").empty?
end
end

View File

@@ -20,18 +20,18 @@ class TestHeuristcs < Test::Unit::TestCase
Dir.glob("#{samples_path}/#{language_name}/#{file}")
end
# Candidate languages = ["C++", "Objective-C"]
# Checks that Heuristics.disambiguate_c puts Objective-C first for every
# Objective-C "*.h" fixture.
# NOTE(review): `results` is assigned twice inside the loop; the second
# (single-argument) call overwrites the first, so only it reaches the
# assertion. This looks like both sides of a diff shown together — confirm
# which call is current.
def test_obj_c_by_heuristics
languages = ["C++", "Objective-C"]
# Only calling out '.h' filenames as these are the ones causing issues
all_fixtures("Objective-C", "*.h").each do |fixture|
results = Heuristics.disambiguate_c(fixture("Objective-C/#{File.basename(fixture)}"), languages)
results = Heuristics.disambiguate_c(fixture("Objective-C/#{File.basename(fixture)}"))
assert_equal Language["Objective-C"], results.first
end
end
# Candidate languages = ["C++", "Objective-C"]
# Checks that Heuristics.disambiguate_c puts C++ first for the
# C++/render_adapter.cpp fixture.
# NOTE(review): `results` is assigned twice; the second (single-argument) call
# overwrites the first, so only it reaches the assertion — confirm which call
# is current.
def test_cpp_by_heuristics
languages = ["C++", "Objective-C"]
results = Heuristics.disambiguate_c(fixture("C++/render_adapter.cpp"), languages)
results = Heuristics.disambiguate_c(fixture("C++/render_adapter.cpp"))
assert_equal Language["C++"], results.first
end
@@ -41,57 +41,57 @@ class TestHeuristcs < Test::Unit::TestCase
assert_equal Language["Objective-C"], match
end
# Candidate languages = ["Perl", "Prolog"]
# Checks that Heuristics.disambiguate_pl puts Prolog first for the
# Prolog/turing.pl fixture.
# NOTE(review): `results` is assigned twice; the second (single-argument) call
# overwrites the first, so only it reaches the assertion — confirm which call
# is current.
def test_pl_prolog_by_heuristics
languages = ["Perl", "Prolog"]
results = Heuristics.disambiguate_pl(fixture("Prolog/turing.pl"), languages)
results = Heuristics.disambiguate_pl(fixture("Prolog/turing.pl"))
assert_equal Language["Prolog"], results.first
end
# Candidate languages = ["Perl", "Prolog"]
# Checks that Heuristics.disambiguate_pl puts Perl first for the
# Perl/perl-test.t fixture.
# NOTE(review): `results` is assigned twice; the second (single-argument) call
# overwrites the first, so only it reaches the assertion — confirm which call
# is current.
def test_pl_perl_by_heuristics
languages = ["Perl", "Prolog"]
results = Heuristics.disambiguate_pl(fixture("Perl/perl-test.t"), languages)
results = Heuristics.disambiguate_pl(fixture("Perl/perl-test.t"))
assert_equal Language["Perl"], results.first
end
# Candidate languages = ["ECL", "Prolog"]
# Checks that Heuristics.disambiguate_ecl puts Prolog first for the
# Prolog/or-constraint.ecl fixture.
# NOTE(review): `results` is assigned twice; the second (single-argument) call
# overwrites the first, so only it reaches the assertion — confirm which call
# is current.
def test_ecl_prolog_by_heuristics
languages = ["ECL", "Prolog"]
results = Heuristics.disambiguate_ecl(fixture("Prolog/or-constraint.ecl"), languages)
results = Heuristics.disambiguate_ecl(fixture("Prolog/or-constraint.ecl"))
assert_equal Language["Prolog"], results.first
end
# Candidate languages = ["ECL", "Prolog"]
# Checks that Heuristics.disambiguate_ecl puts ECL first for the
# ECL/sample.ecl fixture.
# NOTE(review): `results` is assigned twice; the second (single-argument) call
# overwrites the first, so only it reaches the assertion — confirm which call
# is current.
def test_ecl_ecl_by_heuristics
languages = ["ECL", "Prolog"]
results = Heuristics.disambiguate_ecl(fixture("ECL/sample.ecl"), languages)
results = Heuristics.disambiguate_ecl(fixture("ECL/sample.ecl"))
assert_equal Language["ECL"], results.first
end
# Candidate languages = ["IDL", "Prolog"]
# Checks that Heuristics.disambiguate_pro puts Prolog first for the
# Prolog/logic-problem.pro fixture.
# NOTE(review): `results` is assigned twice; the second (single-argument) call
# overwrites the first, so only it reaches the assertion — confirm which call
# is current.
def test_pro_prolog_by_heuristics
languages = ["IDL", "Prolog"]
results = Heuristics.disambiguate_pro(fixture("Prolog/logic-problem.pro"), languages)
results = Heuristics.disambiguate_pro(fixture("Prolog/logic-problem.pro"))
assert_equal Language["Prolog"], results.first
end
# Candidate languages = ["IDL", "Prolog"]
# Checks that Heuristics.disambiguate_pro puts IDL first for the
# IDL/mg_acosh.pro fixture.
# NOTE(review): `results` is assigned twice; the second (single-argument) call
# overwrites the first, so only it reaches the assertion — confirm which call
# is current.
def test_pro_idl_by_heuristics
languages = ["IDL", "Prolog"]
results = Heuristics.disambiguate_pro(fixture("IDL/mg_acosh.pro"), languages)
results = Heuristics.disambiguate_pro(fixture("IDL/mg_acosh.pro"))
assert_equal Language["IDL"], results.first
end
# Candidate languages = ["AGS Script", "AsciiDoc"]
# Checks that Heuristics.disambiguate_asc puts AsciiDoc first for the
# AsciiDoc/list.asc fixture.
# NOTE(review): `results` is assigned twice; the second (single-argument) call
# overwrites the first, so only it reaches the assertion — confirm which call
# is current.
def test_asc_asciidoc_by_heuristics
languages = ["AGS Script", "AsciiDoc"]
results = Heuristics.disambiguate_asc(fixture("AsciiDoc/list.asc"), languages)
results = Heuristics.disambiguate_asc(fixture("AsciiDoc/list.asc"))
assert_equal Language["AsciiDoc"], results.first
end
# Candidate languages = ["TypeScript", "XML"]
# Checks that Heuristics.disambiguate_ts puts TypeScript first for the
# TypeScript/classes.ts fixture.
# NOTE(review): `results` is assigned twice; the second (single-argument) call
# overwrites the first, so only it reaches the assertion — confirm which call
# is current.
def test_ts_typescript_by_heuristics
languages = ["TypeScript", "XML"]
results = Heuristics.disambiguate_ts(fixture("TypeScript/classes.ts"), languages)
results = Heuristics.disambiguate_ts(fixture("TypeScript/classes.ts"))
assert_equal Language["TypeScript"], results.first
end
# Candidate languages = ["TypeScript", "XML"]
# Checks that Heuristics.disambiguate_ts puts XML first for the
# XML/pt_BR.xml fixture.
# NOTE(review): `results` is assigned twice; the second (single-argument) call
# overwrites the first, so only it reaches the assertion — confirm which call
# is current.
def test_ts_xml_by_heuristics
languages = ["TypeScript", "XML"]
results = Heuristics.disambiguate_ts(fixture("XML/pt_BR.xml"), languages)
results = Heuristics.disambiguate_ts(fixture("XML/pt_BR.xml"))
assert_equal Language["XML"], results.first
end
@@ -99,27 +99,27 @@ class TestHeuristcs < Test::Unit::TestCase
languages = ["Common Lisp", "OpenCL"]
languages.each do |language|
all_fixtures(language).each do |fixture|
results = Heuristics.disambiguate_cl(fixture("#{language}/#{File.basename(fixture)}"), languages)
results = Heuristics.disambiguate_cl(fixture("#{language}/#{File.basename(fixture)}"))
assert_equal Language[language], results.first
end
end
end
# Candidate languages = ["Hack", "PHP"]
# Checks that Heuristics.disambiguate_hack puts Hack first for the
# Hack/funs.php fixture.
# NOTE(review): `results` is assigned twice; the second (single-argument) call
# overwrites the first, so only it reaches the assertion — confirm which call
# is current.
def test_hack_by_heuristics
languages = ["Hack", "PHP"]
results = Heuristics.disambiguate_hack(fixture("Hack/funs.php"), languages)
results = Heuristics.disambiguate_hack(fixture("Hack/funs.php"))
assert_equal Language["Hack"], results.first
end
# Candidate languages = ["Scala", "SuperCollider"]
# Checks that Heuristics.disambiguate_sc puts SuperCollider first for the
# SuperCollider/WarpPreset.sc fixture.
# NOTE(review): `results` is assigned twice; the second (single-argument) call
# overwrites the first, so only it reaches the assertion — confirm which call
# is current.
def test_sc_supercollider_by_heuristics
languages = ["Scala", "SuperCollider"]
results = Heuristics.disambiguate_sc(fixture("SuperCollider/WarpPreset.sc"), languages)
results = Heuristics.disambiguate_sc(fixture("SuperCollider/WarpPreset.sc"))
assert_equal Language["SuperCollider"], results.first
end
# Candidate languages = ["Scala", "SuperCollider"]
# Checks that Heuristics.disambiguate_sc puts Scala first for the
# Scala/node11.sc fixture.
# NOTE(review): `results` is assigned twice; the second (single-argument) call
# overwrites the first, so only it reaches the assertion — confirm which call
# is current.
def test_sc_scala_by_heuristics
languages = ["Scala", "SuperCollider"]
results = Heuristics.disambiguate_sc(fixture("Scala/node11.sc"), languages)
results = Heuristics.disambiguate_sc(fixture("Scala/node11.sc"))
assert_equal Language["Scala"], results.first
end
end