Merge branch 'refactor-heuristics' into 1036-local

* refactor-heuristics: (43 commits)
  update docs
  Clean up heuristic logic
  Allow disambiguate to return an Array
  Rename .create to .disambiguate
  docs
  Remove inactive heuristics
  Refactor heuristics
  Not going back
  docs
  Move call method into existing Classifier class
  Try strategies until one language is returned
Remove unneeded empty blob check
  Add F# and GLSL samples.  Add Forth and GLSL extension .fs. Add heuristic to disambiguate between F#, Forth, and GLSL.
  byebug requires ruby 2.0
  Remove test for removed extension
  Fix typo in test
  add rake interpreter
  add python3 interpreter
  Remove old wrong_shebang.rb sample
  Add byebug
  ...

Conflicts:
	lib/linguist/heuristics.rb
	test/test_heuristics.rb
This commit is contained in:
Brandon Keepers
2014-11-28 17:58:00 -06:00
32 changed files with 2199 additions and 294 deletions

22
test/fixtures/Python/run_tests.module vendored Normal file
View File

@@ -0,0 +1,22 @@
#!/usr/bin/env python
# Launcher script: changes into its own directory, resolves MOOSE_DIR, adds
# MOOSE_DIR/python to sys.path and runs the TestHarness for this application.
import sys, os

# Set the current working directory to the directory where this script is located
os.chdir(os.path.abspath(os.path.dirname(sys.argv[0])))

#### Set the name of the application here and moose directory relative to the application
app_name = 'stork'

MODULE_DIR = os.path.abspath('..')
MOOSE_DIR = os.path.abspath(os.path.join(MODULE_DIR, '..'))

#### See if MOOSE_DIR is already in the environment instead
# `in` is used rather than has_key(), which was removed in Python 3;
# the membership test behaves the same on Python 2.
if "MOOSE_DIR" in os.environ:
    MOOSE_DIR = os.environ['MOOSE_DIR']

# These imports must come after the sys.path change: path_tool is presumably
# located under MOOSE_DIR/python (verify against the MOOSE checkout).
sys.path.append(os.path.join(MOOSE_DIR, 'python'))
import path_tool
path_tool.activate_module('TestHarness')

from TestHarness import TestHarness
# Run the tests!
TestHarness.buildAndRun(sys.argv, app_name, MOOSE_DIR)

1505
test/fixtures/Shell/mintleaf.module vendored Normal file

File diff suppressed because it is too large Load Diff

4
test/helper.rb Normal file
View File

@@ -0,0 +1,4 @@
require "bundler/setup"
require "test/unit"
require "mocha/setup"
require "linguist"

View File

@@ -1,9 +1,4 @@
require 'linguist/file_blob'
require 'linguist/samples'
require 'test/unit'
require 'mocha/setup'
require 'mime/types'
require_relative "./helper"
class TestBlob < Test::Unit::TestCase
include Linguist
@@ -470,6 +465,25 @@ class TestBlob < Test::Unit::TestCase
assert blob.language, "No language for #{sample[:path]}"
assert_equal sample[:language], blob.language.name, blob.name
end
# Test language detection for files which shouldn't be used as samples
root = File.expand_path('../fixtures', __FILE__)
Dir.entries(root).each do |language|
next unless File.file?(language)
# Each directory contains test files of a language
dirname = File.join(root, language)
Dir.entries(dirname).each do |filename|
next unless File.file?(filename)
# By default blob search the file in the samples;
# thus, we need to give it the absolute path
filepath = File.join(dirname, filename)
blob = blob(filepath)
assert blob.language, "No language for #{filepath}"
assert_equal language, blob.language.name, blob.name
end
end
end
def test_minified_files_not_safe_to_highlight

View File

@@ -1,9 +1,4 @@
require 'linguist/classifier'
require 'linguist/language'
require 'linguist/samples'
require 'linguist/tokenizer'
require 'test/unit'
require_relative "./helper"
class TestClassifier < Test::Unit::TestCase
include Linguist

10
test/test_file_blob.rb Normal file
View File

@@ -0,0 +1,10 @@
require 'linguist/file_blob'
require 'test/unit'
# Exercises Linguist::FileBlob#extensions for a dotfile, a single extension
# and a compound extension inside a dotted directory.
class TestFileBlob < Test::Unit::TestCase
  # Each path is paired with the exact extensions list expected for it.
  def test_extensions
    expectations = {
      ".gitignore"                => [".gitignore"],
      "build.xml"                 => [".xml"],
      "dotted.dir/index.html.erb" => [".html.erb", ".erb"]
    }

    expectations.each do |path, extensions|
      assert_equal extensions, Linguist::FileBlob.new(path).extensions
    end
  end
end

View File

@@ -1,9 +1,4 @@
require 'linguist/heuristics'
require 'linguist/language'
require 'linguist/samples'
require 'linguist/file_blob'
require 'test/unit'
require_relative "./helper"
class TestHeuristcs < Test::Unit::TestCase
include Linguist
@@ -16,6 +11,11 @@ class TestHeuristcs < Test::Unit::TestCase
File.read(File.join(samples_path, name))
end
# Wraps +name+ in a FileBlob. A name that already exists on disk is used
# verbatim; anything else is resolved relative to samples_path.
def file_blob(name)
  resolved = if File.exist?(name)
    name
  else
    File.join(samples_path, name)
  end
  FileBlob.new(resolved)
end
# Lists the sample paths under samples_path/<language_name> that match the
# glob +file+ (every entry by default).
def all_fixtures(language_name, file="*")
  pattern = "#{samples_path}/#{language_name}/#{file}"
  Dir.glob(pattern)
end
@@ -23,24 +23,17 @@ class TestHeuristcs < Test::Unit::TestCase
# Candidate languages = ["C++", "Objective-C"]
def test_obj_c_by_heuristics
# Only calling out '.h' filenames as these are the ones causing issues
all_fixtures("Objective-C", "*.h").each do |fixture|
results = Heuristics.disambiguate_c(fixture("Objective-C/#{File.basename(fixture)}"))
assert_equal Language["Objective-C"], results.first, "Failed for #{File.basename(fixture)}"
end
end
# Candidate languages = ["C++", "Objective-C"]
def test_cpp_by_heuristics
results = Heuristics.disambiguate_c(fixture("C++/render_adapter.cpp"))
assert_equal Language["C++"], results.first
results = Heuristics.disambiguate_c(fixture("C++/ThreadedQueue.h"))
assert_equal Language["C++"], results.first
assert_heuristics({
"Objective-C" => all_fixtures("Objective-C", "*.h"),
"C++" => ["C++/render_adapter.cpp", "C++/ThreadedQueue.h"],
"C" => nil
})
end
def test_c_by_heuristics
languages = ["C++", "Objective-C", "C"]
results = Heuristics.disambiguate_c(fixture("C/ArrowLeft.h"))
assert_equal nil, results.first
languages = [Language["C++"], Language["Objective-C"], Language["C"]]
results = Heuristics.call(file_blob("C/ArrowLeft.h"), languages)
assert_equal [], results
end
def test_detect_still_works_if_nothing_matches
@@ -50,94 +43,89 @@ class TestHeuristcs < Test::Unit::TestCase
end
# Candidate languages = ["Perl", "Prolog"]
def test_pl_prolog_by_heuristics
results = Heuristics.disambiguate_pl(fixture("Prolog/turing.pl"))
assert_equal Language["Prolog"], results.first
end
# Candidate languages = ["Perl", "Prolog"]
def test_pl_perl_by_heuristics
results = Heuristics.disambiguate_pl(fixture("Perl/perl-test.t"))
assert_equal Language["Perl"], results.first
def test_pl_prolog_perl_by_heuristics
assert_heuristics({
"Prolog" => "Prolog/turing.pl",
"Perl" => "Perl/perl-test.t",
})
end
# Candidate languages = ["ECL", "Prolog"]
def test_ecl_prolog_by_heuristics
results = Heuristics.disambiguate_ecl(fixture("Prolog/or-constraint.ecl"))
assert_equal Language["Prolog"], results.first
results = Heuristics.call(file_blob("Prolog/or-constraint.ecl"), [Language["ECL"], Language["Prolog"]])
assert_equal [Language["Prolog"]], results
end
# Candidate languages = ["ECL", "Prolog"]
def test_ecl_ecl_by_heuristics
results = Heuristics.disambiguate_ecl(fixture("ECL/sample.ecl"))
assert_equal Language["ECL"], results.first
def test_ecl_prolog_by_heuristics
assert_heuristics({
"ECL" => "ECL/sample.ecl",
"Prolog" => "Prolog/or-constraint.ecl"
})
end
# Candidate languages = ["IDL", "Prolog"]
def test_pro_prolog_by_heuristics
results = Heuristics.disambiguate_pro(fixture("Prolog/logic-problem.pro"))
assert_equal Language["Prolog"], results.first
end
# Candidate languages = ["IDL", "Prolog"]
def test_pro_idl_by_heuristics
results = Heuristics.disambiguate_pro(fixture("IDL/mg_acosh.pro"))
assert_equal Language["IDL"], results.first
def test_pro_prolog_idl_by_heuristics
assert_heuristics({
"Prolog" => "Prolog/logic-problem.pro",
"IDL" => "IDL/mg_acosh.pro"
})
end
# Candidate languages = ["AGS Script", "AsciiDoc"]
def test_asc_asciidoc_by_heuristics
results = Heuristics.disambiguate_asc(fixture("AsciiDoc/list.asc"))
assert_equal Language["AsciiDoc"], results.first
end
# Candidate languages = ["TypeScript", "XML"]
def test_ts_typescript_by_heuristics
results = Heuristics.disambiguate_ts(fixture("TypeScript/classes.ts"))
assert_equal Language["TypeScript"], results.first
end
# Candidate languages = ["TypeScript", "XML"]
def test_ts_xml_by_heuristics
results = Heuristics.disambiguate_ts(fixture("XML/pt_BR.xml"))
assert_equal Language["XML"], results.first
assert_heuristics({
"AsciiDoc" => "AsciiDoc/list.asc",
"AGS Script" => nil
})
end
def test_cl_by_heuristics
languages = ["Common Lisp", "OpenCL"]
languages.each do |language|
all_fixtures(language).each do |fixture|
results = Heuristics.disambiguate_cl(fixture("#{language}/#{File.basename(fixture)}"))
assert_equal Language[language], results.first
end
end
assert_heuristics({
"Common Lisp" => all_fixtures("Common Lisp"),
"OpenCL" => all_fixtures("OpenCL")
})
end
def test_f_by_heuristics
languages = ["FORTRAN", "Forth"]
languages.each do |language|
all_fixtures(language).each do |fixture|
results = Heuristics.disambiguate_f(fixture("#{language}/#{File.basename(fixture)}"))
assert_equal Language[language], results.first
end
end
assert_heuristics({
"FORTRAN" => all_fixtures("FORTRAN"),
"Forth" => all_fixtures("Forth")
})
end
# Candidate languages = ["Hack", "PHP"]
def test_hack_by_heuristics
results = Heuristics.disambiguate_hack(fixture("Hack/funs.php"))
assert_equal Language["Hack"], results.first
assert_heuristics({
"Hack" => "Hack/funs.php",
"PHP" => "PHP/Model.php"
})
end
# Candidate languages = ["Scala", "SuperCollider"]
def test_sc_supercollider_by_heuristics
results = Heuristics.disambiguate_sc(fixture("SuperCollider/WarpPreset.sc"))
assert_equal Language["SuperCollider"], results.first
def test_sc_supercollider_scala_by_heuristics
assert_heuristics({
"SuperCollider" => "SuperCollider/WarpPreset.sc",
"Scala" => "Scala/node11.sc"
})
end
# Candidate languages = ["Scala", "SuperCollider"]
def test_sc_scala_by_heuristics
results = Heuristics.disambiguate_sc(fixture("Scala/node11.sc"))
assert_equal Language["Scala"], results.first
def test_fs_by_heuristics
assert_heuristics({
"F#" => all_fixtures("F#"),
"Forth" => all_fixtures("Forth"),
"GLSL" => all_fixtures("GLSL")
})
end
# Asserts that Heuristics.call picks the expected language for each fixture.
#
# hash - maps a language name to one fixture path, an Array of fixture paths,
#        or nil. Every key becomes a candidate language; a nil value adds the
#        language as a candidate without checking any fixture for it.
#
# For each fixture the result must equal exactly [Language[language]].
def assert_heuristics(hash)
  candidates = hash.keys.map { |l| Language[l] }

  hash.each do |language, blobs|
    # Array() turns nil into [] and a single path into a one-element list.
    Array(blobs).each do |blob|
      result = Heuristics.call(file_blob(blob), candidates)
      # Name the fixture so a failure inside the loop can be traced to a file.
      assert_equal [Language[language]], result, "Failed for #{blob}"
    end
  end
end
end

View File

@@ -1,6 +1,4 @@
require 'linguist/language'
require 'test/unit'
require 'yaml'
require_relative "./helper"
class TestLanguage < Test::Unit::TestCase
include Linguist

View File

@@ -1,6 +1,4 @@
require 'linguist/md5'
require 'test/unit'
require_relative "./helper"
class TestMD5 < Test::Unit::TestCase
include Linguist

View File

@@ -1,5 +1,4 @@
require 'test/unit'
require 'yaml'
require_relative "./helper"
class TestPedantic < Test::Unit::TestCase
filename = File.expand_path("../../lib/linguist/languages.yml", __FILE__)

View File

@@ -1,6 +1,4 @@
require 'linguist/repository'
require 'linguist/lazy_blob'
require 'test/unit'
require_relative "./helper"
class TestRepository < Test::Unit::TestCase
def rugged_repository

View File

@@ -1,8 +1,5 @@
require 'linguist/samples'
require 'linguist/language'
require 'tempfile'
require 'yajl'
require 'test/unit'
require_relative "./helper"
require "tempfile"
class TestSamples < Test::Unit::TestCase
include Linguist
@@ -34,23 +31,29 @@ class TestSamples < Test::Unit::TestCase
assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c }
assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c }
assert_equal data['tokens_total'], data['tokens'].inject(0) { |n, (_, ts)| n += ts.inject(0) { |m, (_, c)| m += c } }
assert !data["interpreters"].empty?
end
# Check that there aren't samples with extensions that aren't explicitly defined in languages.yml
def test_parity
extensions = Samples.cache['extnames']
languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
languages = YAML.load_file(languages_yml)
languages.each do |name, options|
# Check that there aren't samples with extensions or interpreters that
# aren't explicitly defined in languages.yml
languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
YAML.load_file(languages_yml).each do |name, options|
define_method "test_samples_have_parity_with_languages_yml_for_#{name}" do
options['extensions'] ||= []
if extnames = extensions[name]
if extnames = Samples.cache['extnames'][name]
extnames.each do |extname|
next if extname == '.script!'
assert options['extensions'].include?(extname), "#{name} has a sample with extension (#{extname}) that isn't explicitly defined in languages.yml"
end
end
options['interpreters'] ||= []
if interpreters = Samples.cache['interpreters'][name]
interpreters.each do |interpreter|
# next if extname == '.script!'
assert options['interpreters'].include?(interpreter), "#{name} has a sample with an interpreter (#{interpreter}) that isn't explicitly defined in languages.yml"
end
end
end
end
@@ -79,4 +82,9 @@ class TestSamples < Test::Unit::TestCase
end
end
end
# Checks the interpreter name that Linguist.interpreter_from_shebang returns
# for an `env`-style shebang and for a versioned interpreter path.
def test_shebang
  {
    "#!/usr/bin/env bin/crystal" => "crystal",
    "#!/usr/bin/python2.4"       => "python2"
  }.each do |shebang, interpreter|
    assert_equal interpreter, Linguist.interpreter_from_shebang(shebang)
  end
end
end

View File

@@ -1,6 +1,4 @@
require 'linguist/tokenizer'
require 'test/unit'
require_relative "./helper"
class TestTokenizer < Test::Unit::TestCase
include Linguist