mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Merge branch 'refactor-heuristics' into 1036-local
* refactor-heuristics: (43 commits) update docs Clean up heuristic logic Allow disambiguate to return an Array Rename .create to .disambiguate docs Remove inactive heuristics Refactor heuristics Not going back docs Move call method into existing Classifier class Try strategies until one language is returned Remove unneded empty blob check Add F# and GLSL samples. Add Forth and GLSL extension .fs. Add heuristic to disambiguate between F#, Forth, and GLSL. byebug requires ruby 2.0 Remove test for removed extension Fix typo in test add rake interpreter add python3 interpreter Remove old wrong_shebang.rb sample Add byebug ... Conflicts: lib/linguist/heuristics.rb test/test_heuristics.rb
This commit is contained in:
22
test/fixtures/Python/run_tests.module
vendored
Normal file
22
test/fixtures/Python/run_tests.module
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env python
# Entry-point script: resolves MOOSE_DIR, adds its python/ directory to
# sys.path, and hands control to TestHarness for the application named below.
import sys, os

# Set the current working directory to the directory where this script is located
os.chdir(os.path.abspath(os.path.dirname(sys.argv[0])))

#### Set the name of the application here and moose directory relative to the application
app_name = 'stork'

MODULE_DIR = os.path.abspath('..')
MOOSE_DIR = os.path.abspath(os.path.join(MODULE_DIR, '..'))
#### See if MOOSE_DIR is already in the environment instead
# Membership test works on Python 2 and 3; dict.has_key() no longer exists in Python 3.
if "MOOSE_DIR" in os.environ:
    MOOSE_DIR = os.environ['MOOSE_DIR']

# path_tool is only importable once MOOSE's python/ directory is on sys.path.
sys.path.append(os.path.join(MOOSE_DIR, 'python'))
import path_tool
path_tool.activate_module('TestHarness')

from TestHarness import TestHarness
# Run the tests!
TestHarness.buildAndRun(sys.argv, app_name, MOOSE_DIR)
|
||||
1505
test/fixtures/Shell/mintleaf.module
vendored
Normal file
1505
test/fixtures/Shell/mintleaf.module
vendored
Normal file
File diff suppressed because it is too large
Load Diff
4
test/helper.rb
Normal file
4
test/helper.rb
Normal file
@@ -0,0 +1,4 @@
|
||||
require "bundler/setup"
|
||||
require "test/unit"
|
||||
require "mocha/setup"
|
||||
require "linguist"
|
||||
@@ -1,9 +1,4 @@
|
||||
require 'linguist/file_blob'
|
||||
require 'linguist/samples'
|
||||
|
||||
require 'test/unit'
|
||||
require 'mocha/setup'
|
||||
require 'mime/types'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestBlob < Test::Unit::TestCase
|
||||
include Linguist
|
||||
@@ -470,6 +465,25 @@ class TestBlob < Test::Unit::TestCase
|
||||
assert blob.language, "No language for #{sample[:path]}"
|
||||
assert_equal sample[:language], blob.language.name, blob.name
|
||||
end
|
||||
|
||||
# Test language detection for files which shouldn't be used as samples
|
||||
root = File.expand_path('../fixtures', __FILE__)
|
||||
Dir.entries(root).each do |language|
|
||||
next unless File.file?(language)
|
||||
|
||||
# Each directory contains test files of a language
|
||||
dirname = File.join(root, language)
|
||||
Dir.entries(dirname).each do |filename|
|
||||
next unless File.file?(filename)
|
||||
|
||||
# By default, blob searches for the file in the samples;
|
||||
# thus, we need to give it the absolute path
|
||||
filepath = File.join(dirname, filename)
|
||||
blob = blob(filepath)
|
||||
assert blob.language, "No language for #{filepath}"
|
||||
assert_equal language, blob.language.name, blob.name
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def test_minified_files_not_safe_to_highlight
|
||||
|
||||
@@ -1,9 +1,4 @@
|
||||
require 'linguist/classifier'
|
||||
require 'linguist/language'
|
||||
require 'linguist/samples'
|
||||
require 'linguist/tokenizer'
|
||||
|
||||
require 'test/unit'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestClassifier < Test::Unit::TestCase
|
||||
include Linguist
|
||||
|
||||
10
test/test_file_blob.rb
Normal file
10
test/test_file_blob.rb
Normal file
@@ -0,0 +1,10 @@
|
||||
require 'linguist/file_blob'
|
||||
require 'test/unit'
|
||||
|
||||
# Unit tests for Linguist::FileBlob.
class TestFileBlob < Test::Unit::TestCase
  # The cases below pin FileBlob#extensions for a dotfile, a single
  # extension, and a compound extension under a directory whose own name
  # contains a dot (the directory's dot must not contribute an extension).
  def test_extensions
    assert_equal [".gitignore"], Linguist::FileBlob.new(".gitignore").extensions
    assert_equal [".xml"], Linguist::FileBlob.new("build.xml").extensions
    assert_equal [".html.erb", ".erb"], Linguist::FileBlob.new("dotted.dir/index.html.erb").extensions
  end
end
|
||||
@@ -1,9 +1,4 @@
|
||||
require 'linguist/heuristics'
|
||||
require 'linguist/language'
|
||||
require 'linguist/samples'
|
||||
require 'linguist/file_blob'
|
||||
|
||||
require 'test/unit'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestHeuristcs < Test::Unit::TestCase
|
||||
include Linguist
|
||||
@@ -16,6 +11,11 @@ class TestHeuristcs < Test::Unit::TestCase
|
||||
File.read(File.join(samples_path, name))
|
||||
end
|
||||
|
||||
# Build a FileBlob for a sample file.
#
# name - a path that already exists on disk (used as-is), or a path that is
#        resolved relative to samples_path when no such file exists.
#
# Returns a FileBlob created from the resolved path.
def file_blob(name)
  path = File.exist?(name) ? name : File.join(samples_path, name)
  FileBlob.new(path)
end
|
||||
|
||||
# List the sample files for one language.
#
# language_name - name of the directory under samples_path to search.
# file          - glob pattern matched inside that directory (default: "*").
#
# Returns an Array of matching paths, sorted so that iteration order does not
# depend on the filesystem (Dir.glob only sorts by default on newer Rubies).
def all_fixtures(language_name, file="*")
  Dir.glob("#{samples_path}/#{language_name}/#{file}").sort
end
|
||||
@@ -23,24 +23,17 @@ class TestHeuristcs < Test::Unit::TestCase
|
||||
# Candidate languages = ["C++", "Objective-C"]
|
||||
def test_obj_c_by_heuristics
|
||||
# Only calling out '.h' filenames as these are the ones causing issues
|
||||
all_fixtures("Objective-C", "*.h").each do |fixture|
|
||||
results = Heuristics.disambiguate_c(fixture("Objective-C/#{File.basename(fixture)}"))
|
||||
assert_equal Language["Objective-C"], results.first, "Failed for #{File.basename(fixture)}"
|
||||
end
|
||||
end
|
||||
|
||||
# Candidate languages = ["C++", "Objective-C"]
|
||||
def test_cpp_by_heuristics
|
||||
results = Heuristics.disambiguate_c(fixture("C++/render_adapter.cpp"))
|
||||
assert_equal Language["C++"], results.first
|
||||
results = Heuristics.disambiguate_c(fixture("C++/ThreadedQueue.h"))
|
||||
assert_equal Language["C++"], results.first
|
||||
assert_heuristics({
|
||||
"Objective-C" => all_fixtures("Objective-C", "*.h"),
|
||||
"C++" => ["C++/render_adapter.cpp", "C++/ThreadedQueue.h"],
|
||||
"C" => nil
|
||||
})
|
||||
end
|
||||
|
||||
def test_c_by_heuristics
|
||||
languages = ["C++", "Objective-C", "C"]
|
||||
results = Heuristics.disambiguate_c(fixture("C/ArrowLeft.h"))
|
||||
assert_equal nil, results.first
|
||||
languages = [Language["C++"], Language["Objective-C"], Language["C"]]
|
||||
results = Heuristics.call(file_blob("C/ArrowLeft.h"), languages)
|
||||
assert_equal [], results
|
||||
end
|
||||
|
||||
def test_detect_still_works_if_nothing_matches
|
||||
@@ -50,94 +43,89 @@ class TestHeuristcs < Test::Unit::TestCase
|
||||
end
|
||||
|
||||
# Candidate languages = ["Perl", "Prolog"]
|
||||
def test_pl_prolog_by_heuristics
|
||||
results = Heuristics.disambiguate_pl(fixture("Prolog/turing.pl"))
|
||||
assert_equal Language["Prolog"], results.first
|
||||
end
|
||||
|
||||
# Candidate languages = ["Perl", "Prolog"]
|
||||
def test_pl_perl_by_heuristics
|
||||
results = Heuristics.disambiguate_pl(fixture("Perl/perl-test.t"))
|
||||
assert_equal Language["Perl"], results.first
|
||||
def test_pl_prolog_perl_by_heuristics
|
||||
assert_heuristics({
|
||||
"Prolog" => "Prolog/turing.pl",
|
||||
"Perl" => "Perl/perl-test.t",
|
||||
})
|
||||
end
|
||||
|
||||
# Candidate languages = ["ECL", "Prolog"]
|
||||
def test_ecl_prolog_by_heuristics
|
||||
results = Heuristics.disambiguate_ecl(fixture("Prolog/or-constraint.ecl"))
|
||||
assert_equal Language["Prolog"], results.first
|
||||
results = Heuristics.call(file_blob("Prolog/or-constraint.ecl"), [Language["ECL"], Language["Prolog"]])
|
||||
assert_equal [Language["Prolog"]], results
|
||||
end
|
||||
|
||||
# Candidate languages = ["ECL", "Prolog"]
|
||||
def test_ecl_ecl_by_heuristics
|
||||
results = Heuristics.disambiguate_ecl(fixture("ECL/sample.ecl"))
|
||||
assert_equal Language["ECL"], results.first
|
||||
def test_ecl_prolog_by_heuristics
|
||||
assert_heuristics({
|
||||
"ECL" => "ECL/sample.ecl",
|
||||
"Prolog" => "Prolog/or-constraint.ecl"
|
||||
})
|
||||
end
|
||||
|
||||
# Candidate languages = ["IDL", "Prolog"]
|
||||
def test_pro_prolog_by_heuristics
|
||||
results = Heuristics.disambiguate_pro(fixture("Prolog/logic-problem.pro"))
|
||||
assert_equal Language["Prolog"], results.first
|
||||
end
|
||||
|
||||
# Candidate languages = ["IDL", "Prolog"]
|
||||
def test_pro_idl_by_heuristics
|
||||
results = Heuristics.disambiguate_pro(fixture("IDL/mg_acosh.pro"))
|
||||
assert_equal Language["IDL"], results.first
|
||||
def test_pro_prolog_idl_by_heuristics
|
||||
assert_heuristics({
|
||||
"Prolog" => "Prolog/logic-problem.pro",
|
||||
"IDL" => "IDL/mg_acosh.pro"
|
||||
})
|
||||
end
|
||||
|
||||
# Candidate languages = ["AGS Script", "AsciiDoc"]
|
||||
def test_asc_asciidoc_by_heuristics
|
||||
results = Heuristics.disambiguate_asc(fixture("AsciiDoc/list.asc"))
|
||||
assert_equal Language["AsciiDoc"], results.first
|
||||
end
|
||||
|
||||
# Candidate languages = ["TypeScript", "XML"]
|
||||
def test_ts_typescript_by_heuristics
|
||||
results = Heuristics.disambiguate_ts(fixture("TypeScript/classes.ts"))
|
||||
assert_equal Language["TypeScript"], results.first
|
||||
end
|
||||
|
||||
# Candidate languages = ["TypeScript", "XML"]
|
||||
def test_ts_xml_by_heuristics
|
||||
results = Heuristics.disambiguate_ts(fixture("XML/pt_BR.xml"))
|
||||
assert_equal Language["XML"], results.first
|
||||
assert_heuristics({
|
||||
"AsciiDoc" => "AsciiDoc/list.asc",
|
||||
"AGS Script" => nil
|
||||
})
|
||||
end
|
||||
|
||||
def test_cl_by_heuristics
|
||||
languages = ["Common Lisp", "OpenCL"]
|
||||
languages.each do |language|
|
||||
all_fixtures(language).each do |fixture|
|
||||
results = Heuristics.disambiguate_cl(fixture("#{language}/#{File.basename(fixture)}"))
|
||||
assert_equal Language[language], results.first
|
||||
end
|
||||
end
|
||||
assert_heuristics({
|
||||
"Common Lisp" => all_fixtures("Common Lisp"),
|
||||
"OpenCL" => all_fixtures("OpenCL")
|
||||
})
|
||||
end
|
||||
|
||||
def test_f_by_heuristics
|
||||
languages = ["FORTRAN", "Forth"]
|
||||
languages.each do |language|
|
||||
all_fixtures(language).each do |fixture|
|
||||
results = Heuristics.disambiguate_f(fixture("#{language}/#{File.basename(fixture)}"))
|
||||
assert_equal Language[language], results.first
|
||||
end
|
||||
end
|
||||
assert_heuristics({
|
||||
"FORTRAN" => all_fixtures("FORTRAN"),
|
||||
"Forth" => all_fixtures("Forth")
|
||||
})
|
||||
end
|
||||
|
||||
# Candidate languages = ["Hack", "PHP"]
|
||||
def test_hack_by_heuristics
|
||||
results = Heuristics.disambiguate_hack(fixture("Hack/funs.php"))
|
||||
assert_equal Language["Hack"], results.first
|
||||
assert_heuristics({
|
||||
"Hack" => "Hack/funs.php",
|
||||
"PHP" => "PHP/Model.php"
|
||||
})
|
||||
end
|
||||
|
||||
# Candidate languages = ["Scala", "SuperCollider"]
|
||||
def test_sc_supercollider_by_heuristics
|
||||
results = Heuristics.disambiguate_sc(fixture("SuperCollider/WarpPreset.sc"))
|
||||
assert_equal Language["SuperCollider"], results.first
|
||||
def test_sc_supercollider_scala_by_heuristics
|
||||
assert_heuristics({
|
||||
"SuperCollider" => "SuperCollider/WarpPreset.sc",
|
||||
"Scala" => "Scala/node11.sc"
|
||||
})
|
||||
end
|
||||
|
||||
# Candidate languages = ["Scala", "SuperCollider"]
|
||||
def test_sc_scala_by_heuristics
|
||||
results = Heuristics.disambiguate_sc(fixture("Scala/node11.sc"))
|
||||
assert_equal Language["Scala"], results.first
|
||||
def test_fs_by_heuristics
|
||||
assert_heuristics({
|
||||
"F#" => all_fixtures("F#"),
|
||||
"Forth" => all_fixtures("Forth"),
|
||||
"GLSL" => all_fixtures("GLSL")
|
||||
})
|
||||
end
|
||||
|
||||
# Assert that Heuristics.call picks exactly the expected language for each blob.
#
# hash - Hash mapping a language name to a blob path, an Array of blob paths,
#        or nil. Every key is offered to the heuristics as a candidate; a nil
#        value adds a candidate without checking any file for it.
def assert_heuristics(hash)
  candidates = hash.keys.map { |l| Language[l] }

  hash.each do |language, blobs|
    # Array() lets callers pass nil, a single path, or a list of paths.
    Array(blobs).each do |blob|
      result = Heuristics.call(file_blob(blob), candidates)
      # Name the blob so a failure inside a multi-file case is identifiable.
      assert_equal [Language[language]], result, "Failed for #{blob}"
    end
  end
end
|
||||
end
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
require 'linguist/language'
|
||||
require 'test/unit'
|
||||
require 'yaml'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestLanguage < Test::Unit::TestCase
|
||||
include Linguist
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
require 'linguist/md5'
|
||||
|
||||
require 'test/unit'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestMD5 < Test::Unit::TestCase
|
||||
include Linguist
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
require 'test/unit'
|
||||
require 'yaml'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestPedantic < Test::Unit::TestCase
|
||||
filename = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
require 'linguist/repository'
|
||||
require 'linguist/lazy_blob'
|
||||
require 'test/unit'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestRepository < Test::Unit::TestCase
|
||||
def rugged_repository
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
require 'linguist/samples'
|
||||
require 'linguist/language'
|
||||
require 'tempfile'
|
||||
require 'yajl'
|
||||
require 'test/unit'
|
||||
require_relative "./helper"
|
||||
require "tempfile"
|
||||
|
||||
class TestSamples < Test::Unit::TestCase
|
||||
include Linguist
|
||||
@@ -34,23 +31,29 @@ class TestSamples < Test::Unit::TestCase
|
||||
assert_equal data['languages_total'], data['languages'].inject(0) { |n, (_, c)| n += c }
|
||||
assert_equal data['tokens_total'], data['language_tokens'].inject(0) { |n, (_, c)| n += c }
|
||||
assert_equal data['tokens_total'], data['tokens'].inject(0) { |n, (_, ts)| n += ts.inject(0) { |m, (_, c)| m += c } }
|
||||
assert !data["interpreters"].empty?
|
||||
end
|
||||
|
||||
# Check that there aren't samples with extensions that aren't explicitly defined in languages.yml
|
||||
def test_parity
|
||||
extensions = Samples.cache['extnames']
|
||||
languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
|
||||
languages = YAML.load_file(languages_yml)
|
||||
|
||||
languages.each do |name, options|
|
||||
# Check that there aren't samples with extensions or interpreters that
|
||||
# aren't explicitly defined in languages.yml
|
||||
languages_yml = File.expand_path("../../lib/linguist/languages.yml", __FILE__)
|
||||
YAML.load_file(languages_yml).each do |name, options|
|
||||
define_method "test_samples_have_parity_with_languages_yml_for_#{name}" do
|
||||
options['extensions'] ||= []
|
||||
|
||||
if extnames = extensions[name]
|
||||
if extnames = Samples.cache['extnames'][name]
|
||||
extnames.each do |extname|
|
||||
next if extname == '.script!'
|
||||
assert options['extensions'].include?(extname), "#{name} has a sample with extension (#{extname}) that isn't explicitly defined in languages.yml"
|
||||
end
|
||||
end
|
||||
|
||||
options['interpreters'] ||= []
|
||||
if interpreters = Samples.cache['interpreters'][name]
|
||||
interpreters.each do |interpreter|
|
||||
# next if extname == '.script!'
|
||||
assert options['interpreters'].include?(interpreter), "#{name} has a sample with an interpreter (#{interpreter}) that isn't explicitly defined in languages.yml"
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -79,4 +82,9 @@ class TestSamples < Test::Unit::TestCase
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Pins Linguist.interpreter_from_shebang for two shebang shapes: an
# `env` invocation whose argument is a relative path, and an interpreter
# name carrying a minor-version suffix.
def test_shebang
  assert_equal "crystal", Linguist.interpreter_from_shebang("#!/usr/bin/env bin/crystal")
  assert_equal "python2", Linguist.interpreter_from_shebang("#!/usr/bin/python2.4")
end
|
||||
end
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
require 'linguist/tokenizer'
|
||||
|
||||
require 'test/unit'
|
||||
require_relative "./helper"
|
||||
|
||||
class TestTokenizer < Test::Unit::TestCase
|
||||
include Linguist
|
||||
|
||||
Reference in New Issue
Block a user