mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Merge branch 'master' of https://github.com/github/linguist
Conflicts: grammars.yml
This commit is contained in:
@@ -57,14 +57,20 @@ module Linguist
|
||||
#
|
||||
# Returns a String.
|
||||
def extension
|
||||
# File.extname returns an empty string if the filename is only an extension (e.g. ".gitignore").
|
||||
extension = File.extname(name)
|
||||
basename = File.basename(name)
|
||||
# Checks if the filename is an extension.
|
||||
if extension.empty? && basename[0] == "."
|
||||
basename
|
||||
else
|
||||
extension
|
||||
extensions.last || ""
|
||||
end
|
||||
|
||||
# Public: Return an array of the file extensions
|
||||
#
|
||||
# >> Linguist::FileBlob.new("app/views/things/index.html.erb").extensions
|
||||
# => [".html.erb", ".erb"]
|
||||
#
|
||||
# Returns an Array
|
||||
def extensions
  # Only the dot-separated parts after the first segment are extensions.
  # The leading segment (the bare file name, or "" for a dotfile such as
  # ".gitignore") is intentionally discarded.
  _basename, *segments = File.basename(name).split(".")

  # Build every trailing combination, longest first, e.g.
  # ["html", "erb"] -> [".html.erb", ".erb"]
  segments.each_index.map do |index|
    "." + segments[index..-1].join(".")
  end
end
|
||||
end
|
||||
|
||||
@@ -106,40 +106,52 @@ module Linguist
|
||||
# A bit of an elegant hack. If the file is executable but extensionless,
|
||||
# append a "magic" extension so it can be classified with other
|
||||
# languages that have shebang scripts.
|
||||
extension = FileBlob.new(name).extension
|
||||
if extension.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
|
||||
extensions = FileBlob.new(name).extensions
|
||||
if extensions.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
|
||||
name += ".script!"
|
||||
end
|
||||
|
||||
# First try to find languages that match based on filename.
|
||||
# Find languages that match based on filename.
|
||||
possible_languages = find_by_filename(name)
|
||||
|
||||
# If there is more than one possible language with that extension (or no
|
||||
# extension at all, in the case of extensionless scripts), we need to continue
|
||||
# our detection work
|
||||
if possible_languages.length > 1
|
||||
data = blob.data
|
||||
possible_language_names = possible_languages.map(&:name)
|
||||
heuristic_languages = Heuristics.find_by_heuristics(data, possible_language_names)
|
||||
if possible_languages.length == 1
|
||||
# Simplest and most common case, we can just return the one match based
|
||||
# on extension
|
||||
possible_languages.first
|
||||
|
||||
if heuristic_languages.size > 1
|
||||
possible_language_names = heuristic_languages.map(&:name)
|
||||
end
|
||||
# If there is more than one possible language with that extension (or no
|
||||
# extension at all, in the case of extensionless scripts), we need to
|
||||
# continue our detection work
|
||||
else
|
||||
# Matches possible_languages.length == 0 || possible_languages.length > 1
|
||||
data = blob.data
|
||||
|
||||
# Check if there's a shebang line and use that as authoritative
|
||||
if (result = find_by_shebang(data)) && !result.empty?
|
||||
result.first
|
||||
# No shebang. Still more work to do. Try to find it with our heuristics.
|
||||
elsif heuristic_languages.size == 1
|
||||
heuristic_languages.first
|
||||
# Lastly, fall back to the probabilistic classifier.
|
||||
elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
|
||||
# Return the actual Language object based on the string language name (i.e., first element of `#classify`)
|
||||
Language[classified[0]]
|
||||
return result.first
|
||||
|
||||
# More than one language with that extension. We need to make a choice.
|
||||
elsif possible_languages.length > 1
|
||||
|
||||
# First try heuristics
|
||||
|
||||
possible_language_names = possible_languages.map(&:name)
|
||||
heuristic_languages = Heuristics.find_by_heuristics(data, possible_language_names)
|
||||
|
||||
# If there are multiple possible languages returned from heuristics
|
||||
# then reduce language candidates for Bayesian classifier here.
|
||||
if heuristic_languages.size > 1
|
||||
possible_language_names = heuristic_languages.map(&:name)
|
||||
end
|
||||
|
||||
if heuristic_languages.size == 1
|
||||
return heuristic_languages.first
|
||||
# Lastly, fall back to the probabilistic classifier.
|
||||
elsif classified = Classifier.classify(Samples.cache, data, possible_language_names).first
|
||||
# Return the actual Language object based on the string language name (i.e., first element of `#classify`)
|
||||
return Language[classified[0]]
|
||||
end
|
||||
end
|
||||
else
|
||||
# Simplest and most common case, we can just return the one match based on extension
|
||||
possible_languages.first
|
||||
end
|
||||
end
|
||||
|
||||
@@ -190,8 +202,13 @@ module Linguist
|
||||
# Returns all matching Languages or [] if none were found.
|
||||
def self.find_by_filename(filename)
|
||||
basename = File.basename(filename)
|
||||
extname = FileBlob.new(filename).extension
|
||||
(@filename_index[basename] + find_by_extension(extname)).compact.uniq
|
||||
|
||||
# find the first extension with language definitions
|
||||
extname = FileBlob.new(filename).extensions.detect do |e|
|
||||
!@extension_index[e].empty?
|
||||
end
|
||||
|
||||
(@filename_index[basename] + @extension_index[extname]).compact.uniq
|
||||
end
|
||||
|
||||
# Public: Look up Languages by file extension.
|
||||
|
||||
@@ -470,6 +470,7 @@ CoffeeScript:
|
||||
extensions:
|
||||
- .coffee
|
||||
- ._coffee
|
||||
- .cjsx
|
||||
- .cson
|
||||
- .iced
|
||||
filenames:
|
||||
@@ -566,6 +567,8 @@ Crystal:
|
||||
- .cr
|
||||
ace_mode: ruby
|
||||
tm_scope: source.ruby
|
||||
interpreters:
|
||||
- crystal
|
||||
|
||||
Cucumber:
|
||||
extensions:
|
||||
@@ -743,6 +746,8 @@ Erlang:
|
||||
- .es
|
||||
- .escript
|
||||
- .hrl
|
||||
interpreters:
|
||||
- escript
|
||||
|
||||
F#:
|
||||
type: programming
|
||||
@@ -938,6 +943,8 @@ Gnuplot:
|
||||
- .gnuplot
|
||||
- .plot
|
||||
- .plt
|
||||
interpreters:
|
||||
- gnuplot
|
||||
|
||||
Go:
|
||||
type: programming
|
||||
@@ -1203,6 +1210,8 @@ Ioke:
|
||||
color: "#078193"
|
||||
extensions:
|
||||
- .ik
|
||||
interpreters:
|
||||
- ioke
|
||||
|
||||
Isabelle:
|
||||
type: programming
|
||||
@@ -1710,6 +1719,8 @@ Nu:
|
||||
filenames:
|
||||
- Nukefile
|
||||
tm_scope: source.scheme
|
||||
interpreters:
|
||||
- nush
|
||||
|
||||
NumPy:
|
||||
group: Python
|
||||
@@ -1896,6 +1907,8 @@ Parrot Assembly:
|
||||
- pasm
|
||||
extensions:
|
||||
- .pasm
|
||||
interpreters:
|
||||
- parrot
|
||||
tm_scope: none
|
||||
|
||||
Parrot Internal Representation:
|
||||
@@ -1906,6 +1919,8 @@ Parrot Internal Representation:
|
||||
- pir
|
||||
extensions:
|
||||
- .pir
|
||||
interpreters:
|
||||
- parrot
|
||||
|
||||
Pascal:
|
||||
type: programming
|
||||
@@ -1948,6 +1963,8 @@ Perl6:
|
||||
- .p6m
|
||||
- .pl6
|
||||
- .pm6
|
||||
interpreters:
|
||||
- perl6
|
||||
tm_scope: none
|
||||
|
||||
PigLatin:
|
||||
@@ -2012,6 +2029,8 @@ Prolog:
|
||||
- .ecl
|
||||
- .pro
|
||||
- .prolog
|
||||
interpreters:
|
||||
- swipl
|
||||
|
||||
Propeller Spin:
|
||||
type: programming
|
||||
@@ -2075,6 +2094,8 @@ Python:
|
||||
- wscript
|
||||
interpreters:
|
||||
- python
|
||||
- python2
|
||||
- python3
|
||||
|
||||
Python traceback:
|
||||
type: data
|
||||
@@ -2095,6 +2116,8 @@ QMake:
|
||||
extensions:
|
||||
- .pro
|
||||
- .pri
|
||||
interpreters:
|
||||
- qmake
|
||||
|
||||
R:
|
||||
type: programming
|
||||
@@ -2249,6 +2272,8 @@ Ruby:
|
||||
- .watchr
|
||||
interpreters:
|
||||
- ruby
|
||||
- macruby
|
||||
- rake
|
||||
filenames:
|
||||
- .pryrc
|
||||
- Appraisals
|
||||
@@ -2335,6 +2360,8 @@ Scala:
|
||||
- .scala
|
||||
- .sbt
|
||||
- .sc
|
||||
interpreters:
|
||||
- scala
|
||||
|
||||
Scaml:
|
||||
group: HTML
|
||||
|
||||
@@ -52,14 +52,16 @@ module Linguist
|
||||
})
|
||||
end
|
||||
else
|
||||
path = File.join(dirname, filename)
|
||||
|
||||
if File.extname(filename) == ""
|
||||
raise "#{File.join(dirname, filename)} is missing an extension, maybe it belongs in filenames/ subdir"
|
||||
raise "#{path} is missing an extension, maybe it belongs in filenames/ subdir"
|
||||
end
|
||||
|
||||
yield({
|
||||
:path => File.join(dirname, filename),
|
||||
:path => path,
|
||||
:language => category,
|
||||
:interpreter => File.exist?(filename) ? Linguist.interpreter_from_shebang(File.read(filename)) : nil,
|
||||
:interpreter => Linguist.interpreter_from_shebang(File.read(path)),
|
||||
:extname => File.extname(filename)
|
||||
})
|
||||
end
|
||||
@@ -131,18 +133,19 @@ module Linguist
|
||||
|
||||
script = script == 'env' ? tokens[1] : script
|
||||
|
||||
# "python2.6" -> "python"
|
||||
if script =~ /((?:\d+\.?)+)/
|
||||
script.sub! $1, ''
|
||||
end
|
||||
# If script has an invalid shebang, we might get here
|
||||
return unless script
|
||||
|
||||
# "python2.6" -> "python2"
|
||||
script.sub! $1, '' if script =~ /(\.\d+)$/
|
||||
|
||||
# Check for multiline shebang hacks that call `exec`
|
||||
if script == 'sh' &&
|
||||
lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
|
||||
script = $1
|
||||
end
|
||||
|
||||
script
|
||||
|
||||
File.basename(script)
|
||||
else
|
||||
nil
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user