better heuristic distinction of .d files (#3145)

* fix benchmark

- require json for Hash.to_json

* better heuristic distinction of .d files

- properly recognize dtrace probes
- recognize \ in Makefile paths
- recognize single line `file.ext : dep.ext` make targets
- recognize D module, import, function, and unittest declarations
- add more representative D samples

D changed from 31.2% to 28.1%
DTrace changed from 33.5% to 32.5%
Makefile changed from 35.3% to 39.4%

See
https://gist.github.com/MartinNowak/fda24fdef64f2dbb05c5a5ceabf22bd3
for the scraper used to get a test corpus.
This commit is contained in:
Martin Nowak
2017-03-30 19:25:53 +02:00
committed by Colin Seymour
parent b7e27a9f58
commit fa6ae1116f
10 changed files with 663 additions and 3 deletions

View File

@@ -125,11 +125,18 @@ module Linguist
end
disambiguate ".d" do |data|
if /^module /.match(data)
# see http://dlang.org/spec/grammar
# ModuleDeclaration | ImportDeclaration | FuncDeclaration | unittest
if /^module\s+[\w.]*\s*;|import\s+[\w\s,.:]*;|\w+\s+\w+\s*\(.*\)(?:\(.*\))?\s*{[^}]*}|unittest\s*(?:\(.*\))?\s*{[^}]*}/.match(data)
Language["D"]
elsif /^((dtrace:::)?BEGIN|provider |#pragma (D (option|attributes)|ident)\s)/.match(data)
# see http://dtrace.org/guide/chp-prog.html, http://dtrace.org/guide/chp-profile.html, http://dtrace.org/guide/chp-opt.html
elsif /^(\w+:\w*:\w*:\w*|BEGIN|END|provider\s+|(tick|profile)-\w+\s+{[^}]*}|#pragma\s+D\s+(option|attributes|depends_on)\s|#pragma\s+ident\s)/.match(data)
Language["DTrace"]
elsif /(\/.*:( .* \\)$| : \\$|^ : |: \\$)/.match(data)
# path/target : dependency \
# target : \
# : dependency
# path/file.ext1 : some/path/../file.ext2
elsif /([\/\\].*:\s+.*\s\\$|: \\$|^ : |^[\w\s\/\\.]+\w+\.\w+\s*:\s+[\w\s\/\\.]+\w+\.\w+)/.match(data)
Language["Makefile"]
end
end