Rename samples subdirectories

This commit is contained in:
Joshua Peek
2012-07-23 15:52:49 -05:00
parent 314f0e4852
commit 7b6caa0f6c
273 changed files with 2952 additions and 2955 deletions

View File

@@ -441,8 +441,8 @@ module Linguist
end
end
extensions = Samples::DATA['extnames'] rescue {} # TODO: BAH!
filenames = Samples::DATA['filenames'] rescue {} # TODO: BAH!
extensions = Samples::DATA['extnames']
filenames = Samples::DATA['filenames']
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|

View File

@@ -1,6 +1,7 @@
require 'set'
require 'yaml'
require 'linguist/md5'
require 'linguist/classifier'
module Linguist
# Model for accessing classifier training data.
@@ -27,7 +28,7 @@ module Linguist
# Skip text and binary for now
# Possibly reconsider this later
next if category == 'text' || category == 'binary'
next if category == 'Text' || category == 'Binary'
dirname = File.join(ROOT, category)
Dir.entries(dirname).each do |filename|
@@ -60,32 +61,29 @@ module Linguist
#
# Returns trained Classifier.
def self.data
require 'linguist/classifier'
require 'linguist/language'
db = {}
db['extnames'] = {}
db['filenames'] = {}
each do |sample|
language = Language.find_by_alias(sample[:language])
language_name = sample[:language]
# TODO: For now skip empty extnames
if sample[:extname] && sample[:extname] != ""
db['extnames'][language.name] ||= []
if !db['extnames'][language.name].include?(sample[:extname])
db['extnames'][language.name] << sample[:extname]
db['extnames'][language_name] ||= []
if !db['extnames'][language_name].include?(sample[:extname])
db['extnames'][language_name] << sample[:extname]
end
end
# TODO: For now skip samples without a filename
if fn = sample[:filename]
db['filenames'][language.name] ||= []
db['filenames'][language.name] << fn
db['filenames'][language_name] ||= []
db['filenames'][language_name] << fn
end
data = File.read(sample[:path])
Classifier.train!(db, language.name, data)
Classifier.train!(db, language_name, data)
end
db['md5'] = Linguist::MD5.hexdigest(db)

File diff suppressed because it is too large Load Diff

View File

Before

Width:  |  Height:  |  Size: 5.4 KiB

After

Width:  |  Height:  |  Size: 5.4 KiB

View File

Before

Width:  |  Height:  |  Size: 5.4 KiB

After

Width:  |  Height:  |  Size: 5.4 KiB

Some files were not shown because too many files have changed in this diff Show More