mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-28 17:20:22 +00:00
Rename samples subdirectories
This commit is contained in:
@@ -441,8 +441,8 @@ module Linguist
|
||||
end
|
||||
end
|
||||
|
||||
extensions = Samples::DATA['extnames'] rescue {} # TODO: BAH!
|
||||
filenames = Samples::DATA['filenames'] rescue {} # TODO: BAH!
|
||||
extensions = Samples::DATA['extnames']
|
||||
filenames = Samples::DATA['filenames']
|
||||
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
|
||||
|
||||
YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
require 'set'
|
||||
require 'yaml'
|
||||
|
||||
require 'linguist/md5'
|
||||
require 'linguist/classifier'
|
||||
|
||||
module Linguist
|
||||
# Model for accessing classifier training data.
|
||||
@@ -27,7 +28,7 @@ module Linguist
|
||||
|
||||
# Skip text and binary for now
|
||||
# Possibly reconsider this later
|
||||
next if category == 'text' || category == 'binary'
|
||||
next if category == 'Text' || category == 'Binary'
|
||||
|
||||
dirname = File.join(ROOT, category)
|
||||
Dir.entries(dirname).each do |filename|
|
||||
@@ -60,32 +61,29 @@ module Linguist
|
||||
#
|
||||
# Returns trained Classifier.
|
||||
def self.data
|
||||
require 'linguist/classifier'
|
||||
require 'linguist/language'
|
||||
|
||||
db = {}
|
||||
db['extnames'] = {}
|
||||
db['filenames'] = {}
|
||||
|
||||
each do |sample|
|
||||
language = Language.find_by_alias(sample[:language])
|
||||
language_name = sample[:language]
|
||||
|
||||
# TODO: For now skip empty extnames
|
||||
if sample[:extname] && sample[:extname] != ""
|
||||
db['extnames'][language.name] ||= []
|
||||
if !db['extnames'][language.name].include?(sample[:extname])
|
||||
db['extnames'][language.name] << sample[:extname]
|
||||
db['extnames'][language_name] ||= []
|
||||
if !db['extnames'][language_name].include?(sample[:extname])
|
||||
db['extnames'][language_name] << sample[:extname]
|
||||
end
|
||||
end
|
||||
|
||||
# TODO: For now skip empty extnames
|
||||
if fn = sample[:filename]
|
||||
db['filenames'][language.name] ||= []
|
||||
db['filenames'][language.name] << fn
|
||||
db['filenames'][language_name] ||= []
|
||||
db['filenames'][language_name] << fn
|
||||
end
|
||||
|
||||
data = File.read(sample[:path])
|
||||
Classifier.train!(db, language.name, data)
|
||||
Classifier.train!(db, language_name, data)
|
||||
end
|
||||
|
||||
db['md5'] = Linguist::MD5.hexdigest(db)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
|
Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 5.4 KiB |
|
Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 5.4 KiB |
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user