mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
Rename samples subdirectories
This commit is contained in:
@@ -441,8 +441,8 @@ module Linguist
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
extensions = Samples::DATA['extnames'] rescue {} # TODO: BAH!
|
extensions = Samples::DATA['extnames']
|
||||||
filenames = Samples::DATA['filenames'] rescue {} # TODO: BAH!
|
filenames = Samples::DATA['filenames']
|
||||||
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
|
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
|
||||||
|
|
||||||
YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
|
YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
require 'set'
|
|
||||||
require 'yaml'
|
require 'yaml'
|
||||||
|
|
||||||
require 'linguist/md5'
|
require 'linguist/md5'
|
||||||
|
require 'linguist/classifier'
|
||||||
|
|
||||||
module Linguist
|
module Linguist
|
||||||
# Model for accessing classifier training data.
|
# Model for accessing classifier training data.
|
||||||
@@ -27,7 +28,7 @@ module Linguist
|
|||||||
|
|
||||||
# Skip text and binary for now
|
# Skip text and binary for now
|
||||||
# Possibly reconsider this later
|
# Possibly reconsider this later
|
||||||
next if category == 'text' || category == 'binary'
|
next if category == 'Text' || category == 'Binary'
|
||||||
|
|
||||||
dirname = File.join(ROOT, category)
|
dirname = File.join(ROOT, category)
|
||||||
Dir.entries(dirname).each do |filename|
|
Dir.entries(dirname).each do |filename|
|
||||||
@@ -60,32 +61,29 @@ module Linguist
|
|||||||
#
|
#
|
||||||
# Returns trained Classifier.
|
# Returns trained Classifier.
|
||||||
def self.data
|
def self.data
|
||||||
require 'linguist/classifier'
|
|
||||||
require 'linguist/language'
|
|
||||||
|
|
||||||
db = {}
|
db = {}
|
||||||
db['extnames'] = {}
|
db['extnames'] = {}
|
||||||
db['filenames'] = {}
|
db['filenames'] = {}
|
||||||
|
|
||||||
each do |sample|
|
each do |sample|
|
||||||
language = Language.find_by_alias(sample[:language])
|
language_name = sample[:language]
|
||||||
|
|
||||||
# TODO: For now skip empty extnames
|
# TODO: For now skip empty extnames
|
||||||
if sample[:extname] && sample[:extname] != ""
|
if sample[:extname] && sample[:extname] != ""
|
||||||
db['extnames'][language.name] ||= []
|
db['extnames'][language_name] ||= []
|
||||||
if !db['extnames'][language.name].include?(sample[:extname])
|
if !db['extnames'][language_name].include?(sample[:extname])
|
||||||
db['extnames'][language.name] << sample[:extname]
|
db['extnames'][language_name] << sample[:extname]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# TODO: For now skip empty extnames
|
# TODO: For now skip empty extnames
|
||||||
if fn = sample[:filename]
|
if fn = sample[:filename]
|
||||||
db['filenames'][language.name] ||= []
|
db['filenames'][language_name] ||= []
|
||||||
db['filenames'][language.name] << fn
|
db['filenames'][language_name] << fn
|
||||||
end
|
end
|
||||||
|
|
||||||
data = File.read(sample[:path])
|
data = File.read(sample[:path])
|
||||||
Classifier.train!(db, language.name, data)
|
Classifier.train!(db, language_name, data)
|
||||||
end
|
end
|
||||||
|
|
||||||
db['md5'] = Linguist::MD5.hexdigest(db)
|
db['md5'] = Linguist::MD5.hexdigest(db)
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
|
Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 5.4 KiB |
|
Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 5.4 KiB |
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user