mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	Rename samples subdirectories
This commit is contained in:
		| @@ -441,8 +441,8 @@ module Linguist | |||||||
|     end |     end | ||||||
|   end |   end | ||||||
|  |  | ||||||
|   extensions = Samples::DATA['extnames'] rescue {} # TODO: BAH! |   extensions = Samples::DATA['extnames'] | ||||||
|   filenames = Samples::DATA['filenames'] rescue {} # TODO: BAH! |   filenames = Samples::DATA['filenames'] | ||||||
|   popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__)) |   popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__)) | ||||||
|  |  | ||||||
|   YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options| |   YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options| | ||||||
|   | |||||||
| @@ -1,6 +1,7 @@ | |||||||
| require 'set' |  | ||||||
| require 'yaml' | require 'yaml' | ||||||
|  |  | ||||||
| require 'linguist/md5' | require 'linguist/md5' | ||||||
|  | require 'linguist/classifier' | ||||||
|  |  | ||||||
| module Linguist | module Linguist | ||||||
|   # Model for accessing classifier training data. |   # Model for accessing classifier training data. | ||||||
| @@ -27,7 +28,7 @@ module Linguist | |||||||
|  |  | ||||||
|         # Skip text and binary for now |         # Skip text and binary for now | ||||||
|         # Possibly reconsider this later |         # Possibly reconsider this later | ||||||
|         next if category == 'text' || category == 'binary' |         next if category == 'Text' || category == 'Binary' | ||||||
|  |  | ||||||
|         dirname = File.join(ROOT, category) |         dirname = File.join(ROOT, category) | ||||||
|         Dir.entries(dirname).each do |filename| |         Dir.entries(dirname).each do |filename| | ||||||
| @@ -60,32 +61,29 @@ module Linguist | |||||||
|     # |     # | ||||||
|     # Returns trained Classifier. |     # Returns trained Classifier. | ||||||
|     def self.data |     def self.data | ||||||
|       require 'linguist/classifier' |  | ||||||
|       require 'linguist/language' |  | ||||||
|  |  | ||||||
|       db = {} |       db = {} | ||||||
|       db['extnames'] = {} |       db['extnames'] = {} | ||||||
|       db['filenames'] = {} |       db['filenames'] = {} | ||||||
|  |  | ||||||
|       each do |sample| |       each do |sample| | ||||||
|         language = Language.find_by_alias(sample[:language]) |         language_name = sample[:language] | ||||||
|  |  | ||||||
|         # TODO: For now skip empty extnames |         # TODO: For now skip empty extnames | ||||||
|         if sample[:extname] && sample[:extname] != "" |         if sample[:extname] && sample[:extname] != "" | ||||||
|           db['extnames'][language.name] ||= [] |           db['extnames'][language_name] ||= [] | ||||||
|           if !db['extnames'][language.name].include?(sample[:extname]) |           if !db['extnames'][language_name].include?(sample[:extname]) | ||||||
|             db['extnames'][language.name] << sample[:extname] |             db['extnames'][language_name] << sample[:extname] | ||||||
|           end |           end | ||||||
|         end |         end | ||||||
|  |  | ||||||
|         # TODO: For now skip empty extnames |         # TODO: For now skip empty extnames | ||||||
|         if fn = sample[:filename] |         if fn = sample[:filename] | ||||||
|           db['filenames'][language.name] ||= [] |           db['filenames'][language_name] ||= [] | ||||||
|           db['filenames'][language.name] << fn |           db['filenames'][language_name] << fn | ||||||
|         end |         end | ||||||
|  |  | ||||||
|         data = File.read(sample[:path]) |         data = File.read(sample[:path]) | ||||||
|         Classifier.train!(db, language.name, data) |         Classifier.train!(db, language_name, data) | ||||||
|       end |       end | ||||||
|  |  | ||||||
|       db['md5'] = Linguist::MD5.hexdigest(db) |       db['md5'] = Linguist::MD5.hexdigest(db) | ||||||
|   | |||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 5.4 KiB | 
| Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 5.4 KiB | 
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user