mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			75 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
			
		
		
	
	
			75 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
| require 'linguist/classifier'
 | |
| require 'linguist/language'
 | |
| 
 | |
module Linguist
  # Model for accessing classifier training data.
  #
  # A Sample pairs the path of a fixture file with the Language that the
  # file's parent directory name maps to.
  class Sample
    # Samples live in test/ for now, we'll eventually move them out
    PATH = File.expand_path("../../../test/fixtures", __FILE__)

    # Public: Iterate over each Sample.
    #
    # Categories (directories directly under PATH) and the files inside them
    # are visited in sorted name order, so the iteration order does not
    # depend on the order the filesystem happens to list entries in.
    #
    # &block - Yields Sample to block
    #
    # Returns nothing when a block is given, otherwise an Enumerator over
    # the samples.
    # Raises RuntimeError if a category name is not a known Language alias.
    def self.each(&block)
      return enum_for(:each) unless block_given?

      entries_in(PATH).each do |category|
        # Skip text and binary for now
        # Possibly reconsider this later
        next if category == 'text' || category == 'binary'

        # Map directory name to a Language alias
        language = Linguist::Language.find_by_alias(category)
        raise "No language for #{category.inspect}" unless language

        dirname = File.join(PATH, category)
        entries_in(dirname).each do |filename|
          yield new(File.join(dirname, filename), language)
        end
      end

      nil
    end

    # Public: Build Classifier from all samples.
    #
    # Trains a new Classifier with the language and file contents of every
    # Sample yielded by Sample.each.
    #
    # Returns the result of Classifier#gc (presumably the trained
    # Classifier itself - confirm against Classifier).
    def self.classifier
      classifier = Classifier.new
      each { |sample| classifier.train(sample.language, sample.data) }
      classifier.gc
    end

    # Internal: List the entries of a directory in sorted order.
    #
    # dirname - String path of the directory to list.
    #
    # Returns an Array of entry name Strings without '.' and '..'.
    def self.entries_in(dirname)
      Dir.entries(dirname).reject { |entry| entry == '.' || entry == '..' }.sort
    end
    private_class_method :entries_in

    # Internal: Initialize Sample.
    #
    # Samples should be initialized by Sample.each.
    #
    # path     - String full path to file.
    # language - Language of sample.
    def initialize(path, language)
      @path     = path
      @language = language
    end

    # Public: Get full path to file.
    #
    # Returns String.
    attr_reader :path

    # Public: Get sample language.
    #
    # Returns Language.
    attr_reader :language

    # Public: Read file contents.
    #
    # The file is read from disk on every call; nothing is cached.
    #
    # Returns String.
    def data
      File.read(path)
    end
  end
end
 |