mirror of
				https://github.com/KevinMidboe/linguist.git
				synced 2025-10-29 17:50:22 +00:00 
			
		
		
		
	Minor docs/naming
This commit is contained in:
		| @@ -15,8 +15,8 @@ module Linguist | |||||||
|     # |     # | ||||||
|     # Returns nothing. |     # Returns nothing. | ||||||
|     # |     # | ||||||
|     # Set LINGUIST_DEBUG=1 or =2 to see probabilities per-token, |     # Set LINGUIST_DEBUG=1 or =2 to see probabilities per-token or | ||||||
|     # per-language.  See also dump_all_tokens, below. |     # per-language.  See also #dump_all_tokens, below. | ||||||
|     def self.train!(db, language, data) |     def self.train!(db, language, data) | ||||||
|       tokens = Tokenizer.tokenize(data) |       tokens = Tokenizer.tokenize(data) | ||||||
|  |  | ||||||
| @@ -151,10 +151,10 @@ module Linguist | |||||||
|         printf "%#{maxlen}s", "" |         printf "%#{maxlen}s", "" | ||||||
|         puts "    #" + languages.map { |lang| sprintf("%10s", lang) }.join |         puts "    #" + languages.map { |lang| sprintf("%10s", lang) }.join | ||||||
|          |          | ||||||
|         tokmap = Hash.new(0) |         token_map = Hash.new(0) | ||||||
|         tokens.each { |tok| tokmap[tok] += 1 } |         tokens.each { |tok| token_map[tok] += 1 } | ||||||
|          |          | ||||||
|         tokmap.sort.each { |tok, count| |         token_map.sort.each { |tok, count| | ||||||
|           arr = languages.map { |lang| [lang, token_probability(tok, lang)] } |           arr = languages.map { |lang| [lang, token_probability(tok, lang)] } | ||||||
|           min = arr.map { |a,b| b }.min |           min = arr.map { |a,b| b }.min | ||||||
|           minlog = Math.log(min) |           minlog = Math.log(min) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user