From e5ae9c328b97fb70d1d1872b9d9eff44063b1da0 Mon Sep 17 00:00:00 2001 From: Joshua Peek Date: Fri, 8 Jun 2012 13:43:57 -0500 Subject: [PATCH] Use language name as hash key --- lib/linguist/classifier.rb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/linguist/classifier.rb b/lib/linguist/classifier.rb index 90375ff9..8cd65f89 100644 --- a/lib/linguist/classifier.rb +++ b/lib/linguist/classifier.rb @@ -4,15 +4,16 @@ module Linguist # Language bayesian classifier. class Classifier def initialize + @tokens_total = 0 + @languages_total = 0 @tokens = Hash.new { |h, k| h[k] = Hash.new(0) } @language_tokens = Hash.new(0) @languages = Hash.new(0) - @languages_total = 0 - @tokens_total = 0 end def train(language, data) - tokens = Tokenizer.new(data).tokens + language = language.name + tokens = Tokenizer.new(data).tokens tokens.each do |token| @tokens[language][token] += 1 @@ -31,7 +32,7 @@ module Linguist scores[language] = tokens_probability(tokens, language) * language_probability(language) end - scores.sort { |a, b| b[1] <=> a[1] } + scores.sort { |a, b| b[1] <=> a[1] }.map { |score| [Language[score[0]], score[1]] } end def tokens_probability(tokens, language)