From 17d0b1e02f49a52381576b107004de98805e015e Mon Sep 17 00:00:00 2001 From: Ted Nyman Date: Sun, 15 Dec 2013 20:17:30 -0800 Subject: [PATCH] More documentation --- README.md | 2 +- lib/linguist/heuristics.rb | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 91f898e5..1300d1ef 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ For disambiguating between files with common extensions, we first apply some common-sense heuristics to pick out obvious languages. After that, we use a [Bayesian classifier](https://github.com/github/linguist/blob/master/lib/linguist/classifier.rb). -For an example, this process us tell the difference between `.h` files which could be either C, C++, or Obj-C. +For an example, this process can help us tell the difference between `.h` files which could be either C, C++, or Obj-C. In the actual GitHub app we deal with `Grit::Blob` objects. For testing, there is a simple `FileBlob` API. diff --git a/lib/linguist/heuristics.rb b/lib/linguist/heuristics.rb index a89f7c70..7a40503a 100644 --- a/lib/linguist/heuristics.rb +++ b/lib/linguist/heuristics.rb @@ -1,16 +1,14 @@ -require 'linguist/tokenizer' - module Linguist # A collection of simple heuristics that can be used to better analysis languages. class Heuristics - # Public: Given an array of String language names, a - # apply all heuristics against the given data and return an array + # Public: Given an array of String language names, + # apply heuristics against the given data and return an array # of matching languages, or nil. # # data - Array of tokens or String data to analyze. # languages - Array of language name Strings to restrict to. # - # Returns an array of language name Strings, or [] + # Returns an array of Languages or [] def self.find_by_heuristics(data, languages) if languages.all? { |l| ["Objective-C", "C++"].include?(l) } disambiguate_h(data, languages) @@ -19,6 +17,8 @@ module Linguist # .h extensions are ambigious between C, C++, and Objective-C. # We want to shortcut look for Objective-C. + # + # Returns an array of Languages or [] def self.disambiguate_h(data, languages) matches = [] matches << Language["Objective-C"] if data.include?("@interface") @@ -26,4 +26,3 @@ module Linguist end end end -