Try to classify language types

This commit is contained in:
Joshua Peek
2011-07-05 20:48:06 -05:00
parent 6611f174e5
commit 8f46cd0748
4 changed files with 112 additions and 173 deletions

View File

@@ -16,6 +16,9 @@ module Linguist
@extension_index = {} @extension_index = {}
@filename_index = {} @filename_index = {}
# Valid Languages types
TYPES = [:markup, :programming]
# Internal: Test if extension maps to multiple Languages. # Internal: Test if extension maps to multiple Languages.
# #
# Returns true or false. # Returns true or false.
@@ -198,6 +201,12 @@ module Linguist
# @name is required # @name is required
@name = attributes[:name] || raise(ArgumentError, "missing name") @name = attributes[:name] || raise(ArgumentError, "missing name")
# Set type
@type = attributes[:type] ? attributes[:type].to_sym : nil
if @type && !TYPES.include?(@type)
raise ArgumentError, "invalid type: #{@type}"
end
# Set aliases # Set aliases
@aliases = [default_alias_name] + (attributes[:aliases] || []) @aliases = [default_alias_name] + (attributes[:aliases] || [])
@@ -213,17 +222,12 @@ module Linguist
@overrides = attributes[:overrides] || [] @overrides = attributes[:overrides] || []
@filenames = attributes[:filenames] || [] @filenames = attributes[:filenames] || []
# Set popular, major, and searchable flags # Set popular and searchable flags
@popular = attributes.key?(:popular) ? attributes[:popular] : false @popular = attributes.key?(:popular) ? attributes[:popular] : false
@major = attributes.key?(:major) ? attributes[:major] : false
@searchable = attributes.key?(:searchable) ? attributes[:searchable] : true @searchable = attributes.key?(:searchable) ? attributes[:searchable] : true
# If group name is set, save the name so we can lazy load it later # If group name is set, save the name so we can lazy load it later
if attributes[:group_name] if attributes[:group_name]
if major?
warn "#{name} is a major language, it should not be grouped with #{attributes[:group_name]}"
end
@group = nil @group = nil
@group_name = attributes[:group_name] @group_name = attributes[:group_name]
@@ -231,7 +235,6 @@ module Linguist
else else
@group = self @group = self
end end
end end
# Public: Get proper name # Public: Get proper name
@@ -245,6 +248,11 @@ module Linguist
# Returns the name String # Returns the name String
attr_reader :name attr_reader :name
# Public: Get type.
#
# Returns a type Symbol or nil.
attr_reader :type
# Public: Get aliases # Public: Get aliases
# #
# Examples # Examples
@@ -303,12 +311,6 @@ module Linguist
# Public: Get Language group # Public: Get Language group
# #
# Minor languages may be grouped with major languages for
# accounting purposes. For example, JSP files are grouped as
# Java.
#
# For major languages, group should always return self.
#
# Returns a Language # Returns a Language
def group def group
@group ||= Language.find_by_name(@group_name) @group ||= Language.find_by_name(@group_name)
@@ -328,26 +330,6 @@ module Linguist
!popular? !popular?
end end
# Public: Is it major language?
#
# Major languages should be actual programming
# languages. Configuration formats should be excluded.
#
# Returns true or false
def major?
@major
end
# Public: Is it a minor language?
#
# Minor languages include variants of major languages and
# markup languages like HTML and YAML.
#
# Returns true or false
def minor?
!major?
end
# Public: Is it searchable? # Public: Is it searchable?
# #
# Unsearchable languages won't be indexed by solr and won't show # Unsearchable languages won't be indexed by solr and won't show
@@ -400,6 +382,7 @@ module Linguist
YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options| YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
Language.create( Language.create(
:name => name, :name => name,
:type => options['type'],
:aliases => options['aliases'], :aliases => options['aliases'],
:lexer => options['lexer'], :lexer => options['lexer'],
:group_name => options['group'], :group_name => options['group'],
@@ -408,7 +391,6 @@ module Linguist
:extensions => options['extensions'], :extensions => options['extensions'],
:overrides => options['overrides'], :overrides => options['overrides'],
:filenames => options['filenames'], :filenames => options['filenames'],
:major => options['major'],
:popular => popular.include?(name) :popular => popular.include?(name)
) )
end end

View File

@@ -5,13 +5,12 @@
# lexer exists in lexers.yml. This is a list of lexers available in our # lexer exists in lexers.yml. This is a list of lexers available in our
# version of pygments. # version of pygments.
# #
# type - Either programming, markup, or nil
# lexer - An explicit lexer String (defaults to name.downcase) # lexer - An explicit lexer String (defaults to name.downcase)
# aliases - An Array of additional aliases (implicitly # aliases - An Array of additional aliases (implicitly
# includes name.downcase) # includes name.downcase)
# extensions - An Array of associated extensions # extensions - An Array of associated extensions
# overrides - An Array of extensions that takes precedence over conflicts # overrides - An Array of extensions that takes precedence over conflicts
# major - Boolean flag major programming languages. Please leave
# this option to GitHub staff to decide.
# searchable - Boolean flag to enable searching (defaults to true) # searchable - Boolean flag to enable searching (defaults to true)
# search_term - Deprecated: Some languages may be indexed under a # search_term - Deprecated: Some languages may be indexed under a
# different alias. Avoid defining new exceptions. # different alias. Avoid defining new exceptions.
@@ -22,7 +21,7 @@
# Please keep this list alphabetized. # Please keep this list alphabetized.
ASP: ASP:
major: true type: programming
lexer: aspx-vb lexer: aspx-vb
search_term: aspx-vb search_term: aspx-vb
aliases: aliases:
@@ -38,7 +37,7 @@ ASP:
- .asp - .asp
ActionScript: ActionScript:
major: true type: programming
lexer: ActionScript 3 lexer: ActionScript 3
search_term: as3 search_term: as3
aliases: aliases:
@@ -47,7 +46,7 @@ ActionScript:
- .as - .as
Ada: Ada:
major: true type: programming
extensions: extensions:
- .adb - .adb
- .ads - .ads
@@ -58,13 +57,13 @@ AppleScript:
- .applescript - .applescript
Arc: Arc:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .arc - .arc
Assembly: Assembly:
major: true type: programming
lexer: NASM lexer: NASM
search_term: nasm search_term: nasm
aliases: aliases:
@@ -73,6 +72,7 @@ Assembly:
- .asm - .asm
Batchfile: Batchfile:
type: programming
group: Shell group: Shell
search_term: bat search_term: bat
aliases: aliases:
@@ -90,7 +90,7 @@ BlitzMax:
- .bmx - .bmx
Boo: Boo:
major: true type: programming
extensions: extensions:
- .boo - .boo
@@ -100,7 +100,7 @@ Brainfuck:
- .bf - .bf
C: C:
major: true type: programming
overrides: overrides:
- .h - .h
extensions: extensions:
@@ -108,7 +108,7 @@ C:
- .h - .h
C#: C#:
major: true type: programming
search_term: csharp search_term: csharp
aliases: aliases:
- csharp - csharp
@@ -116,7 +116,7 @@ C#:
- .cs - .cs
C++: C++:
major: true type: programming
search_term: cpp search_term: cpp
aliases: aliases:
- cpp - cpp
@@ -148,19 +148,19 @@ ChucK:
- .ck - .ck
Clojure: Clojure:
major: true type: programming
extensions: extensions:
- .clj - .clj
CoffeeScript: CoffeeScript:
major: true type: programming
extensions: extensions:
- .coffee - .coffee
filenames: filenames:
- Cakefile - Cakefile
ColdFusion: ColdFusion:
major: true type: programming
lexer: Coldfusion HTML lexer: Coldfusion HTML
search_term: cfm search_term: cfm
aliases: aliases:
@@ -170,7 +170,7 @@ ColdFusion:
- .cfc - .cfc
Common Lisp: Common Lisp:
major: true type: programming
aliases: aliases:
- lisp - lisp
extensions: extensions:
@@ -190,6 +190,7 @@ Cucumber:
- .feature - .feature
Cython: Cython:
type: programming
group: Python group: Python
extensions: extensions:
- .pyx - .pyx
@@ -197,7 +198,7 @@ Cython:
- .pxi - .pxi
D: D:
major: true type: programming
extensions: extensions:
- .d - .d
- .di - .di
@@ -216,7 +217,7 @@ Darcs Patch:
- .dpatch - .dpatch
Delphi: Delphi:
major: true type: programming
extensions: extensions:
- .pas - .pas
@@ -226,18 +227,18 @@ Diff:
- .patch - .patch
Dylan: Dylan:
major: true type: programming
extensions: extensions:
- .dylan - .dylan
Eiffel: Eiffel:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .e - .e
Emacs Lisp: Emacs Lisp:
major: true type: programming
lexer: Scheme lexer: Scheme
aliases: aliases:
- elisp - elisp
@@ -246,13 +247,13 @@ Emacs Lisp:
- .emacs - .emacs
Erlang: Erlang:
major: true type: programming
extensions: extensions:
- .hrl - .hrl
- .erl - .erl
F#: F#:
major: true type: programming
lexer: OCaml lexer: OCaml
search_term: ocaml search_term: ocaml
extensions: extensions:
@@ -261,7 +262,7 @@ F#:
- .fsx - .fsx
FORTRAN: FORTRAN:
major: true type: programming
lexer: Fortran lexer: Fortran
extensions: extensions:
- .f - .f
@@ -270,17 +271,18 @@ FORTRAN:
- .F90 - .F90
Factor: Factor:
major: true type: programming
extensions: extensions:
- .factor - .factor
Fancy: Fancy:
major: true type: programming
extensions: extensions:
- .fy - .fy
- .fancypack - .fancypack
GAS: GAS:
type: programming
group: Assembly group: Assembly
extensions: extensions:
- .s - .s
@@ -312,7 +314,7 @@ Gettext Catalog:
- .pot - .pot
Go: Go:
major: true type: programming
extensions: extensions:
- .go - .go
@@ -328,13 +330,14 @@ Groff:
- '.7' - '.7'
Groovy: Groovy:
major: true type: programming
lexer: Java lexer: Java
extensions: extensions:
- .gradle - .gradle
- .groovy - .groovy
HTML: HTML:
type: markup
extensions: extensions:
- .html - .html
- .xhtml - .xhtml
@@ -342,12 +345,14 @@ HTML:
- .xslt - .xslt
HTML+Django: HTML+Django:
type: markup
group: HTML group: HTML
lexer: HTML+Django/Jinja lexer: HTML+Django/Jinja
extensions: extensions:
- .mustache - .mustache
HTML+ERB: HTML+ERB:
type: markup
group: HTML group: HTML
lexer: RHTML lexer: RHTML
extensions: extensions:
@@ -355,12 +360,13 @@ HTML+ERB:
- .html.erb - .html.erb
HTML+PHP: HTML+PHP:
type: markup
group: HTML group: HTML
extensions: extensions:
- .phtml - .phtml
HaXe: HaXe:
major: true type: programming
lexer: haXe lexer: haXe
extensions: extensions:
- .hx - .hx
@@ -368,11 +374,12 @@ HaXe:
- .mtt - .mtt
Haml: Haml:
type: markup
extensions: extensions:
- .haml - .haml
Haskell: Haskell:
major: true type: programming
extensions: extensions:
- .hs - .hs
- .hsc - .hsc
@@ -394,7 +401,7 @@ IRC log:
- .weechatlog - .weechatlog
Io: Io:
major: true type: programming
extensions: extensions:
- .io - .io
@@ -406,7 +413,7 @@ JSON:
- .json - .json
Java: Java:
major: true type: programming
extensions: extensions:
- .java - .java
- .pde - .pde
@@ -421,7 +428,7 @@ Java Server Pages:
- .jsp - .jsp
JavaScript: JavaScript:
major: true type: programming
aliases: aliases:
- js - js
- node - node
@@ -447,6 +454,7 @@ LilyPond:
- .ily - .ily
Literate Haskell: Literate Haskell:
type: programming
group: Haskell group: Haskell
search_term: lhs search_term: lhs
aliases: aliases:
@@ -455,7 +463,7 @@ Literate Haskell:
- .lhs - .lhs
Lua: Lua:
major: true type: programming
extensions: extensions:
- .lua - .lua
- .nse - .nse
@@ -471,6 +479,7 @@ Mako:
- .mao - .mao
Markdown: Markdown:
type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .md - .md
@@ -485,7 +494,7 @@ Matlab:
- .matlab - .matlab
Max/MSP: Max/MSP:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .mxt - .mxt
@@ -494,7 +503,7 @@ MiniD: # Legacy
searchable: false searchable: false
Mirah: Mirah:
major: true type: programming
lexer: Ruby lexer: Ruby
search_term: ruby search_term: ruby
extensions: extensions:
@@ -516,7 +525,7 @@ Nimrod:
- .nim - .nim
Nu: Nu:
major: true type: programming
lexer: Scheme lexer: Scheme
aliases: aliases:
- nush - nush
@@ -533,7 +542,7 @@ NumPy:
- .numpyw - .numpyw
OCaml: OCaml:
major: true type: programming
extensions: extensions:
- .ml - .ml
- .mly - .mly
@@ -546,7 +555,7 @@ ObjDump:
- .objdump - .objdump
Objective-C: Objective-C:
major: true type: programming
overrides: overrides:
- .m - .m
extensions: extensions:
@@ -555,19 +564,20 @@ Objective-C:
- .mm - .mm
Objective-J: Objective-J:
major: true type: programming
extensions: extensions:
- .j - .j
- .sj - .sj
OpenCL: OpenCL:
type: programming
group: C group: C
lexer: C lexer: C
extensions: extensions:
- .cl - .cl
PHP: PHP:
major: true type: programming
extensions: extensions:
- .php - .php
- .aw - .aw
@@ -587,7 +597,7 @@ Parrot Internal Representation:
- .pasm - .pasm
Perl: Perl:
major: true type: programming
overrides: overrides:
- .pl - .pl
extensions: extensions:
@@ -601,20 +611,20 @@ Perl:
- .psgi - .psgi
Prolog: Prolog:
major: true type: programming
extensions: extensions:
- .pl - .pl
- .pro - .pro
- .prolog - .prolog
Pure Data: Pure Data:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .pd - .pd
Python: Python:
major: true type: programming
extensions: extensions:
- .py - .py
- .pyw - .pyw
@@ -629,7 +639,7 @@ Python traceback:
- .pytb - .pytb
R: R:
major: true type: programming
lexer: S lexer: S
overrides: overrides:
- .r - .r
@@ -638,12 +648,13 @@ R:
- .R - .R
RHTML: RHTML:
type: markup
group: HTML group: HTML
extensions: extensions:
- .rhtml - .rhtml
Racket: Racket:
major: true type: programming
lexer: Scheme lexer: Scheme
extensions: extensions:
- .rkt - .rkt
@@ -671,7 +682,7 @@ Redcode:
- .cw - .cw
Ruby: Ruby:
major: true type: programming
aliases: aliases:
- jruby - jruby
- macruby - macruby
@@ -706,12 +717,12 @@ Sass:
- .sass - .sass
Scala: Scala:
major: true type: programming
extensions: extensions:
- .scala - .scala
Scheme: Scheme:
major: true type: programming
extensions: extensions:
- .sls - .sls
- .ss - .ss
@@ -719,13 +730,13 @@ Scheme:
- .scm - .scm
Self: Self:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .self - .self
Shell: Shell:
major: true type: programming
lexer: Bash lexer: Bash
search_term: bash search_term: bash
aliases: aliases:
@@ -745,7 +756,7 @@ Shell:
- .zshrc - .zshrc
Smalltalk: Smalltalk:
major: true type: programming
extensions: extensions:
- .st - .st
@@ -762,17 +773,18 @@ Standard ML:
- .sml - .sml
SuperCollider: SuperCollider:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .sc - .sc
Tcl: Tcl:
major: true type: programming
extensions: extensions:
- .tcl - .tcl
Tcsh: Tcsh:
type: programming
group: Shell group: Shell
extensions: extensions:
- .tcsh - .tcsh
@@ -792,30 +804,31 @@ Text:
- .txt - .txt
Textile: Textile:
type: markup
lexer: Text only lexer: Text only
extensions: extensions:
- .textile - .textile
VHDL: VHDL:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .vhdl - .vhdl
- .vhd - .vhd
Vala: Vala:
major: true type: programming
extensions: extensions:
- .vala - .vala
Verilog: Verilog:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .v - .v
VimL: VimL:
major: true type: programming
search_term: vim search_term: vim
aliases: aliases:
- vim - vim
@@ -826,7 +839,7 @@ VimL:
- .gvimrc - .gvimrc
Visual Basic: Visual Basic:
major: true type: programming
lexer: Text only lexer: Text only
extensions: extensions:
- .bas - .bas
@@ -836,6 +849,7 @@ Visual Basic:
- .vb - .vb
XML: XML:
type: markup
extensions: extensions:
- .xml - .xml
- .rss - .rss
@@ -848,7 +862,7 @@ XML:
- .rdf - .rdf
XQuery: XQuery:
major: true type: programming
extensions: extensions:
- .xq - .xq
- .xqm - .xqm
@@ -861,6 +875,7 @@ XS:
- .xs - .xs
YAML: YAML:
type: markup
extensions: extensions:
- .yml - .yml
- .yaml - .yaml
@@ -873,12 +888,13 @@ mupad:
- .mu - .mu
ooc: ooc:
major: true type: programming
lexer: Ooc lexer: Ooc
extensions: extensions:
- .ooc - .ooc
reStructuredText: reStructuredText:
type: markup
search_term: rst search_term: rst
aliases: aliases:
- rst - rst

View File

@@ -70,12 +70,9 @@ module Linguist
# Skip vendored or generated blobs # Skip vendored or generated blobs
next if blob.vendored? || blob.generated? || blob.language.nil? next if blob.vendored? || blob.generated? || blob.language.nil?
# Get language group # Only include programming languages
language = blob.language.group if blob.language.type == :programming
@sizes[blob.language.group] += blob.size
# Only include major languages
if language.major?
@sizes[language] += blob.size
end end
end end

View File

@@ -213,23 +213,18 @@ class TestLanguage < Test::Unit::TestCase
assert_equal Language['reStructuredText'], Language.find_by_alias('rst') assert_equal Language['reStructuredText'], Language.find_by_alias('rst')
end end
def test_major_groups
Language.all.each do |language|
if language.major?
assert_equal language, language.group
end
end
end
def test_groups def test_groups
assert_equal Language['Assembly'], Language['GAS'].group assert_equal Language['Assembly'], Language['GAS'].group
assert_equal Language['C'], Language['OpenCL'].group assert_equal Language['C'], Language['OpenCL'].group
assert_equal Language['Haskell'], Language['Literate Haskell'].group assert_equal Language['Haskell'], Language['Literate Haskell'].group
assert_equal Language['Java'], Language['Java Server Pages'].group assert_equal Language['Java'], Language['Java Server Pages'].group
assert_equal Language['JavaScript'], Language['JSON'].group assert_equal Language['JavaScript'], Language['JSON'].group
assert_equal Language['Perl'], Language['Perl'].group
assert_equal Language['Python'], Language['Cython'].group assert_equal Language['Python'], Language['Cython'].group
assert_equal Language['Python'], Language['NumPy'].group assert_equal Language['Python'], Language['NumPy'].group
assert_equal Language['Python'], Language['Python traceback'].group assert_equal Language['Python'], Language['Python traceback'].group
assert_equal Language['Python'], Language['Python'].group
assert_equal Language['Ruby'], Language['Ruby'].group
assert_equal Language['Shell'], Language['Batchfile'].group assert_equal Language['Shell'], Language['Batchfile'].group
assert_equal Language['Shell'], Language['Gentoo Ebuild'].group assert_equal Language['Shell'], Language['Gentoo Ebuild'].group
assert_equal Language['Shell'], Language['Gentoo Eclass'].group assert_equal Language['Shell'], Language['Gentoo Eclass'].group
@@ -285,72 +280,21 @@ class TestLanguage < Test::Unit::TestCase
assert Language['Brainfuck'].unpopular? assert Language['Brainfuck'].unpopular?
end end
def test_major def test_programming
# Add an assertion to this list if you add/change any major assert_equal :programming, Language['JavaScript'].type
# settings in languages.yml. Please keep this list alphabetized. assert_equal :programming, Language['Perl'].type
assert Language['ASP'].major? assert_equal :programming, Language['Python'].type
assert Language['ActionScript'].major? assert_equal :programming, Language['Ruby'].type
assert Language['Ada'].major?
assert Language['Arc'].major?
assert Language['Assembly'].major?
assert Language['Boo'].major?
assert Language['C#'].major?
assert Language['C'].major?
assert Language['C++'].major?
assert Language['Clojure'].major?
assert Language['CoffeeScript'].major?
assert Language['ColdFusion'].major?
assert Language['Common Lisp'].major?
assert Language['D'].major?
assert Language['Delphi'].major?
assert Language['Dylan'].major?
assert Language['Eiffel'].major?
assert Language['Emacs Lisp'].major?
assert Language['Erlang'].major?
assert Language['F#'].major?
assert Language['FORTRAN'].major?
assert Language['Factor'].major?
assert Language['Go'].major?
assert Language['Groovy'].major?
assert Language['HaXe'].major?
assert Language['Haskell'].major?
assert Language['Io'].major?
assert Language['Java'].major?
assert Language['JavaScript'].major?
assert Language['Lua'].major?
assert Language['Max/MSP'].major?
assert Language['Nu'].major?
assert Language['OCaml'].major?
assert Language['Objective-C'].major?
assert Language['Objective-J'].major?
assert Language['PHP'].major?
assert Language['Perl'].major?
assert Language['Prolog'].major?
assert Language['Pure Data'].major?
assert Language['Python'].major?
assert Language['R'].major?
assert Language['Racket'].major?
assert Language['Ruby'].major?
assert Language['Scala'].major?
assert Language['Scheme'].major?
assert Language['Self'].major?
assert Language['Smalltalk'].major?
assert Language['SuperCollider'].major?
assert Language['Tcl'].major?
assert Language['VHDL'].major?
assert Language['Vala'].major?
assert Language['Verilog'].major?
assert Language['VimL'].major?
assert Language['Visual Basic'].major?
assert Language['XQuery'].major?
assert Language['ooc'].major?
end end
def test_minor def test_markup
assert Language['Brainfuck'].minor? assert_equal :markup, Language['HTML'].type
assert Language['HTML'].minor? assert_equal :markup, Language['YAML'].type
assert Language['Makefile'].minor? end
assert Language['YAML'].minor?
def test_other
assert_nil Language['Brainfuck'].type
assert_nil Language['Makefile'].type
end end
def test_searchable def test_searchable