Merge pull request #529 from github/more-samples

More samples
This commit is contained in:
Patrick Reynolds
2013-06-06 20:40:12 -07:00
10 changed files with 276 additions and 38 deletions

View File

@@ -14,6 +14,9 @@ module Linguist
# Classifier.train!(db, 'Ruby', "def hello; end")
#
# Returns nothing.
#
# Set LINGUIST_DEBUG=1 to print each language's score breakdown while
# classifying, or =2 to also print the per-token, per-language
# probability table. See also dump_all_tokens, below.
def self.train!(db, language, data)
tokens = Tokenizer.tokenize(data)
@@ -77,9 +80,16 @@ module Linguist
tokens = Tokenizer.tokenize(tokens) if tokens.is_a?(String)
scores = {}
if verbosity >= 2
dump_all_tokens(tokens, languages)
end
languages.each do |language|
scores[language] = tokens_probability(tokens, language) +
language_probability(language)
if verbosity >= 1
printf "%10s = %10.3f + %7.3f = %10.3f\n",
language, tokens_probability(tokens, language), language_probability(language), scores[language]
end
end
scores.sort { |a, b| b[1] <=> a[1] }.map { |score| [score[0], score[1]] }
@@ -119,5 +129,39 @@ module Linguist
# Internal: log-probability of a language, independent of any tokens.
#
# language - key to look up in @languages.
#
# Returns the Float natural log of that language's entry in @languages
# divided by @languages_total (-Infinity when the language has no entry).
def language_probability(language)
  language_count = @languages[language].to_f
  overall_count  = @languages_total.to_f
  Math.log(language_count / overall_count)
end
private
# Internal: debug verbosity level, read from the LINGUIST_DEBUG
# environment variable the first time it is needed and cached afterwards.
#
# Returns an Integer; 0 when the variable is unset or not numeric.
def verbosity
  return @verbosity if @verbosity

  @verbosity = ENV.fetch('LINGUIST_DEBUG', 0).to_i
end
# Internal: show a table of probabilities for each <token,language> pair.
#
# The number in each table entry is the number of "points" that each
# token contributes toward the belief that the file under test is a
# particular language. Points are additive.
#
# Points are the number of times a token appears in the file, times
# how much more likely (log of probability ratio) that token is to
# appear in one language vs. the least-likely language. Dashes
# indicate the least-likely language (and zero points) for each token.
#
# Tokens whose probability is identical for every language are omitted,
# since they cannot influence the result.
#
# tokens    - Array of tokens from the file under test.
# languages - Array of languages; one 10-character column is printed for
#             each, in the given order.
#
# Prints the table to standard output.
def dump_all_tokens(tokens, languages)
  # Width of the token column; 0 when there are no tokens at all.
  width = tokens.map { |tok| tok.size }.max || 0

  # Header: blank token column, "#" over the 5-wide count column, then
  # the language names.
  header = languages.map { |lang| sprintf("%10s", lang) }.join
  puts((' ' * width) + sprintf("%5s", "#") + header)

  counts = Hash.new(0)
  tokens.each { |tok| counts[tok] += 1 }

  counts.sort.each do |tok, count|
    probs = languages.map { |lang| token_probability(tok, lang) }

    # Skip uninformative rows before doing any log math; this also keeps
    # an empty languages list from reaching Math.log(nil).
    next if probs.all? { |prob| prob == probs.first }

    min = probs.min
    minlog = Math.log(min)
    cells = probs.map do |prob|
      if prob == min
        sprintf("%10s", "-")
      else
        sprintf("%10.3f", count * (Math.log(prob) - minlog))
      end
    end
    puts(sprintf("%s%5d", tok.to_s.rjust(width), count) + cells.join)
  end
end
end
end

View File

@@ -169,6 +169,7 @@
".nsi"
],
"Nu": [
".nu",
".script!"
],
"Objective-C": [
@@ -379,8 +380,8 @@
".gemrc"
]
},
"tokens_total": 357870,
"languages_total": 421,
"tokens_total": 358215,
"languages_total": 424,
"tokens": {
"ABAP": {
"*/**": 1,
@@ -25690,9 +25691,67 @@
},
"Nu": {
"SHEBANG#!nush": 1,
"(": 1,
"(": 14,
"puts": 1,
")": 1
")": 14,
";": 22,
"main.nu": 1,
"Entry": 1,
"point": 1,
"for": 1,
"a": 1,
"Nu": 1,
"program.": 1,
"Copyright": 1,
"c": 1,
"Tim": 1,
"Burks": 1,
"Neon": 1,
"Design": 1,
"Technology": 1,
"Inc.": 1,
"load": 4,
"basics": 1,
"cocoa": 1,
"definitions": 1,
"menu": 1,
"generation": 1,
"Aaron": 1,
"Hillegass": 1,
"t": 1,
"retain": 1,
"it.": 1,
"NSApplication": 2,
"sharedApplication": 2,
"setDelegate": 1,
"set": 1,
"delegate": 1,
"ApplicationDelegate": 1,
"alloc": 1,
"init": 1,
"this": 1,
"makes": 1,
"the": 3,
"application": 1,
"window": 1,
"take": 1,
"focus": 1,
"when": 1,
"we": 1,
"ve": 1,
"started": 1,
"it": 1,
"from": 1,
"terminal": 1,
"activateIgnoringOtherApps": 1,
"YES": 1,
"run": 1,
"main": 1,
"Cocoa": 1,
"event": 1,
"loop": 1,
"NSApplicationMain": 1,
"nil": 1
},
"Objective-C": {
"//": 317,
@@ -32842,25 +32901,54 @@
"}": 1
},
"Racket": {
";": 3,
"Clean": 1,
"simple": 1,
"and": 1,
"efficient": 1,
"code": 1,
"-": 100,
"that": 2,
"s": 1,
"the": 3,
"power": 1,
"of": 4,
"Racket": 2,
"http": 1,
"//racket": 1,
"lang.org/": 1,
"(": 25,
"define": 1,
"bottles": 4,
"n": 8,
"more": 2,
")": 25,
"printf": 2,
"case": 1,
"[": 16,
"]": 16,
"else": 1,
"if": 1,
"for": 3,
"in": 3,
"range": 1,
"sub1": 1,
"displayln": 2,
"SHEBANG#!sh": 1,
"#": 2,
"|": 2,
"-": 95,
"*": 2,
"scheme": 1,
"exec": 1,
"racket": 1,
"um": 1,
"(": 7,
"require": 2,
"racket/file": 1,
"racket/path": 1,
"racket/list": 1,
"racket/string": 1,
"for": 2,
"syntax": 1,
"racket/base": 1,
")": 7,
"#lang": 1,
"scribble/manual": 1,
"@": 3,
@@ -32869,17 +32957,13 @@
"{": 2,
"Scribble": 3,
"The": 1,
"Racket": 1,
"Documentation": 1,
"Tool": 1,
"}": 2,
"@author": 1,
"[": 12,
"]": 12,
"is": 3,
"a": 1,
"collection": 1,
"of": 3,
"tools": 1,
"creating": 1,
"prose": 2,
@@ -32889,7 +32973,6 @@
"library": 1,
"documentation": 1,
"etc.": 1,
"in": 2,
"HTML": 1,
"or": 2,
"PDF": 1,
@@ -32902,13 +32985,11 @@
"you": 1,
"write": 1,
"programs": 1,
"that": 1,
"are": 1,
"rich": 1,
"textual": 1,
"content": 2,
"whether": 1,
"the": 2,
"to": 2,
"be": 2,
"typeset": 1,
@@ -32940,7 +33021,6 @@
"file.": 1,
"@table": 1,
"contents": 1,
";": 1,
"@include": 8,
"section": 9,
"@index": 1
@@ -34515,20 +34595,58 @@
"/": 2
},
"Scala": {
"SHEBANG#!sh": 2,
"exec": 2,
"scala": 2,
"#": 2,
"object": 2,
"Beers": 1,
"extends": 1,
"Application": 1,
"{": 10,
"def": 7,
"bottles": 3,
"(": 34,
"qty": 12,
"Int": 3,
"f": 4,
"String": 5,
")": 34,
"//": 4,
"higher": 1,
"-": 4,
"order": 1,
"functions": 2,
"match": 2,
"case": 5,
"+": 29,
"x": 3,
"}": 11,
"beers": 3,
"sing": 3,
"implicit": 3,
"song": 3,
"takeOne": 2,
"nextQty": 2,
"nested": 1,
"if": 2,
"else": 2,
"refrain": 2,
".capitalize": 1,
"tail": 1,
"recursion": 1,
"val": 2,
"headOfSong": 1,
"println": 2,
"parameter": 1,
"name": 4,
"version": 1,
"organization": 1,
"libraryDependencies": 3,
"+": 17,
"%": 12,
"Seq": 3,
"(": 20,
")": 20,
"{": 5,
"val": 1,
"libosmVersion": 4,
"from": 1,
"}": 6,
"maxErrors": 1,
"pollInterval": 1,
"javacOptions": 1,
@@ -34628,20 +34746,12 @@
"Credentials": 2,
"Path.userHome": 1,
"/": 2,
"SHEBANG#!sh": 1,
"exec": 1,
"scala": 1,
"#": 1,
"object": 1,
"HelloWorld": 1,
"def": 1,
"main": 1,
"args": 1,
"Array": 1,
"[": 1,
"String": 1,
"]": 1,
"println": 1
"]": 1
},
"Scheme": {
"(": 359,
@@ -38378,7 +38488,7 @@
"Nginx": 179,
"Nimrod": 1,
"NSIS": 725,
"Nu": 4,
"Nu": 116,
"Objective-C": 26518,
"OCaml": 382,
"Omgrofl": 57,
@@ -38395,13 +38505,13 @@
"Prolog": 4040,
"Python": 4088,
"R": 14,
"Racket": 269,
"Racket": 360,
"Ragel in Ruby Host": 593,
"Rebol": 11,
"Ruby": 3854,
"Rust": 3566,
"Sass": 28,
"Scala": 278,
"Scala": 420,
"Scheme": 3478,
"Scilab": 69,
"SCSS": 39,
@@ -38478,7 +38588,7 @@
"Nginx": 1,
"Nimrod": 1,
"NSIS": 2,
"Nu": 1,
"Nu": 2,
"Objective-C": 19,
"OCaml": 2,
"Omgrofl": 1,
@@ -38495,13 +38605,13 @@
"Prolog": 6,
"Python": 5,
"R": 1,
"Racket": 2,
"Racket": 3,
"Ragel in Ruby Host": 3,
"Rebol": 1,
"Ruby": 16,
"Rust": 1,
"Sass": 1,
"Scala": 2,
"Scala": 3,
"Scheme": 1,
"Scilab": 3,
"SCSS": 1,
@@ -38525,5 +38635,5 @@
"Xtend": 2,
"YAML": 1
},
"md5": "b7833db14f1bfbdce7124fe6f9cff95f"
"md5": "60253432196336ca0df1d8d79a7f16ad"
}

0
samples/JavaScript/js2.script! Normal file → Executable file
View File

27
samples/Nu/RandomApp.nu Normal file
View File

@@ -0,0 +1,27 @@
;; main.nu
;; Entry point for a Nu program.
;;
;; Copyright (c) 2007 Tim Burks, Neon Design Technology, Inc.
(load "Nu:nu") ;; basics
(load "Nu:cocoa") ;; cocoa definitions
(load "Nu:menu") ;; menu generation
(load "randomapp") ;; Aaron Hillegass' famous example
;; define the application delegate class
(class ApplicationDelegate is NSObject
(imethod (void) applicationDidFinishLaunching: (id) sender is
(build-menu default-application-menu "RandomApp")
(set $random ((RandomAppWindowController alloc) init))))
;; install the delegate and keep a reference to it since
;; the application won't retain it.
((NSApplication sharedApplication) setDelegate:
(set delegate ((ApplicationDelegate alloc) init)))
;; this makes the application window take focus when
;; we've started it from the terminal
((NSApplication sharedApplication) activateIgnoringOtherApps:YES)
;; run the main Cocoa event loop
(NSApplicationMain 0 nil)

0
samples/PHP/php-script.script! Normal file → Executable file
View File

0
samples/PHP/php.script! Normal file → Executable file
View File

View File

@@ -0,0 +1,17 @@
; Clean, simple and efficient code -- that's the power of Racket!
; http://racket-lang.org/
(define (bottles n more)
(printf "~a bottle~a of beer~a"
(case n [(0) "no more"] [(1) "1"] [else n])
(if (= n 1) "" "s")
more))
(for ([n (in-range 99 0 -1)])
(bottles n " on the wall, ")
(bottles n ".\n")
(printf "Take one down and pass it around, ")
(bottles (sub1 n) " on the wall.\n\n"))
(displayln "No more bottles of beer on the wall, no more bottles of beer.")
(displayln "Go to the store and buy some more, 99 bottles of beer on the wall.")

0
samples/Ruby/ruby2.script! Normal file → Executable file
View File

View File

@@ -0,0 +1,40 @@
#!/bin/sh
exec scala "$0" "$@"
!#
object Beers extends Application {
def bottles(qty : Int, f : => String) = // higher-order functions
qty match {
case 0 => "no more bottles of beer" + f
case 1 => "1 bottle of beer" + f
case x => x + " bottles of beer" + f
}
def beers(qty : Int) = bottles(qty, " on the wall.")
def sing(qty : Int)(implicit song : String) : String = {
def takeOne =
qty match {
case 0 => "Go to the store and buy some more."
case x => "Take one down and pass it around."
}
def nextQty = // nested functions
if (qty == 0) 99
else qty - 1
def refrain = {
beers(qty).capitalize + " " + bottles(qty, "") + ".\n" +
takeOne + " " + beers(nextQty) + "\n\n"
}
if (qty == -1) song
else sing(qty - 1)(song + refrain) // tail recursion
}
implicit val headOfSong : String = ""
println(sing(99)) // implicit parameter
}

0
samples/Shell/plugin.script! Normal file → Executable file
View File