Compare commits


141 Commits

Author SHA1 Message Date
Joshua Peek
ae137847b4 Linguist 2.3.3 2012-09-04 09:32:21 -05:00
Scott J. Goldman
5443dc50a3 Merge pull request #247 from github/check-size-first
When testing if a blob is indexable or safe to colorize, check size first
2012-09-02 00:09:51 -07:00
Scott J. Goldman
fc435a2541 Linguist 2.3.2 2012-09-02 00:08:37 -07:00
Scott J. Goldman
04394750e7 When testing if a blob is safe to colorize, check size first
Similar to e415a13
2012-09-02 00:08:37 -07:00
Scott J. Goldman
e415a1351b When testing if a blob is indexable, check size first
Otherwise, charlock_holmes will allocate another large binary
buffer for testing the encoding, which is a problem if the binary
blob is many hundreds of MB large. It'll just fail and crash ruby.
2012-08-31 22:47:19 -07:00
Joshua Peek
6ec907a915 Merge pull request #245 from jcazevedo/master
Add Shell sample
2012-08-28 10:55:11 -07:00
Joao Azevedo
1f55f01fa9 Add Shell sample 2012-08-28 18:01:46 +01:00
Joshua Peek
5d79b88875 Linguist 2.3.1 2012-08-27 11:34:55 -05:00
Joshua Peek
458890b4b9 Add C++ sample 2012-08-27 11:33:28 -05:00
Joshua Peek
89267f792d Rebuild samples db 2012-08-27 11:30:44 -05:00
Joshua Peek
b183fcca05 Only read up to 100KB 2012-08-27 11:30:38 -05:00
Joshua Peek
684a57dbc0 Add another C sample 2012-08-27 11:21:57 -05:00
Joshua Peek
400086a5c8 Add more C samples
Closes #237
2012-08-23 13:38:16 -05:00
Joshua Peek
38b966a554 Linguist 2.3.0 2012-08-20 11:50:35 -05:00
Joshua Peek
31b0df67b7 Require newer mime-type gem 2012-08-20 11:42:04 -05:00
Joshua Peek
cfe496e9fc Drop mime type module
Closes #206
2012-08-20 11:40:32 -05:00
Joshua Peek
b85aeaad3e Inline mime type lookup into blob helper 2012-08-20 11:33:16 -05:00
Joshua Peek
64f3509222 Remove other mime type hacks 2012-08-20 11:29:22 -05:00
Joshua Peek
f8df871d85 Only double check binary mime type when lazy loading blob 2012-08-20 11:20:37 -05:00
Joshua Peek
620150d188 Only double check with binary mime type when lazy loading blob 2012-08-20 11:14:45 -05:00
Joshua Peek
630dca515a Trim down mime type overrides that are old or now pushed upstream
Related #206
2012-08-20 11:11:42 -05:00
Joshua Peek
d2de997fcc Add more Prolog samples
Closes #233
2012-08-20 10:48:36 -05:00
Joshua Peek
b8711f8ccf Merge pull request #228 from github/cpp-samples
Add more C++ samples
2012-08-20 08:36:10 -07:00
Joshua Peek
34aaab19b2 Rebuild samples db 2012-08-20 10:34:37 -05:00
Joshua Peek
220108857c Skip emiting comment tokens 2012-08-20 10:34:07 -05:00
Joshua Peek
657adaabec Add more C++ samples
Closes #225
2012-08-15 11:57:55 -07:00
Joshua Peek
a41f40a30e Remove extname from bin out 2012-08-15 09:31:01 -07:00
Joshua Peek
080cd097ba Merge branch 'brcooley-master' 2012-08-13 18:18:04 -07:00
Joshua Peek
866e446dbe Rebuild samples db 2012-08-13 18:17:47 -07:00
Joshua Peek
897f39083d Rename to magic .script! ext 2012-08-13 18:17:44 -07:00
brc
f8a7d11808 Adding extensionless script to Shell samples 2012-08-13 18:07:28 -07:00
Joshua Peek
ed70d29943 Linguist 2.2.1 2012-08-07 10:39:29 -05:00
Joshua Peek
dbb089f610 Fix nil data passed to generated 2012-08-07 10:39:08 -05:00
Joshua Peek
23357736b1 Merge branch 'kassi-master' 2012-08-06 10:23:12 -05:00
Joshua Peek
a35fa88f50 Add more applescript samples 2012-08-06 10:22:38 -05:00
Joshua Peek
a13f246e4f Update samples db 2012-08-06 09:54:40 -05:00
Karsten Silkenbäumer
f55e53c650 Removed scpt sample due to binary file format (extension) 2012-08-06 16:49:09 +02:00
Karsten Silkenbäumer
b6a7b41783 Change primary extension for applescript 2012-08-05 23:51:24 +02:00
Karsten Silkenbäumer
704a3e03d6 Add type programming to applescript 2012-08-05 23:48:26 +02:00
Joshua Peek
566eaefda9 Linguist 2.2.0 2012-08-03 16:47:34 -05:00
Joshua Peek
047d23862e Still index .txt 2012-08-03 16:34:53 -05:00
Joshua Peek
804e23e995 Extract seperate language detection method 2012-08-03 16:03:06 -05:00
Joshua Peek
41b7d13aa7 Extract generated blob check into its own module 2012-08-03 15:47:50 -05:00
Joshua Peek
4531103033 Forgot to move hidden samples to the correct dir 2012-08-03 15:25:38 -05:00
Joshua Peek
96267e8696 Sort test assertion 2012-08-03 15:11:30 -05:00
Joshua Peek
16a67cb852 Move shebang detection into classifier
Fixes #203
2012-08-03 15:07:36 -05:00
Joshua Peek
fbbaff09cd Stop treating text as a language 2012-08-03 13:55:51 -05:00
Joshua Peek
6014bd015e Change find_by_filename api to return all matching languages 2012-08-03 13:53:12 -05:00
Joshua Peek
4a06d2ea7e Merge branch 'jeanSapristi-master' 2012-07-24 11:51:54 -05:00
Joshua Peek
22efcf7aff Update samples db 2012-07-24 11:51:37 -05:00
Joshua Peek
e5d302459f Fix tokenzing empty strings 2012-07-24 11:49:29 -05:00
Joshua Peek
7aac87681b Add brackets to tokens 2012-07-24 11:28:46 -05:00
Joshua Peek
53300ca581 Add brackets to tokens 2012-07-24 11:28:27 -05:00
Joshua Peek
52833b58d5 Rebuild samples db 2012-07-24 11:23:42 -05:00
Joshua Peek
f5705eaf38 Parse float tokens 2012-07-24 11:23:06 -05:00
Joshua Peek
e2a91bba3e json extension is provided by samples 2012-07-24 11:12:57 -05:00
Joshua Peek
be1340bafc Add a few more json samples 2012-07-24 11:12:33 -05:00
Joshua Peek
9777798cf7 Move max json into json samples 2012-07-24 11:10:57 -05:00
Joshua Peek
b7c4d96e5f Max extensions are already covered by samples 2012-07-24 11:05:08 -05:00
Joshua Peek
e816a0a1b1 Update samples db 2012-07-24 11:04:24 -05:00
Joshua Peek
1bc9f555e6 Fix max samples dir 2012-07-24 11:03:34 -05:00
Joshua Peek
059f661eb6 Rename Max/MSP to Max 2012-07-24 11:03:09 -05:00
jeanSapristi
efbcd51ff6 Add samples for MaxMSP 2012-07-24 17:40:04 +02:00
Nicolas Danet
9f782fc261 Update lib/linguist/languages.yml 2012-07-24 12:25:03 +03:00
Nicolas Danet
5c2bdfd733 Add extensions for Max/MSP 2012-07-24 09:03:06 +03:00
Joshua Peek
ade20e4b46 Linguist 2.1.2 2012-07-23 17:20:36 -05:00
Joshua Peek
aedbe1d5b7 Rebuild samples db 2012-07-23 17:20:05 -05:00
Joshua Peek
65d05e02c9 name can be nil 2012-07-23 17:19:11 -05:00
Joshua Peek
b4378ca89f Linguist 2.1.1 2012-07-23 16:59:19 -05:00
Joshua Peek
12b27deb78 Allow for newer version of pygments.rb 2012-07-23 16:58:57 -05:00
Joshua Peek
9250d46b5b Linguist 2.1.0 2012-07-23 16:54:21 -05:00
Joshua Peek
6ac9138aed Remove pathname
Closes #207
2012-07-23 16:50:30 -05:00
Joshua Peek
bacfd4e832 Fix test task 2012-07-23 16:40:16 -05:00
Joshua Peek
840f6e44d9 Update samples db 2012-07-23 16:24:16 -05:00
Joshua Peek
8a933105fb Fix ruby dir 2012-07-23 16:22:28 -05:00
Joshua Peek
8535c17d98 Merge branch 'rabl' of https://github.com/remiprev/linguist into remiprev-rabl 2012-07-23 16:22:02 -05:00
Joshua Peek
fca6a9a9e1 Fix md5 nesting 2012-07-23 16:19:56 -05:00
Joshua Peek
a81105d987 Always rebuild samples 2012-07-23 16:05:20 -05:00
Joshua Peek
12d2e2ec74 Switch to json for samples db 2012-07-23 16:02:14 -05:00
Joshua Peek
f44b31148a Remove unused var 2012-07-23 15:53:32 -05:00
Joshua Peek
7b6caa0f6c Rename samples subdirectories 2012-07-23 15:52:49 -05:00
Joshua Peek
314f0e4852 Use simple yaml dump for now 2012-07-23 15:05:08 -05:00
Joshua Peek
afedf2557d Store extnames in samples.yml 2012-07-23 15:00:42 -05:00
Rémi Prévost
5497bbbd2b Add sample rabl file 2012-07-23 15:37:13 -04:00
Joshua Peek
5cda67530d Seperate test file for samples 2012-07-23 13:55:30 -05:00
Joshua Peek
447aef2183 Don't set DATA const if samples db is missing 2012-07-23 13:51:46 -05:00
Joshua Peek
6842044b52 Store md5 in samples db 2012-07-23 13:50:35 -05:00
Joshua Peek
bf944f6d1a Make classify a function on the Classifier 2012-07-23 13:47:15 -05:00
Joshua Peek
b9779e805e Move outdated check to samples 2012-07-23 13:21:30 -05:00
Joshua Peek
80e8ee7ce6 Rename Sample -> Samples 2012-07-23 13:15:27 -05:00
Joshua Peek
0c9a947f39 Load classifer db into sample data hash 2012-07-23 13:13:52 -05:00
Joshua Peek
97ae7c1a11 Move classifer db to samples.yml 2012-07-23 13:05:08 -05:00
Joshua Peek
3172bf5b46 Remove gc for now 2012-07-23 12:23:20 -05:00
Joshua Peek
5b28336d56 Move db verification into tests 2012-07-23 12:21:26 -05:00
Joshua Peek
b7f58d96cb Compare md5s of dbs 2012-07-23 12:17:32 -05:00
Joshua Peek
d6fb95b06f Add nested md5 digest 2012-07-23 12:13:08 -05:00
Joshua Peek
db88e143ba Dump classifier as plain hash 2012-07-23 11:21:55 -05:00
Joshua Peek
817e3cb946 Clobber before generating classifier 2012-07-23 11:19:33 -05:00
Joshua Peek
95c0985952 Drop defaults in classifier hash 2012-07-23 10:46:54 -05:00
Joshua Peek
13dbd76e94 Merge branch 'jeanlouisboudart-easyant' 2012-07-21 09:40:46 -05:00
Joshua Peek
8ecf2ad322 Update classifier db 2012-07-21 09:40:37 -05:00
Joshua Peek
d23ab8dcda Revert "Add easyant files support to syntax highlighting .ant and .ivy files are both XML"
This reverts commit 0b98354638.
2012-07-21 09:39:51 -05:00
Jean-Louis Boudart
1027047bc7 Add easyant sample files 2012-07-21 14:21:16 +02:00
Jean-Louis Boudart
0b98354638 Add easyant files support to syntax highlighting .ant and .ivy files are both XML 2012-07-21 14:21:03 +02:00
Joshua Peek
36da52e383 Move specific filename samples into their own dir 2012-07-20 17:20:45 -05:00
Joshua Peek
5a5d334999 Use implied extensions from samples 2012-07-20 17:07:08 -05:00
Joshua Peek
144655f2c5 Require explicit primary_extension 2012-07-20 16:54:49 -05:00
Joshua Peek
a775b00d9d Load extensions from samples 2012-07-20 16:37:46 -05:00
Joshua Peek
eb2c07e511 Sample returns simple hash objects 2012-07-20 16:17:37 -05:00
Joshua Peek
ee0ffa0516 Fix sample training 2012-07-20 15:56:16 -05:00
Joshua Peek
f461150eea Fix classifier test task 2012-07-20 15:54:41 -05:00
Joshua Peek
7292bdc180 Change Classifier to accept language name Strings 2012-07-20 15:52:27 -05:00
Joshua Peek
bc84a98b54 Set unused var to _ 2012-07-20 15:43:23 -05:00
Joshua Peek
e58f268258 Associate .module with drupal php 2012-07-20 15:42:21 -05:00
Joshua Peek
bbc5225086 Pending samples work now 2012-07-20 15:36:48 -05:00
Joshua Peek
0867e7b69b Remove old language disambiguation functions 2012-07-20 15:30:53 -05:00
Joshua Peek
2637d8dc55 Add tokenize helper to Tokenize class 2012-07-20 15:14:58 -05:00
Joshua Peek
79be8b8c67 Set unused var to _ 2012-07-20 15:07:19 -05:00
Joshua Peek
3f391c8694 Update classifier db 2012-07-20 15:06:45 -05:00
Joshua Peek
175d4244c2 Extract single and multi line comment parser 2012-07-20 15:06:21 -05:00
Joshua Peek
d063089430 Add coq comments 2012-07-20 14:45:19 -05:00
Joshua Peek
0dcca7228d Merge branch 'seldridge-master' 2012-07-20 14:38:27 -05:00
Joshua Peek
cb01be8a4d Update classifier db 2012-07-20 14:38:09 -05:00
Joshua Peek
6a62ba7b28 Merge branch 'master' of https://github.com/seldridge/linguist into seldridge-master 2012-07-20 14:37:07 -05:00
Joshua Peek
c1181b1f93 Update classifier 2012-07-20 14:36:59 -05:00
Schuyler Eldridge
7363241531 New Verilog and Coq sample files added
New Verilog examples and Coq examples for additional training have
been added since linguist is currently failing Coq/Verilog recognition
tasks (see #201). In case it wasn't obvious, linguist will not
currently pass these new, added test cases.
2012-07-20 10:49:12 -04:00
Ted Nyman
1ac6e87b75 s/to/too in bin/linguist 2012-07-13 23:10:14 -07:00
Joshua Peek
138589e88c Merge pull request #196 from borgified/master
small typo
2012-07-13 13:44:36 -07:00
Fwiffo
41548405b1 small typo 2012-07-13 13:19:26 -07:00
Matthew McCullough
97399acb73 Merge pull request #187 from chochos/master
Add Ceylon to master language list
2012-07-05 10:12:34 -07:00
Enrique Zamudio
80e9639688 Add "text only" lexer to Ceylon 2012-07-05 10:53:38 -05:00
Enrique Zamudio
6d20232027 Add Ceylon 2012-07-03 22:25:56 -05:00
Enrique Zamudio
a1525460a3 Add sample file for Ceylon 2012-07-03 22:25:22 -05:00
Joshua Peek
01ef10116e Fix typo 2012-06-22 10:33:47 -05:00
Joshua Peek
41f7f8414c Fix samples/ link 2012-06-22 10:32:42 -05:00
Joshua Peek
189a123760 Quote all YAML keys cause fuck it 2012-06-22 10:30:50 -05:00
Joshua Peek
a7108e4086 Fix calling to_yaml on 1.9 2012-06-22 10:15:14 -05:00
Joshua Peek
5521dd08a0 Move test fixtures to samples/ 2012-06-22 10:09:24 -05:00
Joshua Peek
b571c47a1c Update contributing docs 2012-06-22 10:04:30 -05:00
Joshua Peek
cf624e44ff Linguist 2.0.1 2012-06-21 11:48:08 -05:00
Joshua Peek
2b712dc790 Guard against classify nil data 2012-06-21 11:47:32 -05:00
329 changed files with 47128 additions and 20671 deletions


@@ -8,7 +8,9 @@ We use this library at GitHub to detect blob languages, highlight code, ignore b
Linguist defines the list of all languages known to GitHub in a [yaml file](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml). In order for a file to be highlighted, a language and lexer must be defined there.
Most languages are detected by their file extension. This is the fastest and most common situation. For script files, which are usually extensionless, we do "deep content inspection"™ and check the shebang of the file. Checking the file's contents may also be used for disambiguating languages. C, C++ and Obj-C all use `.h` files. Looking for common keywords, we are usually able to guess the correct language.
Most languages are detected by their file extension. This is the fastest and most common situation.
For disambiguating between files with common extensions, we use a [Bayesian classifier](https://github.com/github/linguist/blob/master/lib/linguist/classifier.rb). For example, this helps us tell the difference between `.h` files, which could be either C, C++, or Obj-C.
In the actual GitHub app we deal with `Grit::Blob` objects. For testing, there is a simple `FileBlob` API.
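As a rough sketch (the gem and its native dependencies must be installed; the path passed to `FileBlob` is illustrative):

    require 'linguist/file_blob'

    # FileBlob wraps a path on disk and responds to name, data and size,
    # just like Grit::Blob does in the GitHub app.
    blob = Linguist::FileBlob.new("lib/linguist/language.rb")
    blob.language.name  #=> "Ruby"
    blob.sloc           #=> number of non-blank lines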
@@ -22,23 +24,11 @@ See [lib/linguist/language.rb](https://github.com/github/linguist/blob/master/li
The actual syntax highlighting is handled by our Pygments wrapper, [pygments.rb](https://github.com/tmm1/pygments.rb). It also provides a [Lexer abstraction](https://github.com/tmm1/pygments.rb/blob/master/lib/pygments/lexer.rb) that determines which highlighter should be used on a file.
We typically run on a prerelease version of Pygments to get early access to new lexers. The [lexers.yml](https://github.com/github/linguist/blob/master/lib/linguist/lexers.yml) file is a dump of the lexers we have available on our server. If there is a new lexer in pygments-main not on the list, [open an issue](https://github.com/github/linguist/issues) and we'll try to upgrade it soon.
### MIME type detection
Most of the MIME types handling is done by the Ruby [mime-types gem](https://github.com/halostatue/mime-types). But we have our own list of additions and overrides. To add or modify this list, see [lib/linguist/mimes.yml](https://github.com/github/linguist/blob/master/lib/linguist/mimes.yml).
MIME types are used to set the Content-Type of raw binary blobs which are served from a special `raw.github.com` domain. However, all text blobs are served as `text/plain` regardless of their type to ensure they open in the browser rather than downloading.
The MIME type also determines whether a blob is binary or plain text. So if you're seeing a blob that says "View Raw" and it is actually plain text, the mime type and encoding probably need to be stated explicitly.
Linguist::FileBlob.new("linguist.zip").binary? #=> true
See [lib/linguist/mimes.yml](https://github.com/github/linguist/blob/master/lib/linguist/mimes.yml).
We typically run on a prerelease version of Pygments, [pygments.rb](https://github.com/tmm1/pygments.rb), to get early access to new lexers. The [lexers.yml](https://github.com/github/linguist/blob/master/lib/linguist/lexers.yml) file is a dump of the lexers we have available on our server.
### Stats
The [Language Graph](https://github.com/github/linguist/graphs/languages) is built by aggregating the languages of all repo's blobs. The top language in the graph determines the project's primary language. Collectively, these stats make up the [Top Languages](https://github.com/languages) page.
The Language Graph you see on every repository is built by aggregating the languages of all repo's blobs. The top language in the graph determines the project's primary language. Collectively, these stats make up the [Top Languages](https://github.com/languages) page.
The repository stats API can be used on a directory:
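Here is a hedged sketch of that usage, assuming `Linguist::Repository.from_directory` as the entry point (the directory and outputs are illustrative):

    require 'linguist'

    project = Linguist::Repository.from_directory(".")
    project.language.name  #=> "Ruby"
    project.languages      #=> Hash mapping each Language to its size in bytes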
@@ -70,7 +60,9 @@ See [Linguist::BlobHelper#generated?](https://github.com/github/linguist/blob/ma
## Installation
To get it, clone the repo and run [Bundler](http://gembundler.com/) to install its dependencies.
github.com is usually running the latest version of the `github-linguist` gem that is released on [RubyGems.org](http://rubygems.org/gems/github-linguist).
But for development you are going to want to check out the source. To get it, clone the repo and run [Bundler](http://gembundler.com/) to install its dependencies.
git clone https://github.com/github/linguist.git
cd linguist/
@@ -80,17 +72,14 @@ To run the tests:
bundle exec rake test
*Since this code is specific to GitHub, it is not published as an official rubygem.*
If you are seeing errors like `StandardError: could not find any magic files!`, it means the CharlockHolmes gem didn't install correctly. See the [installing section](https://github.com/brianmario/charlock_holmes/blob/master/README.md) of the CharlockHolmes README for more information.
## Contributing
1. Fork it.
2. Create a branch (`git checkout -b detect-foo-language`)
3. Make your changes
4. Run the tests (`bundle install` then `bundle exec rake`)
5. Commit your changes (`git commit -am "Added detection for the new Foo language"`)
6. Push to the branch (`git push origin detect-foo-language`)
7. Create a [Pull Request](http://help.github.com/pull-requests/) from your branch.
8. Promote it. Get others to drop in and +1 it.
The majority of patches won't need to touch any Ruby code at all. The [master language list](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml) is just a configuration file.
Almost all bug fixes or new language additions should come with some additional code samples. Just drop them under [`samples/`](https://github.com/github/linguist/tree/master/samples) in the correct subdirectory and our test suite will automatically test them. In most cases you shouldn't need to add any new assertions.
### Testing
Sometimes getting the tests running can be too much work, especially if you don't have much Ruby experience. It's okay, be lazy and let our build bot [Travis](http://travis-ci.org/#!/github/linguist) run the tests for you. Just open a pull request and the bot will start cranking away.
Here's our current build status, which is hopefully green: [![Build Status](https://secure.travis-ci.org/github/linguist.png?branch=master)](http://travis-ci.org/github/linguist)


@@ -7,23 +7,21 @@ Rake::TestTask.new do |t|
t.warning = true
end
file 'lib/linguist/classifier.yml' => Dir['test/fixtures/**/*'] do |f|
require 'linguist/sample'
classifier = Linguist::Sample.classifier
File.open(f.name, 'w') { |io| YAML.dump(classifier, io) }
task :samples do
require 'linguist/samples'
require 'yajl'
data = Linguist::Samples.data
json = Yajl::Encoder.encode(data, :pretty => true)
File.open('lib/linguist/samples.json', 'w') { |io| io.write json }
end
CLOBBER.include 'lib/linguist/classifier.yml'
task :classifier => ['lib/linguist/classifier.yml']
namespace :classifier do
LIMIT = 1_000
desc "Run classifier against #{LIMIT} public gists"
task :test do
require 'linguist/classifier'
require 'linguist/samples'
total, correct, incorrect = 0, 0, 0
$stdout.sync = true
@@ -32,10 +30,10 @@ namespace :classifier do
next if file_language.nil? || file_language == 'Text'
begin
data = open(file_url).read
guessed_language, score = Linguist::Classifier.instance.classify(data).first
guessed_language, score = Linguist::Classifier.classify(Linguist::Samples::DATA, data).first
total += 1
guessed_language.name == file_language ? correct += 1 : incorrect += 1
guessed_language == file_language ? correct += 1 : incorrect += 1
print "\r\e[0K%d:%d %g%%" % [correct, incorrect, (correct.to_f/total.to_f)*100]
$stdout.flush


@@ -23,12 +23,11 @@ elsif File.file?(path)
puts "#{blob.name}: #{blob.loc} lines (#{blob.sloc} sloc)"
puts " type: #{type}"
puts " extension: #{blob.pathname.extname}"
puts " mime type: #{blob.mime_type}"
puts " language: #{blob.language}"
if blob.large?
puts " blob is to large to be shown"
puts " blob is too large to be shown"
end
if blob.generated?


@@ -1,6 +1,6 @@
Gem::Specification.new do |s|
s.name = 'github-linguist'
s.version = '2.0.0'
s.version = '2.3.3'
s.summary = "GitHub Language detection"
s.authors = "GitHub"
@@ -10,8 +10,10 @@ Gem::Specification.new do |s|
s.add_dependency 'charlock_holmes', '~> 0.6.6'
s.add_dependency 'escape_utils', '~> 0.2.3'
s.add_dependency 'mime-types', '~> 1.18'
s.add_dependency 'pygments.rb', '~> 0.2.13'
s.add_dependency 'mime-types', '~> 1.19'
s.add_dependency 'pygments.rb', '>= 0.2.13'
s.add_development_dependency 'mocha'
s.add_development_dependency 'json'
s.add_development_dependency 'rake'
s.add_development_dependency 'yajl-ruby'
end


@@ -1,5 +1,5 @@
require 'linguist/blob_helper'
require 'linguist/generated'
require 'linguist/language'
require 'linguist/mime'
require 'linguist/pathname'
require 'linguist/repository'
require 'linguist/samples'


@@ -1,10 +1,9 @@
require 'linguist/classifier'
require 'linguist/generated'
require 'linguist/language'
require 'linguist/mime'
require 'linguist/pathname'
require 'charlock_holmes'
require 'escape_utils'
require 'mime/types'
require 'pygments'
require 'yaml'
@@ -12,13 +11,6 @@ module Linguist
# BlobHelper is a mixin for Blobish classes that respond to "name",
# "data" and "size" such as Grit::Blob.
module BlobHelper
# Internal: Get a Pathname wrapper for Blob#name
#
# Returns a Pathname.
def pathname
Pathname.new(name || "")
end
# Public: Get the extname of the path
#
# Examples
@@ -28,7 +20,23 @@ module Linguist
#
# Returns a String
def extname
pathname.extname
File.extname(name.to_s)
end
# Internal: Lookup mime type for extension.
#
# Returns a MIME::Type
def _mime_type
if defined? @_mime_type
@_mime_type
else
guesses = ::MIME::Types.type_for(extname.to_s)
# Prefer text mime types over binary
@_mime_type = guesses.detect { |type| type.ascii? } ||
# Otherwise use the first guess
guesses.first
end
end
# Public: Get the actual blob mime type
@@ -40,7 +48,14 @@ module Linguist
#
# Returns a mime type String.
def mime_type
@mime_type ||= pathname.mime_type
_mime_type ? _mime_type.to_s : 'text/plain'
end
# Internal: Is the blob binary according to its mime type
#
# Return true or false
def binary_mime_type?
_mime_type ? _mime_type.binary? : false
end
# Public: Get the Content-Type header value
@@ -72,7 +87,7 @@ module Linguist
elsif name.nil?
"attachment"
else
"attachment; filename=#{EscapeUtils.escape_url(pathname.basename)}"
"attachment; filename=#{EscapeUtils.escape_url(File.basename(name))}"
end
end
@@ -91,15 +106,6 @@ module Linguist
@detect_encoding ||= CharlockHolmes::EncodingDetector.new.detect(data) if data
end
# Public: Is the blob binary according to its mime type
#
# Return true or false
def binary_mime_type?
if mime_type = Mime.lookup_mime_type_for(pathname.extname)
mime_type.binary?
end
end
# Public: Is the blob binary?
#
# Return true or false
@@ -136,22 +142,6 @@ module Linguist
['.png', '.jpg', '.jpeg', '.gif'].include?(extname)
end
# Public: Is the blob a possible drupal php file?
#
# Return true or false
def drupal_extname?
['.module', '.install', '.test', '.inc'].include?(extname)
end
# Public: Is the blob likely to have a shebang?
#
# Return true or false
def shebang_extname?
extname.empty? &&
mode &&
(mode.to_i(8) & 05) == 05
end
MEGABYTE = 1024 * 1024
# Public: Is the blob too big to load?
@@ -170,7 +160,7 @@ module Linguist
#
# Return true or false
def safe_to_colorize?
text? && !large? && !high_ratio_of_long_lines?
!large? && text? && !high_ratio_of_long_lines?
end
# Internal: Does the blob have a ratio of long lines?
@@ -235,143 +225,16 @@ module Linguist
lines.grep(/\S/).size
end
# Internal: Compute average line length.
#
# Returns Integer.
def average_line_length
if lines.any?
lines.inject(0) { |n, l| n += l.length } / lines.length
else
0
end
end
# Public: Is the blob a generated file?
#
# Generated source code is suppressed in diffs and is ignored by
# language statistics.
#
# Requires Blob#data
#
# Includes:
# - XCode project XML files
# - Minified JavaScript
# - Compiled CoffeeScript
# - PEG.js-generated parsers
#
# Please add additional test coverage to
# `test/test_blob.rb#test_generated` if you make any changes.
# May load Blob#data
#
# Return true or false
def generated?
if name == 'Gemfile.lock' || minified_javascript? || compiled_coffeescript? ||
xcode_project_file? || generated_net_docfile? || generated_parser?
true
else
false
end
end
# Internal: Is the blob an XCode project file?
#
# Generated if the file extension is an XCode project
# file extension.
#
# Returns true or false.
def xcode_project_file?
['.xib', '.nib', '.storyboard', '.pbxproj', '.xcworkspacedata', '.xcuserstate'].include?(extname)
end
# Internal: Is the blob minified JS?
#
# Consider JS minified if the average line length is
# greater than 100 characters.
#
# Returns true or false.
def minified_javascript?
return unless extname == '.js'
average_line_length > 100
end
# Internal: Is the blob of JS a parser generated by PEG.js?
#
# Requires Blob#data
#
# PEG.js-generated parsers are not meant to be consumed by humans.
#
# Return true or false
def generated_parser?
return false unless extname == '.js'
# PEG.js-generated parsers include a comment near the top of the file
# that marks them as such.
if lines[0..4].join('') =~ /^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG.js/
return true
end
false
end
# Internal: Is the blob of JS generated by CoffeeScript?
#
# Requires Blob#data
#
# CoffeeScript is meant to output JS that would be difficult to
# tell if it was generated or not. Look for a number of patterns
# output by the CS compiler.
#
# Return true or false
def compiled_coffeescript?
return false unless extname == '.js'
# JS generated by CoffeeScript > 1.2 includes a comment on the first line
if lines[0] =~ /^\/\/ Generated by /
return true
end
if lines[0] == '(function() {' && # First line is module closure opening
lines[-2] == '}).call(this);' && # Second to last line closes module closure
lines[-1] == '' # Last line is blank
score = 0
lines.each do |line|
if line =~ /var /
# Underscored temp vars are likely to be Coffee
score += 1 * line.gsub(/(_fn|_i|_len|_ref|_results)/).count
# bind and extend functions are very Coffee specific
score += 3 * line.gsub(/(__bind|__extends|__hasProp|__indexOf|__slice)/).count
end
end
# Require a score of 3. This is fairly arbitrary. Consider
# tweaking later.
score >= 3
else
false
end
end
# Internal: Is this a generated documentation file for a .NET assembly?
#
# Requires Blob#data
#
# .NET developers often check in the XML Intellisense file along with an
# assembly - however, these don't have a special extension, so we have to
# dig into the contents to determine if it's a docfile. Luckily, these files
# are extremely structured, so recognizing them is easy.
#
# Returns true or false
def generated_net_docfile?
return false unless extname.downcase == ".xml"
return false unless lines.count > 3
# .NET Docfiles always open with <doc> and their first tag is an
# <assembly> tag
return lines[1].include?("<doc>") &&
lines[2].include?("<assembly>") &&
lines[-2].include?("</doc>")
@_generated ||= Generated.generated?(name, lambda { data })
end
# Public: Should the blob be indexed for searching?
@@ -387,16 +250,18 @@ module Linguist
#
# Return true or false
def indexable?
if binary?
if size > 100 * 1024
false
elsif binary?
false
elsif extname == '.txt'
true
elsif language.nil?
false
elsif !language.searchable?
false
elsif generated?
false
elsif size > 100 * 1024
false
else
true
end
@@ -408,33 +273,15 @@ module Linguist
#
# Returns a Language or nil if none is detected
def language
if defined? @language
@language
return @language if defined? @language
if defined?(@data) && @data.is_a?(String)
data = @data
else
@language = guess_language
data = lambda { binary_mime_type? ? "" : self.data }
end
end
# Internal: Guess language
#
# Please add additional test coverage to
# `test/test_blob.rb#test_language` if you make any changes.
#
# Returns a Language or nil
def guess_language
return if binary_mime_type?
# Disambiguate between multiple language extensions
disambiguate_extension_language ||
# See if there is a Language for the extension
pathname.language ||
# Look for idioms in first line
first_line_language ||
# Try to detect Language from shebang line
shebang_language
@language = Language.detect(name.to_s, data, mode)
end
# Internal: Get the lexer of the blob.
@@ -444,247 +291,6 @@ module Linguist
language ? language.lexer : Pygments::Lexer.find_by_name('Text only')
end
# Internal: Disambiguates between multiple language extensions.
#
# Delegates to "guess_EXTENSION_language".
#
# Please add additional test coverage to
# `test/test_blob.rb#test_language` if you add another method.
#
# Returns a Language or nil.
def disambiguate_extension_language
if Language.ambiguous?(extname)
# name = "guess_#{extname.sub(/^\./, '')}_language"
# send(name) if respond_to?(name)
possible_languages = Language.all.select { |l| l.extensions.include?(extname) }
if possible_languages.any?
if result = Classifier.instance.classify(data, possible_languages).first
result[0]
end
end
end
end
# Internal: Guess language of .cls files
#
# Returns a Language.
def guess_cls_language
if lines.grep(/^(%|\\)/).any?
Language['TeX']
elsif lines.grep(/^\s*(CLASS|METHOD|INTERFACE).*:\s*/i).any? || lines.grep(/^\s*(USING|DEFINE)/i).any?
Language['OpenEdge ABL']
elsif lines.grep(/\{$/).any? || lines.grep(/\}$/).any?
Language['Apex']
elsif lines.grep(/^(\'\*|Attribute|Option|Sub|Private|Protected|Public|Friend)/i).any?
Language['Visual Basic']
else
# The most common language should be the fallback
Language['TeX']
end
end
# Internal: Guess language of header files (.h).
#
# Returns a Language.
def guess_h_language
if lines.grep(/^@(interface|property|private|public|end)/).any?
Language['Objective-C']
elsif lines.grep(/^class |^\s+(public|protected|private):/).any?
Language['C++']
else
Language['C']
end
end
# Internal: Guess language of .m files.
#
# Objective-C heuristics:
# * Keywords ("#import", "#include", "#ifdef", "#define", "@end") or "//" and opening "/*" comments
#
# Matlab heuristics:
# * Leading "function " or "classdef " keyword
# * "%" comments
#
# Note: All "#" keywords, e.g., "#import", are guaranteed to be Objective-C. Because the at sign ("@")
# is used to create function handles and anonymous functions in Matlab, most "@" keywords are not
# safe heuristics. However, "end" is a reserved term in Matlab and can't be used to create a valid
# function handle. Because @end is required to close any @implementation, @property, @interface,
# @synthesize, etc. directive in Objective-C, only @end needs to be checked for.
#
# Returns a Language.
def guess_m_language
# Objective-C keywords or comments
if lines.grep(/^#(import|include|ifdef|define)|@end/).any? || lines.grep(/^\s*\/\//).any? || lines.grep(/^\s*\/\*/).any?
Language['Objective-C']
# Matlab file function or class or comments
elsif lines.any? && lines.first.match(/^\s*(function |classdef )/) || lines.grep(/^\s*%/).any?
Language['Matlab']
# Fallback to Objective-C, don't want any Matlab false positives
else
Language['Objective-C']
end
end
# Internal: Guess language of .pl files
#
# The rules for disambiguation are:
#
# 1. Many perl files begin with a shebang
# 2. Most Prolog source files have a rule somewhere (marked by the :- operator)
# 3. Default to Perl, because it is more popular
#
# Returns a Language.
def guess_pl_language
if shebang_script == 'perl'
Language['Perl']
elsif lines.grep(/:-/).any?
Language['Prolog']
else
Language['Perl']
end
end
# Internal: Guess language of .r files.
#
# Returns a Language.
def guess_r_language
if lines.grep(/(rebol|(:\s+func|make\s+object!|^\s*context)\s*\[)/i).any?
Language['Rebol']
else
Language['R']
end
end
# Internal: Guess language of .t files.
#
# Returns a Language.
def guess_t_language
score = 0
score += 1 if lines.grep(/^% /).any?
score += data.gsub(/ := /).count
score += data.gsub(/proc |procedure |fcn |function /).count
score += data.gsub(/var \w+: \w+/).count
# Tell-tale signs it's gotta be Perl
if lines.grep(/^(my )?(sub |\$|@|%)\w+/).any?
score = 0
end
if score >= 3
Language['Turing']
else
Language['Perl']
end
end
# Internal: Guess language of .v files.
#
# Returns a Language
def guess_v_language
if lines.grep(/^(\/\*|\/\/|module|parameter|input|output|wire|reg|always|initial|begin|\`)/).any?
Language['Verilog']
else
Language['Coq']
end
end
# Internal: Guess language of .gsp files.
#
# Returns a Language.
def guess_gsp_language
if lines.grep(/<%|<%@|\$\{|<%|<g:|<meta name="layout"|<r:/).any?
Language['Groovy Server Pages']
else
Language['Gosu']
end
end
# Internal: Guess language from the first line.
#
# Look for leading "<?php" in Drupal files
#
# Returns a Language.
def first_line_language
# Only check files with drupal php extensions
return unless drupal_extname?
# Fail fast if blob isn't viewable?
return unless viewable?
if lines.first.to_s =~ /^<\?php/
Language['PHP']
end
end
# Internal: Extract the script name from the shebang line
#
# Requires Blob#data
#
# Examples
#
# '#!/usr/bin/ruby'
# # => 'ruby'
#
# '#!/usr/bin/env ruby'
# # => 'ruby'
#
# '#!/usr/bash/python2.4'
# # => 'python'
#
# Please add additional test coverage to
# `test/test_blob.rb#test_shebang_script` if you make any changes.
#
# Returns a script name String or nil
def shebang_script
# Fail fast if blob isn't viewable?
return unless viewable?
if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/
bang.sub!(/^#! /, '#!')
tokens = bang.split(' ')
pieces = tokens.first.split('/')
if pieces.size > 1
script = pieces.last
else
script = pieces.first.sub('#!', '')
end
script = script == 'env' ? tokens[1] : script
# python2.4 => python
if script =~ /((?:\d+\.?)+)/
script.sub! $1, ''
end
# Check for multiline shebang hacks that exec themselves
#
# #!/bin/sh
# exec foo "$0" "$@"
#
if script == 'sh' &&
lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
script = $1
end
script
end
end
# Internal: Get Language for shebang script
#
# Returns the Language or nil
def shebang_language
# Skip file extensions unlikely to have shebangs
return unless shebang_extname?
if script = shebang_script
Language[script]
end
end
# Public: Highlight syntax of blob
#
# options - A Hash of options (defaults to {})
@@ -710,12 +316,5 @@ module Linguist
''
end
end
Language.overridden_extensions.each do |extension|
name = "guess_#{extension.sub(/^\./, '')}_language".to_sym
unless instance_methods.map(&:to_sym).include?(name)
raise NotImplementedError, "Language##{name} was not defined"
end
end
end
end


@@ -1,112 +1,88 @@
require 'linguist/language'
require 'linguist/tokenizer'
module Linguist
# Language bayesian classifier.
class Classifier
# Internal: Path to persisted classifier db.
PATH = File.expand_path('../classifier.yml', __FILE__)
# Public: Check if persisted db exists on disk.
#
# Returns Boolean.
def self.exist?
File.exist?(PATH)
end
# Public: Get persisted Classifier instance.
#
# Returns Classifier.
def self.instance
@instance ||= YAML.load_file(PATH)
end
# Public: Initialize a Classifier.
def initialize
@tokens_total = 0
@languages_total = 0
@tokens = Hash.new { |h, k| h[k] = Hash.new(0) }
@language_tokens = Hash.new(0)
@languages = Hash.new(0)
end
# Public: Compare Classifier objects.
#
# other - Classifier object to compare to.
#
# Returns Boolean.
def eql?(other)
# Lazy fast check counts only
other.is_a?(self.class) &&
@tokens_total == other.instance_variable_get(:@tokens_total) &&
@languages_total == other.instance_variable_get(:@languages_total)
end
alias_method :==, :eql?
# Public: Train classifier that data is a certain language.
#
# language - Language of data
# db - Hash classifier database object
# language - String language of data
# data - String contents of file
#
# Examples
#
# train(Language['Ruby'], "def hello; end")
# Classifier.train(db, 'Ruby', "def hello; end")
#
# Returns nothing.
def train(language, data)
language = language.name
tokens = Tokenizer.new(data).tokens
def self.train!(db, language, data)
tokens = Tokenizer.tokenize(data)
db['tokens_total'] ||= 0
db['languages_total'] ||= 0
db['tokens'] ||= {}
db['language_tokens'] ||= {}
db['languages'] ||= {}
tokens.each do |token|
@tokens[language][token] += 1
@language_tokens[language] += 1
@tokens_total += 1
db['tokens'][language] ||= {}
db['tokens'][language][token] ||= 0
db['tokens'][language][token] += 1
db['language_tokens'][language] ||= 0
db['language_tokens'][language] += 1
db['tokens_total'] += 1
end
@languages[language] += 1
@languages_total += 1
db['languages'][language] ||= 0
db['languages'][language] += 1
db['languages_total'] += 1
nil
end
# Public: Verify internal counts are consistent.
#
# Returns Boolean.
def verify
@languages.inject(0) { |n, (l, c)| n += c } == @languages_total &&
@language_tokens.inject(0) { |n, (l, c)| n += c } == @tokens_total &&
@tokens.inject(0) { |n, (l, ts)| n += ts.inject(0) { |m, (t, c)| m += c } } == @tokens_total
end
# Public: Prune infrequent tokens.
#
# Returns receiver Classifier instance.
def gc
self
end
# Public: Guess language of data.
#
# db - Hash of classifier tokens database.
# data - Array of tokens or String data to analyze.
# languages - Array of Languages to restrict to.
# languages - Array of language name Strings to restrict to.
#
# Examples
#
# classify("def hello; end")
# # => [ [Language['Ruby'], 0.90], [Language['Python'], 0.2], ... ]
# Classifier.classify(db, "def hello; end")
# # => [ ['Ruby', 0.90], ['Python', 0.2], ... ]
#
# Returns sorted Array of result pairs. Each pair contains the
# Language and a Float score.
def classify(tokens, languages = @languages.keys)
tokens = Tokenizer.new(tokens).tokens if tokens.is_a?(String)
# String language name and a Float score.
def self.classify(db, tokens, languages = nil)
languages ||= db['languages'].keys
new(db).classify(tokens, languages)
end
# Internal: Initialize a Classifier.
def initialize(db = {})
@tokens_total = db['tokens_total']
@languages_total = db['languages_total']
@tokens = db['tokens']
@language_tokens = db['language_tokens']
@languages = db['languages']
end
# Internal: Guess language of data
#
# data - Array of tokens or String data to analyze.
# languages - Array of language name Strings to restrict to.
#
# Returns sorted Array of result pairs. Each pair contains the
# String language name and a Float score.
def classify(tokens, languages)
return [] if tokens.nil?
tokens = Tokenizer.tokenize(tokens) if tokens.is_a?(String)
scores = {}
languages.each do |language|
language_name = language.is_a?(Language) ? language.name : language
scores[language_name] = tokens_probability(tokens, language_name) +
language_probability(language_name)
scores[language] = tokens_probability(tokens, language) +
language_probability(language)
end
scores.sort { |a, b| b[1] <=> a[1] }.map { |score| [Language[score[0]], score[1]] }
scores.sort { |a, b| b[1] <=> a[1] }.map { |score| [score[0], score[1]] }
end
# Internal: Probability of a set of tokens in a language occurring - P(D | C)
@@ -143,41 +119,5 @@ module Linguist
def language_probability(language)
Math.log(@languages[language].to_f / @languages_total.to_f)
end
# Public: Serialize classifier to YAML.
#
# opts - Hash of YAML options.
#
# Returns nothing.
def to_yaml(io)
data = "--- !ruby/object:Linguist::Classifier\n"
data << "languages_total: #{@languages_total}\n"
data << "tokens_total: #{@tokens_total}\n"
data << "languages:\n"
@languages.sort.each do |language, count|
data << " #{{language => count}.to_yaml.lines.to_a[1]}"
end
data << "language_tokens:\n"
@language_tokens.sort.each do |language, count|
data << " #{{language => count}.to_yaml.lines.to_a[1]}"
end
data << "tokens:\n"
@tokens.sort.each do |language, tokens|
data << " #{{language => true}.to_yaml.lines.to_a[1].sub(/ true/, "")}"
tokens.sort.each do |token, count|
data << " #{{token => count}.to_yaml.lines.to_a[1]}"
end
end
io.write data
nil
end
end
# Eager load instance
Classifier.instance if Classifier.exist?
end
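To make the new class-level API concrete, here is a minimal sketch built only from the methods shown in this diff; the db is a plain Hash and the score value is illustrative:

    require 'linguist/classifier'

    db = {}
    Linguist::Classifier.train!(db, 'Ruby',   "def hello\n  puts 'hi'\nend")
    Linguist::Classifier.train!(db, 'Python', "def hello():\n    print 'hi'")

    # classify returns [[language_name, score], ...] sorted best match first.
    Linguist::Classifier.classify(db, "def foo; end").first
    #=> ["Ruby", -11.4]  (score is illustrative)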

File diff suppressed because it is too large

lib/linguist/generated.rb (new file, 162 lines)

@@ -0,0 +1,162 @@
module Linguist
class Generated
# Public: Is the blob a generated file?
#
# name - String filename
# data - String blob data. A block may also be passed in for lazy
# loading. This behavior is deprecated and you should always
# pass in a String.
#
# Return true or false
def self.generated?(name, data)
new(name, data).generated?
end
# Internal: Initialize Generated instance
#
# name - String filename
# data - String blob data
def initialize(name, data)
@name = name
@extname = File.extname(name)
@_data = data
end
attr_reader :name, :extname
# Lazy load blob data if block was passed in.
#
# Awful, awful stuff happening here.
#
# Returns String data.
def data
@data ||= @_data.respond_to?(:call) ? @_data.call() : @_data
end
# Public: Get each line of data
#
# Returns an Array of lines
def lines
# TODO: data should be required to be a String, no nils
@lines ||= data ? data.split("\n", -1) : []
end
# Internal: Is the blob a generated file?
#
# Generated source code is suppressed in diffs and is ignored by
# language statistics.
#
# Please add additional test coverage to
# `test/test_blob.rb#test_generated` if you make any changes.
#
# Return true or false
def generated?
name == 'Gemfile.lock' ||
minified_javascript? ||
compiled_coffeescript? ||
xcode_project_file? ||
generated_net_docfile? ||
generated_parser?
end
# Internal: Is the blob an XCode project file?
#
# Generated if the file extension is an XCode project
# file extension.
#
# Returns true or false.
def xcode_project_file?
['.xib', '.nib', '.storyboard', '.pbxproj', '.xcworkspacedata', '.xcuserstate'].include?(extname)
end
# Internal: Is the blob minified JS?
#
# Consider JS minified if the average line length is
# greater than 100 characters.
#
# Returns true or false.
def minified_javascript?
return unless extname == '.js'
if lines.any?
(lines.inject(0) { |n, l| n += l.length } / lines.length) > 100
else
false
end
end
# Internal: Is the blob of JS generated by CoffeeScript?
#
# CoffeeScript is meant to output JS that would be difficult to
# tell if it was generated or not. Look for a number of patterns
# output by the CS compiler.
#
# Return true or false
def compiled_coffeescript?
return false unless extname == '.js'
# JS generated by CoffeeScript > 1.2 includes a comment on the first line
if lines[0] =~ /^\/\/ Generated by /
return true
end
if lines[0] == '(function() {' && # First line is module closure opening
lines[-2] == '}).call(this);' && # Second to last line closes module closure
lines[-1] == '' # Last line is blank
score = 0
lines.each do |line|
if line =~ /var /
# Underscored temp vars are likely to be Coffee
score += 1 * line.gsub(/(_fn|_i|_len|_ref|_results)/).count
# bind and extend functions are very Coffee specific
score += 3 * line.gsub(/(__bind|__extends|__hasProp|__indexOf|__slice)/).count
end
end
# Require a score of 3. This is fairly arbitrary. Consider
# tweaking later.
score >= 3
else
false
end
end
# Internal: Is this a generated documentation file for a .NET assembly?
#
# .NET developers often check in the XML Intellisense file along with an
# assembly - however, these don't have a special extension, so we have to
# dig into the contents to determine if it's a docfile. Luckily, these files
# are extremely structured, so recognizing them is easy.
#
# Returns true or false
def generated_net_docfile?
return false unless extname.downcase == ".xml"
return false unless lines.count > 3
# .NET Docfiles always open with <doc> and their first tag is an
# <assembly> tag
return lines[1].include?("<doc>") &&
lines[2].include?("<assembly>") &&
lines[-2].include?("</doc>")
end
# Internal: Is the blob of JS a parser generated by PEG.js?
#
# PEG.js-generated parsers are not meant to be consumed by humans.
#
# Return true or false
def generated_parser?
return false unless extname == '.js'
# PEG.js-generated parsers include a comment near the top of the file
# that marks them as such.
if lines[0..4].join('') =~ /^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG.js/
return true
end
false
end
end
end
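A minimal usage sketch of the extracted helper (the filename and contents are illustrative):

    require 'linguist/generated'

    data = File.read("vendor/jquery.min.js")  # hypothetical path
    Linguist::Generated.generated?("vendor/jquery.min.js", data)
    #=> true for minified JS, compiled CoffeeScript, PEG.js parsers,
    #   XCode project files, .NET docfiles and Gemfile.lock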


@@ -2,6 +2,9 @@ require 'escape_utils'
require 'pygments'
require 'yaml'
require 'linguist/classifier'
require 'linguist/samples'
module Linguist
# Language names that are recognizable by GitHub. Defined languages
# can be highlighted, searched and listed under the Top Languages page.
@@ -9,30 +12,15 @@ module Linguist
# Languages are defined in `lib/linguist/languages.yml`.
class Language
@languages = []
@overrides = {}
@index = {}
@name_index = {}
@alias_index = {}
@extension_index = {}
@filename_index = {}
@extension_index = Hash.new { |h,k| h[k] = [] }
@filename_index = Hash.new { |h,k| h[k] = [] }
# Valid Languages types
TYPES = [:data, :markup, :programming]
# Internal: Test if extension maps to multiple Languages.
#
# Returns true or false.
def self.ambiguous?(extension)
@overrides.include?(extension)
end
# Internal: Return overridden extensions.
#
# Returns extensions Array.
def self.overridden_extensions
@overrides.keys
end
# Internal: Create a new Language object
#
# attributes - A hash of attributes
@@ -65,34 +53,45 @@ module Linguist
raise ArgumentError, "Extension is missing a '.': #{extension.inspect}"
end
unless ambiguous?(extension)
# Index the extension with a leading ".": ".rb"
@extension_index[extension] = language
# Index the extension without a leading ".": "rb"
@extension_index[extension.sub(/^\./, '')] = language
end
end
language.overrides.each do |extension|
if extension !~ /^\./
raise ArgumentError, "Extension is missing a '.': #{extension.inspect}"
end
if l = @overrides[extension]
raise ArgumentError, "#{extension} is already overridden by #{l.name}"
end
@overrides[extension] = language
@extension_index[extension] << language
end
language.filenames.each do |filename|
@filename_index[filename] = language
@filename_index[filename] << language
end
language
end
# Public: Detects the Language of the blob.
#
# name - String filename
# data - String blob data. A block may also be passed in for lazy
# loading. This behavior is deprecated and you should always
# pass in a String.
# mode - Optional String mode (defaults to nil)
#
# Returns Language or nil.
def self.detect(name, data, mode = nil)
# A bit of an elegant hack. If the file is executable but extensionless,
# append a "magic" extension so it can be classified with other
# languages that have shebang scripts.
if File.extname(name).empty? && mode && (mode.to_i(8) & 05) == 05
name += ".script!"
end
possible_languages = find_by_filename(name)
if possible_languages.length > 1
data = data.call() if data.respond_to?(:call)
if result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
Language[result[0]]
end
else
possible_languages.first
end
end
# Public: Get all Languages
#
# Returns an Array of Languages
@@ -128,33 +127,19 @@ module Linguist
@alias_index[name]
end
# Public: Look up Language by extension.
#
# extension - The extension String. May include leading "."
#
# Examples
#
# Language.find_by_extension('.rb')
# # => #<Language name="Ruby">
#
# Returns the Language or nil if none was found.
def self.find_by_extension(extension)
@extension_index[extension]
end
# Public: Look up Language by filename.
# Public: Look up Languages by filename.
#
# filename - The path String.
#
# Examples
#
# Language.find_by_filename('foo.rb')
# # => #<Language name="Ruby">
# # => [#<Language name="Ruby">]
#
# Returns the Language or nil if none was found.
# Returns all matching Languages or [] if none were found.
def self.find_by_filename(filename)
basename, extname = File.basename(filename), File.extname(filename)
@filename_index[basename] || @extension_index[extname]
@filename_index[basename] + @extension_index[extname]
end
# Public: Look up Language by its name or lexer.
@@ -241,10 +226,11 @@ module Linguist
# Set extensions or default to [].
@extensions = attributes[:extensions] || []
@overrides = attributes[:overrides] || []
@filenames = attributes[:filenames] || []
@primary_extension = attributes[:primary_extension] || default_primary_extension || extensions.first
unless @primary_extension = attributes[:primary_extension]
raise ArgumentError, "#{@name} is missing primary extension"
end
# Prepend primary extension unless its already included
if primary_extension && !extensions.include?(primary_extension)
@@ -347,11 +333,6 @@ module Linguist
# Returns the extension String.
attr_reader :primary_extension
# Internal: Get overridden extensions.
#
# Returns the extensions Array.
attr_reader :overrides
# Public: Get filenames
#
# Examples
@@ -381,13 +362,6 @@ module Linguist
name.downcase.gsub(/\s/, '-')
end
# Internal: Get default primary extension.
#
# Returns the extension String.
def default_primary_extension
extensions.first
end
# Public: Get Language group
#
# Returns a Language
@@ -451,9 +425,34 @@ module Linguist
end
end
extensions = Samples::DATA['extnames']
filenames = Samples::DATA['filenames']
popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
options['extensions'] ||= []
options['filenames'] ||= []
if extnames = extensions[name]
extnames.each do |extname|
if !options['extensions'].include?(extname)
options['extensions'] << extname
else
warn "#{name} #{extname.inspect} is already defined in samples/. Remove from languages.yml."
end
end
end
if fns = filenames[name]
fns.each do |filename|
if !options['filenames'].include?(filename)
options['filenames'] << filename
else
warn "#{name} #{filename.inspect} is already defined in samples/. Remove from languages.yml."
end
end
end
Language.create(
:name => name,
:color => options['color'],
@@ -464,9 +463,8 @@ module Linguist
:group_name => options['group'],
:searchable => options.key?('searchable') ? options['searchable'] : true,
:search_term => options['search_term'],
:extensions => options['extensions'],
:extensions => options['extensions'].sort,
:primary_extension => options['primary_extension'],
:overrides => options['overrides'],
:filenames => options['filenames'],
:popular => popular.include?(name)
)
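A hedged sketch of the new `Language.detect` entry point (inputs and results are illustrative; the `.h` pick depends on the shipped samples db):

    require 'linguist/language'

    # Ambiguous extension: every Language registered for ".h" is considered,
    # then the classifier picks the best match from the file contents.
    Linguist::Language.detect("Foo.h", "#import <Foundation/Foundation.h>\n@interface Foo : NSObject\n@end\n")
    #=> #<Language name="Objective-C">

    # Executable but extensionless: the magic ".script!" extension is appended
    # so shebang scripts are matched against the renamed Shell samples.
    Linguist::Language.detect("deploy", "#!/bin/sh\necho hi\n", "100755")
    #=> #<Language name="Shell">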

File diff suppressed because it is too large

lib/linguist/md5.rb (new file, 38 lines)

@@ -0,0 +1,38 @@
require 'digest/md5'
module Linguist
module MD5
# Public: Create deep nested digest of value object.
#
# Useful for object comparison.
#
# obj - Object to digest.
#
# Returns String hex digest
def self.hexdigest(obj)
digest = Digest::MD5.new
case obj
when String, Symbol, Integer
digest.update "#{obj.class}"
digest.update "#{obj}"
when TrueClass, FalseClass, NilClass
digest.update "#{obj.class}"
when Array
digest.update "#{obj.class}"
for e in obj
digest.update(hexdigest(e))
end
when Hash
digest.update "#{obj.class}"
for e in obj.map { |(k, v)| hexdigest([k, v]) }.sort
digest.update(e)
end
else
raise TypeError, "can't convert #{obj.inspect} into String"
end
digest.hexdigest
end
end
end
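Because Hash entries are digested individually and their digests sorted before hashing (see above), key order does not affect the result:

    require 'linguist/md5'

    a = Linguist::MD5.hexdigest('tokens' => {'Ruby' => 1}, 'languages_total' => 1)
    b = Linguist::MD5.hexdigest('languages_total' => 1, 'tokens' => {'Ruby' => 1})
    a == b  #=> true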


@@ -1,91 +0,0 @@
require 'mime/types'
require 'yaml'
class MIME::Type
attr_accessor :override
end
# Register additional mime type extensions
#
# Follows same format as mime-types data file
# https://github.com/halostatue/mime-types/blob/master/lib/mime/types.rb.data
File.read(File.expand_path("../mimes.yml", __FILE__)).lines.each do |line|
# Regexp was cargo culted from mime-types lib
next unless line =~ %r{^
#{MIME::Type::MEDIA_TYPE_RE}
(?:\s@([^\s]+))?
(?:\s:(#{MIME::Type::ENCODING_RE}))?
}x
mediatype = $1
subtype = $2
extensions = $3
encoding = $4
# Lookup existing mime type
mime_type = MIME::Types["#{mediatype}/#{subtype}"].first ||
# Or create a new instance
MIME::Type.new("#{mediatype}/#{subtype}")
if extensions
extensions.split(/,/).each do |extension|
mime_type.extensions << extension
end
end
if encoding
mime_type.encoding = encoding
end
mime_type.override = true
# Kind of hacky, but we need to reindex the mime type after making changes
MIME::Types.add_type_variant(mime_type)
MIME::Types.index_extensions(mime_type)
end
module Linguist
module Mime
# Internal: Look up mime type for extension.
#
# ext - The extension String. May include leading "."
#
# Examples
#
# Mime.mime_for('.html')
# # => 'text/html'
#
# Mime.mime_for('txt')
# # => 'text/plain'
#
# Return mime type String otherwise falls back to 'text/plain'.
def self.mime_for(ext)
mime_type = lookup_mime_type_for(ext)
mime_type ? mime_type.to_s : 'text/plain'
end
# Internal: Lookup mime type for extension or mime type
#
# ext_or_mime_type - A file extension ".txt" or mime type "text/plain".
#
# Returns a MIME::Type
def self.lookup_mime_type_for(ext_or_mime_type)
ext_or_mime_type ||= ''
if ext_or_mime_type =~ /\w+\/\w+/
guesses = ::MIME::Types[ext_or_mime_type]
else
guesses = ::MIME::Types.type_for(ext_or_mime_type)
end
# Use custom override first
guesses.detect { |type| type.override } ||
# Prefer text mime types over binary
guesses.detect { |type| type.ascii? } ||
# Otherwise use the first guess
guesses.first
end
end
end


@@ -1,62 +0,0 @@
# Additional types to add to MIME::Types
#
# MIME types are used to set the Content-Type of raw binary blobs. All text
# blobs are served as text/plain regardless of their type to ensure they
# open in the browser rather than downloading.
#
# The encoding helps determine whether a file should be treated as plain
# text or binary. By default, a mime type's encoding is base64 (binary).
# These types will show a "View Raw" link. To force a type to render as
# plain text, set it to 8bit for UTF-8. text/* types will be treated as
# text by default.
#
# <type> @<extensions> :<encoding>
#
# type - mediatype/subtype
# extensions - comma-separated extension list
# encoding - base64 (binary), 7bit (ASCII), 8bit (UTF-8), or
# quoted-printable (Printable ASCII).
#
# Follows same format as mime-types data file
# https://github.com/halostatue/mime-types/blob/master/lib/mime/types.rb.data
#
# Any additions or modifications (even trivial) should have corresponding
# test change in `test/test_mime.rb`.
# TODO: Lookup actual types
application/octet-stream @a,blend,gem,graffle,ipa,lib,mcz,nib,o,ogv,otf,pfx,pigx,plgx,psd,sib,spl,sqlite3,swc,ucode,xpi
# Please keep this list alphabetized
application/java-archive @ear,war
application/netcdf :8bit
application/ogg @ogg
application/postscript :base64
application/vnd.adobe.air-application-installer-package+zip @air
application/vnd.mozilla.xul+xml :8bit
application/vnd.oasis.opendocument.presentation @odp
application/vnd.oasis.opendocument.spreadsheet @ods
application/vnd.oasis.opendocument.text @odt
application/vnd.openofficeorg.extension @oxt
application/vnd.openxmlformats-officedocument.presentationml.presentation @pptx
application/x-chrome-extension @crx
application/x-iwork-keynote-sffkey @key
application/x-iwork-numbers-sffnumbers @numbers
application/x-iwork-pages-sffpages @pages
application/x-ms-xbap @xbap :8bit
application/x-parrot-bytecode @pbc
application/x-shockwave-flash @swf
application/x-silverlight-app @xap
application/x-supercollider @sc :8bit
application/x-troff-ms :8bit
application/x-wais-source :8bit
application/xaml+xml @xaml :8bit
application/xslt+xml @xslt :8bit
image/x-icns @icns
text/cache-manifest @manifest
text/plain @cu,cxx
text/x-logtalk @lgt
text/x-nemerle @n
text/x-nimrod @nim
text/x-ocaml @ml,mli,mll,mly,sig,sml
text/x-rust @rs,rc
text/x-scheme @rkt,scm,sls,sps,ss


@@ -1,92 +0,0 @@
require 'linguist/language'
require 'linguist/mime'
require 'pygments'
module Linguist
# Similar to ::Pathname, Linguist::Pathname wraps a path string and
# provides helpful query methods. It's useful when you only have a
# filename but not a blob and need to figure out the language of the file.
class Pathname
# Public: Initialize a Pathname
#
# path - A filename String. The file may or may not actually exist.
#
# Returns a Pathname.
def initialize(path)
@path = path
end
# Public: Get the basename of the path
#
# Examples
#
# Pathname.new('sub/dir/file.rb').basename
# # => 'file.rb'
#
# Returns a String.
def basename
File.basename(@path)
end
# Public: Get the extname of the path
#
# Examples
#
# Pathname.new('.rb').extname
# # => '.rb'
#
# Pathname.new('file.rb').extname
# # => '.rb'
#
# Returns a String.
def extname
File.extname(@path)
end
# Public: Get the language of the path
#
# The path extension name is the only heuristic used to detect the
# language name.
#
# Examples
#
# Pathname.new('file.rb').language
# # => Language['Ruby']
#
# Returns a Language or nil if none was found.
def language
@language ||= Language.find_by_filename(@path)
end
# Internal: Get the lexer of the path
#
# Returns a Lexer.
def lexer
language ? language.lexer : Pygments::Lexer.find_by_name('Text only')
end
# Public: Get the mime type
#
# Examples
#
# Pathname.new('index.html').mime_type
# # => 'text/html'
#
# Returns a mime type String.
def mime_type
@mime_type ||= Mime.mime_for(extname)
end
# Public: Return self as String
#
# Returns a String
def to_s
@path.dup
end
def eql?(other)
other.is_a?(self.class) && @path == other.to_s
end
alias_method :==, :eql?
end
end

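For reference, the removed Linguist::Pathname could answer filename-only questions directly; a usage sketch based on the doc comments above, with outputs mirroring the documented examples:

require 'linguist/pathname'

path = Linguist::Pathname.new('sub/dir/file.rb')
path.basename  # => "file.rb"
path.extname   # => ".rb"
path.language  # => Language['Ruby']
path.to_s      # => "sub/dir/file.rb"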
View File

@@ -80,7 +80,7 @@ module Linguist
end
# Compute total size
@size = @sizes.inject(0) { |s,(k,v)| s + v }
@size = @sizes.inject(0) { |s,(_,v)| s + v }
# Get primary language
if primary = @sizes.max_by { |(_, size)| size }

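The hunk above only renames an unused block variable from k to _. As a standalone sketch of the same idiom (illustrative values, not Linguist code):

sizes = { 'Ruby' => 12_000, 'C' => 3_500 }

# Total size: the hash key is ignored, hence the `_` in the new block signature
total = sizes.inject(0) { |s, (_, v)| s + v }      # => 15500

# Primary language: the entry with the largest size
primary, size = sizes.max_by { |(_, size)| size }  # => ["Ruby", 12000]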
View File

@@ -1,74 +0,0 @@
require 'linguist/classifier'
require 'linguist/language'
module Linguist
# Model for accessing classifier training data.
class Sample
# Samples live in test/ for now, we'll eventually move them out
PATH = File.expand_path("../../../test/fixtures", __FILE__)
# Public: Iterate over each Sample.
#
# &block - Yields Sample to block
#
# Returns nothing.
def self.each(&block)
Dir.entries(PATH).each do |category|
next if category == '.' || category == '..'
# Skip text and binary for now
# Possibly reconsider this later
next if category == 'text' || category == 'binary'
# Map directory name to a Language alias
language = Linguist::Language.find_by_alias(category)
raise "No language for #{category.inspect}" unless language
dirname = File.join(PATH, category)
Dir.entries(dirname).each do |filename|
next if filename == '.' || filename == '..'
yield new(File.join(dirname, filename), language)
end
end
nil
end
# Public: Build Classifier from all samples.
#
# Returns trained Classifier.
def self.classifier
classifier = Classifier.new
each { |sample| classifier.train(sample.language, sample.data) }
classifier.gc
end
# Internal: Initialize Sample.
#
# Samples should be initialized by Sample.each.
#
# path - String full path to file.
# language - Language of sample.
def initialize(path, language)
@path = path
@language = language
end
# Public: Get full path to file.
#
# Returns String.
attr_reader :path
# Public: Get sample language.
#
# Returns Language.
attr_reader :language
# Public: Read file contents.
#
# Returns String.
def data
File.read(path)
end
end
end

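Before this change, training data was reached through the Sample model above. A sketch of the documented API being retired here (Language#name is assumed for display purposes):

require 'linguist/sample'

classifier = Linguist::Sample.classifier  # trained Classifier built from test/fixtures
Linguist::Sample.each do |sample|
  puts "#{sample.language.name}: #{sample.path}"
end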
27802
lib/linguist/samples.json Normal file

File diff suppressed because it is too large

96
lib/linguist/samples.rb Normal file
View File

@@ -0,0 +1,96 @@
require 'yaml'
require 'linguist/md5'
require 'linguist/classifier'
module Linguist
# Model for accessing classifier training data.
module Samples
# Path to samples root directory
ROOT = File.expand_path("../../../samples", __FILE__)
# Path for serialized samples db
PATH = File.expand_path('../samples.json', __FILE__)
# Hash of serialized samples object
if File.exist?(PATH)
DATA = YAML.load_file(PATH)
end
# Public: Iterate over each sample.
#
# &block - Yields Sample to block
#
# Returns nothing.
def self.each(&block)
Dir.entries(ROOT).each do |category|
next if category == '.' || category == '..'
# Skip text and binary for now
# Possibly reconsider this later
next if category == 'Text' || category == 'Binary'
dirname = File.join(ROOT, category)
Dir.entries(dirname).each do |filename|
next if filename == '.' || filename == '..'
if filename == 'filenames'
Dir.entries(File.join(dirname, filename)).each do |subfilename|
next if subfilename == '.' || subfilename == '..'
yield({
:path => File.join(dirname, filename, subfilename),
:language => category,
:filename => subfilename
})
end
else
if File.extname(filename) == ""
raise "#{File.join(dirname, filename)} is missing an extension, maybe it belongs in filenames/ subdir"
end
yield({
:path => File.join(dirname, filename),
:language => category,
:extname => File.extname(filename)
})
end
end
end
nil
end
# Public: Build the samples data hash from all samples.
#
# Returns a serializable samples data Hash.
def self.data
db = {}
db['extnames'] = {}
db['filenames'] = {}
each do |sample|
language_name = sample[:language]
if sample[:extname]
db['extnames'][language_name] ||= []
if !db['extnames'][language_name].include?(sample[:extname])
db['extnames'][language_name] << sample[:extname]
end
end
if sample[:filename]
db['filenames'][language_name] ||= []
db['filenames'][language_name] << sample[:filename]
end
data = File.read(sample[:path])
Classifier.train!(db, language_name, data)
end
db['md5'] = Linguist::MD5.hexdigest(db)
db
end
end
end

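The serialized samples db that is read back via YAML.load_file above can be regenerated from Samples.data. A minimal sketch, assuming the data hash is JSON-serializable; the project's actual build task is not part of this diff:

require 'json'
require 'linguist/samples'

# Write the freshly trained data to lib/linguist/samples.json.
# JSON output is also parseable by YAML.load_file, which is how DATA is loaded above.
File.open(Linguist::Samples::PATH, 'w') do |f|
  f.write(JSON.generate(Linguist::Samples.data))
end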
View File

@@ -1,3 +1,5 @@
require 'strscan'
module Linguist
# Generic programming language tokenizer.
#
@@ -5,25 +7,42 @@ module Linguist
# It strips any data strings or comments and preserves significant
# language symbols.
class Tokenizer
# Public: Initialize a Tokenizer.
# Public: Extract tokens from data
#
# data - String data to scan.
def initialize(data)
@data = data
end
# Public: Get source data.
#
# Returns String.
attr_reader :data
# Public: Extract tokens from data.
# data - String to tokenize
#
# Returns Array of token Strings.
def tokens
extract_tokens(data)
def self.tokenize(data)
new.extract_tokens(data)
end
# Read up to 100KB
BYTE_LIMIT = 100_000
# Start state on token, ignore anything till the next newline
SINGLE_LINE_COMMENTS = [
'//', # C
'#', # Ruby
'%', # Tex
]
# Start state on opening token, ignore anything until the closing
# token is reached.
MULTI_LINE_COMMENTS = [
['/*', '*/'], # C
['<!--', '-->'], # XML
['{-', '-}'], # Haskell
['(*', '*)'] # Coq
]
START_SINGLE_LINE_COMMENT = Regexp.compile(SINGLE_LINE_COMMENTS.map { |c|
"\s*#{Regexp.escape(c)} "
}.join("|"))
START_MULTI_LINE_COMMENT = Regexp.compile(MULTI_LINE_COMMENTS.map { |c|
Regexp.escape(c[0])
}.join("|"))
# Internal: Extract generic tokens from data.
#
# data - String to scan.
@@ -39,54 +58,48 @@ module Linguist
tokens = []
until s.eos?
# Ruby single line comment
if token = s.scan(/# /)
tokens << "#"
break if s.pos >= BYTE_LIMIT
if token = s.scan(/^#!.+$/)
if name = extract_shebang(token)
tokens << "SHEBANG#!#{name}"
end
# Single line comment
elsif s.beginning_of_line? && token = s.scan(START_SINGLE_LINE_COMMENT)
# tokens << token.strip
s.skip_until(/\n|\Z/)
# C style single line comment
elsif token = s.scan(/\/\/ /)
tokens << "//"
s.skip_until(/\n|\Z/)
# Leading Tex or Matlab comments
elsif token = s.scan(/\n%/)
tokens << "%"
s.skip_until(/\n|\Z/)
# C multiline comments
elsif token = s.scan(/\/\*/)
tokens << "/*"
s.skip_until(/\*\//)
tokens << "*/"
# Haskell multiline comments
elsif token = s.scan(/\{-/)
tokens << "{-"
s.skip_until(/-\}/)
tokens << "-}"
# XML multiline comments
elsif token = s.scan(/<!--/)
tokens << "<!--"
s.skip_until(/-->/)
tokens << "-->"
# Multiline comments
elsif token = s.scan(START_MULTI_LINE_COMMENT)
# tokens << token
close_token = MULTI_LINE_COMMENTS.assoc(token)[1]
s.skip_until(Regexp.compile(Regexp.escape(close_token)))
# tokens << close_token
# Skip single or double quoted strings
elsif s.scan(/"/)
s.skip_until(/[^\\]"/)
if s.peek(1) == "\""
s.getch
else
s.skip_until(/[^\\]"/)
end
elsif s.scan(/'/)
s.skip_until(/[^\\]'/)
if s.peek(1) == "'"
s.getch
else
s.skip_until(/[^\\]'/)
end
# Skip number literals
elsif s.scan(/(0x)?\d+/)
elsif s.scan(/(0x)?\d(\d|\.)*/)
# SGML style brackets
elsif token = s.scan(/<[^\s<>][^<>]*>/)
extract_sgml_tokens(token).each { |t| tokens << t }
# Common programming punctuation
elsif token = s.scan(/;|\{|\}|\(|\)/)
elsif token = s.scan(/;|\{|\}|\(|\)|\[|\]/)
tokens << token
# Regular token
@@ -105,6 +118,33 @@ module Linguist
tokens
end
# Internal: Extract normalized shebang command token.
#
# Examples
#
# extract_shebang("#!/usr/bin/ruby")
# # => "ruby"
#
# extract_shebang("#!/usr/bin/env node")
# # => "node"
#
# Returns String token or nil if it couldn't be parsed.
def extract_shebang(data)
s = StringScanner.new(data)
if path = s.scan(/^#!\s*\S+/)
script = path.split('/').last
if script == 'env'
s.scan(/\s+/)
script = s.scan(/\S+/)
end
script = script[/[^\d]+/, 0]
return script
end
nil
end
# Internal: Extract tokens from inside SGML tag.
#
# data - SGML tag String.

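With the constructor gone, tokenization becomes a single class-level call; a usage sketch (the expected output is hedged, since the regular-token rules are elided from this hunk):

require 'linguist/tokenizer'

tokens = Linguist::Tokenizer.tokenize(<<-EOS)
#!/usr/bin/env ruby
# this comment is skipped
puts "hello"
EOS
# Likely something like ["SHEBANG#!ruby", "puts"]: the shebang is normalized,
# comments are skipped without emitting tokens, and string contents are dropped.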
View File

@@ -0,0 +1,87 @@
(*
Copyright 2003 Apple Computer, Inc.
You may incorporate this Apple sample code into your program(s) without
restriction. This Apple sample code has been provided "AS IS" and the
responsibility for its operation is yours. You are not permitted to
redistribute this Apple sample code as "Apple sample code" after having
made changes. If you're going to redistribute the code, we require
that you make it clear that the code was descended from Apple sample
code, but that you've made changes.
*)
property type_list : {"JPEG", "GIFf", "PICT", "TIFF", "PDF", "TEXT"}
property extension_list : {"jpg", "gif", "pct", "tif", "pdf", "rtf"}
--html is not currently handled
on run {}
tell application "Finder" to set FinderSelection to the selection as alias list
set FS to FinderSelection
--Ideally, this list could be passed to the open handler
set SelectionCount to number of FS -- count
if SelectionCount is 0 then
set FS to userPicksFolder()
else if the SelectionCount is 1 then
set MyPath to path to me
if MyPath is item 1 of FS then
--If I'm a droplet then I was double-clicked
set FS to userPicksFolder()
end if
else
--I'm not a double-clicked droplet
end if
open FS
end run
on userPicksFolder()
set these_items to {}
set these_items to (choose file with prompt "Select a file to convert to PDF:" of type {"JPEG", "GIFf", "PICT", "TIFF", "TEXT", "RTF"}) as list
end userPicksFolder
on open these_items
set thesefiles to {}
set the item_info to {}
repeat with i from 1 to the count of these_items
set this_item to (item i of these_items)
set the item_info to info for this_item
if folder of the item_info is true then --if the item is a folder
processFolder(this_item)
else if ((folder of the item_info is false) and (alias of the item_info is false)) and (the file type of the item_info is in the type_list) or ((the name extension of the item_info) is in the extension_list) then
set theFilePath to (item i of these_items as string)
set thePOSIXFilePath to POSIX path of theFilePath as string
processFile(thePOSIXFilePath)
end if
end repeat
end open
--process folders
on processFolder(theFolder)
set these_items to list folder theFolder without invisibles
repeat with i from 1 to the count of these_items
set this_item to alias ((theFolder as text) & (item i of these_items))
set the item_info to info for this_item
if folder of the item_info is true then
processFolder(this_item)
else if (alias of the item_info is false) and ((the file type of the item_info is in the type_list) or the name extension of the item_info is in the extension_list) then
set theFilePath to (this_item as string)
set thePOSIXFilePath to POSIX path of theFilePath as string
processFile(thePOSIXFilePath)
end if
end repeat
end processFolder
on processFile(thePOSIXFileName)
try
set terminalCommand to ""
set convertCommand to "/System/Library/Printers/Libraries/./convert "
set newFileName to thePOSIXFileName & ".pdf"
set terminalCommand to convertCommand & "-f " & "\"" & thePOSIXFileName & "\"" & " -o " & "\"" & newFileName & "\"" & " -j \"application/pdf\""
do shell script terminalCommand
end try
end processFile

View File

@@ -0,0 +1,91 @@
(*
Copyright 2003 Apple Computer, Inc.
You may incorporate this Apple sample code into your program(s) without
restriction. This Apple sample code has been provided "AS IS" and the
responsibility for its operation is yours. You are not permitted to
redistribute this Apple sample code as "Apple sample code" after having
made changes. If you're going to redistribute the code, we require
that you make it clear that the code was descended from Apple sample
code, but that you've made changes.
*)
property type_list : {"JPEG", "GIFf", "PICT", "TIFF", "PDF", "TEXT"}
property extension_list : {"jpg", "gif", "pct", "tif", "pdf", "rtf"}
--html is not currently handled
on run {}
tell application "Finder" to set FinderSelection to the selection as alias list
set FS to FinderSelection
--Ideally, this list could be passed to the open handler
set SelectionCount to number of FS -- count
if SelectionCount is 0 then
set FS to userPicksFolder()
else if the SelectionCount is 1 then
set MyPath to path to me
if MyPath is item 1 of FS then
--If I'm a droplet then I was double-clicked
set FS to userPicksFolder()
end if
else
--I'm not a double-clicked droplet
end if
open FS
end run
on userPicksFolder()
set these_items to {}
set these_items to (choose file with prompt "Select a file to convert to PostScript:" of type {"JPEG", "GIFf", "PICT", "TIFF", "TEXT", "RTF"}) as list
end userPicksFolder
on open these_items
set thesefiles to {}
set the item_info to {}
repeat with i from 1 to the count of these_items
set this_item to (item i of these_items)
set the item_info to info for this_item
if folder of the item_info is true then --if the item is a folder
processFolder(this_item)
else if ((folder of the item_info is false) and (alias of the item_info is false)) and (the file type of the item_info is in the type_list) or ((the name extension of the item_info) is in the extension_list) then
set theFilePath to (item i of these_items as string)
set thePOSIXFilePath to POSIX path of theFilePath as string
processFile(thePOSIXFilePath)
end if
end repeat
end open
--process folders
on processFolder(theFolder)
set these_items to list folder theFolder without invisibles
repeat with i from 1 to the count of these_items
set this_item to alias ((theFolder as text) & (item i of these_items))
set the item_info to info for this_item
if folder of the item_info is true then
processFolder(this_item)
else if (alias of the item_info is false) and ((the file type of the item_info is in the type_list) or the name extension of the item_info is in the extension_list) then
set theFilePath to (this_item as string)
set thePOSIXFilePath to POSIX path of theFilePath as string
processFile(thePOSIXFilePath)
end if
end repeat
end processFolder
--need to pass the URL to Terminal
on processFile(thePOSIXFileName)
try
set terminalCommand to ""
set convertCommand to "/System/Library/Printers/Libraries/./convert "
set newFileName to thePOSIXFileName & ".ps"
set terminalCommand to convertCommand & "-f " & "\"" & thePOSIXFileName & "\"" & " -o " & "\"" & newFileName & "\"" & " -j \"application/postscript\""
do shell script terminalCommand
end try
end processFile

View File

@@ -0,0 +1,80 @@
(*
Count Messages in All Mailboxes
Copyright 2002-2012 Apple Inc. All rights reserved.
You may incorporate this Apple sample code into your program(s) without
restriction. This Apple sample code has been provided "AS IS" and the
responsibility for its operation is yours. You are not permitted to
redistribute this Apple sample code as "Apple sample code" after having
made changes. If you're going to redistribute the code, we require
that you make it clear that the code was descended from Apple sample
code, but that you've made changes.
*)
(*
This script goes through each mailbox, gets the total message count and
the unread count, then displays the final output in a new email message.
*)
tell application "Mail"
set localMailboxes to every mailbox
if (count of localMailboxes) is greater than 0 then
set messageCountDisplay to "Local mailboxes (On My Mac)" & return & my getMessageCountsForMailboxes(localMailboxes)
else
set messageCountDisplay to ""
end if
set everyAccount to every account
repeat with eachAccount in everyAccount
set accountMailboxes to every mailbox of eachAccount
if (count of accountMailboxes) is greater than 0 then
set messageCountDisplay to messageCountDisplay & return & "Mailboxes for Account: " & name of eachAccount & return & my getMessageCountsForMailboxes(accountMailboxes)
end if
end repeat
set outputMessage to make new outgoing message with properties {content:messageCountDisplay, subject:"Message counts for all my mailboxes", visible:true}
tell outputMessage
set font to "Courier"
set size to 12
end tell
end tell
on getMessageCountsForMailboxes(theMailboxes)
-- (list of mailboxes)
-- returns string
set displayString to ""
tell application "Mail"
repeat with eachMailbox in theMailboxes
set mailboxName to name of eachMailbox
set messageCount to (count of (messages of eachMailbox)) as string
set unreadCount to unread count of eachMailbox as string
set displayString to displayString & " " & my padString(mailboxName, 40) & " " & messageCount & " (" & unreadCount & " unread)" & return
end repeat
end tell
return displayString
end getMessageCountsForMailboxes
on padString(theString, fieldLength)
-- (string, integer)
-- returns string
set stringLength to length of theString
if stringLength is greater than fieldLength then
set paddedString to (text from character 1 to character (fieldLength - 3) of theString) & "..."
else -- stringLength is less than or equal to fieldLength
set paddedString to theString
set paddingLength to fieldLength - stringLength
repeat paddingLength times
set paddedString to paddedString & space
end repeat
end if
return paddedString
end padString

View File

@@ -0,0 +1,68 @@
(*
Crazy Message Text
Copyright 2002-2012 Apple Inc. All rights reserved.
You may incorporate this Apple sample code into your program(s) without
restriction. This Apple sample code has been provided "AS IS" and the
responsibility for its operation is yours. You are not permitted to
redistribute this Apple sample code as "Apple sample code" after having
made changes. If you're going to redistribute the code, we require
that you make it clear that the code was descended from Apple sample
code, but that you've made changes.
*)
(*
This script takes a string from the user and then makes a new message
where each letter has a different font, size, and color.
*)
property lowFontSize : 36
property highFontSize : 72
property messageText : "Happy Birthday!"
repeat
set userInput to display dialog "Enter some message text:" & return & return & "Minimum Character Size: " & (lowFontSize as string) & return & "Maximum Character Size: " & (highFontSize as string) default answer messageText buttons {"Cancel", "Set Prefs", "Continue"} default button 3
if the button returned of userInput is "Set Prefs" then
set minimumFontSize to 9
display dialog "Enter the minimum font size to use:" & return & return & "(Must be at least " & (minimumFontSize as string) & ")" default answer lowFontSize buttons {"OK"}
set newFontSize to text returned of the result as integer
if newFontSize is greater than or equal to minimumFontSize then
set lowFontSize to newFontSize
else
set lowFontSize to minimumFontSize
end if
display dialog "Enter the maximum font size to use:" & return & return & "(Must be greater than " & (lowFontSize as string) & ")" default answer highFontSize buttons {"OK"}
set newFontSize to text returned of the result as integer
if newFontSize is greater than lowFontSize then
set highFontSize to newFontSize
else
set highFontSize to lowFontSize
end if
else -- button returned of userInput is "Continue"
set theText to text returned of userInput
if theText is not "" then
set messageText to theText
end if
exit repeat
end if
end repeat
set fontList to {"American Typewriter", "American Typewriter Light", "American Typewriter Bold", "American Typewriter Condensed", "American Typewriter Condensed Light", "American Typewriter Condensed Bold", "Arial", "Arial Italic", "Arial Bold", "Arial Bold Italic", "Arial Black", "Baskerville", "Baskerville Italic", "Baskerville SemiBold", "Baskerville Bold", "Baskerville SemiBold Italic", "Baskerville Bold Italic", "Big Caslon Medium", "Comic Sans MS", "Comic Sans MS Bold", "Copperplate", "Copperplate Light", "Copperplate Bold", "Didot", "Didot Italic", "Didot Bold", "Futura Medium", "Futura Medium Italic", "Futura Condensed Medium", "Futura Condensed ExtraBold", "Geneva", "Gill Sans", "Gill Sans Italic", "Gill Sans Light", "Gill Sans Light Italic", "Gill Sans Bold", "Gill Sans Bold Italic", "Herculanum", "Lucida Grande", "Lucida Grande Bold", "Marker Felt Thin", "Marker Felt Wide", "Optima Regular", "Optima Italic", "Optima Bold", "Optima Bold Italic", "Optima ExtraBlack", "Papyrus", "Verdana", "Verdana Italic", "Verdana Bold", "Verdana Bold Italic", "Zapfino"}
tell application "Mail"
activate
set crazyTextMessage to make new outgoing message with properties {content:messageText, visible:true}
tell crazyTextMessage
repeat with eachCharacter in characters
set font of eachCharacter to (some item of fontList)
set size of eachCharacter to (random number from lowFontSize to highFontSize)
set color of eachCharacter to {random number from 0 to 65535, random number from 0 to 65535, random number from 0 to 65535}
end repeat
end tell
end tell

View File

@@ -0,0 +1,41 @@
(*
Get User Name
This script uses UI element scripting to get the name for the
current user.
If "Enable access for assistive devices" is not checked,
this script will open the Universal Access System Preference and ask
the user to check the checkbox.
Copyright 2007 Apple Inc.
You may incorporate this Apple sample code into your program(s) without
restriction. This Apple sample code has been provided "AS IS" and the
responsibility for its operation is yours. You are not permitted to
redistribute this Apple sample code as "Apple sample code" after having
made changes. If you're going to redistribute the code, we require
that you make it clear that the code was descended from Apple sample
code, but that you've made changes.
*)
tell application "System Preferences"
activate
set current pane to pane "com.apple.preferences.users"
end tell
tell application "System Events"
if UI elements enabled then
tell tab group 1 of window "Accounts" of process "System Preferences"
click radio button 1
delay 2
get value of text field 1
end tell
else
tell application "System Preferences"
activate
set current pane to pane "com.apple.preference.universalaccess"
display dialog "UI element scripting is not enabled. Check \"Enable access for assistive devices\""
end tell
end if
end tell

View File

@@ -0,0 +1,75 @@
(*
Speaks the date and time of day
Copyright 2008 Apple Inc. All rights reserved.
You may incorporate this Apple sample code into your program(s) without
restriction. This Apple sample code has been provided "AS IS" and the
responsibility for its operation is yours. You are not permitted to
redistribute this Apple sample code as "Apple sample code" after having
made changes. If you're going to redistribute the code, we require
that you make it clear that the code was descended from Apple sample
code, but that you've made changes.
*)
on isVoiceOverRunning()
set isRunning to false
tell application "System Events"
set isRunning to (name of processes) contains "VoiceOver"
end tell
return isRunning
end isVoiceOverRunning
on isVoiceOverRunningWithAppleScript()
if isVoiceOverRunning() then
set isRunningWithAppleScript to true
-- is AppleScript enabled on VoiceOver --
tell application "VoiceOver"
try
set x to bounds of vo cursor
on error
set isRunningWithAppleScript to false
end try
end tell
return isRunningWithAppleScript
end if
return false
end isVoiceOverRunningWithAppleScript
set currentDate to current date
set amPM to "AM"
set currentHour to (currentDate's hours)
set currentMinutes to currentDate's minutes
if (currentHour > 12 and currentHour < 24) then
set amPM to "PM"
else
set amPM to "AM"
end if
-- make minutes below 10 sound nice
if currentMinutes < 10 then
set currentMinutes to ("0" & currentMinutes) as text
end if
-- ensure 0:nn gets set to 12:nn AM
if currentHour is equal to 0 then
set currentHour to 12
end if
-- readjust for 12 hour time
if (currentHour > 12) then
set currentHour to (currentHour - 12)
end if
set currentTime to ((currentDate's month) as text) & " " & ((currentDate's day) as text) & ", " & (currentHour as text) & ":" & ((currentMinutes) as text) & " " & amPM as text
if isVoiceOverRunningWithAppleScript() then
tell application "VoiceOver"
output currentTime
end tell
else
say currentTime
delay 2
end if

View File

Binary image changed (before: 5.4 KiB, after: 5.4 KiB)

View File

Binary image changed (before: 5.4 KiB, after: 5.4 KiB)

69
samples/C++/gdsdbreader.h Normal file
View File

@@ -0,0 +1,69 @@
#ifndef GDSDBREADER_H
#define GDSDBREADER_H
// This file contains core structures, classes and types for the entire gds app
// WARNING: DO NOT MODIFY UNTIL IT'S STRICTLY NECESSARY
#include <QDir>
#include "diagramwidget/qgldiagramwidget.h"
#define GDS_DIR "gdsdata"
enum level {LEVEL_ONE, LEVEL_TWO, LEVEL_THREE};
// The internal structure of the db to store information about each node (each level)
// this will be serialized before being written to file
class dbDataStructure
{
public:
QString label;
quint32 depth;
quint32 userIndex;
QByteArray data; // This is COMPRESSED data to optimize RAM and disk space; it is decompressed
// only when needed (to display the comments)
// The following ID is used to create second-third level files
quint64 uniqueID;
// All the next items linked to this one
QVector<dbDataStructure*> nextItems;
// Corresponding indices vector (used to store data)
QVector<quint32> nextItemsIndices;
// The father element (or NULL if it's root)
dbDataStructure* father;
// Corresponding indices vector (used to store data)
quint32 fatherIndex;
bool noFatherRoot; // Used to tell if this node is the root (so it has no father)
// These fields will be useful for levels 2 and 3
QString fileName; // Relative filename for the associated code file
QByteArray firstLineData; // Compressed first line data, this will be used with the line number to retrieve info
QVector<quint32> linesNumbers; // First and next lines (next are relative to the first) numbers
// -- Generic system data not to be stored on disk
void *glPointer; // GL pointer
// These operator overrides prevent the glPointer and other non-disk-necessary data serialization
friend QDataStream& operator<<(QDataStream& stream, const dbDataStructure& myclass)
// Notice: this function has to be "friend" because it cannot be a member function; member functions
// have an additional "this" parameter, which isn't in the argument list of an operator overload. A friend
// function has full access to the private data of the class without having the "this" argument
{
// Don't write glPointer and every pointer-dependent structure
return stream << myclass.label << myclass.depth << myclass.userIndex << qCompress(myclass.data)
<< myclass.uniqueID << myclass.nextItemsIndices << myclass.fatherIndex << myclass.noFatherRoot
<< myclass.fileName << qCompress(myclass.firstLineData) << myclass.linesNumbers;
}
friend QDataStream& operator>>(QDataStream& stream, dbDataStructure& myclass)
{
//Don't read it, either
stream >> myclass.label >> myclass.depth >> myclass.userIndex >> myclass.data
>> myclass.uniqueID >> myclass.nextItemsIndices >> myclass.fatherIndex >> myclass.noFatherRoot
>> myclass.fileName >> myclass.firstLineData >> myclass.linesNumbers;
myclass.data = qUncompress(myclass.data);
myclass.firstLineData = qUncompress(myclass.firstLineData);
return stream;
}
};
#endif // GDSDBREADER_H

415
samples/C++/qscicommand.h Normal file
View File

@@ -0,0 +1,415 @@
// This defines the interface to the QsciCommand class.
//
// Copyright (c) 2011 Riverbank Computing Limited <info@riverbankcomputing.com>
//
// This file is part of QScintilla.
//
// This file may be used under the terms of the GNU General Public
// License versions 2.0 or 3.0 as published by the Free Software
// Foundation and appearing in the files LICENSE.GPL2 and LICENSE.GPL3
// included in the packaging of this file. Alternatively you may (at
// your option) use any later version of the GNU General Public
// License if such license has been publicly approved by Riverbank
// Computing Limited (or its successors, if any) and the KDE Free Qt
// Foundation. In addition, as a special exception, Riverbank gives you
// certain additional rights. These rights are described in the Riverbank
// GPL Exception version 1.1, which can be found in the file
// GPL_EXCEPTION.txt in this package.
//
// If you are unsure which license is appropriate for your use, please
// contact the sales department at sales@riverbankcomputing.com.
//
// This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
// WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#ifndef QSCICOMMAND_H
#define QSCICOMMAND_H
#ifdef __APPLE__
extern "C++" {
#endif
#include <qstring.h>
#include <Qsci/qsciglobal.h>
#include <Qsci/qsciscintillabase.h>
class QsciScintilla;
//! \brief The QsciCommand class represents an internal editor command that may
//! have one or two keys bound to it.
//!
//! Methods are provided to change the keys bound to the command and to remove
//! a key binding. Each command has a user friendly description of the command
//! for use in key mapping dialogs.
class QSCINTILLA_EXPORT QsciCommand
{
public:
//! This enum defines the different commands that can be assigned to a key.
enum Command {
//! Move down one line.
LineDown = QsciScintillaBase::SCI_LINEDOWN,
//! Extend the selection down one line.
LineDownExtend = QsciScintillaBase::SCI_LINEDOWNEXTEND,
//! Extend the rectangular selection down one line.
LineDownRectExtend = QsciScintillaBase::SCI_LINEDOWNRECTEXTEND,
//! Scroll the view down one line.
LineScrollDown = QsciScintillaBase::SCI_LINESCROLLDOWN,
//! Move up one line.
LineUp = QsciScintillaBase::SCI_LINEUP,
//! Extend the selection up one line.
LineUpExtend = QsciScintillaBase::SCI_LINEUPEXTEND,
//! Extend the rectangular selection up one line.
LineUpRectExtend = QsciScintillaBase::SCI_LINEUPRECTEXTEND,
//! Scroll the view up one line.
LineScrollUp = QsciScintillaBase::SCI_LINESCROLLUP,
//! Scroll to the start of the document.
ScrollToStart = QsciScintillaBase::SCI_SCROLLTOSTART,
//! Scroll to the end of the document.
ScrollToEnd = QsciScintillaBase::SCI_SCROLLTOEND,
//! Scroll vertically to centre the current line.
VerticalCentreCaret = QsciScintillaBase::SCI_VERTICALCENTRECARET,
//! Move down one paragraph.
ParaDown = QsciScintillaBase::SCI_PARADOWN,
//! Extend the selection down one paragraph.
ParaDownExtend = QsciScintillaBase::SCI_PARADOWNEXTEND,
//! Move up one paragraph.
ParaUp = QsciScintillaBase::SCI_PARAUP,
//! Extend the selection up one paragraph.
ParaUpExtend = QsciScintillaBase::SCI_PARAUPEXTEND,
//! Move left one character.
CharLeft = QsciScintillaBase::SCI_CHARLEFT,
//! Extend the selection left one character.
CharLeftExtend = QsciScintillaBase::SCI_CHARLEFTEXTEND,
//! Extend the rectangular selection left one character.
CharLeftRectExtend = QsciScintillaBase::SCI_CHARLEFTRECTEXTEND,
//! Move right one character.
CharRight = QsciScintillaBase::SCI_CHARRIGHT,
//! Extend the selection right one character.
CharRightExtend = QsciScintillaBase::SCI_CHARRIGHTEXTEND,
//! Extend the rectangular selection right one character.
CharRightRectExtend = QsciScintillaBase::SCI_CHARRIGHTRECTEXTEND,
//! Move left one word.
WordLeft = QsciScintillaBase::SCI_WORDLEFT,
//! Extend the selection left one word.
WordLeftExtend = QsciScintillaBase::SCI_WORDLEFTEXTEND,
//! Move right one word.
WordRight = QsciScintillaBase::SCI_WORDRIGHT,
//! Extend the selection right one word.
WordRightExtend = QsciScintillaBase::SCI_WORDRIGHTEXTEND,
//! Move to the end of the previous word.
WordLeftEnd = QsciScintillaBase::SCI_WORDLEFTEND,
//! Extend the selection to the end of the previous word.
WordLeftEndExtend = QsciScintillaBase::SCI_WORDLEFTENDEXTEND,
//! Move to the end of the next word.
WordRightEnd = QsciScintillaBase::SCI_WORDRIGHTEND,
//! Extend the selection to the end of the next word.
WordRightEndExtend = QsciScintillaBase::SCI_WORDRIGHTENDEXTEND,
//! Move left one word part.
WordPartLeft = QsciScintillaBase::SCI_WORDPARTLEFT,
//! Extend the selection left one word part.
WordPartLeftExtend = QsciScintillaBase::SCI_WORDPARTLEFTEXTEND,
//! Move right one word part.
WordPartRight = QsciScintillaBase::SCI_WORDPARTRIGHT,
//! Extend the selection right one word part.
WordPartRightExtend = QsciScintillaBase::SCI_WORDPARTRIGHTEXTEND,
//! Move to the start of the document line.
Home = QsciScintillaBase::SCI_HOME,
//! Extend the selection to the start of the document line.
HomeExtend = QsciScintillaBase::SCI_HOMEEXTEND,
//! Extend the rectangular selection to the start of the document line.
HomeRectExtend = QsciScintillaBase::SCI_HOMERECTEXTEND,
//! Move to the start of the displayed line.
HomeDisplay = QsciScintillaBase::SCI_HOMEDISPLAY,
//! Extend the selection to the start of the displayed line.
HomeDisplayExtend = QsciScintillaBase::SCI_HOMEDISPLAYEXTEND,
//! Move to the start of the displayed or document line.
HomeWrap = QsciScintillaBase::SCI_HOMEWRAP,
//! Extend the selection to the start of the displayed or document
//! line.
HomeWrapExtend = QsciScintillaBase::SCI_HOMEWRAPEXTEND,
//! Move to the first visible character in the document line.
VCHome = QsciScintillaBase::SCI_VCHOME,
//! Extend the selection to the first visible character in the document
//! line.
VCHomeExtend = QsciScintillaBase::SCI_VCHOMEEXTEND,
//! Extend the rectangular selection to the first visible character in
//! the document line.
VCHomeRectExtend = QsciScintillaBase::SCI_VCHOMERECTEXTEND,
//! Move to the first visible character of the displayed or document
//! line.
VCHomeWrap = QsciScintillaBase::SCI_VCHOMEWRAP,
//! Extend the selection to the first visible character of the
//! displayed or document line.
VCHomeWrapExtend = QsciScintillaBase::SCI_VCHOMEWRAPEXTEND,
//! Move to the end of the document line.
LineEnd = QsciScintillaBase::SCI_LINEEND,
//! Extend the selection to the end of the document line.
LineEndExtend = QsciScintillaBase::SCI_LINEENDEXTEND,
//! Extend the rectangular selection to the end of the document line.
LineEndRectExtend = QsciScintillaBase::SCI_LINEENDRECTEXTEND,
//! Move to the end of the displayed line.
LineEndDisplay = QsciScintillaBase::SCI_LINEENDDISPLAY,
//! Extend the selection to the end of the displayed line.
LineEndDisplayExtend = QsciScintillaBase::SCI_LINEENDDISPLAYEXTEND,
//! Move to the end of the displayed or document line.
LineEndWrap = QsciScintillaBase::SCI_LINEENDWRAP,
//! Extend the selection to the end of the displayed or document line.
LineEndWrapExtend = QsciScintillaBase::SCI_LINEENDWRAPEXTEND,
//! Move to the start of the document.
DocumentStart = QsciScintillaBase::SCI_DOCUMENTSTART,
//! Extend the selection to the start of the document.
DocumentStartExtend = QsciScintillaBase::SCI_DOCUMENTSTARTEXTEND,
//! Move to the end of the document.
DocumentEnd = QsciScintillaBase::SCI_DOCUMENTEND,
//! Extend the selection to the end of the document.
DocumentEndExtend = QsciScintillaBase::SCI_DOCUMENTENDEXTEND,
//! Move up one page.
PageUp = QsciScintillaBase::SCI_PAGEUP,
//! Extend the selection up one page.
PageUpExtend = QsciScintillaBase::SCI_PAGEUPEXTEND,
//! Extend the rectangular selection up one page.
PageUpRectExtend = QsciScintillaBase::SCI_PAGEUPRECTEXTEND,
//! Move down one page.
PageDown = QsciScintillaBase::SCI_PAGEDOWN,
//! Extend the selection down one page.
PageDownExtend = QsciScintillaBase::SCI_PAGEDOWNEXTEND,
//! Extend the rectangular selection down one page.
PageDownRectExtend = QsciScintillaBase::SCI_PAGEDOWNRECTEXTEND,
//! Stuttered move up one page.
StutteredPageUp = QsciScintillaBase::SCI_STUTTEREDPAGEUP,
//! Stuttered extend the selection up one page.
StutteredPageUpExtend = QsciScintillaBase::SCI_STUTTEREDPAGEUPEXTEND,
//! Stuttered move down one page.
StutteredPageDown = QsciScintillaBase::SCI_STUTTEREDPAGEDOWN,
//! Stuttered extend the selection down one page.
StutteredPageDownExtend = QsciScintillaBase::SCI_STUTTEREDPAGEDOWNEXTEND,
//! Delete the current character.
Delete = QsciScintillaBase::SCI_CLEAR,
//! Delete the previous character.
DeleteBack = QsciScintillaBase::SCI_DELETEBACK,
//! Delete the previous character if not at start of line.
DeleteBackNotLine = QsciScintillaBase::SCI_DELETEBACKNOTLINE,
//! Delete the word to the left.
DeleteWordLeft = QsciScintillaBase::SCI_DELWORDLEFT,
//! Delete the word to the right.
DeleteWordRight = QsciScintillaBase::SCI_DELWORDRIGHT,
//! Delete right to the end of the next word.
DeleteWordRightEnd = QsciScintillaBase::SCI_DELWORDRIGHTEND,
//! Delete the line to the left.
DeleteLineLeft = QsciScintillaBase::SCI_DELLINELEFT,
//! Delete the line to the right.
DeleteLineRight = QsciScintillaBase::SCI_DELLINERIGHT,
//! Delete the current line.
LineDelete = QsciScintillaBase::SCI_LINEDELETE,
//! Cut the current line to the clipboard.
LineCut = QsciScintillaBase::SCI_LINECUT,
//! Copy the current line to the clipboard.
LineCopy = QsciScintillaBase::SCI_LINECOPY,
//! Transpose the current and previous lines.
LineTranspose = QsciScintillaBase::SCI_LINETRANSPOSE,
//! Duplicate the current line.
LineDuplicate = QsciScintillaBase::SCI_LINEDUPLICATE,
//! Select the whole document.
SelectAll = QsciScintillaBase::SCI_SELECTALL,
//! Move the selected lines up one line.
MoveSelectedLinesUp = QsciScintillaBase::SCI_MOVESELECTEDLINESUP,
//! Move the selected lines down one line.
MoveSelectedLinesDown = QsciScintillaBase::SCI_MOVESELECTEDLINESDOWN,
//! Duplicate the selection.
SelectionDuplicate = QsciScintillaBase::SCI_SELECTIONDUPLICATE,
//! Convert the selection to lower case.
SelectionLowerCase = QsciScintillaBase::SCI_LOWERCASE,
//! Convert the selection to upper case.
SelectionUpperCase = QsciScintillaBase::SCI_UPPERCASE,
//! Cut the selection to the clipboard.
SelectionCut = QsciScintillaBase::SCI_CUT,
//! Copy the selection to the clipboard.
SelectionCopy = QsciScintillaBase::SCI_COPY,
//! Paste from the clipboard.
Paste = QsciScintillaBase::SCI_PASTE,
//! Toggle insert/overtype.
EditToggleOvertype = QsciScintillaBase::SCI_EDITTOGGLEOVERTYPE,
//! Insert a platform dependent newline.
Newline = QsciScintillaBase::SCI_NEWLINE,
//! Insert a formfeed.
Formfeed = QsciScintillaBase::SCI_FORMFEED,
//! Indent one level.
Tab = QsciScintillaBase::SCI_TAB,
//! De-indent one level.
Backtab = QsciScintillaBase::SCI_BACKTAB,
//! Cancel any current operation.
Cancel = QsciScintillaBase::SCI_CANCEL,
//! Undo the last command.
Undo = QsciScintillaBase::SCI_UNDO,
//! Redo the last command.
Redo = QsciScintillaBase::SCI_REDO,
//! Zoom in.
ZoomIn = QsciScintillaBase::SCI_ZOOMIN,
//! Zoom out.
ZoomOut = QsciScintillaBase::SCI_ZOOMOUT,
};
//! Return the command that will be executed by this instance.
Command command() const {return scicmd;}
//! Execute the command.
void execute();
//! Binds the key \a key to the command. If \a key is 0 then the key
//! binding is removed. If \a key is invalid then the key binding is
//! unchanged. Valid keys are any visible or control character or any
//! of \c Key_Down, \c Key_Up, \c Key_Left, \c Key_Right, \c Key_Home,
//! \c Key_End, \c Key_PageUp, \c Key_PageDown, \c Key_Delete,
//! \c Key_Insert, \c Key_Escape, \c Key_Backspace, \c Key_Tab and
//! \c Key_Return. Keys may be modified with any combination of \c SHIFT,
//! \c CTRL, \c ALT and \c META.
//!
//! \sa key(), setAlternateKey(), validKey()
void setKey(int key);
//! Binds the alternate key \a altkey to the command. If \a altkey is 0
//! then the alternate key binding is removed.
//!
//! \sa alternateKey(), setKey(), validKey()
void setAlternateKey(int altkey);
//! The key that is currently bound to the command is returned.
//!
//! \sa setKey(), alternateKey()
int key() const {return qkey;}
//! The alternate key that is currently bound to the command is
//! returned.
//!
//! \sa setAlternateKey(), key()
int alternateKey() const {return qaltkey;}
//! If the key \a key is valid then true is returned.
static bool validKey(int key);
//! The user friendly description of the command is returned.
QString description() const;
private:
friend class QsciCommandSet;
QsciCommand(QsciScintilla *qs, Command cmd, int key, int altkey,
const char *desc);
void bindKey(int key,int &qk,int &scik);
QsciScintilla *qsCmd;
Command scicmd;
int qkey, scikey, qaltkey, scialtkey;
const char *descCmd;
QsciCommand(const QsciCommand &);
QsciCommand &operator=(const QsciCommand &);
};
#ifdef __APPLE__
}
#endif
#endif

116
samples/C++/qsciprinter.h Normal file
View File

@@ -0,0 +1,116 @@
// This module defines interface to the QsciPrinter class.
//
// Copyright (c) 2011 Riverbank Computing Limited <info@riverbankcomputing.com>
//
// This file is part of QScintilla.
//
// This file may be used under the terms of the GNU General Public
// License versions 2.0 or 3.0 as published by the Free Software
// Foundation and appearing in the files LICENSE.GPL2 and LICENSE.GPL3
// included in the packaging of this file. Alternatively you may (at
// your option) use any later version of the GNU General Public
// License if such license has been publicly approved by Riverbank
// Computing Limited (or its successors, if any) and the KDE Free Qt
// Foundation. In addition, as a special exception, Riverbank gives you
// certain additional rights. These rights are described in the Riverbank
// GPL Exception version 1.1, which can be found in the file
// GPL_EXCEPTION.txt in this package.
//
// If you are unsure which license is appropriate for your use, please
// contact the sales department at sales@riverbankcomputing.com.
//
// This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
// WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#ifndef QSCIPRINTER_H
#define QSCIPRINTER_H
#ifdef __APPLE__
extern "C++" {
#endif
#include <qprinter.h>
#include <Qsci/qsciglobal.h>
#include <Qsci/qsciscintilla.h>
QT_BEGIN_NAMESPACE
class QRect;
class QPainter;
QT_END_NAMESPACE
class QsciScintillaBase;
//! \brief The QsciPrinter class is a sub-class of the Qt QPrinter class that
//! is able to print the text of a Scintilla document.
//!
//! The class can be further sub-classed to alter to layout of the text, adding
//! headers and footers for example.
class QSCINTILLA_EXPORT QsciPrinter : public QPrinter
{
public:
//! Constructs a printer paint device with mode \a mode.
QsciPrinter(PrinterMode mode = ScreenResolution);
//! Destroys the QsciPrinter instance.
virtual ~QsciPrinter();
//! Format a page, by adding headers and footers for example, before the
//! document text is drawn on it. \a painter is the painter to be used to
//! add customised text and graphics. \a drawing is true if the page is
//! actually being drawn rather than being sized. \a painter drawing
//! methods must only be called when \a drawing is true. \a area is the
//! area of the page that will be used to draw the text. This should be
//! modified if it is necessary to reserve space for any customised text or
//! graphics. By default the area is relative to the printable area of the
//! page. Use QPrinter::setFullPage() before calling printRange() if you
//! want to try and print over the whole page. \a pagenr is the number of
//! the page. The first page is numbered 1.
virtual void formatPage(QPainter &painter, bool drawing, QRect &area,
int pagenr);
//! Return the number of points to add to each font when printing.
//!
//! \sa setMagnification()
int magnification() const {return mag;}
//! Sets the number of points to add to each font when printing to \a
//! magnification.
//!
//! \sa magnification()
virtual void setMagnification(int magnification);
//! Print a range of lines from the Scintilla instance \a qsb. \a from is
//! the first line to print and a negative value signifies the first line
//! of text. \a to is the last line to print and a negative value
//! signifies the last line of text. true is returned if there was no
//! error.
virtual int printRange(QsciScintillaBase *qsb, int from = -1, int to = -1);
//! Return the line wrap mode used when printing. The default is
//! QsciScintilla::WrapWord.
//!
//! \sa setWrapMode()
QsciScintilla::WrapMode wrapMode() const {return wrap;}
//! Sets the line wrap mode used when printing to \a wmode.
//!
//! \sa wrapMode()
virtual void setWrapMode(QsciScintilla::WrapMode wmode);
private:
int mag;
QsciScintilla::WrapMode wrap;
QsciPrinter(const QsciPrinter &);
QsciPrinter &operator=(const QsciPrinter &);
};
#ifdef __APPLE__
}
#endif
#endif

1267
samples/C/rf_io.c Normal file

File diff suppressed because it is too large

682
samples/C/rf_io.h Normal file
View File

@@ -0,0 +1,682 @@
/**
** Copyright (c) 2011-2012, Karapetsas Eleftherios
** All rights reserved.
**
** Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
** 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
** 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in
** the documentation and/or other materials provided with the distribution.
** 3. Neither the name of the Original Author of Refu nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
**
** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
** INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
** SERVICES;LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
** WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
** OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**/
#ifndef REFU_IO_H
#define REFU_IO_H
#include <rf_setup.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C"
{// opening bracket for calling from C++
#endif
// New line feed
#define RF_LF 0xA
// Carriage Return
#define RF_CR 0xD
#ifdef REFU_WIN32_VERSION
#define i_PLUSB_WIN32 "b"
#else
#define i_PLUSB_WIN32 ""
#endif
// This is the type that represents the file offset
#ifdef _MSC_VER
typedef __int64 foff_rft;
#else
#include <sys/types.h>
typedef off64_t foff_rft;
#endif
///Fseek and Ftelll definitions
#ifdef _MSC_VER
#define rfFseek(i_FILE_,i_OFFSET_,i_WHENCE_) _fseeki64(i_FILE_,i_OFFSET_,i_WHENCE_)
#define rfFtell(i_FILE_) _ftelli64(i_FILE_)
#else
#define rfFseek(i_FILE_,i_OFFSET_,i_WHENCE_) fseeko64(i_FILE_,i_OFFSET_,i_WHENCE_)
#define rfFtell(i_FILE_) ftello64(i_FILE_)
#endif
/**
** @defgroup RF_IOGRP I/O
** @addtogroup RF_IOGRP
** @{
**/
// @brief Reads a UTF-8 file descriptor until end of line or EOF is found and returns a UTF-8 byte buffer
//
// The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
// Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
// default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
// _set_fmode(). For more information take a look at the msdn pages here:
// http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
//
// When the compile flag @c RF_NEWLINE_CRLF is defined (the default case at Windows) then this function
// shall not be adding any CR character that is found in the file behind a newline character since this is
// the Windows line ending scheme. Beware though that the returned read bytes value shall still count the CR character inside.
//
// @param[in] f The file descriptor to read
// @param[out] utf8 Give here a reference to an uninitialized char* that will be allocated inside the function
// and contain the utf8 byte buffer. Needs to be freed by the caller explicitly later
// @param[out] byteLength Give an @c uint32_t here to receive the length of the @c utf8 buffer in bytes
// @param[out] bufferSize Give an @c uint32_t here to receive the capacity of the @c utf8 buffer in bytes
// @param[out] eof Pass a pointer to a char to receive a true or false value in case the end of file
// was reached while reading this line
// @return Returns either a positive number for success that represents the number of bytes read from @c f, or an error in case something goes wrong.
// The possible errors to return are the same as rfFgets_UTF8()
i_DECLIMEX_ int32_t rfFReadLine_UTF8(FILE* f,char** utf8,uint32_t* byteLength,uint32_t* bufferSize,char* eof);
// @brief Reads a Big Endian UTF-16 file descriptor until end of line or EOF is found and returns a UTF-8 byte buffer
//
// The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
// Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
// default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
// _set_fmode(). For more information take a look at the msdn pages here:
// http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
//
// When the compile flag @c RF_NEWLINE_CRLF is defined (the default case at Windows) then this function
// shall not be adding any CR character that is found in the file behind a newline character since this is
// the Windows line ending scheme. Beware though that the returned read bytes value shall still count the CR character inside.
//
// @param[in] f The file descriptor to read
// @param[out] utf8 Give here a reference to an uninitialized char* that will be allocated inside the function
// and contain the utf8 byte buffer. Needs to be freed by the caller explicitly later
// @param[out] byteLength Give an @c uint32_t here to receive the length of the @c utf8 buffer in bytes
// @param[out] eof Pass a pointer to a char to receive a true or false value in case the end of file
// was reached while reading this line
// @return Returns either a positive number for success that represents the number of bytes read from @c f, or an error in case something goes wrong.
// + Any error that can be returned by @ref rfFgets_UTF16BE()
// + @c RE_UTF16_INVALID_SEQUENCE: Failed to decode the UTF-16 byte stream of the file descriptor
// + @c RE_UTF8_ENCODING: Failed to encode the UTF-16 of the file descriptor into UTF-8
i_DECLIMEX_ int32_t rfFReadLine_UTF16BE(FILE* f,char** utf8,uint32_t* byteLength,char* eof);
// @brief Reads a Little Endian UTF-16 file descriptor until end of line or EOF is found and returns a UTF-8 byte buffer
//
// The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
// Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
// default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
// _set_fmode(). For more information take a look at the msdn pages here:
// http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
//
// When the compile flag @c RF_NEWLINE_CRLF is defined (the default case at Windows) then this function
// shall not be adding any CR character that is found in the file behind a newline character since this is
// the Windows line ending scheme. Beware though that the returned read bytes value shall still count the CR character inside.
//
// @param[in] f The file descriptor to read
// @param[out] utf8 Give here a reference to an uninitialized char* that will be allocated inside the function
// and contain the utf8 byte buffer. Needs to be freed by the caller explicitly later
// @param[out] byteLength Give an @c uint32_t here to receive the length of the @c utf8 buffer in bytes
// @param[out] eof Pass a pointer to a char to receive a true or false value in case the end of file
// was reached while reading this line
// @return Returns either a positive number for success that represents the number of bytes read from @c f, or an error in case something goes wrong.
// + Any error that can be returned by @ref rfFgets_UTF16LE()
// + @c RE_UTF16_INVALID_SEQUENCE: Failed to decode the UTF-16 byte stream of the file descriptor
// + @c RE_UTF8_ENCODING: Failed to encode the UTF-16 of the file descriptor into UTF-8
i_DECLIMEX_ int32_t rfFReadLine_UTF16LE(FILE* f,char** utf8,uint32_t* byteLength,char* eof);
// @brief Reads a Big Endian UTF-32 file descriptor until end of line or EOF is found and returns a UTF-8 byte buffer
//
// The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
// Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
// default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
// _set_fmode(). For more information take a look at the msdn pages here:
// http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
//
// When the compile flag @c RF_NEWLINE_CRLF is defined (the default case at Windows) then this function
// shall not be adding any CR character that is found in the file behind a newline character since this is
// the Windows line ending scheme. Beware though that the returned read bytes value shall still count the CR character inside.
//
// @param[in] f The file descriptor to read
// @param[out] utf8 Give here a reference to an uninitialized char* that will be allocated inside the function
// and contain the utf8 byte buffer. Needs to be freed by the caller explicitly later
// @param[out] byteLength Give an @c uint32_t here to receive the length of the @c utf8 buffer in bytes
// @param[out] eof Pass a pointer to a char to receive a true or false value in case the end of file
// was reached while reading this line
// @return Returns either a positive number for success that represents the number of bytes read from @c f, or an error in case something goes wrong.
// + Any error that can be returned by @ref rfFgets_UTF32BE()
// + @c RE_UTF8_ENCODING: Failed to encode the UTF-16 of the file descriptor into UTF-8
i_DECLIMEX_ int32_t rfFReadLine_UTF32BE(FILE* f,char** utf8,uint32_t* byteLength,char* eof);
// @brief Reads a Little Endian UTF-32 file descriptor until end of line or EOF is found and returns a UTF-8 byte buffer
//
// The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that if under
// Windows make sure to call fopen with "wb", "rb" e.t.c. instead of the simple "w", "r" e.t.c. since the initial
// default value under Windows is text mode. Alternatively you can set the initial value using _get_fmode() and
// _set_fmode(). For more information take a look at the msdn pages here:
// http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
//
// When the compile flag @c RF_NEWLINE_CRLF is defined (the default case at Windows) then this function
// shall not be adding any CR character that is found in the file behind a newline character since this is
// the Windows line ending scheme. Beware though that the returned read bytes value shall still count the CR character inside.
//
// @param[in] f The file descriptor to read
// @param[out] utf8 Give here a reference to an uninitialized char* that will be allocated inside the function
// and contain the utf8 byte buffer. Needs to be freed by the caller explicitly later
// @param[out] byteLength Give an @c uint32_t here to receive the length of the @c utf8 buffer in bytes
// @param[out] eof Pass a pointer to a char to receive a true or false value in case the end of file
// was reached while reading this line
// @return Returns either a positive number for success that represents the number of bytes read from @c f, or an error in case something goes wrong.
// + Any error that can be returned by @ref rfFgets_UTF32LE()
// + @c RE_UTF8_ENCODING: Failed to encode the UTF-16 of the file descriptor into UTF-8
i_DECLIMEX_ int32_t rfFReadLine_UTF32LE(FILE* f,char** utf8,uint32_t* byteLength,char* eof);
// @brief Gets a number of bytes from a BIG endian UTF-32 file descriptor
//
// This is a function that's similar to c library fgets but it also returns the number of bytes read. Reads in from the file until @c num bytes
// have been read or new line or EOF character has been encountered.
//
// The function will read until @c num characters are read and if @c num
// would take us to the middle of a UTF32 character then the next character shall also be read
// and the function will return the number of bytes read.
// Since the function null terminates the buffer the given @c buff needs to be of at least
// @c num+7 size to cater for the worst case.
//
// The final bytestream stored inside @c buff is in the endianess of the system.
//
// If right after the last character read comes the EOF, the function
// shall detect so and assign @c true to @c eof.
//
// In Windows, where line endings are in the form of the 2 bytes CR-LF (Carriage Return - NewLine), this function
// shall just ignore the carriage returns and not place them inside the returned buffer at @c buff.
//
// The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that under
// Windows you should make sure to call fopen with "wb", "rb", etc. instead of the simple "w", "r", etc., since the
// default mode under Windows is text mode. Alternatively you can change the default mode using _get_fmode() and
// _set_fmode(). For more information take a look at the msdn pages here:
// http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
//
// @param[in] buff A buffer to be filled with the contents of the file. Should be of size at least @c num+7
// @param[in] num The maximum number of bytes to read from within the file NOT including the null terminating character (which in itself is 4 bytes). Should be a multiple of 4
// @param[in] f A valid FILE descriptor from which to read the bytes
// @param[out] eof Pass a reference to a char to receive a true/false value for whether EOF has been reached.
// @return Returns the actual number of bytes read or an error if there was a problem.
// The possible errors are:
// + @c RE_FILE_READ: If during reading the file there was an unknown read error
// + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
// + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
// + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
// + @c RE_INTERRUPT: If during reading, there was a system interrupt
// + @c RE_FILE_IO: If there was a physical I/O error
// + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
i_DECLIMEX_ int32_t rfFgets_UTF32BE(char* buff,uint32_t num,FILE* f,char* eof);
// @brief Gets a number of bytes from a Little endian UTF-32 file descriptor
//
// This is a function similar to the C library fgets but it also returns the number of bytes read. It reads from the file until @c num bytes
// have been read or a newline or EOF character has been encountered.
//
// The function will read until @c num bytes are read and if @c num
// would land in the middle of a UTF-32 character then that character shall be read to completion
// and the function will return the actual number of bytes read.
// Since the function null terminates the buffer, the given @c buff needs to be of at least
// @c num+7 size to cater for the worst case.
//
// The final bytestream stored inside @c buff is in the endianness of the system.
//
// If right after the last character read comes the EOF, the function
// shall detect so and assign @c true to @c eof.
//
// In Windows, where line endings are in the form of the 2 bytes CR-LF (Carriage Return - NewLine), this function
// shall just ignore the carriage returns and not place them inside the returned buffer at @c buff.
//
// The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that under
// Windows you should make sure to call fopen with "wb", "rb", etc. instead of the simple "w", "r", etc., since the
// default mode under Windows is text mode. Alternatively you can change the default mode using _get_fmode() and
// _set_fmode(). For more information take a look at the msdn pages here:
// http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
//
// @param[in] buff A buffer to be filled with the contents of the file. Should be of size at least @c num+7
// @param[in] num The maximum number of bytes to read from within the file NOT including the null terminating character (which in itself is 4 bytes). Should be a multiple of 4
// @param[in] f A valid FILE descriptor from which to read the bytes
// @param[out] eof Pass a reference to a char to receive a true/false value for whether EOF has been reached.
// @return Returns the actual number of bytes read or an error if there was a problem.
// The possible errors are:
// + @c RE_FILE_READ: If during reading the file there was an unknown read error
// + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
// + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
// + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
// + @c RE_INTERRUPT: If during reading, there was a system interrupt
// + @c RE_FILE_IO: If there was a physical I/O error
// + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
i_DECLIMEX_ int32_t rfFgets_UTF32LE(char* buff,uint32_t num,FILE* f,char* eof);
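// An illustrative usage sketch, added for clarity and not part of the original header: filling a fixed
// buffer with rfFgets_UTF32LE. The buffer size follows the documented @c num+7 rule with @c num a
// multiple of 4; the file name, the loop structure and the assumption that the RE_* error codes are
// negative are for illustration only.
//
//     char buff[128 + 7]; /* num+7: room to complete a partial character plus the 4-byte null */
//     char eof = 0;
//     FILE* f = fopen("input_utf32le.txt", "rb");
//     while(f != NULL && !eof)
//     {
//         int32_t bytes = rfFgets_UTF32LE(buff, 128, f, &eof);
//         if(bytes < 0)
//             break; /* an RE_* error such as RE_FILE_READ was returned */
//         /* buff now holds up to 'bytes' bytes of system-endianness UTF-32, null terminated */
//     }
//     if(f != NULL) fclose(f);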
// @brief Gets a number of bytes from a BIG endian UTF-16 file descriptor
//
// This is a function similar to the C library fgets but it also returns the number of bytes read. It reads from the file until @c num bytes
// have been read or a newline or EOF character has been encountered.
//
// The function will read until @c num bytes are read and if @c num
// would land in the middle of a UTF-16 character then that character shall be read to completion
// and the function will return the actual number of bytes read.
// Since the function null terminates the buffer, the given @c buff needs to be of at least
// @c num+5 size to cater for the worst case.
//
// The final bytestream stored inside @c buff is in the endianness of the system.
//
// If right after the last character read comes the EOF, the function
// shall detect so and assign @c true to @c eof.
//
// In Windows, where line endings are in the form of the 2 bytes CR-LF (Carriage Return - NewLine), this function
// shall just ignore the carriage returns and not place them inside the returned buffer at @c buff.
//
// The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that under
// Windows you should make sure to call fopen with "wb", "rb", etc. instead of the simple "w", "r", etc., since the
// default mode under Windows is text mode. Alternatively you can change the default mode using _get_fmode() and
// _set_fmode(). For more information take a look at the msdn pages here:
// http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
//
// @param[in] buff A buffer to be filled with the contents of the file. Should be of size at least @c num+5
// @param[in] num The maximum number of bytes to read from within the file NOT including the null terminating character (which in itself is 2 bytes). Should be a multiple of 2
// @param[in] f A valid FILE descriptor from which to read the bytes
// @param[out] eof Pass a reference to a char to receive a true/false value for whether EOF has been reached.
// @return Returns the actual number of bytes read or an error if there was a problem.
// The possible errors are:
// + @c RE_FILE_READ: If during reading the file there was an unknown read error
// + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
// + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
// + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
// + @c RE_INTERRUPT: If during reading, there was a system interrupt
// + @c RE_FILE_IO: If there was a physical I/O error
// + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
i_DECLIMEX_ int32_t rfFgets_UTF16BE(char* buff,uint32_t num,FILE* f,char* eof);
// @brief Gets a number of bytes from a Little endian UTF-16 file descriptor
//
// This is a function similar to the C library fgets but it also returns the number of bytes read. It reads from the file until @c num bytes
// have been read or a newline or EOF character has been encountered.
//
// The function will read until @c num bytes are read and if @c num
// would land in the middle of a UTF-16 character then that character shall be read to completion
// and the function will return the actual number of bytes read.
// Since the function null terminates the buffer, the given @c buff needs to be of at least
// @c num+5 size to cater for the worst case.
//
// The final bytestream stored inside @c buff is in the endianness of the system.
//
// If right after the last character read comes the EOF, the function
// shall detect so and assign @c true to @c eof.
//
// In Windows, where line endings are in the form of the 2 bytes CR-LF (Carriage Return - NewLine), this function
// shall just ignore the carriage returns and not place them inside the returned buffer at @c buff.
//
// The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that under
// Windows you should make sure to call fopen with "wb", "rb", etc. instead of the simple "w", "r", etc., since the
// default mode under Windows is text mode. Alternatively you can change the default mode using _get_fmode() and
// _set_fmode(). For more information take a look at the msdn pages here:
// http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
//
// @param[in] buff A buffer to be filled with the contents of the file. Should be of size at least @c num+5
// @param[in] num The maximum number of bytes to read from within the file NOT including the null terminating character (which in itself is 2 bytes). Should be a multiple of 2
// @param[in] f A valid FILE descriptor from which to read the bytes
// @param[out] eof Pass a reference to a char to receive a true/false value for whether EOF has been reached.
// @return Returns the actual number of bytes read or an error if there was a problem.
// The possible errors are:
// + @c RE_FILE_READ: If during reading the file there was an unknown read error
// + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
// + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
// + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
// + @c RE_INTERRUPT: If during reading, there was a system interrupt
// + @c RE_FILE_IO: If there was a physical I/O error
// + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
i_DECLIMEX_ int32_t rfFgets_UTF16LE(char* buff,uint32_t num,FILE* f,char* eof);
// @brief Gets a number of bytes from a UTF-8 file descriptor
//
// This is a function similar to the C library fgets but it also returns the number of bytes read. It reads from the file until @c num bytes
// have been read or a newline or EOF character has been encountered.
//
// The function automatically adds a null termination character at the end of
// @c buff but this character is not included in the returned actual number of bytes.
//
// The function will read until @c num bytes are read and if @c num
// would land in the middle of a UTF-8 character then that character shall be read to completion
// and the function will return the actual number of bytes read.
// Since the function null terminates the buffer, the given @c buff needs to be of at least
// @c num+4 size to cater for the worst case.
//
// If right after the last character read comes the EOF, the function
// shall detect so and assign @c true to @c eof.
//
// In Windows, where line endings are in the form of the 2 bytes CR-LF (Carriage Return - NewLine), this function
// shall just ignore the carriage returns and not place them inside the returned buffer at @c buff.
//
// The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that under
// Windows you should make sure to call fopen with "wb", "rb", etc. instead of the simple "w", "r", etc., since the
// default mode under Windows is text mode. Alternatively you can change the default mode using _get_fmode() and
// _set_fmode(). For more information take a look at the msdn pages here:
// http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
//
// @param[in] buff A buffer to be filled with the contents of the file. Should be of size at least @c num+4
// @param[in] num The maximum number of bytes to read from within the file NOT including the null terminating character (which in itself is 1 byte)
// @param[in] f A valid FILE descriptor from which to read the bytes
// @param[out] eof Pass a reference to a char to receive a true/false value for whether EOF has been reached.
// @return Returns the actual number of bytes read or an error if there was a problem.
// The possible errors are:
// + @c RE_UTF8_INVALID_SEQUENCE_INVALID_BYTE: If an invalid UTF-8 byte has been found
// + @c RE_UTF8_INVALID_SEQUENCE_CONBYTE: If during parsing the file we were expecting a continuation
// byte and did not find it
// + @c RE_UTF8_INVALID_SEQUENCE_END: If the null character is encountered in between bytes that should
// have been continuation bytes
// + @c RE_FILE_READ: If during reading the file there was an unknown read error
// + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
// + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
// + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
// + @c RE_INTERRUPT: If during reading, there was a system interrupt
// + @c RE_FILE_IO: If there was a physical I/O error
// + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
i_DECLIMEX_ int32_t rfFgets_UTF8(char* buff,uint32_t num,FILE* f,char* eof);
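// An illustrative usage sketch, added for clarity and not part of the original header: reading a UTF-8
// file in chunks with rfFgets_UTF8 and echoing it to stdout. Buffer sizing follows the documented
// @c num+4 rule; the file name and the assumption that RE_* error codes are negative are illustrative.
//
//     char buff[256 + 4];
//     char eof = 0;
//     FILE* f = fopen("input_utf8.txt", "rb");
//     if(f != NULL)
//     {
//         do
//         {
//             int32_t bytes = rfFgets_UTF8(buff, 256, f, &eof);
//             if(bytes < 0)
//                 break; /* an RE_* error occurred */
//             fwrite(buff, 1, (size_t)bytes, stdout); /* 'bytes' does not count the null terminator */
//         } while(!eof);
//         fclose(f);
//     }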
// @brief Gets a unicode character from a UTF-8 file descriptor
//
// This function attempts to assume a more modern fgetc() role for UTF-8 encoded files.
// Reads bytes from the File descriptor @c f until a full UTF-8 unicode character has been read
//
// After this function the file pointer will have moved either by @c 1, @c 2, @c 3 or @c 4
// bytes if the return value is positive. You can see how much by checking the return value.
//
// You shall need to provide an integer at @c c to contain either the decoded Unicode
// codepoint or the UTF-8 encoded bytes depending on the value of the @c cp argument.
//
// @param f A valid FILE descriptor from which to read the bytes
// @param c Pass an int that will receive either the unicode code point value or
// the UTF8 bytes depending on the value of the @c cp flag
// @param cp A boolean flag. If @c true then the int passed at @c c will contain the unicode code point
// of the read character, so the UTF-8 will be decoded.
// If @c false the int passed at @c c will contain the value of the read bytes in UTF-8 without any decoding
// @return Returns the number of bytes read (either @c 1, @c 2, @c 3 or @c 4) or an error if the function
// fails for some reason. Possible error values are:
// + @c RE_FILE_EOF: The end of file has been found while reading. If the end of file is encountered
// in the middle of a UTF-8 encoded character, where we would be expecting something different,
// then a @c RE_UTF8_INVALID_SEQUENCE_END error is also logged
// + @c RE_UTF8_INVALID_SEQUENCE_INVALID_BYTE: If an invalid UTF-8 byte has been found
// + @c RE_UTF8_INVALID_SEQUENCE_CONBYTE: If during parsing the file we were expecting a continuation
// byte and did not find it
// + @c RE_UTF8_INVALID_SEQUENCE_END: If the null character is encountered in between bytes that should
// have been continuation bytes
// + @c RE_FILE_READ: If during reading the file there was an unknown read error
// + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
// + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
// + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
// + @c RE_INTERRUPT: If during reading, there was a system interrupt
// + @c RE_FILE_IO: If there was a physical I/O error
// + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
i_DECLIMEX_ int32_t rfFgetc_UTF8(FILE* f,uint32_t *c,char cp);
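// An illustrative usage sketch, added for clarity and not part of the original header: reading one
// character from an already opened UTF-8 file descriptor @c f with both values of the @c cp flag.
// The printf formats are assumptions for the example.
//
//     uint32_t c;
//     int32_t bytes = rfFgetc_UTF8(f, &c, 1); /* cp = true: c receives the decoded code point */
//     if(bytes > 0)
//         printf("U+%04X read in %d bytes\n", (unsigned)c, (int)bytes);
//     bytes = rfFgetc_UTF8(f, &c, 0); /* cp = false: c receives the raw UTF-8 bytes, undecoded */
//     if(bytes > 0)
//         printf("raw UTF-8 value 0x%X read in %d bytes\n", (unsigned)c, (int)bytes);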
// @brief Gets a unicode character from a UTF-16 Big Endian file descriptor
//
// This function attempts to assume a more modern fgetc() role for UTF-16 encoded files.
// Reads bytes from the File descriptor @c f until a full UTF-16 unicode character has been read
//
// After this function the file pointer will have moved either by @c 2 or @c 4
// bytes if the return value is positive. You can see how much by checking the return value.
//
// You shall need to provide an integer at @c c to contain either the decoded Unicode
// codepoint or the big endian encoded UTF-16 bytes depending on the value of the @c cp argument.
//
// @param f A valid FILE descriptor from which to read the bytes
// @param c Pass an int that will receive either the unicode code point value or
// the UTF16 bytes depending on the value of the @c cp flag
// @param cp A boolean flag. If @c true then the int passed at @c c will contain the unicode code point
// of the read character, so the UTF-16 will be decoded.
// If @c false the int passed at @c c will contain the value of the read bytes in UTF-16 without any decoding
// @return Returns the number of bytes read (either @c 2 or @c 4) or an error if the function
// fails for some reason. Possible error values are:
// + @c RE_UTF16_INVALID_SEQUENCE: Either the read word or its surrogate pair if 4 bytes were read held illegal values
// + @c RE_UTF16_NO_SURRPAIR: According to the first read word a surrogate pair was expected but none was found
// + @c RE_FILE_EOF: The end of file has been found while reading. If the end of file is encountered
// while we expect a UTF-16 surrogate pair an appropriate error is logged
// + @c RE_FILE_READ: If during reading the file there was an unknown read error
// + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
// + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
// + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
// + @c RE_INTERRUPT: If during reading, there was a system interrupt
// + @c RE_FILE_IO: If there was a physical I/O error
// + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
i_DECLIMEX_ int32_t rfFgetc_UTF16BE(FILE* f,uint32_t *c,char cp);
// @brief Gets a unicode character from a UTF-16 Little Endian file descriptor
//
// This function attempts to assume a more modern fgetc() role for UTF-16 encoded files.
// Reads bytes from the File descriptor @c f until a full UTF-16 unicode character has been read
//
// After this function the file pointer will have moved either by @c 2 or @c 4
// bytes if the return value is positive. You can see how much by checking the return value.
//
// You shall need to provide an integer at @c c to contain either the decoded Unicode
// codepoint or the UTF-16 encoded bytes depending on the value of the @c cp argument.
//
// @param f A valid FILE descriptor from which to read the bytes
// @param c Pass an int that will receive either the unicode code point value or
// the UTF16 bytes depending on the value of the @c cp flag
// @param cp A boolean flag. If @c true then the int passed at @c c will contain the unicode code point
// of the read character, so the UTF-16 will be decoded.
// If @c false the int passed at @c c will contain the value of the read bytes in UTF-16 without any decoding
// @return Returns the number of bytes read (either @c 2 or @c 4) or an error if the function
// fails for some reason. Possible error values are:
// + @c RE_UTF16_INVALID_SEQUENCE: Either the read word or its surrogate pair if 4 bytes were read held illegal values
// + @c RE_UTF16_NO_SURRPAIR: According to the first read word a surrogate pair was expected but none was found
// + @c RE_FILE_EOF: The end of file has been found while reading. If the end of file is encountered
// while we expect a UTF-16 surrogate pair an appropriate error is logged
// + @c RE_FILE_READ: If during reading the file there was an unknown read error
// + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
// + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
// + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
// + @c RE_INTERRUPT: If during reading, there was a system interrupt
// + @c RE_FILE_IO: If there was a physical I/O error
// + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
i_DECLIMEX_ int32_t rfFgetc_UTF16LE(FILE* f,uint32_t *c,char cp);
// @brief Gets a unicode character from a UTF-32 Little Endian file descriptor
//
// This function attempts to assume a more modern fgetc() role for UTF-32 encoded files.
// Reads bytes from the File descriptor @c f until a full UTF-32 unicode character has been read
//
// After this function the file pointer will have moved by @c 4
// bytes if the return value is positive.
//
// You shall need to provide an integer at @c c to contain the UTF-32 codepoint.
//
// @param f A valid FILE descriptor from which to read the bytes
// @param c Pass an int that will receive the unicode code point value of the read character
// @return Returns either @c RF_SUCCESS for successful reading or one of the following errors:
// + @c RE_FILE_EOF: The end of file has been found while reading.
// + @c RE_FILE_READ: If during reading the file there was an unknown read error
// + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
// + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
// + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
// + @c RE_INTERRUPT: If during reading, there was a system interrupt
// + @c RE_FILE_IO: If there was a physical I/O error
// + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
i_DECLIMEX_ int32_t rfFgetc_UTF32LE(FILE* f,uint32_t *c);
// @brief Gets a unicode character from a UTF-32 Big Endian file descriptor
//
// This function attempts to assume a more modern fgetc() role for UTF-32 encoded files.
// Reads bytes from the File descriptor @c f until a full UTF-32 unicode character has been read
//
// After this function the file pointer will have moved by @c 4
// bytes if the return value is positive.
//
// You shall need to provide an integer at @c c to contain the UTF-32 codepoint.
//
// @param f A valid FILE descriptor from which to read the bytes
// @param c Pass an int that will receive the unicode code point value of the read character
// @return Returns either @c RF_SUCCESS for successful reading or one of the following errors:
// + @c RE_FILE_EOF: The end of file has been found while reading.
// + @c RE_FILE_READ: If during reading the file there was an unknown read error
// + @c RE_FILE_READ_BLOCK: If the read operation failed due to the file descriptor being occupied by another thread
// + @c RE_FILE_MODE: If during reading the file the file descriptor's mode was not correctly set for reading
// + @c RE_FILE_POS_OVERFLOW: If during reading, the current file position can't be represented by the system
// + @c RE_INTERRUPT: If during reading, there was a system interrupt
// + @c RE_FILE_IO: If there was a physical I/O error
// + @c RE_FILE_NOSPACE: If reading failed due to insufficient storage space
i_DECLIMEX_ int32_t rfFgetc_UTF32BE(FILE* f,uint32_t *c);
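// An illustrative usage sketch, added for clarity and not part of the original header: decoding an
// already opened big endian UTF-32 file descriptor @c f code point by code point until EOF.
// The loop structure is an assumption; RF_SUCCESS and RE_FILE_EOF are compared as documented.
//
//     uint32_t codepoint;
//     int32_t ret;
//     while((ret = rfFgetc_UTF32BE(f, &codepoint)) == RF_SUCCESS)
//     {
//         /* process the decoded code point in 'codepoint' */
//     }
//     if(ret != RE_FILE_EOF)
//     {
//         /* some other RE_* error occurred and has been logged */
//     }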
// @brief Moves a unicode character backwards in a big endian UTF-32 file stream
//
// @param f The file stream
// @param c Returns the character we moved back to as a unicode codepoint
// @return Returns either @c RF_SUCCESS for success or one of the following errors:
// + @c RE_FILE_POS_OVERFLOW: If during trying to read the current file's position it can't be represented by the system
// + @c RE_FILE_BAD: If the file descriptor is corrupt/illegal
// + @c RE_FILE_NOTFILE: If the file descriptor is not a file but something else, e.g. a socket
// + @c RE_FILE_GETFILEPOS: If the file's position could not be retrieved for some unknown reason
// + @c RE_FILE_WRITE_BLOCK: While attempting to move the file pointer, it was occupied by another thread, and the no block flag was set
// + @c RE_INTERRUPT: Operating on the file failed due to a system interrupt
// + @c RE_FILE_IO: There was a physical I/O error
// + @c RE_FILE_NOSPACE: There was no space on the device holding the file
// + @c RE_FILE_NOTFILE: The device we attempted to manipulate is non-existent
// + @c RE_FILE_READ: If during reading the file there was an error
// + @c RE_FILE_READ_BLOCK: If during reading the file the read operation failed due to the file being occupied by another thread
// + @c RE_FILE_MODE: If during reading the file the underlying file descriptor's mode was not correctly set for reading
i_DECLIMEX_ int32_t rfFback_UTF32BE(FILE* f,uint32_t *c);
// @brief Moves a unicode character backwards in a little endian UTF-32 file stream
//
// The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that under
// Windows you should make sure to call fopen with "wb", "rb", etc. instead of the simple "w", "r", etc., since the
// default mode under Windows is text mode. Alternatively you can change the default mode using _get_fmode() and
// _set_fmode(). For more information take a look at the msdn pages here:
// http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
//
// @param f The file stream
// @param c Returns the character we moved back to as a unicode codepoint
// @return Returns either @c RF_SUCCESS for success or one of the following errors:
// + @c RE_FILE_POS_OVERFLOW: If during trying to read the current file's position it can't be represented by the system
// + @c RE_FILE_BAD: If the file descriptor is corrupt/illegal
// + @c RE_FILE_NOTFILE: If the file descriptor is not a file but something else, e.g. a socket
// + @c RE_FILE_GETFILEPOS: If the file's position could not be retrieved for some unknown reason
// + @c RE_FILE_WRITE_BLOCK: While attempting to move the file pointer, it was occupied by another thread, and the no block flag was set
// + @c RE_INTERRUPT: Operating on the file failed due to a system interrupt
// + @c RE_FILE_IO: There was a physical I/O error
// + @c RE_FILE_NOSPACE: There was no space on the device holding the file
// + @c RE_FILE_NOTFILE: The device we attempted to manipulate is non-existent
// + @c RE_FILE_READ: If during reading the file there was an error
// + @c RE_FILE_READ_BLOCK: If during reading the file the read operation failed due to the file being occupied by another thread
// + @c RE_FILE_MODE: If during reading the file the underlying file descriptor's mode was not correctly set for reading
i_DECLIMEX_ int32_t rfFback_UTF32LE(FILE* f,uint32_t *c);
// @brief Moves a unicode character backwards in a big endian UTF-16 file stream
//
// The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that under
// Windows you should make sure to call fopen with "wb", "rb", etc. instead of the simple "w", "r", etc., since the
// default mode under Windows is text mode. Alternatively you can change the default mode using _get_fmode() and
// _set_fmode(). For more information take a look at the msdn pages here:
// http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
//
// @param f The file stream
// @param c Returns the character we moved back to as a unicode codepoint
// @return Returns either the number of bytes moved backwards (either @c 4 or @c 2) for success or one of the following errors:
// + @c RE_UTF16_INVALID_SEQUENCE: Either the read word or its surrogate pair if 4 bytes were read held illegal values
// + @c RE_FILE_POS_OVERFLOW: If during trying to read the current file's position it can't be represented by the system
// + @c RE_FILE_BAD: If the file descriptor is corrupt/illegal
// + @c RE_FILE_NOTFILE: If the file descriptor is not a file but something else, e.g. a socket
// + @c RE_FILE_GETFILEPOS: If the file's position could not be retrieved for some unknown reason
// + @c RE_FILE_WRITE_BLOCK: While attempting to move the file pointer, it was occupied by another thread, and the no block flag was set
// + @c RE_INTERRUPT: Operating on the file failed due to a system interrupt
// + @c RE_FILE_IO: There was a physical I/O error
// + @c RE_FILE_NOSPACE: There was no space on the device holding the file
// + @c RE_FILE_NOTFILE: The device we attempted to manipulate is non-existent
// + @c RE_FILE_READ: If during reading the file there was an error
// + @c RE_FILE_READ_BLOCK: If during reading the file the read operation failed due to the file being occupied by another thread
// + @c RE_FILE_MODE: If during reading the file the underlying file descriptor's mode was not correctly set for reading
i_DECLIMEX_ int32_t rfFback_UTF16BE(FILE* f,uint32_t *c);
// @brief Moves a unicode character backwards in a little endian UTF-16 file stream
//
// The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that under
// Windows you should make sure to call fopen with "wb", "rb", etc. instead of the simple "w", "r", etc., since the
// default mode under Windows is text mode. Alternatively you can change the default mode using _get_fmode() and
// _set_fmode(). For more information take a look at the msdn pages here:
// http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
//
// @param f The file stream
// @param c Returns the character we moved back to as a unicode codepoint
// @return Returns either the number of bytes moved backwards (either @c 4 or @c 2) for success or one of the following errors:
// + @c RE_UTF16_INVALID_SEQUENCE: Either the read word or its surrogate pair if 4 bytes were read held illegal values
// + @c RE_FILE_POS_OVERFLOW: If during trying to read the current file's position it can't be represented by the system
// + @c RE_FILE_BAD: If the file descriptor is corrupt/illegal
// + @c RE_FILE_NOTFILE: If the file descriptor is not a file but something else, e.g. a socket
// + @c RE_FILE_GETFILEPOS: If the file's position could not be retrieved for some unknown reason
// + @c RE_FILE_WRITE_BLOCK: While attempting to move the file pointer, it was occupied by another thread, and the no block flag was set
// + @c RE_INTERRUPT: Operating on the file failed due to a system interrupt
// + @c RE_FILE_IO: There was a physical I/O error
// + @c RE_FILE_NOSPACE: There was no space on the device holding the file
// + @c RE_FILE_NOTFILE: The device we attempted to manipulate is non-existent
// + @c RE_FILE_READ: If during reading the file there was an error
// + @c RE_FILE_READ_BLOCK: If during reading the file the read operation failed due to the file being occupied by another thread
// + @c RE_FILE_MODE: If during reading the file the underlying file descriptor's mode was not correctly set for reading
i_DECLIMEX_ int32_t rfFback_UTF16LE(FILE* f,uint32_t *c);
// @brief Moves a unicode character backwards in a UTF-8 file stream
//
// The file descriptor at @c f must have been opened in <b>binary</b> and not text mode. That means that under
// Windows you should make sure to call fopen with "wb", "rb", etc. instead of the simple "w", "r", etc., since the
// default mode under Windows is text mode. Alternatively you can change the default mode using _get_fmode() and
// _set_fmode(). For more information take a look at the msdn pages here:
// http://msdn.microsoft.com/en-us/library/ktss1a9b.aspx
//
// @param f The file stream
// @param c Returns the character we moved back to as a unicode codepoint
// @return Returns either the number of bytes moved backwards for success (either @c 4, @c 3, @c 2 or @c 1) or one of the following errors:
// + @c RE_UTF8_INVALID_SEQUENCE: If during moving backwards in the file unexpected UTF-8 bytes were found
// + @c RE_FILE_POS_OVERFLOW: If during trying to read the current file's position it can't be represented by the system
// + @c RE_FILE_BAD: If the file descriptor is corrupt/illegal
// + @c RE_FILE_NOTFILE: If the file descriptor is not a file but something else, e.g. a socket
// + @c RE_FILE_GETFILEPOS: If the file's position could not be retrieved for some unknown reason
// + @c RE_FILE_WRITE_BLOCK: While attempting to move the file pointer, it was occupied by another thread, and the no block flag was set
// + @c RE_INTERRUPT: Operating on the file failed due to a system interrupt
// + @c RE_FILE_IO: There was a physical I/O error
// + @c RE_FILE_NOSPACE: There was no space on the device holding the file
// + @c RE_FILE_NOTFILE: The device we attempted to manipulate is non-existent
// + @c RE_FILE_READ: If during reading the file there was an error
// + @c RE_FILE_READ_BLOCK: If during reading the file the read operation failed due to the file being occupied by another thread
// + @c RE_FILE_MODE: If during reading the file the underlying file descriptor's mode was not correctly set for reading
i_DECLIMEX_ int32_t rfFback_UTF8(FILE* f,uint32_t *c);
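// An illustrative usage sketch, added for clarity and not part of the original header: peeking at the
// next character of a UTF-8 stream by reading it with rfFgetc_UTF8 and then stepping back over it
// with rfFback_UTF8. Pairing the two calls this way is an assumption for the example.
//
//     uint32_t peeked;
//     if(rfFgetc_UTF8(f, &peeked, 1) > 0) /* read and decode one character */
//     {
//         uint32_t back;
//         int32_t moved = rfFback_UTF8(f, &back); /* move the file pointer back over it */
//         /* on success 'moved' is the number of bytes (1 to 4) moved backwards and
//            'back' holds the code point of the character we moved back to */
//     }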
// @brief Opens another process as a pipe
//
// This function is a cross-platform popen wrapper. In Linux it uses popen and in Windows it uses
// _popen.
// @lmsFunction
// @param command The string with the command to execute. It is basically the name of the program/process you want to spawn
// with its full path and its parameters. @inhtype{String,StringX} @tmpSTR
// @param mode The mode you want the pipe to work in. There are two possible values:
// + @c "r" The calling process can read the spawned command's standard output via the returned stream.
// + @c "w" The calling process can write to the spawned command's standard input via the returned stream.
//
// Anything else will result in an error
// @return On success rfPopen will return a FILE descriptor that can be used to either read from or write to the pipe.
// If there was an error @c 0 is returned and an error is logged.
#ifdef RF_IAMHERE_FOR_DOXYGEN
i_DECLIMEX_ FILE* rfPopen(void* command,const char* mode);
#else
i_DECLIMEX_ FILE* i_rfPopen(void* command,const char* mode);
#define rfPopen(i_CMD_,i_MODE_) i_rfLMS_WRAP2(FILE*,i_rfPopen,i_CMD_,i_MODE_)
#endif
// @brief Closes a pipe
//
// This function is a cross-platform wrapper for pclose. It closes a file descriptor opened with @ref rfPopen() and
// returns the exit code of the process that was running
// @param stream The file descriptor of the pipe returned by @ref rfPopen() that we want to close
// @return Returns the exit code of the process or -1 if there was an error
i_DECLIMEX_ int rfPclose(FILE* stream);
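// An illustrative usage sketch, added for clarity and not part of the original header: spawning a
// process with rfPopen, reading its standard output and closing the pipe with rfPclose. The command
// string, the RFS_() temporary string constructor (assumed here to be what @tmpSTR refers to) and the
// buffer handling are all assumptions for the example.
//
//     FILE* pipe = rfPopen(RFS_("ls -l"), "r");
//     if(pipe != 0)
//     {
//         char line[256];
//         while(fgets(line, sizeof(line), pipe) != NULL)
//             fputs(line, stdout);
//         int exitCode = rfPclose(pipe); /* exit code of the spawned process, or -1 on error */
//         printf("exit code: %d\n", exitCode);
//     }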
// @} End of I/O group
#ifdef __cplusplus
}///closing bracket for calling from C++
#endif
#endif//include guards end

2348
samples/C/rfc_string.c Normal file

File diff suppressed because it is too large

1459
samples/C/rfc_string.h Normal file

File diff suppressed because it is too large

1363
samples/C/wglew.h Normal file

File diff suppressed because it is too large

15
samples/Ceylon/Foo.ceylon Normal file

@@ -0,0 +1,15 @@
doc "Test function for Ceylon"
by "Enrique"
shared void test() {
print("test");
}
doc "Test class for Ceylon"
shared class Test(name) satisfies Comparable<Test> {
shared String name;
shared actual String string = "Test " name ".";
shared actual Comparison compare(Test other) {
return name<=>other.name;
}
}

707
samples/Coq/Basics.v Executable file

@@ -0,0 +1,707 @@
Inductive day : Type :=
| monday : day
| tuesday : day
| wednesday : day
| thursday : day
| friday : day
| saturday : day
| sunday : day.
Definition next_weekday (d:day) : day :=
match d with
| monday => tuesday
| tuesday => wednesday
| wednesday => thursday
| thursday => friday
| friday => monday
| saturday => monday
| sunday => monday
end.
Example test_next_weekday:
(next_weekday (next_weekday saturday)) = tuesday.
Proof. simpl. reflexivity. Qed.
Inductive bool : Type :=
| true : bool
| false : bool.
Definition negb (b:bool) : bool :=
match b with
| true => false
| false => true
end.
Definition andb (b1:bool) (b2:bool) : bool :=
match b1 with
| true => b2
| false => false
end.
Definition orb (b1:bool) (b2:bool) : bool :=
match b1 with
| true => true
| false => b2
end.
Example test_orb1: (orb true false) = true.
Proof. simpl. reflexivity. Qed.
Example test_orb2: (orb false false) = false.
Proof. simpl. reflexivity. Qed.
Example test_orb3: (orb false true) = true.
Proof. simpl. reflexivity. Qed.
Example test_orb4: (orb true true) = true.
Proof. simpl. reflexivity. Qed.
Definition nandb (b1: bool) (b2:bool) : bool :=
match b1 with
| true => match b2 with
| false => true
| true => false
end
| false => true
end.
Example test_nandb1: (nandb true false) = true.
Proof. simpl. reflexivity. Qed.
Example test_nandb2: (nandb false false) = true.
Proof. simpl. reflexivity. Qed.
Example test_nandb3: (nandb false true) = true.
Proof. simpl. reflexivity. Qed.
Example test_nandb4: (nandb true true) = false.
Proof. simpl. reflexivity. Qed.
Definition andb3 (b1: bool) (b2:bool) (b3:bool) : bool :=
match b1 with
| false => false
| true => match b2 with
| false => false
| true => b3
end
end.
Example test_andb31: (andb3 true true true) = true.
Proof. simpl. reflexivity. Qed.
Example test_andb32: (andb3 false true true) = false.
Proof. simpl. reflexivity. Qed.
Example test_andb33: (andb3 true false true) = false.
Proof. simpl. reflexivity. Qed.
Example test_andb34: (andb3 true true false) = false.
Proof. simpl. reflexivity. Qed.
Module Playground1.
Inductive nat : Type :=
| O : nat
| S : nat -> nat.
Definition pred (n : nat) : nat :=
match n with
| O => O
| S n' => n'
end.
Definition minustwo (n : nat) : nat :=
match n with
| O => O
| S O => O
| S (S n') => n'
end.
Fixpoint evenb (n : nat) : bool :=
match n with
| O => true
| S O => false
| S (S n') => evenb n'
end.
Definition oddb (n : nat) : bool := negb (evenb n).
Example test_oddb1: (oddb (S O)) = true.
Proof. reflexivity. Qed.
Example test_oddb2: (oddb (S (S (S (S O))))) = false.
Proof. reflexivity. Qed.
Fixpoint plus (n : nat) (m : nat) : nat :=
match n with
| O => m
| S n' => S (plus n' m)
end.
Fixpoint mult (n m : nat) : nat :=
match n with
| O => O
| S n' => plus m (mult n' m)
end.
Fixpoint minus (n m : nat) : nat :=
match n, m with
| O, _ => n
| S n', O => S n'
| S n', S m' => minus n' m'
end.
Fixpoint exp (base power : nat) : nat :=
match power with
| O => S O
| S p => mult base (exp base p)
end.
Fixpoint factorial (n : nat) : nat :=
match n with
| O => S O
| S n' => mult n (factorial n')
end.
Example test_factorial1: (factorial (S (S (S O)))) = (S (S (S (S (S (S O)))))).
Proof. simpl. reflexivity. Qed.
Notation "x + y" := (plus x y) (at level 50, left associativity) : nat_scope.
Notation "x - y" := (minus x y) (at level 50, left associativity) : nat_scope.
Notation "x * y" := (mult x y) (at level 40, left associativity) : nat_scope.
Fixpoint beq_nat (n m : nat) : bool :=
match n with
| O => match m with
| O => true
| S m' => false
end
| S n' => match m with
| O => false
| S m' => beq_nat n' m'
end
end.
Fixpoint ble_nat (n m : nat) : bool :=
match n with
| O => true
| S n' =>
match m with
| O => false
| S m' => ble_nat n' m'
end
end.
Example test_ble_nat1: (ble_nat (S (S O)) (S (S O))) = true.
Proof. simpl. reflexivity. Qed.
Example test_ble_nat2: (ble_nat (S (S O)) (S (S (S (S O))))) = true.
Proof. simpl. reflexivity. Qed.
Example test_ble_nat3: (ble_nat (S (S (S (S O)))) (S (S O))) = false.
Proof. simpl. reflexivity. Qed.
Definition blt_nat (n m : nat) : bool :=
(andb (negb (beq_nat n m)) (ble_nat n m)).
Example test_blt_nat1: (blt_nat (S (S O)) (S (S O))) = false.
Proof. simpl. reflexivity. Qed.
Example test_blt_nat3: (blt_nat (S (S (S (S O)))) (S (S O))) = false.
Proof. simpl. reflexivity. Qed.
Example test_blt_nat2 : (blt_nat (S (S O)) (S (S (S (S O))))) = true.
Proof. simpl. reflexivity. Qed.
Theorem plus_O_n : forall n : nat, O + n = n.
Proof.
simpl. reflexivity. Qed.
Theorem plus_O_n' : forall n : nat, O + n = n.
Proof.
reflexivity. Qed.
Theorem plus_O_n'' : forall n : nat, O + n = n.
Proof.
intros n. reflexivity. Qed.
Theorem plus_1_1 : forall n : nat, (S O) + n = S n.
Proof.
intros n. reflexivity. Qed.
Theorem mult_0_1: forall n : nat, O * n = O.
Proof.
intros n. reflexivity. Qed.
Theorem plus_id_example : forall n m:nat,
n = m -> n + n = m + m.
Proof.
intros n m.
intros H.
rewrite -> H.
reflexivity. Qed.
Theorem plus_id_exercise : forall n m o: nat,
n = m -> m = o -> n + m = m + o.
Proof.
intros n m o.
intros H.
intros H'.
rewrite -> H.
rewrite <- H'.
reflexivity.
Qed.
Theorem mult_0_plus : forall n m : nat,
(O + n) * m = n * m.
Proof.
intros n m.
rewrite -> plus_O_n.
reflexivity. Qed.
Theorem mult_1_plus : forall n m: nat,
((S O) + n) * m = m + (n * m).
Proof.
intros n m.
rewrite -> plus_1_1.
reflexivity.
Qed.
Theorem mult_1 : forall n : nat,
n * (S O) = n.
Proof.
intros n.
induction n as [| n'].
reflexivity.
simpl.
rewrite -> IHn'.
reflexivity.
Qed.
Theorem plus_1_neq_0 : forall n : nat,
beq_nat (n + (S O)) O = false.
Proof.
intros n.
destruct n as [| n'].
reflexivity.
reflexivity.
Qed.
Theorem zero_nbeq_plus_1 : forall n : nat,
beq_nat O (n + (S O)) = false.
Proof.
intros n.
destruct n.
reflexivity.
reflexivity.
Qed.
Require String. Open Scope string_scope.
Ltac move_to_top x :=
match reverse goal with
| H : _ |- _ => try move x after H
end.
Tactic Notation "assert_eq" ident(x) constr(v) :=
let H := fresh in
assert (x = v) as H by reflexivity;
clear H.
Tactic Notation "Case_aux" ident(x) constr(name) :=
first [
set (x := name); move_to_top x
| assert_eq x name; move_to_top x
| fail 1 "because we are working on a different case" ].
Ltac Case name := Case_aux Case name.
Ltac SCase name := Case_aux SCase name.
Ltac SSCase name := Case_aux SSCase name.
Ltac SSSCase name := Case_aux SSSCase name.
Ltac SSSSCase name := Case_aux SSSSCase name.
Ltac SSSSSCase name := Case_aux SSSSSCase name.
Ltac SSSSSSCase name := Case_aux SSSSSSCase name.
Ltac SSSSSSSCase name := Case_aux SSSSSSSCase name.
Theorem andb_true_elim1 : forall b c : bool,
andb b c = true -> b = true.
Proof.
intros b c H.
destruct b.
Case "b = true".
reflexivity.
Case "b = false".
rewrite <- H. reflexivity. Qed.
Theorem plus_0_r : forall n : nat, n + O = n.
Proof.
intros n. induction n as [| n'].
Case "n = 0". reflexivity.
Case "n = S n'". simpl. rewrite -> IHn'. reflexivity. Qed.
Theorem minus_diag : forall n,
minus n n = O.
Proof.
intros n. induction n as [| n'].
Case "n = 0".
simpl. reflexivity.
Case "n = S n'".
simpl. rewrite -> IHn'. reflexivity. Qed.
Theorem mult_0_r : forall n:nat,
n * O = O.
Proof.
intros n. induction n as [| n'].
Case "n = 0".
reflexivity.
Case "n = S n'".
simpl. rewrite -> IHn'. reflexivity. Qed.
Theorem plus_n_Sm : forall n m : nat,
S (n + m) = n + (S m).
Proof.
intros n m. induction n as [| n'].
Case "n = 0".
reflexivity.
Case "n = S n'".
simpl. rewrite -> IHn'. reflexivity. Qed.
Theorem plus_assoc : forall n m p : nat,
n + (m + p) = (n + m) + p.
Proof.
intros n m p.
induction n as [| n'].
reflexivity.
simpl.
rewrite -> IHn'.
reflexivity. Qed.
Theorem plus_distr : forall n m: nat, S (n + m) = n + (S m).
Proof.
intros n m. induction n as [| n'].
Case "n = 0".
reflexivity.
Case "n = S n'".
simpl. rewrite -> IHn'. reflexivity. Qed.
Theorem mult_distr : forall n m: nat, n * ((S O) + m) = n * (S m).
Proof.
intros n m.
induction n as [| n'].
reflexivity.
reflexivity.
Qed.
Theorem plus_comm : forall n m : nat,
n + m = m + n.
Proof.
intros n m.
induction n as [| n'].
Case "n = 0".
simpl.
rewrite -> plus_0_r.
reflexivity.
Case "n = S n'".
simpl.
rewrite -> IHn'.
rewrite -> plus_distr.
reflexivity. Qed.
Fixpoint double (n:nat) :=
match n with
| O => O
| S n' => S (S (double n'))
end.
Lemma double_plus : forall n, double n = n + n.
Proof.
intros n. induction n as [| n'].
Case "n = 0".
reflexivity.
Case "n = S n'".
simpl. rewrite -> IHn'.
rewrite -> plus_distr. reflexivity.
Qed.
Theorem beq_nat_refl : forall n : nat,
true = beq_nat n n.
Proof.
intros n. induction n as [| n'].
Case "n = 0".
reflexivity.
Case "n = S n".
simpl. rewrite <- IHn'.
reflexivity. Qed.
Theorem plus_rearrange: forall n m p q : nat,
(n + m) + (p + q) = (m + n) + (p + q).
Proof.
intros n m p q.
assert(H: n + m = m + n).
Case "Proof by assertion".
rewrite -> plus_comm. reflexivity.
rewrite -> H. reflexivity. Qed.
Theorem plus_swap : forall n m p: nat,
n + (m + p) = m + (n + p).
Proof.
intros n m p.
rewrite -> plus_assoc.
assert(H: m + (n + p) = (m + n) + p).
rewrite -> plus_assoc.
reflexivity.
rewrite -> H.
assert(H2: m + n = n + m).
rewrite -> plus_comm.
reflexivity.
rewrite -> H2.
reflexivity.
Qed.
Theorem plus_swap' : forall n m p: nat,
n + (m + p) = m + (n + p).
Proof.
intros n m p.
rewrite -> plus_assoc.
assert(H: m + (n + p) = (m + n) + p).
rewrite -> plus_assoc.
reflexivity.
rewrite -> H.
replace (m + n) with (n + m).
rewrite -> plus_comm.
reflexivity.
rewrite -> plus_comm.
reflexivity.
Qed.
Theorem mult_1_distr: forall m n: nat,
n * ((S O) + m) = n * (S O) + n * m.
Proof.
intros n m.
rewrite -> mult_1.
rewrite -> plus_1_1.
simpl.
induction m as [|m'].
simpl.
reflexivity.
simpl.
rewrite -> plus_swap.
rewrite <- IHm'.
reflexivity.
Qed.
Theorem mult_comm: forall m n : nat,
m * n = n * m.
Proof.
intros m n.
induction n as [| n'].
Case "n = 0".
simpl.
rewrite -> mult_0_r.
reflexivity.
Case "n = S n'".
simpl.
rewrite <- mult_distr.
rewrite -> mult_1_distr.
rewrite -> mult_1.
rewrite -> IHn'.
reflexivity.
Qed.
Theorem evenb_next : forall n : nat,
evenb n = evenb (S (S n)).
Proof.
intros n.
Admitted.
Theorem negb_negb : forall n : bool,
n = negb (negb n).
Proof.
intros n.
destruct n.
reflexivity.
reflexivity.
Qed.
Theorem evenb_n_oddb_Sn : forall n : nat,
evenb n = negb (evenb (S n)).
Proof.
intros n.
induction n as [|n'].
reflexivity.
assert(H: evenb n' = evenb (S (S n'))).
reflexivity.
rewrite <- H.
rewrite -> IHn'.
rewrite <- negb_negb.
reflexivity.
Qed.
(*Fixpoint bad (n : nat) : bool :=
match n with
| O => true
| S O => bad (S n)
| S (S n') => bad n'
end.*)
Theorem ble_nat_refl : forall n:nat,
true = ble_nat n n.
Proof.
intros n.
induction n as [|n'].
Case "n = 0".
reflexivity.
Case "n = S n".
simpl.
rewrite <- IHn'.
reflexivity.
Qed.
Theorem zero_nbeq_S : forall n: nat,
beq_nat O (S n) = false.
Proof.
intros n.
reflexivity.
Qed.
Theorem andb_false_r : forall b : bool,
andb b false = false.
Proof.
intros b.
destruct b.
reflexivity.
reflexivity.
Qed.
Theorem plus_ble_compat_1 : forall n m p : nat,
ble_nat n m = true -> ble_nat (p + n) (p + m) = true.
Proof.
intros n m p.
intros H.
induction p.
Case "p = 0".
simpl.
rewrite -> H.
reflexivity.
Case "p = S p'".
simpl.
rewrite -> IHp.
reflexivity.
Qed.
Theorem S_nbeq_0 : forall n:nat,
beq_nat (S n) O = false.
Proof.
intros n.
reflexivity.
Qed.
Theorem mult_1_1 : forall n:nat, (S O) * n = n.
Proof.
intros n.
simpl.
rewrite -> plus_0_r.
reflexivity. Qed.
Theorem all3_spec : forall b c : bool,
orb (andb b c)
(orb (negb b)
(negb c))
= true.
Proof.
intros b c.
destruct b.
destruct c.
reflexivity.
reflexivity.
reflexivity.
Qed.
Lemma mult_plus_1 : forall n m : nat,
S(m + n) = m + (S n).
Proof.
intros n m.
induction m.
reflexivity.
simpl.
rewrite -> IHm.
reflexivity.
Qed.
Theorem mult_mult : forall n m : nat,
n * (S m) = n * m + n.
Proof.
intros n m.
induction n.
reflexivity.
simpl.
rewrite -> IHn.
rewrite -> plus_assoc.
rewrite -> mult_plus_1.
reflexivity.
Qed.
Theorem mult_plus_distr_r : forall n m p:nat,
(n + m) * p = (n * p) + (m * p).
Proof.
intros n m p.
induction p.
rewrite -> mult_0_r.
rewrite -> mult_0_r.
rewrite -> mult_0_r.
reflexivity.
rewrite -> mult_mult.
rewrite -> mult_mult.
rewrite -> mult_mult.
rewrite -> IHp.
assert(H1: ((n * p) + n) + (m * p + m) = (n * p) + (n + (m * p + m))).
rewrite <- plus_assoc.
reflexivity.
rewrite -> H1.
assert(H2: (n + (m * p + m)) = (m * p + (n + m))).
rewrite -> plus_swap.
reflexivity.
rewrite -> H2.
assert(H3: (n * p) + (m * p + (n + m)) = ((n * p ) + (m * p)) + (n + m)).
rewrite -> plus_assoc.
reflexivity.
rewrite -> H3.
reflexivity.
Qed.
Theorem mult_assoc : forall n m p : nat,
n * (m * p) = (n * m) * p.
Proof.
intros n m p.
induction n.
simpl.
reflexivity.
simpl.
rewrite -> mult_plus_distr_r.
rewrite -> IHn.
reflexivity.
Qed.
Inductive bin : Type :=
| BO : bin
| D : bin -> bin
| M : bin -> bin.
Fixpoint incbin (n : bin) : bin :=
match n with
| BO => M (BO)
| D n' => M n'
| M n' => D (incbin n')
end.
Fixpoint bin2un (n : bin) : nat :=
match n with
| BO => O
| D n' => double (bin2un n')
| M n' => S (double (bin2un n'))
end.
Theorem bin_comm : forall n : bin,
bin2un(incbin n) = S (bin2un n).
Proof.
intros n.
induction n.
reflexivity.
reflexivity.
simpl.
rewrite -> IHn.
reflexivity.
Qed.
End Playground1.

Some files were not shown because too many files have changed in this diff