# Internal: Is the blob of PostScript generated?
#
# PostScript files are often generated by other programs. If they tell us so
# through their "%%Creator:" comment, we can detect them.
#
# Only blobs whose extname is '.ps' or '.eps' are considered, and only the
# first ten lines are searched for the "%%Creator: " comment.
#
# Returns true or false.
def generated_postscript?
  return false unless ['.ps', '.eps'].include?(extname)

  # We analyze the "%%Creator:" comment, which contains the author/generator
  # of the file. If there is one, it should be in one of the first few lines.
  creator = lines[0..9].find { |line| line =~ /^%%Creator: / }
  return false if creator.nil?

  # Most generators write their version number, while human authors' or
  # companies' names don't contain numbers. So look if the line contains
  # digits. `=~` yields an index or nil, so branch on it explicitly to keep
  # the documented true/false return value.
  return true if creator =~ /[0-9]/

  # Special cases: generators known to omit a version number. The arguments
  # are parenthesized on purpose: `include? "a" || include? "b"` does not
  # parse in Ruby.
  ['mpage', 'draw', 'ImageMagick'].any? { |generator| creator.include?(generator) }
end