diff --git a/lib/linguist/strategy/modeline.rb b/lib/linguist/strategy/modeline.rb index 4e16a03c..d9ddf9c3 100644 --- a/lib/linguist/strategy/modeline.rb +++ b/lib/linguist/strategy/modeline.rb @@ -2,18 +2,67 @@ module Linguist module Strategy class Modeline EMACS_MODELINE = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i + VIM_MODELINE = / - # First form vim modeline - # [text]{white}{vi:|vim:|ex:}[white]{options} - # ex: 'vim: syntax=ruby' - VIM_MODELINE_1 = /(?:vim|vi|ex):\s*(?:ft|filetype|syntax)=(\w+)\s?/i + # Start modeline. Could be `vim:`, `vi:` or `ex:` + (?: + (?:\s|^) + vi + (?:m[<=>]?\d+|m)? # Version-specific modeline + | + (?!^)\s + ex + ) - # Second form vim modeline (compatible with some versions of Vi) - # [text]{white}{vi:|vim:|Vim:|ex:}[white]se[t] {options}:[text] - # ex: 'vim set syntax=ruby:' - VIM_MODELINE_2 = /(?:vim|vi|Vim|ex):\s*se(?:t)?.*\s(?:ft|filetype|syntax)=(\w+)\s?.*:/i + # If the option-list begins with `set ` or `se `, it indicates an alternative + # modeline syntax partly-compatible with older versions of Vi. Here, the colon + # serves as a terminator for an option sequence, delimited by whitespace. + (?= + # So we have to ensure the modeline ends with a colon + : (?=\s* set? \s [^\n:]+ :) | - MODELINES = [EMACS_MODELINE, VIM_MODELINE_1, VIM_MODELINE_2] + # Otherwise, it isn't valid syntax and should be ignored + : (?!\s* set? \s) + ) + + # Possible (unrelated) `option=value` pairs to skip past + (?: + # Option separator. Vim uses whitespace or colons to separate options (except if + # the alternate "vim: set " form is used, where only whitespace is used) + (?: + \s + | + \s* : \s* # Note that whitespace around colons is accepted too: + ) # vim: noai : ft=ruby:noexpandtab + + # Option's name. All recognised Vim options have an alphanumeric form. + \w* + + # Possible value. Not every option takes an argument. + (?: + # Whitespace between name and value is allowed: `vim: ft =ruby` + \s*= + + # Option's value. Might be blank; `vim: ft= ` says "use no filetype". + (?: + [^\\\s] # Beware of escaped characters: titlestring=\ ft=ruby + | # will be read by Vim as { titlestring: " ft=ruby" }. + \\. + )* + )? + )* + + # The actual filetype declaration + [\s:] (?:filetype|ft|syntax) \s*= + + # Language's name + (\w+) + + # Ensure it's followed by a legal separator + (?=\s|:|$) + /xi + + MODELINES = [EMACS_MODELINE, VIM_MODELINE] # Scope of the search for modelines # Number of lines to check at the beginning and at the end of the file diff --git a/test/fixtures/Data/Modelines/iamjs.pl b/test/fixtures/Data/Modelines/iamjs.pl new file mode 100644 index 00000000..b5d8f236 --- /dev/null +++ b/test/fixtures/Data/Modelines/iamjs.pl @@ -0,0 +1,3 @@ +# vim: noexpandtab: ft=javascript + +"It's JavaScript, baby"; diff --git a/test/fixtures/Data/Modelines/iamjs2.pl b/test/fixtures/Data/Modelines/iamjs2.pl new file mode 100644 index 00000000..623b827a --- /dev/null +++ b/test/fixtures/Data/Modelines/iamjs2.pl @@ -0,0 +1,4 @@ +# vim:noexpandtab titlestring=hi\|there\\\ ft=perl ts=4 +# vim:noexpandtab titlestring=hi|there\\ ft=javascript ts=4 + +"Still JavaScript, bruh"; diff --git a/test/test_modelines.rb b/test/test_modelines.rb index 192da9d4..b2eba82a 100644 --- a/test/test_modelines.rb +++ b/test/test_modelines.rb @@ -30,6 +30,8 @@ class TestModelines < Minitest::Test assert_modeline Language["Text"], fixture_blob("Data/Modelines/fundamentalEmacs.c") assert_modeline Language["Prolog"], fixture_blob("Data/Modelines/not_perl.pl") assert_modeline Language["Smalltalk"], fixture_blob("Data/Modelines/example_smalltalk.md") + assert_modeline Language["JavaScript"], fixture_blob("Data/Modelines/iamjs.pl") + assert_modeline Language["JavaScript"], fixture_blob("Data/Modelines/iamjs2.pl") assert_modeline Language["PHP"], fixture_blob("Data/Modelines/iamphp.inc") assert_modeline nil, sample_blob("C/main.c") end @@ -51,6 +53,8 @@ class TestModelines < Minitest::Test assert_equal Language["Text"], fixture_blob("Data/Modelines/fundamentalEmacs.c").language assert_equal Language["Prolog"], fixture_blob("Data/Modelines/not_perl.pl").language assert_equal Language["Smalltalk"], fixture_blob("Data/Modelines/example_smalltalk.md").language + assert_equal Language["JavaScript"], fixture_blob("Data/Modelines/iamjs.pl").language + assert_equal Language["JavaScript"], fixture_blob("Data/Modelines/iamjs2.pl").language assert_equal Language["PHP"], fixture_blob("Data/Modelines/iamphp.inc").language end end