mirror of
https://github.com/KevinMidboe/linguist.git
synced 2026-01-09 10:55:32 +00:00
Revise pattern for Emacs modeline detection
This is a rewrite of the regex that handles Emacs modeline matching. The
current one is unreliable, causing some files to be misclassified as the
language "E", among other things.
It's worth noting that malformed modelines can still change a file's language
in Emacs. Provided the -*- delimiters are intact, and the mode's name is
decipherable, Emacs will set the appropriate language mode *and* display
a warning about a malformed modeline:
-*- foo-bar mode: ruby -*- # Malformed, but understandable
-*- mode: ruby--*- # Completely invalid
The new pattern accommodates this leniency, making no effort to validate
a modeline's syntax beyond requiring a readable mode name. In other words,
if Emacs accepts certain errors, we should too.
This commit is contained in:
@@ -1,7 +1,36 @@
|
|||||||
module Linguist
|
module Linguist
|
||||||
module Strategy
|
module Strategy
|
||||||
class Modeline
|
class Modeline
|
||||||
EMACS_MODELINE = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i
|
EMACS_MODELINE = /
|
||||||
|
-\*-
|
||||||
|
(?:
|
||||||
|
# Short form: `-*- ruby -*-`
|
||||||
|
\s* (?= [^:;\s]+ \s* -\*-)
|
||||||
|
|
|
||||||
|
# Longer form: `-*- foo:bar; mode: ruby; -*-`
|
||||||
|
(?:
|
||||||
|
.*? # Preceding variables: `-*- foo:bar bar:baz;`
|
||||||
|
[;\s] # Which are delimited by spaces or semicolons
|
||||||
|
|
|
||||||
|
(?<=-\*-) # Not preceded by anything: `-*-mode:ruby-*-`
|
||||||
|
)
|
||||||
|
mode # Major mode indicator
|
||||||
|
\s*:\s* # Allow whitespace around colon: `mode : ruby`
|
||||||
|
)
|
||||||
|
([^:;\s]+) # Name of mode
|
||||||
|
|
||||||
|
# Ensure the mode is terminated correctly
|
||||||
|
(?=
|
||||||
|
# Followed by semicolon or whitespace
|
||||||
|
[\s;]
|
||||||
|
|
|
||||||
|
# Touching the ending sequence: `ruby-*-`
|
||||||
|
(?<![-*]) # Don't allow stuff like `ruby--*-` to match; it'll invalidate the mode
|
||||||
|
-\*- # Emacs has no problems reading `ruby --*-`, however.
|
||||||
|
)
|
||||||
|
.*? # Anything between a cleanly-terminated mode and the ending -*-
|
||||||
|
-\*-
|
||||||
|
/xi
|
||||||
|
|
||||||
# First form vim modeline
|
# First form vim modeline
|
||||||
# [text]{white}{vi:|vim:|ex:}[white]{options}
|
# [text]{white}{vi:|vim:|ex:}[white]{options}
|
||||||
|
|||||||
3
test/fixtures/Data/Modelines/seeplusplusEmacs10
vendored
Normal file
3
test/fixtures/Data/Modelines/seeplusplusEmacs10
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
// -*- foo-bar mode: c++ -*-
|
||||||
|
|
||||||
|
"Malformed modeline, but still understood by Emacs to be C++."
|
||||||
1
test/fixtures/Data/Modelines/seeplusplusEmacs11
vendored
Normal file
1
test/fixtures/Data/Modelines/seeplusplusEmacs11
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
/* -*- mode: c++ -------*- */
|
||||||
1
test/fixtures/Data/Modelines/seeplusplusEmacs12
vendored
Normal file
1
test/fixtures/Data/Modelines/seeplusplusEmacs12
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
-*--------- foo:bar mode: c++ -*-
|
||||||
@@ -27,6 +27,9 @@ class TestModelines < Minitest::Test
|
|||||||
assert_modeline Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs7")
|
assert_modeline Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs7")
|
||||||
assert_modeline Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs8")
|
assert_modeline Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs8")
|
||||||
assert_modeline Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs9")
|
assert_modeline Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs9")
|
||||||
|
assert_modeline Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs10")
|
||||||
|
assert_modeline Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs11")
|
||||||
|
assert_modeline Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs12")
|
||||||
assert_modeline Language["Text"], fixture_blob("Data/Modelines/fundamentalEmacs.c")
|
assert_modeline Language["Text"], fixture_blob("Data/Modelines/fundamentalEmacs.c")
|
||||||
assert_modeline Language["Prolog"], fixture_blob("Data/Modelines/not_perl.pl")
|
assert_modeline Language["Prolog"], fixture_blob("Data/Modelines/not_perl.pl")
|
||||||
assert_modeline Language["Smalltalk"], fixture_blob("Data/Modelines/example_smalltalk.md")
|
assert_modeline Language["Smalltalk"], fixture_blob("Data/Modelines/example_smalltalk.md")
|
||||||
@@ -48,6 +51,9 @@ class TestModelines < Minitest::Test
|
|||||||
assert_equal Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs7").language
|
assert_equal Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs7").language
|
||||||
assert_equal Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs8").language
|
assert_equal Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs8").language
|
||||||
assert_equal Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs9").language
|
assert_equal Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs9").language
|
||||||
|
assert_equal Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs10").language
|
||||||
|
assert_equal Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs11").language
|
||||||
|
assert_equal Language["C++"], fixture_blob("Data/Modelines/seeplusplusEmacs12").language
|
||||||
assert_equal Language["Text"], fixture_blob("Data/Modelines/fundamentalEmacs.c").language
|
assert_equal Language["Text"], fixture_blob("Data/Modelines/fundamentalEmacs.c").language
|
||||||
assert_equal Language["Prolog"], fixture_blob("Data/Modelines/not_perl.pl").language
|
assert_equal Language["Prolog"], fixture_blob("Data/Modelines/not_perl.pl").language
|
||||||
assert_equal Language["Smalltalk"], fixture_blob("Data/Modelines/example_smalltalk.md").language
|
assert_equal Language["Smalltalk"], fixture_blob("Data/Modelines/example_smalltalk.md").language
|
||||||
|
|||||||
Reference in New Issue
Block a user