diff --git a/lib/linguist/tokenizer.rb b/lib/linguist/tokenizer.rb
index 5682173b..ef3d54b7 100644
--- a/lib/linguist/tokenizer.rb
+++ b/lib/linguist/tokenizer.rb
@@ -16,12 +16,15 @@ module Linguist
       new.extract_tokens(data)
     end
 
+    # Start state on token, ignore anything till the next newline
     SINGLE_LINE_COMMENTS = [
       '//', # C
       '#',  # Ruby
       '%',  # Tex
     ]
 
+    # Start state on opening token, ignore anything until the closing
+    # token is reached.
     MULTI_LINE_COMMENTS = [
       ['/*', '*/'],    # C
       ['<!--', '-->'], # XML
@@ -30,7 +33,7 @@ module Linguist
     ]
 
     START_SINGLE_LINE_COMMENT = Regexp.compile(SINGLE_LINE_COMMENTS.map { |c|
-      "^\s*#{Regexp.escape(c)} "
+      "\s*#{Regexp.escape(c)} "
     }.join("|"))
 
     START_MULTI_LINE_COMMENT = Regexp.compile(MULTI_LINE_COMMENTS.map { |c|
@@ -58,16 +61,16 @@ module Linguist
        end
 
        # Single line comment
-        elsif token = s.scan(START_SINGLE_LINE_COMMENT)
-          tokens << token.strip
+        elsif s.beginning_of_line? && token = s.scan(START_SINGLE_LINE_COMMENT)
+          # tokens << token.strip
          s.skip_until(/\n|\Z/)
 
        # Multiline comments
        elsif token = s.scan(START_MULTI_LINE_COMMENT)
-          tokens << token
+          # tokens << token
          close_token = MULTI_LINE_COMMENTS.assoc(token)[1]
          s.skip_until(Regexp.compile(Regexp.escape(close_token)))
-          tokens << close_token
+          # tokens << close_token
 
        # Skip single or double quoted strings
        elsif s.scan(/"/)
diff --git a/test/test_tokenizer.rb b/test/test_tokenizer.rb
index 4fb49a4a..af649204 100644
--- a/test/test_tokenizer.rb
+++ b/test/test_tokenizer.rb
@@ -34,15 +34,15 @@ class TestTokenizer < Test::Unit::TestCase
   end
 
   def test_skip_comments
-    assert_equal %w(foo #), tokenize("foo\n# Comment")
-    assert_equal %w(foo # bar), tokenize("foo\n# Comment\nbar")
-    assert_equal %w(foo //), tokenize("foo\n// Comment")
-    assert_equal %w(foo /* */), tokenize("foo /* Comment */")
-    assert_equal %w(foo /* */), tokenize("foo /* \nComment\n */")
-    assert_equal %w(foo <!-- -->), tokenize("foo <!-- Comment -->")
-    assert_equal %w(foo {- -}), tokenize("foo {- Comment -}")
-    assert_equal %w(foo \(* *\)), tokenize("foo (* Comment *)")
-    assert_equal %w(% %), tokenize("2 % 10\n% Comment")
+    assert_equal %w(foo), tokenize("foo\n# Comment")
+    assert_equal %w(foo bar), tokenize("foo\n# Comment\nbar")
+    assert_equal %w(foo), tokenize("foo\n// Comment")
+    assert_equal %w(foo), tokenize("foo /* Comment */")
+    assert_equal %w(foo), tokenize("foo /* \nComment\n */")
+    assert_equal %w(foo), tokenize("foo <!-- Comment -->")
+    assert_equal %w(foo), tokenize("foo {- Comment -}")
+    assert_equal %w(foo), tokenize("foo (* Comment *)")
+    assert_equal %w(%), tokenize("2 % 10\n% Comment")
   end
 
   def test_sgml_tags