mirror of
https://github.com/KevinMidboe/linguist.git
synced 2025-10-29 17:50:22 +00:00
New Grammars Compiler (#3915)
* grammars: Update several grammars with compat issues * [WIP] Add new grammar conversion tools * Wrap in a Docker script * Proper Dockerfile support * Add Javadoc grammar * Remove NPM package.json * Remove superfluous test This is now always checked by the grammars compiler * Update JSyntax grammar to new submodule * Approve Javadoc license * grammars: Remove checked-in dependencies * grammars: Add regex checks to the compiler * grammars: Point Oz to its actual submodule * grammars: Refactor compiler to group errors by repo * grammars: Cleanups to error reporting
This commit is contained in:
227
tools/grammars/compiler/converter.go
Normal file
227
tools/grammars/compiler/converter.go
Normal file
@@ -0,0 +1,227 @@
|
||||
package compiler
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
grammar "github.com/github/linguist/tools/grammars/proto"
|
||||
"github.com/golang/protobuf/proto"
|
||||
pb "gopkg.in/cheggaaa/pb.v1"
|
||||
yaml "gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
type Converter struct {
|
||||
root string
|
||||
|
||||
modified bool
|
||||
grammars map[string][]string
|
||||
Loaded map[string]*Repository
|
||||
|
||||
progress *pb.ProgressBar
|
||||
wg sync.WaitGroup
|
||||
queue chan string
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
func (conv *Converter) Load(src string) *Repository {
|
||||
if strings.HasPrefix(src, "http://") || strings.HasPrefix(src, "https://") {
|
||||
return LoadFromURL(src)
|
||||
}
|
||||
return LoadFromFilesystem(conv.root, src)
|
||||
}
|
||||
|
||||
func (conv *Converter) work() {
|
||||
for source := range conv.queue {
|
||||
repo := conv.Load(source)
|
||||
|
||||
conv.mu.Lock()
|
||||
conv.Loaded[source] = repo
|
||||
conv.mu.Unlock()
|
||||
|
||||
conv.progress.Increment()
|
||||
}
|
||||
|
||||
conv.wg.Done()
|
||||
}
|
||||
|
||||
func (conv *Converter) AddGrammar(source string) error {
|
||||
repo := conv.Load(source)
|
||||
if len(repo.Files) == 0 {
|
||||
return fmt.Errorf("source '%s' contains no grammar files", source)
|
||||
}
|
||||
|
||||
conv.grammars[source] = repo.Scopes()
|
||||
conv.modified = true
|
||||
|
||||
fmt.Printf("OK! added grammar source '%s'\n", source)
|
||||
for scope := range repo.Files {
|
||||
fmt.Printf("\tnew scope: %s\n", scope)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (conv *Converter) ScopeMap() map[string]*Repository {
|
||||
allScopes := make(map[string]*Repository)
|
||||
|
||||
for _, repo := range conv.Loaded {
|
||||
for scope := range repo.Files {
|
||||
if original := allScopes[scope]; original != nil {
|
||||
repo.Fail(&DuplicateScopeError{original, scope})
|
||||
} else {
|
||||
allScopes[scope] = repo
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return allScopes
|
||||
}
|
||||
|
||||
func (conv *Converter) ConvertGrammars(update bool) error {
|
||||
conv.Loaded = make(map[string]*Repository)
|
||||
conv.queue = make(chan string, 128)
|
||||
|
||||
conv.progress = pb.New(len(conv.grammars))
|
||||
conv.progress.Start()
|
||||
|
||||
for i := 0; i < runtime.NumCPU(); i++ {
|
||||
conv.wg.Add(1)
|
||||
go conv.work()
|
||||
}
|
||||
|
||||
for src := range conv.grammars {
|
||||
conv.queue <- src
|
||||
}
|
||||
|
||||
close(conv.queue)
|
||||
conv.wg.Wait()
|
||||
|
||||
done := fmt.Sprintf("done! processed %d grammars\n", len(conv.Loaded))
|
||||
conv.progress.FinishPrint(done)
|
||||
|
||||
if update {
|
||||
conv.grammars = make(map[string][]string)
|
||||
conv.modified = true
|
||||
}
|
||||
|
||||
knownScopes := conv.ScopeMap()
|
||||
|
||||
for source, repo := range conv.Loaded {
|
||||
repo.FixRules(knownScopes)
|
||||
|
||||
if update {
|
||||
conv.grammars[source] = repo.Scopes()
|
||||
} else {
|
||||
expected := conv.grammars[source]
|
||||
repo.CompareScopes(expected)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (conv *Converter) WriteProto(path string) error {
|
||||
library := grammar.Library{
|
||||
Grammars: make(map[string]*grammar.Rule),
|
||||
}
|
||||
|
||||
for _, repo := range conv.Loaded {
|
||||
for scope, file := range repo.Files {
|
||||
library.Grammars[scope] = file.Rule
|
||||
}
|
||||
}
|
||||
|
||||
pb, err := proto.Marshal(&library)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return ioutil.WriteFile(path, pb, 0666)
|
||||
}
|
||||
|
||||
func (conv *Converter) writeJSONFile(path string, rule *grammar.Rule) error {
|
||||
j, err := os.Create(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer j.Close()
|
||||
|
||||
enc := json.NewEncoder(j)
|
||||
enc.SetIndent("", " ")
|
||||
return enc.Encode(rule)
|
||||
}
|
||||
|
||||
func (conv *Converter) WriteJSON(rulePath string) error {
|
||||
if err := os.MkdirAll(rulePath, os.ModePerm); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, repo := range conv.Loaded {
|
||||
for scope, file := range repo.Files {
|
||||
p := path.Join(rulePath, scope+".json")
|
||||
if err := conv.writeJSONFile(p, file.Rule); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (conv *Converter) WriteGrammarList() error {
|
||||
if !conv.modified {
|
||||
return nil
|
||||
}
|
||||
|
||||
outyml, err := yaml.Marshal(conv.grammars)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ymlpath := path.Join(conv.root, "grammars.yml")
|
||||
return ioutil.WriteFile(ymlpath, outyml, 0666)
|
||||
}
|
||||
|
||||
func (conv *Converter) Report(w io.Writer) {
|
||||
var failed []*Repository
|
||||
for _, repo := range conv.Loaded {
|
||||
if len(repo.Errors) > 0 {
|
||||
failed = append(failed, repo)
|
||||
}
|
||||
}
|
||||
|
||||
sort.Slice(failed, func(i, j int) bool {
|
||||
return failed[i].Source < failed[j].Source
|
||||
})
|
||||
|
||||
for _, repo := range failed {
|
||||
fmt.Fprintf(w, "- [ ] %s (%d errors)\n", repo, len(repo.Errors))
|
||||
for _, err := range repo.Errors {
|
||||
fmt.Fprintf(w, " - [ ] %s\n", err)
|
||||
}
|
||||
fmt.Fprintf(w, "\n")
|
||||
}
|
||||
}
|
||||
|
||||
func NewConverter(root string) (*Converter, error) {
|
||||
yml, err := ioutil.ReadFile(path.Join(root, "grammars.yml"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
conv := &Converter{root: root}
|
||||
|
||||
if err := yaml.Unmarshal(yml, &conv.grammars); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return conv, nil
|
||||
}
|
||||
21
tools/grammars/compiler/cson.go
Normal file
21
tools/grammars/compiler/cson.go
Normal file
@@ -0,0 +1,21 @@
|
||||
package compiler
|
||||
|
||||
import (
	"bytes"
	"fmt"
	"os/exec"
)
|
||||
|
||||
// ConvertCSON pipes data through the external "csonc" command and returns
// whatever the command wrote to standard output.
//
// When the command fails, anything it printed to standard error is appended
// to the returned error; without it the caller would only see the bare exit
// status. The returned slice is nil on error.
func ConvertCSON(data []byte) ([]byte, error) {
	var stdout, stderr bytes.Buffer

	cmd := exec.Command("csonc")
	cmd.Stdin = bytes.NewReader(data)
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		if msg := bytes.TrimSpace(stderr.Bytes()); len(msg) > 0 {
			return nil, fmt.Errorf("csonc: %v: %s", err, msg)
		}
		return nil, err
	}

	return stdout.Bytes(), nil
}
|
||||
29
tools/grammars/compiler/data.go
Normal file
29
tools/grammars/compiler/data.go
Normal file
@@ -0,0 +1,29 @@
|
||||
package compiler
|
||||
|
||||
// GrammarAliases maps a scope name as written in an include to the scope name
// the include is rewritten to. An empty value rewrites the include to the
// empty string.
var GrammarAliases = map[string]string{
	"source.asciidoc":    "text.html.asciidoc",
	"source.cpp":         "source.c++",
	"source.css.scss":    "source.scss",
	"source.erb":         "text.html.erb",
	"source.less":        "source.css.less",
	"source.perl6":       "source.perl6fe",
	"source.php":         "text.html.php",
	"text.html.markdown": "source.gfm",
	"text.md":            "source.gfm",
	"text.plain":         "",
}
|
||||
|
||||
// KnownFields is the set of grammar keys that are not reported as unknown
// even though they are left unused when a grammar is decoded.
var KnownFields = map[string]bool{
	"author":             true,
	"comment":            true,
	"comments":           true,
	"fileTypes":          true,
	"firstLineMatch":     true,
	"foldingEndMarker":   true,
	"foldingStartMarker": true,
	"foldingStopMarker":  true,
	"keyEquivalent":      true,
	"limitLineLength":    true,
	"macros":             true,
	"uuid":               true,
}
|
||||
85
tools/grammars/compiler/errors.go
Normal file
85
tools/grammars/compiler/errors.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package compiler
|
||||
|
||||
import "fmt"
|
||||
import "strings"
|
||||
|
||||
// ConversionError reports that the grammar file at Path could not be parsed.
type ConversionError struct {
	// Path is the path of the file that failed to parse.
	Path string
	// Err is the underlying parse failure.
	Err error
}

// Error implements the error interface.
func (err *ConversionError) Error() string {
	return fmt.Sprintf(
		"Grammar conversion failed. File `%s` failed to parse: %s",
		err.Path, err.Err)
}

// Unwrap returns the underlying parse failure so that errors.Is and errors.As
// can inspect it.
func (err *ConversionError) Unwrap() error {
	return err.Err
}
|
||||
|
||||
type DuplicateScopeError struct {
|
||||
Original *Repository
|
||||
Duplicate string
|
||||
}
|
||||
|
||||
func (err *DuplicateScopeError) Error() string {
|
||||
return fmt.Sprintf(
|
||||
"Duplicate scope in repository: scope `%s` was already defined in %s",
|
||||
err.Duplicate, err.Original)
|
||||
}
|
||||
|
||||
// MissingScopeError reports a scope that grammars.yml lists for a repository
// but that was not found in it.
type MissingScopeError struct {
	// Scope is the name of the scope that could not be found.
	Scope string
}

// Error implements the error interface.
func (err *MissingScopeError) Error() string {
	const format = "Missing scope in repository: `%s` is listed in grammars.yml but cannot be found"
	return fmt.Sprintf(format, err.Scope)
}
|
||||
|
||||
type UnexpectedScopeError struct {
|
||||
File *LoadedFile
|
||||
Scope string
|
||||
}
|
||||
|
||||
func (err *UnexpectedScopeError) Error() string {
|
||||
return fmt.Sprintf(
|
||||
"Unexpected scope in repository: `%s` declared in %s was not listed in grammars.yml",
|
||||
err.Scope, err.File)
|
||||
}
|
||||
|
||||
type MissingIncludeError struct {
|
||||
File *LoadedFile
|
||||
Include string
|
||||
}
|
||||
|
||||
func (err *MissingIncludeError) Error() string {
|
||||
return fmt.Sprintf(
|
||||
"Missing include in grammar: %s attempts to include `%s` but the scope cannot be found",
|
||||
err.File, err.Include)
|
||||
}
|
||||
|
||||
type UnknownKeysError struct {
|
||||
File *LoadedFile
|
||||
Keys []string
|
||||
}
|
||||
|
||||
func (err *UnknownKeysError) Error() string {
|
||||
var keys []string
|
||||
for _, k := range err.Keys {
|
||||
keys = append(keys, fmt.Sprintf("`%s`", k))
|
||||
}
|
||||
|
||||
return fmt.Sprintf(
|
||||
"Unknown keys in grammar: %s contains invalid keys (%s)",
|
||||
err.File, strings.Join(keys, ", "))
|
||||
}
|
||||
|
||||
type InvalidRegexError struct {
|
||||
File *LoadedFile
|
||||
Err error
|
||||
}
|
||||
|
||||
func (err *InvalidRegexError) Error() string {
|
||||
return fmt.Sprintf(
|
||||
"Invalid regex in grammar: %s contains a malformed regex (%s)",
|
||||
err.File, err.Err)
|
||||
}
|
||||
124
tools/grammars/compiler/loader.go
Normal file
124
tools/grammars/compiler/loader.go
Normal file
@@ -0,0 +1,124 @@
|
||||
package compiler
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
grammar "github.com/github/linguist/tools/grammars/proto"
|
||||
)
|
||||
|
||||
type LoadedFile struct {
|
||||
Path string
|
||||
Rule *grammar.Rule
|
||||
}
|
||||
|
||||
func (f *LoadedFile) String() string {
|
||||
return fmt.Sprintf("`%s` (in `%s`)", f.Rule.ScopeName, f.Path)
|
||||
}
|
||||
|
||||
type Repository struct {
|
||||
Source string
|
||||
Upstream string
|
||||
Files map[string]*LoadedFile
|
||||
Errors []error
|
||||
}
|
||||
|
||||
func newRepository(src string) *Repository {
|
||||
return &Repository{
|
||||
Source: src,
|
||||
Files: make(map[string]*LoadedFile),
|
||||
}
|
||||
}
|
||||
|
||||
func (repo *Repository) String() string {
|
||||
str := fmt.Sprintf("repository `%s`", repo.Source)
|
||||
if repo.Upstream != "" {
|
||||
str = str + fmt.Sprintf(" (from %s)", repo.Upstream)
|
||||
}
|
||||
return str
|
||||
}
|
||||
|
||||
func (repo *Repository) Fail(err error) {
|
||||
repo.Errors = append(repo.Errors, err)
|
||||
}
|
||||
|
||||
func (repo *Repository) AddFile(path string, rule *grammar.Rule, uk []string) {
|
||||
file := &LoadedFile{
|
||||
Path: path,
|
||||
Rule: rule,
|
||||
}
|
||||
|
||||
repo.Files[rule.ScopeName] = file
|
||||
if len(uk) > 0 {
|
||||
repo.Fail(&UnknownKeysError{file, uk})
|
||||
}
|
||||
}
|
||||
|
||||
// toMap returns a set containing every string in slice. The result is never
// nil, even for an empty or nil slice.
func toMap(slice []string) map[string]bool {
	set := make(map[string]bool)
	for i := range slice {
		set[slice[i]] = true
	}
	return set
}
|
||||
|
||||
func (repo *Repository) CompareScopes(scopes []string) {
|
||||
expected := toMap(scopes)
|
||||
|
||||
for scope, file := range repo.Files {
|
||||
if !expected[scope] {
|
||||
repo.Fail(&UnexpectedScopeError{file, scope})
|
||||
}
|
||||
}
|
||||
|
||||
for scope := range expected {
|
||||
if _, ok := repo.Files[scope]; !ok {
|
||||
repo.Fail(&MissingScopeError{scope})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (repo *Repository) FixRules(knownScopes map[string]*Repository) {
|
||||
for _, file := range repo.Files {
|
||||
w := walker{
|
||||
File: file,
|
||||
Known: knownScopes,
|
||||
Missing: make(map[string]bool),
|
||||
}
|
||||
|
||||
w.walk(file.Rule)
|
||||
repo.Errors = append(repo.Errors, w.Errors...)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func (repo *Repository) Scopes() (scopes []string) {
|
||||
for s := range repo.Files {
|
||||
scopes = append(scopes, s)
|
||||
}
|
||||
sort.Strings(scopes)
|
||||
return
|
||||
}
|
||||
|
||||
// isValidGrammar reports whether the file at path should be loaded as a
// grammar. The decision is based on the extension (compared case-insensitively)
// and, for some extensions, on the name of the directory holding the file:
//
//   - ".plist" files must sit directly in a directory named "Syntaxes";
//   - ".tmLanguage" and ".YAML-tmLanguage" files are always accepted;
//   - ".cson" and ".json" files must sit directly in a directory named
//     "grammars".
//
// Directories are never grammars. The parent directory is compared by name
// rather than by a "/name" suffix, so a file such as "grammars/a.cson" with no
// leading directory is accepted and the check does not depend on the path
// separator in use.
func isValidGrammar(path string, info os.FileInfo) bool {
	if info.IsDir() {
		return false
	}

	parent := filepath.Base(filepath.Dir(path))

	switch strings.ToLower(filepath.Ext(path)) {
	case ".plist":
		return parent == "Syntaxes"
	case ".tmlanguage", ".yaml-tmlanguage":
		return true
	case ".cson", ".json":
		return parent == "grammars"
	default:
		return false
	}
}
|
||||
80
tools/grammars/compiler/loader_fs.go
Normal file
80
tools/grammars/compiler/loader_fs.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package compiler
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type fsLoader struct {
|
||||
*Repository
|
||||
abspath string
|
||||
}
|
||||
|
||||
func (l *fsLoader) findGrammars() (files []string, err error) {
|
||||
err = filepath.Walk(l.abspath,
|
||||
func(path string, info os.FileInfo, err error) error {
|
||||
if err == nil && isValidGrammar(path, info) {
|
||||
files = append(files, path)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
func (l *fsLoader) load() {
|
||||
grammars, err := l.findGrammars()
|
||||
if err != nil {
|
||||
l.Fail(err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, path := range grammars {
|
||||
data, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
l.Fail(err)
|
||||
continue
|
||||
}
|
||||
|
||||
if rel, err := filepath.Rel(l.abspath, path); err == nil {
|
||||
path = rel
|
||||
}
|
||||
|
||||
rule, unknown, err := ConvertProto(filepath.Ext(path), data)
|
||||
if err != nil {
|
||||
l.Fail(&ConversionError{path, err})
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := l.Files[rule.ScopeName]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
l.AddFile(path, rule, unknown)
|
||||
}
|
||||
}
|
||||
|
||||
// gitRemoteName runs "git -C path remote get-url origin" and returns its
// output with surrounding whitespace removed. It returns an empty string and
// the error when the command fails.
func gitRemoteName(path string) (string, error) {
	cmd := exec.Command("git", "-C", path, "remote", "get-url", "origin")

	out, err := cmd.Output()
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(out)), nil
}
|
||||
|
||||
func LoadFromFilesystem(root, src string) *Repository {
|
||||
loader := fsLoader{
|
||||
Repository: newRepository(src),
|
||||
abspath: path.Join(root, src),
|
||||
}
|
||||
loader.load()
|
||||
|
||||
if ups, err := gitRemoteName(loader.abspath); err == nil {
|
||||
loader.Repository.Upstream = ups
|
||||
}
|
||||
|
||||
return loader.Repository
|
||||
}
|
||||
93
tools/grammars/compiler/loader_url.go
Normal file
93
tools/grammars/compiler/loader_url.go
Normal file
@@ -0,0 +1,93 @@
|
||||
package compiler
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"compress/gzip"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type urlLoader struct {
|
||||
*Repository
|
||||
}
|
||||
|
||||
func (l *urlLoader) loadTarball(r io.Reader) {
|
||||
gzf, err := gzip.NewReader(r)
|
||||
if err != nil {
|
||||
l.Fail(err)
|
||||
return
|
||||
}
|
||||
defer gzf.Close()
|
||||
|
||||
tarReader := tar.NewReader(gzf)
|
||||
for true {
|
||||
header, err := tarReader.Next()
|
||||
|
||||
if err != nil {
|
||||
if err != io.EOF {
|
||||
l.Fail(err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if isValidGrammar(header.Name, header.FileInfo()) {
|
||||
data, err := ioutil.ReadAll(tarReader)
|
||||
if err != nil {
|
||||
l.Fail(err)
|
||||
return
|
||||
}
|
||||
|
||||
ext := filepath.Ext(header.Name)
|
||||
rule, unknown, err := ConvertProto(ext, data)
|
||||
if err != nil {
|
||||
l.Fail(&ConversionError{header.Name, err})
|
||||
continue
|
||||
}
|
||||
|
||||
if _, ok := l.Files[rule.ScopeName]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
l.AddFile(header.Name, rule, unknown)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (l *urlLoader) load() {
|
||||
res, err := http.Get(l.Source)
|
||||
if err != nil {
|
||||
l.Fail(err)
|
||||
return
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
if strings.HasSuffix(l.Source, ".tar.gz") {
|
||||
l.loadTarball(res.Body)
|
||||
return
|
||||
}
|
||||
|
||||
data, err := ioutil.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
l.Fail(err)
|
||||
return
|
||||
}
|
||||
|
||||
ext := filepath.Ext(l.Source)
|
||||
filename := filepath.Base(l.Source)
|
||||
rule, unknown, err := ConvertProto(ext, data)
|
||||
if err != nil {
|
||||
l.Fail(&ConversionError{filename, err})
|
||||
return
|
||||
}
|
||||
|
||||
l.AddFile(filename, rule, unknown)
|
||||
}
|
||||
|
||||
func LoadFromURL(src string) *Repository {
|
||||
loader := urlLoader{newRepository(src)}
|
||||
loader.load()
|
||||
return loader.Repository
|
||||
}
|
||||
68
tools/grammars/compiler/pcre.go
Normal file
68
tools/grammars/compiler/pcre.go
Normal file
@@ -0,0 +1,68 @@
|
||||
package compiler
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/github/linguist/tools/grammars/pcre"
|
||||
)
|
||||
|
||||
// replacement describes one substitution in a regex: len bytes starting at
// byte offset pos are replaced by val.
type replacement struct {
	pos int    // byte offset of the text to replace
	len int    // number of bytes to replace
	val string // text to insert instead
}
|
||||
|
||||
// fixRegex rewrites every escaped "\h" in re to "[[:xdigit:]]" and reports
// whether re contains a backslash followed by a decimal digit.
//
// A backslash that is itself escaped does not start an escape, so `\\h` is
// left untouched while `\\\h` is rewritten. When nothing is rewritten the
// input string is returned as is.
func fixRegex(re string) (string, bool) {
	var (
		fixed       []byte // output built so far; only used once a \h is seen
		copied      int    // number of input bytes already consumed into fixed
		escaped     bool   // the previous character was an unescaped backslash
		hasBackRefs bool
	)

	for i, ch := range re {
		if escaped {
			switch {
			case ch == 'h':
				// i-1 is the backslash; copy everything before it, then
				// emit the replacement and skip past the 'h'.
				fixed = append(fixed, re[copied:i-1]...)
				fixed = append(fixed, "[[:xdigit:]]"...)
				copied = i + 1
			case '0' <= ch && ch <= '9':
				hasBackRefs = true
			}
		}
		escaped = !escaped && ch == '\\'
	}

	// copied stays 0 only when no "\h" was found.
	if copied == 0 {
		return re, hasBackRefs
	}
	return string(append(fixed, re[copied:]...)), hasBackRefs
}
|
||||
|
||||
func CheckPCRE(re string) (string, error) {
|
||||
hasBackRefs := false
|
||||
|
||||
if re == "" {
|
||||
return "", nil
|
||||
}
|
||||
if len(re) > 32*1024 {
|
||||
return "", fmt.Errorf(
|
||||
"regex %s: definition too long (%d bytes)",
|
||||
pcre.RegexPP(re), len(re))
|
||||
}
|
||||
|
||||
re, hasBackRefs = fixRegex(re)
|
||||
if !hasBackRefs {
|
||||
if err := pcre.CheckRegexp(re, pcre.DefaultFlags); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
return re, nil
|
||||
}
|
||||
27
tools/grammars/compiler/pcre_test.go
Normal file
27
tools/grammars/compiler/pcre_test.go
Normal file
@@ -0,0 +1,27 @@
|
||||
package compiler
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func Test_fixRegex(t *testing.T) {
|
||||
tests := []struct {
|
||||
re string
|
||||
want string
|
||||
}{
|
||||
{"foobar", "foobar"},
|
||||
{`testing\h`, "testing[[:xdigit:]]"},
|
||||
{`\htest`, `[[:xdigit:]]test`},
|
||||
{`abc\hdef`, `abc[[:xdigit:]]def`},
|
||||
{`\\\htest`, `\\[[:xdigit:]]test`},
|
||||
{`\\htest`, `\\htest`},
|
||||
{`\h\h\h\h`, `[[:xdigit:]][[:xdigit:]][[:xdigit:]][[:xdigit:]]`},
|
||||
{`abc\hdef\hghi\h`, `abc[[:xdigit:]]def[[:xdigit:]]ghi[[:xdigit:]]`},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got, _ := fixRegex(tt.re)
|
||||
if got != tt.want {
|
||||
t.Errorf("fixRegex() got = %v, want %v", got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
96
tools/grammars/compiler/proto.go
Normal file
96
tools/grammars/compiler/proto.go
Normal file
@@ -0,0 +1,96 @@
|
||||
package compiler
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
grammar "github.com/github/linguist/tools/grammars/proto"
|
||||
"github.com/groob/plist"
|
||||
"github.com/mitchellh/mapstructure"
|
||||
yaml "gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
// looseDecoder is a decode hook that converts a value of kind f to a bool when
// the target kind t is reflect.Bool:
//
//   - a bool is returned as is;
//   - a float is truncated to an int and is true when the result is non-zero;
//   - a signed or unsigned integer is true when it is non-zero;
//   - the strings "1" and "0" become true and false.
//
// Every other combination returns data unchanged. The error is always nil.
func looseDecoder(f reflect.Kind, t reflect.Kind, data interface{}) (interface{}, error) {
	if t != reflect.Bool {
		return data, nil
	}

	value := reflect.ValueOf(data)
	switch f {
	case reflect.Bool:
		return value.Bool(), nil
	case reflect.Float32, reflect.Float64:
		return int(value.Float()) != 0, nil
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		return value.Int() != 0, nil
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		return value.Uint() != 0, nil
	case reflect.String:
		if s := value.String(); s == "1" {
			return true, nil
		} else if s == "0" {
			return false, nil
		}
	}

	return data, nil
}
|
||||
|
||||
func filterUnusedKeys(keys []string) (out []string) {
|
||||
for _, k := range keys {
|
||||
parts := strings.Split(k, ".")
|
||||
field := parts[len(parts)-1]
|
||||
if !KnownFields[field] {
|
||||
out = append(out, k)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func ConvertProto(ext string, data []byte) (*grammar.Rule, []string, error) {
|
||||
var (
|
||||
raw map[string]interface{}
|
||||
out grammar.Rule
|
||||
err error
|
||||
md mapstructure.Metadata
|
||||
)
|
||||
|
||||
switch strings.ToLower(ext) {
|
||||
case ".plist", ".tmlanguage":
|
||||
err = plist.Unmarshal(data, &raw)
|
||||
case ".yaml-tmlanguage":
|
||||
err = yaml.Unmarshal(data, &raw)
|
||||
case ".cson":
|
||||
data, err = ConvertCSON(data)
|
||||
if err == nil {
|
||||
err = json.Unmarshal(data, &raw)
|
||||
}
|
||||
case ".json":
|
||||
err = json.Unmarshal(data, &raw)
|
||||
default:
|
||||
err = fmt.Errorf("grammars: unsupported extension '%s'", ext)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
config := mapstructure.DecoderConfig{
|
||||
Result: &out,
|
||||
Metadata: &md,
|
||||
DecodeHook: looseDecoder,
|
||||
}
|
||||
|
||||
decoder, err := mapstructure.NewDecoder(&config)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
if err := decoder.Decode(raw); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
return &out, filterUnusedKeys(md.Unused), nil
|
||||
}
|
||||
79
tools/grammars/compiler/walker.go
Normal file
79
tools/grammars/compiler/walker.go
Normal file
@@ -0,0 +1,79 @@
|
||||
package compiler
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
grammar "github.com/github/linguist/tools/grammars/proto"
|
||||
)
|
||||
|
||||
func (w *walker) checkInclude(rule *grammar.Rule) {
|
||||
include := rule.Include
|
||||
|
||||
if include == "" || include[0] == '#' || include[0] == '$' {
|
||||
return
|
||||
}
|
||||
|
||||
if alias, ok := GrammarAliases[include]; ok {
|
||||
rule.Include = alias
|
||||
return
|
||||
}
|
||||
|
||||
include = strings.Split(include, "#")[0]
|
||||
_, ok := w.Known[include]
|
||||
if !ok {
|
||||
if !w.Missing[include] {
|
||||
w.Missing[include] = true
|
||||
w.Errors = append(w.Errors, &MissingIncludeError{w.File, include})
|
||||
}
|
||||
rule.Include = ""
|
||||
}
|
||||
}
|
||||
|
||||
func (w *walker) checkRegexps(rule *grammar.Rule) {
|
||||
check := func(re string) string {
|
||||
re2, err := CheckPCRE(re)
|
||||
if err != nil {
|
||||
w.Errors = append(w.Errors, &InvalidRegexError{w.File, err})
|
||||
}
|
||||
return re2
|
||||
}
|
||||
|
||||
rule.Match = check(rule.Match)
|
||||
rule.Begin = check(rule.Begin)
|
||||
rule.While = check(rule.While)
|
||||
rule.End = check(rule.End)
|
||||
}
|
||||
|
||||
func (w *walker) walk(rule *grammar.Rule) {
|
||||
w.checkInclude(rule)
|
||||
w.checkRegexps(rule)
|
||||
|
||||
for _, rule := range rule.Patterns {
|
||||
w.walk(rule)
|
||||
}
|
||||
for _, rule := range rule.Captures {
|
||||
w.walk(rule)
|
||||
}
|
||||
for _, rule := range rule.BeginCaptures {
|
||||
w.walk(rule)
|
||||
}
|
||||
for _, rule := range rule.WhileCaptures {
|
||||
w.walk(rule)
|
||||
}
|
||||
for _, rule := range rule.EndCaptures {
|
||||
w.walk(rule)
|
||||
}
|
||||
for _, rule := range rule.Repository {
|
||||
w.walk(rule)
|
||||
}
|
||||
for _, rule := range rule.Injections {
|
||||
w.walk(rule)
|
||||
}
|
||||
}
|
||||
|
||||
type walker struct {
|
||||
File *LoadedFile
|
||||
Known map[string]*Repository
|
||||
Missing map[string]bool
|
||||
Errors []error
|
||||
}
|
||||
Reference in New Issue
Block a user