#!/usr/bin/env ruby

require "json"
require "linguist"
require "set"
require "yaml"

# Recursively collects the external grammar scopes referenced by "include"
# keys anywhere inside a parsed grammar structure.
#
# References starting with "#" or "$" are skipped; for any other reference
# only the part before the first "#" is kept.
#
# json - A parsed JSON value (Hash, Array or scalar).
#
# Returns an Array of scope name Strings, possibly containing duplicates.
def find_includes(json)
  case json
  when Hash
    found = []
    inc = json["include"]
    # Only String values are references. A nested rule that merely happens to
    # be keyed "include" is a Hash and is covered by the recursion below.
    if inc.is_a?(String) && !inc.start_with?("#", "$")
      scope = inc.split("#", 2).first
      # An empty reference splits to nothing; do not record a nil scope.
      found << scope if scope
    end
    found + json.values.flat_map { |v| find_includes(v) }
  when Array
    json.flat_map { |v| find_includes(v) }
  else
    []
  end
end

# Computes every scope reachable from `scope` by following includes.
#
# scope    - The scope name String to start from.
# includes - A Hash mapping a scope name to the Array of scope names it
#            includes directly. It is left unmodified.
#
# Returns a Set of scope names. The starting scope itself is present only when
# it is reachable again through its own includes.
def transitive_includes(scope, includes)
  scopes = Set.new
  # Work on a copy: shifting from the caller's array would drop entries from
  # `includes` and change the result of later calls.
  queue = (includes[scope] || []).dup
  while (s = queue.shift)
    next if scopes.include?(s)
    scopes << s
    queue.concat(includes[s] || [])
  end
  scopes
end

# Map each grammar scope to the external scopes it includes directly. The
# scope name is the grammar's file name without its ".json" extension.
includes = {}
Dir["grammars/*.json"].each do |path|
  # Strip only the trailing extension; an unanchored substitution would remove
  # the first ".json" inside names such as "source.jsonnet.json".
  scope = File.basename(path, ".json")
  # NOTE(review): JSON.load is meant for trusted input; consider JSON.parse if
  # these grammar files are not fully trusted - confirm.
  json = JSON.load(File.read(path))
  incs = find_includes(json)
  next if incs.empty?
  includes[scope] ||= []
  includes[scope] += incs
end

# grammars.yml maps each grammar repo to the list of scopes it provides.
yaml = YAML.load(File.read("grammars.yml"))
language_scopes = Linguist::Language.all.map(&:tm_scope).to_set

# A scope is used when a language declares it directly, or when it is reached
# by following includes (transitively) from a scope a language declares.
used_scopes = language_scopes.dup
language_scopes.each do |s|
  used_scopes.merge(transitive_includes(s, includes))
end

# A repo is unused when none of the scopes it provides is used.
unused = yaml.select do |_repo, scopes|
  scopes.none? { |scope| used_scopes.include?(scope) }
end

puts "Unused grammar repos"
report_lines = unused.map do |repo, scopes|
  format("%-100s %s", repo, scopes.join(", "))
end
puts report_lines.sort.join("\n")

# Remove the unused repos and write the pruned mapping back in place.
unused.each_key { |repo| yaml.delete(repo) }
File.write("grammars.yml", YAML.dump(yaml))
