-- The preprocessing step of static analysis determines which parts of the input files contain expl3 code.

local get_option = require("explcheck-config").get_option
local ranges = require("explcheck-ranges")
local parsers = require("explcheck-parsers")
local utils = require("explcheck-utils")

local new_range = ranges.new_range
local range_flags = ranges.range_flags

local EXCLUSIVE = range_flags.EXCLUSIVE
local INCLUSIVE = range_flags.INCLUSIVE

local lpeg = require("lpeg")
local B, Cmt, Cp, Ct, Cc, P, V = lpeg.B, lpeg.Cmt, lpeg.Cp, lpeg.Ct, lpeg.Cc, lpeg.P, lpeg.V

-- Preprocess the content and register any issues.
local function preprocessing(pathname, content, issues, results, options)

  -- Determine the bytes where lines begin.
  local line_starting_byte_numbers = {}

  local function record_line(line_start)
    table.insert(line_starting_byte_numbers, line_start)
  end

  local line_numbers_grammar = (
    Cp() / record_line
    * (
      (
        parsers.linechar^0
        * parsers.newline
        * Cp()
      ) / record_line
    )^0
  )
  lpeg.match(line_numbers_grammar, content)
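  -- For instance, for the content `"a\nbc\n"`, the recorded byte numbers are
  -- {1, 3, 6}, where 6 points just past the trailing newline.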

  -- Strip TeX comments before further analysis.
  local function strip_comments()
    local transformed_index = 0
    local numbers_of_bytes_removed = {}
    local transformed_text_table = {}
    for index, text_position in ipairs(lpeg.match(parsers.commented_lines, content)) do
      local span_size = text_position - transformed_index - 1
      if span_size > 0 then
        if index % 2 == 1 then  -- chunk of text
          table.insert(transformed_text_table, content:sub(transformed_index + 1, text_position - 1))
        else  -- comment
          local comment_text = content:sub(transformed_index + 1, text_position - 1)
          local ignored_issues = lpeg.match(parsers.ignored_issues, comment_text)
          -- If a comment specifies ignored issues, register them.
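          -- For instance, a comment such as `% noqa: w100, e102` (assuming the
          -- `noqa` syntax recognized by `parsers.ignored_issues`) would ignore
          -- the issues W100 and E102 on this line.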
          if ignored_issues ~= nil then
            local comment_line_number = utils.convert_byte_to_line_and_column(line_starting_byte_numbers, transformed_index + 1)
            assert(comment_line_number <= #line_starting_byte_numbers)
            local comment_range_start = line_starting_byte_numbers[comment_line_number]
            local comment_range_end, comment_range
            if comment_line_number + 1 <= #line_starting_byte_numbers then
              comment_range_end = line_starting_byte_numbers[comment_line_number + 1]
              comment_range = new_range(comment_range_start, comment_range_end, EXCLUSIVE, #content)
            else
              comment_range_end = #content
              comment_range = new_range(comment_range_start, comment_range_end, INCLUSIVE, #content)
            end
            if #ignored_issues == 0 then  -- ignore all issues on this line
              issues:ignore(nil, comment_range)
            else  -- ignore specific issues on this line or everywhere (for file-wide issues)
              for _, identifier in ipairs(ignored_issues) do
                issues:ignore(identifier, comment_range)
              end
            end
          end
          table.insert(numbers_of_bytes_removed, {transformed_index, span_size})
        end
        transformed_index = transformed_index + span_size
      end
    end
    table.insert(transformed_text_table, content:sub(transformed_index + 1, -1))
    local transformed_text = table.concat(transformed_text_table, "")
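    -- Map a byte number in the transformed text back to the corresponding byte
    -- number in the original content by re-adding the spans removed before it.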
    local function map_back(index)
      local mapped_index = index
      for _, where_and_number_of_bytes_removed in ipairs(numbers_of_bytes_removed) do
        local where, number_of_bytes_removed = table.unpack(where_and_number_of_bytes_removed)
        if mapped_index > where then
          mapped_index = mapped_index + number_of_bytes_removed
        else
          break
        end
      end
      assert(mapped_index > 0)
      assert(mapped_index <= #content + 1)
      if mapped_index <= #content then
        -- Compare single characters with `:sub()`; indexing a Lua string with
        -- brackets yields `nil`, which would make this assertion vacuous.
        assert(transformed_text:sub(index, index) == content:sub(mapped_index, mapped_index))
      end
      return mapped_index
    end
    return transformed_text, map_back
  end

  local transformed_content, map_back = strip_comments()
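  -- For instance, assuming `parsers.commented_lines` marks the comment `%x` in
  -- the content `"A%x\nB"`, `transformed_content` is `"A\nB"` and `map_back(2)`
  -- returns 4, the byte number of the newline in the original content.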

  -- Determine which parts of the input files contain expl3 code.
  local expl_ranges = {}
  local input_ended = false

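  -- Record a captured expl3 range unless `\endinput` has ended the input
  -- before the range started.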
  local function capture_range(should_skip, range_start, range_end)
    if not should_skip then
      local range = new_range(range_start, range_end, EXCLUSIVE, #transformed_content, map_back, #content)
      table.insert(expl_ranges, range)
    end
  end

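  -- Wrap `pattern`, so that matching it reports the issue `code` unless the
  -- input has ended or the optional `test` callback returns false. The last two
  -- captured positions delimit the reported range, so an extra `Cp()` inside
  -- `pattern` can move the start of the range past leading material such as
  -- indentation.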
  local function unexpected_pattern(pattern, code, message, test)
    return Ct(Cp() * pattern * Cp()) / function(range_table)
      if not input_ended and (test == nil or test()) then
        local range_start, range_end = range_table[#range_table - 1], range_table[#range_table]
        local range = new_range(range_start, range_end, EXCLUSIVE, #transformed_content, map_back, #content)
        issues:add(code, message, range)
      end
    end
  end

  local num_provides = 0
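  -- By default, never match any delimiters and treat every byte as plain
  -- input; the detection strategies 'never' and 'always' keep these defaults.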
  local FirstLineProvides, FirstLineExplSyntaxOn, HeadlessCloser, Head, Any =
    parsers.fail, parsers.fail, parsers.fail, parsers.fail, parsers.any
  local expl3_detection_strategy = get_option('expl3_detection_strategy', options, pathname)
  if expl3_detection_strategy ~= 'never' and expl3_detection_strategy ~= 'always' then
    FirstLineProvides = unexpected_pattern(
      parsers.provides,
      "e104",
      [[multiple delimiters `\ProvidesExpl*` in a single file]],
      function()
        num_provides = num_provides + 1
        return num_provides > 1
      end
    )
    FirstLineExplSyntaxOn = parsers.expl_syntax_on
    HeadlessCloser = (
      parsers.expl_syntax_off
      + parsers.endinput
      / function()
        input_ended = true
      end
    )
    -- (Under)estimate the current TeX grouping level.
    local estimated_grouping_level = 0
    Any = (
      -B(parsers.expl3_catcodes[0])  -- no preceding backslash
      * parsers.expl3_catcodes[1]  -- begin grouping
      * Cmt(
        parsers.success,
        function()
          estimated_grouping_level = estimated_grouping_level + 1
          return true
        end
      )
      + parsers.expl3_catcodes[2]  -- end grouping
      * Cmt(
        parsers.success,
        function()
          estimated_grouping_level = math.max(0, estimated_grouping_level - 1)
          return true
        end
      )
      + parsers.any
    )
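    -- An end of grouping is counted even for the control symbol `\}`, which is
    -- one reason why the estimate errs low rather than high.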
    -- Allow indent before a standard delimiter outside a TeX grouping.
    Head = (
      parsers.newline
      + Cmt(
        parsers.success,
        function()
          return estimated_grouping_level == 0
        end
      )
    )
  end

  local num_expl3like_material = 0
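  -- Split the content into expl3 parts, which open with a delimiter such as
  -- `\ProvidesExpl*` or `\ExplSyntaxOn` and close with `\ExplSyntaxOff`,
  -- `\endinput`, or the end of the file, and non-expl3 parts, where any
  -- expl3-like material is reported as an issue.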
  local analysis_grammar = P{
    "Root";
    Root = (
      (
        V"FirstLineExplPart" / capture_range
      )^-1
      * (
        V"NonExplPart"
        * V"ExplPart" / capture_range
      )^0
      * V"NonExplPart"
    ),
    NonExplPart = (
      (
        unexpected_pattern(
          (
            V"Head"
            * Cp()
            * V"HeadlessCloser"
          ),
          "w101",
          "unexpected delimiters"
        )
        + unexpected_pattern(
            parsers.expl3like_material,
            "e102",
            "expl3 material in non-expl3 parts",
            function()
              num_expl3like_material = num_expl3like_material + 1
              return true
            end
          )
        + (
          V"Any"
          - V"Opener"
        )
      )^0
    ),
    FirstLineExplPart = (
      -- Evaluate `input_ended` lazily, when the captures are folded after the
      -- match, rather than once at grammar-construction time, as `Cc()` would.
      parsers.success / function() return input_ended end
      * V"FirstLineOpener"
      * Cp()
      * (
          V"Provides"
          + unexpected_pattern(
            (
              V"Head"
              * Cp()
              * V"FirstLineOpener"
            ),
            "w101",
            "unexpected delimiters"
          )
          + (
            V"Any"
            - V"Closer"
          )
        )^0
      * (
        V"Head"
        * Cp()
        * V"HeadlessCloser"
        + Cp()
        * parsers.eof
      )
    ),
    ExplPart = (
      V"Head"
      * V"FirstLineExplPart"
    ),
    FirstLineProvides = FirstLineProvides,
    Provides = (
      V"Head"
      * V"FirstLineProvides"
    ),
    FirstLineOpener = (
      FirstLineExplSyntaxOn
      + V"FirstLineProvides"
    ),
    Opener = (
      V"Head"
      * V"FirstLineOpener"
    ),
    HeadlessCloser = HeadlessCloser,
    Closer = (
      V"Head"
      * V"HeadlessCloser"
    ),
    Head = Head,
    Any = Any,
  }
  lpeg.match(analysis_grammar, transformed_content)

  -- Determine whether the pathname/content looks like it originates from a LaTeX style file.
  local seems_like_latex_style_file
  local suffix = utils.get_suffix(pathname)
  if suffix == ".cls" or suffix == ".opt" or suffix == ".sty" then
    seems_like_latex_style_file = true
  else
    seems_like_latex_style_file = lpeg.match(parsers.latex_style_file_content, transformed_content) ~= nil
  end

  -- If no expl3 parts were detected, decide whether no part or the whole input file is in expl3.
  if #expl_ranges == 0 and #content > 0 then
    issues:ignore('e102')
    if expl3_detection_strategy == "precision" or expl3_detection_strategy == "never" then
      -- Assume that no part of the input file is in expl3.
    elseif expl3_detection_strategy == "recall" or expl3_detection_strategy == "always" then
      -- Assume that the whole input file is in expl3.
      if expl3_detection_strategy == "recall" then
        issues:add('w100', 'no standard delimiters')
      end
      local range = new_range(1, #content, INCLUSIVE, #content)
      table.insert(expl_ranges, range)
    elseif expl3_detection_strategy == "auto" then
      -- Use context clues to determine whether no part or the whole
      -- input file is in expl3.
      if num_expl3like_material >= get_option('min_expl3like_material', options, pathname) then
        issues:add('w100', 'no standard delimiters')
        local range = new_range(1, #content, INCLUSIVE, #content)
        table.insert(expl_ranges, range)
      end
    else
      assert(false, 'Unknown strategy "' .. expl3_detection_strategy .. '"')
    end
  end

  -- Check for overlong lines within the expl3 parts.
  for _, expl_range in ipairs(expl_ranges) do
    local offset = expl_range:start() - 1

    local function line_too_long(range_start, range_end)
      local range = new_range(offset + range_start, offset + range_end, EXCLUSIVE, #transformed_content, map_back, #content)
      issues:add('s103', 'line too long', range)
    end

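    -- Report any line longer than `max_line_length` characters; for instance,
    -- assuming the default maximum of 80 characters, an 81-character line would
    -- be reported as issue s103.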
    local overlong_lines_grammar = (
      (
        Cp() * parsers.linechar^(get_option('max_line_length', options, pathname) + 1) * Cp() / line_too_long
        + parsers.linechar^0
      )
      * parsers.newline
    )^0

    lpeg.match(overlong_lines_grammar, transformed_content:sub(expl_range:start(), expl_range:stop()))
  end

  -- Store the intermediate results of the analysis.
  results.line_starting_byte_numbers = line_starting_byte_numbers
  results.expl_ranges = expl_ranges
  results.seems_like_latex_style_file = seems_like_latex_style_file
end

return {
  process = preprocessing
}