/**
 * Authors: Frederik Leyvraz, David Degenhardt
 * License: GNU General Public License v3.0 only
 * Version: 1.0.1
 */

package ch.bfh.ti.latexindexer;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Inserts {@code \index{...}} macros into the source files of a LaTeX document.
 *
 * <p>The external {@code detex} program is used to find which files of the
 * document mention a word; the affected files are then re-read, the matching
 * lines are tagged and the files are written back in place.
 */
public class IndexWriter {
    /** Name of the external executable used to strip LaTeX markup. */
    private static final String DETEX = "detex";
    /**
     * Option passed to detex. The parsing in {@link #getFileOccurrences(Word)}
     * assumes this makes detex prefix every output line with
     * {@code <file>:<line>:} -- TODO confirm against the installed detex version.
     */
    private static final String DETEX_OPTIONS = "-1";

    /** Path of the root LaTeX file handed to detex. */
    private final String latexFilePath;

    /**
     * Constructor
     * @param latexFile The path to the latex file to which the index writer should write
     */
    public IndexWriter(String latexFile) {
        this.latexFilePath = latexFile;
    }


    /**
     * Adds the index macro after every occurrence of the given word in all
     * files of the latex document that mention it.
     * @param word the word to be added to the index.
     * @throws IOException If a file can't be read or written, or detex can't be started.
     */
    public void replaceWordInFiles(Word word) throws IOException {
        Map<String, List<Integer>> occurrences = getOccurrences(word);
        addToIndexByOccurrence(occurrences, word);
    }

    /**
     * Takes a line and appends the \index{...} macro to every whole-word,
     * case-insensitive match of one of the word's variations.
     * @param content The line that contains the word.
     * @param word The word that is to be indexed.
     * @return The line with \index{...} appended to each match; the unchanged
     *         line if the word has no variations.
     */
    String addIndexTag(String content, Word word) {
        Set<String> variations = word.getVariations();
        if (variations.isEmpty()) {
            // Nothing to look for; an empty alternation would match everywhere.
            return content;
        }

        // turns {"This", "example"} into "\\b\\QThis\\E\\b|\\b\\Qexample\\E\\b".
        // Each variation is quoted so regex metacharacters in it are matched literally.
        StringJoiner alternatives = new StringJoiner("|");
        for (String variation : variations) {
            alternatives.add("\\b" + Pattern.quote(variation) + "\\b");
        }

        Pattern regexpPattern = Pattern.compile(alternatives.toString(), Pattern.CASE_INSENSITIVE);
        Matcher matcher = regexpPattern.matcher(content);

        // StringBuffer (not StringBuilder) keeps this compatible with Java 8's Matcher API.
        StringBuffer result = new StringBuffer();
        while (matcher.find()) {
            String replacement = matcher.group() + word.getWrappedIndexString();
            // quoteReplacement: the macro contains a backslash, which is special in replacements.
            matcher.appendReplacement(result, Matcher.quoteReplacement(replacement));
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /**
     * Adds the \index{} macros to the listed lines of each file and writes the
     * files back in place.
     * @param occurrences A map from file path to the zero-based indices of the
     *                    lines that should be tagged.
     * @param word The word that is to be indexed.
     * @throws IOException If a file cannot be read or the destination file cannot be written to.
     */
    public void addToIndexByOccurrence(Map<String, List<Integer>> occurrences, Word word) throws IOException {
        for (Map.Entry<String, List<Integer>> entry : occurrences.entrySet()) {
            Path filePath = Paths.get(entry.getKey());
            List<String> lines = Files.readAllLines(filePath);

            for (int lineNumber : entry.getValue()) {
                // replace the line with its tagged version
                lines.set(lineNumber, addIndexTag(lines.get(lineNumber), word));
            }

            Files.write(filePath, lines);
        }
    }

    /**
     * Finds the files in which the given word occurs by scanning the output
     * of detex for any of the word's variations (case-sensitive substring match).
     * @param word The word that is to be indexed.
     * @return A mutable map whose keys are the file paths reported by detex
     *         (the text before the first ':' of a matching output line) and
     *         whose values are empty, mutable lists to be filled by the caller.
     * @throws IOException If detex cannot be started or its output cannot be read.
     */
    Map<String, List<Integer>> getFileOccurrences(Word word) throws IOException {
        Set<String> variations = word.getVariations();
        Map<String, List<Integer>> occurrences = new HashMap<>();

        Process process = new ProcessBuilder(DETEX, DETEX_OPTIONS, latexFilePath).start();
        try (BufferedReader bufferedReader =
                     new BufferedReader(new InputStreamReader(process.getInputStream()))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                for (String variation : variations) {
                    if (line.contains(variation)) {
                        String filePath = line.split(":")[0];
                        occurrences.computeIfAbsent(filePath, key -> new ArrayList<>());
                        break; // one matching variation is enough to register the file
                    }
                }
            }
        } finally {
            // Make sure the child process does not outlive this call, even on a read error.
            process.destroy();
        }
        return occurrences;
    }

    /**
     * Finds the lines where the given word occurs for each file.
     *
     * <p>Only the file names are taken from detex; the line numbers are
     * recomputed here by scanning the files directly.
     * @param word The word that is to be indexed.
     * @return A map from file path to the zero-based indices of the lines that
     *         contain at least one variation of the word (case-sensitive
     *         substring match), in ascending order.
     * @throws IOException If detex cannot be run or a file cannot be read from.
     */
    Map<String, List<Integer>> getOccurrences(Word word) throws IOException {
        Set<String> variations = word.getVariations();

        Map<String, List<Integer>> occurrences = getFileOccurrences(word);

        // Workaround for a bug in detex: miscounting the line numbers.
        // Detex skips some lines while counting

        for (Map.Entry<String, List<Integer>> entry : occurrences.entrySet()) {
            List<String> lines = Files.readAllLines(Paths.get(entry.getKey()));
            List<Integer> lineNumbers = entry.getValue();

            for (int lineNumber = 0; lineNumber < lines.size(); lineNumber++) {
                String line = lines.get(lineNumber);
                for (String variation : variations) {
                    if (line.contains(variation)) {
                        lineNumbers.add(lineNumber);
                        break; // record each line once, however many variations match
                    }
                }
            }
        }

        return occurrences;
    }
}
