package delig;

import java.io.*;
import java.util.Hashtable;
import java.util.regex.*;

/**
 * DeLig disables misplaced ligatures in LaTeX documents
 * using a dictionary approach. 
 * 
 * DeLig is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * It may be copied or modified under the terms of the
 * GNU General Public License version 3
 * as published by the Free Software Foundation
 * (http://www.gnu.org/copyleft/gpl.html).
 *
 * This version is intended for German language texts only. 
 * DeLig is partially based on the Perl script rmligs by Björn Jacke,
 * in particular the wordlist data is based on his igerman98 dictionary.
 * 
 * @author Daniel Warner, delig@nospam@daniel-warner.de
 * @version 2008-03-09
 */
public class DeLig {
    private int minLen = 4;

    private boolean prefixAllowed = true;

    private boolean suffixAllowed = true;

    private boolean properSubwordAllowed = true;

    private int tokensCharShift = 256;
    
    private String [][] tokens =
    	{{"\"-", ""}, {"\\-", ""}, {"\"\"", ""},
	 {"{\\\"a}", ""}, {"\\\"a", ""}, {"\\\"{a}", ""}, {"\"a", ""},
	 {"{\\\"o}", ""}, {"\\\"o", ""}, {"\\\"{o}", ""}, {"\"o", ""},
	 {"{\\\"u}", ""}, {"\\\"u", ""}, {"\\\"{u}", ""}, {"\"u", ""},
    	 {"{\\\"A}", ""}, {"\\\"A", ""}, {"\\\"{A}", ""}, {"\"A", ""},
	 {"{\\\"O}", ""}, {"\\\"O", ""}, {"\\\"{O}", ""}, {"\"O", ""},
	 {"{\\\"U}", ""}, {"\\\"U", ""}, {"\\\"{U}", ""}, {"\"U", ""},
	 {"\\ss{}", ""}, {"\\ss", ""}, {"\"s", ""}, {"\"z", ""},
    };

    public void process(String[] args) throws DeLigException {
	println("DeLig [Version 0.1]");
	println("Author: Daniel Warner (delig@nospam@daniel-warner.de)");
	println();
	println("\tDeLig disables misplaced ligatures in LaTeX documents");
	println("\tusing a dictionary approach.");
	println();
	println("DeLig is distributed in the hope that it will be useful,");
	println("but WITHOUT ANY WARRANTY; without even the implied warranty of");
	println("MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.");
	println();
	println("It may be copied or modified under the terms of the");
	println("GNU General Public License version 3");
	println("as published by the Free Software Foundation");
	println("(http://www.gnu.org/copyleft/gpl.html).");
	println();
	if (args.length != 2) {
	    println("Usage: DeLig inFilename outFilename");
	} else {
	    try {
		// Build ligature hash table

		Hashtable<String, String> hash = new Hashtable<String, String>();
		try {
		    BufferedReader ligatureFile = new BufferedReader(new FileReader("DeLig.list"));
		    String value;

		    while ((value = ligatureFile.readLine()) != null) {
			String key = value.replace("|", "").toLowerCase();
			hash.put(key, value);
		    }
		    ligatureFile.close();
		} catch (IOException e) {
		    String errorMessage = "Error: Could not read ligature file DeLig.list.";
		    throw new DeLigException(errorMessage);
		}

		// Get filenames

		String inFilename = args[0];
		String outFilename = args[1];

		// Open input and output file

		if (!new File(inFilename).exists()) {
		    String errorMessage = "Error: The input file " + inFilename + " does not exist.";
		    throw new DeLigException(errorMessage);
		}

		if (new File(outFilename).exists()) {
		    String errorMessage = "Error: The output file " + outFilename + " already exists.";
		    throw new DeLigException(errorMessage);
		}

		LineNumberReader in = new LineNumberReader(new FileReader(inFilename));
		PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(outFilename)), true);

		// Process input file and write output file

		println("Processing " + inFilename + " and writing output to " + outFilename + " ...");
		println();

		String leadingBackspacesExpr = "[\\\\]*";
		String firstCharExpr = "([A-Za-z]|(\\{\\\\\"[aouAOU]\\})|(\\\\\"[aouAOU])|(\\\\\"\\{[aouAOU]\\})|(\"[aouAOU]))";
		String subExpr = "([a-z]|(\\{\\\\\"[aou]\\})|(\\\\\"[aou])|(\\\\\"\\{[aou]\\})|(\"[aou])|(\\\\ss\\{\\})|(\"s)|(\"z)|(\"-)|(\\\\-)|(\"\"))*";
		String ligatureExpr = "f[fil]";
		String trailingSZ = "(\\\\ss)?";
		String regExpr = leadingBackspacesExpr + firstCharExpr + subExpr + ligatureExpr + subExpr + trailingSZ;
		Pattern pattern = Pattern.compile(regExpr);

		int disabledLigatures = 0; // Counts the number of disabled ligatures
		String inLine; // Currently processed line of the input file
		while ((inLine = in.readLine()) != null) {
		    if (inLine.length() == 0) {
			out.println();
			continue;
		    }

		    String outLine = inLine;
		    boolean lineModified = false;

		    Matcher matcher = pattern.matcher(outLine);
		    int regionEnd = outLine.indexOf("%"); // Ignore comments in LaTeX file, if any
		    regionEnd = (regionEnd >= 0 ? regionEnd : outLine.length());
		    matcher.region(0, regionEnd);

		    while (matcher.find()) {
			MatchResult match = matcher.toMatchResult();
			String word = match.group();

			// Determine ligature, if present

			String compareWord = word.replace("\\\\", "").toLowerCase(); // Remove double backslashes (newline)
			String workWord = word.replace("\\\\", "");
			int removedBackslashes = word.length() - compareWord.length(); // Number of removed backslashes		
			for (int i = 0; i < tokens.length; i++) {
			    compareWord = compareWord.replace(tokens[i][0], tokens[i][1]);
			    workWord = workWord.replace(tokens[i][0], ((char) (i + tokensCharShift)) + "");
			}

			if (compareWord.charAt(0) != '\\') // If compareWord starts with a backslash, then it is a command and may not be modified
			{
			    boolean keyFound = hash.containsKey(compareWord);
			    boolean exactMatchFound = keyFound;
			    boolean prefixFound = false;
			    boolean suffixFound = false;
			    boolean properSubwordFound = false;

			    int start = 0;
			    int end = compareWord.length();

			    if (prefixAllowed && !keyFound) {
				start = 0;
				end = compareWord.length();
				while (end >= minLen && !(keyFound = hash.containsKey(compareWord.substring(0, end)))) {
				    end--;
				}
				prefixFound = keyFound;
			    }

			    if (suffixAllowed && !keyFound) {
				start = 0;
				end = compareWord.length();
				while (compareWord.length() - start >= minLen && !(keyFound = hash.containsKey(compareWord.substring(start, compareWord.length())))) {
				    start++;
				}
				suffixFound = keyFound;
			    }

			    if (properSubwordAllowed && !keyFound) {
				for (String key : hash.keySet()) {
				    if (compareWord.contains(key)) {
					start = compareWord.indexOf(key);
					end = start + key.length();
					keyFound = true;
					break;
				    }
				}
				properSubwordFound = keyFound;
			    }

			    if (keyFound) {
				String ligatureWord = hash.get(compareWord.substring(start, end));
				String modifiedWord = "";

				// i: Position in workWord, i - delta: Position in compareWord, ligPos: Position in ligatureWord
				int delta = 0;
				int ligPos = 0;
				int i = 0;
				while (i < workWord.length()) {
				    boolean inLigatureWord = (i - delta >= start && i - delta < end);

				    if (inLigatureWord && ligatureWord.charAt(ligPos) == '|') {
					modifiedWord += "\"|";
					ligPos++;
				    } else {
					int charValue = ((int) workWord.charAt(i) - tokensCharShift);
					if (charValue >= 0 && charValue < tokens.length) {
					    modifiedWord += tokens[charValue][0];
					    delta += (1 - tokens[charValue][1].length());
					    if (inLigatureWord) {
						ligPos += tokens[charValue][1].length();
					    }
					} else {
					    modifiedWord += workWord.charAt(i);
					    if (inLigatureWord) {
						ligPos++;
					    }
					}
					i++;
				    }
				}

				outLine = outLine.substring(0, match.start() + removedBackslashes) + modifiedWord + outLine.substring(match.end(), outLine.length());

				String ligaturePosition = "";
				if (exactMatchFound)
				    ligaturePosition = "exact match";
				else if (prefixFound)
				    ligaturePosition = "prefix";
				else if (suffixFound)
				    ligaturePosition = "suffix";
				else if (properSubwordFound)
				    ligaturePosition = "proper subword";

				println("Disabled ligature [" + ligatureWord + ", " + ligaturePosition + "] in line " + in.getLineNumber() + ": " + word.substring(removedBackslashes) + " => " + modifiedWord);
				disabledLigatures++;
				lineModified = true;

				matcher = pattern.matcher(outLine);
				regionEnd = outLine.indexOf("%"); // Ignore comments in LaTeX file, if any
				regionEnd = (regionEnd >= 0 ? regionEnd : outLine.length());
				matcher.region(0, regionEnd);
			    }
			}
		    }
		    if (lineModified) {
			println();
			println("Modified line " + in.getLineNumber() + ":");
			println("Input  : " + inLine);
			println();
			println("Output : " + outLine);
			println();
		    }
		    out.println(outLine); // Write output line to output file
		}

		// Print statistics

		if (disabledLigatures == 0) {
		    println("No ligatures were disabled.");
		} else if (disabledLigatures == 1) {
		    println("Disabled " + disabledLigatures + " ligature in " + inFilename + ".");
		} else {
		    println("Disabled " + disabledLigatures + " ligatures in " + inFilename + ".");
		}

		// Close input and output file

		in.close();
		out.close();
	    } catch (IOException e) {
		throw new DeLigException(e.getMessage());
	    }
	}
    }

    protected void println() {
	System.out.println();
    }

    protected void println(String output) {
	System.out.println(output);
    }

    protected void print(String output) {
	System.out.print(output);
    }

    public static void main(String[] args) {
	try {
	    new DeLig().process(args);
	} catch (DeLigException e) {
	    System.err.println(e.getMessage());
	}
    }
}