//PRET-Extractor
//Copyright (c) 2013 Tetsuya Kanda
//
//http://sel.ist.osaka-u.ac.jp/pret/
//
//Permission is hereby granted, free of charge, to any person obtaining
//a copy of this software and associated documentation files (the
//"Software"), to deal in the Software without restriction, including
//without limitation the rights to use, copy, modify, merge, publish,
//distribute, sublicense, and/or sell copies of the Software, and to
//permit persons to whom the Software is furnished to do so, subject to
//the following conditions:
//
//The above copyright notice and this permission notice shall be
//included in all copies or substantial portions of the Software.
//
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
//EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
//MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
//NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
//LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
//OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
//WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

package jp.ac.osaka_u.ist.sel.pret.engine.diff;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

import jp.ac.osaka_u.ist.sel.pret.util.EncodeDetector;
import jp.ac.osaka_u.ist.sel.t_kanda.tkdiff.Diff;
import jp.ac.osaka_u.ist.sel.t_kanda.tkdiff.Difference;

//Similarity(A, B) = |A intersect B| / |A union B|

/**
 * Calculates a line-based similarity score and a textual diff for two inputs
 * using the customized java-diff implementation ({@link Diff}).
 * <p>
 * Lines are compared after {@link #listLines(Path)} has removed every space
 * and tab character and dropped lines that became empty.
 * 
 * @author t-kanda
 * 
 */
public class TKJavaDiff extends ADiffCmd {

	/** Normalized lines of the first input. */
	private List<String> sA;
	/** Normalized lines of the second input. */
	private List<String> sB;

	/**
	 * Loads both files with {@link #listLines(Path)} and recalculates the
	 * metrics via {@link #calc()}.
	 * 
	 * @param file1
	 *            first file to compare
	 * @param file2
	 *            second file to compare
	 */
	@Override
	public void set(Path file1, Path file2) {
		this.sA = listLines(file1);
		this.sB = listLines(file2);
		calc();
	}

	/**
	 * Same as {@link #set(Path, Path)}, but takes the first input as an
	 * already prepared line list. The list is stored as given (not copied and
	 * not normalized again).
	 * 
	 * @param sA
	 *            lines of the first input
	 * @param file2
	 *            second file to compare; loaded with {@link #listLines(Path)}
	 */
	public void set(List<String> sA, Path file2) {
		this.sA = sA;
		this.sB = listLines(file2);
		calc();
	}

	/**
	 * Similarity(A, B) = |A &cap; B| / |A &cup; B|<br>
	 * skip blanks
	 * <p>
	 * Stores the result in the fields {@code sim}, {@code lcs}, {@code add}
	 * and {@code del}, which are not declared in this class (inherited).
	 * <p>
	 * When both inputs are empty the formula would be 0 / 0; the similarity is
	 * then defined as 1 because the two (empty) sequences are identical. Note
	 * that {@link #listLines(Path)} yields an empty list for an unreadable
	 * file, so two unreadable files are scored the same way.
	 */
	protected void calc() {
		List<Difference> diffList = new Diff(sA, sB).diff();
		int nAdd = 0;
		int nDel = 0;
		for (Difference d : diffList) {
			nDel += countLines(d.getDeletedStart(), d.getDeletedEnd());
			nAdd += countLines(d.getAddedStart(), d.getAddedEnd());
		}

		// |A| = common + nDel and |B| = common + nAdd, therefore
		// |A| + |B| + nAdd + nDel = 2 * |A union B| and
		// |A| + |B| - nAdd - nDel = 2 * |A intersect B|.
		int total = sA.size() + sB.size();
		int nUnionAB2 = total + nAdd + nDel;
		int nIntersectionAB2 = total - nAdd - nDel;

		// The denominator is zero only if both inputs are empty; avoid NaN.
		sim = (nUnionAB2 == 0) ? 1.0f : (float) nIntersectionAB2 / nUnionAB2;
		lcs = nIntersectionAB2 / 2;
		add = nAdd;
		del = nDel;
	}

	/**
	 * Reads a file into a list of normalized lines: every space and tab is
	 * removed from each line and lines that end up empty are skipped. The
	 * charset is the one returned by {@code EncodeDetector.charset(path)}.
	 * 
	 * @param path
	 *            file to read
	 * @return the normalized non-empty lines in file order; an empty list if
	 *         the file cannot be read (the stack trace is printed)
	 */
	public static List<String> listLines(Path path) {
		try {
			List<String> result = new ArrayList<>();
			for (String line : Files.readAllLines(path, EncodeDetector.charset(path))) {
				String stripped = removeSpacesAndTabs(line);
				if (!stripped.isEmpty()) {
					result.add(stripped);
				}
			}
			return result;
		} catch (IOException e) {
			// Best effort: an unreadable file is treated as having no lines.
			e.printStackTrace();
			return new ArrayList<>();
		}
	}

	/**
	 * Builds a diff of the two given line lists: a two-line header naming the
	 * files, followed by one hunk per difference using {@code a} / {@code d} /
	 * {@code c} commands with {@code <} and {@code >} line prefixes.
	 * <p>
	 * Only the {@code sA} and {@code sB} parameters are compared; the fields of
	 * the same name are not used. Line numbers in the output are positions in
	 * those lists (1-based), so they match the files' own line numbers only if
	 * the lists contain every line of the files.
	 * 
	 * @param file1
	 *            file named in the {@code ---} header line
	 * @param sA
	 *            lines of the first input
	 * @param file2
	 *            file named in the {@code +++} header line
	 * @param sB
	 *            lines of the second input
	 * @return the diff text, or an empty string when there is no difference
	 */
	public String diff(Path file1, List<String> sA, Path file2, List<String> sB) {
		List<Difference> diffList = new Diff(sA, sB).diff();
		if (diffList.isEmpty()) {
			return "";
		}

		StringBuilder buf = new StringBuilder();
		try {
			buf.append(String.format("--- %9s   %s\n", file1.toAbsolutePath(), Files.getLastModifiedTime(file1)));
			buf.append(String.format("+++ %9s   %s\n", file2.toAbsolutePath(), Files.getLastModifiedTime(file2)));
		} catch (IOException e) {
			// Best effort: the hunks are still emitted when a modification
			// time cannot be read; the header is then incomplete.
			e.printStackTrace();
		}

		for (Difference d : diffList) {
			// As in calc(), a negative end index means that side is absent.
			boolean hasDeleted = d.getDeletedEnd() >= 0;
			boolean hasAdded = d.getAddedEnd() >= 0;

			if (hasDeleted && hasAdded) {
				// change: <deleted range>c<added range>
				appendRange(buf, d.getDeletedStart(), d.getDeletedEnd());
				buf.append('c');
				appendRange(buf, d.getAddedStart(), d.getAddedEnd());
				buf.append('\n');
				appendLines(buf, "< ", sA, d.getDeletedStart(), d.getDeletedEnd());
				buf.append("---\n");
				appendLines(buf, "> ", sB, d.getAddedStart(), d.getAddedEnd());
			} else if (hasDeleted) {
				// delete: <deleted range>d<position in B>
				appendRange(buf, d.getDeletedStart(), d.getDeletedEnd());
				buf.append('d');
				// The 0-based start index is written as-is (no +1).
				buf.append(d.getAddedStart());
				buf.append('\n');
				appendLines(buf, "< ", sA, d.getDeletedStart(), d.getDeletedEnd());
			} else if (hasAdded) {
				// add: <position in A>a<added range>
				// The 0-based start index is written as-is (no +1).
				buf.append(d.getDeletedStart());
				buf.append('a');
				appendRange(buf, d.getAddedStart(), d.getAddedEnd());
				buf.append('\n');
				appendLines(buf, "> ", sB, d.getAddedStart(), d.getAddedEnd());
			}
		}
		return buf.toString();
	}

	/** Returns the number of lines in the 0-based inclusive range, or 0 if {@code end} is negative. */
	private static int countLines(int start, int end) {
		return end >= 0 ? end - start + 1 : 0;
	}

	/** Returns {@code line} with every space and tab character removed. */
	private static String removeSpacesAndTabs(String line) {
		StringBuilder kept = new StringBuilder(line.length());
		for (int i = 0; i < line.length(); i++) {
			char c = line.charAt(i);
			if (c != ' ' && c != '\t') {
				kept.append(c);
			}
		}
		return kept.toString();
	}

	/** Appends a 0-based inclusive range as 1-based {@code "n"} or {@code "n,m"}. */
	private static void appendRange(StringBuilder buf, int start, int end) {
		buf.append(start + 1);
		if (end != start) {
			buf.append(',').append(end + 1);
		}
	}

	/** Appends {@code lines[start..end]} (inclusive), each prefixed and newline-terminated. */
	private static void appendLines(StringBuilder buf, String prefix, List<String> lines, int start, int end) {
		for (int i = start; i <= end; i++) {
			buf.append(prefix).append(lines.get(i)).append('\n');
		}
	}

}
