//PRET-Extractor
//Copyright (c) 2013 Tetsuya Kanda
//
//http://sel.ist.osaka-u.ac.jp/pret/
//
//Permission is hereby granted, free of charge, to any person obtaining
//a copy of this software and associated documentation files (the
//"Software"), to deal in the Software without restriction, including
//without limitation the rights to use, copy, modify, merge, publish,
//distribute, sublicense, and/or sell copies of the Software, and to
//permit persons to whom the Software is furnished to do so, subject to
//the following conditions:
//
//The above copyright notice and this permission notice shall be
//included in all copies or substantial portions of the Software.
//
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
//EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
//MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
//NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
//LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
//OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
//WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

package jp.ac.osaka_u.ist.sel.pret.engine.preprocess.c;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import jp.ac.osaka_u.ist.sel.pret.engine.data.FileInfo;
import jp.ac.osaka_u.ist.sel.pret.engine.preprocess.Preprocessor;
import jp.ac.osaka_u.ist.sel.pret.util.EncodeDetector;

import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.Token;

/**
 * Preprocessor for C source files: strips comments, joins backslash-continued
 * lines, runs the result through {@link CLexer} and writes the non-whitespace
 * tokens, one per line, to a temporary file.
 */
public class CPreprocessor extends Preprocessor {

	/**
	 * Matches, as alternatives tried left to right at each position:
	 * <ol>
	 * <li>a double-quoted string literal or single-quoted character literal,
	 * including backslash escapes (captured as group 1);</li>
	 * <li>a block comment;</li>
	 * <li>a line comment up to, but not including, the newline.</li>
	 * </ol>
	 * Literals are matched so that comment markers inside them are not mistaken
	 * for comments. Compiled once because the expression never changes.
	 */
	private static final Pattern LITERAL_OR_COMMENT = Pattern.compile(
			"(\"(?:\\\\.|[^\"\\\\])*\"|'(?:\\\\.|[^'\\\\])*')|/\\*[^*]*\\*+(?:[^/*][^*]*\\*+)*/|//[^\\n]*");

	/**
	 * @param tmp
	 *            passed unchanged to the {@link Preprocessor} constructor
	 */
	public CPreprocessor(Path tmp) {
		super(tmp);
	}

	/**
	 * Tokenizes {@code source} into
	 * {@code <tmp>/<parent name>/<fileId>_t.tmp} and stores that file's URI in
	 * {@code source.preSim}.
	 * <p>
	 * {@code tmp} is not declared in this class (presumably inherited from
	 * {@link Preprocessor}).
	 *
	 * @param source
	 *            the file to tokenize; its {@code preSim} field is overwritten
	 */
	@Override
	public void preprocessForCalcSimilarity(FileInfo source) {
		Path tmpdir = Paths.get(tmp.toString(), source.parent().getName());

		// The directory is shared by every file with the same parent, so it
		// usually exists already; only create it (and any missing ancestors)
		// when it does not.
		if (!Files.isDirectory(tmpdir)) {
			try {
				Files.createDirectories(tmpdir);
			} catch (IOException e) {
				// Best effort: keep going, but do not hide the reason the
				// token file cannot be written below.
				e.printStackTrace();
			}
		}

		Path tokenized = tmpdir.resolve(source.fileId() + "_t.tmp");

		readIdentifiers(Paths.get(source.path()), tokenized);
		source.preSim = tokenized.toUri();
	}

	/**
	 * Lexes the C file at {@code path} and writes the text of every token whose
	 * type is not {@code CLexer.WS} to {@code tokenized}, one token per line.
	 * Both files use the charset returned by
	 * {@code EncodeDetector.charset(path)}.
	 *
	 * @param path
	 *            C source file to read
	 * @param tokenized
	 *            output file that receives the token texts
	 * @return {@code in.getLine()} of the lexer's input stream once all tokens
	 *         have been written; if writing fails with an {@link IOException}
	 *         (which is printed, not rethrown), {@code tokens.size()} instead.
	 *         NOTE(review): the two return paths report different quantities;
	 *         kept as-is because callers may rely on it - confirm.
	 */
	public static int readIdentifiers(Path path, Path tokenized) {
		Charset cs = EncodeDetector.charset(path);

		ANTLRStringStream in = new ANTLRStringStream(rmc(path, cs));
		CLexer lexer = new CLexer(in);
		CommonTokenStream tokens = new CommonTokenStream(lexer);

		// Fetch the token list exactly once, before the output file is opened,
		// so the fallback return below observes the same stream state whether
		// or not opening the writer succeeds. getTokens() returns a raw List,
		// hence the narrowly scoped unchecked cast.
		@SuppressWarnings("unchecked")
		List<Token> allTokens = (List<Token>) tokens.getTokens();

		try (BufferedWriter bw = Files.newBufferedWriter(tokenized, cs)) {
			for (Token t : allTokens) {
				if (t.getType() != CLexer.WS) {
					bw.write(t.getText());
					bw.newLine();
				}
			}
			return in.getLine();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return tokens.size();
	}

	/**
	 * Reads {@code path} and returns its text with line continuations joined
	 * and comments removed.
	 * <ul>
	 * <li>A line ending in a backslash has the backslash dropped and is joined
	 * directly to the following line; every other line is terminated with
	 * {@code '\n'}.</li>
	 * <li>String and character literals are copied verbatim.</li>
	 * <li>Each block or line comment is replaced by a single space, as C
	 * translation phase 3 prescribes, so that tokens separated only by a
	 * comment (e.g. {@code int/**}{@code /x}) are not fused into one.</li>
	 * </ul>
	 * If reading fails, the stack trace is printed and whatever was read so far
	 * is processed.
	 *
	 * @param path
	 *            file to read
	 * @param cs
	 *            charset used to decode the file
	 * @return the comment-free source text
	 */
	private static String rmc(Path path, Charset cs) {
		StringBuilder joined = new StringBuilder();

		try (BufferedReader fin = Files.newBufferedReader(path, cs)) {
			String line;
			while (null != (line = fin.readLine())) {
				if (line.endsWith("\\")) {
					// Line continuation: drop the backslash and the newline.
					joined.append(line, 0, line.length() - 1);
				} else {
					joined.append(line).append('\n');
				}
			}
		} catch (IOException e) {
			e.printStackTrace();
		}

		Matcher m = LITERAL_OR_COMMENT.matcher(joined);
		// StringBuffer (not StringBuilder) because that is the overload of
		// appendReplacement/appendTail available on every Java version.
		StringBuffer out = new StringBuffer(joined.length());
		while (m.find()) {
			String literal = m.group(1);
			// Group 1 is set only for string/char literals; otherwise the
			// match is a comment. quoteReplacement keeps '$' and '\' inside a
			// literal from being interpreted as replacement syntax.
			m.appendReplacement(out, literal != null ? Matcher.quoteReplacement(literal) : " ");
		}
		m.appendTail(out);

		return out.toString();
	}

	/**
	 * Reuses the similarity preprocessing result for diffing by copying
	 * {@code source.preSim} into {@code source.preDiff}.
	 *
	 * @param source
	 *            the file whose {@code preDiff} field is set
	 */
	@Override
	public void preprocessForDiff(FileInfo source) {
		source.preDiff = source.preSim;
	}

}
