//PRET-Extractor
//Copyright (c) 2013 Tetsuya Kanda
//
//http://sel.ist.osaka-u.ac.jp/pret/
//
//Permission is hereby granted, free of charge, to any person obtaining
//a copy of this software and associated documentation files (the
//"Software"), to deal in the Software without restriction, including
//without limitation the rights to use, copy, modify, merge, publish,
//distribute, sublicense, and/or sell copies of the Software, and to
//permit persons to whom the Software is furnished to do so, subject to
//the following conditions:
//
//The above copyright notice and this permission notice shall be
//included in all copies or substantial portions of the Software.
//
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
//EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
//MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
//NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
//LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
//OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
//WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

package jp.ac.osaka_u.ist.sel.pret.engine.preprocess.java;

import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.List;

import jp.ac.osaka_u.ist.sel.pret.engine.data.FileInfo;
import jp.ac.osaka_u.ist.sel.pret.engine.preprocess.Preprocessor;
import jp.ac.osaka_u.ist.sel.pret.util.EncodeDetector;

import org.antlr.runtime.ANTLRFileStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.Token;

/**
 * Preprocessor for Java sources that writes one lexer token per line into a
 * temporary file. Only tokens of type {@code JavaLexer2.WS} are dropped; every
 * other token produced by {@code JavaLexer2} is kept.
 */
public class JavaPreprocessorWithComment extends Preprocessor {

	/**
	 * @param tmp
	 *            base directory handed to the superclass; the tokenized files
	 *            are written into sub-directories of it
	 */
	public JavaPreprocessorWithComment(Path tmp) {
		super(tmp);
	}

	/**
	 * Tokenizes {@code source} into {@code <tmp>/<parent name>/<fileId>_t.tmp}
	 * and stores the URI of that file in {@code source.preSim}.
	 *
	 * @param source
	 *            the file to preprocess
	 */
	@Override
	public void preprocessForCalcSimilarity(FileInfo source) {
		try {
			Path tmpdir = Paths.get(tmp.toString(), source.parent().getName());
			// createDirectories (unlike createDirectory) succeeds when the
			// directory already exists, which is the case for every file after
			// the first one that shares the same parent.
			Files.createDirectories(tmpdir);
			Path tokenized = Paths.get(tmpdir.toString(), source.fileId() + "_t.tmp");

			readIdentifiers(Paths.get(source.path()), tokenized);
			source.preSim = tokenized.toUri();

		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Lexes the file at {@code path} with {@code JavaLexer2} and writes the text
	 * of every token whose type is not {@code JavaLexer2.WS} to
	 * {@code tokenized}, one token per line. The output file is created if it is
	 * missing and truncated if it exists.
	 *
	 * <p>
	 * The charset reported by {@code EncodeDetector.detect} is used for both
	 * reading and writing when it is non-null and supported by this JVM;
	 * otherwise the platform default charset is used for both.
	 *
	 * @param path
	 *            the Java source file to read
	 * @param tokenized
	 *            the file that receives the token text
	 * @return the value of {@code ANTLRFileStream.getLine()} after the tokens
	 *         have been written, or 0 if an {@link IOException} occurred (the
	 *         exception is printed, not rethrown)
	 */
	public static int readIdentifiers(Path path, Path tokenized) {
		String encode = EncodeDetector.detect(path);
		Charset cs;
		if (encode != null && Charset.isSupported(encode)) {
			cs = Charset.forName(encode);
		} else {
			cs = Charset.defaultCharset();
		}

		// CREATE must be given explicitly: once any option is passed the
		// defaults no longer apply, and TRUNCATE_EXISTING alone fails with
		// NoSuchFileException when the output file does not exist yet.
		try (BufferedWriter bw = Files.newBufferedWriter(tokenized, cs, StandardOpenOption.CREATE,
				StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE)) {
			// Read with the same validated charset that is used for writing, so
			// an unsupported detected name falls back instead of failing here.
			ANTLRFileStream in = new ANTLRFileStream(path.toString(), cs.name());
			JavaLexer2 lexer = new JavaLexer2(in);
			CommonTokenStream tokens = new CommonTokenStream(lexer);

			// getTokens() is not declared as List<Token>; the stream only holds Tokens.
			@SuppressWarnings("unchecked")
			List<Token> tokenList = (List<Token>) tokens.getTokens();
			for (Token t : tokenList) {
				if (t.getType() != JavaLexer2.WS) {
					bw.append(t.getText());
					bw.newLine();
				}
			}
			return in.getLine();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return 0;
	}

	/**
	 * Reuses the result of {@link #preprocessForCalcSimilarity(FileInfo)}:
	 * {@code source.preDiff} is set to the current value of
	 * {@code source.preSim}.
	 *
	 * @param source
	 *            the file whose {@code preDiff} field is updated
	 */
	@Override
	public void preprocessForDiff(FileInfo source) {
		source.preDiff = source.preSim;
	}

}
