//PRET-Extractor
//Copyright (c) 2013 Tetsuya Kanda
//
//http://sel.ist.osaka-u.ac.jp/pret/
//
//Permission is hereby granted, free of charge, to any person obtaining
//a copy of this software and associated documentation files (the
//"Software"), to deal in the Software without restriction, including
//without limitation the rights to use, copy, modify, merge, publish,
//distribute, sublicense, and/or sell copies of the Software, and to
//permit persons to whom the Software is furnished to do so, subject to
//the following conditions:
//
//The above copyright notice and this permission notice shall be
//included in all copies or substantial portions of the Software.
//
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
//EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
//MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
//NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
//LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
//OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
//WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

package jp.ac.osaka_u.ist.sel.pret.engine.preprocess.nc;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import jp.ac.osaka_u.ist.sel.pret.engine.data.FileInfo;
import jp.ac.osaka_u.ist.sel.pret.engine.preprocess.Preprocessor;
import jp.ac.osaka_u.ist.sel.pret.util.EncodeDetector;

/**
 * Preprocessor that runs a source file through {@link CLexer} and stores the
 * resulting token list, one token per line, in a temporary file.
 */
public class CPreprocessor extends Preprocessor {

	/**
	 * Creates a preprocessor.
	 *
	 * @param tmp passed unchanged to the {@link Preprocessor} constructor; the
	 *            inherited {@code tmp} value is used below as the root under
	 *            which token files are written
	 */
	public CPreprocessor(Path tmp) {
		super(tmp);
	}

	/**
	 * Tokenizes {@code source} into
	 * {@code <tmp>/<parent name>/<fileId>_t.tmp} and records the URI of that
	 * file in {@code source.preSim}.
	 *
	 * <p>Failures are reported on {@code System.err} and never thrown;
	 * {@code source.preSim} is assigned even if the token file could not be
	 * written.
	 *
	 * @param source the file to preprocess
	 */
	@Override
	public void preprocessForCalcSimilarity(FileInfo source) {
		Path tmpdir = Paths.get(tmp.toString(), source.parent().getName());
		try {
			// createDirectories succeeds when the directory already exists
			// and also creates missing parents, so the only exceptions left
			// are genuine failures worth reporting.
			Files.createDirectories(tmpdir);
		} catch (IOException e) {
			// Best effort: keep going, the write in readIdentifiers will
			// report its own failure if the directory is unusable.
			System.err.println("Could not create directory " + tmpdir + ": " + e);
		}
		Path tokenized = Paths.get(tmpdir.toString(), source.fileId() + "_t.tmp");

		readIdentifiers(Paths.get(source.path()), tokenized);
		source.preSim = tokenized.toUri();
	}

	/**
	 * Reads {@code path}, runs its content through {@link CLexer#tklex()} and
	 * writes each returned token on its own line to {@code tokenized}.
	 *
	 * <p>Both files use the charset reported by
	 * {@link EncodeDetector#charset(Path)} for {@code path}. If the input
	 * cannot be read or lexed because of an {@link IOException}, the error is
	 * printed to {@code System.err} and an empty token file is written. If the
	 * output cannot be written, the error is printed to {@code System.err}.
	 *
	 * @param path      the source file to tokenize
	 * @param tokenized the file that receives the tokens
	 * @return always {@code 2}, regardless of success or failure
	 */
	public static int readIdentifiers(Path path, Path tokenized) {
		Charset cs = EncodeDetector.charset(path);
		List<String> lex = new ArrayList<>();

		try {
			// Rebuild the file content with every line terminated by '\n'
			// before handing it to the lexer.
			StringBuilder buf = new StringBuilder();
			for (String l : Files.readAllLines(path, cs)) {
				buf.append(l);
				buf.append('\n');
			}

			CLexer c = new CLexer(new StringReader(buf.toString()));
			lex = c.tklex();
		} catch (IOException e1) {
			System.err.println(path.toString());
			e1.printStackTrace();
		}

		try (BufferedWriter bw = Files.newBufferedWriter(tokenized, cs)) {
			for (String t : lex) {
				bw.write(t);
				bw.newLine();
			}
		} catch (IOException e) {
			System.err.println(path.toString());
			System.err.println("Could not write tokens to " + tokenized + ": " + e);
		}
		return 2;
	}

	/**
	 * Reuses the similarity preprocessing result for diffing by copying
	 * {@code source.preSim} into {@code source.preDiff}.
	 *
	 * @param source the file whose {@code preDiff} is set
	 */
	@Override
	public void preprocessForDiff(FileInfo source) {
		source.preDiff = source.preSim;
	}

}
