//PRET-Extractor
//Copyright (c) 2013 Tetsuya Kanda
//
//http://sel.ist.osaka-u.ac.jp/pret/
//
//Permission is hereby granted, free of charge, to any person obtaining
//a copy of this software and associated documentation files (the
//"Software"), to deal in the Software without restriction, including
//without limitation the rights to use, copy, modify, merge, publish,
//distribute, sublicense, and/or sell copies of the Software, and to
//permit persons to whom the Software is furnished to do so, subject to
//the following conditions:
//
//The above copyright notice and this permission notice shall be
//included in all copies or substantial portions of the Software.
//
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
//EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
//MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
//NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
//LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
//OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
//WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

package jp.ac.osaka_u.ist.sel.pret.engine.graph;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import gnu.trove.iterator.TIntIterator;
import gnu.trove.set.TIntSet;
import gnu.trove.set.hash.THashSet;
import gnu.trove.set.hash.TIntHashSet;
import jp.ac.osaka_u.ist.sel.pret.engine.data.Edge;
import jp.ac.osaka_u.ist.sel.pret.engine.data.Group;
import jp.ac.osaka_u.ist.sel.pret.engine.data.ISimilarity;

/**
 * Minimum spanning tree (Kruskal)
 * 
 * @author t-kanda
 * 
 */
public class SpanningTreeK {

	/**
	 * Orders weighted edges by ascending cost. Unlike a comparator that never
	 * reports equality, this one honours the {@link Comparator} contract;
	 * equal-cost edges are kept apart by using a list instead of a sorted set.
	 */
	private static final Comparator<WeightedEdge> BY_COST = new Comparator<WeightedEdge>() {
		@Override
		public int compare(WeightedEdge w1, WeightedEdge w2) {
			return Integer.compare(w1.cost, w2.cost);
		}
	};

	/**
	 * An edge paired with its cost, so that the cost is computed only once.
	 */
	private static final class WeightedEdge {
		final Edge edge;
		final int cost;

		WeightedEdge(Edge edge, int cost) {
			this.edge = edge;
			this.cost = cost;
		}
	}

	/**
	 * Minimum spanning tree (Kruskal)
	 * <p>
	 * The cost of an edge is
	 * {@code similarity.getDiffSize(group.choose1(fileId1), group.choose1(fileId2))}.
	 * Edges are visited in ascending cost order and an edge is selected
	 * whenever its two end points still belong to different trees.
	 * 
	 * @param group
	 *            target group
	 * @param similarity
	 *            supplies the cost of an edge through {@code getDiffSize}
	 * @param gather
	 *            if {@code true} the result is filled with
	 *            {@code gather(group, selectedEdges)}, otherwise
	 *            {@code noGather()} is called on it
	 * @return {@code group} itself when it has exactly one gathered file,
	 *         otherwise a new group of the same type (target group with
	 *         limited edges)
	 * @throws IllegalStateException
	 *             if an edge refers to a file that is not among the gathered
	 *             files of the group
	 */
	public static Group getSpanningTree(final Group group, final ISimilarity similarity, boolean gather) {

		// a single vertex is already its own spanning tree
		if (group.gatheredFiles().size() == 1) {
			return group;
		}

		int maxSize = group.gatheredFiles().size();

		Set<TIntSet> forest = new THashSet<TIntSet>((int) (maxSize * 1.25));
		// all vertexes are independent tree
		for (TIntIterator it = group.gatheredFiles().iterator(); it.hasNext();) {
			TIntSet t = new TIntHashSet();
			t.add(it.next());
			forest.add(t);
		}

		Set<Edge> newEdges = new THashSet<>();

		// from all edges, cheapest first
		for (WeightedEdge weighted : edgesByCost(group, similarity)) {
			Edge minEdge = weighted.edge;

			TIntSet tree1 = null, tree2 = null;

			// find trees that the edge connects
			for (TIntSet tree : forest) {
				if (tree.contains(minEdge.fileId1())) {
					tree1 = tree;
				}
				if (tree.contains(minEdge.fileId2())) {
					tree2 = tree;
				}
				if (tree1 != null && tree2 != null) {
					// trees are disjoint, so no other tree can match
					break;
				}
			}

			if (tree1 == null || tree2 == null) {
				throw new IllegalStateException("Edge (" + minEdge.fileId1() + ", " + minEdge.fileId2()
						+ ") refers to a file that is not gathered in the group");
			}

			// if trees are not same, join
			// (trees in the forest never share a vertex, so identity is enough)
			if (tree1 != tree2) {
				// members of a hash set must not be mutated in place:
				// take both out and put a fresh union back
				forest.remove(tree1);
				forest.remove(tree2);
				TIntSet newTree = new TIntHashSet();
				newTree.addAll(tree1);
				newTree.addAll(tree2);
				forest.add(newTree);
				newEdges.add(minEdge);
				if (forest.size() == 1) {
					// everything is connected
					break;
				}
			}
		}

		Group sp = new Group(group.type());
		if (gather) {
			sp.gather(group, newEdges);
		} else {
			// NOTE(review): the selected edges are not handed to the result in
			// this branch - confirm that noGather() is meant to discard them
			sp.noGather();
		}
		return sp;

	}

	/**
	 * Returns the gathered edges of the group sorted by ascending cost.
	 * 
	 * @param group
	 *            group whose gathered edges are sorted
	 * @param similarity
	 *            supplies the cost of an edge through {@code getDiffSize}
	 * @return edges with their cost, cheapest first
	 */
	private static List<WeightedEdge> edgesByCost(Group group, ISimilarity similarity) {
		List<WeightedEdge> sorted = new ArrayList<>();
		for (Edge e : group.gatheredEdges()) {
			int cost = similarity.getDiffSize(group.choose1(e.fileId1()), group.choose1(e.fileId2()));
			sorted.add(new WeightedEdge(e, cost));
		}
		// Collections.sort is stable, so reversing first makes edges of equal
		// cost come out in reverse iteration order of gatheredEdges(). This
		// keeps the tie order, and therefore the selected tree, the same as in
		// earlier versions of this method.
		Collections.reverse(sorted);
		Collections.sort(sorted, BY_COST);
		return sorted;
	}
}
