/*******************************************************************************
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 * 
 * Contact Info:
 * 	Bruce Donald
 * 	Duke University
 * 	Department of Computer Science
 * 	Levine Science Research Center (LSRC)
 * 	Durham
 * 	NC 27708-0129 
 * 	USA
 * 	brd@cs.duke.edu
 * 
 * Copyright (C) 2011 Jeffrey W. Martin and Bruce R. Donald
 * 
 * <signature of Bruce Donald>, April 2011
 * Bruce Donald, Professor of Computer Science
 ******************************************************************************/


package edu.duke.donaldLab.share.clustering.distance;

import java.util.LinkedList;
import java.util.List;

import edu.duke.donaldLab.share.perf.Profiler;
import edu.duke.donaldLab.share.perf.MessageListener;
import edu.duke.donaldLab.share.perf.Progress;

/**
 * Agglomerative clusterer that repeatedly merges the closest pair of clusters
 * in a {@link DistanceMatrix} until the closest pair is farther apart than a
 * target distance (or only one cluster remains).
 * <p>
 * Distances between merged clusters are refreshed through
 * {@link DistanceMatrix#updateCompleteLinkage(int, int)}, so the matrix handed to
 * the constructor is presumably modified by {@link #cluster()} -- confirm against
 * {@code DistanceMatrix} before reusing it afterwards.
 * <p>
 * Progress/memory reporting is optional: if no listener is registered via
 * {@link #setProgressListener(MessageListener)}, clustering runs silently.
 */
public class DistanceClusterer
{
	/**************************
	 *   Definitions
	 **************************/
	
	// second argument handed to every Progress instance created by cluster()
	private static final int ProgressUpdateInterval = 15000;
	
	
	/**************************
	 *   Data Members
	 **************************/
	
	private DistanceMatrix m_distances;
	private double m_targetDist;
	// may be null; every use must be guarded
	private MessageListener m_messageListener;
	private LinkedList<DistanceCluster> m_clusters;
	
	
	/**************************
	 *   Constructors
	 **************************/
	
	/**
	 * Creates a clusterer over the given distances. No listener is registered
	 * and no clusters exist until {@link #cluster()} is called.
	 * 
	 * @param distances pairwise distances between the points to cluster
	 * @param targetDist merging stops as soon as the closest pair of clusters
	 *                   is strictly farther apart than this value
	 */
	public DistanceClusterer( DistanceMatrix distances, double targetDist )
	{
		// save parameters
		m_distances = distances;
		m_targetDist = targetDist;
		
		// init defaults
		m_messageListener = null;
		m_clusters = new LinkedList<DistanceCluster>();
	}
	
	
	/**************************
	 *   Methods
	 **************************/
	
	/**
	 * Registers the listener that receives status messages and progress
	 * updates during {@link #cluster()}.
	 * 
	 * @param progressListener the listener, or {@code null} to disable reporting
	 */
	public void setProgressListener( MessageListener progressListener )
	{
		m_messageListener = progressListener;
	}
	
	/**
	 * Runs the clustering and returns the surviving clusters.
	 * <p>
	 * One cluster is created per point, each cluster is initialized against
	 * the distance matrix, and then the closest pair is merged (right into
	 * left) until either a single cluster remains or the closest pair's
	 * distance exceeds the target distance.
	 * <p>
	 * Every call appends a fresh cluster per point to the internal list
	 * without clearing it, so this method is meant to be called once per
	 * instance.
	 * 
	 * @return the internal list of remaining clusters (a live reference, not a copy)
	 */
	public List<DistanceCluster> cluster( )
	{
		// NOTE: this is a pretty spiffy O(n^2logn) algorithm from Day, Edelsbrunner '84
		
		final int numPoints = m_distances.getNumPoints();
		
		// ALERT
		if( m_messageListener != null )
		{
			m_messageListener.message( "Start heap size: " + Profiler.getMemoryUsed() );
			m_messageListener.message( "Creating clusters..." );
		}
		
		// O(n)
		// fill the list with clusters
		for( int i=0; i<numPoints; i++ )
		{
			m_clusters.add( new DistanceCluster( i, numPoints ) );
		}
		
		// ALERT
		Progress initProgress = null;
		if( m_messageListener != null )
		{
			m_messageListener.message( "Memory After allocating clusters: " + Profiler.getMemoryUsed() );
			initProgress = new Progress( numPoints, ProgressUpdateInterval );
			initProgress.setMessageListener( m_messageListener );
			initProgress.setShowMemory( true );
			m_messageListener.message( "Initializing clusters..." );
		}
		
		// init the clusters
		// O(n^2logn)
		for( DistanceCluster cluster : m_clusters )
		{
			cluster.init( m_distances, m_clusters );
			
			// ALERT
			if( initProgress != null )
			{
				initProgress.incrementProgress();
			}
		}
		
		// ALERT
		// NOTE: the heap-size message used to be sent without a null check,
		// which crashed clustering whenever no listener had been registered
		Progress clusterProgress = null;
		if( m_messageListener != null )
		{
			m_messageListener.message( "After init heap size: " + Profiler.getMemoryUsed() );
			clusterProgress = new Progress( numPoints, ProgressUpdateInterval );
			clusterProgress.setMessageListener( m_messageListener );
			m_messageListener.message( "Clustering..." );
		}
		
		// O(n^2logn)
		// keep merging the closest pair until they're too far apart
		while( m_clusters.size() > 1 )
		{
			// O(n)
			//Profiler.start( "getMinPair" );
			DistanceClusterPair pair = getMinPair();
			//Profiler.stop( "getMinPair" );
			
			// O(1)
			// get the distance
			double dist = m_distances.get( pair.left.getIndex(), pair.right.getIndex() );
			if( dist > m_targetDist )
			{
				break;
			}
			
			// O(nlogn)
			// remove the two clusters from all the queues
			//Profiler.start( "removeClusters" );
			for( DistanceCluster cluster : m_clusters )
			{
				// skip left and right
				if( cluster == pair.left || cluster == pair.right )
				{
					continue;
				}
				
				cluster.removeCluster( pair.left );
				cluster.removeCluster( pair.right );
			}
			//Profiler.stop( "removeClusters" );
			
			// O(n)
			// update the distance matrix
			//Profiler.start( "updateDistances" );
			m_distances.updateCompleteLinkage( pair.left.getIndex(), pair.right.getIndex() );
			//Profiler.stop( "updateDistances" );
			
			// O(nlogn)
			// update the queues with the new distances
			// (only left is re-added: it stands for the merged cluster from here on)
			//Profiler.start( "updateQueues" );
			for( DistanceCluster cluster : m_clusters )
			{
				// skip these two clusters
				if( cluster == pair.left || cluster == pair.right )
				{
					continue;
				}
				
				cluster.addCluster( pair.left );
			}
			//Profiler.stop( "updateQueues" );
			
			// O(1)
			// remove the right cluster from the list
			m_clusters.remove( pair.right );
			
			// O(nlogn)
			// merge right into left
			//Profiler.start( "join" );
			pair.left.join( pair.right, m_distances, m_clusters );
			//Profiler.stop( "join" );
			
			// ALERT
			if( clusterProgress != null )
			{
				clusterProgress.incrementProgress();
			}
		}
		
		// ALERT
		//System.out.println( Profiler.getReport() );
		
		return m_clusters;
	}
	
	
	/**************************
	 *   Functions
	 **************************/
	
	/**
	 * Finds the pair of clusters with the smallest matrix distance.
	 * <p>
	 * Each cluster is asked for its {@code getMinDist()} partner, and the
	 * (cluster, partner) pair with the smallest distance in the matrix wins.
	 * Ties keep the first pair encountered because the comparison is strict.
	 * 
	 * @return the winning pair, with the scanned cluster as left and its
	 *         partner as right; both members are only checked for null when
	 *         assertions are enabled
	 */
	private DistanceClusterPair getMinPair( )
	{
		double minDist = Double.POSITIVE_INFINITY;
		DistanceCluster minLeft = null;
		DistanceCluster minRight = null;
		
		// for each queue...
		for( DistanceCluster cluster : m_clusters )
		{
			DistanceCluster other = cluster.getMinDist();
			
			double dist = m_distances.get( cluster.getIndex(), other.getIndex() );
			if( dist < minDist )
			{
				minDist = dist;
				minLeft = cluster;
				minRight = other;
			}
		}
		
		assert( minLeft != null );
		assert( minRight != null );
		
		return new DistanceClusterPair( minLeft, minRight );
	}
}
