/*******************************************************************************
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 * 
 * Contact Info:
 * 	Bruce Donald
 * 	Duke University
 * 	Department of Computer Science
 * 	Levine Science Research Center (LSRC)
 * 	Durham
 * 	NC 27708-0129 
 * 	USA
 * 	brd@cs.duke.edu
 * 
 * Copyright (C) 2011 Jeffrey W. Martin and Bruce R. Donald
 * 
 * <signature of Bruce Donald>, April 2011
 * Bruce Donald, Professor of Computer Science
 ******************************************************************************/


package edu.duke.donaldLab.share.nmr;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

import edu.duke.donaldLab.share.io.Transformer;
import edu.duke.donaldLab.share.protein.AtomAddressReadable;

/**
 * Reads distance-restraint "assign" statements from a text source.
 * 
 * Each statement starts with the keyword <code>assign</code>, is followed by
 * parenthesized atom selections (<code>resid</code>, <code>name</code>,
 * <code>segid</code> joined by <code>and</code>/<code>or</code>) and ends with
 * one or more numbers. A statement may span several lines; it is considered
 * complete at the end of the first line on which at least one number was read.
 * Text following a <code>!</code> is treated as a comment, and tokens that match
 * none of the known patterns are skipped.
 * 
 * The parser expects well-formed input. Malformed input (for example a selection
 * keyword outside of a group, a number before the first <code>assign</code>, a
 * keyword with no following value, or a non-integer residue number) can surface
 * as an unchecked exception such as {@link NullPointerException},
 * {@link java.util.NoSuchElementException} or {@link NumberFormatException}.
 */
public class AssignReader
{
	/**************************
	 *   Definitions
	 **************************/
	
	/**
	 * The lexical tokens recognized by the reader.
	 * 
	 * Every token carries one or more regular expressions that are matched
	 * case-insensitively against a whole whitespace-delimited word. Tokens with a
	 * non-null replacement are punctuation: before tokenizing, each occurrence is
	 * replaced by the replacement text surrounded by spaces so that it becomes a
	 * word of its own.
	 * 
	 * The declaration order matters: {@link #lookup(String)} returns the first
	 * token whose pattern matches.
	 */
	private enum Token
	{
		Assign( null, "assign" ),
		Segid( null, "segid" ),
		Resid( null, "resid", "residue" ),
		Name( null, "name" ),
		Or( null, "or" ),
		And( null, "and" ),
		OpenGroup( "(", "\\(" ),
		CloseGroup( ")", "\\)" ),
		Comment( "!", "\\!" ),
		// at least one digit is required so that digit-less words such as "-" or "."
		// are not classified as numbers (Double.parseDouble() would reject them)
		Number( null, "\\-?(?:\\d+\\.?\\d*|\\.\\d+)" );
		
		private final String m_replace;
		private final ArrayList<String> m_patterns;
		// compiled once per constant instead of once per lookup
		private final ArrayList<Pattern> m_regexes;
		
		/**
		 * Finds the token that a single word represents.
		 * 
		 * @param in the word to classify
		 * @return the first token (in declaration order) with a pattern matching
		 *         the entire word, or null if no pattern matches
		 */
		public static Token lookup( String in )
		{
			for( Token token : Token.values() )
			{
				for( Pattern regex : token.m_regexes )
				{
					if( regex.matcher( in ).matches() )
					{
						return token;
					}
				}
			}
			return null;
		}
		
		private Token( String replace, String ... patterns )
		{
			m_replace = replace;
			m_patterns = Transformer.toArrayList( patterns );
			m_regexes = new ArrayList<Pattern>( patterns.length );
			for( String pattern : patterns )
			{
				m_regexes.add( Pattern.compile( pattern, Pattern.CASE_INSENSITIVE ) );
			}
		}
		
		/**
		 * @return the text this token is replaced with (padded by spaces) during
		 *         preprocessing, or null if the token needs no spacing
		 */
		public String getReplace( )
		{
			return m_replace;
		}
		
		/**
		 * @return a read-only view of the regular expression sources for this token
		 */
		public List<String> getPatterns( )
		{
			return Collections.unmodifiableList( m_patterns );
		}
		
		/**
		 * @return a read-only view of the compiled, case-insensitive regular
		 *         expressions for this token
		 */
		public List<Pattern> getRegexes( )
		{
			return Collections.unmodifiableList( m_regexes );
		}
	}
	
	
	/**************************
	 *   Methods
	 **************************/
	
	/**
	 * Reads all assigns from the file at the given path.
	 * 
	 * @param path path of the file to read
	 * @return the assigns in the order they were completed
	 * @throws IOException if the file cannot be opened or read
	 */
	public ArrayList<Assign> read( String path )
	throws IOException
	{
		return read( new File( path ) );
	}
	
	/**
	 * Reads all assigns from the given file. The file is closed before this
	 * method returns, whether or not reading succeeded.
	 * 
	 * @param file the file to read
	 * @return the assigns in the order they were completed
	 * @throws IOException if the file cannot be opened, read or closed
	 */
	public ArrayList<Assign> read( File file )
	throws IOException
	{
		InputStream in = new FileInputStream( file );
		try
		{
			return read( in );
		}
		finally
		{
			// this method opened the stream, so it is responsible for releasing it
			in.close();
		}
	}
	
	/**
	 * Reads all assigns from the given stream. The stream is read to its end
	 * but is not closed by this method.
	 * 
	 * @param in the stream to read; decoded with the platform default charset
	 * @return the assigns in the order they were completed
	 * @throws IOException if reading from the stream fails
	 */
	public ArrayList<Assign> read( InputStream in )
	throws IOException
	{
		/* need to be able to read lines that look like this:
			assign (resid 7 and name ha1 and segid A)(resid 15 and name hd# and segid C) 4.0 2.2 4.0
			assign (resid 7 and name ha1 and segid A)((resid 15 and name hd# and segid C)
				or (resid 15 and name hd# and segid B)) 4.0 2.2 4.0
		*/
		
		// parser state; it is kept across lines because one assign may span several lines
		ArrayList<Assign> assigns = new ArrayList<Assign>();
		Assign assign = null;
		ArrayList<AtomAddressReadable> addresses = new ArrayList<AtomAddressReadable>();
		AtomAddressReadable address = new AtomAddressReadable();
		int groupDepth = 0;
		
		// for each line...
		BufferedReader reader = new BufferedReader( new InputStreamReader( in ) );
		String line = null;
		while( ( line = reader.readLine() ) != null )
		{
			// skip blank lines
			line = line.trim();
			if( line.length() <= 0 )
			{
				continue;
			}
			
			// preprocess to make sure tokens are spaced out
			line = performSpacing( line );
			
			// tokenize
			StringTokenizer tokenizer = new StringTokenizer( line );
			while( tokenizer.hasMoreTokens() )
			{
				// get the next token
				String stringToken = tokenizer.nextToken();
				Token token = Token.lookup( stringToken );
				if( token == null )
				{
					// unrecognized word, skip it
					continue;
				}
				
				switch( token )
				{
					case Assign:
						// start a new assign
						assign = new Assign();
					break;
					
					// the three selection keywords consume the following word as their value
					case Segid:
						address.setSubunitName( tokenizer.nextToken().charAt( 0 ) );
					break;
					
					case Resid:
						address.setResidueNumber( Integer.parseInt( tokenizer.nextToken() ) );
					break;
					
					case Name:
						address.setAtomName( tokenizer.nextToken() );
					break;
					
					case OpenGroup:
						groupDepth++;
						
						// make a new address if needed
						if( address == null )
						{
							address = new AtomAddressReadable();
						}
					break;
					
					case CloseGroup:
						groupDepth--;
						
						// finish the address if needed
						if( address != null )
						{
							addresses.add( address );
							address = null;
						}
						
						if( groupDepth == 0 )
						{
							// we just finished a top-level group:
							// all addresses collected inside it form one entry of the assign
							assign.getAddresses().add( addresses );
							addresses = new ArrayList<AtomAddressReadable>();
						}
					break;
					
					case And:
						// just ignore
					break;
					
					case Or:
						// just ignore
					break;
					
					case Comment:
						// skip the rest of the line
						while( tokenizer.hasMoreTokens() )
						{
							tokenizer.nextToken();
						}
					break;
					
					case Number:
						assign.getNumbers().add( Double.parseDouble( stringToken ) );
					break;	
				}
			}
			
			// are we done with this assign?
			// (an assign is complete once a line has contributed at least one number)
			if( assign != null && assign.getNumbers().size() > 0 )
			{
				assigns.add( assign );
				assign = new Assign();
			}
		}
		
		return assigns;
	}
	
	
	/**************************
	 *   Functions
	 **************************/
	
	/**
	 * Surrounds every punctuation token (those with a non-null replacement) with
	 * spaces so that a whitespace tokenizer returns it as a separate word.
	 * 
	 * @param in the line to preprocess
	 * @return the line with punctuation tokens spaced out
	 */
	private String performSpacing( String in )
	{
		for( Token token : Token.values() )
		{
			String replace = token.getReplace();
			if( replace != null )
			{
				String spaced = " " + replace + " ";
				for( Pattern regex : token.getRegexes() )
				{
					in = regex.matcher( in ).replaceAll( spaced );
				}
			}
		}
		
		return in;
	}
}
