#! /usr/bin/perl -w

use FileHandle;
use Bio::TreeIO;
use Bio::Tree::Node;
use Math::Matrix;
use strict;

my @equiProb = (0.25, 0.25, 0.25, 0.25);

if( @ARGV < 1 )
  {
    print "Usage: perl seq.weights.pl tree_file <equilibrium_distribution>\n";
    print "tree_file                 file containing tree in Newick format\n";
    print "equilibrium_distribution  optional, background probability distribution\n";
    exit();
  }

my $treeFile = $ARGV[0];
if( @ARGV > 1 )
  {
    for( my $i = 0; $i < scalar( @ARGV ) - 1; $i++ )
      {
	$equiProb[$i] = $ARGV[$i+1];
      }

    my $sum = 0;
    for( my $i = 0; $i < 4; $i++ )
      {
	$sum += $equiProb[$i];
      }
    for( my $i = 0; $i < 4; $i++ )
      {
	$equiProb[$i] /= $sum;
      }
  }

my $treein = new Bio::TreeIO(-format => 'newick',
			     -file => $treeFile );
my $tree = $treein->next_tree();

my $dist = DistanceMatrix( $tree );
my $weights = SequenceWeights( $dist, \@equiProb );

my $sum = 0;
my @nodes = $tree->get_leaf_nodes();
for( my $i = 0; $i < scalar( @nodes ); $i++ )
  {
    print $i+1, "\t", sprintf( "%7.4f", $weights->[$i] ), "\t!  ", $nodes[$i]->id(), "\n";
    $sum += $weights->[$i];
  }
print sprintf( "! sum of weights %7.4f", $sum ), "\n";



sub SequenceWeights
  {
    my ($dist, $prob) = @_;

    my $size = scalar( @{$dist->[0]} );
    my @ones = (1) x $size;
    my $onesM = Math::Matrix->new( \@ones );

    my $sum = 0;
    foreach my $p (@{$prob})
      {
	$sum += $p * $p;
      }
    my $k = 1 / (1 - $sum);

    my @dist2;
    for( my $i = 0; $i < $size; $i++ )
      {
	for( my $j = 0; $j < $size; $j++ )
	  {
	    $dist2[$i][$j] = exp( -$k * $dist->[$i][$j] );
	  }
      }

    my $C = Math::Matrix->new( @dist2 );
    my $Cinv = $C->invert();

    my $W = $Cinv->multiply( $onesM->transpose() );

    $sum = 0;
    my $sumW = 0;
    for( my $i = 0; $i < $size; $i++ )
      {
	$sum += $W->[$i][0];
	$W->[$i][0] = 0 if( $W->[$i][0] < 0 );
	$sumW += $W->[$i][0];
      }

    $W = $W->multiply_scalar( $sum/$sumW );

    my @weights;
    for( my $i = 0; $i < $size; $i++ )
      {	
	$weights[$i] = $W->[$i][0];
      }

    return \@weights;
  }


sub DistanceMatrix
  {
    my ($tree) = @_;

    my @speciesList = $tree->get_leaf_nodes();
    my $size = scalar( @speciesList );

    my @dist;
    for( my $i = 0; $i < $size; $i++ )
      {
	my $species1 = $speciesList[$i];
	my $node1 = $tree->find_node( $species1->id() );
	for( my $j = 0; $j < $size; $j++ )
	  {
	    my $species2 = $speciesList[$j];
	    my $node2 = $tree->find_node( $species2->id() );
	    my @nodes = ($node1, $node2);
	    my $common = $tree->get_lca( -nodes => \@nodes );
	    my $distance = $tree->distance( -nodes => \@nodes );
	    $dist[$i][$j] = $distance;
	  }
      }

    return \@dist;
  }
