package Statistics::ANOVA;

use Statistics::Distributions qw/ fdistr fprob/ ;
use Statistics::Basic qw/:all/;
use Carp;

our $VERSION=q/1.0/;

# Default attributes of a freshly constructed object; all start undefined.
#   data        - hash reference stored by load_data()
#   sample_mean - array reference of per-sample means, set by perform_anova()
#   sst, sse    - treatment and error sums of squares, set by perform_anova()
#   f, pvalue   - F statistic and its p-value, set by perform_anova()
# This has to be a list in parentheses: assigning a {...} hash reference to a
# hash would create a single bogus key made of the stringified reference.
my %fields=(
	data =>	undef,
	sample_mean => undef,
	sst  => undef,
	sse  => undef,
	f => undef,
	pvalue=>undef
);


# Constructor. May be called on the class name or on an existing object, in
# which case the new object is blessed into that object's class.
# Returns a new object holding its own copy of the default fields.
sub new{
	my $proto=shift;
	my $class=ref $proto || $proto;
	# Copy the defaults so that objects never share the template hash
	my $self={%fields};
	bless ($self,$class);
	return $self;
	
}


# Attach the data set to analyse to the object.
# Argument: a hash reference; anything else makes the method croak.
# Returns the object itself.
sub load_data{
	my ($self,$data)=@_;

	unless (ref($data) eq 'HASH'){
		croak "Invalid input type for load_data(), The input must be hash reference\n";
	}

	$self->{'data'}=$data;
	return $self;
}

# Run the one-way ANOVA on the data stored by load_data().
#
# The data must be a hash reference whose values are non-empty array
# references of numeric observations, one array per sample.
#
# Stores in the object:
#   sample_mean - array reference of the per-sample means, ordered by sample key
#   sst         - sum of squares for treatment (between samples)
#   sse         - sum of squares for error (within samples)
#   f           - F statistic (mean square treatment / mean square error)
#   pvalue      - value returned by fprob() for that F statistic
#
# Returns the p-value.
# Croaks when no data is loaded, when there are fewer than two samples, when a
# sample is not a non-empty array reference, when there are not more
# observations than samples, or when the error sum of squares is zero (the F
# statistic would require a division by zero).
sub perform_anova{
	
	my $self=shift;
	my $data=$self->{'data'};

	croak "No data loaded, call load_data() before perform_anova()\n"
		if ref($data) ne 'HASH';

	# Numeric keys keep their numeric order. Keys that compare equal as
	# numbers (every non-numeric name does) fall back to string order, so the
	# order of sample_mean no longer depends on the hash's internal ordering.
	my @sampleName=do{
		no warnings 'numeric';
		sort { $a<=>$b || $a cmp $b } keys %$data;
	};
	croak "perform_anova() needs at least two samples\n" if @sampleName<2;

	my @sampleMean;
	my @sampleSize;
	my @dataAll;

	for my $sample (@sampleName){
		my $values=$data->{$sample};
		croak "Sample '$sample' must be a non-empty array reference\n"
			if ref($values) ne 'ARRAY' || !@$values;
		push @sampleMean,mean( @$values );
		push @sampleSize,scalar @$values;
		push @dataAll,@$values;
	}

	my $totalSize=scalar @dataAll;

	# Degrees of freedom: treatment (between samples) and error (within samples)
	my $degOfFreeMST=@sampleName-1;
	my $degOfFreeMSE=$totalSize-@sampleName;
	croak "perform_anova() needs more observations than samples\n"
		if $degOfFreeMSE<1;

	# Plain numeric copies for the arithmetic; sample_mean keeps whatever
	# mean() returned.
	my $totalMean=0+mean ( @dataAll );
	my @meanValue=map { 0+$_ } @sampleMean;

	# Get Sum of squares for treatment
	# Summing squared deviations (instead of subtracting two large sums of
	# squares) avoids cancellation and can never yield a negative result.
	my $sst=0;
	$sst+=$sampleSize[$_]*($meanValue[$_]-$totalMean)**2 for 0..$#meanValue;

	# Get Sum of squares for error
	my $sse=0;
	for my $i (0..$#sampleName){
		$sse+=($_-$meanValue[$i])**2 for @{ $data->{ $sampleName[$i] } };
	}

	# Get p-value
	my $mst=$sst/$degOfFreeMST;
	my $mse=$sse/$degOfFreeMSE;
	croak "All samples have zero variance, the F statistic is undefined\n"
		if $mse==0;
	my $f=$mst/$mse;

	my $pvalue=fprob ($degOfFreeMST,$degOfFreeMSE,$f);

	$self->{'sse'}=$sse;
	$self->{'sst'}=$sst;
	$self->{'sample_mean'}=\@sampleMean;
	$self->{'f'}=$f;
	$self->{'pvalue'}=$pvalue;

	return $pvalue;
}


# Accessor for the p-value kept under the object's 'pvalue' key
# (undef when nothing has been stored there).
sub pvalue{
	my ($self)=@_;
	my $pvalue=$self->{'pvalue'};
	return $pvalue;
}

# Accessor for the F statistic kept under the object's 'f' key
# (undef when nothing has been stored there).
sub fstatistic{
	my ($self)=@_;
	my $f=$self->{'f'};
	return $f;
}
1;


__END__

=head1 NAME

Statistics::ANOVA - Perl module to perform a one-way ANOVA test

=head1 SYNOPSIS

  # Example to conduct one-way ANOVA test

  use Statistics::ANOVA;

  my %data=( sampleA=>[1,2,3], sampleB=>[4,5,6] );
  my $anova=Statistics::ANOVA->new;
  $anova->load_data(\%data);
  $anova->perform_anova;
  my $pvalue=$anova->pvalue;

  print "ANOVA pvalue : $pvalue\n";

=head1 DESCRIPTION

This module conducts a one-way ANOVA test on a given set of samples. The outcome of the test of the null hypothesis can be obtained either as a p-value or as an F statistic. The data are supplied as a hash reference that maps each sample name to an array reference of observations.

=head1 AUTHOR

 Changwon Keum ( cwkeum@gmail.com )

=head1 SEE ALSO

Statistics::TTest;

=cut