/*
 * STAN - Stream Analyser
 * Copyright (c) 2001 Konrad Rieck <kr@r0q.cx>
 * The Roqefellaz, http://www.r0q.cx/stan
 *
 * Here you'll find all the mathematical stuff. In general it is simple, but
 * remember that all sums have to be built over the nodes of the treap, and
 * therefore it is necessary to traverse the treap.
 * $Id: stats.c,v 1.10 2001/04/10 10:56:53 kr Exp $
 */

#include <stdio.h>
#include <ctype.h>
#include <math.h>

#include <stan.h>
#include <treap.h>
#include <pattern.h>
#include <data.h>
#include <stats.h>
#include <config.h>

/*
 * Array of treap roots, defined in another translation unit. This file
 * accesses it as treap[patlen - 1] for pattern output and as treap[0] for
 * the statistics.
 */
extern tnode_t **treap;

/* Statistics filled in by the calc_*() functions of this file. */
double mean, median, variance, deviation, chi_squared;
/* 
 * Scratch variables shared between the calc_*() functions and the callbacks
 * they hand to ttraverse(). They are declared global for performance
 * purposes.
 */
double sum, total_size, tmp_median;

/*
 * Print a one-line summary for the treap holding the patterns of length
 * patlen: the pattern length, the values reported by tsize_tnodes(),
 * tsize_total(), size_data() and tdepth().
 *
 * patlen must be at least 1, because the treap is looked up as
 * treap[patlen - 1].
 *
 * NOTE(review): the remaining %d conversions assume that the treap/data
 * helpers return int - confirm against their prototypes.
 */
void print_header_patterns(size_t patlen)
{
    /*
     * patlen is a size_t and must not be consumed by a %d conversion;
     * print it as unsigned long instead.
     */
    printf("Pattern length %lu, different %d, total %d, "
           "bytes %d, depth %d\n",
           (unsigned long) patlen, tsize_tnodes(&treap[patlen - 1]),
           tsize_total(&treap[patlen - 1]), size_data(),
           tdepth(&treap[patlen - 1]));
}

/*
 * Print the "Pattern range" section for patterns of length patlen: the
 * node returned by tmin() followed by the node returned by tmax(), both
 * rendered with print_pattern().
 *
 * patlen must be at least 1, because the treap is looked up as
 * treap[patlen - 1].
 */
void print_range_patterns(size_t patlen)
{
    const size_t slot = patlen - 1;

    fputs(" - Pattern range\n", stdout);
    fputs("   ", stdout);
    print_pattern(*tmin(&treap[slot]), patlen);

    fputs(" - ", stdout);
    /* For pattern lengths above six the second pattern starts a new line. */
    if (patlen > 6) {
        fputs("\n   ", stdout);
    }

    print_pattern(*tmax(&treap[slot]), patlen);
    putchar('\n');
}

/*
 * Print up to `number` patterns of length patlen. Each round prints the
 * current root node of the treap and then calls tdelete() on that treap,
 * so the treap is modified while printing. The loop stops early once the
 * treap root becomes NULL.
 *
 * Layout: three patterns per line for lengths 1-2, two per line for
 * lengths 3-6, one per line for longer patterns. The output always ends
 * with a newline after the last printed pattern.
 */
void print_top_patterns(int number, size_t patlen)
{
    const size_t slot = patlen - 1;
    int printed;

    printf(" - %d most used patterns\n", number);

    for (printed = 0; printed < number && treap[slot]; printed++) {
        int line_full;

        printf("   ");
        print_pattern(treap[slot], patlen);
        tdelete(&treap[slot], patlen);

        /* Decide whether the current output line has reached its width. */
        if (slot < 2) {
            line_full = (printed % 3 == 2);
        } else if (slot < 6) {
            line_full = (printed % 2 == 1);
        } else {
            line_full = 1;
        }

        /* Also break the line after the very last pattern. */
        if (line_full || !treap[slot] || printed == number - 1) {
            printf("\n");
        }
    }
}

/*
 * Traversal callback used by calc_mean(): adds the first pattern byte of
 * the node, weighted by the node's count, to the global `sum`.
 */
void calc_sum(tnode_t * tnode)
{
    const double value = (double) tnode->pattern[0];

    sum = sum + value * tnode->count;
}

/*
 * Traversal callback used by calc_median(). As long as the running count
 * in `sum` is below half of `total_size`, the node's first pattern byte is
 * remembered in `tmp_median` and the node's count is added to `sum`.
 * Once half of the total has been reached, further nodes are ignored.
 */
void find_median(tnode_t * tnode)
{
    const double half = total_size / 2;

    if (!(sum < half)) {
        return;
    }

    tmp_median = (double) tnode->pattern[0];
    sum = sum + tnode->count;
}

/*
 * Compute the global `mean`: the sum accumulated by calc_sum() over
 * treap[0], divided by the value of tsize_total() for that treap.
 */
void calc_mean()
{
    double occurrences;

    sum = 0.0;
    ttraverse(&treap[0], calc_sum, inorder);

    occurrences = (double) tsize_total(&treap[0]);
    mean = sum / occurrences;
}

/*
 * Compute the global `median` by walking treap[0] in order with
 * find_median(), which records the last node value seen before the
 * running count reaches half of the total.
 */
void calc_median()
{
    sum = 0;
    /*
     * Reset the scratch result as well: if the traversal never invokes the
     * callback (empty treap), median must not pick up the value left over
     * from an earlier run.
     */
    tmp_median = 0;
    total_size = tsize_total(&treap[0]);
    ttraverse(&treap[0], find_median, inorder);
    median = tmp_median;
}

/*
 * Traversal callback used by calc_variance(): adds the squared distance
 * between `mean` and the node's first pattern byte, weighted by the node's
 * count, to the global `sum`.
 */
void calc_variance_sum(tnode_t * tnode)
{
    const double distance = mean - (double) tnode->pattern[0];
    const double weight = (double) tnode->count;

    sum = sum + pow(distance, 2) * weight;
}

/*
 * Traversal callback used by calc_chi_squared(): adds the term
 * (count - expected)^2 / expected for this node to the global `sum`,
 * where expected is tsize_total(treap[0]) divided by BYTE_MAX.
 *
 * NOTE(review): only nodes visited by the traversal contribute a term;
 * confirm whether values that never occur in the stream are meant to be
 * left out of the statistic.
 */
void calc_chi_squared_sum(tnode_t * tnode)
{
    const double expected = tsize_total(&treap[0]) / (double) BYTE_MAX;
    const double delta = tnode->count - expected;

    sum = sum + pow(delta, 2) / expected;
}

/*
 * Compute the global `chi_squared` as the sum of the terms produced by
 * calc_chi_squared_sum() over treap[0].
 */
void calc_chi_squared()
{
    /* The callback accumulates into the shared scratch variable. */
    sum = 0.0;

    ttraverse(&treap[0], calc_chi_squared_sum, inorder);

    chi_squared = sum;
}

/*
 * Compute the global `variance`: the sum accumulated by
 * calc_variance_sum() over treap[0], divided by the value of
 * tsize_total() for that treap. Reads `mean`, so calc_mean() has to run
 * first.
 */
void calc_variance()
{
    double occurrences;

    sum = 0.0;
    ttraverse(&treap[0], calc_variance_sum, inorder);

    occurrences = (double) tsize_total(&treap[0]);
    variance = sum / occurrences;
}

/*
 * Compute the global `deviation` as the square root of the global
 * `variance`; calc_variance() has to run first.
 */
void calc_deviation()
{
    const double root = sqrt(variance);

    deviation = root;
}

/* Print one statistics row: the label, the value via print_double(), the ending. */
static void print_stat_row(const char *label, double value, const char *ending)
{
    fputs(label, stdout);
    print_double(value);
    fputs(ending, stdout);
}

/*
 * Compute and print the general statistics of the stream: mean, median,
 * standard deviation and chi-squared. Each value is calculated right
 * before it is printed; the variance is calculated after the mean because
 * it depends on it.
 */
void print_stats()
{
    printf("General statistics for the stream, bytes %d\n", size_data());

    calc_mean();
    print_stat_row("   Mean:        ", mean, "\n");

    calc_median();
    print_stat_row("   Median:      ", median, "\n");

    /* The deviation is derived from the variance, so compute that first. */
    calc_variance();
    calc_deviation();
    print_stat_row("   Deviation:   ", deviation, "\n");

    calc_chi_squared();
    print_stat_row("   Chi^2:       ", chi_squared, "\n\n");
}

/*
 * Print d with two decimals in a field of width ten. If d lies in the
 * range (0, BYTE_MAX] and its integer part is a printable character, the
 * hexadecimal code and the character itself are appended.
 */
void print_double(double d)
{
    int code;

    printf("%10.2f", d);

    /* Only convert to int once the value is known to be in range. */
    if (!(d > 0 && d <= BYTE_MAX)) {
        return;
    }

    code = (int) d;
    if (!isprint(code)) {
        return;
    }

    printf("  ~  0x%.2x" "(%c)", code, code);
}
