Metrics and Models for Multi-layer Networks
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mammult/structure/correlations/fit_utils.c

398 lines
7.3 KiB

/*
* This file is part of MAMMULT: Metrics And Models for Multilayer Networks
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
*
* Fit a given sequence with a power-law function
*
* a*x^{b}
*
* The fit is actually performed as a linear fit on the
* exponential-binned log-log distribution
*
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <gsl/gsl_fit.h>
/**
*
* Load a sequence from a file, which contains one element on each line
*
*/
void load_sequence(char *fname, double **v, int *N){
int size;
char buff[256];
FILE *f;
f = fopen(fname, "r");
if (!f){
fprintf(stderr, "Error opening file %s!!!! Exiting...\n", fname);
exit(1);
}
*N =0;
size = 10;
if (*v == NULL)
*v = malloc(size * sizeof(double));
else{
*v = realloc(*v, size * sizeof(double));
}
while (fgets(buff, 255, f)){
(*v)[*N] = atof(buff);
*N += 1;
if (*N == size){
size += 10;
*v = realloc(*v, size * sizeof(double));
}
}
*v = realloc(*v, (*N) * sizeof(double));
fclose(f);
}
/**
*
* Load a sequence, getting the "col"-th column of the input file
*
*/
void load_sequence_col(char *fname, double **v, int *N, int col){
int size, n;
char buff[256];
char *ptr;
FILE *f;
f = fopen(fname, "r");
if (!f){
fprintf(stderr, "Error opening file %s!!!! Exiting...\n", fname);
exit(1);
}
*N =0;
size = 10;
if (*v == NULL)
*v = malloc(size * sizeof(double));
else{
*v = realloc(*v, size * sizeof(double));
}
while (fgets(buff, 255, f)){
ptr = strtok(buff, " ");
if (col > 0){
n = 0;
while (n<col){
ptr = strtok(NULL, " ");
n += 1;
}
}
(*v)[*N] = atof(ptr);
*N += 1;
if (*N == size){
size += 10;
*v = realloc(*v, size * sizeof(double));
}
}
*v = realloc(*v, (*N) * sizeof(double));
fclose(f);
}
/**
*
* Make the exponential binning of a distribution, with a giving
* exponent alpha. The value of y[i] is the number of elements of v
* whose value lies between x[i-1] and x[i]...
*
*/
void exp_bin_cnt(double *v, int N, double alpha, double **x, double **y, int *num){
double min_v, max_v, val, last_val;
int i, j, size, num_x;
double last_size, new_size;
min_v = max_v = v[0];
for (i=1; i<N; i ++){
if (v[i] > max_v)
max_v = v[i];
else if (v[i] > 0 && v[i] < min_v)
min_v = v[i];
}
size = 10;
if (*x == NULL){
*x = malloc(size * sizeof(double));
}
else{
*x = realloc(*x, size * sizeof(double));
}
val = min_v;
last_size = min_v;
(*x)[0] = min_v;
num_x = 1;
while(val < max_v){
new_size = last_size * alpha;
val = last_size + new_size;
last_size = new_size;
last_val = val;
(*x)[num_x] = val;
num_x += 1;
if (num_x == size){
size += 10;
*x = realloc(*x, size * sizeof(double));
}
}
if (*y == NULL){
*y = malloc(num_x * sizeof(double));
}
else{
*y = realloc(*y, num_x * sizeof(double));
}
for (i=0; i < num_x; i++){
(*y)[i] = 0;
}
for(i=0; i <N; i ++){
j = 0;
while(v[i] > (*x)[j]){
j ++;
}
(*y)[j] += 1;
}
*num = num_x;
}
/**
*
* Make the exponential binning of a distribution, with a giving
* exponent alpha. The value of y[i] is the average of the values in
* the vector "w" whose corresponding v lies between x[i-1] and x[i]...
*
*/
void exp_bin_avg(double *v, double *w, int N, double alpha, double **x, double **y, int *num){
double min_v, max_v, val, last_val;
int i, j, size, num_x;
double last_size, new_size;
int *cnt;
min_v = max_v = v[0];
for (i=1; i<N; i ++){
if (v[i] > max_v)
max_v = v[i];
else if (v[i] > 0 && v[i] < min_v)
min_v = v[i];
}
size = 10;
if (*x == NULL){
*x = malloc(size * sizeof(double));
}
else{
*x = realloc(*x, size * sizeof(double));
}
val = min_v;
last_size = min_v;
(*x)[0] = min_v;
num_x = 1;
while(val < max_v){
new_size = last_size * alpha;
val = last_size + new_size;
last_size = new_size;
last_val = val;
(*x)[num_x] = val;
num_x += 1;
if (num_x == size){
size += 10;
*x = realloc(*x, size * sizeof(double));
}
}
cnt = malloc(num_x * sizeof(int));
if (*y == NULL){
*y = malloc(num_x * sizeof(double));
}
else{
*y = realloc(*y, num_x * sizeof(double));
}
for (i=0; i < num_x; i++){
(*y)[i] = 0;
cnt[i] = 0;
}
for(i=0; i <N; i ++){
j = 0;
while(j < num_x && v[i] > (*x)[j]){
j ++;
}
if(j == num_x){
printf("Error!!!!! Trying to assing a non-existing bin!!! -- fit_utils.c:exp_bin_avg!!!\n");
exit(37);
}
(*y)[j] += w[i];
cnt[j] += 1;
}
*num = num_x;
for(i=0; i< num_x; i++){
if (cnt[i] > 0){
(*y)[i] = (*y)[i] / cnt[i];
}
}
free(cnt);
}
/**
*
* Print a distribution on stdout
*
*/
void dump_distr(double *x, double *y, int N){
int i;
for(i=0; i<N; i ++){
printf("%g %g\n", x[i], y[i]);
}
}
/**
* Compact a distribution, leaving only the pairs (x_i, y_i) for which
* y_i > 0
*
*/
void compact_distr(double *x, double *y, int *num){
int i, j;
i = j = 0;
while(j < *num){
while(j < *num && y[j] == 0){
j ++;
}
if (j==*num){
break;
}
x[i] = x[j];
y[i] = y[j];
i ++;
j ++;
}
*num = i;
}
/**
*
* Apply the function "f" on all the elemnts of a vector, in-place
*
*/
void map_vec(double *v, int N, double (*f)(double)){
int i;
for (i=0; i<N; i ++){
v[i] = f(v[i]);
}
}
/**
*
* Normalize a distribution, dividing each y[i] for the width of the
* corresponding bin (i.e., x[i] - x[i-1])
*
*/
void normalize_distr(double *x, double *y, int num){
int i;
for(i=1; i<num; i ++){
y[i] /= (x[i] - x[i-1]);
}
}
/**
*
* take two vectors (k and q) and a pairing, and compute the best
* power-law fit "a*k^{mu}" of qnn(k)
*
*/
void fit_current_trend(double *k, double *q, int N, int *pairing, double *mu, double *a,
double *corr){
static int *num;
static double *q_pair, *x, *y;
int i;
int fit_num;
double c0, c1, cov00, cov01, cov11, sqsum;
if (q_pair == NULL){
q_pair = malloc(N * sizeof(double));
}
for(i=0; i<N; i++){
q_pair[i] = q[pairing[i]];
}
exp_bin_avg(k, q_pair, N, 1.3, &x, &y, &fit_num);
//normalize_distr(x,y, fit_num);
compact_distr(x, y, &fit_num);
map_vec(x, fit_num, log);
map_vec(y, fit_num, log);
gsl_fit_linear(x, 1, y, 1, fit_num, &c0, &c1, &cov00, &cov01, &cov11, &sqsum);
//fprintf(stderr,"cov00: %g cov01: %g cov11: %g\n", cov00, cov01, cov11);
//fprintf(stderr, "corr: %g ", cov01 / sqrt(cov00 * cov11));
*a = c0;
*mu = c1;
*corr = cov01/sqrt(cov00 * cov11);
}