You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
147 lines
4.6 KiB
147 lines
4.6 KiB
# This file is part of MAMMULT: Metrics And Models for Multilayer Networks
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or (at
|
|
# your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful, but
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
# General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
####
|
|
##
|
|
## Take as input two files, whose n^th line contains the ranking of
|
|
## element n, and compute the Kendall's \tau_b rank correlation
|
|
## coefficient
|
|
##
|
|
##
|
|
|
|
import sys
|
|
from numpy import *
|
|
|
|
|
|
def kendalltau(x,y):
|
|
initial_sort_with_lexsort = True # if True, ~30% slower (but faster under profiler!) but with better worst case (O(n log(n)) than (quick)sort (O(n^2))
|
|
n = len(x)
|
|
temp = range(n) # support structure used by mergesort
|
|
# this closure recursively sorts sections of perm[] by comparing
|
|
# elements of y[perm[]] using temp[] as support
|
|
# returns the number of swaps required by an equivalent bubble sort
|
|
def mergesort(offs, length):
|
|
exchcnt = 0
|
|
if length == 1:
|
|
return 0
|
|
if length == 2:
|
|
if y[perm[offs]] <= y[perm[offs+1]]:
|
|
return 0
|
|
t = perm[offs]
|
|
perm[offs] = perm[offs+1]
|
|
perm[offs+1] = t
|
|
return 1
|
|
length0 = length / 2
|
|
length1 = length - length0
|
|
middle = offs + length0
|
|
exchcnt += mergesort(offs, length0)
|
|
exchcnt += mergesort(middle, length1)
|
|
if y[perm[middle - 1]] < y[perm[middle]]:
|
|
return exchcnt
|
|
# merging
|
|
i = j = k = 0
|
|
while j < length0 or k < length1:
|
|
if k >= length1 or (j < length0 and y[perm[offs + j]] <= y[perm[middle + k]]):
|
|
temp[i] = perm[offs + j]
|
|
d = i - j
|
|
j += 1
|
|
else:
|
|
temp[i] = perm[middle + k]
|
|
d = (offs + i) - (middle + k)
|
|
k += 1
|
|
if d > 0:
|
|
exchcnt += d;
|
|
i += 1
|
|
perm[offs:offs+length] = temp[0:length]
|
|
return exchcnt
|
|
|
|
# initial sort on values of x and, if tied, on values of y
|
|
if initial_sort_with_lexsort:
|
|
# sort implemented as mergesort, worst case: O(n log(n))
|
|
perm = lexsort((y, x))
|
|
else:
|
|
# sort implemented as quicksort, 30% faster but with worst case: O(n^2)
|
|
perm = range(n)
|
|
perm.sort(lambda a,b: cmp(x[a],x[b]) or cmp(y[a],y[b]))
|
|
|
|
# compute joint ties
|
|
first = 0
|
|
t = 0
|
|
for i in xrange(1,n):
|
|
if x[perm[first]] != x[perm[i]] or y[perm[first]] != y[perm[i]]:
|
|
t += ((i - first) * (i - first - 1)) / 2
|
|
first = i
|
|
t += ((n - first) * (n - first - 1)) / 2
|
|
|
|
# compute ties in x
|
|
first = 0
|
|
u = 0
|
|
for i in xrange(1,n):
|
|
if x[perm[first]] != x[perm[i]]:
|
|
u += ((i - first) * (i - first - 1)) / 2
|
|
first = i
|
|
u += ((n - first) * (n - first - 1)) / 2
|
|
|
|
# count exchanges
|
|
exchanges = mergesort(0, n)
|
|
# compute ties in y after mergesort with counting
|
|
first = 0
|
|
v = 0
|
|
for i in xrange(1,n):
|
|
if y[perm[first]] != y[perm[i]]:
|
|
v += ((i - first) * (i - first - 1)) / 2
|
|
first = i
|
|
v += ((n - first) * (n - first - 1)) / 2
|
|
|
|
tot = (n * (n - 1)) / 2
|
|
if tot == u and tot == v:
|
|
return 1 # Special case for all ties in both ranks
|
|
|
|
tau = ((tot-(v+u-t)) - 2.0 * exchanges) / (sqrt(float(( tot - u )) * float( tot - v )))
|
|
|
|
# what follows reproduces ending of Gary Strangman's original stats.kendalltau() in SciPy
|
|
svar = (4.0*n+10.0) / (9.0*n*(n-1))
|
|
z = tau / sqrt(svar)
|
|
##prob = erfc(abs(z)/1.4142136)
|
|
##return tau, prob
|
|
return tau
|
|
|
|
def main():
|
|
|
|
if len(sys.argv) < 3:
|
|
print "Usage: %s <file1> <file2>" % sys.argv[0]
|
|
sys.exit(1)
|
|
|
|
x1 = []
|
|
x2= []
|
|
|
|
lines = open(sys.argv[1]).readlines()
|
|
|
|
for l in lines:
|
|
elem = [float(x) if "e" in x or "." in x else int(x) for x in l.strip(" \n").split()][0]
|
|
x1.append(elem)
|
|
|
|
lines = open(sys.argv[2]).readlines()
|
|
|
|
for l in lines:
|
|
elem = [float(x) if "e" in x or "." in x else int(x) for x in l.strip(" \n").split()][0]
|
|
x2.append(elem)
|
|
|
|
|
|
tau = kendalltau(x1,x2)
|
|
print tau
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|
|
|