-
Notifications
You must be signed in to change notification settings - Fork 13
/
wquantiles.py
95 lines (83 loc) · 2.69 KB
/
wquantiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""
Library to compute weighted quantiles, including the weighted median, of
numpy arrays.
"""
from __future__ import print_function
import numpy as np
__version__ = "0.4"
def quantile_1D(data, weights, quantile):
"""
Compute the weighted quantile of a 1D numpy array.
Parameters
----------
data : ndarray
Input array (one dimension).
weights : ndarray
Array with the weights of the same size of `data`.
quantile : float
Quantile to compute. It must have a value between 0 and 1.
Returns
-------
quantile_1D : float
The output value.
"""
# Check the data
if not isinstance(data, np.matrix):
data = np.asarray(data)
if not isinstance(weights, np.matrix):
weights = np.asarray(weights)
nd = data.ndim
if nd != 1:
raise TypeError("data must be a one dimensional array")
ndw = weights.ndim
if ndw != 1:
raise TypeError("weights must be a one dimensional array")
if data.shape != weights.shape:
raise TypeError("the length of data and weights must be the same")
if ((quantile > 1.) or (quantile < 0.)):
raise ValueError("quantile must have a value between 0. and 1.")
# Sort the data
ind_sorted = np.argsort(data)
sorted_data = data[ind_sorted]
sorted_weights = weights[ind_sorted]
# Compute the auxiliary arrays
Sn = np.cumsum(sorted_weights)
# TODO: Check that the weights do not sum zero
#assert Sn != 0, "The sum of the weights must not be zero"
Pn = (Sn-0.5*sorted_weights)/Sn[-1]
# Get the value of the weighted median
return np.interp(quantile, Pn, sorted_data)
def quantile(data, weights, quantile):
"""
Weighted quantile of an array with respect to the last axis.
Parameters
----------
data : ndarray
Input array.
weights : ndarray
Array with the weights. It must have the same size of the last
axis of `data`.
quantile : float
Quantile to compute. It must have a value between 0 and 1.
Returns
-------
quantile : float
The output value.
"""
# TODO: Allow to specify the axis
nd = data.ndim
if nd == 0:
TypeError("data must have at least one dimension")
elif nd == 1:
return quantile_1D(data, weights, quantile)
elif nd > 1:
n = data.shape
imr = data.reshape((np.prod(n[:-1]), n[-1]))
result = np.apply_along_axis(quantile_1D, -1, imr, weights, quantile)
return result.reshape(n[:-1])
def median(data, weights):
"""
Weighted median of an array with respect to the last axis.
Alias for `quantile(data, weights, 0.5)`.
"""
return quantile(data, weights, 0.5)