-
Notifications
You must be signed in to change notification settings - Fork 407
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH Allow cum_returns to accept DataFrame #39
Changes from all commits
99ef007
a9919b8
e2d27d6
5dee4ce
39fd05b
7de1038
c8496e9
22c4366
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,8 +15,6 @@ | |
|
||
from __future__ import division | ||
|
||
from functools import wraps | ||
|
||
import pandas as pd | ||
import numpy as np | ||
from scipy import stats | ||
|
@@ -108,20 +106,22 @@ def cum_returns(returns, starting_value=0): | |
|
||
Parameters | ||
---------- | ||
returns : pd.Series or np.ndarray | ||
returns : pd.Series, np.ndarray, or pd.DataFrame | ||
Returns of the strategy as a percentage, noncumulative. | ||
- Time series with decimal returns. | ||
- Example: | ||
2015-07-16 -0.012143 | ||
2015-07-17 0.045350 | ||
2015-07-20 0.030957 | ||
2015-07-21 0.004902. | ||
- Also accepts two dimensional data. In this case, | ||
each column is cumulated. | ||
starting_value : float, optional | ||
The starting returns. | ||
|
||
Returns | ||
------- | ||
pd.Series or np.ndarray | ||
pd.Series, np.ndarray, or pd.DataFrame | ||
Series of cumulative returns. | ||
|
||
Notes | ||
|
@@ -140,11 +140,11 @@ def cum_returns(returns, starting_value=0): | |
if len(returns) < 1: | ||
return type(returns)([]) | ||
|
||
if np.isnan(np.asanyarray(returns)[0]): | ||
if np.any(np.isnan(returns)): | ||
returns = returns.copy() | ||
returns[0] = 0. | ||
returns[np.isnan(returns)] = 0. | ||
|
||
df_cum = np.exp(nancumsum(np.log1p(returns))) | ||
df_cum = (returns + 1).cumprod(axis=0) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: Assuming this produces the same results (and runs at the same speed) as the original, we should remove the helpers that we were using (`array_wrap` and `nancumsum`): |
||
|
||
if starting_value == 0: | ||
return df_cum - 1 | ||
|
@@ -182,88 +182,6 @@ def cum_returns_final(returns, starting_value=0): | |
starting_value=starting_value)[-1] | ||
|
||
|
||
def array_wrap(arg_name, _not_specified=object()):
    """
    Decorator for functions working on array_likes that ensures the type of
    output matches that of the input, delegating to the input's
    ``__array_wrap__``.

    Parameters
    ----------
    arg_name : str
        The name of the array_like arg to the wrapped function. Should be the
        first positional parameter to the wrapped function.
    _not_specified : object, optional
        Private sentinel used to detect that ``arg_name`` was not passed as a
        keyword. Callers should not supply it.

    Returns
    -------
    callable
        A decorator. The decorated function looks up the array argument
        (by keyword first, otherwise the first positional argument), calls
        the original function, and passes the result through the argument's
        ``__array_wrap__`` when it has one.

    Notes
    -----
    Inputs that do not define ``__array_wrap__`` (for example plain lists)
    get the wrapped function's result back unchanged.
    """
    def dec(f):
        @wraps(f)
        def _wrapit(*args, **kwds):
            obj = kwds.get(arg_name, _not_specified)
            if obj is _not_specified:
                if not args:
                    # The array argument was not supplied at all. Call
                    # through so the wrapped function raises its own,
                    # descriptive TypeError instead of an IndexError from
                    # indexing the empty ``args`` tuple here.
                    return f(*args, **kwds)
                obj = args[0]

            # Look the hook up before calling ``f`` so that it is taken from
            # the input exactly as the caller passed it.
            wrap = getattr(obj, '__array_wrap__', None)
            result = f(*args, **kwds)
            if not wrap:
                return result

            # ``__array_wrap__`` expects an ndarray, so coerce other results.
            if not isinstance(result, np.ndarray):
                result = np.asarray(result)
            return wrap(result)
        return _wrapit
    return dec
|
||
|
||
@array_wrap('a')
def nancumsum(a, axis=None, dtype=None):
    """
    Return the cumulative sum of array elements over a given axis, treating
    Not a Numbers (NaNs) as zero while accumulating.

    NaN entries contribute zero to the running total, so the sum carried
    past a NaN is unchanged. Every position that held a NaN in the input is
    set back to NaN in the output.

    Handles a subset of the edge cases handled by the nancumsum added in numpy
    1.12.0.

    Parameters
    ----------
    a : np.ndarray or pd.Series
        Input array.
    axis : int, optional
        Axis along which the cumulative sum is computed. The default
        (None) is to compute the cumsum over the flattened array.
    dtype : np.dtype, optional
        Type of the returned array and of the accumulator in which the
        elements are summed. If `dtype` is not specified, it defaults
        to the dtype of `a`, unless `a` has an integer dtype with a
        precision less than that of the default platform integer. In
        that case, the default platform integer is used.

    Returns
    -------
    nancumsum : np.ndarray or pd.Series
        A new array with the same number of elements as `a`. It has the
        same shape as `a` when `axis` is given; with ``axis=None`` the
        cumulative sum is taken over the flattened input.

    See Also
    --------
    numpy.cumsum : Cumulative sum across array propagating NaNs.

    """
    # Work on a copy so the caller's data is never modified.
    y = np.array(a, subok=True)
    mask = np.isnan(a)
    np.putmask(y, mask, 0.)
    result = np.cumsum(y, axis=axis, dtype=dtype)
    # Only write NaNs back when the input actually contained some. With an
    # empty mask there is nothing to restore, and np.putmask would still
    # have to cast NaN to the result dtype, which is not possible for
    # integer-typed results.
    if np.any(mask):
        np.putmask(result, mask, np.nan)
    return result
|
||
|
||
def aggregate_returns(returns, convert_to): | ||
""" | ||
Aggregates returns by week, month, or year. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1048,6 +1048,83 @@ def empyrical(self): | |
) | ||
|
||
|
||
class Test2DStats(TestCase): | ||
""" | ||
Tests for functions that are capable of outputting a DataFrame. | ||
""" | ||
|
||
input_one = [np.nan, 0.01322056, 0.03063862, -0.01422057, | ||
-0.00489779, 0.01268925, -0.03357711, 0.01797036] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: Should we add the edge cases that we have for the 1D tests, such as empty input and NaNs in the final position? |
||
input_two = [0.01846232, 0.00793951, -0.01448395, 0.00422537, | ||
-0.00339611, 0.03756813, 0.0151531, np.nan] | ||
|
||
expected_0_one = [0.000000, 0.013221, 0.044264, 0.029414, 0.024372, | ||
0.037371, 0.002539, 0.020555] | ||
expected_0_two = [0.018462, 0.026548, 0.011680, 0.015955, 0.012504, | ||
0.050542, 0.066461, 0.066461] | ||
|
||
expected_100_one = [100.000000, 101.322056, 104.426424, 102.941421, | ||
102.437235, 103.737087, 100.253895, 102.055494] | ||
expected_100_two = [101.846232, 102.654841, 101.167994, 101.595466, | ||
101.250436, 105.054226, 106.646123, 106.646123] | ||
|
||
df_index = pd.date_range('2000-1-30', periods=8, freq='D') | ||
|
||
df_input = pd.DataFrame({ | ||
'one': pd.Series(input_one, index=df_index), | ||
'two': pd.Series(input_two, index=df_index)}) | ||
|
||
df_empty = pd.DataFrame() | ||
|
||
df_0_expected = pd.DataFrame({ | ||
'one': pd.Series(expected_0_one, index=df_index), | ||
'two': pd.Series(expected_0_two, index=df_index)}) | ||
|
||
df_100_expected = pd.DataFrame({ | ||
'one': pd.Series(expected_100_one, index=df_index), | ||
'two': pd.Series(expected_100_two, index=df_index)}) | ||
|
||
@parameterized.expand([ | ||
(df_input, 0, df_0_expected), | ||
(df_input, 100, df_100_expected), | ||
(df_empty, 0, pd.DataFrame()) | ||
]) | ||
def test_cum_returns_df(self, returns, starting_value, expected): | ||
cum_returns = self.empyrical.cum_returns( | ||
returns, | ||
starting_value=starting_value, | ||
) | ||
|
||
assert_almost_equal(np.asarray(cum_returns), | ||
np.asarray(expected), 4) | ||
|
||
@property | ||
def empyrical(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Review comment: Could you do me a favor and add a docstring here that says it returns "empyrical", so that my dev env will autocomplete empyrical's functions for |
||
""" | ||
Returns a wrapper around the empyrical module so tests can | ||
perform input conversions or return type checks on each call to an | ||
empyrical function. See full explanation in TestStats. | ||
|
||
Returns | ||
------- | ||
empyrical | ||
|
||
""" | ||
|
||
return ReturnTypeEmpyricalProxy(self, pd.DataFrame) | ||
|
||
|
||
class Test2DStatsArrays(Test2DStats):
    """
    Re-runs the Test2DStats cases through a proxy that passes np.ndarray
    inputs to empyrical and asserts that the outputs are np.ndarray as well.

    """
    @property
    def empyrical(self):
        """
        Returns
        -------
        empyrical
            Proxy around the empyrical module configured for np.ndarray.
        """
        array_proxy = PassArraysEmpyricalProxy(self, np.ndarray)
        return array_proxy
|
||
|
||
class ReturnTypeEmpyricalProxy(object): | ||
""" | ||
A wrapper around the empyrical module which, on each function call, asserts | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@twiecki wrote "We should only catch the case where the whole first row are nans, as it's the only case produced by .pct_change()." However, the behavior here seems to be to replace NaNs anywhere in the array with zero.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't feel strongly about this; it's really a corner case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK. I wasn't sure whether, if we start seeing NaNs at arbitrary locations, we should mask them, raise an error, or do something else.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My preference is to alert early instead of masking the unexpected input.