ENH Allow cum_returns to accept DataFrame #39

Merged · 8 commits · Dec 7, 2016
96 changes: 7 additions & 89 deletions empyrical/stats.py
@@ -15,8 +15,6 @@

from __future__ import division

from functools import wraps

import pandas as pd
import numpy as np
from scipy import stats
@@ -108,20 +106,22 @@ def cum_returns(returns, starting_value=0):

Parameters
----------
returns : pd.Series or np.ndarray
returns : pd.Series, np.ndarray, or pd.DataFrame
Returns of the strategy as a percentage, noncumulative.
- Time series with decimal returns.
- Example:
2015-07-16 -0.012143
2015-07-17 0.045350
2015-07-20 0.030957
2015-07-21 0.004902.
- Also accepts two dimensional data. In this case,
each column is cumulated.
starting_value : float, optional
The starting returns.

Returns
-------
pd.Series or np.ndarray
pd.Series, np.ndarray, or pd.DataFrame
Series of cumulative returns.

Notes
@@ -140,11 +140,11 @@ def cum_returns(returns, starting_value=0):
if len(returns) < 1:
return type(returns)([])

if np.isnan(np.asanyarray(returns)[0]):
if np.any(np.isnan(returns)):
Member: @twiecki wrote "We should only catch the case where the whole first row are nans, as it's the only case produced by .pct_change()." It looks like the behavior here is to replace NaNs anywhere in the array with zero.

Contributor: I don't feel strongly about this; it's really a corner case.

Member: OK, I wasn't sure whether, if we start seeing NaNs at arbitrary locations, we should mask them, raise an error, or do something else.

Member: My preference is to alert early instead of masking the unexpected input.
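For context on the case discussed above (not part of the diff): .pct_change() has no prior row for the first observation, so it leaves NaNs only in the first row. A minimal sketch, assuming a made-up price DataFrame, of that behavior and of a check limited to the first row:

import numpy as np
import pandas as pd

# Hypothetical prices; .pct_change() leaves the entire first row as NaN
# because there is no prior row to compare against.
prices = pd.DataFrame({
    'one': [100.0, 101.5, 99.8],
    'two': [50.0, 50.5, 51.0],
}, index=pd.date_range('2015-07-16', periods=3, freq='D'))

returns = prices.pct_change()
print(returns.iloc[0].isnull().all())   # True: NaNs only in the first row

# A check limited to that case (rather than masking NaNs everywhere):
if np.all(np.isnan(np.asanyarray(returns)[0])):
    returns = returns.copy()
    returns.iloc[0] = 0.0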

returns = returns.copy()
returns[0] = 0.
returns[np.isnan(returns)] = 0.

df_cum = np.exp(nancumsum(np.log1p(returns)))
df_cum = (returns + 1).cumprod(axis=0)
Member: Assuming this has the same results (and speed) as the original, we should remove the helpers that we were using: nancumsum and array_wrap.
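A quick sanity check (outside the diff) of the equivalence this comment relies on: exp(cumsum(log1p(r))) and cumprod(1 + r) compute the same cumulative growth factors, since the exponential of a sum of logs is a product:

import numpy as np

r = np.array([0.01322056, 0.03063862, -0.01422057, -0.00489779])

via_logs = np.exp(np.cumsum(np.log1p(r)))   # old approach (log-sum-exp of growth factors)
via_prod = np.cumprod(1.0 + r)              # new approach

print(np.allclose(via_logs, via_prod))      # True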


if starting_value == 0:
return df_cum - 1
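A short usage sketch of what this change enables, using values taken from the tests below (assuming empyrical is installed with this PR applied): each DataFrame column is cumulated independently, and starting_value rescales the result.

import numpy as np
import pandas as pd
from empyrical import cum_returns

returns = pd.DataFrame({
    'one': [np.nan, 0.01322056, 0.03063862],
    'two': [0.01846232, 0.00793951, -0.01448395],
}, index=pd.date_range('2000-01-30', periods=3, freq='D'))

# Output keeps the DataFrame shape; the leading NaN is treated as a zero return.
print(cum_returns(returns))
print(cum_returns(returns, starting_value=100))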
@@ -182,88 +182,6 @@ def cum_returns_final(returns, starting_value=0):
starting_value=starting_value)[-1]


def array_wrap(arg_name, _not_specified=object()):
"""
Decorator for functions working on array_likes that ensures the type of
output matches that of the input, delegating to the input's __array_wrap__.

Parameters
----------
arg_name : str

The name of the array_like arg to the wrapped function. Should be the
first positional parameter to the wrapped function.

"""
def dec(f):
@wraps(f)
def _wrapit(*args, **kwds):
obj = kwds.get(arg_name, _not_specified)
if obj is _not_specified:
obj = args[0]

try:
wrap = obj.__array_wrap__
except AttributeError:
wrap = None
result = f(*args, **kwds)
if wrap:
if not isinstance(result, np.ndarray):
result = np.asarray(result)
result = wrap(result)
return result
return _wrapit
return dec
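A small, hypothetical illustration of what this (now removed) decorator does — the subclass and function below are made up: a result computed as a plain ndarray is handed back to the input's __array_wrap__, so the caller gets their own array-like type back.

import numpy as np

class MyArray(np.ndarray):
    """Toy ndarray subclass; its __array_wrap__ re-views results as MyArray."""
    pass

@array_wrap('a')                       # the decorator defined above
def demeaned(a):
    arr = np.asarray(a)
    return arr - arr.mean()

x = np.array([1.0, 2.0, 3.0]).view(MyArray)
print(type(demeaned(x)))               # MyArray, not plain ndarray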


@array_wrap('a')
def nancumsum(a, axis=None, dtype=None):
"""
Return the cumulative sum of array elements over a given axis treating Not
a Numbers (NaNs) as zero. The cumulative sum does not change when NaNs are
encountered and leading NaNs are replaced by zeros.

Handles a subset of the edge cases handled by the nancumsum added in numpy
1.12.0.

Parameters
----------
a : np.ndarray or pd.Series

Input array.

axis : int, optional

Axis along which the cumulative sum is computed. The default
(None) is to compute the cumsum over the flattened array.

dtype : np.dtype, optional

Type of the returned array and of the accumulator in which the
elements are summed. If `dtype` is not specified, it defaults
to the dtype of `a`, unless `a` has an integer dtype with a
precision less than that of the default platform integer. In
that case, the default platform integer is used.

Returns
-------
nancumsum : np.ndarray or pd.Series

A new array that has the same size as a, and the same shape as a.

See Also
--------
numpy.cumsum : Cumulative sum across array propagating NaNs.

"""
y = np.array(a, subok=True)
mask = np.isnan(a)
np.putmask(y, mask, 0.)
result = np.cumsum(y, axis=axis, dtype=dtype)
np.putmask(result, mask, np.nan)
return result
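For reference (not part of the diff), a sketch of what the implementation above does with NaNs: they count as zero while accumulating and are then restored in the output, so the running sum skips over them. Note that this differs from numpy's own nancumsum (added in 1.12.0), which leaves the running sum at those positions, and from the borrowed docstring wording above.

import numpy as np

a = np.array([np.nan, 1.0, 2.0, np.nan, 3.0])

y = np.where(np.isnan(a), 0.0, a)   # treat NaNs as zero for the running sum
result = np.cumsum(y)
result[np.isnan(a)] = np.nan        # restore NaN at the original positions
print(result)                       # [nan  1.  3. nan  6.]

# numpy's nancumsum (requires numpy >= 1.12) keeps the running sum instead:
print(np.nancumsum(a))              # [0. 1. 3. 3. 6.]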


def aggregate_returns(returns, convert_to):
"""
Aggregates returns by week, month, or year.
77 changes: 77 additions & 0 deletions empyrical/tests/test_stats.py
@@ -1048,6 +1048,83 @@ def empyrical(self):
)


class Test2DStats(TestCase):
"""
Tests for functions that are capable of outputting a DataFrame.
"""

input_one = [np.nan, 0.01322056, 0.03063862, -0.01422057,
-0.00489779, 0.01268925, -0.03357711, 0.01797036]
Member: Should we add the edge cases that we have for 1D, like empty and nans in the final position?

input_two = [0.01846232, 0.00793951, -0.01448395, 0.00422537,
-0.00339611, 0.03756813, 0.0151531, np.nan]

expected_0_one = [0.000000, 0.013221, 0.044264, 0.029414, 0.024372,
0.037371, 0.002539, 0.020555]
expected_0_two = [0.018462, 0.026548, 0.011680, 0.015955, 0.012504,
0.050542, 0.066461, 0.066461]

expected_100_one = [100.000000, 101.322056, 104.426424, 102.941421,
102.437235, 103.737087, 100.253895, 102.055494]
expected_100_two = [101.846232, 102.654841, 101.167994, 101.595466,
101.250436, 105.054226, 106.646123, 106.646123]

df_index = pd.date_range('2000-1-30', periods=8, freq='D')

df_input = pd.DataFrame({
'one': pd.Series(input_one, index=df_index),
'two': pd.Series(input_two, index=df_index)})

df_empty = pd.DataFrame()

df_0_expected = pd.DataFrame({
'one': pd.Series(expected_0_one, index=df_index),
'two': pd.Series(expected_0_two, index=df_index)})

df_100_expected = pd.DataFrame({
'one': pd.Series(expected_100_one, index=df_index),
'two': pd.Series(expected_100_two, index=df_index)})

@parameterized.expand([
(df_input, 0, df_0_expected),
(df_input, 100, df_100_expected),
(df_empty, 0, pd.DataFrame())
])
def test_cum_returns_df(self, returns, starting_value, expected):
cum_returns = self.empyrical.cum_returns(
returns,
starting_value=starting_value,
)

assert_almost_equal(np.asarray(cum_returns),
np.asarray(expected), 4)

@property
def empyrical(self):
Member: Could you do me a favor and add a docstring here that says it returns "empyrical", so that my dev env will autocomplete empyrical's functions for self.empyrical? It would be good to mention what we're testing with this property.

"""
Returns a wrapper around the empyrical module so tests can
perform input conversions or return type checks on each call to an
empyrical function. See full explanation in TestStats.

Returns
-------
empyrical

"""

return ReturnTypeEmpyricalProxy(self, pd.DataFrame)


class Test2DStatsArrays(Test2DStats):
"""
Tests pass np.ndarray inputs to empyrical and assert that outputs are of
type np.ndarray.

"""
@property
def empyrical(self):
return PassArraysEmpyricalProxy(self, np.ndarray)


class ReturnTypeEmpyricalProxy(object):
"""
A wrapper around the empyrical module which, on each function call, asserts
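The class body is cut off here. As a hypothetical sketch only (names and structure are assumptions, not the project's actual code), a return-type-asserting proxy along the lines the docstrings above describe could look like this:

import empyrical

class ReturnTypeProxySketch(object):
    """Hypothetical stand-in: forwards attribute access to the empyrical
    module and asserts the type of every function's return value."""

    def __init__(self, test_case, expected_type):
        self._test_case = test_case
        self._expected_type = expected_type

    def __getattr__(self, name):
        func = getattr(empyrical, name)

        def wrapper(*args, **kwargs):
            result = func(*args, **kwargs)
            self._test_case.assertIsInstance(result, self._expected_type)
            return result

        return wrapper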