import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

returns = pd.DataFrame(np.random.normal(1.0, 0.03, (100, 10)))
prices = returns.cumprod()
prices.plot()
plt.title('Randomly-generated Prices')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend(loc=0);

s = pd.Series([1, 2, np.nan, 4, 5])
print(s)

0    1.0
1    2.0
2    NaN
3    4.0
4    5.0
dtype: float64

print(s.name)

None

s.name = "Toy Series"
print(s.name)

Toy Series

print(s.index)

RangeIndex(start=0, stop=5, step=1)

new_index = pd.date_range("2016-01-01", periods=len(s), freq="D")
print(new_index)

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05'],
              dtype='datetime64[ns]', freq='D')

s.index = new_index
print(s.index)

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05'],
              dtype='datetime64[ns]', freq='D')

# Positional access with iloc: position 0 is the first element and
# position -1 is the last, so the length never has to be computed by hand.
print("First element of the series:", s.iloc[0])
print("Last element of the series:", s.iloc[-1])

First element of the series: 1.0
Last element of the series: 5.0

s.iloc[:2]

2016-01-01    1.0
2016-01-02    2.0
Freq: D, Name: Toy Series, dtype: float64

start = 0
end = len(s) - 1
step = 1

s.iloc[start:end:step]

2016-01-01    1.0
2016-01-02    2.0
2016-01-03    NaN
2016-01-04    4.0
Freq: D, Name: Toy Series, dtype: float64

s.iloc[::-1]

2016-01-05    5.0
2016-01-04    4.0
2016-01-03    NaN
2016-01-02    2.0
2016-01-01    1.0
Freq: -1D, Name: Toy Series, dtype: float64

s.iloc[-2:-4:-1]

2016-01-04    4.0
2016-01-03    NaN
Freq: -1D, Name: Toy Series, dtype: float64

s.loc['2016-01-01']

1.0

s.loc['2016-01-02':'2016-01-04']

2016-01-02    2.0
2016-01-03    NaN
2016-01-04    4.0
Freq: D, Name: Toy Series, dtype: float64

print(s < 3)

2016-01-01     True
2016-01-02     True
2016-01-03    False
2016-01-04    False
2016-01-05    False
Freq: D, Name: Toy Series, dtype: bool

print(s.loc[s < 3])

2016-01-01    1.0
2016-01-02    2.0
Freq: D, Name: Toy Series, dtype: float64

print(s.loc[(s < 3) & (s > 1)])

2016-01-02    2.0
Freq: D, Name: Toy Series, dtype: float64

from quantrocket.master import get_securities
securities = get_securities(symbols='XOM', fields=['Sid','Symbol','Exchange'], vendors='usstock')
securities

from quantrocket import get_prices
XOM = securities.index[0]
start = "2012-01-01"
end = "2016-01-01"
prices = get_prices("usstock-free-1min", data_frequency="daily", sids=XOM, start_date=start, end_date=end, fields="Close")
prices = prices.loc["Close"][XOM]

print(type(prices))
prices.head(5)

<class 'pandas.core.series.Series'>

Date
2012-01-03    81.805
2012-01-04    81.824
2012-01-05    81.577
2012-01-06    80.968
2012-01-09    81.329
Name: FIBBG000GZQ728, dtype: float64

print('Old name:', prices.name)
prices.name = "XOM"
print('New name:', prices.name)

Old name: FIBBG000GZQ728
New name: XOM

print(prices.index)
print("tz:", prices.index.tz)

DatetimeIndex(['2012-01-03', '2012-01-04', '2012-01-05', '2012-01-06',
               '2012-01-09', '2012-01-10', '2012-01-11', '2012-01-12',
               '2012-01-13', '2012-01-17',
               ...
               '2015-12-17', '2015-12-18', '2015-12-21', '2015-12-22',
               '2015-12-23', '2015-12-24', '2015-12-28', '2015-12-29',
               '2015-12-30', '2015-12-31'],
              dtype='datetime64[ns]', name='Date', length=1006, freq=None)
tz: None

monthly_prices = prices.resample('M').last()
monthly_prices.head(10)

Date
2012-01-31    79.655
2012-02-29    82.281
2012-03-31    82.499
2012-04-30    82.128
2012-05-31    74.795
2012-06-30    81.396
2012-07-31    82.614
2012-08-31    83.042
2012-09-30    86.989
2012-10-31    86.723
Freq: M, Name: XOM, dtype: float64

monthly_prices_med = prices.resample('M').median()
monthly_prices_med.head(10)

Date
2012-01-31    81.6100
2012-02-29    81.3440
2012-03-31    81.8860
2012-04-30    81.2105
2012-05-31    78.1380
2012-06-30    78.1050
2012-07-31    81.3010
2012-08-31    83.6310
2012-09-30    86.7800
2012-10-31    87.2460
Freq: M, Name: XOM, dtype: float64

def custom_resampler(array_like):
    """Return the first value of a resampling period.

    Args:
        array_like: The observations falling in one period. In this file it
            is called through ``Series.resample(...).apply``; plain sequences
            and NumPy arrays are accepted as well.

    Returns:
        The first element by position, or NaN when the period holds no
        observations.
    """
    if len(array_like) == 0:
        # A period without observations has no first value.
        return float("nan")
    # ``Series[0]`` is a label lookup that only falls back to position on
    # non-integer indexes (a fallback pandas has deprecated), so select by
    # position explicitly whenever the object offers ``iloc``.
    positional = getattr(array_like, "iloc", array_like)
    return positional[0]

first_of_month_prices = prices.resample('M').apply(custom_resampler)
first_of_month_prices.head(10)

Date
2012-01-31    81.805
2012-02-29    79.874
2012-03-31    82.595
2012-04-30    82.823
2012-05-31    82.794
2012-06-30    74.119
2012-07-31    81.177
2012-08-31    82.671
2012-09-30    82.870
2012-10-31    87.322
Freq: M, Name: XOM, dtype: float64

eastern_prices = prices.tz_localize('America/New_York')
eastern_prices.head(10)

Date
2012-01-03 00:00:00-05:00    81.805
2012-01-04 00:00:00-05:00    81.824
2012-01-05 00:00:00-05:00    81.577
2012-01-06 00:00:00-05:00    80.968
2012-01-09 00:00:00-05:00    81.329
2012-01-10 00:00:00-05:00    81.539
2012-01-11 00:00:00-05:00    80.930
2012-01-12 00:00:00-05:00    80.606
2012-01-13 00:00:00-05:00    80.740
2012-01-17 00:00:00-05:00    81.510
Name: XOM, dtype: float64

calendar_dates = pd.date_range(start=start, end=end, freq='D')
print(calendar_dates)

DatetimeIndex(['2012-01-01', '2012-01-02', '2012-01-03', '2012-01-04',
               '2012-01-05', '2012-01-06', '2012-01-07', '2012-01-08',
               '2012-01-09', '2012-01-10',
               ...
               '2015-12-23', '2015-12-24', '2015-12-25', '2015-12-26',
               '2015-12-27', '2015-12-28', '2015-12-29', '2015-12-30',
               '2015-12-31', '2016-01-01'],
              dtype='datetime64[ns]', length=1462, freq='D')

calendar_prices = prices.reindex(calendar_dates, method='ffill')
calendar_prices.head(15)

2012-01-01       NaN
2012-01-02       NaN
2012-01-03    81.805
2012-01-04    81.824
2012-01-05    81.577
2012-01-06    80.968
2012-01-07    80.968
2012-01-08    80.968
2012-01-09    81.329
2012-01-10    81.539
2012-01-11    80.930
2012-01-12    80.606
2012-01-13    80.740
2012-01-14    80.740
2012-01-15    80.740
Freq: D, Name: XOM, dtype: float64

meanfilled_prices = calendar_prices.fillna(calendar_prices.mean())
meanfilled_prices.head(10)

2012-01-01    86.803071
2012-01-02    86.803071
2012-01-03    81.805000
2012-01-04    81.824000
2012-01-05    81.577000
2012-01-06    80.968000
2012-01-07    80.968000
2012-01-08    80.968000
2012-01-09    81.329000
2012-01-10    81.539000
Freq: D, Name: XOM, dtype: float64

# Back-fill: each missing value takes the next valid observation after it.
# ``Series.bfill()`` replaces ``fillna(method='bfill')``, whose ``method``
# argument is deprecated in recent pandas releases.
bfilled_prices = calendar_prices.bfill()
bfilled_prices.head(10)

2012-01-01    81.805
2012-01-02    81.805
2012-01-03    81.805
2012-01-04    81.824
2012-01-05    81.577
2012-01-06    80.968
2012-01-07    80.968
2012-01-08    80.968
2012-01-09    81.329
2012-01-10    81.539
Freq: D, Name: XOM, dtype: float64

dropped_prices = calendar_prices.dropna()
dropped_prices.head(10)

2012-01-03    81.805
2012-01-04    81.824
2012-01-05    81.577
2012-01-06    80.968
2012-01-07    80.968
2012-01-08    80.968
2012-01-09    81.329
2012-01-10    81.539
2012-01-11    80.930
2012-01-12    80.606
Freq: D, Name: XOM, dtype: float64

prices.plot();
# We still need to add the axis labels and title ourselves
plt.title("XOM Prices")
plt.ylabel("Price")
plt.xlabel("Date");

print("Mean:", prices.mean())
print("Standard deviation:", prices.std())

Mean: 86.77727534791242
Standard deviation: 6.800728542530042

print("Summary Statistics")
print(prices.describe())

Summary Statistics
count    1006.000000
mean       86.777275
std         6.800729
min        68.116000
25%        82.356500
50%        85.377000
75%        91.559500
max       102.762000
Name: XOM, dtype: float64

modified_prices = prices * 2 - 10
modified_prices.head(5)

Date
2012-01-03    153.610
2012-01-04    153.648
2012-01-05    153.154
2012-01-06    151.936
2012-01-09    152.658
Name: XOM, dtype: float64

noisy_prices = prices + 5 * pd.Series(np.random.normal(0, 5, len(prices)), index=prices.index) + 20
noisy_prices.head(5)

Date
2012-01-03     72.189883
2012-01-04     91.860976
2012-01-05     88.914534
2012-01-06     75.311759
2012-01-09    103.796395
dtype: float64

empty_series = prices + pd.Series(np.random.normal(0, 1, len(prices)))
empty_series.head(5)

2012-01-03 00:00:00   NaN
2012-01-04 00:00:00   NaN
2012-01-05 00:00:00   NaN
2012-01-06 00:00:00   NaN
2012-01-09 00:00:00   NaN
dtype: float64

add_returns = prices.diff()[1:]
mult_returns = prices.pct_change()[1:]

plt.title("Multiplicative returns of XOM")
plt.xlabel("Date")
plt.ylabel("Percent Returns")
mult_returns.plot();

rolling_mean = prices.rolling(30).mean()
rolling_mean.name = "30-day rolling mean"

prices.plot()
rolling_mean.plot()
plt.title("XOM Price")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend();

rolling_std = prices.rolling(30).std()
rolling_std.name = "30-day rolling volatility"

rolling_std.plot()
plt.title(rolling_std.name);
plt.xlabel("Date")
plt.ylabel("Standard Deviation");

print(np.median(mult_returns))

-0.0003321045465112249

print(mult_returns.median())

-0.0003321045465112249

dict_data = {
    'a' : [1, 2, 3, 4, 5],
    'b' : ['L', 'K', 'J', 'M', 'Z'],
    'c' : np.random.normal(0, 1, 5)
}
print(dict_data)

{'a': [1, 2, 3, 4, 5], 'b': ['L', 'K', 'J', 'M', 'Z'], 'c': array([ 0.34975967, -0.61857671,  0.34380177, -0.4293727 ,  0.08275762])}

frame_data = pd.DataFrame(dict_data, index=pd.date_range('2016-01-01', periods=5))
print(frame_data)

            a  b         c
2016-01-01  1  L  0.349760
2016-01-02  2  K -0.618577
2016-01-03  3  J  0.343802
2016-01-04  4  M -0.429373
2016-01-05  5  Z  0.082758

s_1 = pd.Series([2, 4, 6, 8, 10], name='Evens')
s_2 = pd.Series([1, 3, 5, 7, 9], name="Odds")
numbers = pd.concat([s_1, s_2], axis=1)
print(numbers)

   Evens  Odds
0      2     1
1      4     3
2      6     5
3      8     7
4     10     9

print(numbers.columns)

Index(['Evens', 'Odds'], dtype='object')

numbers.columns = ['Shmevens', 'Shmodds']
print(numbers)

   Shmevens  Shmodds
0         2        1
1         4        3
2         6        5
3         8        7
4        10        9

print(numbers.index)

RangeIndex(start=0, stop=5, step=1)

numbers.index = pd.date_range("2016-01-01", periods=len(numbers))
print(numbers)

            Shmevens  Shmodds
2016-01-01         2        1
2016-01-02         4        3
2016-01-03         6        5
2016-01-04         8        7
2016-01-05        10        9

numbers.values

array([[ 2,  1],
       [ 4,  3],
       [ 6,  5],
       [ 8,  7],
       [10,  9]])

type(numbers.values)

numpy.ndarray

securities = get_securities(symbols=['XOM', 'JNJ', 'MON', 'KKD'], vendors='usstock')
securities

start = "2012-01-01"
end = "2017-01-01"

prices = get_prices("usstock-free-1min", data_frequency="daily", sids=securities.index.tolist(), start_date=start, end_date=end, fields="Close")
prices = prices.loc["Close"]
prices.head()

sids_to_symbols = securities.Symbol.to_dict()
prices = prices.rename(columns=sids_to_symbols)
prices.head()

prices.XOM.head()

Date
2012-01-03    79.014
2012-01-04    79.033
2012-01-05    78.794
2012-01-06    78.206
2012-01-09    78.555
Name: XOM, dtype: float64

prices["XOM"].head()

Date
2012-01-03    79.014
2012-01-04    79.033
2012-01-05    78.794
2012-01-06    78.206
2012-01-09    78.555
Name: XOM, dtype: float64

prices.loc[:, 'XOM'].head()

Date
2012-01-03    79.014
2012-01-04    79.033
2012-01-05    78.794
2012-01-06    78.206
2012-01-09    78.555
Name: XOM, dtype: float64

print(type(prices.XOM))
print(type(prices.loc[:, 'XOM']))

<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>

prices.loc[:, ['XOM', 'JNJ']].head()

prices.loc['2015-12-15':'2015-12-22']

prices.loc['2015-12-15':'2015-12-22', ['XOM', 'JNJ']]

prices.iloc[0:2, 1]

Date
2012-01-03    6.52
2012-01-04    6.36
Name: KKD, dtype: float64

# Access prices with integer index in
# [1, 3, 5, 7, 9, 11, 13, ..., 99]
# and in column 0 or 2
# A single range yields every odd position from 1 through 99.
prices.iloc[list(range(1, 100, 2)), [0, 2]].head(20)

prices.loc[prices.MON > prices.JNJ].head()

prices.loc[(prices.MON > prices.JNJ) & ~(prices.XOM > 66)].head()

securities = get_securities(symbols="AAPL", vendors='usstock')
securities

AAPL = securities.index[0]

s_1 = get_prices("usstock-free-1min", data_frequency="daily", sids=AAPL, start_date=start, end_date=end, fields='Close').loc["Close"][AAPL]
prices.loc[:, AAPL] = s_1
prices.head(5)

prices = prices.drop(AAPL, axis=1)
prices.head(5)

prices.plot()
plt.title("Collected Stock Prices")
plt.ylabel("Price")
plt.xlabel("Date");

prices.mean(axis=0)

Sid
JNJ    88.983560
KKD    15.453603
MON    98.356854
XOM    84.021653
dtype: float64

prices.std(axis=0)

Sid
JNJ    18.026796
KKD     5.016813
MON    12.971706
XOM     6.301019
dtype: float64

prices.describe()

(2 * prices - 50).head(5)

mult_returns = prices.pct_change()[1:]
mult_returns.head()

norm_returns = (mult_returns - mult_returns.mean(axis=0))/mult_returns.std(axis=0)
norm_returns.loc['2014-01-01':'2015-01-01'].plot();

rolling_mean = prices.rolling(30).mean()
rolling_mean.columns = prices.columns

rolling_mean.plot()
plt.title("Rolling Mean of Prices")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend();

	Symbol	Exchange	Country	Currency	SecType	Etf	Timezone	Name	PriceMagnifier	Multiplier	Delisted	DateDelisted	LastTradeDate	RolloverDate
Sid
FIBBG000BFWKC0	MON	XNYS	US	USD	STK	False	America/New_York	MONSANTO CO	1	1	True	2018-06-06	NaT	NaT
FIBBG000BMHYD1	JNJ	XNYS	US	USD	STK	False	America/New_York	JOHNSON & JOHNSON	1	1	False	NaT	NaT	NaT
FIBBG000CK38G3	KKD	XNYS	US	USD	STK	False	America/New_York	KRISPY KREME DOUGHNUTS INC	1	1	True	2016-07-27	NaT	NaT
FIBBG000GZQ728	XOM	XNYS	US	USD	STK	False	America/New_York	EXXON MOBIL CORP	1	1	False	NaT	NaT	NaT
FIBBG00YPSJ318	MON	XNAS	US	USD	STK	False	America/New_York	MONUMENT CIRCLE ACQUI-CL A	1	1	False	NaT	NaT	NaT

Sid	FIBBG000BMHYD1	FIBBG000CK38G3	FIBBG000BFWKC0	FIBBG000GZQ728
Date
2012-01-03	60.473	6.52	68.281	79.014
2012-01-04	60.105	6.36	68.974	79.033
2012-01-05	60.032	6.42	72.780	78.794
2012-01-06	59.509	7.12	73.567	78.206
2012-01-09	59.601	7.31	74.507	78.555

Sid	JNJ	KKD	MON	XOM
Date
2015-12-15	101.268	15.12	93.491	76.720
2015-12-16	102.357	15.09	94.928	76.450
2015-12-17	100.791	14.86	93.608	75.300
2015-12-18	99.148	14.81	92.728	74.644
2015-12-21	98.788	14.98	93.305	74.624
2015-12-22	99.887	14.96	95.143	75.001

Sid	XOM	JNJ
Date
2015-12-15	76.720	101.268
2015-12-16	76.450	102.357
2015-12-17	75.300	100.791
2015-12-18	74.644	99.148
2015-12-21	74.624	98.788
2015-12-22	75.001	99.887

Sid	JNJ	MON
Date
2012-01-04	60.105	68.974
2012-01-06	59.509	73.567
2012-01-10	59.848	75.172
2012-01-12	59.876	76.187
2012-01-17	59.775	76.538
2012-01-19	59.839	76.263
2012-01-23	59.665	75.826
2012-01-25	59.867	77.184
2012-01-27	60.179	76.434
2012-01-31	60.500	77.877
2012-02-02	60.206	77.952
2012-02-06	59.839	75.599
2012-02-08	59.885	75.115
2012-02-10	59.298	73.406
2012-02-14	59.307	73.026
2012-02-16	59.591	74.991
2012-02-21	59.702	75.342
2012-02-23	59.757	73.824
2012-02-27	59.160	74.830
2012-02-29	59.738	73.444

Introduction to pandas

pandas Data Structures

`Series`

Accessing `Series` Elements

Boolean Indexing

Indexing and Time Series

Missing Data

Time Series Analysis with pandas

`DataFrames`

Accessing `DataFrame` elements

Boolean indexing

Adding, Removing Columns, Combining `DataFrames`/`Series`

Time Series Analysis with pandas

Next Steps

	Symbol	Exchange	Country	Currency	SecType	Etf	Timezone	Name	PriceMagnifier	Multiplier	Delisted	DateDelisted	LastTradeDate	RolloverDate
Sid
FIBBG000B9XRY4	AAPL	XNAS	US	USD	STK	False	America/New_York	APPLE INC	1	1	False	NaT	NaT	NaT

Sid	JNJ	KKD	MON	XOM
count	1258.000000	1149.000000	1258.000000	1258.000000
mean	88.983560	15.453603	98.356854	84.021653
std	18.026796	5.016813	12.971706	6.301019
min	56.709000	5.900000	66.335000	65.792000
25%	75.292500	13.000000	88.592500	79.976750
50%	94.091500	16.820000	100.211500	83.230500
75%	99.940500	19.320000	108.674250	88.244250
max	123.714000	26.510000	122.397000	99.256000

Sid	JNJ	KKD	MON	XOM
Date
2012-01-03	70.946	-36.96	86.562	108.028
2012-01-04	70.210	-37.28	87.948	108.066
2012-01-05	70.064	-37.16	95.560	107.588
2012-01-06	69.018	-35.76	97.134	106.412
2012-01-09	69.202	-35.38	99.014	107.110

Sid	JNJ	KKD	MON	XOM
Date
2012-01-04	-0.006085	-0.024540	0.010149	0.000240
2012-01-05	-0.001215	0.009434	0.055180	-0.003024
2012-01-06	-0.008712	0.109034	0.010813	-0.007462
2012-01-09	0.001546	0.026685	0.012777	0.004463
2012-01-10	0.004144	-0.016416	0.008925	0.002571

Introduction to pandas

pandas Data Structures

Series

Accessing Series Elements

Boolean Indexing

Indexing and Time Series

Missing Data

Time Series Analysis with pandas

DataFrames

Accessing DataFrame elements

Boolean indexing

Adding, Removing Columns, Combining DataFrames/Series

Time Series Analysis with pandas

Next Steps

`Series`

Accessing `Series` Elements

`DataFrames`

Accessing `DataFrame` elements

Adding, Removing Columns, Combining `DataFrames`/`Series`