+from __future__ import print_function
+
 import os
+import time
+import pytz
 import pandas as pd
 
-pd.set_option('io.hdf.default_format', 'table')
-
-from pandas import HDFStore
 from pandas.compat import StringIO, bytes_to_str
 
-import grequests
-from gevent import monkey
-monkey.patch_all()
-
-DATA_DIR = os.environ['HOME'] + '/.pytradelib'
-__STORE = None
+import datetime as dt
+
+def batch(list_, size, sleep=None):
+    # Yield consecutive chunks of at most `size` items, optionally
+    # sleeping between chunks (useful for rate-limited downloads).
+    list_ = list(list_)
+    len_ = len(list_)
+    for start_idx in range(0, len_, size):
+        yield list_[start_idx:start_idx + size]
+        if sleep:
+            print('Sleeping for %d seconds' % sleep)
+            time.sleep(sleep)
+
+
+def _sanitize_dates(start, end):
+    # Default to 2010-01-01 through today, then normalize to Timestamps.
+    if start is None:
+        start = dt.datetime(2010, 1, 1)
+    if end is None:
+        end = dt.datetime.today()
+    return pd.to_datetime(start), pd.to_datetime(end)
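As a rough usage sketch of the two helpers above (the ticker list and chunk size are invented for illustration, and `pd.to_datetime` is assumed in place of the removed `pandas.core.datetools` path):

    start, end = _sanitize_dates(None, None)      # Timestamp('2010-01-01'), today
    for chunk in batch(['AAPL', 'MSFT', 'GOOG'], 2):
        print(chunk, start, end)                  # ['AAPL', 'MSFT'] then ['GOOG']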
 
 
-def _bulk_download(urls):
-    return grequests.imap((grequests.get(url) for url in urls))
-
-def get_parse_symbols(symbols, start, end, interval, symbol_to_url, url_to_symbol, data_to_df):
-    urls = (symbol_to_url(symbol.upper(), start, end, interval) for symbol in symbols)
-    def parse_response_to_symbol_and_df(r):
-        return url_to_symbol(r.url), data_to_df(r.text)
-    data = map(parse_response_to_symbol_and_df, _bulk_download(urls))
-    bulk_persist(data)
-    return data
-
 def csv_to_df(text):
-    rs = pd.read_csv(StringIO(bytes_to_str(text)), index_col=0,
-                     parse_dates=True, na_values='-')[::-1]
+    df = pd.read_csv(StringIO(bytes_to_str(text)), index_col=0,
+                     parse_dates=True, infer_datetime_format=True,
+                     na_values='-')[::-1]
 
     # Yahoo! Finance sometimes does this awesome thing where they
     # return 2 rows for the most recent business day
-    if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
-        rs = rs[:-1]
+    if len(df) > 2 and df.index[-1] == df.index[-2]:  # pragma: no cover
+        df = df[:-1]
 
-    # Get rid of unicode characters in index name.
+    # Get rid of unicode characters in index name.
     try:
-        rs.index.name = rs.index.name.decode('unicode_escape').encode('ascii', 'ignore')
+        df.index.name = df.index.name.decode('unicode_escape').encode('ascii', 'ignore')
     except AttributeError:
         # Python 3 string has no decode method.
-        rs.index.name = rs.index.name.encode('ascii', 'ignore').decode()
-    return rs
-
-def get_store():
-    global __STORE
-    if not __STORE:
-        if not os.path.exists(DATA_DIR):
-            os.mkdir(DATA_DIR)
-        __STORE = HDFStore(DATA_DIR + '/store.hdf5')
-    return __STORE
-
-def store_path(symbol, interval):
-    return '/symbols/%s/%s' % (symbol.upper(), interval.lower())
-
-def exists(symbol, interval):
-    store = get_store()
-    return store_path(symbol, interval) in store.keys()
-
-def persist(symbol, interval, df):
-    store = get_store()
-    if exists(symbol, interval):
-        store.append(store_path(symbol, interval), df)
-    else:
-        store.put(store_path(symbol, interval), df)
-
-def bulk_persist(data):
-    for symbol_data, df in data:
-        persist(symbol_data['symbol'], symbol_data['interval'], df)
-
-def most_recent_datetime(symbol, interval):
-    store = get_store()
-    return store.get(store_path(symbol, interval)).tail(1).index[0].to_datetime()
+        df.index.name = df.index.name.encode('ascii', 'ignore').decode()
+
+    column_renames = {'Adj. Open': 'Adj Open', 'Adj. High': 'Adj High',
+                      'Adj. Low': 'Adj Low', 'Adj. Close': 'Adj Close',
+                      'Adj. Volume': 'Adj Volume'}
+    df.rename(columns=column_renames, inplace=True)
+    return df.tz_localize(pytz.UTC)
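A quick, illustrative way to exercise csv_to_df is to feed it a small inline CSV shaped like the Yahoo! download (newest row first); the rows below are made up:

    sample = ("Date,Open,High,Low,Close,Volume,Adj Close\n"
              "2014-01-03,10.0,10.5,9.9,10.2,1000,10.2\n"
              "2014-01-02,9.8,10.1,9.7,10.0,1200,10.0\n")
    df = csv_to_df(sample)
    print(df.index.tz)       # UTC (index is tz-localized)
    print(df.index[0])       # 2014-01-02: rows come back oldest-first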
+
+
+def percent_change(from_val, to_val):
+    # coerce to float for decimal division
+    diff = float(to_val) - from_val
+    return (diff / from_val) * 100
+
+
+def crossed(value, yesterday, today, use_adjusted=True):
+    # True if the (adjusted) close moved through `value` between the
+    # two rows, in either direction.
+    def key(price_key):
+        return 'Adj ' + price_key if use_adjusted else price_key
+    crossed_over = yesterday[key('Close')] < value < today[key('Close')]
+    crossed_under = yesterday[key('Close')] > value > today[key('Close')]
+    return crossed_over or crossed_under
+
+
+def within_percent_of_value(price, value, percent=1):
+    # `percent` is the total band width: price must fall within
+    # +/- (percent / 2)% of value.
+    diff = percent * 0.01 * 0.5 * value
+    return (value - diff) < price < (value + diff)
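For reference, a small hypothetical example of the three price helpers above (the dicts stand in for DataFrame rows carrying an 'Adj Close' column):

    yesterday = {'Adj Close': 99.0}
    today = {'Adj Close': 101.0}
    print(percent_change(99.0, 101.0))             # ~2.02
    print(crossed(100.0, yesterday, today))        # True: close moved up through 100
    print(within_percent_of_value(100.4, 100.0))   # True: inside the +/-0.5% band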