Let’s set up the environment first. We need the coingecko API to fetch prices, and pandas to work with timeseries (and tabular data in general).
from pycoingecko import CoinGeckoAPI
gecko = CoinGeckoAPI()
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib notebook
import datetime as dt
import requests
from IPython.display import clear_output
First, let’s set up the correct URL and parsing and all that jazz.
def get_historical_data_url(coin_id, currency_id, start_date, end_date):
return 'https://www.coingecko.com/en/coins/{}/historical_data/{}?end_date={}&start_date={}'.format(coin_id, currency_id, end_date.isoformat(), start_date.isoformat())
def get_historical_data(coin_id, currency_id, start_date_s, end_date_s):
start_date = dt.date.fromisoformat(start_date_s)
end_date = dt.date.fromisoformat(end_date_s)
url = get_historical_data_url('bitcoin', 'usd', start_date, end_date)
r = requests.get(url, timeout=5, stream=True)
dfs = pd.read_html(r.content, parse_dates=['Date'])
assert len(dfs) == 1
df = dfs[0][::-1].set_index("Date")
df = df.replace({'[$,]': ''}, regex=True).apply(pd.to_numeric)
df.columns = pd.MultiIndex.from_product([[coin_id], df.columns])
return df
df = get_historical_data('bitcoin', 'usd', '2013-01-01', '2021-04-01')
df
bitcoin | ||||
---|---|---|---|---|
Market Cap | Volume | Open | Close | |
Date | ||||
2013-04-28 | 1.500518e+09 | 0.000000e+00 | 135.30 | 141.96 |
2013-04-29 | 1.575032e+09 | 0.000000e+00 | 141.96 | 135.30 |
2013-04-30 | 1.501657e+09 | 0.000000e+00 | 135.30 | 117.00 |
2013-05-01 | 1.298952e+09 | 0.000000e+00 | 117.00 | 103.43 |
2013-05-02 | 1.148668e+09 | 0.000000e+00 | 103.43 | 91.01 |
... | ... | ... | ... | ... |
2021-03-24 | 1.017637e+12 | 6.038338e+10 | 54585.00 | 52527.00 |
2021-03-25 | 9.827974e+11 | 7.302905e+10 | 52527.00 | 51417.00 |
2021-03-26 | 9.591553e+11 | 6.340164e+10 | 51417.00 | 55033.00 |
2021-03-27 | 1.027210e+12 | 5.544256e+10 | 55033.00 | 55832.00 |
2021-03-28 | 1.042184e+12 | 4.728575e+10 | 55832.00 | NaN |
2890 rows × 4 columns
coins_list = pd.DataFrame(gecko.get_coins_list())
# df_complete = pd.DataFrame(index=pd.date_range(start='20130101', freq='1D', end='20210401'))
df_complete = None
coins_list_ids = coins_list['id']
coins_list_ids_len = len(coins_list_ids)
for i in range(coins_list_ids_len):
coin = coins_list_ids[i]
df = get_historical_data(coin, 'usd', '2013-01-01', '2021-04-01')
if df_complete is None:
df_complete = df
else:
df_complete = df_complete.join(df, how='left')
clear_output(wait=True)
print ("Downloaded", coin, "{}/{}".format(i, coins_list_ids_len))
Downloaded chartex 1321/6545
df_complete
01coin | 0-5x-long-algorand-token | |||||||
---|---|---|---|---|---|---|---|---|
Market Cap | Volume | Open | Close | Market Cap | Volume | Open | Close | |
Date | ||||||||
2013-04-28 | 1.500518e+09 | 0.000000e+00 | 135.30 | 141.96 | 1.500518e+09 | 0.000000e+00 | 135.30 | 141.96 |
2013-04-29 | 1.575032e+09 | 0.000000e+00 | 141.96 | 135.30 | 1.575032e+09 | 0.000000e+00 | 141.96 | 135.30 |
2013-04-30 | 1.501657e+09 | 0.000000e+00 | 135.30 | 117.00 | 1.501657e+09 | 0.000000e+00 | 135.30 | 117.00 |
2013-05-01 | 1.298952e+09 | 0.000000e+00 | 117.00 | 103.43 | 1.298952e+09 | 0.000000e+00 | 117.00 | 103.43 |
2013-05-02 | 1.148668e+09 | 0.000000e+00 | 103.43 | 91.01 | 1.148668e+09 | 0.000000e+00 | 103.43 | 91.01 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
2021-03-24 | 1.017637e+12 | 6.038338e+10 | 54585.00 | 52527.00 | 1.017637e+12 | 6.038338e+10 | 54585.00 | 52527.00 |
2021-03-25 | 9.827974e+11 | 7.302905e+10 | 52527.00 | 51417.00 | 9.827974e+11 | 7.302905e+10 | 52527.00 | 51417.00 |
2021-03-26 | 9.591553e+11 | 6.340164e+10 | 51417.00 | 55033.00 | 9.591553e+11 | 6.340164e+10 | 51417.00 | 55033.00 |
2021-03-27 | 1.027210e+12 | 5.544256e+10 | 55033.00 | 55832.00 | 1.027210e+12 | 5.544256e+10 | 55033.00 | 55832.00 |
2021-03-28 | 1.042184e+12 | 4.728575e+10 | 55832.00 | NaN | 1.042184e+12 | 4.728575e+10 | 55832.00 | NaN |
2890 rows × 8 columns