|
| 1 | +from pandas_datareader import data as pdr |
| 2 | +import pandas as pd |
| 3 | +import numpy as np |
| 4 | +import yfinance as yf |
| 5 | + |
# Fetch the complete symbol table, keep only the rows flagged as
# Nasdaq-traded, then narrow to listing-exchange code "Q" (NASDAQ-listed).
all_symbols = pdr.get_nasdaq_symbols()
all_symbols = all_symbols.loc[all_symbols['Nasdaq Traded']]
nasd = all_symbols.loc[all_symbols['Listing Exchange'].eq("Q")]

# The ticker symbols are the index labels of the filtered table.
nasdaq = nasd.index.tolist()
| 11 | + |
# Save all NASDAQ symbols to symbols.txt, one symbol per line.
# The context manager guarantees the file is flushed and closed even if a
# write fails part-way through the list.
with open("symbols.txt", 'w', encoding="utf-8") as file_write_obj:
    file_write_obj.writelines(f"{symbol}\n" for symbol in nasdaq)
| 18 | + |
# Download one year of daily price history for every NASDAQ-listed symbol.
data = yf.download(
    # Symbols to request; a list (as here) or a single string is accepted.
    tickers=nasdaq,

    # Explicit date window, used here in place of a relative "period"
    # (valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max).
    start="2019-01-01",
    end="2020-01-01",

    # Bar size. Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
    # (intraday intervals only when the requested span is under 60 days).
    interval="1d",

    # Put the price field on the outer column level so that data["Close"]
    # selects the closing prices of all tickers at once ('ticker' would
    # instead allow access such as data['SPY']).
    group_by='column',

    # Adjust all OHLC values automatically.
    auto_adjust=True,

    # Include pre/post regular market hours data.
    prepost=True,

    # Use threads for mass downloading (True/False/Integer).
    threads=True,

    # Proxy URL scheme to use when downloading; None means no proxy.
    proxy=None,
)
# Keep only the closing prices from the downloaded data.
close = data["Close"]

# Fill gaps with the previous day's close. DataFrame.ffill() is the supported
# spelling; fillna(method='ffill') is deprecated in pandas 2.x.
close = close.ffill()

# Forward-filling cannot fill leading gaps, so any column that still contains
# NaN had no price at the start of the window (not traded at the beginning of
# 2019) and is dropped.
close = close.dropna(axis="columns")

# Day-over-day fractional change of the close for each remaining stock.
daily_change = close.pct_change()

# The first day has no previous close to compare against, so its row is NaN.
daily_change = daily_change.dropna()
| 69 | + |
# For every day, rank that day's gainers and losers.
# Key legend: "gt" = change greater than 0, "lt" = change less than 0,
# "val" = the change values, "sym" = the matching stock symbols.
df = {"gt_val": {}, "gt_sym": {}, "lt_val": {}, "lt_sym": {}}

# iterrows() yields (index label, row Series): the label is the date and the
# row holds every stock's change for that day.
for date, series in daily_change.iterrows():
    # Losers ordered from the largest drop, gainers from the largest rise.
    series_lt = series[series < 0].sort_values(ascending=True)
    series_gt = series[series > 0].sort_values(ascending=False)
    df["gt_val"][date] = series_gt.to_list()
    df["gt_sym"][date] = series_gt.index.to_list()
    # No trailing comma here: a stray comma would store a 1-tuple wrapping the
    # list, making this column inconsistent with "gt_val".
    df["lt_val"][date] = series_lt.to_list()
    df["lt_sym"][date] = series_lt.index.to_list()

# One row per date, one column per key; each cell holds a list.
df = pd.DataFrame(df)
df.to_csv("data.csv")
| 84 | + |
| 85 | + |
0 commit comments