Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion 01_materials/labs/update_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,6 @@
src_path = (notebook_dir / "../../05_src").resolve()

if str(src_path) not in sys.path:
sys.path.insert(0, str(src_path)) # insert(0) gives it priority
sys.path.insert(0, str(src_path)) # insert(0) gives it priority


247 changes: 235 additions & 12 deletions 02_activities/assignments/assignment_1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,26 @@
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The dotenv extension is already loaded. To reload it, use:\n",
" %reload_ext dotenv\n"
]
}
],
"source": [
"# Write your code below.\n",
"%load_ext dotenv\n",
"%dotenv\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -55,14 +66,29 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:__main__:Found 2045 price files.\n"
]
}
],
"source": [
"import os\n",
"from glob import glob\n",
"import logging\n",
"\n",
"# Write your code below.\n",
"\n",
"_logs = logging.getLogger(__name__)\n",
"if not logging.getLogger().handlers:\n",
"\tlogging.basicConfig(level=logging.INFO)\n",
"\n",
"price_files = glob(os.path.join(os.getenv('PRICE_DATA'), \"**/**\", \"*.parquet\"))\n",
"_logs.info(f'Found {len(price_files)} price files.')\n",
"\n"
]
},
Expand All @@ -88,12 +114,202 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_25160\\3543449060.py:9: UserWarning: `meta` is not specified, inferred from partial data.\n",
"Please provide `meta` if the result is unexpected.\n",
" Before: .shift(func)\n",
" After: .shift(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result\n",
" or: .shift(func, meta=('x', 'f8')) for series result\n",
"\n",
" dd_feat['Close_lag_1'] = dd_feat.groupby('ticker')['Close'].shift(1)\n",
"C:\\Users\\Admin\\AppData\\Local\\Temp\\ipykernel_25160\\3543449060.py:10: UserWarning: `meta` is not specified, inferred from partial data.\n",
"Please provide `meta` if the result is unexpected.\n",
" Before: .shift(func)\n",
" After: .shift(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result\n",
" or: .shift(func, meta=('x', 'f8')) for series result\n",
"\n",
" dd_feat['Adj_Close_lag_1'] = dd_feat.groupby('ticker')['Adj_Close'].shift(1)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Date</th>\n",
" <th>Open</th>\n",
" <th>High</th>\n",
" <th>Low</th>\n",
" <th>Close</th>\n",
" <th>Adj_Close</th>\n",
" <th>Volume</th>\n",
" <th>source</th>\n",
" <th>ticker</th>\n",
" <th>Year</th>\n",
" <th>Close_lag_1</th>\n",
" <th>Adj_Close_lag_1</th>\n",
" <th>returns</th>\n",
" <th>hi_lo_range</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>175864</th>\n",
" <td>2012-01-19</td>\n",
" <td>88.199997</td>\n",
" <td>88.889999</td>\n",
" <td>87.610001</td>\n",
" <td>88.830002</td>\n",
" <td>88.830002</td>\n",
" <td>894300.0</td>\n",
" <td>LH.csv</td>\n",
" <td>LH</td>\n",
" <td>2012</td>\n",
" <td>88.470001</td>\n",
" <td>88.470001</td>\n",
" <td>0.004069</td>\n",
" <td>1.279999</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76009</th>\n",
" <td>2014-02-26</td>\n",
" <td>53.779999</td>\n",
" <td>54.049999</td>\n",
" <td>53.470001</td>\n",
" <td>53.849998</td>\n",
" <td>47.856152</td>\n",
" <td>2025700.0</td>\n",
" <td>ALL.csv</td>\n",
" <td>ALL</td>\n",
" <td>2014</td>\n",
" <td>53.650002</td>\n",
" <td>47.678425</td>\n",
" <td>0.003728</td>\n",
" <td>0.579998</td>\n",
" </tr>\n",
" <tr>\n",
" <th>172152</th>\n",
" <td>1997-04-21</td>\n",
" <td>7.500000</td>\n",
" <td>7.812500</td>\n",
" <td>7.500000</td>\n",
" <td>7.500000</td>\n",
" <td>7.500000</td>\n",
" <td>5200.0</td>\n",
" <td>LH.csv</td>\n",
" <td>LH</td>\n",
" <td>1997</td>\n",
" <td>7.812500</td>\n",
" <td>7.812500</td>\n",
" <td>-0.040000</td>\n",
" <td>0.312500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>171503</th>\n",
" <td>1994-09-26</td>\n",
" <td>31.875000</td>\n",
" <td>33.437500</td>\n",
" <td>31.250000</td>\n",
" <td>33.125000</td>\n",
" <td>33.125000</td>\n",
" <td>147600.0</td>\n",
" <td>LH.csv</td>\n",
" <td>LH</td>\n",
" <td>1994</td>\n",
" <td>32.187500</td>\n",
" <td>32.187500</td>\n",
" <td>0.029126</td>\n",
" <td>2.187500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>171166</th>\n",
" <td>1993-05-26</td>\n",
" <td>45.000000</td>\n",
" <td>45.625000</td>\n",
" <td>44.687500</td>\n",
" <td>45.625000</td>\n",
" <td>44.662472</td>\n",
" <td>341200.0</td>\n",
" <td>LH.csv</td>\n",
" <td>LH</td>\n",
" <td>1993</td>\n",
" <td>45.000000</td>\n",
" <td>44.050655</td>\n",
" <td>0.013889</td>\n",
" <td>0.937500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Date Open High Low Close Adj_Close \\\n",
"175864 2012-01-19 88.199997 88.889999 87.610001 88.830002 88.830002 \n",
"76009 2014-02-26 53.779999 54.049999 53.470001 53.849998 47.856152 \n",
"172152 1997-04-21 7.500000 7.812500 7.500000 7.500000 7.500000 \n",
"171503 1994-09-26 31.875000 33.437500 31.250000 33.125000 33.125000 \n",
"171166 1993-05-26 45.000000 45.625000 44.687500 45.625000 44.662472 \n",
"\n",
" Volume source ticker Year Close_lag_1 Adj_Close_lag_1 \\\n",
"175864 894300.0 LH.csv LH 2012 88.470001 88.470001 \n",
"76009 2025700.0 ALL.csv ALL 2014 53.650002 47.678425 \n",
"172152 5200.0 LH.csv LH 1997 7.812500 7.812500 \n",
"171503 147600.0 LH.csv LH 1994 32.187500 32.187500 \n",
"171166 341200.0 LH.csv LH 1993 45.000000 44.050655 \n",
"\n",
" returns hi_lo_range \n",
"175864 0.004069 1.279999 \n",
"76009 0.003728 0.579998 \n",
"172152 -0.040000 0.312500 \n",
"171503 0.029126 2.187500 \n",
"171166 0.013889 0.937500 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Write your code below.\n",
"\n"
"# Load data into Dask dataframe\n",
"dd_feat = dd.read_parquet(price_files)\n",
"\n",
"# Normalize column names (replace spaces with underscores)\n",
"dd_feat = dd_feat.rename(columns=lambda x: x.replace(' ', '_'))\n",
"\n",
"# Add lag features for Close and Adj_Close\n",
"dd_feat['Close_lag_1'] = dd_feat.groupby('ticker')['Close'].shift(1)\n",
"dd_feat['Adj_Close_lag_1'] = dd_feat.groupby('ticker')['Adj_Close'].shift(1)\n",
"\n",
"# Add returns based on Close\n",
"dd_feat['returns'] = (dd_feat['Close'] / dd_feat['Close_lag_1']) - 1\n",
"\n",
"# Add hi_lo_range (High minus Low)\n",
"dd_feat['hi_lo_range'] = dd_feat['High'] - dd_feat['Low']\n",
"dd_feat.head()\n"
]
},
{
Expand All @@ -108,12 +324,19 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Write your code below.\n",
"\n"
"\n",
"# Convert Dask dataframe to pandas\n",
"df = dd_feat.compute()\n",
"\n",
"# Add moving average of returns with 10-day window\n",
"df['returns_ma_10'] = df.groupby('ticker')['returns'].transform(\n",
" lambda x: x.rolling(10).mean()\n",
")\n"
]
},
{
Expand Down Expand Up @@ -165,7 +388,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "env",
"display_name": "production-env",
"language": "python",
"name": "python3"
},
Expand All @@ -179,7 +402,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
"version": "3.11.14"
}
},
"nbformat": 4,
Expand Down
Empty file added utils/__init__.py
Empty file.
Empty file added utils/logger.py
Empty file.