Skip to content

Commit 5dcc300

Browse files
committed
Add script to set up a notebook session to use Data Flow
1 parent e1ffdfe commit 5dcc300

File tree

1 file changed

+62
-0
lines changed

1 file changed

+62
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
#!/bin/bash
#
# Install the pyspark conda environment for use with Data Flow in a Data
# Science notebook session, then generate dataflow_notebook.ipynb — a
# starter notebook that imports ADS and loads the SparkMagic extension.

# Make the `conda` / `odsc` commands available in this non-interactive shell.
source /etc/profile.d/enableconda.sh

ENV_NAME=pyspark32_p38_cpu_v3

# Path to the folder that holds installed conda environments.
ENV_FOLDER="$HOME/conda"

# Install the environment only if it is not already present.
if [ -d "$ENV_FOLDER/$ENV_NAME" ]
then
    echo "Conda environment '$ENV_NAME' found."
else
    echo "Conda environment '$ENV_NAME' not found, installing..."
    odsc conda install -s "$ENV_NAME"
fi

# Activate the conda environment so `python` below resolves to it.
conda activate "$ENV_FOLDER"/"$ENV_NAME"

echo "Conda environment '$ENV_NAME' is now activated."

# Write the helper script that builds the starter notebook.
# The heredoc delimiter is quoted so the shell does not expand anything
# inside the Python source. Note: the environment is already active in
# this shell, so the generated script does not need to (and could not)
# re-activate it — `os.system("source activate ...")` in a child /bin/sh
# would be a no-op.
cat << 'EOF' > pyscript.py
"""Generate dataflow_notebook.ipynb: a starter notebook for Data Flow.

The notebook contains one markdown cell and one code cell that imports
ads, sets resource-principal auth, and loads the SparkMagic extension.
"""
import nbformat as nbf
from nbformat.v4 import new_code_cell, new_notebook, new_markdown_cell

cells = [
    new_markdown_cell(
        source='import ADS and load the SparkMagic extension',
    ),
    new_code_cell(
        source='import ads\nads.set_auth("resource_principal")\n%load_ext dataflow.magics',
        execution_count=1,
    ),
]

nb0 = new_notebook(
    cells=cells,
    metadata={
        'language': 'python',
    },
)

# Write the notebook as nbformat v4; `with` guarantees the file is closed.
with open('dataflow_notebook.ipynb', mode='w', encoding='utf-8') as f:
    nbf.write(nb0, f, 4)
EOF

# The script is run through the interpreter, so no exec bit is needed.
python pyscript.py

0 commit comments

Comments
 (0)