Skip to content

Commit 65e4f09

Browse files
committed
FEAT: Take install scripts from s3
1 parent dde0310 commit 65e4f09

File tree

3 files changed

+206
-42
lines changed

3 files changed

+206
-42
lines changed
Lines changed: 19 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,31 @@
11
#!/bin/bash

# AWS EMR bootstrap script: fetch the Hail python zip and the all-spark jar
# from an S3 prefix onto this node's home directory.
#
# Runs on EVERY node (master and workers) because there is no shared
# directory and the bgz codec must be on the classpath for both.
#
# Usage: <script> --input-path s3://bucket/prefix
#                 [--hail-version VERSION] [--spark-version VERSION]
set -x -e

INPUT_PATH=""
HAIL_VERSION="0.1"
SPARK_VERSION="2.2.1"
IS_MASTER=false

# EMR marks the master node in instance.json; workers fall through with false.
if grep isMaster /mnt/var/lib/info/instance.json | grep true;
then
  IS_MASTER=true
fi

# Print a warning to stderr but keep going (unknown options are best-effort).
error_msg ()
{
  echo 1>&2 "Error: $1"
}

# Print a fatal error to stderr and abort the bootstrap.
fatal_error_msg ()
{
  echo 1>&2 "Fatal error: $1"
  exit 1
}

# get input parameters
while [ $# -gt 0 ]; do
  case "$1" in
    --input-path)
      shift
      INPUT_PATH="$1"
      ;;
    --hail-version)
      shift
      HAIL_VERSION="$1"
      ;;
    --spark-version)
      shift
      SPARK_VERSION="$1"
      ;;
    -*)
      # do not exit out, just note failure
      error_msg "unrecognized option: $1"
      ;;
    *)
      break
      ;;
  esac
  shift
done

# Fail fast with a clear message instead of letting 'aws s3 cp' fail cryptically.
if [ -z "${INPUT_PATH}" ]; then
  fatal_error_msg "Parameter: --input-path is required"
fi

# copy hail to both master and workers
# as there is no shared dir and the bgz codec is needed on classpath for both
aws s3 cp "${INPUT_PATH}/hail-python.zip" "${HOME}"
aws s3 cp "${INPUT_PATH}/hail-all-spark.jar" "${HOME}"
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/bin/bash

# AWS EMR bootstrap script: on the MASTER node only, install miniconda2 and
# create a python 2.7 "jupyter" conda env with the analysis packages.
# Worker nodes parse their arguments and exit without installing anything.
#
# Usage: <script> [--input-path s3://bucket/prefix]
#                 [--hail-version VERSION] [--spark-version VERSION]
#                 [--notebookPath s3://bucket/prefix]
set -x -e

INPUT_PATH=""
HAIL_VERSION="0.1"
SPARK_VERSION="2.2.1"
IS_MASTER=false

# EMR marks the master node in instance.json; workers fall through with false.
if grep isMaster /mnt/var/lib/info/instance.json | grep true;
then
  IS_MASTER=true
fi

# Print a warning to stderr but keep going (unknown options are best-effort).
# NOTE(review): this helper was missing — the -*) arm below called an
# undefined function, which aborts the bootstrap under 'set -e'.
error_msg ()
{
  echo 1>&2 "Error: $1"
}

# get input parameters
while [ $# -gt 0 ]; do
  case "$1" in
    --input-path)
      shift
      INPUT_PATH="$1"
      ;;
    --hail-version)
      shift
      HAIL_VERSION="$1"
      ;;
    --spark-version)
      shift
      SPARK_VERSION="$1"
      ;;
    --notebookPath)
      shift
      NotebookPath="$1"
      ;;
    -*)
      error_msg "unrecognized option: $1"
      ;;
    *)
      break
      ;;
  esac
  shift
done

if [ "$IS_MASTER" = true ]; then
  # Install miniconda in batch mode (-b): no prompts, default ~/miniconda2 prefix.
  wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
  sh Miniconda2-latest-Linux-x86_64.sh -b
  export PATH=~/miniconda2/bin:$PATH
  conda create -y -n jupyter python=2.7
  source activate jupyter
  # Install other packages
  # TODO: make these configurable
  pip install --upgrade matplotlib pandas click variant-spark
fi
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#!/bin/bash

# AWS EMR bootstrap script: on the MASTER node, install miniconda2 + jupyter
# with the s3contents notebook manager (notebooks stored under --notebookPath),
# register a "JuSpark" kernel wired to the Hail jar/zip copied by the install
# script, and run jupyter as an upstart daemon via puppet.
#
# Usage: <script> --notebookPath s3://bucket/prefix
#                 [--input-path s3://bucket/prefix]
#                 [--hail-version VERSION] [--spark-version VERSION]
set -x -e

INPUT_PATH=""
HAIL_VERSION="0.1"
SPARK_VERSION="2.2.1"
IS_MASTER=false

# EMR marks the master node in instance.json; workers fall through with false.
if grep isMaster /mnt/var/lib/info/instance.json | grep true;
then
  IS_MASTER=true
fi

# Print a warning to stderr but keep going (unknown options are best-effort).
# NOTE(review): this helper was missing — the -*) arm below called an
# undefined function, which aborts the bootstrap under 'set -e'.
error_msg ()
{
  echo 1>&2 "Error: $1"
}

# get input parameters
while [ $# -gt 0 ]; do
  case "$1" in
    --input-path)
      shift
      INPUT_PATH="$1"
      ;;
    --hail-version)
      shift
      HAIL_VERSION="$1"
      ;;
    --spark-version)
      shift
      SPARK_VERSION="$1"
      ;;
    --notebookPath)
      shift
      NotebookPath="$1"
      ;;
    -*)
      error_msg "unrecognized option: $1"
      ;;
    *)
      break
      ;;
  esac
  shift
done

# Split "s3://bucket/prefix" into BUCKET and PREFIX with builtin parameter
# expansion (replaces two awk subprocesses doing the same substring math).
BUCKET="${NotebookPath#s3://}"
BUCKET="${BUCKET%%/*}"
PREFIX="${NotebookPath#s3://${BUCKET}/}"

# Seed the notebook location with the example notebook.
aws s3 cp s3://variant-spark/HailJupyter/VariantSpark_example_with_Hail_library.ipynb VariantSpark_example_with_Hail_library.ipynb
aws s3 cp VariantSpark_example_with_Hail_library.ipynb "${NotebookPath}/"

# Register jupyter as a respawning upstart job (runs as the hadoop user,
# logging to /home/hadoop/jupyter.log). The PUPPET_SCRIPT heredoc is
# deliberately unquoted so nothing here needs bootstrap-time expansion;
# \$PYTHONPATH is escaped so it expands when the job runs, not when this
# bootstrap generates the job definition.
upstart_jupyter() {
  sudo puppet apply << PUPPET_SCRIPT
include 'upstart'
upstart::job { 'jupyter':
  description => 'Jupyter',
  respawn => true,
  respawn_limit => '0 10',
  start_on => 'runlevel [2345]',
  stop_on => 'runlevel [016]',
  console => 'output',
  chdir => '/home/hadoop',
  script => '
sudo su - hadoop > /home/hadoop/jupyter.log 2>&1 <<BASH_SCRIPT
export SPARK_HOME=/usr/lib/spark
export PYTHONPATH=\$PYTHONPATH:/home/hadoop/hail-python.zip
/home/hadoop/miniconda2/envs/jupyter/bin/jupyter notebook
BASH_SCRIPT
',
}
PUPPET_SCRIPT
}


if [ "$IS_MASTER" = true ]; then
  # Install miniconda in batch mode (-b): no prompts, default ~/miniconda2 prefix.
  wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
  sh Miniconda2-latest-Linux-x86_64.sh -b
  export PATH=~/miniconda2/bin:$PATH
  conda create -y -n jupyter python=2.7
  source activate jupyter
  # Install other packages
  # TODO: make these configurable
  pip install --upgrade matplotlib pandas click variant-spark
  # Install jupyter components
  pip install --upgrade jupyter==1.0.0 s3contents==0.1.4 decorator==4.2.1 notebook==5.7.0 juspark
  mkdir -p ~/.jupyter
  # Store notebooks in S3 via s3contents; $BUCKET/$PREFIX expand now on purpose.
  cat >> ~/.jupyter/jupyter_notebook_config.py << EOF
# S3ContentsManager
from s3contents import S3ContentsManager
c.NotebookApp.contents_manager_class = S3ContentsManager
c.S3ContentsManager.bucket_name = "$BUCKET"
c.S3ContentsManager.prefix = "$PREFIX"
EOF

  # NOTE(review): no token/password and remote access allowed — the notebook
  # is open; access control is presumably left to EMR security groups.
  cat >> ~/.jupyter/jupyter_notebook_config.py << EOF
c.NotebookApp.token = ''
c.NotebookApp.password = ''
c.NotebookApp.ip = '*'
c.NotebookApp.open_browser = False
c.NotebookApp.allow_remote_access = True
EOF

  # Setup JuSpark kernel

  mkdir -p ~/.local/share/jupyter/kernels/juspark
  cat > ~/.local/share/jupyter/kernels/juspark/kernel.json << EOF
{
 "display_name": "JuSpark",
 "language": "python",
 "argv": [
  "/home/hadoop/miniconda2/envs/jupyter/bin/python",
  "-m",
  "ipykernel",
  "-f",
  "{connection_file}",
  "--ext=juspark"
 ]
}
EOF

  # Setup profiles for juspark
  mkdir -p ~/.juspark/profiles
  cat > ~/.juspark/profiles/hail << EOF
{
 "spark.jars":"/home/hadoop/hail-all-spark.jar",
 "spark.submit.pyFiles":"/home/hadoop/hail-python.zip"
}
EOF

  # Install puppet modules
  sudo puppet module install spantree-upstart

  # Setup daemons
  upstart_jupyter

fi

0 commit comments

Comments
 (0)