Commit 8370e57

updated PySpark demo to run on binder

1 parent ef724f3

File tree: 3 files changed (+83, -70 lines)

README.md

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 Repository of my talks/presentations
 
-* [Introduction to PySpark]()
+* [Introduction to PySpark](https://github.com/shagunsodhani/talks/tree/master/spark/PyDelhi)

spark/PyDelhi/PySpark.ipynb

Lines changed: 79 additions & 68 deletions
@@ -2,9 +2,9 @@
 "cells": [
 {
 "cell_type": "code",
-"execution_count": 1,
+"execution_count": 42,
 "metadata": {
-"collapsed": true
+"collapsed": false
 },
 "outputs": [],
 "source": [
@@ -15,7 +15,18 @@
 "from pyspark.sql import SQLContext\n",
 "from pyspark.sql import Row\n",
 "from pyspark.sql.types import *\n",
-"import pyspark.sql.functions as func"
+"import pyspark.sql.functions as func\n",
+"from pyspark import SparkContext\n",
+"import json"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {
+"collapsed": false
+},
+"source": [
+"If the notebook is run locally, then sc (SparkContext) would be pre-configured. If running using binder, we need to create SparkContext."
 ]
 },
 {
@@ -28,7 +39,7 @@
 {
 "data": {
 "text/plain": [
-"<pyspark.context.SparkContext at 0x7f1084022f50>"
+"<pyspark.context.SparkContext at 0x7fa3ec13d090>"
 ]
 },
 "execution_count": 2,
@@ -38,6 +49,19 @@
 ],
 "source": [
 "#This notebook comes with a pre-configured sparkContext called sc\n",
+"try:\n",
+"    sc\n",
+"except NameError:\n",
+"    sc = SparkContext(master='spark://master:7077')\n",
+"    with open(\"data/sequence.txt\") as f:\n",
+"        sequence = [x.strip('\\n') for x in f.readlines()]\n",
+"    file_rdd = sc.parallelize(sequence)\n",
+"    with open(\"data/people.json\") as f:\n",
+"        json_data = [x.strip('\\n') for x in f.readlines()]\n",
+"    json_rdd = sc.parallelize(json_data)\n",
+"else:\n",
+"    file_rdd = sc.textFile(\"data/sequence.txt\")\n",
+"    json_rdd = sc.textFile(\"data/people.json\")\n",
 "sc"
 ]
 },
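
For readers skimming the diff: the cell added above boils down to the following fallback pattern (a minimal sketch assembled from the added lines; the spark://master:7077 URL and the data/ paths are taken straight from the diff, nothing else is assumed).

```python
from pyspark import SparkContext

try:
    sc  # pre-configured SparkContext, available when the notebook runs locally
except NameError:
    # On Binder no SparkContext is pre-configured, so create one and build the
    # RDDs from the local files via parallelize().
    sc = SparkContext(master='spark://master:7077')
    with open("data/sequence.txt") as f:
        sequence = [line.strip('\n') for line in f]
    file_rdd = sc.parallelize(sequence)
    with open("data/people.json") as f:
        json_data = [line.strip('\n') for line in f]
    json_rdd = sc.parallelize(json_data)
else:
    # Locally, let Spark read the files directly.
    file_rdd = sc.textFile("data/sequence.txt")
    json_rdd = sc.textFile("data/people.json")
```
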
@@ -76,11 +100,8 @@
 "outputs": [],
 "source": [
 "#More RDDs\n",
-"file_path = \"data/sequence.txt\"\n",
-"file_rdd = sc.textFile(file_path)\n",
-"json_rdd = sc.textFile(\"data/people.json\")\n",
-"# print json_rdd.collect()\n",
-"# print type(json_rdd.collect())\n",
+"print json_rdd.collect()\n",
+"print type(json_rdd.collect())\n",
 "#Spark supports text files, SequenceFiles, and any other Hadoop InputFormat."
 ]
 },
@@ -95,7 +116,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Time taken (in seconds) = 0.00355696678162\n"
+"Time taken (in seconds) = 9.91821289062e-05\n"
 ]
 }
 ],
@@ -117,7 +138,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Time taken (in seconds) = 5.95632791519\n"
+"Time taken (in seconds) = 1.08630800247\n"
 ]
 }
 ],
@@ -139,7 +160,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Time taken (in seconds) = 0.00350093841553\n"
+"Time taken (in seconds) = 6.79492950439e-05\n"
 ]
 }
 ],
@@ -161,7 +182,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Time taken (in seconds) = 2.28691196442\n"
+"Time taken (in seconds) = 1.19471502304\n"
 ]
 }
 ],
@@ -211,7 +232,7 @@
 "output_type": "stream",
 "text": [
 "10\n",
-"Time taken (in seconds) = 0.129802942276\n"
+"Time taken (in seconds) = 0.0378739833832\n"
 ]
 }
 ],
@@ -234,7 +255,7 @@
 "output_type": "stream",
 "text": [
 "100000\n",
-"Time taken (in seconds) = 0.553807973862\n"
+"Time taken (in seconds) = 0.257553100586\n"
 ]
 }
 ],
@@ -257,7 +278,7 @@
 "output_type": "stream",
 "text": [
 "1000000\n",
-"Time taken (in seconds) = 2.25160479546\n"
+"Time taken (in seconds) = 1.15057992935\n"
 ]
 }
 ],
@@ -300,15 +321,15 @@
 "text": [
 "We want to count the number of 1, 2, ... digit numbers.\n",
 "[(1, 9), (2, 90), (3, 900), (4, 9000), (5, 90000), (6, 900000), (7, 1)]\n",
-"Time taken (in seconds) = 1.79352688789\n"
+"Time taken (in seconds) = 1.0940117836\n"
 ]
 }
 ],
 "source": [
 "start_time = time()\n",
 "print \"We want to count the number of 1, 2, ... digit numbers.\"\n",
-"file_path = \"data/sequence.txt\"\n",
-"file_rdd = sc.textFile(file_path) \n",
+"# file_path = \"data/sequence.txt\"\n",
+"# file_rdd = sc.textFile(file_path) \n",
 "mapped_rdd = file_rdd.map(lambda a: (len(a), 1))\n",
 "count_rdd = mapped_rdd.reduceByKey(lambda a, b: a+b).sortByKey()\n",
 "print count_rdd.collect()\n",
@@ -328,7 +349,7 @@
 "output_type": "stream",
 "text": [
 "We want to count the number of 1, 2, ... digit numbers.\n",
-"Time taken (in seconds) = 7.90541195869\n"
+"Time taken (in seconds) = 4.63194608688\n"
 ]
 }
 ],
@@ -415,7 +436,7 @@
 {
 "data": {
 "text/plain": [
-"<pyspark.sql.context.SQLContext at 0x7f10669611d0>"
+"<pyspark.sql.context.SQLContext at 0x7fa3c1f88690>"
 ]
 },
 "execution_count": 18,
@@ -507,35 +528,16 @@
 },
 {
 "cell_type": "code",
-"execution_count": 21,
+"execution_count": null,
 "metadata": {
 "collapsed": false
 },
-"outputs": [
-{
-"ename": "TypeError",
-"evalue": "Can not infer schema for type: <type 'unicode'>",
-"output_type": "error",
-"traceback": [
-... (nine escape-code-laden traceback strings, tracing the TypeError raised inside SQLContext schema inference when rdd_df = file_rdd.toDF() was executed) ...
-]
-}
-],
-"source": [
-"rdd_df = file_rdd.toDF()"
-]
+"outputs": [],
+"source": []
 },
 {
 "cell_type": "code",
-"execution_count": 22,
+"execution_count": 21,
 "metadata": {
 "collapsed": false
 },
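
The cell removed above used to fail because SQLContext cannot infer a DataFrame schema from an RDD of bare unicode strings. A minimal sketch of one workaround (illustrative only, not part of this commit; it assumes the sqlContext and file_rdd created earlier in the notebook) is to wrap each line in a Row so the schema has a named field:

```python
from pyspark.sql import Row

# Plain strings defeat schema inference, so give each record a named field.
row_rdd = file_rdd.map(lambda line: Row(value=line))
rdd_df = sqlContext.createDataFrame(row_rdd)
rdd_df.show(5)
```
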
@@ -584,7 +586,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 23,
+"execution_count": 22,
 "metadata": {
 "collapsed": false
 },
@@ -597,7 +599,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 24,
+"execution_count": 23,
 "metadata": {
 "collapsed": false
 },
@@ -641,7 +643,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 25,
+"execution_count": 24,
 "metadata": {
 "collapsed": false
 },
@@ -662,7 +664,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 26,
+"execution_count": 25,
 "metadata": {
 "collapsed": false
 },
@@ -706,7 +708,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 27,
+"execution_count": 26,
 "metadata": {
 "collapsed": false
 },
@@ -731,7 +733,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 28,
+"execution_count": 27,
 "metadata": {
 "collapsed": false
 },
@@ -753,15 +755,15 @@
 "|dmatthewsb7@image...| 1|\n",
 "| [email protected]| 1|\n",
+"| [email protected]| 1|\n",
+"| [email protected]| 1|\n",
+"|ladamsgf@hubpages...| 1|\n",
 "| [email protected]| 1|\n",
 "| [email protected]| 1|\n",
 "|mjoneslf@wootheme...| 1|\n",
 "|mfranklinn9@hao12...| 1|\n",
 "|aandrewspf@redcro...| 1|\n",
-"| [email protected]| 1|\n",
-"| [email protected]| 1|\n",
-"|ladamsgf@hubpages...| 1|\n",
 "|abrown36@yellowpa...| 1|\n",
 "+--------------------+-----+\n",
 "only showing top 20 rows\n",
@@ -775,7 +777,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 29,
+"execution_count": 28,
 "metadata": {
 "collapsed": false
 },
@@ -795,7 +797,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 30,
+"execution_count": 29,
 "metadata": {
 "collapsed": false
 },
@@ -839,7 +841,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 31,
+"execution_count": 30,
 "metadata": {
 "collapsed": false
 },
@@ -853,25 +855,25 @@
 "| domain|count|\n",
 "+--------------------+-----+\n",
 "| alibaba.com| 8|\n",
-"| examiner.com| 7|\n",
 "| 163.com| 7|\n",
-"| woothemes.com| 6|\n",
-"| mlb.com| 6|\n",
+"| examiner.com| 7|\n",
 "| friendfeed.com| 6|\n",
-"| fda.gov| 6|\n",
+"| lulu.com| 6|\n",
 "| free.fr| 6|\n",
+"| fda.gov| 6|\n",
 "| apple.com| 6|\n",
-"| sourceforge.net| 6|\n",
+"| woothemes.com| 6|\n",
 "| cornell.edu| 6|\n",
-"| lulu.com| 6|\n",
+"| sourceforge.net| 6|\n",
+"| mlb.com| 6|\n",
+"| wikia.com| 5|\n",
+"| engadget.com| 5|\n",
+"|pagesperso-orange.fr| 5|\n",
 "| usa.gov| 5|\n",
+"| wordpress.org| 5|\n",
 "| cbslocal.com| 5|\n",
-"|pagesperso-orange.fr| 5|\n",
-"| pbs.org| 5|\n",
-"| gravatar.com| 5|\n",
 "| ucla.edu| 5|\n",
-"| nyu.edu| 5|\n",
-"| webeden.co.uk| 5|\n",
+"| pbs.org| 5|\n",
 "+--------------------+-----+\n",
 "only showing top 20 rows\n",
 "\n"
@@ -884,6 +886,15 @@
 "df.select(domain(df.email).alias('domain'))\\\n",
 ".groupBy('domain').agg(func.count('domain').alias(\"count\")).orderBy(\"count\", ascending=False).show()"
 ]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": true
+},
+"outputs": [],
+"source": []
 }
 ],
 "metadata": {

spark/PyDelhi/README.md

Lines changed: 3 additions & 1 deletion
@@ -8,7 +8,9 @@ Introduction to PySpark
 
 ## Demo
 
-Execute `run.sh` to run the demo locally or use [this.](https://github.com/shagunsodhani/talks/blob/master/spark/PySpark.ipynb)
+[![Binder](http://mybinder.org/badge.svg)](http://mybinder.org/repo/shagunsodhani/talks)
+
+Execute `run.sh` to run the demo locally or use [this.](https://github.com/shagunsodhani/talks/blob/master/spark/PyDelhi/PySpark.ipynb)
 
 ## Event
 