|
62 | 62 |
|
63 | 63 | DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION
|
64 | 64 | DEFAULT_SPARK_GITHUB_REPO = "https://github.com/apache/spark"
|
65 |
| -MESOS_SPARK_EC2_BRANCH = "branch-1.3" |
66 | 65 |
|
67 |
| -# A URL prefix from which to fetch AMI information |
68 |
| -AMI_PREFIX = "https://raw.github.com/mesos/spark-ec2/{b}/ami-list".format(b=MESOS_SPARK_EC2_BRANCH) |
| 66 | +# Default location to get the spark-ec2 scripts (and ami-list) from |
| 67 | +DEFAULT_SPARK_EC2_GITHUB_REPO = "https://github.com/mesos/spark-ec2" |
| 68 | +DEFAULT_SPARK_EC2_BRANCH = "branch-1.3" |
69 | 69 |
|
70 | 70 |
|
71 | 71 | def setup_boto():
|
@@ -147,6 +147,14 @@ def parse_args():
|
147 | 147 | "--spark-git-repo",
|
148 | 148 | default=DEFAULT_SPARK_GITHUB_REPO,
|
149 | 149 | help="Github repo from which to checkout supplied commit hash (default: %default)")
|
| 150 | + parser.add_option( |
| 151 | + "--spark-ec2-git-repo", |
| 152 | + default=DEFAULT_SPARK_EC2_GITHUB_REPO, |
| 153 | + help="Github repo from which to checkout spark-ec2 (default: %default)") |
| 154 | + parser.add_option( |
| 155 | + "--spark-ec2-git-branch", |
| 156 | + default=DEFAULT_SPARK_EC2_BRANCH, |
| 157 | + help="Github repo branch of spark-ec2 to use (default: %default)") |
150 | 158 | parser.add_option(
|
151 | 159 | "--hadoop-major-version", default="1",
|
152 | 160 | help="Major version of Hadoop (default: %default)")
|
@@ -333,7 +341,12 @@ def get_spark_ami(opts):
|
333 | 341 | print >> stderr,\
|
334 | 342 | "Don't recognize %s, assuming type is pvm" % opts.instance_type
|
335 | 343 |
|
336 |
| - ami_path = "%s/%s/%s" % (AMI_PREFIX, opts.region, instance_type) |
| 344 | + # URL prefix from which to fetch AMI information |
| 345 | + ami_prefix = "{r}/{b}/ami-list".format( |
| 346 | + r=opts.spark_ec2_git_repo.replace("https://github.com", "https://raw.github.com", 1), |
| 347 | + b=opts.spark_ec2_git_branch) |
| 348 | + |
| 349 | + ami_path = "%s/%s/%s" % (ami_prefix, opts.region, instance_type) |
337 | 350 | try:
|
338 | 351 | ami = urllib2.urlopen(ami_path).read().strip()
|
339 | 352 | print "Spark AMI: " + ami
|
@@ -650,12 +663,15 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
|
650 | 663 |
|
651 | 664 | # NOTE: We should clone the repository before running deploy_files to
|
652 | 665 | # prevent ec2-variables.sh from being overwritten
|
| 666 | + print "Cloning spark-ec2 scripts from {r}/tree/{b} on master...".format( |
| 667 | + r=opts.spark_ec2_git_repo, b=opts.spark_ec2_git_branch) |
653 | 668 | ssh(
|
654 | 669 | host=master,
|
655 | 670 | opts=opts,
|
656 | 671 | command="rm -rf spark-ec2"
|
657 | 672 | + " && "
|
658 |
| - + "git clone https://github.com/mesos/spark-ec2.git -b {b}".format(b=MESOS_SPARK_EC2_BRANCH) |
| 673 | + + "git clone {r} -b {b} spark-ec2".format(r=opts.spark_ec2_git_repo, |
| 674 | + b=opts.spark_ec2_git_branch) |
659 | 675 | )
|
660 | 676 |
|
661 | 677 | print "Deploying files to master..."
|
@@ -1038,6 +1054,17 @@ def real_main():
|
1038 | 1054 | print >> stderr, "ebs-vol-num cannot be greater than 8"
|
1039 | 1055 | sys.exit(1)
|
1040 | 1056 |
|
| 1057 | + # Prevent breaking ami_prefix (/, .git and startswith checks) |
| 1058 | + # Prevent forks with non-spark-ec2 names for now. |
| 1059 | + if opts.spark_ec2_git_repo.endswith("/") or \ |
| 1060 | + opts.spark_ec2_git_repo.endswith(".git") or \ |
| 1061 | + not opts.spark_ec2_git_repo.startswith("https://github.com") or \ |
| 1062 | + not opts.spark_ec2_git_repo.endswith("spark-ec2"): |
| 1063 | + print >> stderr, "spark-ec2-git-repo must be a github repo and it must not have a " \ |
| 1064 | + "trailing / or .git. " \ |
| 1065 | + "Furthermore, we currently only support forks named spark-ec2." |
| 1066 | + sys.exit(1) |
| 1067 | + |
1041 | 1068 | try:
|
1042 | 1069 | conn = ec2.connect_to_region(opts.region)
|
1043 | 1070 | except Exception as e:
|
|
0 commit comments