
Commit bfc680d

Tom committed: Initial commit
1 parent 8876ede commit bfc680d

File tree

- README.md
- auto-scaling.tf
- files/userdata.template
- iam.tf
- main.tf
- security-groups.tf

6 files changed: +387 −0 lines changed

README.md

Lines changed: 151 additions & 0 deletions
@@ -0,0 +1,151 @@
# Rancher host cluster Terraform module

This is a Terraform module to help with creating a Rancher host cluster. It is intended for use in combination with [my Rancher server module](https://github.com/greensheep/terraform-aws-rancher-server).

### Features

- Flexible for use with different deployment scenarios.
- Automatically adds hosts launched by autoscaling to the Rancher server.
- Registers an autoscaling lifecycle hook that automatically removes instances from the Rancher server on scale-down (see [my Rancher server module](https://github.com/greensheep/terraform-aws-rancher-server)).
- Designed for use in VPC private subnets, so it can host private backend services or serve public traffic proxied in from an ELB.
- Can be used any number of times in a Terraform config, allowing separate clusters for dev, staging, production, etc. (see the sketch after this list).
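For example, a minimal sketch of two independent clusters built from the same module (the module names are arbitrary and the elided arguments are the full set of variables shown under Usage below):

```
module "dev_cluster" {
    source = "github.com/greensheep/terraform-aws-rancher-hosts"
    cluster_name = "dev"
    # ... dev autoscaling group, security group, environment id and keys ...
}

module "production_cluster" {
    source = "github.com/greensheep/terraform-aws-rancher-hosts"
    cluster_name = "production"
    # ... production autoscaling group, security group, environment id and keys ...
}
```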
### Requirements

Terraform 0.6.6 is required.

On its own this module doesn't do very much. It needs to be included in a Terraform config that creates the following resources:

- Security group
- Autoscaling launch configuration
- Autoscaling group

Because these resources may vary significantly between deployments (e.g. the type of app you're deploying, expected workload, etc.), you need to create them yourself and pass in the necessary variables.

You'll also need to have your Rancher server set up and configured (did I mention [my Rancher server module](https://github.com/greensheep/terraform-aws-rancher-server)?). Don't be tempted to use this as part of some mega-config that also creates the server: you need to specify an existing environment id and API access keys for it to work! One way of passing those values in is sketched after the usage example below.
### Usage

Include the following in your existing Terraform config:

```
module "staging_cluster" {

    # Import the module from GitHub.
    # It's probably better to fork or clone this repo if you intend to use it in production,
    # so any future changes don't mess up your existing infrastructure.
    source = "github.com/greensheep/terraform-aws-rancher-hosts"

    # Add Rancher server details
    server_security_group_id = "sg-XXXXXXXX"
    server_hostname = "rancher-server.yourdomain.tld"

    # Rancher environment
    # In your Rancher server, create an environment and an API keypair. You can have
    # multiple host clusters per environment if necessary. Instances will be labelled
    # with the cluster name so you can differentiate between multiple clusters.
    environment_id = "1a7"
    environment_access_key = "ACCESS-KEY"
    environment_secret_key = "SECRET-KEY"

    # Name your cluster and provide the autoscaling group name and security group id.
    # See examples below.
    cluster_name = "${var.cluster_name}"
    cluster_autoscaling_group_name = "${aws_autoscaling_group.cluster_autoscale_group.id}"
    cluster_instance_security_group_id = "${aws_security_group.rancher_host_sg.id}"

    # Lifecycle hooks queue ARN
    # This is specific to my Rancher server module, which creates the SQS queue used to
    # receive autoscaling lifecycle hooks. This module creates a lifecycle hook for the
    # provided autoscaling group so that instances can be removed from the Rancher
    # server before they are terminated.
    lifecycle_hooks_sqs_queue_arn = "${var.lifecycle_hooks_sqs_queue_arn}"

}
```
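Since the server lives in its own config (per the advice above), the server-related values typically arrive in this config as input variables. A hypothetical terraform.tfvars for the host cluster config, with placeholder values copied from the server config's outputs (this assumes you declare matching variables in your root config):

```
# terraform.tfvars (placeholder values; copy the real ones from your
# Rancher server config's outputs)
cluster_name                  = "staging"
server_security_group_id     = "sg-XXXXXXXX"
server_hostname               = "rancher-server.yourdomain.tld"
lifecycle_hooks_sqs_queue_arn = "arn:aws:sqs:REGION:ACCOUNT-ID:QUEUE-NAME"
```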
### Examples of required resources

##### Security group

```
# Cluster instance security group
resource "aws_security_group" "cluster_instance_sg" {

    name = "Cluster-Instances"
    description = "Rules for connected Rancher host machines. These are the hosts that run containers placed on the cluster."
    vpc_id = "${TARGET-VPC-ID}"

    # NOTE: To allow ELB-proxied traffic to reach private VPC
    # hosts, open the necessary ports here (see the example rule below).

    lifecycle {
        create_before_destroy = true
    }

}
```
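For instance, if an ELB in the same VPC should reach containers listening on port 8080, an extra ingress rule along these lines would do it (the port and the `cluster_elb_sg` security group are assumptions for illustration):

```
# Example only: allow an ELB to reach the cluster hosts on an instance port
resource "aws_security_group_rule" "elb_ingress" {

    security_group_id = "${aws_security_group.cluster_instance_sg.id}"
    type = "ingress"
    from_port = 8080
    to_port = 8080
    protocol = "tcp"
    source_security_group_id = "${aws_security_group.cluster_elb_sg.id}"

}
```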
##### Autoscaling

```
# Autoscaling launch configuration
resource "aws_launch_configuration" "cluster_launch_conf" {

    name = "Launch-Config"

    # Amazon Linux, eu-west-1
    image_id = "ami-69b9941e"

    # No public ip when instances are placed in private subnets. See notes
    # about creating an ELB to proxy public traffic into the cluster.
    associate_public_ip_address = false

    # Security groups
    security_groups = [
        "${aws_security_group.cluster_instance_sg.id}"
    ]

    # Key
    # NOTE: It's a good idea to use the same key as the Rancher server here.
    key_name = "${UPLOADED-KEY-NAME}"

    # Add rendered userdata template
    user_data = "${module.staging_cluster.host_user_data}"

    # Misc
    instance_type = "t2.micro"
    enable_monitoring = true

    lifecycle {
        create_before_destroy = true
    }

}

# Autoscaling group
resource "aws_autoscaling_group" "cluster_autoscale_group" {

    name = "Cluster-ASG"
    launch_configuration = "${aws_launch_configuration.cluster_launch_conf.name}"
    min_size = "2"
    max_size = "2"
    desired_capacity = "2"
    health_check_grace_period = 180
    health_check_type = "EC2"
    force_delete = false
    termination_policies = ["OldestInstance"]

    # Add ELBs here if you're proxying public traffic into the cluster
    # (see the sketch below)
    # load_balancers = ["${var.instance_cluster_load_balancers}"]

    # Target subnets
    vpc_zone_identifier = ["${LIST-OF-VPC-PRIVATE-SUBNET-IDS}"]

    tag {
        key = "Name"
        value = "Test-Cluster-Instance"
        propagate_at_launch = true
    }

    lifecycle {
        create_before_destroy = true
    }

}
```
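If you do proxy public traffic into the cluster, the ELB itself might look roughly like the following sketch; the subnets, security group, ports and health check are placeholders to adapt to your VPC layout:

```
# Example only: classic ELB in public subnets forwarding HTTP traffic
# to the cluster instances (all values are placeholders)
resource "aws_elb" "cluster_elb" {

    name = "cluster-elb"
    subnets = ["${LIST-OF-VPC-PUBLIC-SUBNET-IDS}"]
    security_groups = ["${aws_security_group.cluster_elb_sg.id}"]

    listener {
        instance_port = 8080
        instance_protocol = "http"
        lb_port = 80
        lb_protocol = "http"
    }

    health_check {
        healthy_threshold = 2
        unhealthy_threshold = 2
        timeout = 3
        target = "TCP:8080"
        interval = 30
    }

}
```

Remember to also open the instance port in the cluster security group (see the ingress rule example above) and to attach the ELB to the autoscaling group via the commented-out `load_balancers` argument.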

auto-scaling.tf

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
# User-data template
# Registers the instance with the rancher server environment
resource "template_file" "user_data" {

    filename = "${path.module}/files/userdata.template"
    vars {
        cluster_name = "${var.cluster_name}"
        environment_id = "${var.environment_id}"
        environment_access_key = "${var.environment_access_key}"
        environment_secret_key = "${var.environment_secret_key}"
        server_hostname = "${var.server_hostname}"
    }

    lifecycle {
        create_before_destroy = true
    }

}

# Lifecycle hook
# Triggered when an instance should be removed from the autoscaling
# group. Publishes a message to the supplied SQS queue so that the host
# can be removed from the Rancher server before shutting down.
resource "aws_autoscaling_lifecycle_hook" "cluster_instance_terminating_hook" {

    name = "cluster_instance_terminating_hook"
    autoscaling_group_name = "${var.cluster_autoscaling_group_name}"
    lifecycle_transition = "autoscaling:EC2_INSTANCE_TERMINATING"
    default_result = "CONTINUE"

    # 10 mins for rancher server to remove instance
    heartbeat_timeout = 600

    # Notification SQS queue
    notification_target_arn = "${var.lifecycle_hooks_sqs_queue_arn}"

    role_arn = "${aws_iam_role.lifecycle_role.arn}"

    lifecycle {
        create_before_destroy = true
    }

}

# Rendered user data for host instances; pass this into your launch
# configuration's user_data (see the README usage example).
output "host_user_data" {
    value = "${template_file.user_data.rendered}"
}

files/userdata.template

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
#!/bin/bash

# Install jq
yum install -y jq

# Install docker
wget -qO- https://get.docker.com/ | sh
service docker start

# Setup initial vars
serverUrl=https://${environment_access_key}:${environment_secret_key}@${server_hostname}
projectId=${environment_id}

# Make initial POST request for a registration token and record the id
response=$(curl -s -X POST $serverUrl/v1/registrationtokens?projectId=$projectId)
requestId=$(echo $response | jq -r '.id')
requestState=$(echo $response | jq -r '.state')

# The registration token request is async so keep checking until it's complete
while [[ "$requestState" != "active" ]]; do
    sleep 2
    response=$(curl -s $serverUrl/v1/registrationtokens/$requestId)
    requestState=$(echo $response | jq -r '.state')
done

# Get the instance id from metadata
instanceId=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)

# Use the command in the response to start the rancher agent, adding
# CATTLE_HOST_LABELS so the host is labelled with its instance id, cloud
# and cluster name. The "$$" stops Terraform interpolating the bash
# substitution; it renders as a single "$" in the final script.
cmd=$(echo $response | jq -r '.command')
eval $${cmd/sudo docker run /docker run -e CATTLE_HOST_LABELS=\"HOSTID=$instanceId&CLOUD=aws&CLUSTER=${cluster_name}\" }

iam.tf

Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
# Autoscaling lifecycle hook role
# Allows lifecycle hooks to add messages to the SQS queue
resource "aws_iam_role" "lifecycle_role" {

    name = "${var.cluster_name}-lifecycle-hooks"
    assume_role_policy = <<EOF
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Sid": "",
      "Effect": "Allow",
      "Principal": {
        "Service": "autoscaling.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
EOF

    lifecycle {
        create_before_destroy = true
    }

}

# AWS managed lifecycle hook policy
resource "aws_iam_policy_attachment" "lifecycle_role_policy" {

    name = "AutoScalingNotificationAccessRole"
    policy_arn = "arn:aws:iam::aws:policy/service-role/AutoScalingNotificationAccessRole"
    roles = [
        "${aws_iam_role.lifecycle_role.name}"
    ]

    lifecycle {
        create_before_destroy = true
    }

}

main.tf

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
# Rancher server details
variable "server_security_group_id" {
    description = "Security group id of the Rancher server so we can restrict incoming traffic."
}
variable "server_hostname" {
    description = "Hostname of the Rancher server."
}

# Target server environment
variable "environment_id" {
    description = "Target environment id for host registration."
}
variable "environment_access_key" {
    description = "API access key for the target environment."
}
variable "environment_secret_key" {
    description = "API secret key for the target environment."
}

# Cluster setup
variable "cluster_name" {
    description = "The name of the cluster. Best not to include non-alphanumeric characters. Will be used to name resources and tag instances."
}
variable "cluster_autoscaling_group_name" {
    description = "Name of the target autoscaling group."
}
variable "cluster_instance_security_group_id" {
    description = "ID of the security group used for host instances. Will be modified to include Rancher-specific rules."
}

# Lifecycle hooks queue arn
variable "lifecycle_hooks_sqs_queue_arn" {
    description = "ARN of the SQS queue used to receive autoscaling lifecycle hooks."
}

security-groups.tf

Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
# Attach IPSEC rules to host instance security group.
# Enables the rancher overlay network for connected hosts.
# Traffic only allowed from other machines with this security group.
resource "aws_security_group_rule" "ipsec_ingress_1" {

    security_group_id = "${var.cluster_instance_security_group_id}"
    type = "ingress"
    from_port = 4500
    to_port = 4500
    protocol = "udp"
    source_security_group_id = "${var.cluster_instance_security_group_id}"

    lifecycle {
        create_before_destroy = true
    }

}

resource "aws_security_group_rule" "ipsec_ingress_2" {

    security_group_id = "${var.cluster_instance_security_group_id}"
    type = "ingress"
    from_port = 500
    to_port = 500
    protocol = "udp"
    source_security_group_id = "${var.cluster_instance_security_group_id}"

    lifecycle {
        create_before_destroy = true
    }

}

# SSH ingress
# Required for the server to connect & configure the host.
resource "aws_security_group_rule" "ssh_ingress" {

    security_group_id = "${var.cluster_instance_security_group_id}"
    type = "ingress"
    from_port = 22
    to_port = 22
    protocol = "tcp"
    source_security_group_id = "${var.server_security_group_id}"

    lifecycle {
        create_before_destroy = true
    }

}

# Outgoing HTTP
# Allows pulling of remote docker images, installing packages, etc.
resource "aws_security_group_rule" "http_egress" {

    security_group_id = "${var.cluster_instance_security_group_id}"
    type = "egress"
    from_port = 80
    to_port = 80
    protocol = "tcp"
    cidr_blocks = ["0.0.0.0/0"]

    lifecycle {
        create_before_destroy = true
    }

}

# Outgoing HTTPS
# Allows pulling of remote docker images, installing packages, etc.
resource "aws_security_group_rule" "https_egress" {

    security_group_id = "${var.cluster_instance_security_group_id}"
    type = "egress"
    from_port = 443
    to_port = 443
    protocol = "tcp"
    cidr_blocks = ["0.0.0.0/0"]

    lifecycle {
        create_before_destroy = true
    }

}
