#!/bin/bash
#
# sovereign-stack/cluster.sh
#
# Usage: ./cluster.sh CLUSTER_NAME FQDN [--data-plane-interface=IFACE] [--disk=DEVICE]
#
# This script is meant to be executed on the management machine.
# It reaches out to an SSH endpoint and provisions that machine
# to use LXD.

# -e: abort on the first failing command.
# -u: treat expansion of an unset variable as an error.
# -x: trace every command before running it (NOTE: the trace also shows any
#     secret that appears on a command line).
set -eux

# Work from the directory that holds this script so that relative paths
# such as ./defaults.sh resolve regardless of the caller's cwd.
cd "$(dirname "$0")"

# Defaults; both may be overridden with command line options.
DATA_PLANE_MACVLAN_INTERFACE=
DISK_TO_USE=loop
# The cluster name is the first positional argument and is mandatory.
# "${1:-}" keeps 'set -u' from aborting before we can print a useful error.
CLUSTER_NAME="${1:-}"
if [ -z "$CLUSTER_NAME" ]; then
    echo "ERROR: The cluster name was not provided."
    exit 1
fi
# CLUSTERS_DIR is not set anywhere in this script; it is presumably
# provided by defaults.sh -- verify there.
#shellcheck disable=SC1091
source ./defaults.sh

# Per-cluster working directory and the definition file inside it.
# Both are exported so that child processes can see them.
export CLUSTER_PATH="$CLUSTERS_DIR/$CLUSTER_NAME"
export CLUSTER_DEFINITION="$CLUSTER_PATH/cluster_definition"
mkdir -p "$CLUSTER_PATH"

# First run for this cluster: write a template cluster_definition and stop,
# so the user can fill in the CHANGE_ME values before provisioning.
if [ ! -f "$CLUSTER_DEFINITION" ]; then
    # The heredoc delimiter is deliberately unquoted: the $(...) substitutions
    # are evaluated NOW, so the stub receives a concrete random password and
    # this machine's hostname / DNS domain rather than the commands themselves.
    cat >"$CLUSTER_DEFINITION" <<EOL
#!/bin/bash

# see https://www.sovereign-stack.org/cluster_definition for more info!

export LXD_CLUSTER_PASSWORD="$(gpg --gen-random --armor 1 14)"
export SOVEREIGN_STACK_MAC_ADDRESS="CHANGE_ME_REQUIRED"
export PROJECT_NAME="public"
export REGISTRY_URL="http://$(hostname).$(resolvectl status | grep 'DNS Domain:' | awk '{ print $3 }'):5000"
export REGISTRY_USERNAME="CHANGE_ME"
export REGISTRY_PASSWORD="CHANGE_ME"

EOL

    # The file contains the cluster password and registry credentials, so it
    # must not be readable by group/other.
    chmod 0700 "$CLUSTER_DEFINITION"

    echo "We stubbed out a '$CLUSTER_DEFINITION' file for you."
    echo "Use this file to customize your cluster deployment;"
    echo "Check out 'https://www.sovereign-stack.org/cluster-definition' for an example."
    exit 1
fi

# Load the user's settings (LXD_CLUSTER_PASSWORD, PROJECT_NAME, ...).
#shellcheck disable=SC1090
source "$CLUSTER_DEFINITION"
# A local lxc remote with this name means the cluster was already set up;
# refuse to provision it a second time.
if lxc remote list | grep -q "$CLUSTER_NAME"; then
    echo "ERROR: the cluster already exists! You need to go delete your lxd remote if you want to re-create your cluster."
    echo "       It may also be helpful to reset/rename your cluster path."
    exit 1
fi

# The second positional argument is the host that will be provisioned.
FQDN="${2:-}"
if [ -z "$FQDN" ]; then
    echo "ERROR: The Fully Qualified Domain Name of the new cluster member was not set."
    exit 1
fi

# let's check to ensure we have SSH access to the specified host.
if ! wait-for-it -t 5 "$FQDN:22"; then
    echo "ERROR: We can't get an SSH connection to '$FQDN:22'. Ensure you have the host set up correctly."
    exit 1
fi

# Drop CLUSTER_NAME and FQDN; whatever remains are optional modifiers.
# (Both are known to be present at this point, so 'shift 2' cannot fail.)
shift 2

# grab any modifications from the command line.
for arg in "$@"; do
    case "$arg" in
        --data-plane-interface=*)
            DATA_PLANE_MACVLAN_INTERFACE="${arg#*=}"
            ;;
        --disk=*)
            DISK_TO_USE="${arg#*=}"
            ;;
        *)
            # unrecognized arguments are silently ignored.
            ;;
    esac
done

# Interactive prompts, currently disabled:
#
# if [ -z "$DATA_PLANE_MACVLAN_INTERFACE" ]; then
#     echo "INFO: It looks like you didn't provide input on the command line for the data plane macvlan interface."
#     echo "      We need to know which interface that is! Enter it here now."
#     echo ""
#
#     ssh "ubuntu@$FQDN" ip link
#
#     echo "Please enter the network interface that's dedicated to the Sovereign Stack data plane: "
#     read -r DATA_PLANE_MACVLAN_INTERFACE
#
# fi
#
# if [ -z "$DISK_TO_USE" ]; then
#     echo "INFO: It looks like the DISK_TO_USE has not been set. Enter it now."
#     echo ""
#
#     ssh "ubuntu@$FQDN" lsblk
#
#     USER_DISK=
#     echo "Please enter the disk or partition that Sovereign Stack will use to store data (default: loop): "
#     read -r USER_DISK
#
# fi
# ensure we actually have that interface on the system and that it is UP.
# An empty value is not checked here.
echo "DATA_PLANE_MACVLAN_INTERFACE: $DATA_PLANE_MACVLAN_INTERFACE"
if [ -n "$DATA_PLANE_MACVLAN_INTERFACE" ]; then
    # 'ip link' prints one header line per device, e.g.
    #   2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 ...
    #   5: eth0.100@eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 ...
    # Split on ': ' so that $2 is the device name (minus any '@parent'
    # suffix) and $3 starts with the flag list. The name is compared exactly,
    # so 'eth0' does not match 'veth0' or 'eth01', and the flag test
    # requires a stand-alone UP (LOWER_UP alone does not count).
    if ! ssh "ubuntu@$FQDN" ip link \
        | awk -F': ' -v ifc="$DATA_PLANE_MACVLAN_INTERFACE" '
            { split($2, name, "@") }
            name[1] == ifc && $3 ~ /[<,]UP[,>]/ { found = 1 }
            END { exit !found }'; then
        echo "ERROR: We could not find your interface in our list of available interfaces. Please run this command again."
        echo "NOTE: You can always specify on the command line by adding the '--data-plane-interface=eth0', for example."
        exit 1
    fi
fi
# if the disk is loop-based, then we assume the / path exists.
if [ "$DISK_TO_USE" != loop ]; then
    # Accept both '/dev/sdd' and a bare 'sdd'.
    disk_path="$DISK_TO_USE"
    case "$disk_path" in
        /*) ;;
        *) disk_path="/dev/$disk_path" ;;
    esac

    # ensure we actually have that disk/partition on the system: it must be
    # a block device on the remote host. %q quotes the path for the remote
    # shell, which re-parses the command line it receives from ssh.
    if ! ssh "ubuntu@$FQDN" "test -b $(printf '%q' "$disk_path")"; then
        echo "ERROR: We could not find the disk you specified. Please run this command again and supply a different disk."
        echo "NOTE: You can always specify on the command line by adding the '--disk=/dev/sdd', for example."
        exit 1
    fi
fi
# The MGMT Plane IP is the IP address that the LXD API binds to, which happens
# to be the same as whichever SSH connection you're coming in on.
# sshd sets SSH_CONNECTION="<client ip> <client port> <server ip> <server port>",
# so the third space-separated field is the remote host's address.
MGMT_PLANE_IP="$(ssh "ubuntu@$FQDN" env | grep '^SSH_CONNECTION=' | cut -d " " -f 3)"

# SSH_CLIENT="<client ip> <client port> <server port>": the first field is
# this (management) machine as seen from the remote host.
IP_OF_MGMT_MACHINE="$(ssh "ubuntu@$FQDN" env | grep '^SSH_CLIENT=' | cut -d " " -f 1)"
# strip the leading 'SSH_CLIENT='.
IP_OF_MGMT_MACHINE="${IP_OF_MGMT_MACHINE#*=}"
# NOTE(review): keeps only the text before the first ':'. This is a no-op for
# an IPv4 address but would truncate an IPv6 one -- confirm intent.
IP_OF_MGMT_MACHINE="$(echo "$IP_OF_MGMT_MACHINE" | cut -d: -f1)"

# error out if the cluster password is unset.
# The ':-' default is required: under 'set -u' a bare expansion of an unset
# variable would abort with 'unbound variable' before this message is shown.
if [ -z "${LXD_CLUSTER_PASSWORD:-}" ]; then
    echo "ERROR: LXD_CLUSTER_PASSWORD must be set in your cluster_definition."
    exit 1
fi
# Remove a leftover 'sovereign-stack' profile and 'lxdbrSS' network.
# This can only work when the lxc client is installed, so the guard must be
# "lxc is available" (the previous negated test ran lxc only when it was missing).
# No remote is named in these commands, so they act on whichever lxc remote
# is currently selected.
if command -v lxc >/dev/null 2>&1; then
    if lxc profile list --format csv | grep -q sovereign-stack; then
        lxc profile delete sovereign-stack
        sleep 1
    fi

    if lxc network list --format csv | grep -q lxdbrSS; then
        lxc network delete lxdbrSS
        sleep 1
    fi
fi
# Host firewall policy, currently disabled (kept for reference):
#
#   # allow LXD API from management network.
#   sudo ufw allow from ${IP_OF_MGMT_MACHINE}/32 proto tcp to $MGMT_PLANE_IP port 8443
#
#   # enable it.
#   if sudo ufw status | grep -q 'Status: inactive'; then
#       sudo ufw enable
#   fi

# install lxd as a snap if it's not installed. We only really use the LXC part of this package.
# The remote script needs no local expansion, so it is single-quoted and
# sent verbatim; -t allocates a terminal for the remote sudo.
ssh -t "ubuntu@$FQDN" '
if ! snap list | grep -q lxd; then
    sudo -A snap install lxd
    sleep 4
fi
'
# if the DATA_PLANE_MACVLAN_INTERFACE is not specified, then we'll
# just attach VMs to the network interface used for the default route.
if [ -z "$DATA_PLANE_MACVLAN_INTERFACE" ]; then
    # Take the word that follows 'dev' on the first default route instead of
    # a fixed column: 'default via GW dev IF ...' and 'default dev IF ...'
    # put the interface in different positions. No tty is requested (-t), so
    # the captured output carries no carriage returns.
    DATA_PLANE_MACVLAN_INTERFACE="$(
        ssh "ubuntu@$FQDN" ip route \
            | awk '/^default/ && !seen {
                for (i = 1; i < NF; i++) {
                    if ($i == "dev") { print $(i + 1); seen = 1; break }
                }
            }'
    )"
fi
# stub out the lxd init file for the remote SSH endpoint.
# The heredoc is unquoted so the ${...} references are filled in from the
# current shell. The body is YAML: its indentation is significant and the
# '#' lines inside it are YAML comments, not shell comments.
CLUSTER_MASTER_LXD_INIT="$CLUSTER_PATH/lxdinit_profile.yml"
cat >"$CLUSTER_MASTER_LXD_INIT" <<EOF
config:
  core.https_address: ${MGMT_PLANE_IP}:8443
  core.trust_password: ${LXD_CLUSTER_PASSWORD}
  images.auto_update_interval: 15

networks:
- name: lxdbrSS
  type: bridge
  config:
    ipv4.address: 10.139.144.1/24
    ipv4.nat: "false"
    ipv4.dhcp: "false"
    ipv6.address: "none"
    dns.mode: "none"
  #managed: true
  description: ss-config,${DATA_PLANE_MACVLAN_INTERFACE:-},${DISK_TO_USE:-}
  # lxdbrSS is an isolated bridge; no Internet access.

cluster:
  server_name: ${CLUSTER_NAME}
  enabled: true
  member_config: []
  cluster_address: ""
  cluster_certificate: ""
  server_address: ""
  cluster_password: ""
  cluster_certificate_path: ""
  cluster_token: ""
EOF
# configure the LXD Daemon with our preseed.
# ssh forwards its stdin to the remote command, so the preseed file is fed
# to 'lxd init --preseed' by redirection.
ssh "ubuntu@$FQDN" lxd init --preseed <"$CLUSTER_MASTER_LXD_INIT"
# ensure the lxd service is available over the network, then add a lxc remote, then switch the active remote to it.
if ! wait-for-it -t 20 "$FQDN:8443"; then
    echo "ERROR: Could not detect the LXD endpoint. Something went wrong."
    exit 1
fi

# now create a remote on your local LXC client and switch to it.
# the software will now target the new cluster.
lxc remote add "$CLUSTER_NAME" "$FQDN" --password="$LXD_CLUSTER_PASSWORD" --protocol=lxd --auth-type=tls --accept-certificate
lxc remote switch "$CLUSTER_NAME"

echo "INFO: You have created a new cluster named '$CLUSTER_NAME'. Great! We switched your lxd remote to it."

echo "HINT: Now you can consider running 'ss-deploy'."