#!/bin/bash
#
# Copyright 2011-2019 Nicolas Thauvin. All rights reserved.
# Copyright 2019 Dalibo. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#  1. Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

version='3.3'

# Where the configuration file is looked up when only a name is given
CONFIG_DIR='/etc/pitrery'
CONFIG='pitrery.conf'

# Configuration defaults. They are assigned before the configuration
# file is sourced, so the file can replace any of them.
SYSLOG='no'

ARCHIVE_OVERWRITE='yes'
ARCHIVE_CHECK='no'
ARCHIVE_FLUSH='no'
ARCHIVE_FILE_CHMOD=''

# Compression of the archived file
ARCHIVE_COMPRESS='yes'
ARCHIVE_COMPRESS_BIN='gzip -f -4'
ARCHIVE_COMPRESS_SUFFIX='gz'

# Encryption of the archived file with GnuPG
ARCHIVE_ENCRYPT='no'
GPG_BIN='/usr/bin/gpg'
GPG_ENCRYPT_KEYS=''

# Apply an extra level of shell quoting to each of the arguments passed.
# This is necessary for remote-side arguments of ssh (including commands that
# are executed by the remote shell and remote paths for scp and rsync via ssh)
# since they will strip an extra level of quoting off on the remote side.
# This makes it safe for them to include spaces or other special characters
# which should not be interpreted or cause word-splitting on the remote side.
qw() {
	# Arguments: any number of words to quote.
	# Outputs:   the words, each shell-quoted with %q, joined by single
	#            spaces on one line of stdout.
	local quoted

	printf -v quoted '%q ' "$@"

	# printf is used instead of echo so that a lone "-n", "-e" or "-E"
	# is printed verbatim rather than swallowed as an echo option.
	printf '%s\n' "${quoted% }"  # Skip the final space.
}

# Message functions
now() {
	# Print the current date, time and timezone followed by a space,
	# but only when LOG_TIMESTAMP is "yes"; otherwise print nothing
	# and return a non-zero status.
	if [ "$LOG_TIMESTAMP" != "yes" ]; then
		return 1
	fi

	printf '%s\n' "$(date "+%F %T %Z ")"
}

# Print an error message on stderr, remove the temporary file if one was
# created, and exit.
#   $1  message
#   $2  exit status (defaults to 1)
error() {
	local leftover

	printf '%s\n' "$(now)ERROR: $1" 1>&2

	# The temporary file used for remote archiving is stored in
	# tmp_file; tmpfile is still honoured in case it is set.
	for leftover in "$tmp_file" "$tmpfile"; do
		if [ -n "$leftover" ]; then
			rm -f -- "$leftover"
		fi
	done

	exit "${2:-1}"
}

warn() {
	# Write all the arguments as a single WARNING line on stderr,
	# prefixed by whatever now() prints.
	local stamp
	stamp=$(now)

	printf '%s\n' "${stamp}WARNING: $*" >&2
}

# Script help
# Print the help text on stdout and exit.
#   $1  exit status (0 when omitted)
usage() {
	local prog

	# Quote $0 so that a script path containing whitespace still
	# yields the right program name.
	prog=$(basename -- "$0")

	printf '%s\n' \
		"$prog Archive a WAL segment" \
		"" \
		"usage: $prog [options] walfile" \
		"" \
		"options:" \
		"    -C conf        configuration file" \
		"    -a [[user@]host:]/dir  Place to store the archive" \
		"    -X             do not compress" \
		"    -O             do not overwrite the destination file" \
		"    -H             check the hash of the destination file (remote only)" \
		"    -F             flush the destination file to disk" \
		"    -c command     compression command" \
		"    -s suffix      compressed file suffix (ex: gz)" \
		"" \
		"    -E             encrypt the file using gpg" \
		"    -r keys:...    colon separated list of recipients for GPG encryption" \
		"" \
		"    -S             send messages to syslog" \
		"    -f facility    syslog facility" \
		"    -t ident       syslog ident" \
		"    -m mode        destination file permission mode in octal (e.g. chmod)" \
		"    -T             Timestamp log messages" \
		"" \
		"    -V             Display the version and exit" \
		"    -?             print help" \
		""

	exit "${1:-0}"
}

# Compare the md5 of the local WAL file with a checksum obtained elsewhere.
# Does nothing unless ARCHIVE_CHECK is "yes".
#   $1  output of md5sum for the archived file; only the part before the
#       first space is compared
# Globals: ARCHIVE_CHECK, wal (read); LOCAL_MD5 (written)
# Exits through error() with status 4 when the local checksum cannot be
# computed or when the checksums differ.
check_md5() {
	[ "$ARCHIVE_CHECK" != "yes" ] && return 0

	local ARCHIVE_MD5=$1

	# "--" protects against file names starting with a dash. A failure
	# of md5sum must be fatal: otherwise an empty local sum would match
	# an empty remote sum and the check would pass.
	LOCAL_MD5=$(md5sum -- "$wal") ||
		error "Unable to compute the md5 of $wal" 4

	if [ "${LOCAL_MD5%% *}" != "${ARCHIVE_MD5%% *}" ]; then
		error "md5 mismatch between local and remote file" 4
	fi

	return 0
}

# Split a "[[user@]host:]/dir" target into its components using only
# quoted expansions, so that whitespace and glob characters in the target
# are preserved.
#   $1  target to split
#   $2  name of the variable receiving the user (text before the first
#       '@', empty when there is no '@')
#   $3  name of the variable receiving the host (text before the last
#       ':', without the user part; empty when there is no ':')
#   $4  name of the variable receiving the directory (text after the last
#       ':', or the whole target when there is no ':')
_split_target_uri() {
	local target=$1
	local user="" host=""

	if [[ $target == *@* ]]; then
		user=${target%%@*}
	fi

	if [[ $target == *:* ]]; then
		host=${target%:*}
		if [[ $host == *@* ]]; then
			# Keep only what sits between the first and second '@'
			host=${host#*@}
			host=${host%%@*}
		fi
	fi

	printf -v "$2" '%s' "$user"
	printf -v "$3" '%s' "$host"
	printf -v "$4" '%s' "${target##*:}"
}

# Work out where backups and archived WAL files are stored.
#   $1  backup target, "[[user@]host:]/dir"; when empty, BACKUP_USER,
#       BACKUP_HOST and BACKUP_DIR are used
#   $2  archive target, same form; when empty, ARCHIVE_USER, ARCHIVE_HOST
#       and ARCHIVE_DIR are used, or "<backup_dir>/archived_wal" on the
#       backup host when ARCHIVE_DIR is not set
# Globals written: backup_user, backup_host, backup_dir, backup_local,
#   backup_ssh_target, archive_user, archive_host, archive_dir,
#   archive_local, archive_ssh_target
# Directories are made absolute with "readlink -m", run through ssh for
# remote targets. Exits through error() when no archive directory can be
# determined.
parse_target_uri() {
	local backup_target=$1
	local archive_target=$2
	# A host made only of hexadecimal digits and colons, with at least
	# one colon, is taken for a bare IPv6 address
	local ipv6_re='^[0123456789abcdefABCDEF:]*:[0123456789abcdefABCDEF:]*$'

	if [ -n "$backup_target" ]; then
		# Parse the backup target into user, host and path
		_split_target_uri "$backup_target" backup_user backup_host backup_dir
	else
		# Fallback to the values from the configuration file
		[ -n "$BACKUP_USER" ] && backup_user="$BACKUP_USER"
		[ -n "$BACKUP_HOST" ] && backup_host="$BACKUP_HOST"
		[ -n "$BACKUP_DIR" ] && backup_dir="$BACKUP_DIR"
	fi

	# Deduce if backup is local
	if [ -z "$backup_host" ]; then
		backup_local="yes"
	else
		backup_local="no"

		# Wrap IPv6 addresses with brackets
		[[ $backup_host =~ $ipv6_re ]] && backup_host="[${backup_host}]"

		# Add a shortcut for ssh/rsync commands
		backup_ssh_target="${backup_user:+$backup_user@}$backup_host"
	fi

	if [ -n "$backup_dir" ]; then
		# Ensure the backup directory is an absolute path
		if [ "$backup_local" = "yes" ]; then
			backup_dir="$(readlink -m -- "$backup_dir")"
		else
			backup_dir="$(ssh -n -- "$backup_ssh_target" "readlink -m -- $(qw "$backup_dir")")"
		fi
	fi

	# Parse archive target the same way
	if [ -n "$archive_target" ]; then
		_split_target_uri "$archive_target" archive_user archive_host archive_dir
	else
		# Fallback to the values of the configuration file. When the
		# path is not provided in the config file, fallback to backup values
		if [ -n "$ARCHIVE_DIR" ]; then
			[ -n "$ARCHIVE_USER" ] && archive_user=$ARCHIVE_USER
			[ -n "$ARCHIVE_HOST" ] && archive_host=$ARCHIVE_HOST
			archive_dir=$ARCHIVE_DIR
		else
			archive_user="$backup_user"
			archive_host="$backup_host"
			# avoid trying to create directory in /
			[ -n "$backup_dir" ] && archive_dir="$backup_dir/archived_wal"
		fi
	fi

	# Deduce if archives are local
	if [ -z "$archive_host" ]; then
		archive_local="yes"
	else
		archive_local="no"

		# Wrap IPv6 addresses with brackets
		[[ $archive_host =~ $ipv6_re ]] && archive_host="[${archive_host}]"

		# Add a shortcut for ssh/rsync commands
		archive_ssh_target=${archive_user:+$archive_user@}$archive_host
	fi

	[ -n "$archive_dir" ] || error "missing archive directory"

	# Ensure the archives directory is an absolute path
	if [ "$archive_local" = "yes" ]; then
		archive_dir="$(readlink -m -- "$archive_dir")"
	else
		archive_dir="$(ssh -n -- "$archive_ssh_target" "readlink -m -- $(qw "$archive_dir")")"
	fi
}

# Command line options. Values that may also come from the configuration
# file are kept in CLI_* variables so they can be applied after the file
# has been loaded.
while getopts "C:a:XOHFc:s:Er:Sf:t:m:TV?" opt; do
	case "$opt" in
		C)
			CONFIG=$OPTARG
			;;
		a)
			archive_path="$OPTARG"
			;;
		X)
			CLI_ARCHIVE_COMPRESS="no"
			;;
		O)
			CLI_ARCHIVE_OVERWRITE="no"
			;;
		H)
			CLI_ARCHIVE_CHECK="yes"
			;;
		F)
			CLI_ARCHIVE_FLUSH="yes"
			;;
		c)
			CLI_ARCHIVE_COMPRESS_BIN="$OPTARG"
			;;
		s)
			CLI_ARCHIVE_COMPRESS_SUFFIX="$OPTARG"
			;;
		E)
			CLI_ARCHIVE_ENCRYPT="yes"
			;;
		r)
			CLI_GPG_ENCRYPT_KEYS="$OPTARG"
			;;
		S)
			CLI_SYSLOG="yes"
			;;
		f)
			CLI_SYSLOG_FACILITY=$OPTARG
			;;
		t)
			CLI_SYSLOG_IDENT=$OPTARG
			;;
		m)
			CLI_ARCHIVE_FILE_CHMOD=$OPTARG
			;;
		T)
			CLI_LOG_TIMESTAMP="yes"
			;;
		V)
			echo "archive_wal (pitrery) $version"
			exit 0
			;;
		\?)
			usage 1
			;;
		*)
			error "Unknown error while processing options"
			;;
	esac
done

# A CONFIG value without any slash is a name inside the configuration
# directory: prepend the directory and make sure it ends with .conf.
# A value containing a slash is used as a path, untouched.
case "$CONFIG" in
	*/*)
		;;
	*)
		CONFIG="$CONFIG_DIR/$(basename -- "$CONFIG" .conf).conf"
		;;
esac

# Load configuration file, when there is one
if [ -f "$CONFIG" ]; then
	. "$CONFIG"
fi

# Overwrite configuration with cli options: every non-empty CLI_<name>
# variable replaces the value of <name>.
for _setting in \
	ARCHIVE_COMPRESS ARCHIVE_COMPRESS_BIN ARCHIVE_COMPRESS_SUFFIX \
	ARCHIVE_OVERWRITE ARCHIVE_CHECK ARCHIVE_FLUSH \
	SYSLOG SYSLOG_FACILITY SYSLOG_IDENT \
	ARCHIVE_FILE_CHMOD LOG_TIMESTAMP \
	ARCHIVE_ENCRYPT GPG_ENCRYPT_KEYS
do
	_cli_setting="CLI_${_setting}"
	if [ -n "${!_cli_setting}" ]; then
		printf -v "$_setting" '%s' "${!_cli_setting}"
	fi
done
unset _setting _cli_setting

# Redirect output to syslog if configured
if [ "$SYSLOG" = "yes" ]; then
	# Facility and ident fall back to local0 and postgres when they
	# are neither configured nor given on the command line
	SYSLOG_FACILITY=${SYSLOG_FACILITY:-local0}
	SYSLOG_IDENT=${SYSLOG_IDENT:-postgres}

	# From here on, stdout is fed to logger with the "info" priority
	# and stderr with the "err" priority
	exec 1> >(logger -t "$SYSLOG_IDENT" -p "${SYSLOG_FACILITY}.info")
	exec 2> >(logger -t "$SYSLOG_IDENT" -p "${SYSLOG_FACILITY}.err")
fi

# Print a message when the script is run under its old name. $0 is
# quoted so that a path containing whitespace does not break the test.
if [ "$(basename -- "$0")" = 'archive_xlog' ]; then
	warn "'archive_xlog' is now an alias for 'archive_wal', which will disappear in a next release."
	warn "We advise you to use 'archive_wal' as soon as possible."
fi

parse_target_uri "" "$archive_path"

# The first argument left after the options must be a WAL file
wal=${@:$OPTIND:1}
[ -n "$wal" ] ||
	error "missing wal filename to archive. Please consider modifying archive_command, eg add %p"

# The source file must exist and be readable
[ -r "$wal" ] ||
	error "Input file '$wal' does not exist or is not readable"

# Turn the yes/no flush setting into the matching dd operand, or into
# nothing at all when flushing is not wanted
case "$ARCHIVE_FLUSH" in
	yes)
		ARCHIVE_FLUSH='conv=fsync'
		;;
	*)
		ARCHIVE_FLUSH=''
		;;
esac

# Refuse to continue when the destination file already exists and
# overwriting is not allowed.
#   $1  path of the destination file
# Returns 0 when ARCHIVE_OVERWRITE is "yes" or the file does not exist,
# exits through error() otherwise.
check_local_dest_exists()
{
	# Quoted so that an empty or multi-word value is simply "not yes"
	# instead of a syntax error in the test
	[ "$ARCHIVE_OVERWRITE" = "yes" ] && return 0

	[ ! -e "$1" ] || error "$1 already exists, refusing to overwrite it."
}

if [ "$ARCHIVE_ENCRYPT" = "yes" ]; then
	if [ -z "$GPG_ENCRYPT_KEYS" ]; then
		error "missing recipients for GPG encryption"
	else
		# Prepare and check GPG command line
		gpg_command=( "$GPG_BIN" "--batch" "--yes" "--encrypt" )

		# Split the colon separated list on NUL bytes. The colon
		# appended to the list terminates the last entry, so that
		# read sees it before the newline added by the here-string.
		while read -r -d '' o; do
			# A stray or trailing colon gives an empty entry:
			# skip it instead of passing an empty recipient
			[ -n "$o" ] || continue
			gpg_command+=( "--recipient" "$o" )
		done < <(tr ':' '\0' <<< "${GPG_ENCRYPT_KEYS}:")

		# The four leading words are the command itself: anything
		# less than that plus one recipient cannot encrypt
		[ "${#gpg_command[@]}" -gt 4 ] ||
			error "missing recipients for GPG encryption"
	fi
fi

# Copy the wal locally
if [ "$archive_local" = "yes" ]; then
	dd_rc=0
	mkdir -p -- "$archive_dir" 1>&2 ||
		error "Unable to create target directory '$archive_dir'" $?

	# In the dd commands below, $ARCHIVE_FLUSH is left unquoted on
	# purpose: when empty it must disappear instead of becoming an
	# empty operand.
	if [ "$ARCHIVE_ENCRYPT" = "yes" ]; then
		# Encryption takes precedence over compression
		dest_path="$archive_dir/$(basename -- "$wal").gpg"
		check_local_dest_exists "$dest_path"

		"${gpg_command[@]}" -o - "$wal" | dd $ARCHIVE_FLUSH of="$dest_path" 2>/dev/null
		rc=( "${PIPESTATUS[@]}" )
		gpg_rc=${rc[0]}
		dd_rc=${rc[1]}
		if [ "$gpg_rc" != 0 ]; then
			rm -f -- "$dest_path"
			error "Encryption of $wal failed"
		fi
	elif [ "$ARCHIVE_COMPRESS" = "yes" ]; then
		dest_path="$archive_dir/$(basename -- "$wal").$ARCHIVE_COMPRESS_SUFFIX"
		check_local_dest_exists "$dest_path"

		# The compression command is a command line, split on purpose
		$ARCHIVE_COMPRESS_BIN -c < "$wal" | dd $ARCHIVE_FLUSH of="$dest_path" 2>/dev/null
		rc=( "${PIPESTATUS[@]}" )
		x_rc=${rc[0]}
		dd_rc=${rc[1]}
		if [ "$x_rc" != 0 ]; then
			rm -f -- "$dest_path"
			error "Compressing $wal to $dest_path failed"
		fi
	else
		dest_path="$archive_dir/$(basename -- "$wal")"
		check_local_dest_exists "$dest_path"

		dd $ARCHIVE_FLUSH if="$wal" of="$dest_path" 2>/dev/null
		dd_rc=$?
	fi

	if [ "$dd_rc" != 0 ] ; then
		rm -f -- "$dest_path"
		error "Copying $wal to $dest_path failed"
	fi

	# Chmod if required
	if [ -n "$ARCHIVE_FILE_CHMOD" ]; then
		if printf '%s\n' "$ARCHIVE_FILE_CHMOD" | grep -qE '^[0-7]{3,4}$'; then
			if ! chmod "$ARCHIVE_FILE_CHMOD" "$dest_path"; then
				warn "Could not change mode of $dest_path to $ARCHIVE_FILE_CHMOD"
			fi
		else
			warn "ARCHIVE_FILE_CHMOD is not in octal form, mode not changed"
		fi
	fi

else
	# Encrypt or compress to a temporary file if needed, then send the
	# result through ssh

	dest_file="$archive_dir/$(basename -- "$wal")"
	tmp_file=""
	src_file="$wal"

	# Whatever the way we exit from now on, do not leave the temporary
	# file behind. The trap does not change the exit status.
	trap '[ -n "$tmp_file" ] && rm -f -- "$tmp_file"' EXIT

	# Depending on the options, we may check different things on the
	# remote host. To avoid many connections, we build a command to be
	# run one time on the remote host.

	# Create remote folder if needed. Return 2 on error.
	REMOTE_CMD="mkdir -p -- $(qw "$archive_dir") || exit 2"

	if [ "$ARCHIVE_ENCRYPT" = "yes" ]; then
		dest_file="${dest_file}.gpg"
		tmp_file=$(mktemp -t pitr_wal.XXXXXXXXXX) ||
			error "Failed to create temporary file for encrypted WAL" $?

		"${gpg_command[@]}" -o "$tmp_file" "$wal" || error "Failed to encrypt $wal" $?
		src_file="$tmp_file"

	elif [ "$ARCHIVE_COMPRESS" = "yes" ]; then
		# Compress the file to a temporary location
		dest_file="${dest_file}.$ARCHIVE_COMPRESS_SUFFIX"
		tmp_file=$(mktemp -t pitr_wal.XXXXXXXXXX) ||
			error "Failed to create temporary file for compressed WAL" $?

		# We take no risk, pipe the content to the compression program
		# and save output elsewhere: the compression program never
		# touches the input file
		$ARCHIVE_COMPRESS_BIN -c < "$src_file" > "$tmp_file" ||
			error "Compressing $wal to $tmp_file failed" $?

		src_file="$tmp_file"
	fi

	# Check if the file exists on the remote host. If the file exists
	# on the remote host and we do not overwrite it, we just get its
	# md5 sum and exit. Later the check of the md5 will exit on error
	# if the sum is different
	if [ "$ARCHIVE_OVERWRITE" != "yes"  ] && [ "$ARCHIVE_CHECK" = "yes" ]; then
		REMOTE_CMD="$REMOTE_CMD; [ -e $(qw "$dest_file") ] && echo \$(md5sum -- $(qw "$dest_file")) && exit 0"
	elif [ "$ARCHIVE_OVERWRITE" != "yes" ]; then
		REMOTE_CMD="$REMOTE_CMD; [ ! -e $(qw "$dest_file") ] || exit 3"
	fi

	# Copy the file with dd, which reads the data from the ssh stdin
	REMOTE_CMD="$REMOTE_CMD; dd $ARCHIVE_FLUSH of=$(qw "$dest_file") 2>/dev/null || exit 1"

	if [ "$ARCHIVE_CHECK" = "yes" ]; then
		REMOTE_CMD="$REMOTE_CMD; md5sum -- $(qw "$dest_file")"
	fi

	# Chmod if required
	if [ -n "$ARCHIVE_FILE_CHMOD" ]; then
		if printf '%s\n' "$ARCHIVE_FILE_CHMOD" | grep -qE '^[0-7]{3,4}$'; then
			REMOTE_CMD="$REMOTE_CMD; chmod $ARCHIVE_FILE_CHMOD $(qw "$dest_file") || exit 4"
		else
			warn "ARCHIVE_FILE_CHMOD is not in octal form, mode not changed"
		fi
	fi

	# Actually execute the remote commands. The status kept is the one
	# of ssh, the last command of the pipeline.
	remote_md5=$(dd if="$src_file" 2>/dev/null|ssh -- "$archive_ssh_target" "$REMOTE_CMD")
	rc=$?

	case $rc in
		0) ;;
		1) error "Unable to copy $wal to ${archive_ssh_target}:$archive_dir" $rc;;
		2) error "Unable to create target directory" $rc;;
		3) error "'$dest_file' already exists on $archive_host, refusing to overwrite it" $rc;;
		4) warn "Could not change mode of $dest_file to $ARCHIVE_FILE_CHMOD";;
		# ssh itself reports its own failures with 255; exit with 5 to
		# keep the status of this script small
		5|255) error "SSH error on ${archive_host}" 5;;
		*) error "Unexpected return code while copying the file" 100;;
	esac

	if [ "$ARCHIVE_CHECK" = "yes" ]; then
		local_md5=$(md5sum -- "$src_file")

		if [ "${local_md5%% *}" != "${remote_md5%% *}" ]; then
			error "md5 mismatch between local and remote file" 4
		fi
	fi

	# Remove temp file if exists
	if [ -e "$tmp_file" ]; then
		rm -- "$tmp_file" ||
			warn "Unable to remove temporary compressed file '$tmp_file'"
	fi
fi

exit 0
