#!/usr/bin/env bash

########
#
# Copyright © 2014-2019 Florian Pritz <bluewind@xinu.at>
# Copyright © 2021 bill-auger <bill-auger@programmer.net>
# For a complete list of contributors, see:
#   https://gitlab.archlinux.org/archlinux/infrastructure/-/graphs/master
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
########

# This is a simple mirroring script. To save bandwidth it first checks a
# timestamp via HTTP and only runs rsync when the timestamp differs from the
# local copy. As of 2016, a single rsync run without changes transfers roughly
# 6MiB of data which adds up to roughly 250GiB of traffic per month when rsync
# is run every minute. Performing a simple check via HTTP first can thus save a
# lot of traffic.
#
# Preparation:
#
# This script is mostly pre-configured, but some initial manual steps are required.
#
# * $UPSTREAM_HOST must be defined explicitly.
#
# * $LOCAL_DIR and $TEMP_DIR must be writable by the user executing this script.
#
# * In order for the $LOCK_FILE to work on systemd systems, you will need to add
#   a configuration file to /etc/tmpfiles.d/ (eg: /etc/tmpfiles.d/repo-sync.conf),
#   with the following text:
#     D  /run/lock/<REPO_SYNC_LOCK_DIR>  -  <REPO_USER>  <REPO_GROUP>  -  -
#   where:
#     * <REPO_SYNC_LOCK_DIR> corresponds to the $LOCK_FILE entry
#       eg: /var/lock/REPO_SYNC_LOCK_DIR/repo-sync.lck
#     * <REPO_USER> and <REPO_GROUP> are login (or UID) and group (or GID)
#       of the user executing this script

# parabola changes:
# * renamed vars and made constant
# * refactored into functions
# * always guard sync with '/lastupdate' file check
# * added parabola mirror examples


### CONFIG BEGIN ###

# Mandatory - Directory which holds the local copy of the repo.
# Example: /srv/repo
readonly LOCAL_DIR='/srv/repo'

# Mandatory - Staging directory, handed to rsync as its --temp-dir.
# Example: /srv/repo-staging
# Keep it on the same filesystem as $LOCAL_DIR, but not beneath $LOCAL_DIR.
readonly TEMP_DIR='/srv/repo-tmp'

# Mandatory - Lock file; it ensures that only one instance runs at a time.
readonly LOCK_FILE='/var/lock/repo-sync/repo-sync.lck'

# Mandatory - Upper limit for incoming bandwidth, handed to rsync as its --bwlimit.
# 0 means unlimited.
# A bare number is taken as KiB (see man rsync /--bwlimit for the valid syntax)
readonly BW_LIMIT='0'

# Mandatory - Host name of the mirror from which you want to sync.
# Both the rsync:// and the https:// URL are derived from it.
# eg: Parabola tier-1 mirrors (tentative):
#     'rsync.cyberbits.eu'          # France     # IPv4 IPv6
#     'mirrors.dotsrc.org'          # Denmark    # IPv4 IPv6
#     'mirror.freedif.org'          # Singapore  # IPv4 IPv6
#     'mirror.grapentin.org'        # Germany    # IPv4
#     'parabola.ip-connect.info'    # Ukraine    # IPv4 IPv6
#     'rsync.linux.pizza/mirror'    # Sweden     # IPv4 IPv6
#     'mirror.csclub.uwaterloo.ca'  # Ontario    # IPv4 IPv6
readonly UPSTREAM_HOST=''

# Optional - Non-standard port number, including the leading colon (eg: ':2222')
# It is used in the rsync:// URL only. Normally, this will be empty.
readonly UPSTREAM_PORT=''

# Optional - Path of the repo on the upstream host
# Conventionally, this is '/parabola'
readonly UPSTREAM_PATH='/parabola'

### CONFIG END ###


# URLs derived from the CONFIG section. The optional port applies to the
# rsync:// URL only; the timestamp check always uses plain https://.
readonly RSYNC_URL="rsync://${UPSTREAM_HOST}${UPSTREAM_PORT}${UPSTREAM_PATH}"
readonly HTTP_URL="https://${UPSTREAM_HOST}${UPSTREAM_PATH}"

# rsync output options: chatty when stdin is a terminal, silent otherwise.
readonly VERBOSE_OPTS='--human-readable --verbose --progress'
readonly QUIET_OPTS='--quiet'
if /usr/bin/tty -s
then
  readonly HAS_TTY=1
  readonly VERBOSITY="${VERBOSE_OPTS}"
else
  readonly HAS_TTY=0
  readonly VERBOSITY="${QUIET_OPTS}"
fi


# Run rsync with the fixed set of mirroring options.
# Globals:   TEMP_DIR, BW_LIMIT, VERBOSITY (all read)
# Arguments: anything extra for rsync (normally ending with source and
#            destination); appended after the fixed options
# Returns:   the exit status of rsync
rsync_cmd()
{
  local -a fixed_opts=(
    -rtlH
    --safe-links
    --delete-after
    --timeout=600
    --contimeout=60
    --perms
    --delay-updates
    --no-motd
    --exclude='*.links.tar.gz*'
    --temp-dir="${TEMP_DIR}"
    --bwlimit="${BW_LIMIT}"
    # deliberately unquoted: $VERBOSITY may hold several space-separated options
    ${VERBOSITY}
  )

  /usr/bin/rsync "${fixed_opts[@]}" "$@"
}

# Prepare a sync run: validate the configuration, create the working
# directories, take the lock and remove leftovers of an interrupted run.
# Globals:   UPSTREAM_HOST, LOCAL_DIR, TEMP_DIR, LOCK_FILE (all read)
# Outputs:   a diagnostic on stderr when $UPSTREAM_HOST is not configured
# Returns:   0 when the run may proceed; non-zero when a sanity check fails,
#            the lock cannot be taken, or the cleanup reports an error
# Note:      file descriptor 9 is left open on $LOCK_FILE on purpose; the
#            lock is held for as long as the descriptor stays open.
init()
{
  # Sanity checks and take lock.
  if [[ -z "${UPSTREAM_HOST}" ]]
  then
    # The script ships with an empty host; say so instead of failing silently.
    printf '%s: UPSTREAM_HOST is not set; edit the CONFIG section of this script\n' \
           "${0##*/}" >&2
    return 1
  fi
  mkdir -p "${LOCAL_DIR}" "${TEMP_DIR}" || return 1
  exec 9> "${LOCK_FILE}"                || return 1
  # -n: fail at once rather than wait when another instance holds the lock.
  /usr/bin/flock -n 9                   || return 1

  # Cleanup any temporary files from old run that might remain.
  # '.~tmp~' is the holding directory used by rsync --delay-updates.
  # -prune keeps find from descending into directories that are about to be
  # removed as a whole.
  find "${LOCAL_DIR}" -name '.~tmp~' -prune -exec rm -r -- {} +
}

# Run one mirroring pass.
# Globals:   LOCAL_DIR, HTTP_URL, RSYNC_URL (all read)
# Arguments: optional extra rsync options; they are forwarded to rsync_cmd
#            ahead of the source and destination
# Returns:   1 when init fails, otherwise the exit status of rsync_cmd
main()
{
  init || return 1

  # Declared separately from the assignments so that 'local' does not mask
  # the exit status of the command substitutions.
  local local_ts='' upstream_ts=''

  # On the very first run there is no local timestamp yet; that is not an error.
  if [[ -r "${LOCAL_DIR}"/lastupdate ]]
  then local_ts=$(< "${LOCAL_DIR}"/lastupdate)
  fi
  # --fail: an HTTP error yields empty output rather than an error page.
  upstream_ts=$( /usr/bin/curl -fLs "${HTTP_URL}"/lastupdate )

  # Synchronize only when there are significant changes.
  # Otherwise, synchronize the 'lastsync' file only, for statistics.
  # An empty upstream timestamp means that the check itself failed; it must
  # never count as "unchanged", or a fresh mirror would not get populated.
  if [[ -n "${upstream_ts}" && "${upstream_ts}" == "${local_ts}" ]]
  then rsync_cmd "$@" "${RSYNC_URL}"/lastsync "${LOCAL_DIR}"/lastsync
  else rsync_cmd "$@" "${RSYNC_URL}"/         "${LOCAL_DIR}"/
  fi
}


# Entry point: all command-line arguments are handed to main, which forwards
# them to rsync; the exit status of main becomes the exit status of the script.
main "$@"