summaryrefslogtreecommitdiff
path: root/pcr/mirror-sync/mirror-sync.sh
blob: 773d7a538f1781596e86bda7284f68600b7cedc8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env bash

# TODO: this is a WIP, to replace repo-sync.sh

# This file is derived from 'syncrepo-template.sh',
# originally fetched from: https://gitlab.archlinux.org/archlinux/infrastructure/-/raw/master/roles/syncrepo/files/syncrepo-template.sh

########
#
# Copyright © 2014-2019 Florian Pritz <bluewind@xinu.at>
# Copyright © 2021-2022 bill-auger    <bill-auger@programmer.net> (Parabola)
# For a complete list of contributors, see:
#   https://gitlab.archlinux.org/archlinux/infrastructure/-/graphs/master
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
########

# This is a simple mirroring script. To save bandwidth it first checks a
# timestamp via HTTP and only runs rsync when the timestamp differs from the
# local copy. As of 2016, a single rsync run without changes transfers roughly
# 6MiB of data which adds up to roughly 250GiB of traffic per month when rsync
# is run every minute. Performing a simple check via HTTP first can thus save a
# lot of traffic.
#
# Preparation:
#
# This script is mostly pre-configured; but some initial manual steps are required.
#
# * $UPSTREAM_HOST must be defined explicitly.
#
# * $LOCAL_DIR and $TEMP_DIR must be writable by the user executing this script.
#
# * In order for the $LOCK_FILE to work on systemd systems, you will need to add
#   a configuration file to /etc/tmpfiles.d/ (eg: /etc/tmpfiles.d/mirror-sync.conf),
#   with the following text:
#     D  /run/lock/<MIRROR_SYNC_LOCK_DIR>  -  <REPO_USER>  <REPO_GROUP>  -  -
#   where:
#     * <MIRROR_SYNC_LOCK_DIR> corresponds to the $LOCK_FILE entry
#       eg: /var/lock/MIRROR_SYNC_LOCK_DIR/mirror-sync.lck
#     * <REPO_USER> and <REPO_GROUP> are the login (or UID) and group (or GID)
#       of the user executing this script

# Parabola changes:
# * renamed vars and made constant
# * refactored into functions
# * always guard sync with '/lastupdate' file check
# * added parabola mirror examples


### CONFIG BEGIN ###

# Mandatory - Local filesystem path to the repo. Example: /srv/repo
readonly LOCAL_DIR=/srv/repo

# Mandatory - Local filesystem path to the staging directory. Example: /srv/repo-staging
# This should be on the same filesystem as, but not a subdirectory of $LOCAL_DIR.
readonly TEMP_DIR=/srv/repo-staging

# Mandatory - Lock file
readonly LOCK_FILE=/var/lock/mirror-sync/mirror-sync.lck

# Mandatory - Maximum incoming bandwidth limit.
# Use 0 to disable the limit.
# The default unit is KiB (see `man rsync` --bwlimit for the valid syntax)
readonly BW_LIMIT=0

# Mandatory - Source URL of the mirror from which you want to sync.
# eg: Parabola tier-1 mirrors:
#     'rsync.cyberbits.eu'       # Roubaix, France      # IPv4
#     'mirror.grapentin.org'     # Falkenstein, Germany # IPv4 IPv6
#     'parabola.ip-connect.info' # Vinnytsia, Ukraine   # IPv4 IPv6
readonly UPSTREAM_HOST=

# Optional - Non-standard port number (eg: ':2222')
# Normally, this will be empty
readonly UPSTREAM_PORT=

# Optional - Upstream filesystem path
# Conventionally, this is empty, or '/parabola'
readonly UPSTREAM_PATH=/parabola

### CONFIG END ###


readonly RSYNC_URL=rsync://${UPSTREAM_HOST}${UPSTREAM_PORT}${UPSTREAM_PATH}
readonly HTTP_URL=https://${UPSTREAM_HOST}${UPSTREAM_PATH}
readonly VERBOSE_OPTS='--human-readable --verbose --progress'
readonly QUIET_OPTS='--quiet'
readonly HAS_TTY=$( /usr/bin/tty -s && echo 1 || echo 0 )
readonly VERBOSITY="$( (( HAS_TTY )) && echo "${VERBOSE_OPTS}" || echo "${QUIET_OPTS}" )"


rsync_cmd()
{
  /usr/bin/rsync --recursive --perms --times --links --hard-links --safe-links \
                 --temp-dir="${TEMP_DIR}" --delete-after --delay-updates       \
                 --bwlimit="${BW_LIMIT}" --timeout=600 --contimeout=60         \
                 --no-motd --exclude='*.links.tar.gz*' ${VERBOSITY} "$@"
}

init()
{
  # Sanity checks and take lock.
  [[ -n "${UPSTREAM_HOST}" ]]           || return 1
  mkdir -p "${LOCAL_DIR}" "${TEMP_DIR}" || return 1
  exec 9> "${LOCK_FILE}"                || return 1
  /usr/bin/flock -n 9                   || return 1

  # Cleanup any temporary files from old run that might remain.
  find "${LOCAL_DIR}" -name '.~tmp~' -exec rm -rf {} +
}

main()
{
  init || return 1

  # Syncronize only when there are changes.
  local local_ts=$(    /usr/bin/cat      "${LOCAL_DIR}"/lastupdate )
  local upstream_ts=$( /usr/bin/curl -Ls "${HTTP_URL}"/lastupdate  )
  if [[ "${upstream_ts}" == "${local_ts}" ]]
  then # Force syncronize 'lastsync' file for statistics.
       rsync_cmd "$@" "${RSYNC_URL}"/lastsync "${LOCAL_DIR}"/lastsync
  else rsync_cmd "$@" "${RSYNC_URL}"/         "${LOCAL_DIR}"/
  fi
}


# Run the sync; all command-line arguments are forwarded to main().
main "$@"