summaryrefslogtreecommitdiff
path: root/parabola-mirror-repos
blob: 341427885b68ecb35d6dcd82ff5de916582f8100 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
#!/bin/bash

# alfplayer
# 2014-06-12

# Bash script to mirror only some repositories of Parabola.
#
# Synchronizing from Arch first (for example with db-sync and any-to-ours from dbscripts) is
# recommended, so that this script can afterwards pull packages from the local Arch mirror.

script_filename="$(basename "$0")"

# Create temporary log file
log_tmp="$(mktemp -p /var/tmp)"

# Default configuration values.
# They can be overridden by setting the variables in the calling environment;
# a value is only assigned here when the variable is unset or empty.
# Boolean switches: 1 to set, 0 to unset
: "${project:=parabola}"
#: ${server:=rsync://repo.parabola.nu:875/repos}
: "${server:=rsync://alfplayer.com/repos/parabola}"    # remote rsync directory
#: ${server:=rsync://parabolagnulinux.mirrors.linux.ro/parabolagnulinux}
: "${base_dir:=/srv/http}"
: "${local_dir:=${base_dir}/${project}}"         # symlink to the last snapshot
: "${repos:=core extra community multilib libre kernels libre-multilib libre-multilib-testing libre-testing nonprism nonprism-testing java cross pcr}"
  # mips64el is also excluded in some rsync invocations in this script
: "${max_delete:=10000}"    # maximum amount of files to delete in the local directory
: "${BW_LIMIT:=1300}"   # KB/s
: "${log_file:=${script_filename}.log}"
: "${options:=-rltvH --no-p --no-g --max-delete=$max_delete --exclude=.* --bwlimit=${BW_LIMIT} --no-motd --chmod=Dug=srwx,Fug=rw --log-file=${log_tmp}}"
: "${oldest_version:=100}"        # delete versions older than this version
: "${link_dest[0]:=${base_dir}/archlinux}"
#: ${link_dest[1]:=${base_dir}/parabola.secondary_mirror.tmp}
  # link-dest does not support any network URI like rsync://...
: "${mail_to:=your@mail.com}"   # address the log file is mailed to
: "${date_exclude:=2014.06.19}"    # do not run this script on this date (see next line)
: "${forcerun:=0}"            # set to 1 to force running on excluded date
: "${terminal:=1}"             # outputs to stdout using rsync --progress (and logs to a file)
: "${alternative_mirror:=0}"     # use alternative mirror; forces no_snapshot_delete and leave_tmp; synchronized files end up in ${local_dir}.tmp
: "${no_snapshot_delete:=0}"     # disable deletion of oldest snapshots; alternative_mirror enables it forcefully
: "${leave_tmp:=0}"              # leave updated tree in {project}.tmp instead of creating a dated snapshot directory
: "${snapshot_symlink_update:=1}"         # update symlink to last snapshot
			       # does nothing if leave_tmp is enabled
: "${TZ:=UTC}"              # set timezone to UTC (affects rsync log output)
: "${db_and_symlinks_update:=1}"         # update DB files and package symlinks
			  # disabling this reuses the pool file list in /tmp, which can be left over from the last invocation of parabola-mirror-repos with this option set
: "${pools_update:=1}"      # update files in package pools
: "${no_file_delete:=1}"     # do not delete files
: "${link_dest_snapshot_count:=3}"  # number of snapshots that are passed as --link-dest to rsync
# The original guard read "[[ ! {path_list} ]]" (missing "$"), which is never
# true, so this default was silently skipped.
: "${path_list:=docs sources mirrorlist.txt lastupdate}"    # list of extra paths to synchronize

export TZ

# Pick the mirror profile.
# With alternative_mirror=1 the secondary server is used, the result is left in
# ${local_dir}.tmp and old snapshots are kept; otherwise the secondary mirror
# tree is only added as an extra --link-dest candidate.
case "${alternative_mirror}" in
    1)
        no_snapshot_delete=1
        leave_tmp=1
        #server="rsync://repo.parabola.nu:875/repos"
        server="rsync://parabolagnulinux.mirrors.linux.ro/parabolagnulinux"
        local_dir="${base_dir}/${project}.secondary_mirror"
        #link_dest=("${base_dir}/archlinux" "${base_dir}/parabola.tmp")
        link_dest+=("${base_dir}/${project}.tmp")
        # Create any missing working directory, group-writable with setgid
        for dir in "$local_dir" "$local_dir".tmp ${local_dir}.tmp/pool ; do
            [[ -d "$dir" ]] && continue
            mkdir -pv "$dir"
            chmod -v 2770 "$dir"
        done
        ;;
    *)
        #link_dest+=("${base_dir}/${project}.tmp")
        link_dest+=("${base_dir}/${project}.secondary_mirror")
        ;;
esac


# Work files: pool paths referenced by the remote repositories, pool files
# present locally, and the local files that are absent from the remote list.
remote_pool_files=/tmp/${project}-remote-files
local_pool_files=/tmp/${project}-local-files
pool_files_to_delete=/tmp/${project}-to-delete-files

# Lock with flock (provided by util-linux), save to log file and send email on exit
LOCKFD=99
lockfile=/var/lock/${script_filename}

# Run flock on the script's lock file descriptor.
# Arguments: $1 - flock option letters without the leading dash
#                 (the script uses "xn" to acquire and "u" to release)
# Returns:   the exit status of flock
_lock() {
    flock -$1 $LOCKFD
}

# Block until no process has ${log_tmp} open (checked with fuser every 0.2 s).
# Needed because the logged command groups write through a background tee that
# may still be running after the group itself has ended.
# Returns immediately when the log file does not exist.
_wait_log_tmp() {
    [[ -e "${log_tmp}" ]] || return 0

    while : ; do
        fuser -s "${log_tmp}" || break
        sleep 0.2
    done
}

# EXIT handler: persist the temporary log, mail it, report failures and
# release the lock.
# Globals read:    log_tmp, log_file, local_dir, mail_to, script_filename, lockfile
# Globals written: es (exit status the script is terminating with), log_written
_no_more_locking()  {
    # Save exit status. This must be the very first command: anything run
    # before it (including "set +e") replaces $? with its own status.
    es=$?

    set +e

    log_written=0

    # Append the temporary log to the persistent log once nobody writes to it
    _wait_log_tmp
    if cat "${log_tmp}" >> "${log_file}" ; then
        log_written=1
    else
        echo "=> ERROR: Failed to write to log file: ${log_file}" >&2
    fi

    if [[ -e ${local_dir}.tmp ]] ; then
        echo "=> WARNING: Temporary directory ${local_dir}.tmp remains in file system"
    fi

    if [[ ${mail_to} ]] ; then
        echo "=> Sending output to ${mail_to}"
        mail -s "[$(hostname)] ${script_filename}" "${mail_to}" < "${log_tmp}"
    fi

    # Keep the temporary log when it could not be appended to the log file
    if [[ ${log_written} == 1 ]] ; then
        rm -f "${log_tmp}"
    fi

    if [[ $es != 0 ]] ; then
        echo "=> WARNING: Unsuccessful script termination. Exit status: $es"
        if [[ ${mail_to} ]] ; then
            echo "=> Sending error notification to ${mail_to}"
            mail -s "[$(hostname)] ${script_filename} failed. See logged output." "${mail_to}" < /dev/null
        fi
    fi

    # Release the lock; remove the lock file only if the exclusive lock can be
    # re-acquired without waiting
    _lock u
    _lock xn && \
        rm -f "$lockfile"
}

# Open ${lockfile} for writing on descriptor ${LOCKFD} and register
# _no_more_locking as the EXIT handler.
# eval is used because the descriptor number comes from a variable.
_prepare_locking() {
    eval "exec ${LOCKFD}>\"${lockfile}\""
    trap _no_more_locking EXIT
}

# Open the lock descriptor and install the exit handler
_prepare_locking

# Lock now, without waiting for another instance. The lock is released by the
# exit handler when the script exits (with any error code).
_lock xn || {
    echo "=> ERROR: Could not obtain lock. Exiting." >&2
    exit 1
}

# State flags; they are filled in by the checks further down
current_exists=0
tmp_exists=0
local_useful=0
first_run=0
date_exact=""

# Name of today's snapshot
date="$(date +%Y.%m.%d)"
current="${local_dir}-${date}"
current_component="${current##*/}"

# Add the snapshots of the previous ${link_dest_snapshot_count} days as
# --link-dest candidates
link_dest_count=1
while (( link_dest_count <= ${link_dest_snapshot_count} )) ; do
    date_count="$(date -d @$(( $(date +"%s") - ${link_dest_count} * 86400)) +"%Y.%m.%d")"
    link_dest+=("${base_dir}/${project}-${date_count}")
    (( link_dest_count++ ))
done

# Append the bandwidth limit to the rsync options whenever one is set
[[ ${BW_LIMIT} ]] && options+=" --bwlimit=${BW_LIMIT}"

# Word-split the space separated lists into arrays
repos_array=(${repos})
path_list_array=(${path_list})

# Print all arguments to stderr, then terminate the script with status 1.
error() {
	>&2 echo "$@"
	exit 1
}

{

# Skip the run on the excluded date unless forcerun is set
if [[ ${date_exclude} && ${forcerun} != 1 ]] ; then
    if [[ $date == ${date_exclude} ]] ; then
	echo "Manually disabled: ${date}. Exiting."
	exit 0
    fi
fi

# Parse options.
# -t or --terminal enables rsync option --progress
# -h prints the available options and exits with status 0
# Any other argument, or more than one argument, is an error: the script
# exits with status 1 instead of starting a synchronization the caller did not
# ask for.
# Globals written: terminal
_parse_args() {
  if [[ $# == 1 ]] ; then
    case "$1" in
      -t|--terminal)
        terminal=1
        ;;
      -h)
        echo "Available options: -h, -t (rsync --progress)"
        exit 0
        ;;
      *)
        echo "=> Wrong argument: $1" >&2
        exit 1
        ;;
    esac
  elif [[ $# -gt 1 ]] ; then
    echo "=> ${script_filename} has a wrong number of arguments" >&2
    exit 1
  fi
}

_parse_args "$@"

if [[ $terminal == 1 ]] ; then
    options+=" --progress"
fi

# Build link_dest_option: one "--link-dest=DIR" word per entry of link_dest
# that is an existing directory. Entries that are not directories are skipped
# with a warning.
# (The previous code stored the space-joined expansion of ALL entries in each
# array slot, so rsync received a single bogus argument per existing
# directory, including the directories that do not exist.)
# Globals read:    link_dest
# Globals written: link_dest_option
_build_link_dest_options() {
    local candidate
    link_dest_option=()
    for candidate in "${link_dest[@]}" ; do
        if [[ -d ${candidate} ]] ; then
            link_dest_option+=("--link-dest=${candidate}")
        else
            echo "=> WARNING: Argument to rsync option --link-dest is not an existing directory: ${candidate}"
        fi
    done
}

_build_link_dest_options

# Record whether today's snapshot (${current}) and the working tree
# (${local_dir}.tmp) already exist
[[ -e ${current} ]] && current_exists=1
[[ -e ${local_dir}.tmp ]] && tmp_exists=1

# ${local_dir} is expected to be a symlink to the last snapshot directory
if [[ ! -h ${local_dir} ]] ; then
  # Not a symlink: acceptable only on a first run, i.e. when neither
  # ${local_dir} nor any ${local_dir}-* entry exists
  if [[ ! -e ${local_dir} ]] && ! stat -t ${local_dir}-* >/dev/null 2>&1 ; then
    echo "=> WARNING: ${local_dir} does not exist or is not a symlink, and no snapshot directories were found, so it is assumed this is the first time ${script_filename} is run using \"${base_dir}\" as the base directory."
    first_run=1
  else
    error "=> ERROR: ${local_dir} exists but is not a symlink, or a file (or directory) ${local_dir}-* was found. Fix this before running ${script_filename} again."
  fi
else
  last_path="$(readlink -f "${local_dir}")"
  [[ -d ${last_path} ]] || error "=> ERROR: ${local_dir} is a symlink which does not point to an existing directory."
  last="${last_path##*/}"
  local_useful=1
fi

# Check the current tree and issue warnings and errors based on the current tree state
# Also, it sets up the temporary directory
#
# Inputs are the flags current_exists, local_useful, tmp_exists and first_run.
# After this block ${local_dir}.tmp exists (freshly created, hard-link copied
# with "cp -al", or left over from an earlier run), unless the script exited.
if [[ ${current_exists} == 1 ]] ; then
  # Today's snapshot is already there: keep it and keep every older snapshot
  echo "=> WARNING: ${current} already exists. It will be preserved."
  no_snapshot_delete=1
  if [[ ${local_useful} == 1 ]] ; then
    if [[ ${tmp_exists} == 0 ]] ; then
      echo "=> WARNING: ${local_dir}.tmp does not exist."
      # Seed the working tree with hard links to today's snapshot
      cp -al "${current}" "${local_dir}".tmp
    fi
    # A time suffix gives the new snapshot a name that differs from ${current}
    date_exact="$(date +%Y.%m.%d-%T)"
    echo "=> WARNING: Snapshot ${local_dir}-${date_exact} will be created because ${current} already exists"
  else
    echo "=> WARNING: ${local_dir} does not exist. It will be created."
    if [[ ${tmp_exists} == 0 ]] ; then
      echo "=> WARNING: As ${local_dir}.tmp does not exist, data transfer will start from ${current}"
      cp -al "${current}" "${local_dir}".tmp
    else
      echo "=> WARNING: Data transfer will start from ${local_dir}.tmp"
    fi
  fi
elif [[ ${local_useful} == 0 ]] ; then
  # No snapshot for today and no usable ${local_dir} symlink
  if [[ ${tmp_exists} == 1 ]] ; then
    echo "=> ${local_dir} is not useful but ${local_dir}.tmp exists. Resuming from ${local_dir}.tmp"
    echo "=> WARNING: Symlink ${local_dir} does not exist. It will be created."
  else
    if [[  ${first_run} == 0 ]] ; then
      error "=> ERROR: ${local_dir}.tmp does not exist and ${local_dir} is not useful. Exiting."
    else
      # First run: start from an empty working tree
      mkdir "${local_dir}".tmp
    fi
  fi
else
  # Normal case: ${local_dir} points to the last snapshot
  if [[ ${tmp_exists} == 1 ]] ; then
    echo "=> ${local_dir}.tmp already exists. Symlink \"${project}\" currently points to ${last_path}."
  else
    # Seed the working tree with hard links to the last snapshot
    cp -al "${last_path}" "${local_dir}".tmp
  fi
fi

echo
echo "=> Creating snapshot for date ${date}"

# Change to the temporary directory.
# The pool file listing and the stale file removal below use paths relative to
# this directory, so the run must stop if it cannot be entered.
cd "${local_dir}".tmp || error "=> ERROR: Could not change to directory ${local_dir}.tmp"

# Delete the stale pool files listed in a file, or only print the list when
# deletion is disabled.
# (The previous inline code had the no_file_delete test inverted: it deleted
# when no_file_delete was 1 and reported "disabled" otherwise.)
# Arguments: $1 - file with one path per line, relative to the current directory
#            $2 - number of paths in that file (used in messages only)
# Globals:   no_file_delete (read) - 1 disables the deletion
_prune_pool_files() {
  local list_file=$1 count=$2
  local -a stale_files=()

  if [[ ${no_file_delete} == 1 ]] ; then
    echo "=> Deletion of pool package files (${count}) is disabled. List of files:"
    cat -- "${list_file}"
    return 0
  fi

  echo "=> Deleting ${count} old pool package files"
  mapfile -t stale_files < "${list_file}"
  # Never call find without start paths: it would fall back to "." and delete
  # the contents of the current directory.
  if [[ ${#stale_files[@]} -eq 0 ]] ; then
    return 0
  fi
  find "${stale_files[@]}" -print -exec rm -f -- {} \;
}

if [[ ${db_and_symlinks_update} == 1 ]] ; then

  # Delete temporary files that may be left over by a previous invocation of parabola-mirror-repos
  rm -f "$remote_pool_files" "$local_pool_files" "$pool_files_to_delete" || true

  # NOTE(review): both assignments produce one space-joined string, so "/os"
  # is appended only after the last repository; the string is word-split again
  # on the rsync command line below. Kept as is -- confirm whether every
  # repository was meant to get the "/os" suffix.
  remote_repo_dirs="${repos_array[@]/#/${server}/}"
  remote_repo_dirs="${remote_repo_dirs[@]/%//os}"

  echo "=> Getting pool path list"
  # Dry-run listing of the remote repositories; the symlink targets ("-> ...")
  # that point into pool/ are collected as pool-relative paths.
  # Without pipefail only the status of the last pipeline stage (sed) is checked.
  rsync -lrv --out-format="%L" --dry-run --exclude 'mips64el' --exclude '*mips64el.pkg.tar.*' ${remote_repo_dirs[@]} "/tmp/${script_filename}.unexistent_filename" \
    | grep -- "->" \
    | grep "pool.*[^/]$" \
    | sed -e "s#.*\(pool/.*/.*\)#\1#" \
  >> "$remote_pool_files" || error "  => ERROR: Failed with error code: $?"

  if [[ ${first_run} == 0 ]] ; then
    echo "=> Getting local pool package file list"
    # Build a list of local packages
    find pool -mindepth 2 >> "$local_pool_files"

    echo "=> Building list of local pool package files to delete"

    # Avoid duplicates (comes from -any packages present in both i686/ and x86_64/)
    sort -u -o "$remote_pool_files" "$remote_pool_files"

    sort -o "$local_pool_files" "$local_pool_files"

    # Keep lines that only appear in local_pool_files
    comm -13 "$remote_pool_files" "$local_pool_files" > "$pool_files_to_delete"

    NUMBER_TO_DELETE="$(wc -l < "$pool_files_to_delete")"

    if [[ "$NUMBER_TO_DELETE" -gt "$max_delete" ]] ; then
      error "  => ERROR: The number of pool package files to be deleted is ${NUMBER_TO_DELETE}, greater than the specified maximum which is ${max_delete}"
    fi

    if [[ $NUMBER_TO_DELETE -gt 0 ]] ; then
      _prune_pool_files "$pool_files_to_delete" "$NUMBER_TO_DELETE"
    fi
  fi
fi

echo "=> List of local repositories. Existing files will be hard linked from these instead of being fetched from the remote server."
printf -- '%s\n' "${link_dest[@]}"

} &> >(tee -a "${log_tmp}")

# Let the logger of the previous command group finish before rsync appends to
# the same temporary log through --log-file
_wait_log_tmp

if [[ ${db_and_symlinks_update} == 1 ]] ; then
    echo "=> Starting to synchronize repository directories (symlinks and db.* files)"

    # $options is left unquoted on purpose: it holds several words.
    # The status reported on failure is the one returned by rsync.
    rsync $options --stats --exclude 'mips64el' --delete-after --safe-links \
        "${link_dest_option[@]}" --link-dest="$local_dir" \
        "${repos_array[@]/#/${server}/}" \
        "$local_dir".tmp \
        || error "  => ERROR: rsync terminated with an error code: $?" 
fi

# Announce the pool synchronization through the logged channel
{
    [[ ${pools_update} != 1 ]] || echo "=> Starting to synchronize package pools from remote server $server"
} &> >(tee -a "${log_tmp}")

_wait_log_tmp

# Fetch the pool files listed in ${remote_pool_files}.
# NOTE(review): unlike the other rsync calls, the exit status is not checked here.
if [[ ${pools_update} == 1 ]] ; then
    rsync $options --stats --exclude '*-mips64el.pkg.tar.*' --safe-links \
        --files-from="$remote_pool_files" \
        "${link_dest_option[@]}" --link-dest="$local_dir" \
        $server "${local_dir}".tmp
fi

# Synchronize the extra top-level paths (path_list) into the working tree.
# Each source is copied by name below ${local_dir}.tmp/, so the --link-dest
# directories are passed unchanged. The previous command appended "/$path" to
# them, but no variable named "path" is assigned anywhere in this script.
if [[ ${path_list} ]] ; then
  echo "=> Synchronizing extra paths"
  rsync $options --stats --safe-links "${link_dest_option[@]}" --link-dest="$local_dir" "${path_list_array[@]/#/${server}/}" "${local_dir}".tmp/ || error "  => ERROR: rsync terminated with an error code: $?"
fi

{

# Print the dated snapshot directories (${local_dir}-YYYY.MM.DD, directly below
# ${base_dir}) that are older than the newest ${oldest_version} ones, one per
# line, oldest first.
# find does not return entries in any particular order, so the list is sorted
# before "head -n -N" drops the newest N names.
# Globals read: base_dir, local_dir, oldest_version
_list_old_snapshots() {
    find "${base_dir}" -maxdepth 1 -regextype sed \
        -regex "${local_dir}-[0-9]\{4\}\.[0-9]\{2\}\.[0-9]\{2\}" \
        | LC_ALL=C sort \
        | head -n -"${oldest_version}"
}

if [[ ${no_snapshot_delete} != 1 ]] ; then

    # Nothing is removed here: the candidates are only reported
    echo "=> DRY-RUN: Delete versions older than the version number: ${oldest_version}."
    if delete_list="$(_list_old_snapshots)" ; then
        if [[ ${delete_list} ]] ; then
            echo "DRY_RUN: rm -rf ${delete_list}"
        else
            echo "=> Nothing to delete"
        fi
    fi

fi    # closes [[ ${no_snapshot_delete} != 1 ]]

# Turn the working tree into a dated snapshot and point the served symlink at
# it. Skipped when leave_tmp is set.
# NOTE(review): the results of mv, cd and ln are not checked.
if [[ ${leave_tmp} == 0 ]] ; then
    echo "=> Starting to serve the new repository version"

    # The snapshot name carries a time suffix when date_exact was set because
    # today's snapshot already existed
    if [[ ${date_exact} ]] ; then
        snapshot_path="${local_dir}-${date_exact}"
    else
        snapshot_path="${current}"
    fi
    echo "  => Renaming ${local_dir}.tmp to ${snapshot_path}"
    mv "${local_dir}".tmp "${snapshot_path}"

    if [[ ${snapshot_symlink_update} == 1 ]] ; then
        # The symlink targets below are relative to the base directory
        cd ${base_dir}

        if [[ ${local_useful} == 1 ]] ; then
            echo "  => Deleting symlink $local_dir"
            rm -rf "${local_dir}"
        fi

        # Create symlink
        if [[ ! ${date_exact} ]] ; then
            echo "  => Creating symlink \"${project}\" to ${current_component}"
            ln -s ${current_component} ${project}
        else
            echo "  => Creating symlink ${local_dir} to ${project}-${date_exact}"
            ln -s ${project}-"${date_exact}" ${local_dir}
        fi
    fi
fi

# Remove the work files of this run; a failed removal is ignored
rm -f \
    "$remote_pool_files" \
    "$local_pool_files" \
    "$pool_files_to_delete" \
    "$local_dir".old \
    || true

# Final report: free space of the base directory and the finish time
echo "=> Disk space report"
df -h "${base_dir}"

echo "=> ${script_filename} finished successfully. Finish time: $(date --rfc-3339=seconds)"

} &> >(tee -a "${log_tmp}")

# Wait until no process has the temporary log file open before the script
# ends (the EXIT handler then appends it to the log file)
_wait_log_tmp