#!/bin/bash
# Syncs Arch, ALARM or Arch32 repos based on info contained in the
# accompanying .conf files.
#
# usage: [V=true] db-import-pkg {packages|community|archlinux32|archlinuxarm}
#
# License: GPLv3

set -eE
source "$(librelib messages)"
source "$(librelib blacklist)"
source "$(librelib conf)"
setup_traps

# The upstreams this script knows how to import; main() rejects anything else.
readonly -a UPSTREAMS=(packages community archlinux{32,arm})

# usage: fetch_dbs <from> <into>
#
# Fetch excluding everything but db files.  <from> and <into> are passed
# straight to rsync as source and destination.  Reads the globals
# `extra`, `DBEXT` and `FILESEXT`.
#
# TODO: we could be doing without things other than what is in
#       ${ARCHTAGS[@]}
fetch_dbs() {
  rsync "${extra[@]}" --no-motd -mrtlH --no-p \
    --include="*/" \
    --include="*.db" \
    --include="*${DBEXT}" \
    --include="*.files" \
    --include="*${FILESEXT}" \
    --exclude="*" \
    --delete-after \
    "$1" "$2"
}

# usage: get_repo_dir <repo> <arch>
#
# Prints repo directory path for the given <repo> <arch> combination,
# relative to the rsync root.  The path is produced by substituting
# `$repo` and `$arch` in the global ${ARCHMIRROR_path}.
get_repo_dir() {
  # The assignments must prefix the command (rather than being separate
  # statements) so that they land in envsubst's environment; envsubst
  # only substitutes environment variables.  The single quotes are
  # intentional: envsubst wants the literal names of the variables.
  repo=$1 arch=$2 envsubst '$repo $arch' <<<"$ARCHMIRROR_path"
}

# usage: db_list_pkgs <path-to-db>
#
# Prints a list of packages within a given <path-to-db>, one-per-line,
# in the format:
#
#     pkgname [epoch:]pkgver-pkgrel
db_list_pkgs() {
  bsdtar tf "$1" |
    cut -d "/" -f 1 |
    sed -r 's/-([^-]*-[^-]*)$/ \1/' |
    sort -u
}

# usage: filter_blacklisted <FULL_LIST >FILTERED_LIST
#
# Given a list of packages in the format:
#
#     pkgname [epoch:]pkgver-pkgrel
#
# filter out all of the packages named in blacklist.txt.
filter_blacklisted() {
  sort -u | join -v1 \
    - \
    <(blacklist-cat | blacklist-get-pkg | sort -u)
}

# usage: filter_duplicates <FULL_LIST >FILTERED_LIST
#
# Given a list of packages in the format:
#
#     pkgname [epoch:]pkgver-pkgrel
#
# filter out arch=(any) packages present elsewhere, as it confuses
# parabolaweb, librechroot, and who-knows-what-else.  This only
# filters exact pkgname/epoch/pkgver/pkgrel matches.
filter_duplicates() {
  # FIXME: this will need to be adjusted when we turn PKGEXTS in
  # to an extglob.
  sort -u | comm -23 \
    - \
    <(find "${FTP_BASE}/pool/" -name "*-any${PKGEXTS}" -printf "%f\n" |
      sed 's/-any\.pkg.*//' |
      sed -r 's/-([^-]*-[^-]*)$/ \1/' |
      sort -u)
}

# usage: sync_pool <from> <path-to-whitelist> <into>
#
# Sync excluding everything but whitelist.  Creates <into> if needed.
sync_pool() {
  local -r _from=$1 _whitelist=$2 _into=$3

  mkdir -p -- "$_into"
  msg2 "Retrieving %d packages from %s pool" \
    "$(wc -l < "$_whitelist")" \
    "$(basename "$_into")"

  # *Don't delete-after*, this is the job of
  # cleanup scripts. It will remove our packages too
  rsync "${extra[@]}" --no-motd -rtlH \
    --delay-updates \
    --safe-links \
    --include-from="$_whitelist" \
    --exclude="*" \
    "$_from" \
    "$_into"
}

# usage: sync_repo <from> <path-to-whitelist> <into>
#
# Sync excluding everything but whitelist.  Unlike sync_pool(), files in
# <into> that are not on the whitelist are deleted.
# TODO: this is too similar to sync_pool(). Merge?
sync_repo() {
  local -r _from=$1 _whitelist=$2 _into=$3

  mkdir -p -- "$_into"
  msg2 "Retrieving %d files from repo" \
    "$(wc -l < "$_whitelist")"

  # We delete here for cleanup
  rsync "${extra[@]}" --no-motd -rtlH \
    --delete-after \
    --delete-excluded \
    --delay-updates \
    --include-from="$_whitelist" \
    --exclude="*" \
    "$_from" \
    "$_into"
}

# usage: poolify <arch> <pool> <PKGLIST >POOLFILELIST
#
# Given a list of packages in the format:
#
#     pkgname [epoch:]pkgver-pkgrel
#
# Resolve each to a file in `${FTP_BASE}/pool/`.  The output is
# relative to `${FTP_BASE}/pool/`.  That is, something along the lines
# of:
#
#     poolname/pkgname-[epoch:]pkgver-pkgrel-arch.pkg.tar.xz
#     archlinux32/zip-3.0-7-i686.pkg.tar.xz
#     packages/rhino-1.7.7.1-1-any.pkg.tar.xz
#
# Exits the script with status 1 if a package (or its `.sig`) cannot be
# found under any of the candidate paths.
poolify() {
  local -r arch=$1 pool=$2

  local pkgname fullpkgver
  local restore paths path

  # The candidate list below uses an extglob pattern, so enable extglob
  # for the duration of the loop and put it back afterwards.
  # `shopt -p` returns non-zero when the option is currently off, which
  # would abort the whole script under `set -e`; we only want its text.
  restore=$(shopt -p extglob || true)
  shopt -s extglob

  while read -r pkgname fullpkgver; do
    # Candidates, in order of preference.  Globs that match nothing
    # stay literal and simply fail the -f test below.
    paths=(
      "${FTP_BASE}/${pool}/${pkgname}-${fullpkgver}-${arch}.pkg.tar.xz"
      "${FTP_BASE}/${pool}/${pkgname}-${fullpkgver}-any.pkg.tar.xz"
      "${FTP_BASE}/pool"/*/"${pkgname}-${fullpkgver}-any.pkg.tar.xz"
      # HACK: Arch32 appends '.digits' to pkgrels.  That
      # prevents us from finding the corresponding package
      # if we're using an upstream Arch pool.
      "${FTP_BASE}/pool"/*/"${pkgname}-${fullpkgver%.+([0-9])}-any.pkg.tar.xz"
    )

    # Stop at the first candidate that exists; if none does, $path is
    # left at the last candidate and the check below reports failure.
    for path in "${paths[@]}"; do
      if [[ -f $path ]]; then
        break
      fi
    done

    if ! [[ -f "$path" && -f "${path}.sig" ]]; then
      error "No file was found for %q=%q, aborting" "$pkgname" "$fullpkgver"
      printf ' -> %q\n' "${paths[@]}" >&2
      exit 1
    fi
    printf '%s\n' "${path#"${FTP_BASE}/pool/"}"
  done

  # Intentionally unquoted: $restore holds a complete `shopt` command.
  $restore
}

# usage: make_repo_dbs <repo> <arch>
#
# Build fresh <repo> databases from the package files staged in
# `${WORKDIR}/staging-rsync/<repo>/os/<arch>` and rsync that directory
# into `${FTP_BASE}/<repo>/os/<arch>/`.
make_repo_dbs() {
  local -r from=${WORKDIR}/staging-rsync/${1}/os/${2}
  local -r into=${FTP_BASE}/${1}/os/${2}/
  local -r db_file=${from}/${1}${DBEXT}
  local -r files_file=${from}/${1}${FILESEXT}

  # create fresh databases to reflect actual `any.pkg.tar.xz` packages.
  # this also avoids corrupt upstream metadata (ALARM)
  msg2 "Adding whitelisted packages to clean DBs ..."

  pushd "${from}" >/dev/null
  # Declared separately from the assignment so that a failing command
  # substitution is not masked by `local`'s own exit status.
  local saved_umask
  saved_umask=$(umask)
  umask 002
  # ${PKGEXTS} is deliberately unquoted: it is part of the glob.
  repo-add "${db_file##*/}" *${PKGEXTS}
  umask "$saved_umask" >/dev/null
  popd >/dev/null

  mkdir -p -- "$into"

  # This bit is based on db-functions:set_repo_permission()
  local group
  group=$(/usr/bin/stat --printf='%G' "${into}")
  chgrp "$group" "${db_file}"
  chgrp "$group" "${files_file}"
  chmod g+w "${db_file}"
  chmod g+w "${files_file}"

  msg2 "Updating %s-%s databases" "$2" "$1"
  rsync "${extra[@]}" --no-motd -rtlpH \
    --delay-updates \
    --delete-after \
    --links \
    "$from/" "$into"
}

# usage: main <upstream>
#
# Main function. Process the databases and get the libre packages
# Outline:
# 1. Fetch package info
#    * Get blacklist.txt
#    * Get repo.db from an Arch-like repo
# 2. Figure out what we want
#    * Generate textfiles describing the current repo state, and
#      (using blacklist.txt) the desired repo state
# 3. Fetch the packages we want
#    * Create sync whitelist (based on package blacklist)
#    * Call sync_repo and/or sync_pool to fetch packages and signatures
# 4. Put the packages in the repos
#    * Create new repo.db with them (repo-add)
#    * rsync scratch directory => repos
main() {
  ##############################################################
  # 0. Initialization                                          #
  ##############################################################

  # Run as `V=true db-import-pkg` to get verbose output.
  #
  # Compare the value instead of executing it: running `${VERBOSE}` as
  # a command succeeds when V is unset (an empty command returns 0), so
  # verbose mode would be on by default, and any other value of V would
  # be executed as a command.
  readonly VERBOSE=${V:-false}
  extra=()
  if [[ $VERBOSE == true ]]; then
    extra+=(-v)
  fi
  readonly extra

  readonly UPSTREAM=$1

  # Print usage message
  if [[ $# -ne 1 ]] || ! in_array "$UPSTREAM" "${UPSTREAMS[@]}" ; then
    # IFS must be set as its own statement (not as a prefix to `msg`)
    # for "${UPSTREAMS[*]}" to be joined with '|'.  We exit right
    # after, so it is not restored.
    IFS='|'
    msg 'usage: [V=true] %s {%s}' "${0##*/}" "${UPSTREAMS[*]}"
    exit $EXIT_INVALIDARGUMENT
  fi

  # Configuration variables to load for the selected upstream.
  local vars
  case "$UPSTREAM" in
    packages|community)
      vars=(ARCHMIRROR ARCHTAGS ARCHPKGPOOL ARCHSRCPOOL)
      ;;
    archlinux32|archlinuxarm)
      vars=(ARCHMIRROR ARCHTAGS ARCHPKGPOOL)
      ;;
  esac
  load_conf "$(dirname "$(readlink -e "$0")")/config" DBEXT FILESEXT FTP_BASE
  load_conf "$(dirname "$(readlink -e "$0")")/db-import-${UPSTREAM}.conf" "${vars[@]}"

  # Split ARCHMIRROR into the part after `rsync://*/*/` (the path) and
  # everything before it.
  declare -rg ARCHMIRROR_path="${ARCHMIRROR#rsync://*/*/}"
  declare -rg ARCHMIRROR_fullmodule="${ARCHMIRROR%"/${ARCHMIRROR_path}"}"

  WORKDIR=$(mktemp -dt "${0##*/}.XXXXXXXXXX")
  # Double quotes on purpose: expand (and shell-quote) WORKDIR now.
  trap "rm -rf -- ${WORKDIR@Q}" EXIT

  ##############################################################
  # 1. Fetch package info                                      #
  ##############################################################

  # Get the blacklisted packages
  blacklist-update

  # Sync the repos databases
  msg 'Downloading .db and .files files to import'
  mkdir "${WORKDIR}/rsync"
  fetch_dbs "${ARCHMIRROR_fullmodule}/" "$WORKDIR/rsync"

  ##############################################################
  # 2. Figure out what we want                                 #
  ##############################################################

  mkdir "${WORKDIR}"/{old,new,dif}
  local _tag _repo _arch db_file
  for _tag in "${ARCHTAGS[@]}"; do
    # A tag is "<repo>-<arch>"; the arch is whatever follows the last '-'.
    _repo=${_tag%-*}
    _arch=${_tag##*-}

    # FIXME: this assumes that the local DBEXT and the
    # imported DBEXT are the same, which is potentially
    # not true.
    #
    # FIXME: this should use db-functions to lock the
    # repos while we read them.

    db_file="${FTP_BASE}/${_repo}/os/${_arch}/${_repo}${DBEXT}"
    db_list_pkgs "$db_file" > "${WORKDIR}/old/${_tag}.txt"

    db_file="${WORKDIR}/rsync/$(get_repo_dir "${_repo}" "${_arch}")/${_repo}${DBEXT}"
    db_list_pkgs "$db_file" | filter_blacklisted > "${WORKDIR}/new/${_tag}.txt"
  done

  # We now have $WORKDIR/old/ describing the way the repos are,
  # and $WORKDIR/new/ describing the way we want them to be.  We
  # now create $WORKDIR/dif/ describing how to get from point A
  # to point B.
  #
  # TODO: finish this section
  for _tag in "${ARCHTAGS[@]}"; do
    comm -23 "${WORKDIR}"/{old,new}/"${_tag}.txt" # take packages that have been "removed"
  done |
    grep -rFx -f /dev/stdin "${WORKDIR}/new/" | # but now appear in another repo
    sort -u > "${WORKDIR}/dif/moved.txt"
  comm -23 \
    <(cat "${WORKDIR}"/old/* | cut -d' ' -f1 | sort -u) \
    <(cat "${WORKDIR}"/new/* | cut -d' ' -f1 | sort -u) \
    > "${WORKDIR}/dif/removed.txt"

  ##############################################################
  # 3. Fetch the packages we want                              #
  ##############################################################

  # OK, now we have $WORKDIR/old/ describing the way the repos
  # are, $WORKDIR/new/ describing the way we want them to be,
  # and $WORKDIR/dif/ describing how to get from `old` to `new`.
  # We should (TODO) now use db-move, db-update, and db-remove
  # to apply that diff.
  #
  # But,
  #  - db-move is broken
  #  - The code that populates /dif/ isn't finished
  # So, just nuke the current repos and entirely re-create
  # everything from /new/.

  local whitelists=()
  for _tag in "${ARCHTAGS[@]}"; do
    msg "Processing %s" "$_tag"
    _repo=${_tag%-*}
    _arch=${_tag##*-}

    # Create a whitelist, add * wildcard to end.
    #
    # FIXME: due to lack of -arch suffix, the pool sync
    # retrieves every arch even if we aren't syncing them.
    #
    # IMPORTANT: the . in the sed command is needed
    # because an empty whitelist would consist of a single
    # * allowing any package to pass through.
    case "$UPSTREAM" in
      packages|community)
        sed -e 's/ /-/' -e 's|.$|&*|g' \
          <"${WORKDIR}/new/${_tag}.txt" \
          >"${WORKDIR}/${_tag}.whitelist"
        # Append to whitelists array so that we can
        # later sync_pool() all packages
        whitelists+=("${WORKDIR}/${_tag}.whitelist")
        # Get repo packages (symlinks)
        sync_repo \
          "${ARCHMIRROR_fullmodule}/$(get_repo_dir "${_repo}" "${_arch}")/" \
          "${WORKDIR}/${_tag}.whitelist" \
          "${WORKDIR}/staging-rsync/${_repo}/os/${_arch}/"
        ;;
      archlinux32|archlinuxarm)
        # Upstream doesn't use an $ARCHPKGPOOL
        filter_duplicates \
          <"${WORKDIR}/new/${_tag}.txt" \
          | sed -e 's/ /-/' -e 's|.$|&*|g' \
          > "${WORKDIR}/${_tag}.whitelist"
        sync_pool \
          "${ARCHMIRROR_fullmodule}/$(get_repo_dir "${_repo}" "${_arch}")/" \
          "${WORKDIR}/${_tag}.whitelist" \
          "${FTP_BASE}/${ARCHPKGPOOL}/"
        poolify "${_arch}" "${ARCHPKGPOOL}" \
          <"${WORKDIR}/new/${_tag}.txt" \
          >"${WORKDIR}/${_tag}.pool"
        # NOTE(review): make_repo_symlinks is not defined anywhere in
        # this file; confirm it is provided by one of the sourced
        # libraries, otherwise this call aborts the import under
        # `set -e`.
        make_repo_symlinks "$_tag" \
          <"${WORKDIR}/${_tag}.pool"
        ;;
    esac
  done

  case "$UPSTREAM" in
    packages|community)
      # Concatenate all whitelists, check for single *s just in case
      cat "${whitelists[@]}" | grep -v "^\*$" |
        sort -u > "${WORKDIR}/all.whitelist"
      # FIXME: make_whitelist() wildcards should be narrowed
      #        down to respect the architecture of the tag

      msg "Syncing package pool"
      sync_pool \
        "${ARCHMIRROR_fullmodule}/${ARCHPKGPOOL}/" \
        "${WORKDIR}/all.whitelist" \
        "${FTP_BASE}/${ARCHPKGPOOL}/"

      msg "Syncing source pool"
      sync_pool \
        "${ARCHMIRROR_fullmodule}/${ARCHSRCPOOL}/" \
        "${WORKDIR}/all.whitelist" \
        "${FTP_BASE}/${ARCHSRCPOOL}/"
      ;;
    archlinux32|archlinuxarm)
      : # do nothing
      ;;
  esac

  ##############################################################
  # 4. Put the packages in the repos                           #
  ##############################################################

  msg "Putting databases back in place"

  # FIXME: all repo DBs should be replaced at once (per architecture)
  # Expose the real pool inside the staging tree as a relative symlink.
  ln -srT "$FTP_BASE/pool" "${WORKDIR}/staging-rsync/pool"
  for _tag in "${ARCHTAGS[@]}"; do
    _repo=${_tag%-*}
    _arch=${_tag##*-}
    make_repo_dbs "$_repo" "$_arch"
  done
}

main "$@"