From dfb41fcd55c211aa274f5afcfc715d6926f99511 Mon Sep 17 00:00:00 2001 From: bill-auger Date: Sat, 14 Dec 2019 02:03:38 -0500 Subject: move sorting into check.sh, log to file, add summary --- .gitignore | 1 - README | 3 ++- check.sh | 62 ++++++++++++++++++++++++++++++++++++++++++------------------ sort-entries | 22 --------------------- 4 files changed, 46 insertions(+), 42 deletions(-) delete mode 100755 sort-entries diff --git a/.gitignore b/.gitignore index 83a5258..1acfe1f 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,6 @@ !find-replacements !find-deprecated-pkgs !README -!sort-entries !SYNTAX !your-freedom_emu-blacklist.txt !your-privacy-blacklist.txt diff --git a/README b/README index 5b3bb8d..7fcf394 100644 --- a/README +++ b/README @@ -41,7 +41,8 @@ only exist for the sake of using nonfree software; respectively. see NOTE: your-freedom_emu-blacklist packages are not meant to have replacements. 2. Pull [8]blacklist.git. 3. Add an entry for the package according to the SYNTAX file. - 4. Run ./sort-entries. + 4. Run ./check.sh. This will detect any syntax errors, and also sort the entries + and remove empty lines. Please do this. It will help parsers do their jobs. 5. If you have [5]git write access, push the changes back. If not, send a patch produced using `git format-patch` to dev@lists.parabola.nu. 6. If you have shell access, ssh into repo.parabola.nu and run db-check-nonfree. diff --git a/check.sh b/check.sh index 48a8f8a..7de5a92 100755 --- a/check.sh +++ b/check.sh @@ -12,35 +12,61 @@ readonly REF_REGEX='^[^:]*:[^:]*::[^:]*:.*$' readonly SYNTAX_REGEX='^[^:]*:[^:]*:(sv|debian|parabola|fsf|fedora)?:[^:]*:.*$' readonly CSV_CHAR=':' readonly SEP_CHAR='!' +readonly LOG_FILE=./check.log ; rm ${LOG_FILE} 2> /dev/null exit_status=0 -printf "\n\nchecking for entries without reference to detailed description: ... 
" >&2 -unsourced="$(egrep ${REF_REGEX} *.txt)" -if [[ -z "$unsourced" ]] -then printf "OK\n" >&2 -else printf "\n[citation needed]:\n\n%s\n\n" "$unsourced" >&2 - echo "entries needing citation: $(grep -c '\n' <<<${unsourced})" - exit_status=1 + +# TODO: the best sorting results are achieved when the field separator ($CSV_CHAR) +# precedes any valid package name character in ASCII order +# the lowest of which is ASCII 43 '+', and spaces are not allowed; +# so ASCII 33 ('!') serves this purpose quite well +# someday, we should re-write the tools to parse on '!' instead of ':' +# if that were done, then the `sort` command alone would yield +# the same results as this procedure, except for removing empty lines +unsortable="$( + for blacklist in *.txt + do echo -n "sorting and cleaning: '${blacklist}' ... " >> ${LOG_FILE} + if grep ${SEP_CHAR} ${blacklist} + then echo "ERROR: can not sort - contains '${SEP_CHAR}' char" >> ${LOG_FILE} + retval=1 + else echo "OK" >> ${LOG_FILE} + cat ${blacklist} | tr "${CSV_CHAR}" "${SEP_CHAR}" | sort | uniq | \ + sed '/^[[:space:]]*$/d' | tr "${SEP_CHAR}" "${CSV_CHAR}" > ${blacklist}.temp + mv ${blacklist}.temp ${blacklist} + fi + done +)" +if [[ -n "$unsortable" ]] +then printf "\n[Entries containing '%s' char]:\n\n%s\n\n" "${SEP_CHAR}" "$unsortable" >> ${LOG_FILE} + echo -n "ERROR: one of the data files is unsortable - check can not continue" + echo " - correct the malformed entries, then run this script again" + exit 1 fi -printf "\n\nchecking for entries with syntax errors: ... " >&2 +printf "\n\nchecking for entries with syntax errors: ... 
" >> ${LOG_FILE} invalid="$(egrep -v ${SYNTAX_REGEX} *.txt)" if [[ -z "$invalid" ]] -then printf "OK\n" >&2 -else printf "\n[Incorrectly formatted entries]:\n\n%s\n\n" "$invalid" >&2 - echo "entries improperly formatted: $(grep -c '\n' <<<${invalid})" +then printf "OK\n" >> ${LOG_FILE} +else printf "\n[Incorrectly formatted entries]:\n\n%s\n\n" "$invalid" >> ${LOG_FILE} exit_status=1 fi -# TODO: this check could be removed someday - see note in 'sort-entries' script -printf "\n\nchecking for entries that the 'sort-entries' script would mutate: ... " >&2 -unsortable="$(grep ${SEP_CHAR} *.txt)" -if [[ -z "$unsortable" ]] -then printf "OK\n" >&2 -else printf "\n[Entries containing '%s' char]:\n\n%s\n\n" "${SEP_CHAR}" "$unsortable" >&2 - echo "entries containing $SEP_CHAR('${SEP_CHAR}'): $(grep -c '\n' <<<${unsortable})" +printf "\n\nchecking for entries without reference to detailed description: ... " >> ${LOG_FILE} +unsourced="$(egrep ${REF_REGEX} *.txt)" +if [[ -z "$unsourced" ]] +then printf "OK\n" >> ${LOG_FILE} +else printf "\n[citation needed]:\n\n%s\n\n" "$unsourced" >> ${LOG_FILE} exit_status=1 fi +# summary +totals=$(wc -l *.txt | sed 's|\(.*\)|\t\1|') +n_unsourced=$(wc -l <<<${unsourced}) +n_malformed=$(wc -l <<<${invalid} ) +echo -e "summary:\n\t* number of entries total:\n${totals}" +(( ${n_malformed} )) && echo -e "\t* number of entries improperly formatted: ${n_malformed}" +(( ${n_unsourced} )) && echo -e "\t* number of entries needing citation: ${n_unsourced}" +(( ${exit_status} )) && echo "refer to the file: '${LOG_FILE}' for details" + exit $exit_status diff --git a/sort-entries b/sort-entries deleted file mode 100755 index f688029..0000000 --- a/sort-entries +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# TODO: the best sorting results are acheived when the field separator -# precedes any valid package name character in ASCII order - -# the lowest of which is ASCII 43 '+'; so -# ASCII 33 ('!') serves this purpose quite well - -# someday, we should 
re-write the tools to use '!' instead of ':' - -# then the sort command alone would yeild the same results as this script -# and the warning could be removed from 'check.sh' - -readonly CSV_CHAR=':' -readonly SEP_CHAR='!' - - -for blacklist in *.txt -do if grep ${SEP_CHAR} ${blacklist} - then echo "can not sort: '${blacklist}' - contains '${SEP_CHAR}' char" - else echo "sorting: '${blacklist}'" - cat ${blacklist} | tr "${CSV_CHAR}" "${SEP_CHAR}" | sort | uniq | tr "${SEP_CHAR}" "${CSV_CHAR}" > ${blacklist}.temp - mv ${blacklist}.temp ${blacklist} - fi -done -- cgit v1.2.2