diff options
author | bill-auger <mr.j.spam.me@gmail.com> | 2019-12-14 02:03:38 -0500 |
---|---|---|
committer | bill-auger <mr.j.spam.me@gmail.com> | 2019-12-14 02:17:23 -0500 |
commit | dfb41fcd55c211aa274f5afcfc715d6926f99511 (patch) | |
tree | 2c110ac488a3f2cf8ec514768f58661814e4ecb3 | |
parent | 43f5dd438bd78d5f56d93e6588026c5910c49b45 (diff) |
move sorting into check.sh, log to file, add summary
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | README | 3 | ||||
-rwxr-xr-x | check.sh | 62 | ||||
-rwxr-xr-x | sort-entries | 22 |
4 files changed, 46 insertions, 42 deletions
@@ -7,7 +7,6 @@ !find-replacements !find-deprecated-pkgs !README -!sort-entries !SYNTAX !your-freedom_emu-blacklist.txt !your-privacy-blacklist.txt @@ -41,7 +41,8 @@ only exist for the sake of using nonfree software; respectively. see NOTE: your-freedom_emu-blacklist packages are not meant to have replacements. 2. Pull [8]blacklist.git. 3. Add an entry for the package according to the SYNTAX file. - 4. Run ./sort-entries. + 4. Run ./check.sh. This will detect any syntax errors, and also sort the entries + and remove empty lines. Please do this. It will help parsers do their jobs. 5. If you have [5]git write access, push the changes back. If not, send a patch produced using `git format-patch` to dev@lists.parabola.nu. 6. If you have shell access, ssh into repo.parabola.nu and run db-check-nonfree. @@ -12,35 +12,61 @@ readonly REF_REGEX='^[^:]*:[^:]*::[^:]*:.*$' readonly SYNTAX_REGEX='^[^:]*:[^:]*:(sv|debian|parabola|fsf|fedora)?:[^:]*:.*$' readonly CSV_CHAR=':' readonly SEP_CHAR='!' +readonly LOG_FILE=./check.log ; rm ${LOG_FILE} 2> /dev/null exit_status=0 -printf "\n\nchecking for entries without reference to detailed description: ... " >&2 -unsourced="$(egrep ${REF_REGEX} *.txt)" -if [[ -z "$unsourced" ]] -then printf "OK\n" >&2 -else printf "\n[citation needed]:\n\n%s\n\n" "$unsourced" >&2 - echo "entries needing citation: $(grep -c '\n' <<<${unsourced})" - exit_status=1 + +# TODO: the best sorting results are acheived when the field separator ($CSV_CHAR) +# precedes any valid package name character in ASCII order +# the lowest of which is ASCII 43 '+', and spaces are not allowed; +# so ASCII 33 ('!') serves this purpose quite well +# someday, we should re-write the tools to use parse on '!' instead of ':' +# if that were done, then the `sort` command alone would yeild +# the same results as this procedure, except for removing empty lines +unsortable="$( + for blacklist in *.txt + do echo -n "sorting and cleaning: '${blacklist}' ... " >> ${LOG_FILE} + if grep ${SEP_CHAR} ${blacklist} + then echo "ERROR: can not sort - contains '${SEP_CHAR}' char" >> ${LOG_FILE} + retval=1 + else echo "OK" >> ${LOG_FILE} + cat ${blacklist} | tr "${CSV_CHAR}" "${SEP_CHAR}" | sort | uniq | \ + sed '/^[[:space:]]*$/d' | tr "${SEP_CHAR}" "${CSV_CHAR}" > ${blacklist}.temp + mv ${blacklist}.temp ${blacklist} + fi + done +)" +if [[ -n "$unsortable" ]] +then printf "\n[Entries containing '%s' char]:\n\n%s\n\n" "${SEP_CHAR}" "$unsortable" >> ${LOG_FILE} + echo -n "ERROR: one of the data files is unsortable - check can not continue" + echo " - correct the malformed entries, then run this script again" + exit 1 fi -printf "\n\nchecking for entries with syntax errors: ... " >&2 +printf "\n\nchecking for entries with syntax errors: ... " >> ${LOG_FILE} invalid="$(egrep -v ${SYNTAX_REGEX} *.txt)" if [[ -z "$invalid" ]] -then printf "OK\n" >&2 -else printf "\n[Incorrectly formatted entries]:\n\n%s\n\n" "$invalid" >&2 - echo "entries improperly formatted: $(grep -c '\n' <<<${invalid})" +then printf "OK\n" >> ${LOG_FILE} +else printf "\n[Incorrectly formatted entries]:\n\n%s\n\n" "$invalid" >> ${LOG_FILE} exit_status=1 fi -# TODO: this check could be removed someday - see note in 'sort-entries' script -printf "\n\nchecking for entries that the 'sort-entries' script would mutate: ... " >&2 -unsortable="$(grep ${SEP_CHAR} *.txt)" -if [[ -z "$unsortable" ]] -then printf "OK\n" >&2 -else printf "\n[Entries containing '%s' char]:\n\n%s\n\n" "${SEP_CHAR}" "$unsortable" >&2 - echo "entries containing $SEP_CHAR('${SEP_CHAR}'): $(grep -c '\n' <<<${unsortable})" +printf "\n\nchecking for entries without reference to detailed description: ... " >> ${LOG_FILE} +unsourced="$(egrep ${REF_REGEX} *.txt)" +if [[ -z "$unsourced" ]] +then printf "OK\n" >> ${LOG_FILE} +else printf "\n[citation needed]:\n\n%s\n\n" "$unsourced" >> ${LOG_FILE} exit_status=1 fi +# summary +totals=$(wc -l *.txt | sed 's|\(.*\)|\t\1|') +n_unsourced=$(wc -l <<<${unsourced}) +n_malformed=$(wc -l <<<${invalid} ) +echo -e "summary:\n\t* number of entries total:\n${totals}" +(( ${n_malformed} )) && echo -e "\t* number of entries improperly formatted: ${n_malformed}" +(( ${n_unsourced} )) && echo -e "\t* number of entries needing citation: ${n_unsourced}" +(( ${exit_status} )) && echo "refer to the file: '${LOG_FILE}' for details" + exit $exit_status diff --git a/sort-entries b/sort-entries deleted file mode 100755 index f688029..0000000 --- a/sort-entries +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# TODO: the best sorting results are acheived when the field separator -# precedes any valid package name character in ASCII order - -# the lowest of which is ASCII 43 '+'; so -# ASCII 33 ('!') serves this purpose quite well - -# someday, we should re-write the tools to use '!' instead of ':' - -# then the sort command alone would yeild the same results as this script -# and the warning could be removed from 'check.sh' - -readonly CSV_CHAR=':' -readonly SEP_CHAR='!' - - -for blacklist in *.txt -do if grep ${SEP_CHAR} ${blacklist} - then echo "can not sort: '${blacklist}' - contains '${SEP_CHAR}' char" - else echo "sorting: '${blacklist}'" - cat ${blacklist} | tr "${CSV_CHAR}" "${SEP_CHAR}" | sort | uniq | tr "${SEP_CHAR}" "${CSV_CHAR}" > ${blacklist}.temp - mv ${blacklist}.temp ${blacklist} - fi -done |