diff options
author | bill-auger <mr.j.spam.me@gmail.com> | 2019-12-14 02:03:38 -0500 |
---|---|---|
committer | bill-auger <mr.j.spam.me@gmail.com> | 2019-12-14 02:17:23 -0500 |
commit | dfb41fcd55c211aa274f5afcfc715d6926f99511 (patch) | |
tree | 2c110ac488a3f2cf8ec514768f58661814e4ecb3 /check.sh | |
parent | 43f5dd438bd78d5f56d93e6588026c5910c49b45 (diff) |
move sorting into check.sh, log to file, add summary
Diffstat (limited to 'check.sh')
-rwxr-xr-x | check.sh | 62 |
1 files changed, 44 insertions, 18 deletions
@@ -12,35 +12,61 @@ readonly REF_REGEX='^[^:]*:[^:]*::[^:]*:.*$' readonly SYNTAX_REGEX='^[^:]*:[^:]*:(sv|debian|parabola|fsf|fedora)?:[^:]*:.*$' readonly CSV_CHAR=':' readonly SEP_CHAR='!' +readonly LOG_FILE=./check.log ; rm ${LOG_FILE} 2> /dev/null exit_status=0 -printf "\n\nchecking for entries without reference to detailed description: ... " >&2 -unsourced="$(egrep ${REF_REGEX} *.txt)" -if [[ -z "$unsourced" ]] -then printf "OK\n" >&2 -else printf "\n[citation needed]:\n\n%s\n\n" "$unsourced" >&2 - echo "entries needing citation: $(grep -c '\n' <<<${unsourced})" - exit_status=1 + +# TODO: the best sorting results are achieved when the field separator ($CSV_CHAR) +# precedes any valid package name character in ASCII order +# the lowest of which is ASCII 43 '+', and spaces are not allowed; +# so ASCII 33 ('!') serves this purpose quite well +# someday, we should re-write the tools to parse on '!' instead of ':' +# if that were done, then the `sort` command alone would yield +# the same results as this procedure, except for removing empty lines +unsortable="$( + for blacklist in *.txt + do echo -n "sorting and cleaning: '${blacklist}' ... " >> ${LOG_FILE} + if grep ${SEP_CHAR} ${blacklist} + then echo "ERROR: can not sort - contains '${SEP_CHAR}' char" >> ${LOG_FILE} + retval=1 + else echo "OK" >> ${LOG_FILE} + cat ${blacklist} | tr "${CSV_CHAR}" "${SEP_CHAR}" | sort | uniq | \ + sed '/^[[:space:]]*$/d' | tr "${SEP_CHAR}" "${CSV_CHAR}" > ${blacklist}.temp + mv ${blacklist}.temp ${blacklist} + fi + done +)" +if [[ -n "$unsortable" ]] +then printf "\n[Entries containing '%s' char]:\n\n%s\n\n" "${SEP_CHAR}" "$unsortable" >> ${LOG_FILE} + echo -n "ERROR: one of the data files is unsortable - check can not continue" + echo " - correct the malformed entries, then run this script again" + exit 1 fi -printf "\n\nchecking for entries with syntax errors: ... " >&2 +printf "\n\nchecking for entries with syntax errors: ... 
" >> ${LOG_FILE} invalid="$(egrep -v ${SYNTAX_REGEX} *.txt)" if [[ -z "$invalid" ]] -then printf "OK\n" >&2 -else printf "\n[Incorrectly formatted entries]:\n\n%s\n\n" "$invalid" >&2 - echo "entries improperly formatted: $(grep -c '\n' <<<${invalid})" +then printf "OK\n" >> ${LOG_FILE} +else printf "\n[Incorrectly formatted entries]:\n\n%s\n\n" "$invalid" >> ${LOG_FILE} exit_status=1 fi -# TODO: this check could be removed someday - see note in 'sort-entries' script -printf "\n\nchecking for entries that the 'sort-entries' script would mutate: ... " >&2 -unsortable="$(grep ${SEP_CHAR} *.txt)" -if [[ -z "$unsortable" ]] -then printf "OK\n" >&2 -else printf "\n[Entries containing '%s' char]:\n\n%s\n\n" "${SEP_CHAR}" "$unsortable" >&2 - echo "entries containing $SEP_CHAR('${SEP_CHAR}'): $(grep -c '\n' <<<${unsortable})" +printf "\n\nchecking for entries without reference to detailed description: ... " >> ${LOG_FILE} +unsourced="$(egrep ${REF_REGEX} *.txt)" +if [[ -z "$unsourced" ]] +then printf "OK\n" >> ${LOG_FILE} +else printf "\n[citation needed]:\n\n%s\n\n" "$unsourced" >> ${LOG_FILE} exit_status=1 fi +# summary +totals=$(wc -l *.txt | sed 's|\(.*\)|\t\1|') +n_unsourced=$(wc -l <<<${unsourced}) +n_malformed=$(wc -l <<<${invalid} ) +echo -e "summary:\n\t* number of entries total:\n${totals}" +(( ${n_malformed} )) && echo -e "\t* number of entries improperly formatted: ${n_malformed}" +(( ${n_unsourced} )) && echo -e "\t* number of entries needing citation: ${n_unsourced}" +(( ${exit_status} )) && echo "refer to the file: '${LOG_FILE}' for details" + exit $exit_status |