summary refs log tree commit diff
diff options
context:
space:
mode:
author bill-auger <mr.j.spam.me@gmail.com> 2019-12-14 02:03:38 -0500
committer bill-auger <mr.j.spam.me@gmail.com> 2019-12-14 02:17:23 -0500
commit dfb41fcd55c211aa274f5afcfc715d6926f99511 (patch)
tree 2c110ac488a3f2cf8ec514768f58661814e4ecb3
parent 43f5dd438bd78d5f56d93e6588026c5910c49b45 (diff)
move sorting into check.sh, log to file, add summary
-rw-r--r-- .gitignore 1
-rw-r--r-- README 3
-rwxr-xr-x check.sh 62
-rwxr-xr-x sort-entries 22
4 files changed, 46 insertions, 42 deletions
diff --git a/.gitignore b/.gitignore
index 83a5258..1acfe1f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,7 +7,6 @@
!find-replacements
!find-deprecated-pkgs
!README
-!sort-entries
!SYNTAX
!your-freedom_emu-blacklist.txt
!your-privacy-blacklist.txt
diff --git a/README b/README
index 5b3bb8d..7fcf394 100644
--- a/README
+++ b/README
@@ -41,7 +41,8 @@ only exist for the sake of using nonfree software; respectively. see
NOTE: your-freedom_emu-blacklist packages are not meant to have replacements.
2. Pull [8]blacklist.git.
3. Add an entry for the package according to the SYNTAX file.
- 4. Run ./sort-entries.
+ 4. Run ./check.sh. This will detect any syntax errors, and also sort the entries
+ and remove empty lines. Please do this. It will help parsers do their jobs.
5. If you have [5]git write access, push the changes back.
If not, send a patch produced using `git format-patch` to dev@lists.parabola.nu.
6. If you have shell access, ssh into repo.parabola.nu and run db-check-nonfree.
diff --git a/check.sh b/check.sh
index 48a8f8a..7de5a92 100755
--- a/check.sh
+++ b/check.sh
@@ -12,35 +12,61 @@ readonly REF_REGEX='^[^:]*:[^:]*::[^:]*:.*$'
readonly SYNTAX_REGEX='^[^:]*:[^:]*:(sv|debian|parabola|fsf|fedora)?:[^:]*:.*$'
readonly CSV_CHAR=':'
readonly SEP_CHAR='!'
+readonly LOG_FILE=./check.log ; rm ${LOG_FILE} 2> /dev/null
exit_status=0
-printf "\n\nchecking for entries without reference to detailed description: ... " >&2
-unsourced="$(egrep ${REF_REGEX} *.txt)"
-if [[ -z "$unsourced" ]]
-then printf "OK\n" >&2
-else printf "\n[citation needed]:\n\n%s\n\n" "$unsourced" >&2
- echo "entries needing citation: $(grep -c '\n' <<<${unsourced})"
- exit_status=1
+
+# TODO: the best sorting results are achieved when the field separator ($CSV_CHAR)
+# precedes any valid package name character in ASCII order
+# the lowest of which is ASCII 43 '+', and spaces are not allowed;
+# so ASCII 33 ('!') serves this purpose quite well
+# someday, we should re-write the tools to parse on '!' instead of ':'
+# if that were done, then the `sort` command alone would yield
+# the same results as this procedure, except for removing empty lines
+unsortable="$(
+ for blacklist in *.txt
+ do echo -n "sorting and cleaning: '${blacklist}' ... " >> ${LOG_FILE}
+ if grep ${SEP_CHAR} ${blacklist}
+ then echo "ERROR: can not sort - contains '${SEP_CHAR}' char" >> ${LOG_FILE}
+ retval=1
+ else echo "OK" >> ${LOG_FILE}
+ cat ${blacklist} | tr "${CSV_CHAR}" "${SEP_CHAR}" | sort | uniq | \
+ sed '/^[[:space:]]*$/d' | tr "${SEP_CHAR}" "${CSV_CHAR}" > ${blacklist}.temp
+ mv ${blacklist}.temp ${blacklist}
+ fi
+ done
+)"
+if [[ -n "$unsortable" ]]
+then printf "\n[Entries containing '%s' char]:\n\n%s\n\n" "${SEP_CHAR}" "$unsortable" >> ${LOG_FILE}
+ echo -n "ERROR: one of the data files is unsortable - check can not continue"
+ echo " - correct the malformed entries, then run this script again"
+ exit 1
fi
-printf "\n\nchecking for entries with syntax errors: ... " >&2
+printf "\n\nchecking for entries with syntax errors: ... " >> ${LOG_FILE}
invalid="$(egrep -v ${SYNTAX_REGEX} *.txt)"
if [[ -z "$invalid" ]]
-then printf "OK\n" >&2
-else printf "\n[Incorrectly formatted entries]:\n\n%s\n\n" "$invalid" >&2
- echo "entries improperly formatted: $(grep -c '\n' <<<${invalid})"
+then printf "OK\n" >> ${LOG_FILE}
+else printf "\n[Incorrectly formatted entries]:\n\n%s\n\n" "$invalid" >> ${LOG_FILE}
exit_status=1
fi
-# TODO: this check could be removed someday - see note in 'sort-entries' script
-printf "\n\nchecking for entries that the 'sort-entries' script would mutate: ... " >&2
-unsortable="$(grep ${SEP_CHAR} *.txt)"
-if [[ -z "$unsortable" ]]
-then printf "OK\n" >&2
-else printf "\n[Entries containing '%s' char]:\n\n%s\n\n" "${SEP_CHAR}" "$unsortable" >&2
- echo "entries containing $SEP_CHAR('${SEP_CHAR}'): $(grep -c '\n' <<<${unsortable})"
+printf "\n\nchecking for entries without reference to detailed description: ... " >> ${LOG_FILE}
+unsourced="$(egrep ${REF_REGEX} *.txt)"
+if [[ -z "$unsourced" ]]
+then printf "OK\n" >> ${LOG_FILE}
+else printf "\n[citation needed]:\n\n%s\n\n" "$unsourced" >> ${LOG_FILE}
exit_status=1
fi
+# summary
+totals=$(wc -l *.txt | sed 's|\(.*\)|\t\1|')
+n_unsourced=$(wc -l <<<${unsourced})
+n_malformed=$(wc -l <<<${invalid} )
+echo -e "summary:\n\t* number of entries total:\n${totals}"
+(( ${n_malformed} )) && echo -e "\t* number of entries improperly formatted: ${n_malformed}"
+(( ${n_unsourced} )) && echo -e "\t* number of entries needing citation: ${n_unsourced}"
+(( ${exit_status} )) && echo "refer to the file: '${LOG_FILE}' for details"
+
exit $exit_status
diff --git a/sort-entries b/sort-entries
deleted file mode 100755
index f688029..0000000
--- a/sort-entries
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-
-# TODO: the best sorting results are acheived when the field separator
-# precedes any valid package name character in ASCII order -
-# the lowest of which is ASCII 43 '+'; so
-# ASCII 33 ('!') serves this purpose quite well -
-# someday, we should re-write the tools to use '!' instead of ':' -
-# then the sort command alone would yeild the same results as this script
-# and the warning could be removed from 'check.sh'
-
-readonly CSV_CHAR=':'
-readonly SEP_CHAR='!'
-
-
-for blacklist in *.txt
-do if grep ${SEP_CHAR} ${blacklist}
- then echo "can not sort: '${blacklist}' - contains '${SEP_CHAR}' char"
- else echo "sorting: '${blacklist}'"
- cat ${blacklist} | tr "${CSV_CHAR}" "${SEP_CHAR}" | sort | uniq | tr "${SEP_CHAR}" "${CSV_CHAR}" > ${blacklist}.temp
- mv ${blacklist}.temp ${blacklist}
- fi
-done