summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Luke Shumaker <LukeShu@sbcglobal.net> 2013-04-25 21:06:10 -0400
committer: Luke Shumaker <LukeShu@sbcglobal.net> 2013-04-25 21:06:10 -0400
commit: 952f2444763af9100339876b1bfc88c169346967 (patch)
tree: d368fb61ac0a56ace28de419382e234351ce0eb4
parent: 8d81be0dcd7ab9d14a88f0e981046f50e67526fe (diff)
improve jh-checksource
Code-wise:
 * add `-m` flag for machine readable
 * use mime-types instead of user-readable strings
 * ignore files less than 3 bytes

Settings-wise:
 * ignore _all_ `inode/*`, `text/*`, `image/*` and `video/*` types
 * ignore PDFs and PostScript
 * ignore .hg files
-rw-r--r-- jh-checksource.sh 43
1 files changed, 31 insertions, 12 deletions
diff --git a/jh-checksource.sh b/jh-checksource.sh
index 1fecfa5..9e3a9d6 100644
--- a/jh-checksource.sh
+++ b/jh-checksource.sh
@@ -1,13 +1,15 @@
#!/bin/bash
-sep='<nofileevercontainsthis>'
-resep='@'
+sep='<no-filename-ever-contains-this>'
-safe_types_regexp=('text' '(GIF|JPEG|PNG) image data' 'MS Windows icon')
-safe_types_string=('empty')
-safe_files_regexp=('/\.(git|svn)/')
+safe_types_regexp=('^(inode|text|image|video)/')
+safe_types_string=('application/pdf' 'application/postscript')
+safe_files_regexp=('/\.(git|hg|svn)/')
safe_files_string=()
+# don't care about files less than 3 bytes.
+min_size=3
+
normalize_filename() {
local cwd="`pwd`"
readlink -m -- "$1"|sed "s|^$cwd/|./|"
@@ -35,18 +37,36 @@ matches_regexp() {
return 1
}
+print-human() { # human-readable report; reads the file list on stdin
+ libremessages warning "The source directory `pwd` contains binary files:"
+ sed 's/./ -> &/' # put ' -> ' in front of each non-empty input line
+}
+
+print-machine() { # machine-readable report (selected by -m)
+ cat # copy stdin to stdout unchanged
+}
+
main() {
+ format=human
# Parse arguments
- for file in "$@"; do safe_files_string+=("$(normalize_filename "$file")"); done
+ for arg in "$@"; do
+ case "$arg" in
+ -m) format=machine;;
+ *) safe_files_string+=("$(normalize_filename "$arg")");;
+ esac
+ done
# Init
unsafe_files="$(mktemp)"
# Heavy lifting
- find . -type f -exec file -F"$sep" {} + | while read line; do
- file="$(echo "$line"|sed "s${resep}${sep}.*${resep}${resep}")"
- type="$(echo "$line"|sed "s${resep}.*${sep}\s*${resep}${resep}")"
-
+ find . -type f -printf '%s %h/%f\n' | # find all files
+ while read -r size file; do # filter out files smaller than $min_size
+ [[ $size < $min_size ]] || printf '%s\n' "$file"
+ done |
+ xargs -d'\n' file --mime-type -r -F "$sep" | # identify the filetypes
+ sed -r "s@(.*)${sep}\s*(.*)@\2:\1@" | # reformat the output to be easier to parse
+ while IFS=: read -r type file; do
file="$(normalize_filename "$file")"
if \
@@ -61,8 +81,7 @@ main() {
done > "$unsafe_files"
if [[ -n "$(cat "$unsafe_files")" ]]; then
- echo "==> WARNING: The source directory `pwd` contains binary files:"
- <"$unsafe_files" sort | sed 's/./ -> &/'
+ <"$unsafe_files" sort | print-$format
rm -f "$unsafe_files"
exit 1
else