author     bill-auger <mr.j.spam.me@gmail.com>    2020-03-11 02:23:52 -0400
committer  bill-auger <mr.j.spam.me@gmail.com>    2020-03-14 19:13:20 -0400
commit     0f501979af323d087561acf4944dd1ddedaa0d05 (patch)
tree       c90e802912cbc07223aa6c00d3770efa3c6ed644
parent     3f42c288d08001e738e1a26f5be094b8d4d37ef6 (diff)
[report] refactor report script for per-file operation and reports
-rw-r--r--  blacklist-testdata.txt |  14
-rwxr-xr-x  report                 | 337
2 files changed, 195 insertions(+), 156 deletions(-)
diff --git a/blacklist-testdata.txt b/blacklist-testdata.txt
index 232d502..93f9d38 100644
--- a/blacklist-testdata.txt
+++ b/blacklist-testdata.txt
@@ -1,19 +1,5 @@
# comments begin with '#'
-##------ expected results ------##
-# entries found: 20 #
-# entries valid: 14 #
-# entries_invalid: 6 #
-# entries lacking tags: 2 #
-# entries lacking description: 3 #
-# unspecified tags: 3 #
-# neglected entries: 4 #
-# duplicate_names: 2 #
-# identical entries: 1 #
-# differing entries: 1 #
-##------------------------------##
-
-
valid-complete:replacement:parabola:42:[branding]valid complete
valid-no-desc::::[branding]
valid-no-tags::::valid no tags
diff --git a/report b/report
index 1685317..17a32ee 100755
--- a/report
+++ b/report
@@ -1,201 +1,254 @@
#!/usr/bin/env ruby
=begin
- post_fsd_wiki.phantomjs
+ report.rb
this script is used to validate and report statistics on the blacklist entries
- it currently detects syntax errors, missing tags, unspecified tags, missing descriptions,
- duplicate entries for a single package (partitioning them as identical or differing),
- and 'neglected_entries' which are those with no tag, no description, and no replacement
+ it currently detects syntax errors, missing tags, unknown tags, missing descriptions,
+ duplicate entries for a single package (partitioning them as identical or differing),
+ and will fail if any of those problems are detected
+ it also detects entries with no replacement, although that is not an error
it optionally creates a JSON file with the results
that can be the input to the post_fsd_wiki.phantomjs script
=end
-# DEBUG begin
+## DEBUG begin ##
DEBUG = false
require 'byebug' if DEBUG
-DEBUG_FILTER_NAMES = [ 'vapoursynth-plugin-fluxsmooth' ]
-def IS_DEBUG_FILTER_NAME name ; DEBUG_FILTER_NAMES.include? name ; end ;
-# DEBUG end
+DEBUG_FILTER_NAMES = []
+def IS_DEBUG_FILTER_NAME name ; DEBUG && (DEBUG_FILTER_NAMES.include? name) ; end ;
+
+def DBG_PARSE input_filename ; if DEBUG ; print "\nDEBUG: parsing #{input_filename}\n" ; end ; end ;
+def DBG_FILTER_NAME line ; if DEBUG ; DEBUG_FILTER_NAMES.each { | ea | debugger if line.start_with? ea } ; end ; end ;
+def DBG_TOKENS tokens ; if DEBUG ; tokens.each_with_index { | token , i | print "DEBUG: tokens[#{i}]=#{token}\n" if IS_DEBUG_FILTER_NAME tokens[0] } ; end ; end ;
+def DBG_TAG entry ; if DEBUG ; print "\nparsing tag for: #{entry[PACKAGE_NAME_KEY]}\n" ; print "desc IN=#{entry[DESCRIPTION_KEY]}\n" ; end ; end ;
+def DBG_DESC entry ; if DEBUG ; print "desc OUT=#{entry[DESCRIPTION_KEY]}\n" ; print "tags=#{entry[BLACKLIST_TAGS_KEY]}\n" ; sleep 0.2 ; end ; end ;
+def DBG_NO_TAG entry ; if DEBUG ; print "\nno tag for: #{ entry[PACKAGE_NAME_KEY]}\n" if entry[BLACKLIST_TAGS_KEY].empty? ; end ; end ;
+def DBG_NO_DESC entry ; if DEBUG ; print "\nno desc for: #{entry[PACKAGE_NAME_KEY]}\n" if entry[DESCRIPTION_KEY ].empty? ; end ; end ;
+def DBG_ENTRY entry ; if DEBUG && (IS_DEBUG_FILTER_NAME entry[PACKAGE_NAME_KEY]) ; print "\n" ; entry.each_pair { | k , v | print "DEBUG: #{k}: #{v}\n" } ; end ; end ;
+def DBG_EXPECTED input_filename ; if input_filename == 'blacklist-testdata.txt'
+ print "##------ expected results ------##\n" +
+ "# entries found: 20 #\n" +
+ "# entries valid: 14 #\n" +
+ "# entries invalid: 6 #\n" +
+ "# entries lacking tags: 2 #\n" +
+ "# entries lacking description: 3 #\n" +
+ "# tags unknown: 3 #\n" +
+ "# entries unreplaced: 13 #\n" +
+ "# entries duplicated: 2 #\n" +
+ "# identical: 1 #\n" +
+ "# differing: 1 #\n" +
+ "##------------------------------##\n" ; end ; end ;
+## DEBUG end ##
require 'json'
require 'set'
-# entry syntax => original-package:[libre-replacement]:[ref]:[id]:short-description
-BLACKLIST_FILES = [ 'blacklist.txt' ]
-# BLACKLIST_FILES = [ 'blacklist-testdata.txt' ]
-VALID_ENTRIES_REGEX = /^[^:\[\]#]*:[^:\[\]]*:(sv|debian|parabola|fsf|fedora)?:[^:\[\]]*:\w*([^\d:]+:.*|\[[^:]+:.*|[^:]*)$/
-BLACKLIST_TAGS_REGEX = /^\[([^\]]*)\] *(.*)/
-RAW_ENTRY_KEY = :raw_blacklist_entry
-PACKAGE_NAME_KEY = :original_package # per blacklist SYNTAX
-REPLACEMENT_KEY = :libre_replacement # per blacklist SYNTAX
-REFERENCE_KEY = :ref # per blacklist SYNTAX
-ENTRY_ID_KEY = :id # per blacklist SYNTAX
-DESCRIPTION_KEY = :short_description # per blacklist SYNTAX
-BLACKLIST_TAGS_KEY = :blacklist_tags
-NONFREE_TAG = 'nonfree'
-SEMIFREE_TAG = 'semifree'
-USES_NONFREE_TAG = 'uses-nonfree'
-BRANDING_TAG = 'branding'
-TECHNICAL_TAG = 'technical'
-HAS_REPLACEMENT_TAG = 'FIXME:package'
-NEEDS_DESC_TAG = 'FIXME:description'
-ACCEPTABLE_TAGS = [ NONFREE_TAG , SEMIFREE_TAG , USES_NONFREE_TAG , BRANDING_TAG ,
- TECHNICAL_TAG , HAS_REPLACEMENT_TAG , NEEDS_DESC_TAG ]
-DO_PRINT_STATS = true
-DO_PRINT_INCOMPLETE = true
-DO_PRINT_DUPLICATE = false
-REPORT_SEPARATOR = "------------------------------------------------------------\n"
-OUTPUT_JSON_FILE = 'blacklist-data.json'
-
-entries_invalid = []
-entries = []
-entry_freqs = {}
-entries_no_desc = []
-entries_no_tags = []
-entries_unspecified_tags = []
-unspecified_tags = Set[]
-duplicate_names = nil # deferred
-duplicate_identical_entries = {}
-duplicate_differing_entries = {}
-
-
-## parse data ##
-
-BLACKLIST_FILES.each do | blacklist_filename |
-if DEBUG ; print "\nDEBUG: parsing #{blacklist_filename}\n" ; end ;
-
- (File.readlines blacklist_filename).each do | line |
-# DEBUG_FILTER_NAMES.each { | ea | debugger if line.start_with? ea }
-
- next if line.strip.empty? || (line.strip.start_with? '#')
- entries_invalid << line && next unless line.match VALID_ENTRIES_REGEX
-
- entries << (entry = {})
- tokens = (line.split ':')
-
-if DEBUG ; tokens.each_with_index { | token , i | print "DEBUG: tokens[#{i}]=#{token}\n" if IS_DEBUG_FILTER_NAME tokens[0] } ; end ;
+# NOTE: acceptable entry syntax per SYNTAX doc =>
+# ORIGINAL_PACKAGE:LIBRE_REPLACEMENT:REF:REF_ID:[TAG] SHORT_DESCRIPTION
+ALL_BLACKLIST_FILES = [ 'blacklist.txt' , 'your-freedom_emu-blacklist.txt' , 'your-privacy-blacklist.txt' ]
+BLACKLIST_FILES = (ALL_BLACKLIST_FILES.include? ARGV.first) ? [ ARGV.first ] : ALL_BLACKLIST_FILES
+# BLACKLIST_FILES = [ 'blacklist-testdata.txt' ] # DEBUG
+VALID_ENTRIES_REGEX = /^[^:\[\]#]*:[^:\[\]]*:(sv|debian|parabola|fsf|fedora)?:[^:\[\]]*:\w*([^\d:]+:.*|\[[^:]+:.*|[^:]*)$/
+BLACKLIST_TAGS_REGEX = /^\[([^\]]*)\] *(.*)/
+RAW_ENTRY_KEY = :raw_blacklist_entry
+PACKAGE_NAME_KEY = :original_package # syntax token
+REPLACEMENT_KEY = :libre_replacement # syntax token
+REFERENCE_KEY = :ref # syntax token
+REFERENCE_ID_KEY = :id # syntax token
+DESCRIPTION_KEY = :short_description # syntax token
+BLACKLIST_TAGS_KEY = :blacklist_tags
+NONFREE_TAG = 'nonfree'
+SEMIFREE_TAG = 'semifree'
+USES_NONFREE_TAG = 'uses-nonfree'
+BRANDING_TAG = 'branding'
+TECHNICAL_TAG = 'technical'
+NEEDS_REPLACEMENT_TAG = 'FIXME:package'
+NEEDS_DESC_TAG = 'FIXME:description'
+ACCEPTABLE_TAGS = [ NONFREE_TAG , SEMIFREE_TAG , USES_NONFREE_TAG , BRANDING_TAG ,
+ TECHNICAL_TAG , NEEDS_REPLACEMENT_TAG , NEEDS_DESC_TAG ]
+DO_PRINT_STATS = true
+DO_PRINT_INVALID = true
+DO_PRINT_UNREPLACED = true
+DO_PRINT_INCOMPLETE = true
+DO_PRINT_DUPLICATED = true
+REPORT_SEPARATOR = "------------------------------------------------------------\n"
+
+
+def reset_state
+ @entries_invalid = []
+ @entries_valid = []
+ @entry_freqs = {}
+ @entries_no_desc = []
+ @entries_no_tags = []
+ @entries_tags_unknown = []
+ @tags_unknown = Set[]
+ @entries_unreplaced = nil # deferred
+ @entries_duplicated = nil # deferred
+ @entries_duplicated_identical = {}
+ @entries_duplicated_differing = {}
+ @errors = []
+end
+
+def parse_entries input_filename
+DBG_PARSE input_filename
+
+ (File.readlines input_filename).each do | line |
+
+DBG_FILTER_NAME line
+
+ next if line.strip.empty? || (line.strip.start_with? '#')
+ @entries_invalid << line && next unless line.match VALID_ENTRIES_REGEX
+
+ @entries_valid << (entry = {})
+ tokens = (line.split ':')
entry[RAW_ENTRY_KEY ] = line
entry[PACKAGE_NAME_KEY ] = (tokens.shift ).gsub("\t" , '').strip
entry[REPLACEMENT_KEY ] = (tokens.shift ).gsub("\t" , '').strip
entry[REFERENCE_KEY ] = (tokens.shift ).gsub("\t" , '').strip
- entry[ENTRY_ID_KEY ] = (tokens.shift ).gsub("\t" , '').strip
+ entry[REFERENCE_ID_KEY ] = (tokens.shift ).gsub("\t" , '').strip
entry[DESCRIPTION_KEY ] = (tokens.join ':').gsub("\t" , '').strip
entry[BLACKLIST_TAGS_KEY] = []
+DBG_TOKENS tokens
+
# parse tags
while (entry[DESCRIPTION_KEY].start_with? '[') && (entry[DESCRIPTION_KEY].include? ']')
-if DEBUG ; print "\n parsing tag for: #{entry[PACKAGE_NAME_KEY]}\n" ; print "desc IN=#{entry[DESCRIPTION_KEY]}\n" ; end ;
-# debugger if DEBUG && (DEBUG_FILTER_NAMES.include? entry[PACKAGE_NAME_KEY])
+
+DBG_TAG entry
entry[BLACKLIST_TAGS_KEY] << (entry[DESCRIPTION_KEY].gsub BLACKLIST_TAGS_REGEX , '\1')
entry[DESCRIPTION_KEY ] = (entry[DESCRIPTION_KEY].gsub BLACKLIST_TAGS_REGEX , '\2')
-if DEBUG ; print "desc OUT=#{entry[DESCRIPTION_KEY]}\n" ; print "tags=#{entry[BLACKLIST_TAGS_KEY]}\n" ; sleep 0.2 ; end ;
+DBG_DESC entry
+
end
-if DEBUG ; print "\nno tag for: #{ entry[PACKAGE_NAME_KEY]}\n" if entry[BLACKLIST_TAGS_KEY].empty? ; end ;
-if DEBUG ; print "\nno desc for: #{entry[PACKAGE_NAME_KEY]}\n" if entry[DESCRIPTION_KEY ].empty? ; end ;
+DBG_NO_TAG entry
+DBG_NO_DESC entry
end
end
+def process_entries
+ @entries_valid.each do | entry |
-## process data ##
+DBG_ENTRY entry
-entries.each do | entry |
-if DEBUG && (IS_DEBUG_FILTER_NAME entry[PACKAGE_NAME_KEY]) ; print "\n" ; entry.each_pair { | k , v | print "DEBUG: #{k}: #{v}\n" } ; end ;
+ entry_name = entry[PACKAGE_NAME_KEY ]
+ entry_desc = entry[DESCRIPTION_KEY ]
+ entry_tags = entry[BLACKLIST_TAGS_KEY]
+ entry_tags_unknown = entry_tags - ACCEPTABLE_TAGS
- entry_name = entry[PACKAGE_NAME_KEY ]
- entry_desc = entry[DESCRIPTION_KEY ]
- entry_tags = entry[BLACKLIST_TAGS_KEY]
- entry_unspecified_tags = entry_tags - ACCEPTABLE_TAGS
-
- entry_freqs[entry_name] = (entry_freqs[entry_name] ||= 0) + 1
- entries_no_desc << entry if entry_desc .empty?
- entries_no_tags << entry if entry_tags .empty?
- entries_unspecified_tags << entry unless entry_unspecified_tags.empty?
- unspecified_tags.merge entry_unspecified_tags
-end
-
-duplicate_names = entry_freqs.keys.select { | name | entry_freqs[name] > 1 }
-incomplete_entries = entries_no_desc + entries_no_tags
-unreplaced_entries = entries.select { | entry | entry[REPLACEMENT_KEY].empty? }
-neglected_entries = incomplete_entries.map { | entry | entry[PACKAGE_NAME_KEY] }.to_set &
- unreplaced_entries.map { | entry | entry[PACKAGE_NAME_KEY] }.to_set
+ @entry_freqs[entry_name] = (@entry_freqs[entry_name] ||= 0) + 1
+ @entries_no_desc << entry if entry_desc .empty?
+ @entries_no_tags << entry if entry_tags .empty?
+ @entries_tags_unknown << entry unless entry_tags_unknown.empty?
+ @tags_unknown.merge entry_tags_unknown
+ end
-duplicate_names.each do | duplicate_name |
-# next unless IS_DEBUG_FILTER_NAME duplicate_name ; p "duplicate_name=#{duplicate_name}" # DEBUG
+ @entries_unreplaced = @entries_valid .select { | entry | entry[REPLACEMENT_KEY].empty? }
+ @entries_duplicated = @entry_freqs.keys.select { | name | @entry_freqs[name] > 1 }
- duplicate_entries = entries.select { | entry | entry[PACKAGE_NAME_KEY] == duplicate_name } \
- .map! { | entry | entry[RAW_ENTRY_KEY ] }
- unique_entries = duplicate_entries.uniq
- n_unique_entries = unique_entries.size
+ @entries_duplicated.each do | duplicate_name |
+ duplicate_entries = @entries_valid.select { | entry | entry[PACKAGE_NAME_KEY] == duplicate_name } \
+ .map! { | entry | entry[RAW_ENTRY_KEY ] }
+ unique_entries = duplicate_entries.uniq
+ n_unique_entries = unique_entries.size
- unique_entries.each do | uniq_value |
- n_identical_entries = duplicate_entries.count { | dup_entry | dup_entry == uniq_value }
- duplicate_identical_entries[duplicate_name] = uniq_value + " (#{n_identical_entries} identical)" if n_identical_entries > 1
- end
- if n_unique_entries > 1
- duplicate_differing_entries[duplicate_name] = unique_entries
+ unique_entries.each do | uniq_value |
+ n_identical_entries = duplicate_entries.count { | dup_entry | dup_entry == uniq_value }
+ @entries_duplicated_identical[duplicate_name] = uniq_value + " (#{n_identical_entries} identical)" if n_identical_entries > 1
+ end
+ if n_unique_entries > 1
+ @entries_duplicated_differing[duplicate_name] = unique_entries
+ end
end
end
+def print_report input_filename
+ if DO_PRINT_INVALID || DO_PRINT_INCOMPLETE || DO_PRINT_UNREPLACED
+ print "\n\n#{REPORT_SEPARATOR}#{input_filename} report:\n"
+ end
-## report ##
-
-print REPORT_SEPARATOR
-print "entries found: #{ (entries + entries_invalid ).size }\n" if DO_PRINT_STATS
-print "entries valid: #{ (entries ).size }\n" if DO_PRINT_STATS
-print "entries_invalid: #{ (entries_invalid ).size }\n" if DO_PRINT_STATS
-print "entries lacking tags: #{ (entries_no_tags ).size }\n" if DO_PRINT_STATS
-print "entries lacking description: #{(entries_no_desc ).size }\n" if DO_PRINT_STATS
-print "unspecified tags: #{ (unspecified_tags ).size }\n" if DO_PRINT_STATS
-print "neglected entries: #{ (neglected_entries ).size }\n" if DO_PRINT_STATS
-print "duplicate_names: #{ (duplicate_names ).size }\n" if DO_PRINT_STATS
-print " identical entries: #{ (duplicate_identical_entries).size }\n" if DO_PRINT_STATS
-print " differing entries: #{ (duplicate_differing_entries).keys.size}\n" if DO_PRINT_STATS
-
-if DO_PRINT_INCOMPLETE
- { 'invalid entries' => entries_invalid ,
- 'entries lacking description' => entries_no_desc ,
- 'entries lacking tags' => entries_no_tags ,
- 'entries with unspecified tags' => entries_unspecified_tags ,
- 'unspecified tags' => unspecified_tags }.each_pair do | label , data |
- print REPORT_SEPARATOR + "#{label}:\n" unless data.empty?
+ print_invalid = {}
+ print_invalid['entries invalid' ] = @entries_invalid if DO_PRINT_INVALID
+ print_invalid['entries lacking description'] = @entries_no_desc if DO_PRINT_INCOMPLETE
+ print_invalid['entries lacking tags' ] = @entries_no_tags if DO_PRINT_INCOMPLETE
+ print_invalid['entries with unknown tags' ] = @entries_tags_unknown if DO_PRINT_INCOMPLETE
+ print_invalid['tags unknown' ] = @tags_unknown if DO_PRINT_INCOMPLETE
+ print_invalid.each_pair do | label , data |
+ print "#{REPORT_SEPARATOR}#{label}:\n" unless data.empty?
data.each { | entry | print " #{((entry.is_a? Hash) ? entry[RAW_ENTRY_KEY] : entry).strip}\n" }
end
-end
-unless neglected_entries.empty? || ! DO_PRINT_INCOMPLETE
- print REPORT_SEPARATOR + "neglected entries:\n"
- neglected_entries.each { | entry_name | print " #{entry_name}\n" }
-end
+ unless @entries_unreplaced.empty? || ! DO_PRINT_UNREPLACED
+ print "#{REPORT_SEPARATOR}entries unreplaced:\n"
+ @entries_unreplaced.each { | entry | print " #{entry[PACKAGE_NAME_KEY]}\n" }
+ end
+
+ unless @entries_duplicated.empty? || ! DO_PRINT_DUPLICATED
+ print "#{REPORT_SEPARATOR}entries duplicates:\n"
+ @entries_duplicated.each do | duplicate_name |
+ entry_identical = @entries_duplicated_identical[duplicate_name]
+ entries_differing = @entries_duplicated_differing[duplicate_name]
+
+ print "\n #{duplicate_name}:\n"
+ print " identical:\n" unless entry_identical .nil?
+ print " #{entry_identical}\n" unless entry_identical .nil?
+ print " differing:\n" unless entries_differing.nil?
+ entries_differing.each { | entry | print " #{entry}\n" } unless entries_differing.nil?
+ end
+ end
-unless duplicate_names.empty? || ! DO_PRINT_DUPLICATE
- print REPORT_SEPARATOR + "duplicate entries:\n"
- duplicate_names.each do | duplicate_name |
- identical_entry = duplicate_identical_entries[duplicate_name]
- differing_entries = duplicate_differing_entries[duplicate_name]
-
- print "\n #{duplicate_name}:\n"
- print " identical entries:\n" unless identical_entry .nil?
- print " #{identical_entry}\n" unless identical_entry .nil?
- print " differing entries:\n" unless differing_entries.nil?
- differing_entries.each { | entry | print " #{entry}\n" } unless differing_entries.nil?
+ if DO_PRINT_STATS
+ print "#{REPORT_SEPARATOR}#{input_filename} stats:\n"
+ print " entries found: #{ (@entries_valid + @entries_invalid).size }\n"
+ print " entries valid: #{ (@entries_valid ).size }\n"
+ print " entries invalid: #{ (@entries_invalid ).size }\n"
+ print " entries lacking tags: #{ (@entries_no_tags ).size }\n"
+ print " entries lacking description: #{(@entries_no_desc ).size }\n"
+ print " tags unknown: #{ (@tags_unknown ).size }\n"
+ print " entries unreplaced: #{ (@entries_unreplaced ).size }\n"
+ print " entries duplicated: #{ (@entries_duplicated ).size }\n"
+ print " identical: #{ (@entries_duplicated_identical ).size }\n"
+ print " differing: #{ (@entries_duplicated_differing ).keys.size}\n"
+ print REPORT_SEPARATOR
end
+
+DBG_EXPECTED input_filename
end
+def sanity_check input_filename
+ @errors << 'entries_invalid' unless @entries_invalid .empty?
+ # @errors << 'entries_no_tags' unless @entries_no_tags .empty? # TODO: complete these entries
+ # @errors << 'entries_no_desc' unless @entries_no_desc .empty? # TODO: complete these entries
+ @errors << 'tags_unknown' unless @tags_unknown .empty?
+ @errors << 'entries_duplicated' unless @entries_duplicated.empty?
+end
-## sanity check ##
+def generate_json input_filename
+ output_json_file = "#{input_filename}.json"
-should_quit = ! (entries_invalid.empty? && unspecified_tags.empty? && duplicate_names.empty?)
-(print "errors were found - JSON will not be generated\n" ; exit 1) if should_quit
+ if @errors.empty?
+ IO.write output_json_file , @entries_valid.to_json
+ print "\nwrote: #{output_json_file}\n\nno problems detected in #{input_filename}\n"
+ else
+ print "\nERROR: in #{input_filename} - #{@errors.join ','} - JSON will not be generated\n"
+ end
+end
-## generate JSON ##
+BLACKLIST_FILES.each do | input_filename |
+ reset_state
+ parse_entries input_filename
+ process_entries
+ print_report input_filename
+ sanity_check input_filename
+ generate_json input_filename
-IO.write OUTPUT_JSON_FILE , entries.to_json ; print "\nwrote: #{OUTPUT_JSON_FILE}\n\ndone\n" ;
+ exit 1 unless @errors.empty?
+end
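
A minimal sketch (not part of the commit) of how another tool could read back one of the per-file JSON reports written by generate_json: the 'blacklist.txt.json' file name is assumed here, following the "#{input_filename}.json" convention above, and the keys are the script's symbol keys (original_package, libre_replacement, ...) as serialized to strings by to_json.

    require 'json'

    # load the per-file report generated by ./report
    entries = JSON.parse(File.read('blacklist.txt.json'))

    # print each blacklisted package and its libre replacement (may be empty)
    entries.each do | entry |
      puts "#{entry['original_package']} -> #{entry['libre_replacement']}"
    end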