#!/usr/bin/env ruby

=begin
  USAGE: report.rb [ input_file ]

  this script is used to validate and report statistics on the blacklist entries

  if the first command-line argument names an existing file, only that file is processed;
      otherwise every file listed in ALL_BLACKLIST_FILES is processed
      (only the first argument is consulted)

  it currently detects syntax errors, missing tags, unknown tags, missing descriptions,
      duplicate entries for a single package (partitioning them as identical or differing),
      and will fail if any of the conditions checked by validate() are unmet
  it also detects entries with no replacement, although that is not an error

  when a file has no errors, it creates a JSON file with the results,
      which can be the input to the post_fsd_wiki.phantomjs script
=end


## DEBUG begin ##

# master switch for every DBG_* helper below (except DBG_EXPECTED, which is keyed on file name)
DEBUG = false
require 'byebug' if DEBUG

# package names for which the DBG_* helpers emit per-entry details
DEBUG_FILTER_NAMES = [].freeze

# true when debugging is enabled and 'name' is one of DEBUG_FILTER_NAMES
def IS_DEBUG_FILTER_NAME(name)
  DEBUG && DEBUG_FILTER_NAMES.include?(name)
end

# announces the file about to be parsed
def DBG_PARSE(input_filename)
  $stderr.print "\nDEBUG: parsing #{input_filename}\n" if DEBUG
end

# breaks into the debugger when 'line' begins with one of DEBUG_FILTER_NAMES
def DBG_FILTER_NAME(line)
  return unless DEBUG

  DEBUG_FILTER_NAMES.each { |name| debugger if line.start_with?(name) }
end

# dumps the colon-separated tokens of a line whose first token is a filtered name
def DBG_TOKENS(tokens)
  return unless DEBUG

  tokens.each_with_index do |token, i|
    $stderr.print "DEBUG: tokens[#{i}]=#{token}\n" if IS_DEBUG_FILTER_NAME(tokens[0])
  end
end

# shows the description as it is before one reason tag is peeled off
def DBG_TAG(entry)
  return unless DEBUG

  $stderr.print "\nparsing tag for: #{entry[PACKAGE_NAME_KEY]}\ndesc IN=#{entry[DESCRIPTION_KEY]}\n"
end

# shows the description and collected tags after one reason tag was peeled off
def DBG_DESC(entry)
  return unless DEBUG

  $stderr.print "desc OUT=#{entry[DESCRIPTION_KEY]}\n tags=#{entry[REASON_TAGS_KEY]}\n"
  sleep 0.2
end

# notes an entry that ended up with no reason tags
def DBG_NO_TAG(entry)
  return unless DEBUG

  $stderr.print "\nno tag for: #{entry[PACKAGE_NAME_KEY]}\n" if entry[REASON_TAGS_KEY].empty?
end

# notes an entry that ended up with no description
def DBG_NO_DESC(entry)
  return unless DEBUG

  $stderr.print "\nno desc for: #{entry[PACKAGE_NAME_KEY]}\n" if entry[DESCRIPTION_KEY].empty?
end

# dumps every field of an entry whose package name is filtered
def DBG_ENTRY(entry)
  return unless DEBUG && IS_DEBUG_FILTER_NAME(entry[PACKAGE_NAME_KEY])

  $stderr.print "\n"
  entry.each_pair { |k, v| $stderr.print "DEBUG: #{k}: #{v}\n" }
end

# prints the expected statistics after the test-data file was processed
# NOTE: this one is keyed on the file name only; it does not consult DEBUG
def DBG_EXPECTED(input_filename)
  $stderr.print DBG_EXPECTED_TEST_RESULTS if input_filename == 'blacklist-testdata.txt'
end

# expected statistics, per 'blacklist-testdata.txt'
DBG_EXPECTED_TEST_RESULTS = "##------ expected results ---------##\n" +
                            "# entries found: 20 #\n" +
                            "# well-formed: 14 #\n" +
                            "# malformed: 6 #\n" +
                            "# entries lacking bug reference: 15 #\n" +
                            "# entries lacking tag: 2 #\n" +
                            "# entries lacking description: 3 #\n" +
                            "# unknown bug references: 0 #\n" +
                            "# unknown reason tags: 3 #\n" +
                            "# entries with no replacement: 13 #\n" +
                            "# duplicate entries: 2 #\n" +
                            "# identical: 1 #\n" +
                            "# differing: 2 #\n" +
                            "##---------------------------------##\n"
# NOTE(review): a second set of figures was kept here as a comment;
#               which data set it describes is not stated - confirm or remove
# entries found: 34
# well-formed: 28
# malformed: 6
# entries lacking bug reference: 8
# entries lacking tag: 8
# entries lacking description: 4
# unknown bug references: 1
# unknown reason tags: 4
# entries with no replacement: 4
# duplicate entries: 3
# identical: 1
# differing: 2

## DEBUG end ##


require 'json'
require 'set'


# NOTE: acceptable entry syntax per SYNTAX doc =>
#       ORIGINAL_PACKAGE:LIBRE_REPLACEMENT:REF:REF_ID:[TAG] SHORT_DESCRIPTION
ALL_BLACKLIST_FILES = [ 'blacklist.txt'                  ,
                        'your-freedom_emu-blacklist.txt' ,
                        'your-privacy-blacklist.txt'     ].freeze
# a single existing file named by the first argument, else all of the default files
BLACKLIST_FILES     = File.exist?(ARGV.first.to_s) ? [ ARGV.first ] : ALL_BLACKLIST_FILES
ENTRIES_REGEX       = /^[^:\[\]#]+:[^:\[\]]*:(debian|fedora|fsf|parabola|savannah|sv)?:[^:\[\]]*:\w*([^\d:]+:.*|\[[^:]+:.*|[^:]*)$/
REASON_TAGS_REGEX   = /^\[([^\]]*)\] *(.*)/   # \1 => leading tag , \2 => remainder
RAW_ENTRY_KEY       = :raw_blacklist_entry
PACKAGE_NAME_KEY    = :original_package  # syntax field 0
REPLACEMENT_KEY     = :libre_replacement # syntax field 1
REFERENCE_KEY       = :ref               # syntax field 2 , constrained
REFERENCE_ID_KEY    = :id                # syntax field 3
REASON_TAGS_KEY     = :reason_tags       # syntax field 4 head , constrained
DESCRIPTION_KEY     = :short_description # syntax field 4 tail
# NOTE(review): ENTRIES_REGEX also lets 'sv' through, but it is not listed here,
#               so an 'sv' reference is reported as unknown - confirm that is intended
ACCEPTABLE_REFS     = [ 'debian' , 'fedora' , 'fsf' , 'parabola' , 'savannah' ].freeze
ACCEPTABLE_TAGS     = [ 'nonfree' , 'semifree' , 'uses-nonfree' , 'branding' ,
                        'technical' , 'FIXME:package' , 'FIXME:description' ].freeze
DO_PRINT_MALFORMED  = true
DO_PRINT_UNREPLACED = true
DO_PRINT_INCOMPLETE = true
DO_PRINT_DUPLICATED = true
DO_PRINT_STATS      = true
REPORT_SEPARATOR    = "------------------------------------------------------------\n"
LOG_FILE            = 'validation.log'

# instance variables read by print_report() and logged under their own names
MALFORMED_REPORT_VARS  = [ '@entries_malformed' ].freeze
INCOMPLETE_REPORT_VARS = [ '@entries_no_ref' , '@entries_refs_unknown' , '@refs_unknown' ,
                           '@entries_no_tag' , '@entries_tags_unknown' , '@tags_unknown' ,
                           '@entries_no_desc'                                            ].freeze
# instance variables which must be empty for a file to pass validate()
# TODO: ref, ref_id, tag, and desc should be required - i.e. also include:
#       '@entries_no_ref' , '@entries_no_tag' , '@entries_no_desc'
VALIDATION_VARS        = [ '@entries_malformed' , '@refs_unknown' ,
                           '@tags_unknown'      , '@entries_duplicated' ].freeze


@log_file = nil

# appends 'log_msg' to the log file, when one is open; otherwise does nothing
def log(log_msg)
  @log_file.print log_msg if @log_file.respond_to? :print
end

# clears all per-file state; called once before each input file is parsed
def reset_state
  @entries_malformed    = []
  @entries_acceptable   = []
  @entry_freqs          = {}
  @entries_no_ref       = []
  @entries_no_tag       = []
  @entries_no_desc      = []
  @entries_refs_unknown = []
  @entries_tags_unknown = []
  @refs_unknown         = Set[]
  @tags_unknown         = Set[]
  @entries_unreplaced   = nil # deferred - assigned by process_entries()
  @entries_duplicated   = nil # deferred - assigned by process_entries()
  @entries_identical    = {}
  @entries_differing    = {}
  @errors               = []
end

# removes tabs and surrounding whitespace from one raw syntax field
def clean_field(text)
  text.delete("\t").strip
end

# reads 'input_filename' line by line;
#   blank lines and '#' comment lines are skipped,
#   lines not matching ENTRIES_REGEX are collected in @entries_malformed (raw),
#   all other lines are split into fields and collected in @entries_acceptable (hashes)
def parse_entries(input_filename)
  DBG_PARSE(input_filename)

  File.readlines(input_filename).each do |line|
    DBG_FILTER_NAME(line)

    stripped = line.strip
    next if stripped.empty? || stripped.start_with?('#')

    unless line =~ ENTRIES_REGEX
      @entries_malformed << line
      next
    end

    tokens = line.split(':')
    # dump the tokens while tokens[0] is still the package name (it is shifted off below)
    DBG_TOKENS(tokens)

    entry                   = {}
    entry[RAW_ENTRY_KEY   ] = line
    entry[PACKAGE_NAME_KEY] = clean_field(tokens.shift)
    entry[REPLACEMENT_KEY ] = clean_field(tokens.shift)
    entry[REFERENCE_KEY   ] = clean_field(tokens.shift)
    entry[REFERENCE_ID_KEY] = clean_field(tokens.shift)
    # the description may itself contain colons, so re-join whatever remains
    entry[DESCRIPTION_KEY ] = clean_field(tokens.join(':'))
    entry[REASON_TAGS_KEY ] = []
    @entries_acceptable << entry

    # parse tags - peel each leading '[tag]' off of the description
    while entry[DESCRIPTION_KEY].start_with?('[') && entry[DESCRIPTION_KEY].include?(']')
      DBG_TAG(entry)

      tag_match = REASON_TAGS_REGEX.match(entry[DESCRIPTION_KEY])
      break if tag_match.nil?

      entry[REASON_TAGS_KEY] << tag_match[1]
      entry[DESCRIPTION_KEY] = tag_match[2]

      DBG_DESC(entry)
    end

    DBG_NO_TAG(entry)
    DBG_NO_DESC(entry)
  end
end

# classifies every well-formed entry (missing/unknown ref, tag, description),
#   then derives the unreplaced and duplicated sets
def process_entries
  @entries_acceptable.each do |entry|
    DBG_ENTRY(entry)

    entry_name         = entry[PACKAGE_NAME_KEY]
    entry_ref          = entry[REFERENCE_KEY   ]
    entry_ref_id       = entry[REFERENCE_ID_KEY]
    entry_tags         = entry[REASON_TAGS_KEY ]
    entry_desc         = entry[DESCRIPTION_KEY ]
    # an empty ref is "missing", not "unknown"
    entry_ref_unknown  = [ entry_ref ] - ACCEPTABLE_REFS - [ '' ]
    entry_tags_unknown = entry_tags - ACCEPTABLE_TAGS

    @entry_freqs[entry_name] = @entry_freqs.fetch(entry_name, 0) + 1
    @entries_no_ref       << entry if     entry_ref.empty? || entry_ref_id.empty?
    @entries_no_tag       << entry if     entry_tags.empty?
    @entries_no_desc      << entry if     entry_desc.empty?
    @entries_refs_unknown << entry unless entry_ref_unknown.empty?
    @entries_tags_unknown << entry unless entry_tags_unknown.empty?
    @refs_unknown.merge(entry_ref_unknown)
    @tags_unknown.merge(entry_tags_unknown)
  end

  @entries_unreplaced = @entries_acceptable.select { |entry| entry[REPLACEMENT_KEY].empty? }
  @entries_duplicated = @entry_freqs.keys.select { |name| @entry_freqs[name] > 1 }
  @entries_duplicated.each { |duplicate_name| classify_duplicates(duplicate_name) }
end

# partitions the raw lines of one duplicated package name:
#   @entries_identical[name] => one "<line> (N identical)" string per line repeated verbatim
#   @entries_differing[name] => the distinct raw lines, when there is more than one
def classify_duplicates(duplicate_name)
  raw_entries    = @entries_acceptable.select { |entry| entry[PACKAGE_NAME_KEY] == duplicate_name }
                                      .map    { |entry| entry[RAW_ENTRY_KEY] }
  unique_entries = raw_entries.uniq

  repeated = unique_entries.map    { |raw| [ raw , raw_entries.count(raw) ] }
                           .select { |_raw, n_identical| n_identical > 1 }
  unless repeated.empty?
    # raw lines still carry their line terminator; strip it so the count stays on the same line
    @entries_identical[duplicate_name] =
      repeated.map { |raw, n_identical| "#{raw.strip} (#{n_identical} identical)" }
  end

  @entries_differing[duplicate_name] = unique_entries if unique_entries.size > 1
end

# writes the detailed findings for 'input_filename' to the log
#   and the summary statistics to stdout, as enabled by the DO_PRINT_* switches
def print_report(input_filename)
  if DO_PRINT_MALFORMED || DO_PRINT_INCOMPLETE || DO_PRINT_UNREPLACED || DO_PRINT_DUPLICATED
    log "\n\n#{REPORT_SEPARATOR}#{input_filename} report:\n"
  end

  log_invalid_entries
  log_unreplaced_entries if DO_PRINT_UNREPLACED
  log_duplicated_entries if DO_PRINT_DUPLICATED
  print_stats(input_filename) if DO_PRINT_STATS

  DBG_EXPECTED(input_filename)
end

# logs each non-empty malformed/incomplete collection under its variable name
def log_invalid_entries
  var_names  = []
  var_names += MALFORMED_REPORT_VARS  if DO_PRINT_MALFORMED
  var_names += INCOMPLETE_REPORT_VARS if DO_PRINT_INCOMPLETE

  var_names.each do |var_name|
    entries = instance_variable_get(var_name)
    next if entries.empty?

    log "#{REPORT_SEPARATOR}#{var_name}:\n"
    # collections hold either entry hashes or plain strings (raw lines, refs, tags)
    entries.each do |entry|
      text = entry.is_a?(Hash) ? entry[RAW_ENTRY_KEY] : entry
      log " #{text.strip}\n"
    end
  end
end

# logs the package name of every entry which has no libre replacement
def log_unreplaced_entries
  return if @entries_unreplaced.empty?

  log "#{REPORT_SEPARATOR}entries unreplaced:\n"
  @entries_unreplaced.each { |entry| log " #{entry[PACKAGE_NAME_KEY]}\n" }
end

# logs the identical and differing lines of every duplicated package name
def log_duplicated_entries
  return if @entries_duplicated.empty?

  log "#{REPORT_SEPARATOR}entries duplicates:\n"
  @entries_duplicated.each do |duplicate_name|
    identicals = @entries_identical[duplicate_name]
    differings = @entries_differing[duplicate_name]

    log "\n #{duplicate_name}:\n"
    unless identicals.nil?
      log " identical:\n"
      identicals.each { |identical| log " #{identical}\n" }
    end
    unless differings.nil?
      log " differing:\n"
      # one log line per entry - passing the array itself would log its inspect form
      differings.each { |differing| log " #{differing.strip}\n" }
    end
  end
end

# prints the summary statistics for 'input_filename' to stdout
def print_stats(input_filename)
  all_entries = @entries_acceptable + @entries_malformed

  print "#{REPORT_SEPARATOR}#{input_filename} stats:\n"
  print " entries found: #{all_entries.size}\n"
  print " well-formed: #{@entries_acceptable.size}\n"
  print " malformed: #{@entries_malformed.size}\n"
  print " entries lacking bug reference: #{@entries_no_ref.size}\n"
  print " entries lacking tag: #{@entries_no_tag.size}\n"
  print " entries lacking description: #{@entries_no_desc.size}\n"
  print " unknown bug references: #{@refs_unknown.size}\n"
  print " unknown reason tags: #{@tags_unknown.size}\n"
  print " entries with no replacement: #{@entries_unreplaced.size}\n"
  print " duplicate entries: #{@entries_duplicated.size}\n"
  print " identical: #{@entries_identical.size}\n"
  print " differing: #{@entries_differing.keys.size}\n"
  print REPORT_SEPARATOR
end

# records in @errors the name of each VALIDATION_VARS collection which is not empty
#   ('input_filename' is accepted for symmetry with the other steps; it is not used)
def validate(input_filename)
  VALIDATION_VARS.each do |var_name|
    @errors << var_name unless instance_variable_get(var_name).empty?
  end
end

# writes '<input_filename>.json' holding the well-formed entries when there are no errors;
#   otherwise reports the failed checks and removes any stale JSON file
def generate_json(input_filename)
  output_json_file = "#{input_filename}.json"

  if @errors.empty?
    File.write(output_json_file, @entries_acceptable.to_json)
    print "no problems detected in #{input_filename}\n"
    $stderr.print "wrote: #{output_json_file}\n"
  else
    print "ERROR: in #{input_filename} - [ #{@errors.join ','} ] - see: #{LOG_FILE}\n"
    File.delete(output_json_file) if File.exist?(output_json_file)
  end
end


## main entry ##

# mode 'w' creates the log, or truncates an existing one
File.open(LOG_FILE, 'w') do |file|
  @log_file = file

  BLACKLIST_FILES.each do |input_filename|
    reset_state
    parse_entries(input_filename)
    process_entries
    print_report(input_filename)
    validate(input_filename)
    generate_json(input_filename)

    # fail fast - files after the first one with errors are not processed
    exit 1 unless @errors.empty?
  end
end