diff options
author | bill-auger <mr.j.spam.me@gmail.com> | 2020-03-14 08:37:34 -0400 |
---|---|---|
committer | bill-auger <mr.j.spam.me@gmail.com> | 2020-03-14 19:13:20 -0400 |
commit | 505e2811648d54698ca3d6ffd153a7479efa6714 (patch) | |
tree | ef013aa99d5df724317b9bfc09f5e197858e8a93 | |
parent | 0f501979af323d087561acf4944dd1ddedaa0d05 (diff) |
refactor and add validations for refs
-rw-r--r-- | blacklist-testdata.txt | 61 | ||||
-rwxr-xr-x | report | 305 |
2 files changed, 214 insertions, 152 deletions
diff --git a/blacklist-testdata.txt b/blacklist-testdata.txt index 93f9d38..20ca592 100644 --- a/blacklist-testdata.txt +++ b/blacklist-testdata.txt @@ -1,22 +1,43 @@ # comments begin with '#' -valid-complete:replacement:parabola:42:[branding]valid complete -valid-no-desc::::[branding] -valid-no-tags::::valid no tags -valid-no-tags-no-desc:::: -valid-multiple-tags-no-desc::::[tag1][tag2] -valid-identical-duplicate::::[branding] valid identical duplicate -valid-identical-duplicate::::[branding] valid identical duplicate -valid-differing-duplicate::::[branding] valid differing duplicate -valid-differing-duplicate::::[branding] valid differing duplicate -valid-differing-duplicate::::[branding][uses-nonfree] valid differing duplicate -valid-space-after-tag::::[uses-nonfree] valid space after tag -valid-multiple-tags::::[tag1][tag2] [tag3]valid multiple tags -valid-with-spaces-and-tabs::::[tag1][tag2] [tag3] valid with spaces and tabs -valid-with-colon-tag::::[FIXME:description] valid with colon tag -invalid-too-many-colons-before-brace:::parabola:42:[nonfree] invalid too many colons before brace -invalid-too-many-colons-before-brace-lacking-tag:::parabola:42: invalid too many colons before brace lacking tag -invalid-too-many-colons-before-brace-with-spaces::: [semifree] invalid too many colons before brace with spaces -invalid-not-enough-colons-before-brace:::[uses-nonfree] invalid not enough colons before brace -invalid-not-enough-colons-before-brace-with-colon-tag:::[FIXME:package] invalid not enough colons before brace with colon tag -invalid-not-enough-colons-before-brace-lacking-tag::: invalid too many colons before brace lacking tag +# these will be accepted, pending validation +well-formed-complete:replacement:fsf:42:[technical]well-formed - complete +well-formed-no-ref:replacement::42:[technical] well-formed - no ref +well-formed-no-refid:replacement:fsf::[technical] well-formed - no ref_id +well-formed-no-ref-no-refiid:replacement:::[technical] well-formed - no ref - no ref_id +well-formed-no-tag:replacement:fsf:42:well-formed - no tag +well-formed-no-desc:replacement:fsf:42:[technical] +well-formed-no-tag-no-desc:replacement:fsf:42: +well-formed-unknown-tag:r:fsf:42:[unknown-tag] well formed - unknown tag +well-formed-unknown-tag-no-desc:r:fsf:42:[unknown-tag] +well-formed-multiple-tags-no-desc:r:fsf:42:[technical][nonfree] +well-formed-multiple-tags-unknown-tag:r:fsf:42:[technical][unknown-tag] well-formed - multiple tags - unknown tag +well-formed-identical-duplicate:r:fsf:42:[technical] well-formed - identical duplicate +well-formed-identical-duplicate:r:fsf:42:[technical] well-formed - identical duplicate +well-formed-differing-duplicate:r:fsf:42:[technical] well-formed - differing duplicate +well-formed-differing-duplicate:r:fsf:42:[technical] well-formed - differing duplicate +well-formed-differing-duplicate:r:fsf:42:[technical][nonfree] well-formed - differing duplicate +well-formed-differing-duplicate:r:fsf:42:[technical][nonfree] well-formed - differing duplicate +well-formed-space-after-tag:r:fsf:42:[nonfree] well-formed - space after tag +well-formed-multiple-tags:r:fsf:42:[tag1][tag2] [tag3]well-formed - multiple tags +well-formed-with-spaces-and-tabs:r:fsf:42:[tag1][tag2] [tag3] well-formed - with spaces and tabs +well-formed-with-deprecated-colon-in-tag:r:fsf:42:[FIXME:description] well-formed - with deprecated colon in tag + +# these will not be rejected before validation +malformed-too-many-colons-before-bracket-1::r:fsf:42:[nonfree] malformed - too many colons before bracket +malformed-too-many-colons-before-bracket-2:r::fsf:42:[nonfree] malformed - too many colons before bracket +malformed-too-many-colons-before-bracket-3:r:fsf::42:[nonfree] malformed - too many colons before bracket +malformed-too-many-colons-before-bracket-4:r:fsf:42::[nonfree] malformed - too many colons before bracket +malformed-too-many-colons-before-bracket-lacking-tag:r::fsf:42: malformed - too many colons before bracket - lacking tag +malformed-too-many-colons-before-bracket-with-spaces::r:fsf:42: [semifree] malformed - too many colons before bracket - with spaces +malformed-not-enough-colons-before-bracket-1:r:fsf:42[nonfree] malformed - not enough colons before bracket +malformed-not-enough-colons-before-bracket-2:r:fsf:[nonfree] malformed - not enough colons before bracket - no ref_id +malformed-not-enough-colons-before-bracket-3:fsf:42:[nonfree] malformed - not enough colons before bracket - no replacement +malformed-not-enough-colons-before-bracket-lacking-tag:r:: malformed - too many colons before bracket - lacking tag +malformed-not-enough-colons-before-bracket-with-deprecated-colon-in-tag:r::[FIXME:package] malformed - not enough colons before bracket - with deprecated colon in tag + +# TODO: deprecated reason tags +# TODO: probobly can drop: 'well-formed-with-deprecated-colon-in-tag' + 'malformed-not-enough-colons-before-bracket-with-deprecated-colon-in-tag' +well-formed-no-ref::::[FIXME:package] well-formed deprecated tag1 +well-formed-no-ref::::[FIXME:description] well-formed deprecated tag2 @@ -1,7 +1,8 @@ #!/usr/bin/env ruby =begin - report.rb + USAGE: + report.rb [ input_files ] this script is used to validate and report statistics on the blacklist entries @@ -10,38 +11,52 @@ and will fail if any of those conditions are unmet it also detects entries with no replacement, although that is not an error - it optionally creates a JSON file with the results - that can be the input to the post_fsd_wiki.phantomjs script + it optionally creates a JSON file with the results, + which can be the input to the post_fsd_wiki.phantomjs script =end ## DEBUG begin ## DEBUG = false -require 'byebug' if DEBUG +require 'byebug' if DEBUG || false DEBUG_FILTER_NAMES = [] def IS_DEBUG_FILTER_NAME name ; DEBUG && (DEBUG_FILTER_NAMES.include? name) ; end ; -def DBG_PARSE input_filename ; if DEBUG ; print "\nDEBUG: parsing #{input_filename}\n" ; end ; end ; -def DBG_FILTER_NAME line ; if DEBUG ; DEBUG_FILTER_NAMES.each { | ea | debugger if line.start_with? ea } ; end ; end ; -def DBG_TOKENS tokens ; if DEBUG ; tokens.each_with_index { | token , i | print "DEBUG: tokens[#{i}]=#{token}\n" if IS_DEBUG_FILTER_NAME tokens[0] } ; end ; end ; -def DBG_TAG entry ; if DEBUG ; print "\nparsing tag for: #{entry[PACKAGE_NAME_KEY]}\n" ; print "desc IN=#{entry[DESCRIPTION_KEY]}\n" ; end ; end ; -def DBG_DESC entry ; if DEBUG ; print "desc OUT=#{entry[DESCRIPTION_KEY]}\n" ; print "tags=#{entry[BLACKLIST_TAGS_KEY]}\n" ; sleep 0.2 ; end ; end ; -def DBG_NO_TAG entry ; if DEBUG ; print "\nno tag for: #{ entry[PACKAGE_NAME_KEY]}\n" if entry[BLACKLIST_TAGS_KEY].empty? ; end ; end ; -def DBG_NO_DESC entry ; if DEBUG ; print "\nno desc for: #{entry[PACKAGE_NAME_KEY]}\n" if entry[DESCRIPTION_KEY ].empty? ; end ; end ; -def DBG_ENTRY entry ; if DEBUG && (IS_DEBUG_FILTER_NAME entry[PACKAGE_NAME_KEY]) ; print "\n" ; entry.each_pair { | k , v | print "DEBUG: #{k}: #{v}\n" } ; end ; end ; -def DBG_EXPECTED input_filename ; if input_filename == 'blacklist-testdata.txt' - print "##------ expected results ------##\n" + - "# entries found: 20 #\n" + - "# entries valid: 14 #\n" + - "# entries invalid: 6 #\n" + - "# entries lacking tags: 2 #\n" + - "# entries lacking description: 3 #\n" + - "# tags unknown: 3 #\n" + - "# entries unreplaced: 13 #\n" + - "# entries duplicated: 2 #\n" + - "# identical: 1 #\n" + - "# differing: 1 #\n" + - "##------------------------------##\n" ; end ; end ; +def DBG_PARSE input_filename ; if DEBUG ; $stderr.print "\nDEBUG: parsing #{input_filename}\n" ; end ; end ; +def DBG_FILTER_NAME line ; if DEBUG ; DEBUG_FILTER_NAMES.each { | ea | debugger if line.start_with? ea } ; end ; end ; +def DBG_TOKENS tokens ; if DEBUG ; tokens.each_with_index { | token , i | $stderr.print "DEBUG: tokens[#{i}]=#{token}\n" if IS_DEBUG_FILTER_NAME tokens[0] } ; end ; end ; +def DBG_TAG entry ; if DEBUG ; $stderr.print "\nparsing tag for: #{entry[PACKAGE_NAME_KEY]}\ndesc IN=#{entry[DESCRIPTION_KEY]}\n" ; end ; end ; +def DBG_DESC entry ; if DEBUG ; $stderr.print "desc OUT=#{entry[DESCRIPTION_KEY]}\n tags=#{entry[REASON_TAGS_KEY]}\n" ; sleep 0.2 ; end ; end ; +def DBG_NO_TAG entry ; if DEBUG ; $stderr.print "\nno tag for: #{ entry[PACKAGE_NAME_KEY]}\n" if entry[REASON_TAGS_KEY].empty? ; end ; end ; +def DBG_NO_DESC entry ; if DEBUG ; $stderr.print "\nno desc for: #{entry[PACKAGE_NAME_KEY]}\n" if entry[DESCRIPTION_KEY ].empty? ; end ; end ; +def DBG_ENTRY entry ; if DEBUG && (IS_DEBUG_FILTER_NAME entry[PACKAGE_NAME_KEY]) ; $stderr.print "\n" ; entry.each_pair { | k , v | $stderr.print "DEBUG: #{k}: #{v}\n" } ; end ; end ; +def DBG_EXPECTED input_filename ; if input_filename == 'blacklist-testdata.txt' ; $stderr.print DBG_EXPECTED_TEST_RESULTS ; end ; end ; +DBG_EXPECTED_TEST_RESULTS = "##------ expected results ---------##\n" + # per 'blacklist-testdata.txt' + "# entries found: 20 #\n" + + "# well-formed: 14 #\n" + + "# malformed: 6 #\n" + + "# entries lacking bug reference: 15 #\n" + + "# entries lacking tag: 2 #\n" + + "# entries lacking description: 3 #\n" + + "# unknown bug references: 0 #\n" + + "# unknown reason tags: 3 #\n" + + "# entries with no replacement: 13 #\n" + + "# duplicate entries: 2 #\n" + + "# identical: 1 #\n" + + "# differing: 2 #\n" + + "##---------------------------------##\n" +# entries found: 34 +# well-formed: 28 +# malformed: 6 +# entries lacking bug reference: 8 +# entries lacking tag: 8 +# entries lacking description: 4 +# unknown bug references: 1 +# unknown reason tags: 4 +# entries with no replacement: 4 +# duplicate entries: 3 +# identical: 1 +# differing: 2 ## DEBUG end ## @@ -52,47 +67,47 @@ require 'set' # NOTE: acceptable entry syntax per SYNTAX doc => # ORIGINAL_PACKAGE:LIBRE_REPLACEMENT:REF:REF_ID:[TAG] SHORT_DESCRIPTION ALL_BLACKLIST_FILES = [ 'blacklist.txt' , 'your-freedom_emu-blacklist.txt' , 'your-privacy-blacklist.txt' ] -BLACKLIST_FILES = (ALL_BLACKLIST_FILES.include? ARGV.first) ? [ ARGV.first ] : ALL_BLACKLIST_FILES -# BLACKLIST_FILES = [ 'blacklist-testdata.txt' ] # DEBUG -VALID_ENTRIES_REGEX = /^[^:\[\]#]*:[^:\[\]]*:(sv|debian|parabola|fsf|fedora)?:[^:\[\]]*:\w*([^\d:]+:.*|\[[^:]+:.*|[^:]*)$/ -BLACKLIST_TAGS_REGEX = /^\[([^\]]*)\] *(.*)/ -RAW_ENTRY_KEY = :raw_blacklist_entry -PACKAGE_NAME_KEY = :original_package # syntax token -REPLACEMENT_KEY = :libre_replacement # syntax token -REFERENCE_KEY = :ref # syntax token -REFERENCE_ID_KEY = :id # syntax token -DESCRIPTION_KEY = :short_description # syntax token -BLACKLIST_TAGS_KEY = :blacklist_tags -NONFREE_TAG = 'nonfree' -SEMIFREE_TAG = 'semifree' -USES_NONFREE_TAG = 'uses-nonfree' -BRANDING_TAG = 'branding' -TECHNICAL_TAG = 'technical' -NEEDS_REPLACEMENT_TAG = 'FIXME:package' -NEEDS_DESC_TAG = 'FIXME:description' -ACCEPTABLE_TAGS = [ NONFREE_TAG , SEMIFREE_TAG , USES_NONFREE_TAG , BRANDING_TAG , - TECHNICAL_TAG , NEEDS_REPLACEMENT_TAG , NEEDS_DESC_TAG ] -DO_PRINT_STATS = true -DO_PRINT_INVALID = true -DO_PRINT_UNREPLACED = true -DO_PRINT_INCOMPLETE = true -DO_PRINT_DUPLICATED = true -REPORT_SEPARATOR = "------------------------------------------------------------\n" - +BLACKLIST_FILES = (File.exist? ARGV.first.to_s) ? [ ARGV.first ] : ALL_BLACKLIST_FILES +ENTRIES_REGEX = /^[^:\[\]#]+:[^:\[\]]*:(debian|fedora|fsf|parabola|savannah|sv)?:[^:\[\]]*:\w*([^\d:]+:.*|\[[^:]+:.*|[^:]*)$/ +REASON_TAGS_REGEX = /^\[([^\]]*)\] *(.*)/ +RAW_ENTRY_KEY = :raw_blacklist_entry +PACKAGE_NAME_KEY = :original_package # syntax field 0 +REPLACEMENT_KEY = :libre_replacement # syntax field 1 +REFERENCE_KEY = :ref # syntax field 2 , constrained +REFERENCE_ID_KEY = :id # syntax field 3 +REASON_TAGS_KEY = :reason_tags # syntax field 4 head , constrained +DESCRIPTION_KEY = :short_description # syntax field 4 tail +ACCEPTABLE_REFS = [ 'debian' , 'fedora' , 'fsf' , 'parabola' , 'savannah' ] +ACCEPTABLE_TAGS = [ 'nonfree' , 'semifree' , 'uses-nonfree' , 'branding' , + 'technical' , 'FIXME:package' , 'FIXME:description' ] +DO_PRINT_MALFORMED = true +DO_PRINT_UNREPLACED = true +DO_PRINT_INCOMPLETE = true +DO_PRINT_DUPLICATED = true +DO_PRINT_STATS = true +REPORT_SEPARATOR = "------------------------------------------------------------\n" +LOG_FILE = 'validation.log' + + +@log_file = nil +def log log_msg ; @log_file.print log_msg if @log_file.respond_to? :print ; end ; def reset_state - @entries_invalid = [] - @entries_valid = [] - @entry_freqs = {} - @entries_no_desc = [] - @entries_no_tags = [] - @entries_tags_unknown = [] - @tags_unknown = Set[] - @entries_unreplaced = nil # deferred - @entries_duplicated = nil # deferred - @entries_duplicated_identical = {} - @entries_duplicated_differing = {} - @errors = [] + @entries_malformed = [] + @entries_acceptable = [] + @entry_freqs = {} + @entries_no_ref = [] + @entries_no_tag = [] + @entries_no_desc = [] + @entries_refs_unknown = [] + @entries_tags_unknown = [] + @refs_unknown = Set[] + @tags_unknown = Set[] + @entries_unreplaced = nil # deferred + @entries_duplicated = nil # deferred + @entries_identical = {} + @entries_differing = {} + @errors = [] end def parse_entries input_filename @@ -102,10 +117,10 @@ DBG_PARSE input_filename DBG_FILTER_NAME line - next if line.strip.empty? || (line.strip.start_with? '#') - @entries_invalid << line && next unless line.match VALID_ENTRIES_REGEX + next if line.strip.empty? || (line.strip.start_with? '#') + @entries_malformed << line && next unless line.match ENTRIES_REGEX - @entries_valid << (entry = {}) + @entries_acceptable << (entry = {}) tokens = (line.split ':') entry[RAW_ENTRY_KEY ] = line entry[PACKAGE_NAME_KEY ] = (tokens.shift ).gsub("\t" , '').strip @@ -113,7 +128,7 @@ DBG_FILTER_NAME line entry[REFERENCE_KEY ] = (tokens.shift ).gsub("\t" , '').strip entry[REFERENCE_ID_KEY ] = (tokens.shift ).gsub("\t" , '').strip entry[DESCRIPTION_KEY ] = (tokens.join ':').gsub("\t" , '').strip - entry[BLACKLIST_TAGS_KEY] = [] + entry[REASON_TAGS_KEY ] = [] DBG_TOKENS tokens @@ -122,8 +137,8 @@ DBG_TOKENS tokens DBG_TAG entry - entry[BLACKLIST_TAGS_KEY] << (entry[DESCRIPTION_KEY].gsub BLACKLIST_TAGS_REGEX , '\1') - entry[DESCRIPTION_KEY ] = (entry[DESCRIPTION_KEY].gsub BLACKLIST_TAGS_REGEX , '\2') + entry[REASON_TAGS_KEY] << (entry[DESCRIPTION_KEY].gsub REASON_TAGS_REGEX , '\1') + entry[DESCRIPTION_KEY] = (entry[DESCRIPTION_KEY].gsub REASON_TAGS_REGEX , '\2') DBG_DESC entry @@ -134,121 +149,147 @@ DBG_NO_DESC entry end def process_entries - @entries_valid.each do | entry | + @entries_acceptable.each do | entry | DBG_ENTRY entry - entry_name = entry[PACKAGE_NAME_KEY ] - entry_desc = entry[DESCRIPTION_KEY ] - entry_tags = entry[BLACKLIST_TAGS_KEY] - entry_tags_unknown = entry_tags - ACCEPTABLE_TAGS + entry_name = entry[PACKAGE_NAME_KEY] + entry_ref = entry[REFERENCE_KEY ] + entry_ref_id = entry[REFERENCE_ID_KEY] + entry_tags = entry[REASON_TAGS_KEY ] + entry_desc = entry[DESCRIPTION_KEY ] + entry_ref_unknown = (ACCEPTABLE_REFS.include? entry_ref) ? entry_ref : '' + entry_ref_unknown = [ entry_ref ] - ACCEPTABLE_REFS - [ '' ] + entry_tags_unknown = entry_tags - ACCEPTABLE_TAGS @entry_freqs[entry_name] = (@entry_freqs[entry_name] ||= 0) + 1 + @entries_no_ref << entry if entry_ref .empty? || + entry_ref_id .empty? + @entries_no_tag << entry if entry_tags .empty? @entries_no_desc << entry if entry_desc .empty? - @entries_no_tags << entry if entry_tags .empty? + @entries_refs_unknown << entry unless entry_ref_unknown .empty? @entries_tags_unknown << entry unless entry_tags_unknown.empty? + @refs_unknown.merge entry_ref_unknown @tags_unknown.merge entry_tags_unknown end - @entries_unreplaced = @entries_valid .select { | entry | entry[REPLACEMENT_KEY].empty? } - @entries_duplicated = @entry_freqs.keys.select { | name | @entry_freqs[name] > 1 } + @entries_unreplaced = @entries_acceptable.select { | entry | entry[REPLACEMENT_KEY].empty? } + @entries_duplicated = @entry_freqs.keys .select { | name | @entry_freqs[name] > 1 } @entries_duplicated.each do | duplicate_name | - duplicate_entries = @entries_valid.select { | entry | entry[PACKAGE_NAME_KEY] == duplicate_name } \ - .map! { | entry | entry[RAW_ENTRY_KEY ] } + duplicate_entries = @entries_acceptable.select { | entry | entry[PACKAGE_NAME_KEY] == duplicate_name } \ + .map { | entry | entry[RAW_ENTRY_KEY ] } unique_entries = duplicate_entries.uniq - n_unique_entries = unique_entries.size + n_unique_entries = unique_entries .size unique_entries.each do | uniq_value | n_identical_entries = duplicate_entries.count { | dup_entry | dup_entry == uniq_value } - @entries_duplicated_identical[duplicate_name] = uniq_value + " (#{n_identical_entries} identical)" if n_identical_entries > 1 + @entries_identical[duplicate_name] = uniq_value + " (#{n_identical_entries} identical)" if n_identical_entries > 1 end if n_unique_entries > 1 - @entries_duplicated_differing[duplicate_name] = unique_entries + @entries_differing[duplicate_name] = unique_entries end end end def print_report input_filename - if DO_PRINT_INVALID || DO_PRINT_INCOMPLETE || DO_PRINT_UNREPLACED - print "\n\n#{REPORT_SEPARATOR}#{input_filename} report:\n" + if DO_PRINT_MALFORMED || DO_PRINT_INCOMPLETE || DO_PRINT_UNREPLACED + log "\n\n#{REPORT_SEPARATOR}#{input_filename} report:\n" end - print_invalid = {} - print_invalid['entries invalid' ] = @entries_invalid if DO_PRINT_INVALID - print_invalid['entries lacking description'] = @entries_no_desc if DO_PRINT_INCOMPLETE - print_invalid['entries lacking tags' ] = @entries_no_tags if DO_PRINT_INCOMPLETE - print_invalid['entries with unknown tags' ] = @entries_tags_unknown if DO_PRINT_INCOMPLETE - print_invalid['tags unknown' ] = @tags_unknown if DO_PRINT_INCOMPLETE - print_invalid.each_pair do | label , data | - print "#{REPORT_SEPARATOR}#{label}:\n" unless data.empty? - data.each { | entry | print " #{((entry.is_a? Hash) ? entry[RAW_ENTRY_KEY] : entry).strip}\n" } + entries_malformed = [ '@entries_malformed' ] + entries_incomplete = [ '@entries_no_ref' , '@entries_refs_unknown' , '@refs_unknown' , + '@entries_no_tag' , '@entries_tags_unknown' , '@tags_unknown' , + '@entries_no_desc' ] + log_invalid = [] + + log_invalid += entries_malformed if DO_PRINT_MALFORMED + log_invalid += entries_incomplete if DO_PRINT_INCOMPLETE + log_invalid.each do | var_name | + entries = eval var_name + + log "#{REPORT_SEPARATOR}#{var_name}:\n" unless entries.empty? + entries.each { | entry | log " #{((entry.is_a? Hash) ? entry[RAW_ENTRY_KEY] : entry).strip}\n" } end unless @entries_unreplaced.empty? || ! DO_PRINT_UNREPLACED - print "#{REPORT_SEPARATOR}entries unreplaced:\n" - @entries_unreplaced.each { | entry | print " #{entry[PACKAGE_NAME_KEY]}\n" } + log "#{REPORT_SEPARATOR}entries unreplaced:\n" + @entries_unreplaced.each { | entry | log " #{entry[PACKAGE_NAME_KEY]}\n" } end unless @entries_duplicated.empty? || ! DO_PRINT_DUPLICATED - print "#{REPORT_SEPARATOR}entries duplicates:\n" + log "#{REPORT_SEPARATOR}entries duplicates:\n" @entries_duplicated.each do | duplicate_name | - entry_identical = @entries_duplicated_identical[duplicate_name] - entries_differing = @entries_duplicated_differing[duplicate_name] - - print "\n #{duplicate_name}:\n" - print " identical:\n" unless entry_identical .nil? - print " #{entry_identical}\n" unless entry_identical .nil? - print " differing:\n" unless entries_differing.nil? - entries_differing.each { | entry | print " #{entry}\n" } unless entries_differing.nil? + identical = @entries_identical[duplicate_name] + differings = @entries_differing[duplicate_name] + + log "\n #{duplicate_name}:\n" + log " identical:\n" unless identical .nil? + log " #{identical}\n" unless identical .nil? + log " differing:\n" unless differings.nil? + log differings.map { | differing | " #{differing}\n" } unless differings.nil? end end if DO_PRINT_STATS + all_entries = @entries_acceptable + @entries_malformed + print "#{REPORT_SEPARATOR}#{input_filename} stats:\n" - print " entries found: #{ (@entries_valid + @entries_invalid).size }\n" - print " entries valid: #{ (@entries_valid ).size }\n" - print " entries invalid: #{ (@entries_invalid ).size }\n" - print " entries lacking tags: #{ (@entries_no_tags ).size }\n" - print " entries lacking description: #{(@entries_no_desc ).size }\n" - print " tags unknown: #{ (@tags_unknown ).size }\n" - print " entries unreplaced: #{ (@entries_unreplaced ).size }\n" - print " entries duplicated: #{ (@entries_duplicated ).size }\n" - print " identical: #{ (@entries_duplicated_identical ).size }\n" - print " differing: #{ (@entries_duplicated_differing ).keys.size}\n" + print " entries found: #{ (all_entries ).size }\n" + print " well-formed: #{ (@entries_acceptable).size }\n" + print " malformed: #{ (@entries_malformed ).size }\n" + print " entries lacking bug reference: #{(@entries_no_ref ).size }\n" + print " entries lacking tag: #{ (@entries_no_tag ).size }\n" + print " entries lacking description: #{ (@entries_no_desc ).size }\n" + print " unknown bug references: #{ (@refs_unknown ).size }\n" + print " unknown reason tags: #{ (@tags_unknown ).size }\n" + print " entries with no replacement: #{ (@entries_unreplaced).size }\n" + print " duplicate entries: #{ (@entries_duplicated).size }\n" + print " identical: #{ (@entries_identical ).size }\n" + print " differing: #{ (@entries_differing ).keys.size}\n" print REPORT_SEPARATOR end DBG_EXPECTED input_filename end -def sanity_check input_filename - @errors << 'entries_invalid' unless @entries_invalid .empty? - # @errors << 'entries_no_tags' unless @entries_no_tags .empty? # TODO: complete these entries - # @errors << 'entries_no_desc' unless @entries_no_desc .empty? # TODO: complete these entries - @errors << 'tags_unknown' unless @tags_unknown .empty? - @errors << 'entries_duplicated' unless @entries_duplicated.empty? +def validate input_filename + validations = [ '@entries_malformed' , '@entries_no_ref' , '@entries_no_tag' , + '@entries_no_desc' , '@refs_unknown' , '@tags_unknown' , + '@entries_duplicated' ] # TODO: ref, ref_id, tag, and desc should be required + validations = [ '@entries_malformed' , '@refs_unknown' , '@tags_unknown' , + '@entries_duplicated' ] # TODO: ref, ref_id, tag, and desc should be required + + validations.each { | var_name | @errors << var_name unless (eval var_name).empty? } end def generate_json input_filename output_json_file = "#{input_filename}.json" if @errors.empty? - IO.write output_json_file , @entries_valid.to_json - print "\nwrote: #{output_json_file}\n\nno problems detected in #{input_filename}\n" + IO.write output_json_file , @entries_acceptable.to_json + print "no problems detected in #{input_filename}\n" + $stderr.print "wrote: #{output_json_file}\n" else - print "\nERROR: in #{input_filename} - #{@errors.join ','} - JSON will not be generated\n" + print "ERROR: in #{input_filename} - [ #{@errors.join ','} ] - see: #{LOG_FILE}\n" + File.delete output_json_file if File.exist? output_json_file end end -BLACKLIST_FILES.each do | input_filename | - reset_state - parse_entries input_filename - process_entries - print_report input_filename - sanity_check input_filename - generate_json input_filename +## main entry ## + +File.open(LOG_FILE , File::CREAT|File::WRONLY) do | file | + (@log_file = file).truncate 0 - exit 1 unless @errors.empty? + BLACKLIST_FILES.each do | input_filename | + reset_state + parse_entries input_filename + process_entries + print_report input_filename + validate input_filename + generate_json input_filename + + exit 1 unless @errors.empty? + end end |