refactor and add validations for refs

author: bill-auger <mr.j.spam.me@gmail.com> 2020-03-14 08:37:34 -0400
committer: bill-auger <mr.j.spam.me@gmail.com> 2020-03-14 19:13:20 -0400
commit: 505e2811648d54698ca3d6ffd153a7479efa6714 (patch)
tree: ef013aa99d5df724317b9bfc09f5e197858e8a93
parent: 0f501979af323d087561acf4944dd1ddedaa0d05 (diff)
2 files changed, 214 insertions, 152 deletions
diff --git a/blacklist-testdata.txt b/blacklist-testdata.txt
index 93f9d38..20ca592 100644
--- a/blacklist-testdata.txt
+++ b/blacklist-testdata.txt
@@ -1,22 +1,43 @@
 # comments begin with '#'
 
-valid-complete:replacement:parabola:42:[branding]valid complete
-valid-no-desc::::[branding]
-valid-no-tags::::valid no tags
-valid-no-tags-no-desc::::
-valid-multiple-tags-no-desc::::[tag1][tag2]
-valid-identical-duplicate::::[branding] valid identical duplicate
-valid-identical-duplicate::::[branding] valid identical duplicate
-valid-differing-duplicate::::[branding] valid differing  duplicate
-valid-differing-duplicate::::[branding] valid  differing duplicate
-valid-differing-duplicate::::[branding][uses-nonfree] valid differing  duplicate
-valid-space-after-tag::::[uses-nonfree] valid space after tag
-valid-multiple-tags::::[tag1][tag2] [tag3]valid multiple tags
-valid-with-spaces-and-tabs::::[tag1][tag2] [tag3]         			  valid with spaces and tabs
-valid-with-colon-tag::::[FIXME:description] valid with colon tag
-invalid-too-many-colons-before-brace:::parabola:42:[nonfree] invalid too many colons before brace
-invalid-too-many-colons-before-brace-lacking-tag:::parabola:42: invalid too many colons before brace lacking tag
-invalid-too-many-colons-before-brace-with-spaces:::                          [semifree] invalid too many colons before brace with spaces
-invalid-not-enough-colons-before-brace:::[uses-nonfree] invalid not enough colons before brace
-invalid-not-enough-colons-before-brace-with-colon-tag:::[FIXME:package] invalid not enough colons before brace with colon tag
-invalid-not-enough-colons-before-brace-lacking-tag::: invalid too many colons before brace lacking tag
+# these will be accepted, pending validation
+well-formed-complete:replacement:fsf:42:[technical]well-formed - complete
+well-formed-no-ref:replacement::42:[technical] well-formed - no ref
+well-formed-no-refid:replacement:fsf::[technical] well-formed - no ref_id
+well-formed-no-ref-no-refid:replacement:::[technical] well-formed - no ref - no ref_id
+well-formed-no-tag:replacement:fsf:42:well-formed - no tag
+well-formed-no-desc:replacement:fsf:42:[technical]
+well-formed-no-tag-no-desc:replacement:fsf:42:
+well-formed-unknown-tag:r:fsf:42:[unknown-tag] well formed - unknown tag
+well-formed-unknown-tag-no-desc:r:fsf:42:[unknown-tag]
+well-formed-multiple-tags-no-desc:r:fsf:42:[technical][nonfree]
+well-formed-multiple-tags-unknown-tag:r:fsf:42:[technical][unknown-tag] well-formed - multiple tags - unknown tag
+well-formed-identical-duplicate:r:fsf:42:[technical] well-formed - identical duplicate
+well-formed-identical-duplicate:r:fsf:42:[technical] well-formed - identical duplicate
+well-formed-differing-duplicate:r:fsf:42:[technical] well-formed - differing  duplicate
+well-formed-differing-duplicate:r:fsf:42:[technical] well-formed -  differing duplicate
+well-formed-differing-duplicate:r:fsf:42:[technical][nonfree] well-formed -  differing duplicate
+well-formed-differing-duplicate:r:fsf:42:[technical][nonfree] well-formed -  differing  duplicate
+well-formed-space-after-tag:r:fsf:42:[nonfree] well-formed - space after tag
+well-formed-multiple-tags:r:fsf:42:[tag1][tag2] [tag3]well-formed - multiple tags
+well-formed-with-spaces-and-tabs:r:fsf:42:[tag1][tag2] [tag3]         			  well-formed - with spaces and tabs
+well-formed-with-deprecated-colon-in-tag:r:fsf:42:[FIXME:description] well-formed - with deprecated colon in tag
+
+# these will be rejected before validation
+malformed-too-many-colons-before-bracket-1::r:fsf:42:[nonfree] malformed - too many colons before bracket
+malformed-too-many-colons-before-bracket-2:r::fsf:42:[nonfree] malformed - too many colons before bracket
+malformed-too-many-colons-before-bracket-3:r:fsf::42:[nonfree] malformed - too many colons before bracket
+malformed-too-many-colons-before-bracket-4:r:fsf:42::[nonfree] malformed - too many colons before bracket
+malformed-too-many-colons-before-bracket-lacking-tag:r::fsf:42: malformed - too many colons before bracket - lacking tag
+malformed-too-many-colons-before-bracket-with-spaces::r:fsf:42:                          [semifree] malformed - too many colons before bracket - with spaces
+malformed-not-enough-colons-before-bracket-1:r:fsf:42[nonfree] malformed - not enough colons before bracket
+malformed-not-enough-colons-before-bracket-2:r:fsf:[nonfree] malformed - not enough colons before bracket - no ref_id
+malformed-not-enough-colons-before-bracket-3:fsf:42:[nonfree] malformed - not enough colons before bracket - no replacement
+malformed-not-enough-colons-before-bracket-lacking-tag:r:: malformed - not enough colons before bracket - lacking tag
+malformed-not-enough-colons-before-bracket-with-deprecated-colon-in-tag:r::[FIXME:package] malformed - not enough colons before bracket - with deprecated colon in tag
+
+# TODO: deprecated reason tags
+# TODO: probably can drop: 'well-formed-with-deprecated-colon-in-tag'
+#                          'malformed-not-enough-colons-before-bracket-with-deprecated-colon-in-tag'
+well-formed-no-ref::::[FIXME:package] well-formed deprecated tag1
+well-formed-no-ref::::[FIXME:description] well-formed deprecated tag2
diff --git a/report b/report
index 17a32ee..fc384cc 100755
--- a/report
+++ b/report
@@ -1,7 +1,8 @@
 #!/usr/bin/env ruby
 
 =begin
-  report.rb
+  USAGE:
+    report.rb [ input_file ]
 
     this script is used to validate and report statistics on the blacklist entries
 
@@ -10,38 +11,52 @@
       and will fail if any of those conditions are unmet
     it also detects entries with no replacement, although that is not an error
 
-    it optionally creates a JSON file with the results
-    that can be the input to the post_fsd_wiki.phantomjs script
+    it optionally creates a JSON file with the results,
+      which can be the input to the post_fsd_wiki.phantomjs script
 =end
 
 
 ## DEBUG begin ##
 DEBUG = false
-require 'byebug' if DEBUG
+require 'byebug' if DEBUG || false
 DEBUG_FILTER_NAMES = []
 def IS_DEBUG_FILTER_NAME name ; DEBUG && (DEBUG_FILTER_NAMES.include? name) ; end ;
 
-def DBG_PARSE input_filename    ; if DEBUG ; print "\nDEBUG: parsing #{input_filename}\n"                                                                             ; end ; end ;
-def DBG_FILTER_NAME line        ; if DEBUG ; DEBUG_FILTER_NAMES.each { | ea | debugger if line.start_with? ea }                                                       ; end ; end ;
-def DBG_TOKENS tokens           ; if DEBUG ; tokens.each_with_index { | token , i | print "DEBUG: tokens[#{i}]=#{token}\n" if IS_DEBUG_FILTER_NAME tokens[0] }        ; end ; end ;
-def DBG_TAG entry               ; if DEBUG ; print "\nparsing tag for: #{entry[PACKAGE_NAME_KEY]}\n" ; print "desc  IN=#{entry[DESCRIPTION_KEY]}\n"                   ; end ; end ;
-def DBG_DESC entry              ; if DEBUG ; print "desc OUT=#{entry[DESCRIPTION_KEY]}\n" ; print "tags=#{entry[BLACKLIST_TAGS_KEY]}\n" ; sleep 0.2                   ; end ; end ;
-def DBG_NO_TAG entry            ; if DEBUG ; print "\nno tag for: #{ entry[PACKAGE_NAME_KEY]}\n" if entry[BLACKLIST_TAGS_KEY].empty?                                  ; end ; end ;
-def DBG_NO_DESC entry           ; if DEBUG ; print "\nno desc for: #{entry[PACKAGE_NAME_KEY]}\n" if entry[DESCRIPTION_KEY   ].empty?                                  ; end ; end ;
-def DBG_ENTRY entry             ; if DEBUG && (IS_DEBUG_FILTER_NAME entry[PACKAGE_NAME_KEY]) ; print "\n" ; entry.each_pair { | k , v | print "DEBUG: #{k}: #{v}\n" } ; end ; end ;
-def DBG_EXPECTED input_filename ; if input_filename == 'blacklist-testdata.txt'
-                                    print "##------ expected results ------##\n" +
-                                          "# entries found: 20              #\n" +
-                                          "# entries valid: 14              #\n" +
-                                          "# entries invalid: 6             #\n" +
-                                          "# entries lacking tags: 2        #\n" +
-                                          "# entries lacking description: 3 #\n" +
-                                          "# tags unknown: 3                #\n" +
-                                          "# entries unreplaced: 13         #\n" +
-                                          "# entries duplicated: 2          #\n" +
-                                          "#   identical: 1                 #\n" +
-                                          "#   differing: 1                 #\n" +
-                                          "##------------------------------##\n" ; end ; end ;
+def DBG_PARSE input_filename    ; if DEBUG ; $stderr.print "\nDEBUG: parsing #{input_filename}\n"                                                                                     ; end ; end ;
+def DBG_FILTER_NAME line        ; if DEBUG ; DEBUG_FILTER_NAMES.each { | ea | debugger if line.start_with? ea }                                                                       ; end ; end ;
+def DBG_TOKENS tokens           ; if DEBUG ; tokens.each_with_index { | token , i | $stderr.print "DEBUG: tokens[#{i}]=#{token}\n" if IS_DEBUG_FILTER_NAME tokens[0] }                ; end ; end ;
+def DBG_TAG entry               ; if DEBUG ; $stderr.print "\nparsing tag for: #{entry[PACKAGE_NAME_KEY]}\ndesc  IN=#{entry[DESCRIPTION_KEY]}\n"                                      ; end ; end ;
+def DBG_DESC entry              ; if DEBUG ; $stderr.print "desc OUT=#{entry[DESCRIPTION_KEY]}\n  tags=#{entry[REASON_TAGS_KEY]}\n" ; sleep 0.2                                       ; end ; end ;
+def DBG_NO_TAG entry            ; if DEBUG ; $stderr.print "\nno tag for: #{ entry[PACKAGE_NAME_KEY]}\n" if entry[REASON_TAGS_KEY].empty?                                             ; end ; end ;
+def DBG_NO_DESC entry           ; if DEBUG ; $stderr.print "\nno desc for: #{entry[PACKAGE_NAME_KEY]}\n" if entry[DESCRIPTION_KEY   ].empty?                                          ; end ; end ;
+def DBG_ENTRY entry             ; if DEBUG && (IS_DEBUG_FILTER_NAME entry[PACKAGE_NAME_KEY]) ; $stderr.print "\n" ; entry.each_pair { | k , v | $stderr.print "DEBUG: #{k}: #{v}\n" } ; end ; end ;
+def DBG_EXPECTED input_filename ; if input_filename == 'blacklist-testdata.txt' ; $stderr.print DBG_EXPECTED_TEST_RESULTS                                                             ; end ; end ;
+DBG_EXPECTED_TEST_RESULTS = "##------ expected results ---------##\n" + # per 'blacklist-testdata.txt'
+                            "# entries found: 20                 #\n" +
+                            "#   well-formed: 14                 #\n" +
+                            "#   malformed: 6                    #\n" +
+                            "# entries lacking bug reference: 15 #\n" +
+                            "# entries lacking tag: 2            #\n" +
+                            "# entries lacking description: 3    #\n" +
+                            "# unknown bug references: 0         #\n" +
+                            "# unknown reason tags: 3            #\n" +
+                            "# entries with no replacement: 13   #\n" +
+                            "# duplicate entries: 2              #\n" +
+                            "#   identical: 1                    #\n" +
+                            "#   differing: 2                    #\n" +
+                            "##---------------------------------##\n"
+#   entries found: 34
+#     well-formed: 28
+#     malformed: 6
+#   entries lacking bug reference: 8
+#   entries lacking tag: 8
+#   entries lacking description: 4
+#   unknown bug references: 1
+#   unknown reason tags: 4
+#   entries with no replacement: 4
+#   duplicate  entries: 3
+#     identical: 1
+#     differing: 2
 ## DEBUG end ##
 
 
@@ -52,47 +67,47 @@ require 'set'
 # NOTE: acceptable entry syntax per SYNTAX doc =>
 #         ORIGINAL_PACKAGE:LIBRE_REPLACEMENT:REF:REF_ID:[TAG] SHORT_DESCRIPTION
 ALL_BLACKLIST_FILES = [ 'blacklist.txt' , 'your-freedom_emu-blacklist.txt' , 'your-privacy-blacklist.txt' ]
-BLACKLIST_FILES        = (ALL_BLACKLIST_FILES.include? ARGV.first) ? [ ARGV.first ] : ALL_BLACKLIST_FILES
-# BLACKLIST_FILES        = [ 'blacklist-testdata.txt' ] # DEBUG
-VALID_ENTRIES_REGEX    = /^[^:\[\]#]*:[^:\[\]]*:(sv|debian|parabola|fsf|fedora)?:[^:\[\]]*:\w*([^\d:]+:.*|\[[^:]+:.*|[^:]*)$/
-BLACKLIST_TAGS_REGEX   = /^\[([^\]]*)\] *(.*)/
-RAW_ENTRY_KEY          = :raw_blacklist_entry
-PACKAGE_NAME_KEY       = :original_package  # syntax token
-REPLACEMENT_KEY        = :libre_replacement # syntax token
-REFERENCE_KEY          = :ref               # syntax token
-REFERENCE_ID_KEY       = :id                # syntax token
-DESCRIPTION_KEY        = :short_description # syntax token
-BLACKLIST_TAGS_KEY     = :blacklist_tags
-NONFREE_TAG            = 'nonfree'
-SEMIFREE_TAG           = 'semifree'
-USES_NONFREE_TAG       = 'uses-nonfree'
-BRANDING_TAG           = 'branding'
-TECHNICAL_TAG          = 'technical'
-NEEDS_REPLACEMENT_TAG  = 'FIXME:package'
-NEEDS_DESC_TAG         = 'FIXME:description'
-ACCEPTABLE_TAGS        = [ NONFREE_TAG   , SEMIFREE_TAG          , USES_NONFREE_TAG , BRANDING_TAG ,
-                           TECHNICAL_TAG , NEEDS_REPLACEMENT_TAG , NEEDS_DESC_TAG                  ]
-DO_PRINT_STATS         = true
-DO_PRINT_INVALID       = true
-DO_PRINT_UNREPLACED    = true
-DO_PRINT_INCOMPLETE    = true
-DO_PRINT_DUPLICATED     = true
-REPORT_SEPARATOR       = "------------------------------------------------------------\n"
-
+BLACKLIST_FILES     = (File.exist? ARGV.first.to_s) ? [ ARGV.first ] : ALL_BLACKLIST_FILES
+ENTRIES_REGEX       = /^[^:\[\]#]+:[^:\[\]]*:(debian|fedora|fsf|parabola|savannah|sv)?:[^:\[\]]*:\w*([^\d:]+:.*|\[[^:]+:.*|[^:]*)$/
+REASON_TAGS_REGEX   = /^\[([^\]]*)\] *(.*)/
+RAW_ENTRY_KEY       = :raw_blacklist_entry
+PACKAGE_NAME_KEY    = :original_package  # syntax field 0
+REPLACEMENT_KEY     = :libre_replacement # syntax field 1
+REFERENCE_KEY       = :ref               # syntax field 2      , constrained
+REFERENCE_ID_KEY    = :id                # syntax field 3
+REASON_TAGS_KEY     = :reason_tags       # syntax field 4 head , constrained
+DESCRIPTION_KEY     = :short_description # syntax field 4 tail
+ACCEPTABLE_REFS     = [ 'debian' , 'fedora' , 'fsf' , 'parabola' , 'savannah' ]
+ACCEPTABLE_TAGS     = [ 'nonfree'   , 'semifree'      , 'uses-nonfree' , 'branding' ,
+                        'technical' , 'FIXME:package' , 'FIXME:description' ]
+DO_PRINT_MALFORMED  = true
+DO_PRINT_UNREPLACED = true
+DO_PRINT_INCOMPLETE = true
+DO_PRINT_DUPLICATED = true
+DO_PRINT_STATS      = true
+REPORT_SEPARATOR    = "------------------------------------------------------------\n"
+LOG_FILE            = 'validation.log'
+
+
+@log_file = nil
+def log log_msg ; @log_file.print log_msg if @log_file.respond_to? :print ; end ;
 
 def reset_state
-  @entries_invalid              = []
-  @entries_valid                = []
-  @entry_freqs                  = {}
-  @entries_no_desc              = []
-  @entries_no_tags              = []
-  @entries_tags_unknown         = []
-  @tags_unknown                 = Set[]
-  @entries_unreplaced           = nil # deferred
-  @entries_duplicated           = nil # deferred
-  @entries_duplicated_identical = {}
-  @entries_duplicated_differing = {}
-  @errors                       = []
+  @entries_malformed    = []
+  @entries_acceptable   = []
+  @entry_freqs          = {}
+  @entries_no_ref       = []
+  @entries_no_tag       = []
+  @entries_no_desc      = []
+  @entries_refs_unknown = []
+  @entries_tags_unknown = []
+  @refs_unknown         = Set[]
+  @tags_unknown         = Set[]
+  @entries_unreplaced   = nil # deferred
+  @entries_duplicated   = nil # deferred
+  @entries_identical    = {}
+  @entries_differing    = {}
+  @errors               = []
 end
 
 def parse_entries input_filename
@@ -102,10 +117,10 @@ DBG_PARSE input_filename
 
 DBG_FILTER_NAME line
 
-    next                             if     line.strip.empty? || (line.strip.start_with? '#')
-    @entries_invalid << line && next unless line.match VALID_ENTRIES_REGEX
+    next                               if     line.strip.empty? || (line.strip.start_with? '#')
+    @entries_malformed << line && next unless line.match ENTRIES_REGEX
 
-    @entries_valid           << (entry = {})
+    @entries_acceptable      << (entry = {})
     tokens                    = (line.split ':')
     entry[RAW_ENTRY_KEY     ] = line
     entry[PACKAGE_NAME_KEY  ] = (tokens.shift   ).gsub("\t" , '').strip
@@ -113,7 +128,7 @@ DBG_FILTER_NAME line
     entry[REFERENCE_KEY     ] = (tokens.shift   ).gsub("\t" , '').strip
     entry[REFERENCE_ID_KEY  ] = (tokens.shift   ).gsub("\t" , '').strip
     entry[DESCRIPTION_KEY   ] = (tokens.join ':').gsub("\t" , '').strip
-    entry[BLACKLIST_TAGS_KEY] = []
+    entry[REASON_TAGS_KEY   ] = []
 
 DBG_TOKENS tokens
 
@@ -122,8 +137,8 @@ DBG_TOKENS tokens
 
 DBG_TAG entry
 
-      entry[BLACKLIST_TAGS_KEY] << (entry[DESCRIPTION_KEY].gsub BLACKLIST_TAGS_REGEX , '\1')
-      entry[DESCRIPTION_KEY   ]  = (entry[DESCRIPTION_KEY].gsub BLACKLIST_TAGS_REGEX , '\2')
+      entry[REASON_TAGS_KEY] << (entry[DESCRIPTION_KEY].gsub REASON_TAGS_REGEX , '\1')
+      entry[DESCRIPTION_KEY]  = (entry[DESCRIPTION_KEY].gsub REASON_TAGS_REGEX , '\2')
 
 DBG_DESC entry
 
@@ -134,121 +149,147 @@ DBG_NO_DESC entry
 end
 
 def process_entries
-  @entries_valid.each do | entry |
+  @entries_acceptable.each do | entry |
 
 DBG_ENTRY entry
 
-    entry_name         = entry[PACKAGE_NAME_KEY  ]
-    entry_desc         = entry[DESCRIPTION_KEY   ]
-    entry_tags         = entry[BLACKLIST_TAGS_KEY]
-    entry_tags_unknown = entry_tags - ACCEPTABLE_TAGS
+    entry_name         = entry[PACKAGE_NAME_KEY]
+    entry_ref          = entry[REFERENCE_KEY   ]
+    entry_ref_id       = entry[REFERENCE_ID_KEY]
+    entry_tags         = entry[REASON_TAGS_KEY ]
+    entry_desc         = entry[DESCRIPTION_KEY ]
+    entry_ref_unknown  = (ACCEPTABLE_REFS.include? entry_ref) ? entry_ref : ''
+    entry_ref_unknown  = [ entry_ref ] - ACCEPTABLE_REFS - [ '' ]
+    entry_tags_unknown = entry_tags    - ACCEPTABLE_TAGS
 
     @entry_freqs[entry_name]  = (@entry_freqs[entry_name] ||= 0) + 1
+    @entries_no_ref          << entry if     entry_ref         .empty? ||
+                                             entry_ref_id      .empty?
+    @entries_no_tag          << entry if     entry_tags        .empty?
     @entries_no_desc         << entry if     entry_desc        .empty?
-    @entries_no_tags         << entry if     entry_tags        .empty?
+    @entries_refs_unknown    << entry unless entry_ref_unknown .empty?
     @entries_tags_unknown    << entry unless entry_tags_unknown.empty?
+    @refs_unknown.merge         entry_ref_unknown
     @tags_unknown.merge         entry_tags_unknown
   end
 
-  @entries_unreplaced = @entries_valid   .select { | entry | entry[REPLACEMENT_KEY].empty? }
-  @entries_duplicated = @entry_freqs.keys.select { | name | @entry_freqs[name] > 1 }
+  @entries_unreplaced = @entries_acceptable.select { | entry | entry[REPLACEMENT_KEY].empty? }
+  @entries_duplicated = @entry_freqs.keys  .select { | name | @entry_freqs[name] > 1 }
 
   @entries_duplicated.each do | duplicate_name |
-    duplicate_entries = @entries_valid.select { | entry | entry[PACKAGE_NAME_KEY] == duplicate_name } \
-                                      .map!   { | entry | entry[RAW_ENTRY_KEY   ]                   }
+    duplicate_entries = @entries_acceptable.select { | entry | entry[PACKAGE_NAME_KEY] == duplicate_name } \
+                                           .map    { | entry | entry[RAW_ENTRY_KEY   ]                   }
     unique_entries    = duplicate_entries.uniq
-    n_unique_entries  = unique_entries.size
+    n_unique_entries  = unique_entries   .size
 
     unique_entries.each do | uniq_value |
       n_identical_entries = duplicate_entries.count { | dup_entry | dup_entry == uniq_value }
-      @entries_duplicated_identical[duplicate_name] = uniq_value + " (#{n_identical_entries} identical)" if n_identical_entries > 1
+      @entries_identical[duplicate_name] = uniq_value + " (#{n_identical_entries} identical)" if n_identical_entries > 1
     end
     if n_unique_entries > 1
-      @entries_duplicated_differing[duplicate_name] = unique_entries
+      @entries_differing[duplicate_name] = unique_entries
     end
   end
 end
 
 def print_report input_filename
-  if DO_PRINT_INVALID || DO_PRINT_INCOMPLETE || DO_PRINT_UNREPLACED
-    print "\n\n#{REPORT_SEPARATOR}#{input_filename} report:\n"
+  if DO_PRINT_MALFORMED || DO_PRINT_INCOMPLETE || DO_PRINT_UNREPLACED
+    log "\n\n#{REPORT_SEPARATOR}#{input_filename} report:\n"
   end
 
-  print_invalid = {}
-  print_invalid['entries invalid'            ] = @entries_invalid      if DO_PRINT_INVALID
-  print_invalid['entries lacking description'] = @entries_no_desc      if DO_PRINT_INCOMPLETE
-  print_invalid['entries lacking tags'       ] = @entries_no_tags      if DO_PRINT_INCOMPLETE
-  print_invalid['entries with unknown tags'  ] = @entries_tags_unknown if DO_PRINT_INCOMPLETE
-  print_invalid['tags unknown'               ] = @tags_unknown         if DO_PRINT_INCOMPLETE
-  print_invalid.each_pair do | label , data |
-    print "#{REPORT_SEPARATOR}#{label}:\n" unless data.empty?
-    data.each { | entry | print "  #{((entry.is_a? Hash) ? entry[RAW_ENTRY_KEY] : entry).strip}\n" }
+  entries_malformed  = [ '@entries_malformed' ]
+  entries_incomplete = [ '@entries_no_ref'  , '@entries_refs_unknown' , '@refs_unknown' ,
+                         '@entries_no_tag'  , '@entries_tags_unknown' , '@tags_unknown' ,
+                         '@entries_no_desc'                                             ]
+  log_invalid        = []
+
+  log_invalid += entries_malformed  if DO_PRINT_MALFORMED
+  log_invalid += entries_incomplete if DO_PRINT_INCOMPLETE
+  log_invalid.each do | var_name |
+    entries = eval var_name
+
+    log "#{REPORT_SEPARATOR}#{var_name}:\n" unless entries.empty?
+    entries.each { | entry | log "  #{((entry.is_a? Hash) ? entry[RAW_ENTRY_KEY] : entry).strip}\n" }
   end
 
   unless @entries_unreplaced.empty? || ! DO_PRINT_UNREPLACED
-    print "#{REPORT_SEPARATOR}entries unreplaced:\n"
-    @entries_unreplaced.each { | entry | print "  #{entry[PACKAGE_NAME_KEY]}\n" }
+    log "#{REPORT_SEPARATOR}entries unreplaced:\n"
+    @entries_unreplaced.each { | entry | log "  #{entry[PACKAGE_NAME_KEY]}\n" }
   end
 
   unless @entries_duplicated.empty? || ! DO_PRINT_DUPLICATED
-    print "#{REPORT_SEPARATOR}entries duplicates:\n"
+    log "#{REPORT_SEPARATOR}entries duplicates:\n"
     @entries_duplicated.each do | duplicate_name |
-      entry_identical   = @entries_duplicated_identical[duplicate_name]
-      entries_differing = @entries_duplicated_differing[duplicate_name]
-
-      print                                    "\n  #{duplicate_name}:\n"
-      print                                    "    identical:\n"           unless entry_identical  .nil?
-      print                                    "      #{entry_identical}\n" unless entry_identical  .nil?
-      print                                    "    differing:\n"           unless entries_differing.nil?
-      entries_differing.each { | entry | print "      #{entry}\n"         } unless entries_differing.nil?
+      identical  = @entries_identical[duplicate_name]
+      differings = @entries_differing[duplicate_name]
+
+      log                                "\n  #{duplicate_name}:\n"
+      log                                "    identical:\n"           unless identical .nil?
+      log                                "      #{identical}\n"       unless identical .nil?
+      log                                "    differing:\n"           unless differings.nil?
+      log differings.map { | differing | "      #{differing}\n"     } unless differings.nil?
     end
   end
 
   if DO_PRINT_STATS
+    all_entries = @entries_acceptable + @entries_malformed
+
     print "#{REPORT_SEPARATOR}#{input_filename} stats:\n"
-    print "  entries found: #{              (@entries_valid + @entries_invalid).size     }\n"
-    print "  entries valid: #{              (@entries_valid                   ).size     }\n"
-    print "  entries invalid: #{            (@entries_invalid                 ).size     }\n"
-    print "  entries lacking tags: #{       (@entries_no_tags                 ).size     }\n"
-    print "  entries lacking description: #{(@entries_no_desc                 ).size     }\n"
-    print "  tags unknown: #{               (@tags_unknown                    ).size     }\n"
-    print "  entries unreplaced: #{         (@entries_unreplaced              ).size     }\n"
-    print "  entries duplicated: #{         (@entries_duplicated              ).size     }\n"
-    print "    identical: #{                (@entries_duplicated_identical    ).size     }\n"
-    print "    differing: #{                (@entries_duplicated_differing    ).keys.size}\n"
+    print "  entries found: #{                (all_entries        ).size     }\n"
+    print "    well-formed: #{                (@entries_acceptable).size     }\n"
+    print "    malformed: #{                  (@entries_malformed ).size     }\n"
+    print "  entries lacking bug reference: #{(@entries_no_ref    ).size     }\n"
+    print "  entries lacking tag: #{          (@entries_no_tag    ).size     }\n"
+    print "  entries lacking description: #{  (@entries_no_desc   ).size     }\n"
+    print "  unknown bug references: #{       (@refs_unknown      ).size     }\n"
+    print "  unknown reason tags: #{          (@tags_unknown      ).size     }\n"
+    print "  entries with no replacement: #{  (@entries_unreplaced).size     }\n"
+    print "  duplicate  entries: #{           (@entries_duplicated).size     }\n"
+    print "    identical: #{                  (@entries_identical ).size     }\n"
+    print "    differing: #{                  (@entries_differing ).keys.size}\n"
     print REPORT_SEPARATOR
   end
 
 DBG_EXPECTED input_filename
 end
 
-def sanity_check input_filename
-  @errors << 'entries_invalid'    unless @entries_invalid   .empty?
-  # @errors << 'entries_no_tags'    unless @entries_no_tags   .empty? # TODO: complete these entries
-  # @errors << 'entries_no_desc'    unless @entries_no_desc   .empty? # TODO: complete these entries
-  @errors << 'tags_unknown'       unless @tags_unknown      .empty?
-  @errors << 'entries_duplicated' unless @entries_duplicated.empty?
+def validate input_filename
+  validations = [ '@entries_malformed'  , '@entries_no_ref' , '@entries_no_tag' ,
+                  '@entries_no_desc'    , '@refs_unknown'   , '@tags_unknown'   ,
+                  '@entries_duplicated'                                         ] # TODO: ref, ref_id, tag, and desc should be required
+  validations = [ '@entries_malformed'  , '@refs_unknown' , '@tags_unknown' ,
+                  '@entries_duplicated'                                     ]     # TODO: ref, ref_id, tag, and desc should be required
+
+  validations.each { | var_name | @errors << var_name unless (eval var_name).empty? }
 end
 
 def generate_json input_filename
     output_json_file = "#{input_filename}.json"
 
   if @errors.empty?
-    IO.write output_json_file , @entries_valid.to_json
-    print "\nwrote: #{output_json_file}\n\nno problems detected in #{input_filename}\n"
+    IO.write output_json_file , @entries_acceptable.to_json
+    print "no problems detected in #{input_filename}\n"
+    $stderr.print "wrote: #{output_json_file}\n"
   else
-    print "\nERROR: in #{input_filename} - #{@errors.join ','} - JSON will not be generated\n"
+    print "ERROR: in #{input_filename} - [ #{@errors.join ','} ] - see: #{LOG_FILE}\n"
+    File.delete output_json_file if File.exist? output_json_file
   end
 end
 
 
-BLACKLIST_FILES.each do | input_filename |
-  reset_state
-  parse_entries   input_filename
-  process_entries
-  print_report    input_filename
-  sanity_check   input_filename
-  generate_json   input_filename
+## main entry ##
+
+File.open(LOG_FILE , File::CREAT|File::WRONLY) do | file |
+  (@log_file = file).truncate 0
 
-  exit 1 unless @errors.empty?
+  BLACKLIST_FILES.each do | input_filename |
+    reset_state
+    parse_entries   input_filename
+    process_entries
+    print_report    input_filename
+    validate        input_filename
+    generate_json   input_filename
+
+    exit 1 unless @errors.empty?
+  end
 end
author	bill-auger <mr.j.spam.me@gmail.com>	2020-03-14 08:37:34 -0400
committer	bill-auger <mr.j.spam.me@gmail.com>	2020-03-14 19:13:20 -0400
commit	505e2811648d54698ca3d6ffd153a7479efa6714 (patch)
tree	ef013aa99d5df724317b9bfc09f5e197858e8a93
parent	0f501979af323d087561acf4944dd1ddedaa0d05 (diff)