summaryrefslogtreecommitdiff
path: root/report
blob: 6860147457940456c64bd94b787e1e97d858b9d6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
#!/usr/bin/env ruby

=begin
  USAGE:
    report.rb [ input_files ]

    this script is used to validate and report statistics on the blacklist entries

    it currently detects syntax errors, missing and unknown bug references,
      missing and unknown reason tags, missing descriptions, and duplicate entries
    any of the above discrepancies will cause the validation to fail

    it also detects entries with no replacement, although that is not an error

    it optionally creates a JSON file with the results,
      which can be the input to the post_fsd_wiki.phantomjs script
    the JSON file is written only when the validation passes
=end


## DEBUG begin ##
# master switch for all of the DBG_* helpers
DEBUG = false
# byebug is a third-party gem that is needed only for interactive debugging,
#   so it is loaded only when debugging is enabled - normal runs must not depend on it
require 'byebug' if DEBUG
# names of the entries to trace and to break on (entries whose line starts with the name)
DEBUG_FILTER_NAMES = [] #[ 'well-formed-complete' ]
# true only when debugging is enabled and 'name' is one of DEBUG_FILTER_NAMES
def IS_DEBUG_FILTER_NAME name ; DEBUG && (DEBUG_FILTER_NAMES.include? name) ; end ;
# source text to be eval'ed where a local variable named 'line' is in scope
#   it enters the debugger for each line that starts with one of DEBUG_FILTER_NAMES
DBG_DEBUGGER_EVAL = "DEBUG_FILTER_NAMES.each { | name | byebug if line.start_with? name } if DEBUG"

# Announces on stderr which input file is about to be parsed (only when DEBUG).
def DBG_PARSE input_filename
  $stderr.print "\nDEBUG: parsing #{input_filename}\n" if DEBUG
end

# Dumps every token with its index on stderr (only when DEBUG),
#   and only when the first token passes IS_DEBUG_FILTER_NAME.
def DBG_TOKENS tokens
  return unless DEBUG

  tokens.each_with_index do | token , i |
    $stderr.print "DEBUG: tokens[#{i}]=#{token}\n" if IS_DEBUG_FILTER_NAME tokens[0]
  end
end

# Dumps every key/value pair of a parsed entry on stderr (only when DEBUG).
def DBG_ENTRY entry
  return unless DEBUG

  $stderr.print "\n"
  entry.each_pair { | key , value | $stderr.print "DEBUG: #{key}: #{value}\n" }
end

# Prints the expected results on stderr, but only for the test-data file;
#   note that this one does not depend on DEBUG.
def DBG_EXPECTED input_filename
  return unless input_filename == 'blacklist-testdata.txt'

  $stderr.print DBG_EXPECTED_TEST_RESULTS + REPORT_SEPARATOR
end
# Self-check for the test-data file (only when DEBUG).
#   For each line that is neither blank nor a comment, reports on stderr
#   whether the line matches ENTRIES_REGEX. The test entries announce their
#   expectation in their name, so the script quits if a 'well-formed...' line
#   does not match or a 'malformed...' line does match.
def DBG_TEST_DATA line
  return unless DEBUG

  stripped = line.strip
  return if stripped.empty? || (stripped.start_with? '#')

  is_well_formed = !(line.match ENTRIES_REGEX).nil?
  $stderr.print "DBG_TEST_DATA: is_well_formed=#{is_well_formed}\n               line=#{line}\n"

  expects_well_formed = line.start_with? 'well-formed'
  expects_malformed   = line.start_with? 'malformed'
  return unless (expects_well_formed && !is_well_formed) || (expects_malformed && is_well_formed)

  $stderr.print "mismatched test-name/expectation - quitting"
  exit
end
# the statistics and the validation failures that a run over 'blacklist-testdata.txt' should report
#   left box: expected stats , right box: expected entries in the error list
DBG_EXPECTED_TEST_RESULTS = <<~'EXPECTED_RESULTS'
  ##--------- expected results -----------##    ##--- expected failures ---##
  # entries found: 32                      #    # [ @entries_malformed    , #
  #   well-formed: 22                      #    #   @entries_refs_unknown , #
  #   malformed: 10                        #    #   @entries_tags_unknown , #
  # entries lacking bug reference: 3       #    #   @entries_duplicated   ] #
  #   no bug reference: 2                  #    ##-------------------------##
  #   no bug reference id: 2               #
  # entries lacking tag: 1                 #
  # entries lacking description: 1         #
  # entries with unknown bug references: 1 #
  # entries with unknown reason tags: 2    #
  # entries without replacement: 1         #
  # duplicate entries: 2                   #
  #   identical: 1                         #
  #   differing: 1                         #
  ##--------------------------------------##
EXPECTED_RESULTS
## DEBUG end ##


require 'json'
require 'set'


# NOTE: acceptable entry syntax per SYNTAX doc =>
#         ORIGINAL_PACKAGE!LIBRE_REPLACEMENT!REF!REF_ID!TAG!SHORT_DESCRIPTION

## input files ##
# the first command-line argument selects a single input file, if it names an existing file
#   otherwise, all of the known blacklist files are processed
ALL_BLACKLIST_FILES = %w[ blacklist.txt your-freedom_emu-blacklist.txt your-privacy-blacklist.txt ]
BLACKLIST_FILES     = if File.exist? ARGV.first.to_s then [ ARGV.first ] else ALL_BLACKLIST_FILES end

## entry syntax ##
# TODO: empty ref, ref_id, tag, or desc is deprecated - they will be mandatory eventually
# ENTRIES_REGEX       = /^[^!#]+![^!]*![^!]+![^!]+![^!]+![^!]+$/ # TODO: this is the desired regex after the deprecation period
ENTRIES_REGEX       = /^[^!#]+![^!]*![^!]*![^!]*![^!]*![^!]*$/   # TODO: this is the desired regex during the deprecation period

## keys of the parsed entry hashes ##
RAW_ENTRY_KEY       = :raw_blacklist_entry
PACKAGE_NAME_KEY    = :original_package  # syntax field 0
REPLACEMENT_KEY     = :libre_replacement # syntax field 1
REFERENCE_KEY       = :ref               # syntax field 2      , constrained
REFERENCE_ID_KEY    = :id                # syntax field 3
REASON_TAGS_KEY     = :reason_tags       # syntax field 4 head , constrained
DESCRIPTION_KEY     = :short_description # syntax field 4 tail

## accepted values of the constrained fields ##
ACCEPTABLE_REFS     = %w[ debian fedora fsf parabola savannah ]
# ACCEPTABLE_REFS     = [ 'debian' , 'fedora' , 'fsf' , 'parabola' , 'savannah' , 'sv' ]
ACCEPTABLE_TAGS     = %w[ nonfree semifree uses-nonfree branding technical FIXME-package FIXME-description ]

## report options ##
DO_PRINT_MALFORMED  = true
DO_PRINT_UNREPLACED = true
DO_PRINT_INCOMPLETE = true
DO_PRINT_DUPLICATED = true
DO_PRINT_STATS      = true
REPORT_SEPARATOR    = "---------------------------------------------------------------------------\n"
LOG_FILE            = 'validation.log'


# the open log file - assigned by the main entry , nil until then
@log_file = nil

# Appends log_msg to the log file.
#   Does nothing while @log_file is not something that can be printed to.
def log log_msg
  return unless @log_file.respond_to? :print

  @log_file.print log_msg
end

# Clears all of the per-file results, so that the next input file starts fresh.
def reset_state
  # entry lists which are filled while parsing and processing - each gets its own empty list
  %i[ @entries_malformed    @entries_acceptable   @entries_no_ref  @entries_no_refid
      @entries_no_tag       @entries_no_desc      @entries_refs_unknown
      @entries_tags_unknown                                                         ].each do | list_name |
    instance_variable_set list_name , []
  end

  # deferred - these are computed wholesale by process_entries
  @entries_unreplaced = @entries_duplicated = nil

  # duplicates , keyed by package name
  @entries_identical = {}
  @entries_differing = {}

  # names of the failed validations
  @errors = []
end

# Reads input_filename and sorts its lines into @entries_malformed and @entries_acceptable.
#
#   Blank lines and comment lines (first non-blank character is '#') are ignored.
#   Lines which do not match ENTRIES_REGEX are collected verbatim in @entries_malformed.
#   Every other line becomes a hash in @entries_acceptable, holding the raw line and its six fields:
#     package , replacement , ref and ref id have all whitespace removed
#     the tags field has all whitespace removed , then is split on '|' into a list
#     the description only has its surrounding whitespace removed , so that its words stay separated
#
#   input_filename - path of the blacklist file to parse
def parse_entries input_filename
DBG_PARSE input_filename

  File.foreach input_filename do | line |

# the debugger hook is eval'ed here so that it can see 'line' - skipped entirely unless debugging
DBG_TEST_DATA line ; eval DBG_DEBUGGER_EVAL if DEBUG

    next if line.strip.empty? || (line.strip.start_with? '#')

    unless line.match ENTRIES_REGEX
      @entries_malformed << line
      next
    end

    # ENTRIES_REGEX guarantees exactly five separators
    #   the negative limit keeps trailing empty fields , which a plain split would drop
    #   (e.g. a final line with no newline and an empty description)
    tokens                                                    = line.split '!' , -1
    package , replacement , ref , ref_id , tags , description = tokens

    entry                   = {}
    entry[RAW_ENTRY_KEY   ] = line
    entry[PACKAGE_NAME_KEY] = package    .gsub(/\s+/ , '')
    entry[REPLACEMENT_KEY ] = replacement.gsub(/\s+/ , '')
    entry[REFERENCE_KEY   ] = ref        .gsub(/\s+/ , '')
    entry[REFERENCE_ID_KEY] = ref_id     .gsub(/\s+/ , '')
    entry[REASON_TAGS_KEY ] = tags       .gsub(/\s+/ , '').split '|'
    entry[DESCRIPTION_KEY ] = description.strip
    @entries_acceptable    << entry

# tokens is still intact here , so the name filter sees the package name as tokens[0]
DBG_TOKENS tokens
  end
end

# Classifies the well-formed entries (@entries_acceptable) , filling the result lists:
#   @entries_no_ref , @entries_no_refid , @entries_no_tag , @entries_no_desc - entries with that field empty
#   @entries_refs_unknown  - entries whose non-empty ref is not one of ACCEPTABLE_REFS
#   @entries_tags_unknown  - entries with any tag that is not one of ACCEPTABLE_TAGS
#   @entries_unreplaced    - entries with an empty replacement
#   @entries_duplicated    - package names which appear in more than one entry
#   @entries_identical     - per duplicated name , every raw line which appears more than once
#                            (all copies , grouped per distinct line)
#   @entries_differing     - per duplicated name , the distinct raw lines , if there are several
def process_entries
  @entries_acceptable.each do | entry |

DBG_ENTRY entry

    entry_ref          = entry[REFERENCE_KEY   ]
    entry_ref_id       = entry[REFERENCE_ID_KEY]
    entry_tags         = entry[REASON_TAGS_KEY ]
    entry_desc         = entry[DESCRIPTION_KEY ]
    # an empty ref is 'missing' , not 'unknown' - so it is excluded here
    entry_ref_unknown  = [ entry_ref ] - ACCEPTABLE_REFS - [ '' ]
    entry_tags_unknown = entry_tags    - ACCEPTABLE_TAGS

    @entries_no_ref         << entry if     entry_ref         .empty?
    @entries_no_refid       << entry if     entry_ref_id      .empty?
    @entries_no_tag         << entry if     entry_tags        .empty?
    @entries_no_desc        << entry if     entry_desc        .empty?
    @entries_refs_unknown   << entry unless entry_ref_unknown .empty?
    @entries_tags_unknown   << entry unless entry_tags_unknown.empty?
  end

  @entries_unreplaced = @entries_acceptable.select { | entry | entry[REPLACEMENT_KEY].empty? }

  # one pass groups the entries per package name - names keep their order of first appearance
  entries_by_name     = @entries_acceptable.group_by { | entry | entry[PACKAGE_NAME_KEY] }
  @entries_duplicated = entries_by_name.select { | _name , entries | entries.size > 1 }.keys

  @entries_duplicated.each do | duplicate_name |
    duplicate_entries = entries_by_name[duplicate_name].map { | entry | entry[RAW_ENTRY_KEY] }
    unique_entries    = duplicate_entries.uniq

    # collect the copies of EVERY repeated line - not only those of the last repeated line
    identical_entries = unique_entries.flat_map do | uniq_entry |
      copies = duplicate_entries.select { | dup_entry | dup_entry == uniq_entry }
      copies.size > 1 ? copies : []
    end

    @entries_identical[duplicate_name] = identical_entries unless identical_entries.empty?
    @entries_differing[duplicate_name] = unique_entries    if     unique_entries.size > 1
  end
end

# Writes the detailed findings for input_filename to the log file (via 'log') ,
#   then prints the summary statistics on stdout.
#   Each section is governed by its DO_PRINT_* switch.
#   Expects that parse_entries and process_entries have already filled the result lists.
#
#   input_filename - name of the file that was processed , used only for labelling
def print_report input_filename
  if DO_PRINT_MALFORMED || DO_PRINT_INCOMPLETE || DO_PRINT_UNREPLACED || DO_PRINT_DUPLICATED
    log "\n\n#{REPORT_SEPARATOR}#{input_filename} report:\n"
  end

  # the invalid-entry lists are addressed by name , because the name doubles as the section heading
  entries_malformed  = [ '@entries_malformed' ]
  entries_incomplete = [ '@entries_no_ref' , '@entries_refs_unknown' , '@entries_no_refid' ,
                         '@entries_no_tag' , '@entries_tags_unknown' , '@entries_no_desc'  ]
  log_invalid        = []
  log_invalid       += entries_malformed  if DO_PRINT_MALFORMED
  log_invalid       += entries_incomplete if DO_PRINT_INCOMPLETE

  log_invalid.each do | var_name |
    entries = instance_variable_get var_name

    log "#{REPORT_SEPARATOR}#{var_name}:\n" unless entries.empty?
    # malformed entries are raw lines ; the others are parsed entry hashes
    entries.each { | entry | log "  #{((entry.is_a? Hash) ? entry[RAW_ENTRY_KEY] : entry).strip}\n" }
  end

  if DO_PRINT_UNREPLACED && !@entries_unreplaced.empty?
    log "#{REPORT_SEPARATOR}entries unreplaced:\n"
    @entries_unreplaced.each { | entry | log "  #{entry[PACKAGE_NAME_KEY]}\n" }
  end

  if DO_PRINT_DUPLICATED && !@entries_duplicated.empty?
    log "#{REPORT_SEPARATOR}entries duplicates:\n"
    @entries_duplicated.each do | duplicate_name |
      duplicates = { 'identical' => @entries_identical[duplicate_name] ,
                     'differing' => @entries_differing[duplicate_name] }

      log "\n  #{duplicate_name}:\n"
      duplicates.each_pair do | label , raw_entries |
        next if raw_entries.nil?

        log "    #{label}:\n"
        # log one line per entry - handing the whole list to 'log' would write its inspect form
        #   the raw lines carry their own newline , so they are stripped like every other listing
        raw_entries.each { | raw_entry | log "      #{raw_entry.strip}\n" }
      end
    end
  end

  if DO_PRINT_STATS
    all_entries = @entries_acceptable + @entries_malformed
    all_no_ref  = (@entries_no_ref    + @entries_no_refid).uniq
    stats       = [ [ '  entries found'                       , all_entries               ] ,
                    [ '    well-formed'                       , @entries_acceptable       ] ,
                    [ '    malformed'                         , @entries_malformed        ] ,
                    [ '  entries lacking bug reference'       , all_no_ref                ] ,
                    [ '    no bug reference'                  , @entries_no_ref           ] ,
                    [ '    no bug reference id'               , @entries_no_refid         ] ,
                    [ '  entries lacking tag'                 , @entries_no_tag           ] ,
                    [ '  entries lacking description'         , @entries_no_desc          ] ,
                    [ '  entries with unknown bug references' , @entries_refs_unknown     ] ,
                    [ '  entries with unknown reason tags'    , @entries_tags_unknown     ] ,
                    [ '  entries without replacement'         , @entries_unreplaced       ] ,
                    [ '  duplicate entries'                   , @entries_duplicated       ] ,
                    [ '    identical'                         , @entries_identical.keys   ] ,
                    [ '    differing'                         , @entries_differing.keys   ] ]

    print "#{REPORT_SEPARATOR}#{input_filename} stats:\n"
    stats.each { | label , entries | print "#{label}: #{entries.size}\n" }
    print REPORT_SEPARATOR
  end

DBG_EXPECTED input_filename
end

# Records in @errors the name of each validated result list that is not empty.
#   input_filename is accepted for symmetry with the other steps , but is not used.
def validate input_filename
  # TODO: empty ref, ref_id, tag, or desc is deprecated - they will be mandatory eventually
  # TODO: these are the desired strict validations after the deprecation period
  #         @entries_malformed  ,
  #         @entries_no_ref     , @entries_refs_unknown , @entries_no_refid ,
  #         @entries_no_tag     , @entries_tags_unknown , @entries_no_desc  ,
  #         @entries_duplicated
  # TODO: these are the current validations during the deprecation period
  current_validations = %w[ @entries_malformed    @entries_refs_unknown
                            @entries_tags_unknown @entries_duplicated   ]

  current_validations.each do | var_name |
    offending_entries = instance_variable_get var_name

    @errors << var_name unless offending_entries.empty?
  end
end

def generate_json input_filename
    output_json_file = "#{input_filename}.json"

  if @errors.empty?
    IO.write output_json_file , @entries_acceptable.to_json
    print "no problems detected in #{input_filename}\n"
    $stderr.print "wrote: #{output_json_file}\n"
  else
    print "ERROR: [ #{@errors.join " , "} ] in #{input_filename} - see: #{LOG_FILE}\n"
    File.delete output_json_file if File.exist? output_json_file
  end
end


## main entry ##

# the log file is created if necessary , and emptied , before anything is logged
#   it stays open while all of the input files are processed
File.open LOG_FILE , 'w' do | log_stream |
  @log_file = log_stream

  BLACKLIST_FILES.each do | blacklist_file |
    reset_state
    parse_entries blacklist_file
    process_entries
    print_report  blacklist_file
    validate      blacklist_file
    generate_json blacklist_file

    # the first file which fails validation ends the run , with a failure status
    exit 1 if !@errors.empty?
  end
end