summaryrefslogtreecommitdiff
path: root/report
blob: fc384cc81861de282d5eb60b7a14b21785daa4b9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
#!/usr/bin/env ruby

=begin
  USAGE:
    report.rb [ input_files ]

    this script is used to validate and report statistics on the blacklist entries

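    it currently detects syntax errors, missing bug references, missing tags, unknown
      bug references, unknown tags, missing descriptions, and duplicate entries for a
      single package (partitioning them as identical or differing)
    it will fail (exit status 1) on syntax errors, unknown bug references, unknown tags,
      or duplicate entries; the other conditions are reported but are not yet errors
    it also detects entries with no replacement, although that is not an error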

    it optionally creates a JSON file with the results,
      which can be the input to the post_fsd_wiki.phantomjs script
=end


## DEBUG begin ##

# master switch for the DBG_* tracing helpers below
DEBUG = false
# byebug is loaded only when DEBUG (or the literal on this line) is true
require 'byebug' if DEBUG || false
# names consulted by IS_DEBUG_FILTER_NAME and DBG_FILTER_NAME
DEBUG_FILTER_NAMES = []

# true only when DEBUG is on and name is listed in DEBUG_FILTER_NAMES
def IS_DEBUG_FILTER_NAME(name)
  DEBUG && DEBUG_FILTER_NAMES.include?(name)
end

# announces the file about to be parsed
def DBG_PARSE(input_filename)
  return unless DEBUG

  $stderr.print "\nDEBUG: parsing #{input_filename}\n"
end

# drops into the debugger for any line that begins with a filtered name
def DBG_FILTER_NAME(line)
  return unless DEBUG

  DEBUG_FILTER_NAMES.each do |filter_name|
    debugger if line.start_with?(filter_name)
  end
end

# dumps each token, provided tokens[0] is a filtered name
def DBG_TOKENS(tokens)
  return unless DEBUG

  tokens.each_with_index do |token, i|
    $stderr.print "DEBUG: tokens[#{i}]=#{token}\n" if IS_DEBUG_FILTER_NAME(tokens[0])
  end
end

# shows the description as it stands before a tag is peeled off
def DBG_TAG(entry)
  return unless DEBUG

  $stderr.print "\nparsing tag for: #{entry[PACKAGE_NAME_KEY]}\ndesc  IN=#{entry[DESCRIPTION_KEY]}\n"
end

# shows the description and tags after a tag was peeled off, then pauses briefly
def DBG_DESC(entry)
  return unless DEBUG

  $stderr.print "desc OUT=#{entry[DESCRIPTION_KEY]}\n  tags=#{entry[REASON_TAGS_KEY]}\n"
  sleep 0.2
end

# reports an entry that ended up with no reason tags
def DBG_NO_TAG(entry)
  return unless DEBUG

  $stderr.print "\nno tag for: #{entry[PACKAGE_NAME_KEY]}\n" if entry[REASON_TAGS_KEY].empty?
end

# reports an entry that ended up with no description
def DBG_NO_DESC(entry)
  return unless DEBUG

  $stderr.print "\nno desc for: #{entry[PACKAGE_NAME_KEY]}\n" if entry[DESCRIPTION_KEY].empty?
end

# dumps every key/value pair of an entry whose package name is filtered
def DBG_ENTRY(entry)
  return unless DEBUG && IS_DEBUG_FILTER_NAME(entry[PACKAGE_NAME_KEY])

  $stderr.print "\n"
  entry.each_pair { |key, value| $stderr.print "DEBUG: #{key}: #{value}\n" }
end

# prints the expected statistics after the test-data file was processed
#   (this one is not gated by DEBUG)
def DBG_EXPECTED(input_filename)
  return unless input_filename == 'blacklist-testdata.txt'

  $stderr.print DBG_EXPECTED_TEST_RESULTS
end

# per 'blacklist-testdata.txt'
DBG_EXPECTED_TEST_RESULTS = [
  "##------ expected results ---------##\n",
  "# entries found: 20                 #\n",
  "#   well-formed: 14                 #\n",
  "#   malformed: 6                    #\n",
  "# entries lacking bug reference: 15 #\n",
  "# entries lacking tag: 2            #\n",
  "# entries lacking description: 3    #\n",
  "# unknown bug references: 0         #\n",
  "# unknown reason tags: 3            #\n",
  "# entries with no replacement: 13   #\n",
  "# duplicate entries: 2              #\n",
  "#   identical: 1                    #\n",
  "#   differing: 2                    #\n",
  "##---------------------------------##\n"
].join

# NOTE(review): the figures below differ from DBG_EXPECTED_TEST_RESULTS above;
#   presumably they were recorded against a different data set - confirm or remove
#   entries found: 34
#     well-formed: 28
#     malformed: 6
#   entries lacking bug reference: 8
#   entries lacking tag: 8
#   entries lacking description: 4
#   unknown bug references: 1
#   unknown reason tags: 4
#   entries with no replacement: 4
#   duplicate  entries: 3
#     identical: 1
#     differing: 2
## DEBUG end ##


require 'json'
require 'set'


# NOTE: acceptable entry syntax per SYNTAX doc =>
#         ORIGINAL_PACKAGE:LIBRE_REPLACEMENT:REF:REF_ID:[TAG] SHORT_DESCRIPTION

# files validated when the command line names no existing input file
ALL_BLACKLIST_FILES = [ 'blacklist.txt' , 'your-freedom_emu-blacklist.txt' , 'your-privacy-blacklist.txt' ].freeze
# every command-line argument that names an existing file is an input file (USAGE: "[ input_files ]")
INPUT_FILE_ARGS     = ARGV.select { | arg | File.exist?(arg) }.freeze
BLACKLIST_FILES     = INPUT_FILE_ARGS.empty? ? ALL_BLACKLIST_FILES : INPUT_FILE_ARGS
# syntax check for one raw line
# NOTE(review): this also admits the reference 'sv', which is not in ACCEPTABLE_REFS,
#   so such entries parse but are then reported as unknown references - confirm that is intended
ENTRIES_REGEX       = /^[^:\[\]#]+:[^:\[\]]*:(debian|fedora|fsf|parabola|savannah|sv)?:[^:\[\]]*:\w*([^\d:]+:.*|\[[^:]+:.*|[^:]*)$/
# capture 1: the text inside a leading [...] , capture 2: everything after it
REASON_TAGS_REGEX   = /^\[([^\]]*)\] *(.*)/
# keys of the hash built for each parsed entry
RAW_ENTRY_KEY       = :raw_blacklist_entry
PACKAGE_NAME_KEY    = :original_package  # syntax field 0
REPLACEMENT_KEY     = :libre_replacement # syntax field 1
REFERENCE_KEY       = :ref               # syntax field 2      , constrained
REFERENCE_ID_KEY    = :id                # syntax field 3
REASON_TAGS_KEY     = :reason_tags       # syntax field 4 head , constrained
DESCRIPTION_KEY     = :short_description # syntax field 4 tail
ACCEPTABLE_REFS     = [ 'debian' , 'fedora' , 'fsf' , 'parabola' , 'savannah' ].freeze
ACCEPTABLE_TAGS     = [ 'nonfree'   , 'semifree'      , 'uses-nonfree' , 'branding' ,
                        'technical' , 'FIXME:package' , 'FIXME:description' ].freeze
# switches for the individual sections of the report
DO_PRINT_MALFORMED  = true
DO_PRINT_UNREPLACED = true
DO_PRINT_INCOMPLETE = true
DO_PRINT_DUPLICATED = true
DO_PRINT_STATS      = true
REPORT_SEPARATOR    = "------------------------------------------------------------\n".freeze
LOG_FILE            = 'validation.log'.freeze


# the open validation log; nil until the main entry assigns it
@log_file = nil

# Appends log_msg to @log_file; a no-op while @log_file cannot print.
def log(log_msg)
  return unless @log_file.respond_to?(:print)

  @log_file.print(log_msg)
end

# Re-initializes every per-file accumulator, so that results of a previously
# processed input file cannot leak into the next one.
def reset_state
  # raw lines rejected / entry hashes accepted while parsing
  @entries_malformed , @entries_acceptable = [] , []

  # per-entry findings
  @entries_no_ref       , @entries_no_tag       , @entries_no_desc = [] , [] , []
  @entries_refs_unknown , @entries_tags_unknown                    = [] , []
  @refs_unknown         , @tags_unknown                            = Set.new , Set.new

  # duplicate bookkeeping , keyed by package name
  @entry_freqs , @entries_identical , @entries_differing = {} , {} , {}

  # deferred - these are assigned wholesale by process_entries
  @entries_unreplaced = @entries_duplicated = nil

  # names of the validations that failed
  @errors = []
end

# Reads input_filename and sorts each of its lines into one of two buckets:
#   @entries_malformed  - the raw line , when it does not match ENTRIES_REGEX
#   @entries_acceptable - a hash of the parsed fields (see the *_KEY constants)
# Blank lines and lines starting with '#' are skipped.
# Leading "[tag]" groups are moved from the description into the tags list.
def parse_entries(input_filename)
  DBG_PARSE(input_filename)

  File.readlines(input_filename).each do | line |
    DBG_FILTER_NAME(line)

    stripped_line = line.strip
    next if stripped_line.empty? || stripped_line.start_with?('#')

    unless line.match?(ENTRIES_REGEX)
      @entries_malformed << line
      next
    end

    # ENTRIES_REGEX guarantees four ':' separators , so a limit of 5 always yields
    #   exactly five tokens ; the fifth keeps any further ':' of the description
    # without a limit , split drops trailing empty fields , and a final line
    #   lacking a newline (e.g. "pkg::::") would yield nil for the missing fields
    tokens = line.split(':' , 5)

    # traced before destructuring , so that tokens[0] is the package name
    DBG_TOKENS(tokens)

    name , replacement , ref , ref_id , description = tokens.map { | token | token.delete("\t").strip }

    # keys are inserted in this order on purpose - it is the key order of the JSON output
    entry                     = {}
    entry[RAW_ENTRY_KEY     ] = line
    entry[PACKAGE_NAME_KEY  ] = name
    entry[REPLACEMENT_KEY   ] = replacement
    entry[REFERENCE_KEY     ] = ref
    entry[REFERENCE_ID_KEY  ] = ref_id
    entry[DESCRIPTION_KEY   ] = description
    entry[REASON_TAGS_KEY   ] = []
    @entries_acceptable << entry

    # parse tags - peel one leading "[tag]" per iteration
    while entry[DESCRIPTION_KEY].start_with?('[') && entry[DESCRIPTION_KEY].include?(']')
      DBG_TAG(entry)

      # the loop condition guarantees that the regex matches at the start
      tag_match               = REASON_TAGS_REGEX.match(entry[DESCRIPTION_KEY])
      entry[REASON_TAGS_KEY] << tag_match[1]
      entry[DESCRIPTION_KEY]  = tag_match[2]

      DBG_DESC(entry)
    end

    DBG_NO_TAG(entry)
    DBG_NO_DESC(entry)
  end
end

# Classifies the entries in @entries_acceptable and fills the remaining state:
#   @entry_freqs           - occurrences per package name
#   @entries_no_ref        - entries with an empty reference or reference id
#   @entries_no_tag        - entries without reason tags
#   @entries_no_desc       - entries without a description
#   @entries_refs_unknown  - entries whose reference is not in ACCEPTABLE_REFS
#   @entries_tags_unknown  - entries with a tag that is not in ACCEPTABLE_TAGS
#   @refs_unknown          - the distinct unknown references
#   @tags_unknown          - the distinct unknown tags
#   @entries_unreplaced    - entries with an empty replacement
#   @entries_duplicated    - package names that occur more than once
#   @entries_identical     - name => "<line> (<n> identical)" for a repeated line
#   @entries_differing     - name => distinct lines , when there is more than one
# Duplicate lines are compared and stored stripped of surrounding whitespace ,
#   so neither the line terminator nor trailing blanks make equal entries "differ".
def process_entries
  @entries_acceptable.each do | entry |
    DBG_ENTRY(entry)

    entry_name         = entry[PACKAGE_NAME_KEY]
    entry_ref          = entry[REFERENCE_KEY   ]
    entry_ref_id       = entry[REFERENCE_ID_KEY]
    entry_tags         = entry[REASON_TAGS_KEY ]
    entry_desc         = entry[DESCRIPTION_KEY ]
    # an empty reference is "missing" , not "unknown"
    entry_ref_unknown  = [ entry_ref ] - ACCEPTABLE_REFS - [ '' ]
    entry_tags_unknown = entry_tags    - ACCEPTABLE_TAGS

    @entry_freqs[entry_name]  = @entry_freqs.fetch(entry_name , 0) + 1
    @entries_no_ref          << entry if     entry_ref.empty? || entry_ref_id.empty?
    @entries_no_tag          << entry if     entry_tags.empty?
    @entries_no_desc         << entry if     entry_desc.empty?
    @entries_refs_unknown    << entry unless entry_ref_unknown.empty?
    @entries_tags_unknown    << entry unless entry_tags_unknown.empty?
    @refs_unknown.merge(entry_ref_unknown)
    @tags_unknown.merge(entry_tags_unknown)
  end

  @entries_unreplaced = @entries_acceptable.select { | entry | entry[REPLACEMENT_KEY].empty? }
  @entries_duplicated = @entry_freqs.keys.select   { | name  | @entry_freqs[name] > 1 }

  # group once , instead of re-scanning every entry for each duplicated name
  entries_by_name = @entries_acceptable.group_by { | entry | entry[PACKAGE_NAME_KEY] }

  @entries_duplicated.each do | duplicate_name |
    duplicate_lines = entries_by_name[duplicate_name].map { | entry | entry[RAW_ENTRY_KEY].strip }
    unique_lines    = duplicate_lines.uniq

    unique_lines.each do | unique_line |
      n_identical = duplicate_lines.count(unique_line)
      # NOTE(review): one string per name - if several distinct lines each repeat ,
      #   only the last of them is recorded here
      @entries_identical[duplicate_name] = "#{unique_line} (#{n_identical} identical)" if n_identical > 1
    end

    @entries_differing[duplicate_name] = unique_lines if unique_lines.size > 1
  end
end

# Writes the detailed findings for input_filename to the log (via log) and ,
# when DO_PRINT_STATS is set , prints the summary statistics to stdout.
# Each section of the log is controlled by its DO_PRINT_* switch ;
# sections without findings are omitted.
def print_report(input_filename)
  if DO_PRINT_MALFORMED || DO_PRINT_INCOMPLETE || DO_PRINT_UNREPLACED || DO_PRINT_DUPLICATED
    log "\n\n#{REPORT_SEPARATOR}#{input_filename} report:\n"
  end

  # the instance variable names double as the section titles in the log
  malformed_vars  = [ '@entries_malformed' ]
  incomplete_vars = [ '@entries_no_ref'  , '@entries_refs_unknown' , '@refs_unknown' ,
                      '@entries_no_tag'  , '@entries_tags_unknown' , '@tags_unknown' ,
                      '@entries_no_desc'                                             ]
  reported_vars   = []
  reported_vars  += malformed_vars  if DO_PRINT_MALFORMED
  reported_vars  += incomplete_vars if DO_PRINT_INCOMPLETE

  reported_vars.each do | var_name |
    entries = instance_variable_get(var_name)

    log "#{REPORT_SEPARATOR}#{var_name}:\n" unless entries.empty?
    # the collections hold entry hashes , or plain strings (raw lines , refs , tags)
    entries.each { | entry | log "  #{(entry.is_a?(Hash) ? entry[RAW_ENTRY_KEY] : entry).strip}\n" }
  end

  if DO_PRINT_UNREPLACED && ! @entries_unreplaced.empty?
    log "#{REPORT_SEPARATOR}entries unreplaced:\n"
    @entries_unreplaced.each { | entry | log "  #{entry[PACKAGE_NAME_KEY]}\n" }
  end

  if DO_PRINT_DUPLICATED && ! @entries_duplicated.empty?
    log "#{REPORT_SEPARATOR}entries duplicates:\n"
    @entries_duplicated.each do | duplicate_name |
      identical  = @entries_identical[duplicate_name]
      differings = @entries_differing[duplicate_name]

      log "\n  #{duplicate_name}:\n"
      unless identical.nil?
        log "    identical:\n"
        log "      #{identical}\n"
      end
      next if differings.nil?

      log "    differing:\n"
      # log one line at a time - printing the array itself would write its inspect form ;
      #   strip , because the stored lines may still carry their line terminator
      differings.each { | differing | log "      #{differing.strip}\n" }
    end
  end

  if DO_PRINT_STATS
    all_entries = @entries_acceptable + @entries_malformed

    print "#{REPORT_SEPARATOR}#{input_filename} stats:\n"
    print "  entries found: #{all_entries.size}\n"
    print "    well-formed: #{@entries_acceptable.size}\n"
    print "    malformed: #{@entries_malformed.size}\n"
    print "  entries lacking bug reference: #{@entries_no_ref.size}\n"
    print "  entries lacking tag: #{@entries_no_tag.size}\n"
    print "  entries lacking description: #{@entries_no_desc.size}\n"
    print "  unknown bug references: #{@refs_unknown.size}\n"
    print "  unknown reason tags: #{@tags_unknown.size}\n"
    print "  entries with no replacement: #{@entries_unreplaced.size}\n"
    print "  duplicate  entries: #{@entries_duplicated.size}\n"
    print "    identical: #{@entries_identical.size}\n"
    print "    differing: #{@entries_differing.keys.size}\n"
    print REPORT_SEPARATOR
  end

  DBG_EXPECTED(input_filename)
end

# Appends to @errors the name of every enforced collection that is not empty.
# input_filename is accepted but not used here.
def validate(input_filename)
  # TODO: ref, ref_id, tag, and desc should be required - the stricter list would be:
  #         @entries_malformed , @entries_no_ref , @entries_no_tag , @entries_no_desc ,
  #         @refs_unknown      , @tags_unknown   , @entries_duplicated
  enforced = %w[@entries_malformed @refs_unknown @tags_unknown @entries_duplicated]

  enforced.each do |var_name|
    next if instance_variable_get(var_name).empty?

    @errors << var_name
  end
end

# Writes @entries_acceptable as JSON to "<input_filename>.json" when validation
# recorded no errors ; otherwise prints the failed validations and removes any
# existing JSON file of that name.
def generate_json(input_filename)
  output_json_file = "#{input_filename}.json"

  if @errors.empty?
    # File.write , not IO.write - the latter treats a path starting with '|' as a command
    File.write output_json_file , @entries_acceptable.to_json
    print "no problems detected in #{input_filename}\n"
    $stderr.print "wrote: #{output_json_file}\n"
  else
    print "ERROR: in #{input_filename} - [ #{@errors.join ','} ] - see: #{LOG_FILE}\n"
    File.delete output_json_file if File.exist? output_json_file
  end
end


## main entry ##

# Start from an empty log (created if missing), then run the whole pipeline for
# each input file; the run stops with exit status 1 at the first file with errors.
File.open(LOG_FILE , 'w') do | log_file |
  @log_file = log_file

  BLACKLIST_FILES.each do | blacklist_filename |
    reset_state
    parse_entries(blacklist_filename)
    process_entries
    print_report(blacklist_filename)
    validate(blacklist_filename)
    generate_json(blacklist_filename)

    exit 1 unless @errors.empty?
  end
end