summaryrefslogtreecommitdiff
path: root/report
blob: 17a32eecd5dd37d71b8447ac976d9a43f8a9a920 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
#!/usr/bin/env ruby

=begin
  report.rb

    this script is used to validate and report statistics on the blacklist entries

    it currently detects syntax errors, missing tags, unknown tags, missing descriptions,
      and duplicate entries for a single package (partitioning them as identical or differing)
    it exits with an error if syntax errors, unknown tags, or duplicate entries are found;
      missing tags and missing descriptions are reported but do not yet cause a failure
    it also detects entries with no replacement, although that is not an error

    it optionally creates a JSON file with the results
    that can be the input to the post_fsd_wiki.phantomjs script
=end


## DEBUG begin ##

# Master switch for the DBG_* tracing hooks below; byebug is only loaded when it is on.
DEBUG = false
require 'byebug' if DEBUG

# Names to single out while tracing; with an empty list nothing is singled out.
DEBUG_FILTER_NAMES = []

# True only when DEBUG is on and +name+ is listed in DEBUG_FILTER_NAMES.
def IS_DEBUG_FILTER_NAME name
  DEBUG && (DEBUG_FILTER_NAMES.include? name)
end

# Announces which file is about to be parsed.
def DBG_PARSE input_filename
  return unless DEBUG

  print "\nDEBUG: parsing #{input_filename}\n"
end

# Invokes the debugger when +line+ begins with one of DEBUG_FILTER_NAMES.
def DBG_FILTER_NAME line
  return unless DEBUG

  DEBUG_FILTER_NAMES.each { | ea | debugger if line.start_with? ea }
end

# Prints every token with its index, but only when tokens[0] is a filtered name.
def DBG_TOKENS tokens
  return unless DEBUG

  tokens.each_with_index { | token , i | print "DEBUG: tokens[#{i}]=#{token}\n" if IS_DEBUG_FILTER_NAME tokens[0] }
end

# Prints the package name and the description as it looks before a tag is extracted.
def DBG_TAG entry
  return unless DEBUG

  print "\nparsing tag for: #{entry[PACKAGE_NAME_KEY]}\n"
  print "desc  IN=#{entry[DESCRIPTION_KEY]}\n"
end

# Prints the description and tag list after a tag was extracted, then pauses briefly.
def DBG_DESC entry
  return unless DEBUG

  print "desc OUT=#{entry[DESCRIPTION_KEY]}\n"
  print "tags=#{entry[BLACKLIST_TAGS_KEY]}\n"
  sleep 0.2
end

# Reports an entry whose tag list is empty.
def DBG_NO_TAG entry
  return unless DEBUG

  print "\nno tag for: #{ entry[PACKAGE_NAME_KEY]}\n" if entry[BLACKLIST_TAGS_KEY].empty?
end

# Reports an entry whose description is empty.
def DBG_NO_DESC entry
  return unless DEBUG

  print "\nno desc for: #{entry[PACKAGE_NAME_KEY]}\n" if entry[DESCRIPTION_KEY].empty?
end

# Dumps every key/value pair of an entry whose package name is a filtered name.
def DBG_ENTRY entry
  return unless DEBUG && (IS_DEBUG_FILTER_NAME entry[PACKAGE_NAME_KEY])

  print "\n"
  entry.each_pair { | k , v | print "DEBUG: #{k}: #{v}\n" }
end
# Prints the statistics expected for the 'blacklist-testdata.txt' file,
# so that they can be compared by eye against the stats that were just reported.
# Does nothing for any other file name. NOTE: this hook is not gated by DEBUG.
def DBG_EXPECTED input_filename
  return unless input_filename == 'blacklist-testdata.txt'

  expected_lines = [ '##------ expected results ------##' ,
                     '# entries found: 20              #' ,
                     '# entries valid: 14              #' ,
                     '# entries invalid: 6             #' ,
                     '# entries lacking tags: 2        #' ,
                     '# entries lacking description: 3 #' ,
                     '# tags unknown: 3                #' ,
                     '# entries unreplaced: 13         #' ,
                     '# entries duplicated: 2          #' ,
                     '#   identical: 1                 #' ,
                     '#   differing: 1                 #' ,
                     '##------------------------------##' ]

  print expected_lines.map { | text | "#{text}\n" }.join
end
## DEBUG end ##


require 'json'
require 'set'


# NOTE: per the SYNTAX doc, an acceptable entry looks like =>
#         ORIGINAL_PACKAGE:LIBRE_REPLACEMENT:REF:REF_ID:[TAG] SHORT_DESCRIPTION

# input files: all known blacklists, unless the first CLI argument names one of them
ALL_BLACKLIST_FILES    = %w[ blacklist.txt your-freedom_emu-blacklist.txt your-privacy-blacklist.txt ]
BLACKLIST_FILES        = if ALL_BLACKLIST_FILES.include?(ARGV.first)
                           [ ARGV.first ]
                         else
                           ALL_BLACKLIST_FILES
                         end
# BLACKLIST_FILES        = [ 'blacklist-testdata.txt' ] # DEBUG

# lines not matching VALID_ENTRIES_REGEX are collected as invalid entries
# BLACKLIST_TAGS_REGEX captures a leading "[tag]" and the text that follows it
VALID_ENTRIES_REGEX    = /^[^:\[\]#]*:[^:\[\]]*:(sv|debian|parabola|fsf|fedora)?:[^:\[\]]*:\w*([^\d:]+:.*|\[[^:]+:.*|[^:]*)$/
BLACKLIST_TAGS_REGEX   = /^\[([^\]]*)\] *(.*)/

# keys of the Hash that represents one parsed entry
RAW_ENTRY_KEY          = :raw_blacklist_entry
PACKAGE_NAME_KEY       = :original_package  # syntax token
REPLACEMENT_KEY        = :libre_replacement # syntax token
REFERENCE_KEY          = :ref               # syntax token
REFERENCE_ID_KEY       = :id                # syntax token
DESCRIPTION_KEY        = :short_description # syntax token
BLACKLIST_TAGS_KEY     = :blacklist_tags

# tags that an entry may carry; any other tag is reported as unknown
NONFREE_TAG            = 'nonfree'
SEMIFREE_TAG           = 'semifree'
USES_NONFREE_TAG       = 'uses-nonfree'
BRANDING_TAG           = 'branding'
TECHNICAL_TAG          = 'technical'
NEEDS_REPLACEMENT_TAG  = 'FIXME:package'
NEEDS_DESC_TAG         = 'FIXME:description'
ACCEPTABLE_TAGS        = [ NONFREE_TAG           ,
                           SEMIFREE_TAG          ,
                           USES_NONFREE_TAG      ,
                           BRANDING_TAG          ,
                           TECHNICAL_TAG         ,
                           NEEDS_REPLACEMENT_TAG ,
                           NEEDS_DESC_TAG        ]

# switches selecting which sections of the report are printed
DO_PRINT_STATS         = true
DO_PRINT_INVALID       = true
DO_PRINT_UNREPLACED    = true
DO_PRINT_INCOMPLETE    = true
DO_PRINT_DUPLICATED    = true

REPORT_SEPARATOR       = "------------------------------------------------------------\n"


# Clears all per-file state, so that each blacklist file is analysed independently.
# The two "deferred" collections stay nil until process_entries assigns them.
def reset_state
  # partition of the raw input lines
  @entries_valid                = []
  @entries_invalid              = []

  # per-entry findings
  @entry_freqs                  = {}
  @entries_no_tags              = []
  @entries_no_desc              = []
  @entries_tags_unknown         = []
  @tags_unknown                 = Set.new

  # derived collections
  @entries_unreplaced           = nil # deferred
  @entries_duplicated           = nil # deferred
  @entries_duplicated_identical = {}
  @entries_duplicated_differing = {}

  # names of the sanity checks that failed
  @errors                       = []
end

# Reads +input_filename+ and sorts each of its lines into one of two collections:
#   * @entries_invalid - raw lines that do not match VALID_ENTRIES_REGEX
#   * @entries_valid   - one Hash per well-formed line, keyed by the *_KEY constants
# Blank lines and lines whose first non-blank character is '#' are skipped.
#
# The first four ':'-separated fields become the package name, replacement,
# reference, and reference id; everything after the fourth ':' is the description.
# Leading "[tag]" groups are then moved from the description into the tag list.
# All fields have tabs removed and surrounding whitespace stripped.
#
# Both collections must already exist (see reset_state).
def parse_entries input_filename
DBG_PARSE input_filename

  clean_field = lambda { | field | field.delete("\t").strip }

  (File.readlines input_filename).each do | line |

DBG_FILTER_NAME line

    stripped_line = line.strip
    next if stripped_line.empty? || (stripped_line.start_with? '#')

    unless line.match VALID_ENTRIES_REGEX
      @entries_invalid << line
      next
    end

    # The negative limit keeps trailing empty fields. Without it, a final line
    # lacking a newline (e.g. "pkg::::") would yield fewer than four tokens,
    # and the field extraction below would fail on nil.
    tokens = line.split(':' , -1)

    entry                     = {}
    entry[RAW_ENTRY_KEY     ] = line
    entry[PACKAGE_NAME_KEY  ] = clean_field.call tokens.shift
    entry[REPLACEMENT_KEY   ] = clean_field.call tokens.shift
    entry[REFERENCE_KEY     ] = clean_field.call tokens.shift
    entry[REFERENCE_ID_KEY  ] = clean_field.call tokens.shift
    entry[DESCRIPTION_KEY   ] = clean_field.call(tokens.join ':') # the description may itself contain ':'
    entry[BLACKLIST_TAGS_KEY] = []
    @entries_valid           << entry

DBG_TOKENS tokens

    # parse tags - each pass moves one leading "[tag]" out of the description
    while (entry[DESCRIPTION_KEY].start_with? '[') && (entry[DESCRIPTION_KEY].include? ']')

DBG_TAG entry

      entry[BLACKLIST_TAGS_KEY] << (entry[DESCRIPTION_KEY].gsub BLACKLIST_TAGS_REGEX , '\1')
      entry[DESCRIPTION_KEY   ]  = (entry[DESCRIPTION_KEY].gsub BLACKLIST_TAGS_REGEX , '\2')

DBG_DESC entry

    end
DBG_NO_TAG entry
DBG_NO_DESC entry
  end
end

# Analyses @entries_valid (as filled by parse_entries) and records:
#   * @entry_freqs                  - occurrences per package name
#   * @entries_no_desc              - entries with an empty description
#   * @entries_no_tags              - entries with no tags
#   * @entries_tags_unknown         - entries having a tag that is not in ACCEPTABLE_TAGS
#   * @tags_unknown                 - the set of those unknown tags
#   * @entries_unreplaced           - entries with an empty replacement
#   * @entries_duplicated           - package names occurring more than once
#   * @entries_duplicated_identical - name => "<entry> (N identical)" for repeated lines
#   * @entries_duplicated_differing - name => distinct lines, when there are several
#
# Duplicate lines are compared and stored with surrounding whitespace stripped.
# Otherwise, a repeat of the file's last line would be classified as "differing"
# merely for lacking a newline, and the "(N identical)" label would be appended
# after the line terminator rather than on the same line.
def process_entries
  @entries_valid.each do | entry |

DBG_ENTRY entry

    entry_name         = entry[PACKAGE_NAME_KEY  ]
    entry_desc         = entry[DESCRIPTION_KEY   ]
    entry_tags         = entry[BLACKLIST_TAGS_KEY]
    entry_tags_unknown = entry_tags - ACCEPTABLE_TAGS

    @entry_freqs[entry_name]  = (@entry_freqs[entry_name] || 0) + 1
    @entries_no_desc         << entry if     entry_desc        .empty?
    @entries_no_tags         << entry if     entry_tags        .empty?
    @entries_tags_unknown    << entry unless entry_tags_unknown.empty?
    @tags_unknown.merge         entry_tags_unknown
  end

  @entries_unreplaced = @entries_valid   .select { | entry | entry[REPLACEMENT_KEY].empty? }
  @entries_duplicated = @entry_freqs.keys.select { | name | @entry_freqs[name] > 1 }

  # group once, rather than re-scanning all entries for each duplicated name
  entries_by_name = @entries_valid.group_by { | entry | entry[PACKAGE_NAME_KEY] }

  @entries_duplicated.each do | duplicate_name |
    duplicate_entries = (entries_by_name.fetch duplicate_name , []).map { | entry | entry[RAW_ENTRY_KEY].strip }
    unique_entries    = duplicate_entries.uniq

    unique_entries.each do | uniq_value |
      n_identical_entries = duplicate_entries.count uniq_value
      next unless n_identical_entries > 1

      @entries_duplicated_identical[duplicate_name] = "#{uniq_value} (#{n_identical_entries} identical)"
    end

    @entries_duplicated_differing[duplicate_name] = unique_entries if unique_entries.size > 1
  end
end

# Prints the findings of process_entries for +input_filename+ to stdout.
#
# The DO_PRINT_* constants select the sections: invalid entries, incomplete
# entries (lacking description or tags, or having unknown tags), unreplaced
# entries, duplicated entries, and the statistics summary.
# A section with nothing to show is omitted entirely.
#
# The "report:" header is printed whenever any of the detail sections is enabled,
# including the duplicates section, so that section never appears without a header.
def print_report input_filename
  if DO_PRINT_INVALID || DO_PRINT_INCOMPLETE || DO_PRINT_UNREPLACED || DO_PRINT_DUPLICATED
    print "\n\n#{REPORT_SEPARATOR}#{input_filename} report:\n"
  end

  problem_sections = {}
  problem_sections['entries invalid'            ] = @entries_invalid      if DO_PRINT_INVALID
  problem_sections['entries lacking description'] = @entries_no_desc      if DO_PRINT_INCOMPLETE
  problem_sections['entries lacking tags'       ] = @entries_no_tags      if DO_PRINT_INCOMPLETE
  problem_sections['entries with unknown tags'  ] = @entries_tags_unknown if DO_PRINT_INCOMPLETE
  problem_sections['tags unknown'               ] = @tags_unknown         if DO_PRINT_INCOMPLETE
  problem_sections.each_pair do | label , data |
    next if data.empty?

    print "#{REPORT_SEPARATOR}#{label}:\n"
    # items are either parsed entries (Hash) or plain strings (raw lines, tag names)
    data.each { | entry | print "  #{((entry.is_a? Hash) ? entry[RAW_ENTRY_KEY] : entry).strip}\n" }
  end

  if DO_PRINT_UNREPLACED && ! @entries_unreplaced.empty?
    print "#{REPORT_SEPARATOR}entries unreplaced:\n"
    @entries_unreplaced.each { | entry | print "  #{entry[PACKAGE_NAME_KEY]}\n" }
  end

  if DO_PRINT_DUPLICATED && ! @entries_duplicated.empty?
    print "#{REPORT_SEPARATOR}entries duplicates:\n"
    @entries_duplicated.each do | duplicate_name |
      entry_identical   = @entries_duplicated_identical[duplicate_name]
      entries_differing = @entries_duplicated_differing[duplicate_name]

      print "\n  #{duplicate_name}:\n"
      unless entry_identical.nil?
        print "    identical:\n"
        print "      #{entry_identical}\n"
      end
      unless entries_differing.nil?
        print "    differing:\n"
        entries_differing.each { | entry | print "      #{entry}\n" }
      end
    end
  end

  if DO_PRINT_STATS
    stats = [ [ '  entries found'               , (@entries_valid + @entries_invalid).size ] ,
              [ '  entries valid'               , @entries_valid                     .size ] ,
              [ '  entries invalid'             , @entries_invalid                   .size ] ,
              [ '  entries lacking tags'        , @entries_no_tags                   .size ] ,
              [ '  entries lacking description' , @entries_no_desc                   .size ] ,
              [ '  tags unknown'                , @tags_unknown                      .size ] ,
              [ '  entries unreplaced'          , @entries_unreplaced                .size ] ,
              [ '  entries duplicated'          , @entries_duplicated                .size ] ,
              [ '    identical'                 , @entries_duplicated_identical      .size ] ,
              [ '    differing'                 , @entries_duplicated_differing      .size ] ]

    print "#{REPORT_SEPARATOR}#{input_filename} stats:\n"
    stats.each { | label , count | print "#{label}: #{count}\n" }
    print REPORT_SEPARATOR
  end

DBG_EXPECTED input_filename
end

# Appends to @errors the name of each failed check: invalid entries,
# unknown tags, and duplicated entries. Entries lacking tags or a description
# are deliberately not treated as errors yet (see the TODOs below).
# +input_filename+ is accepted but is not used.
def sanity_check input_filename
  @errors.push 'entries_invalid'    if ! @entries_invalid   .empty?
  # TODO: complete these entries, then enable this check
  # @errors.push 'entries_no_tags'    if ! @entries_no_tags   .empty?
  # TODO: complete these entries, then enable this check
  # @errors.push 'entries_no_desc'    if ! @entries_no_desc   .empty?
  @errors.push 'tags_unknown'       if ! @tags_unknown      .empty?
  @errors.push 'entries_duplicated' if ! @entries_duplicated.empty?
end

# Writes @entries_valid as JSON to "<input_filename>.json" and reports success,
# but only when @errors is empty; otherwise it prints the failed checks
# and leaves the file system untouched.
def generate_json input_filename
  unless @errors.empty?
    print "\nERROR: in #{input_filename} - #{@errors.join ','} - JSON will not be generated\n"
    return
  end

  json_path = "#{input_filename}.json"
  IO.write json_path , @entries_valid.to_json
  print "\nwrote: #{json_path}\n\nno problems detected in #{input_filename}\n"
end


# Validate and report on each blacklist file in turn, starting from a clean state.
# The run stops with exit status 1 at the first file for which any check failed,
# so the files after it are not processed.
BLACKLIST_FILES.each do | blacklist_file |
  reset_state

  parse_entries   blacklist_file
  process_entries
  print_report    blacklist_file
  sanity_check    blacklist_file
  generate_json   blacklist_file

  next if @errors.empty?

  exit 1
end