1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
|
#!/usr/bin/env ruby
=begin
report.rb
This script validates the blacklist files and reports statistics on their entries.
It currently detects syntax errors, missing tags, unknown tags, missing descriptions,
and duplicate entries for a single package (partitioning duplicates as identical or
differing), and it will fail if any of those problems is found.
It also reports entries with no libre replacement, although that is not an error.
It optionally creates a JSON file with the results,
which can be used as the input to the post_fsd_wiki.phantomjs script.
=end
## DEBUG begin ##
# Debugging scaffolding: every DBG_* helper is a no-op unless DEBUG is true
# (except DBG_EXPECTED, which keys off the test-data filename instead).
# DEBUG_FILTER_NAMES restricts verbose output / breakpoints to specific packages.
DEBUG = false
require 'byebug' if DEBUG
DEBUG_FILTER_NAMES = []
# true when debugging AND name is one of the packages selected for tracing
def IS_DEBUG_FILTER_NAME name ; DEBUG && (DEBUG_FILTER_NAMES.include? name) ; end ;
# announce which blacklist file is about to be parsed
def DBG_PARSE input_filename ; if DEBUG ; print "\nDEBUG: parsing #{input_filename}\n" ; end ; end ;
# drop into the byebug debugger when a filtered package's raw line is reached
def DBG_FILTER_NAME line ; if DEBUG ; DEBUG_FILTER_NAMES.each { | ea | debugger if line.start_with? ea } ; end ; end ;
# dump the colon-split tokens for entries selected by the name filter
def DBG_TOKENS tokens ; if DEBUG ; tokens.each_with_index { | token , i | print "DEBUG: tokens[#{i}]=#{token}\n" if IS_DEBUG_FILTER_NAME tokens[0] } ; end ; end ;
# trace the tag-extraction loop - state before one extraction pass
def DBG_TAG entry ; if DEBUG ; print "\nparsing tag for: #{entry[PACKAGE_NAME_KEY]}\n" ; print "desc IN=#{entry[DESCRIPTION_KEY]}\n" ; end ; end ;
# trace the tag-extraction loop - state after one extraction pass
def DBG_DESC entry ; if DEBUG ; print "desc OUT=#{entry[DESCRIPTION_KEY]}\n" ; print "tags=#{entry[BLACKLIST_TAGS_KEY]}\n" ; sleep 0.2 ; end ; end ;
# report entries that ended up with no tags after parsing
def DBG_NO_TAG entry ; if DEBUG ; print "\nno tag for: #{ entry[PACKAGE_NAME_KEY]}\n" if entry[BLACKLIST_TAGS_KEY].empty? ; end ; end ;
# report entries that ended up with no description after parsing
def DBG_NO_DESC entry ; if DEBUG ; print "\nno desc for: #{entry[PACKAGE_NAME_KEY]}\n" if entry[DESCRIPTION_KEY ].empty? ; end ; end ;
# dump a fully-parsed entry hash for packages selected by the name filter
def DBG_ENTRY entry ; if DEBUG && (IS_DEBUG_FILTER_NAME entry[PACKAGE_NAME_KEY]) ; print "\n" ; entry.each_pair { | k , v | print "DEBUG: #{k}: #{v}\n" } ; end ; end ;
# print the known-good report for the bundled test-data file
# NOTE: this triggers on the filename alone, regardless of DEBUG
def DBG_EXPECTED input_filename ; if input_filename == 'blacklist-testdata.txt'
  print "##------ expected results ------##\n" +
        "# entries found: 20 #\n" +
        "# entries valid: 14 #\n" +
        "# entries invalid: 6 #\n" +
        "# entries lacking tags: 2 #\n" +
        "# entries lacking description: 3 #\n" +
        "# tags unknown: 3 #\n" +
        "# entries unreplaced: 13 #\n" +
        "# entries duplicated: 2 #\n" +
        "# identical: 1 #\n" +
        "# differing: 1 #\n" +
        "##------------------------------##\n" ; end ; end ;
## DEBUG end ##
require 'json'
require 'set'
# NOTE: acceptable entry syntax per SYNTAX doc =>
# ORIGINAL_PACKAGE:LIBRE_REPLACEMENT:REF:REF_ID:[TAG] SHORT_DESCRIPTION

# the blacklist files this script knows how to process; one filename from this
# list may be passed as ARGV[0] to restrict the run to that single file
# (constants are frozen to guard against accidental mutation)
ALL_BLACKLIST_FILES = [ 'blacklist.txt' , 'your-freedom_emu-blacklist.txt' , 'your-privacy-blacklist.txt' ].freeze
BLACKLIST_FILES = ((ALL_BLACKLIST_FILES.include? ARGV.first) ? [ ARGV.first ] : ALL_BLACKLIST_FILES).freeze
# BLACKLIST_FILES = [ 'blacklist-testdata.txt' ] # DEBUG

# structural validation for a whole entry line (see syntax NOTE above);
# lines failing this regex are collected as invalid
VALID_ENTRIES_REGEX = /^[^:\[\]#]*:[^:\[\]]*:(sv|debian|parabola|fsf|fedora)?:[^:\[\]]*:\w*([^\d:]+:.*|\[[^:]+:.*|[^:]*)$/
# captures a single leading "[tag]" as group 1 and the remaining text as group 2
BLACKLIST_TAGS_REGEX = /^\[([^\]]*)\] *(.*)/

# hash keys for one parsed entry
RAW_ENTRY_KEY = :raw_blacklist_entry
PACKAGE_NAME_KEY = :original_package # syntax token
REPLACEMENT_KEY = :libre_replacement # syntax token
REFERENCE_KEY = :ref # syntax token
REFERENCE_ID_KEY = :id # syntax token
DESCRIPTION_KEY = :short_description # syntax token
BLACKLIST_TAGS_KEY = :blacklist_tags

# the set of tags considered valid in entries
NONFREE_TAG = 'nonfree'.freeze
SEMIFREE_TAG = 'semifree'.freeze
USES_NONFREE_TAG = 'uses-nonfree'.freeze
BRANDING_TAG = 'branding'.freeze
TECHNICAL_TAG = 'technical'.freeze
NEEDS_REPLACEMENT_TAG = 'FIXME:package'.freeze
NEEDS_DESC_TAG = 'FIXME:description'.freeze
ACCEPTABLE_TAGS = [ NONFREE_TAG , SEMIFREE_TAG , USES_NONFREE_TAG , BRANDING_TAG ,
                    TECHNICAL_TAG , NEEDS_REPLACEMENT_TAG , NEEDS_DESC_TAG ].freeze

# report section toggles
DO_PRINT_STATS = true
DO_PRINT_INVALID = true
DO_PRINT_UNREPLACED = true
DO_PRINT_INCOMPLETE = true
DO_PRINT_DUPLICATED = true
REPORT_SEPARATOR = "------------------------------------------------------------\n".freeze
# Re-initialize all per-file accumulator state.
# Called once before parsing each blacklist file so results never leak between files.
def reset_state
  @entries_valid                = []     # entries matching VALID_ENTRIES_REGEX
  @entries_invalid              = []     # raw lines that failed validation
  @entries_no_desc              = []     # valid entries with an empty description
  @entries_no_tags              = []     # valid entries carrying no tags
  @entries_tags_unknown         = []     # valid entries carrying unrecognized tags
  @entry_freqs                  = {}     # package name => occurrence count
  @tags_unknown                 = Set[]  # union of all unrecognized tag names
  @entries_unreplaced           = nil    # deferred - computed by process_entries
  @entries_duplicated           = nil    # deferred - computed by process_entries
  @entries_duplicated_identical = {}     # dup name => representative identical line
  @entries_duplicated_differing = {}     # dup name => list of differing lines
  @errors                       = []     # fatal condition labels for sanity_check
end
# Parse one blacklist file.
# Each non-blank, non-comment line is validated against VALID_ENTRIES_REGEX:
# failures are collected raw into @entries_invalid; successes are split on ':'
# into their syntax tokens (see the SYNTAX NOTE above), tab-stripped, and pushed
# as a Hash onto @entries_valid.  Leading "[tag]" markers are then peeled off
# the description one at a time into BLACKLIST_TAGS_KEY.
def parse_entries input_filename
  DBG_PARSE input_filename
  (File.readlines input_filename).each do | line |
    DBG_FILTER_NAME line
    # skip blank lines and full-line comments
    next if line.strip.empty? || (line.strip.start_with? '#')
    # NOTE: '<<' binds tighter than '&&', so this appends the raw line, then skips it
    @entries_invalid << line && next unless line.match VALID_ENTRIES_REGEX
    @entries_valid << (entry = {})
    tokens = (line.split ':')
    entry[RAW_ENTRY_KEY ] = line
    entry[PACKAGE_NAME_KEY ] = (tokens.shift ).gsub("\t" , '').strip
    entry[REPLACEMENT_KEY ] = (tokens.shift ).gsub("\t" , '').strip
    entry[REFERENCE_KEY ] = (tokens.shift ).gsub("\t" , '').strip
    entry[REFERENCE_ID_KEY ] = (tokens.shift ).gsub("\t" , '').strip
    # the description itself may contain ':' - rejoin whatever tokens remain
    entry[DESCRIPTION_KEY ] = (tokens.join ':').gsub("\t" , '').strip
    entry[BLACKLIST_TAGS_KEY] = []
    DBG_TOKENS tokens
    # parse tags: repeatedly strip one leading "[tag]" from the description,
    # accumulating each tag name, until no bracketed prefix remains
    while (entry[DESCRIPTION_KEY].start_with? '[') && (entry[DESCRIPTION_KEY].include? ']')
      DBG_TAG entry
      entry[BLACKLIST_TAGS_KEY] << (entry[DESCRIPTION_KEY].gsub BLACKLIST_TAGS_REGEX , '\1')
      entry[DESCRIPTION_KEY ] = (entry[DESCRIPTION_KEY].gsub BLACKLIST_TAGS_REGEX , '\2')
      DBG_DESC entry
    end
    DBG_NO_TAG entry
    DBG_NO_DESC entry
  end
end
# Aggregate statistics over @entries_valid (previously populated by parse_entries):
# per-package occurrence counts, entries missing a description or tags, unknown
# tags, unreplaced entries, and duplicated entries - the latter partitioned into
# byte-identical repeats vs differing definitions of the same package.
def process_entries
  @entries_valid.each do | entry |
    DBG_ENTRY entry
    name = entry[PACKAGE_NAME_KEY]
    unrecognized = entry[BLACKLIST_TAGS_KEY] - ACCEPTABLE_TAGS
    # tally how many times each package name appears (duplicates detected below)
    @entry_freqs[name] = (@entry_freqs[name] || 0) + 1
    @entries_no_desc << entry if entry[DESCRIPTION_KEY].empty?
    @entries_no_tags << entry if entry[BLACKLIST_TAGS_KEY].empty?
    @entries_tags_unknown << entry unless unrecognized.empty?
    @tags_unknown.merge unrecognized
  end
  # entries with no libre replacement (reported, but not an error)
  @entries_unreplaced = @entries_valid.select { | entry | entry[REPLACEMENT_KEY].empty? }
  # package names that appear more than once
  @entries_duplicated = @entry_freqs.keys.select { | name | @entry_freqs[name] > 1 }
  @entries_duplicated.each do | dup_name |
    raw_lines = @entries_valid.select { | entry | entry[PACKAGE_NAME_KEY] == dup_name } \
                              .map { | entry | entry[RAW_ENTRY_KEY] }
    distinct_lines = raw_lines.uniq
    # record any raw line that is repeated verbatim, annotated with its count
    distinct_lines.each do | value |
      copies = raw_lines.count { | raw | raw == value }
      @entries_duplicated_identical[dup_name] = value + " (#{copies} identical)" if copies > 1
    end
    # record the full set of variants when the duplicates are not all identical
    @entries_duplicated_differing[dup_name] = distinct_lines if distinct_lines.size > 1
  end
end
# Print the human-readable report for one blacklist file to stdout.
# Sections are gated by the DO_PRINT_* toggles: invalid/incomplete entries,
# unknown tags, unreplaced entries, duplicates (identical vs differing),
# and a summary statistics table.
def print_report input_filename
  if DO_PRINT_INVALID || DO_PRINT_INCOMPLETE || DO_PRINT_UNREPLACED
    print "\n\n#{REPORT_SEPARATOR}#{input_filename} report:\n"
  end
  # labelled problem collections to dump, in report order
  print_invalid = {}
  print_invalid['entries invalid' ] = @entries_invalid if DO_PRINT_INVALID
  print_invalid['entries lacking description'] = @entries_no_desc if DO_PRINT_INCOMPLETE
  print_invalid['entries lacking tags' ] = @entries_no_tags if DO_PRINT_INCOMPLETE
  print_invalid['entries with unknown tags' ] = @entries_tags_unknown if DO_PRINT_INCOMPLETE
  print_invalid['tags unknown' ] = @tags_unknown if DO_PRINT_INCOMPLETE
  print_invalid.each_pair do | label , data |
    print "#{REPORT_SEPARATOR}#{label}:\n" unless data.empty?
    # elements are either parsed entry hashes (print the raw line) or plain strings
    data.each { | entry | print "  #{((entry.is_a? Hash) ? entry[RAW_ENTRY_KEY] : entry).strip}\n" }
  end
  unless @entries_unreplaced.empty? || ! DO_PRINT_UNREPLACED
    print "#{REPORT_SEPARATOR}entries unreplaced:\n"
    @entries_unreplaced.each { | entry | print "  #{entry[PACKAGE_NAME_KEY]}\n" }
  end
  unless @entries_duplicated.empty? || ! DO_PRINT_DUPLICATED
    print "#{REPORT_SEPARATOR}entries duplicates:\n"
    @entries_duplicated.each do | duplicate_name |
      # either or both of these may be nil for a given duplicate name
      entry_identical = @entries_duplicated_identical[duplicate_name]
      entries_differing = @entries_duplicated_differing[duplicate_name]
      print "\n  #{duplicate_name}:\n"
      print "    identical:\n" unless entry_identical .nil?
      print "      #{entry_identical}\n" unless entry_identical .nil?
      print "    differing:\n" unless entries_differing.nil?
      entries_differing.each { | entry | print "      #{entry}\n" } unless entries_differing.nil?
    end
  end
  if DO_PRINT_STATS
    # summary statistics table (compare against DBG_EXPECTED for the test-data file)
    print "#{REPORT_SEPARATOR}#{input_filename} stats:\n"
    print "  entries found: #{ (@entries_valid + @entries_invalid).size }\n"
    print "  entries valid: #{ (@entries_valid ).size }\n"
    print "  entries invalid: #{ (@entries_invalid ).size }\n"
    print "  entries lacking tags: #{ (@entries_no_tags ).size }\n"
    print "  entries lacking description: #{(@entries_no_desc ).size }\n"
    print "  tags unknown: #{ (@tags_unknown ).size }\n"
    print "  entries unreplaced: #{ (@entries_unreplaced ).size }\n"
    print "  entries duplicated: #{ (@entries_duplicated ).size }\n"
    print "  identical: #{ (@entries_duplicated_identical ).size }\n"
    print "  differing: #{ (@entries_duplicated_differing ).keys.size}\n"
    print REPORT_SEPARATOR
  end
  DBG_EXPECTED input_filename
end
# Record fatal problems found in the current file by appending labels to @errors.
# NOTE: the input_filename parameter is currently unused (kept for call-site symmetry).
def sanity_check input_filename
  fatal_conditions = {
    'entries_invalid'    => @entries_invalid ,
    # 'entries_no_tags'  => @entries_no_tags , # TODO: complete these entries
    # 'entries_no_desc'  => @entries_no_desc , # TODO: complete these entries
    'tags_unknown'       => @tags_unknown ,
    'entries_duplicated' => @entries_duplicated }
  fatal_conditions.each_pair { | label , found | @errors << label unless found.empty? }
end
# Write @entries_valid as JSON to "<input_filename>.json", but only when no
# errors were recorded for this file; otherwise print the errors and skip.
# The JSON output is consumed by the post_fsd_wiki.phantomjs script.
def generate_json input_filename
  unless @errors.empty?
    print "\nERROR: in #{input_filename} - #{@errors.join ','} - JSON will not be generated\n"
    return
  end
  output_json_file = "#{input_filename}.json"
  IO.write output_json_file , @entries_valid.to_json
  print "\nwrote: #{output_json_file}\n\nno problems detected in #{input_filename}\n"
end
# main: process each blacklist file independently - parse, aggregate, report,
# sanity-check, and (when clean) emit JSON; exit non-zero as soon as any file
# has fatal problems, so CI can reject the change
BLACKLIST_FILES.each do | input_filename |
  reset_state
  parse_entries input_filename
  process_entries
  print_report input_filename
  sanity_check input_filename
  generate_json input_filename
  exit 1 unless @errors.empty?
end
|