summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgrizzlyuser <grizzlyuser@protonmail.com>2020-12-30 21:48:02 +0200
committerbill-auger <mr.j.spam.me@gmail.com>2021-01-11 03:13:51 -0500
commitabc8686cbd0442e2d59d74b27cf2beb7084d6b80 (patch)
treeecc89898ba2827e3b34afb0413d81ff588ddf8f2
parentb5671a575e4a3a2569e8d28d43e02739bc96a0d0 (diff)
libre/iceweasel: Add JSON processing script
jq was not enough: it lacks the functionality necessary to do all these changes in one pass, such as verification against JSON schemas or automatic generation of unique timestamps for changed records. The Python script can be updated in the future to support more Remote Settings dumps, not just main/search-config and monitor/changes.
-rw-r--r--libre/iceweasel/PKGBUILD10
-rw-r--r--libre/iceweasel/libre-process-json-files.py174
2 files changed, 179 insertions, 5 deletions
diff --git a/libre/iceweasel/PKGBUILD b/libre/iceweasel/PKGBUILD
index 84542ba6f..77fff6406 100644
--- a/libre/iceweasel/PKGBUILD
+++ b/libre/iceweasel/PKGBUILD
@@ -62,8 +62,7 @@ makedepends=(unzip zip diffutils yasm mesa imake inetutils xorg-server-xvfb
autoconf2.13 rust clang llvm jack gtk2 nodejs cbindgen nasm
python-setuptools python-psutil python-zstandard lld)
# FIXME: 'mozilla-serarchplugins' package needs re-working (see note in prepare())
-makedepends+=(quilt libxslt imagemagick git jq)
-makedepends+=(rust=1:1.46.0) # FIXME: FTBS with rust v1.47.0
+makedepends+=(quilt libxslt imagemagick git jq python-jsonschema)
optdepends=('networkmanager: Location detection via available WiFi networks'
'libnotify: Notification integration'
'pulseaudio: Audio support'
@@ -76,8 +75,8 @@ source=(https://archive.mozilla.org/pub/firefox/releases/$pkgver/source/firefox-
$pkgname.desktop)
source+=(https://repo.parabola.nu/other/iceweasel/${pkgname}_${_brandingver}-${_brandingrel}.branding.tar.xz{,.sig}
libre.patch
- libre-searchengines.patch
libre-0001-always-sync-remote-settings-with-local-dump.patch
+ libre-process-json-files.py
vendor.js.in)
source_armv7h=(arm.patch
build-arm-libopus.patch)
@@ -338,7 +337,6 @@ END
## libre patching ##
- # Remove remaining non-free bits
# Remove test-related networking dumps, because they contain code from
# some Amazon webpage with no clear licensing, thus nonfree.
# Also they interfere with checking of Remote Settings patching done later,
@@ -374,7 +372,9 @@ END
-e '!third_party/python/**/*.egg-info/'
rm -rf .git
- # Patch and remove anything that's left
+ python ../libre-process-json-files.py "$srcdir/firefox-$pkgver" "${brandingsrcdir}"
+
+ # Remove remaining non-free bits
echo "applying libre.patch"
patch -Np1 --no-backup-if-mismatch -i "${srcdir}"/libre.patch
}
diff --git a/libre/iceweasel/libre-process-json-files.py b/libre/iceweasel/libre-process-json-files.py
new file mode 100644
index 000000000..2fdde62d4
--- /dev/null
+++ b/libre/iceweasel/libre-process-json-files.py
@@ -0,0 +1,174 @@
+#! /usr/bin/python3
+
+# Copyright (C) 2020 grizzlyuser <grizzlyuser@protonmail.com>
+# Based on: https://gitlab.trisquel.org/trisquel/wrapage-helpers/-/blob/81881d89b2bf7d502dd14fcccdb471fec6f6b206/helpers/DATA/firefox/reprocess-search-config.py
+# Below is the notice from the original author:
+#
+# Copyright (C) 2020 Ruben Rodriguez <ruben@trisquel.info>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import json
+import sys
+import time
+import copy
+import argparse
+import pathlib
+from collections import namedtuple
+from jsonschema import validate
+
# Command-line interface: two required source-tree paths, plus an optional
# indent width used when the rewritten JSON files are serialized.
argument_parser = argparse.ArgumentParser()
argument_parser.add_argument(
    'MAIN_PATH',
    type=pathlib.Path,
    help='path to main application source code directory')
argument_parser.add_argument(
    'BRANDING_PATH',
    type=pathlib.Path,
    help='path to branding source code directory')
argument_parser.add_argument(
    '-i',
    '--indent',
    type=int,
    help='indent for pretty printing of output files')
arguments = argument_parser.parse_args()

# Pairs a file's on-disk path with its parsed JSON content.
File = namedtuple('File', ['path', 'content'])
+
+
class RemoteSettings:
    """Base processor for Remote Settings JSON dumps.

    Dumps live under services/settings/dumps/<bucket>/<collection>.json and
    wrap their list of records in a top-level {"data": [...]} object.
    Subclasses override JSON_PATHS and process_raw() for specific dumps.
    """

    DUMPS_PATH = arguments.MAIN_PATH / 'services/settings/dumps'
    JSON_PATHS = tuple(DUMPS_PATH.glob('*/*.json'))
    WRAPPER_NAME = 'data'

    @classmethod
    def wrap(cls, processed):
        """Re-wrap processed records into the on-disk {"data": ...} shape."""
        return File(processed.path, {cls.WRAPPER_NAME: processed.content})

    @classmethod
    def unwrap(cls, parsed_jsons):
        """Strip the top-level {"data": ...} wrapper from each parsed dump."""
        # Loop variable renamed from `json` -- the original shadowed the
        # imported json module inside the comprehension.
        return [File(parsed.path, parsed.content[cls.WRAPPER_NAME])
                for parsed in parsed_jsons]

    @classmethod
    def process_raw(cls, unwrapped_jsons):
        """Rebuild the monitor/changes.json index over all collection dumps.

        Each entry records a collection's bucket, name, and the newest
        last_modified timestamp among its records.
        """
        changes = []
        output_path = cls.DUMPS_PATH / 'monitor/changes.json'

        for collection in unwrapped_jsons:
            # The example collection is a placeholder, not a real dump.
            if collection.path == cls.DUMPS_PATH / 'main/example.json':
                continue
            latest_change = {}
            # default=0 keeps a collection with no records valid.
            latest_change['last_modified'] = max(
                (record['last_modified'] for record in collection.content), default=0)
            latest_change['bucket'] = collection.path.parent.name
            latest_change['collection'] = collection.path.stem
            changes.append(latest_change)

        # DUMPS_PATH itself exists; only the monitor/ subdirectory may not.
        output_path.parent.mkdir(exist_ok=True)

        return File(output_path, changes)

    @classmethod
    def process(cls, parsed_jsons):
        """Full pipeline: unwrap dumps, rebuild the index, re-wrap the result."""
        return cls.wrap(cls.process_raw(cls.unwrap(parsed_jsons)))
+
+
class SearchConfig(RemoteSettings):
    """Processor for the main/search-config Remote Settings dump.

    Keeps only the whitelisted search engines, strips telemetry and
    distribution-specific fields, makes the first whitelist entry the
    default everywhere, and stamps a fresh unique last_modified on any
    record that was actually changed.
    """

    JSON_PATHS = (RemoteSettings.DUMPS_PATH / 'main/search-config.json',)

    # Declared as a staticmethod: the original bare function only worked
    # because Python 3 returns plain functions unbound on class access.
    @staticmethod
    def _get_schema():
        """Load Mozilla's JSON schema for search engine records."""
        PATH = arguments.MAIN_PATH / \
            'toolkit/components/search/schema/search-engine-config-schema.json'
        with PATH.open() as file:
            return json.load(file)

    @classmethod
    def process_raw(cls, unwrapped_jsons):
        _WHITELIST = ('ddg@search.mozilla.org', 'wikipedia@search.mozilla.org')
        SCHEMA = cls._get_schema()

        search_engines, timestamps = [], []
        search_config = unwrapped_jsons[0]

        for search_engine in search_config.content:
            if search_engine['webExtension']['id'] in _WHITELIST:
                # Pristine copy, used below to detect whether this record
                # was modified and therefore needs a new timestamp.
                clone = copy.deepcopy(search_engine)

                if 'telemetryId' in search_engine:
                    del search_engine['telemetryId']
                if 'extraParams' in search_engine:
                    del search_engine['extraParams']

                general_specifier = {}
                # Iterate over a copy: entries may be removed mid-loop.
                for specifier in search_engine['appliesTo'].copy():
                    if 'application' in specifier:
                        if 'distributions' in specifier['application']:
                            search_engine['appliesTo'].remove(specifier)
                            continue
                        if 'extraParams' in specifier['application']:
                            del specifier['application']['extraParams']

                    if 'included' in specifier and 'everywhere' in specifier[
                            'included'] and specifier['included']['everywhere']:
                        general_specifier = specifier

                if not general_specifier:
                    general_specifier = {'included': {'everywhere': True}}
                    search_engine['appliesTo'].insert(0, general_specifier)
                # The first whitelist entry (DuckDuckGo) becomes the default.
                if search_engine['webExtension']['id'] == _WHITELIST[0]:
                    general_specifier['default'] = 'yes'

                if clone != search_engine:
                    # Unique millisecond timestamps. Integer floor division
                    # replaces int(round(time.time_ns() / 10 ** 6)), which
                    # routed an ~10^18 value through lossy float division.
                    timestamp = time.time_ns() // 10 ** 6
                    while timestamp in timestamps:
                        timestamp += 1
                    timestamps.append(timestamp)
                    search_engine['last_modified'] = timestamp

                # Fail loudly if the edited record no longer matches the schema.
                validate(search_engine, schema=SCHEMA)

                search_engines.append(search_engine)

        return File(search_config.path, search_engines)
+
+
class TopSites:
    """Processor for the tippytop top_sites.json lists.

    Combines the branding tree's site list with upstream's Wikipedia
    entry; the result is written back to the main tree's path only.
    """

    JSON_PATHS = (
        arguments.MAIN_PATH /
        'browser/components/newtab/data/content/tippytop/top_sites.json',
        arguments.BRANDING_PATH /
        'tippytop/top_sites.json')

    @classmethod
    def process(cls, parsed_jsons):
        """Return the merged list, targeted at the main tree's file."""
        main_top_sites, branding_top_sites = parsed_jsons
        kept_from_upstream = [
            site for site in main_top_sites.content if site['title'] == 'wikipedia']
        merged = branding_top_sites.content + kept_from_upstream
        return File(main_top_sites.path, merged)
+
+
# Order matters: RemoteSettings runs last so the monitor/changes.json index
# it builds reflects the files already rewritten by SearchConfig, since each
# processor re-reads its inputs from disk.
processors = (SearchConfig, TopSites, RemoteSettings)

for processor in processors:
    # Read every input file for this processor, pairing path with content.
    parsed_jsons = [
        File(json_path, json.loads(json_path.read_text()))
        for json_path in processor.JSON_PATHS
    ]

    processed = processor.process(parsed_jsons)
    with processed.path.open('w') as file:
        json.dump(processed.content, file, indent=arguments.indent)