summaryrefslogtreecommitdiff
path: root/maintenance/postgres
diff options
context:
space:
mode:
Diffstat (limited to 'maintenance/postgres')
-rw-r--r--maintenance/postgres/compare_schemas.pl181
-rw-r--r--maintenance/postgres/tables.sql156
-rw-r--r--maintenance/postgres/wp_mysql2postgres.pl400
3 files changed, 687 insertions, 50 deletions
diff --git a/maintenance/postgres/compare_schemas.pl b/maintenance/postgres/compare_schemas.pl
new file mode 100644
index 00000000..4a76b270
--- /dev/null
+++ b/maintenance/postgres/compare_schemas.pl
@@ -0,0 +1,181 @@
+#!/usr/bin/perl
+
+## Rough check that the base and postgres "tables.sql" are in sync
+## Should be run from maintenance/postgres
+
+use strict;
+use warnings;
+use Data::Dumper;
+
+my @old = ("../tables.sql");
+my $new = "tables.sql";
+
+## Read in exceptions and other metadata
+my %ok;
+while (<DATA>) {
+ next unless /^(\w+)\s*:\s*([^#]+)/;
+ my ($name,$val) = ($1,$2);
+ chomp $val;
+ if ($name eq 'RENAME') {
+ die "Invalid rename\n" unless $val =~ /(\w+)\s+(\w+)/;
+ $ok{OLD}{$1} = $2;
+ $ok{NEW}{$2} = $1;
+ next;
+ }
+ if ($name eq 'XFILE') {
+ push @old, $val;
+ next;
+ }
+ for (split(/\s+/ => $val)) {
+ $ok{$name}{$_} = 0;
+ }
+}
+
+open my $newfh, "<", $new or die qq{Could not open $new: $!\n};
+
+my $datatype = join '|' => qw(
+bool
+tinyint int bigint real float
+tinytext mediumtext text char varchar
+timestamp datetime
+tinyblob mediumblob blob
+);
+$datatype .= q{|ENUM\([\"\w, ]+\)};
+$datatype = qr{($datatype)};
+
+my $typeval = qr{(\(\d+\))?};
+
+my $typeval2 = qr{ unsigned| binary| NOT NULL| NULL| auto_increment| default ['\-\d\w"]+| REFERENCES .+CASCADE};
+
+my $indextype = join '|' => qw(INDEX KEY FULLTEXT), "PRIMARY KEY", "UNIQUE INDEX", "UNIQUE KEY";
+$indextype = qr{$indextype};
+
+my $tabletype = qr{InnoDB|MyISAM|HEAP|HEAP MAX_ROWS=\d+};
+
+my ($table,%old);
+for my $old (@old) {
+ open my $oldfh, "<", $old or die qq{Could not open $old: $!\n};
+
+ while (<$oldfh>) {
+ next if /^\s*\-\-/ or /^\s+$/;
+ s/\s*\-\- [\w ]+$//;
+ chomp;
+
+ if (/CREATE\s*TABLE/i) {
+ m{^CREATE TABLE /\*\$wgDBprefix\*/(\w+) \($}
+ or die qq{Invalid CREATE TABLE at line $. of $old\n};
+ $table = $1;
+ $old{$table}{name}=$table;
+ }
+ elsif (/^\) TYPE=($tabletype);$/) {
+ $old{$table}{type}=$1;
+ }
+ elsif (/^ (\w+) $datatype$typeval$typeval2{0,3},?$/) {
+ $old{$table}{column}{$1} = $2;
+ }
+ elsif (/^ ($indextype)(?: (\w+))? \(([\w, \(\)]+)\),?$/) {
+ $old{$table}{lc $1."_name"} = $2 ? $2 : "";
+ $old{$table}{lc $1."pk_target"} = $3;
+ }
+ else {
+ die "Cannot parse line $. of $old:\n$_\n";
+ }
+ }
+ close $oldfh;
+}
+
+$datatype = join '|' => qw(
+SMALLINT INTEGER BIGINT NUMERIC SERIAL
+TEXT CHAR VARCHAR
+BYTEA
+TIMESTAMPTZ
+CIDR
+);
+$datatype = qr{($datatype)};
+my %new;
+my ($infunction,$inview,$inrule) = (0,0,0);
+while (<$newfh>) {
+ next if /^\s*\-\-/ or /^\s*$/;
+ s/\s*\-\- [\w ']+$//;
+ next if /^BEGIN;/ or /^SET / or /^COMMIT;/;
+ next if /^CREATE SEQUENCE/;
+ next if /^CREATE(?: UNIQUE)? INDEX/;
+ next if /^CREATE FUNCTION/;
+ next if /^CREATE TRIGGER/ or /^ FOR EACH ROW/;
+ next if /^INSERT INTO/ or /^ VALUES \(/;
+ next if /^ALTER TABLE/;
+ chomp;
+
+ if (/^\$mw\$;?$/) {
+ $infunction = $infunction ? 0 : 1;
+ next;
+ }
+ next if $infunction;
+
+ next if /^CREATE VIEW/ and $inview = 1;
+ if ($inview) {
+ /;$/ and $inview = 0;
+ next;
+ }
+
+ next if /^CREATE RULE/ and $inrule = 1;
+ if ($inrule) {
+ /;$/ and $inrule = 0;
+ next;
+ }
+
+ if (/^CREATE TABLE "?(\w+)"? \($/) {
+ $table = $1;
+ $new{$table}{name}=$table;
+ }
+ elsif (/^\);$/) {
+ }
+ elsif (/^ (\w+) +$datatype/) {
+ $new{$table}{column}{$1} = $2;
+ }
+ else {
+ die "Cannot parse line $. of $new:\n$_\n";
+ }
+}
+close $newfh;
+
+## Old but not new
+for my $t (sort keys %old) {
+ if (!exists $new{$t} and !exists $ok{OLD}{$t}) {
+ print "Table not in $new: $t\n";
+ next;
+ }
+ next if exists $ok{OLD}{$t} and !$ok{OLD}{$t};
+ my $newt = exists $ok{OLD}{$t} ? $ok{OLD}{$t} : $t;
+ my $oldcol = $old{$t}{column};
+ my $newcol = $new{$newt}{column};
+ for my $c (keys %$oldcol) {
+ if (!exists $newcol->{$c}) {
+ print "Column $t.$c not in new\n";
+ next;
+ }
+ }
+ for my $c (keys %$newcol) {
+ if (!exists $oldcol->{$c}) {
+ print "Column $t.$c not in old\n";
+ next;
+ }
+ }
+}
+## New but not old:
+for (sort keys %new) {
+ if (!exists $old{$_} and !exists $ok{NEW}{$_}) {
+ print "Not in old: $_\n";
+ next;
+ }
+}
+
+__DATA__
+## Known exceptions
+OLD: searchindex ## We use tsearch2 directly on the page table instead
+OLD: archive ## This is a view due to the char(14) timestamp hack
+RENAME: user mwuser ## Reserved word causing lots of problems
+RENAME: text pagecontent ## Reserved word
+NEW: archive2 ## The real archive table
+NEW: mediawiki_version ## Just us, for now
+XFILE: ../archives/patch-profiling.sql
diff --git a/maintenance/postgres/tables.sql b/maintenance/postgres/tables.sql
index 5481a394..9ac329d8 100644
--- a/maintenance/postgres/tables.sql
+++ b/maintenance/postgres/tables.sql
@@ -11,7 +11,7 @@ BEGIN;
SET client_min_messages = 'ERROR';
CREATE SEQUENCE user_user_id_seq MINVALUE 0 START WITH 0;
-CREATE TABLE "user" (
+CREATE TABLE mwuser ( -- replace reserved word 'user'
user_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('user_user_id_seq'),
user_name TEXT NOT NULL UNIQUE,
user_real_name TEXT,
@@ -26,20 +26,20 @@ CREATE TABLE "user" (
user_touched TIMESTAMPTZ,
user_registration TIMESTAMPTZ
);
-CREATE INDEX user_email_token_idx ON "user" (user_email_token);
+CREATE INDEX user_email_token_idx ON mwuser (user_email_token);
-- Create a dummy user to satisfy fk contraints especially with revisions
-INSERT INTO "user" VALUES
- (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());
+INSERT INTO mwuser
+ VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());
CREATE TABLE user_groups (
- ug_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE CASCADE,
+ ug_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE CASCADE,
ug_group TEXT NOT NULL
);
CREATE UNIQUE INDEX user_groups_unique ON user_groups (ug_user, ug_group);
CREATE TABLE user_newtalk (
- user_id INTEGER NOT NULL REFERENCES "user"(user_id) ON DELETE CASCADE,
+ user_id INTEGER NOT NULL REFERENCES mwuser(user_id) ON DELETE CASCADE,
user_ip CIDR NULL
);
CREATE INDEX user_newtalk_id_idx ON user_newtalk (user_id);
@@ -69,18 +69,24 @@ CREATE INDEX page_project_title ON page (page_title) WHERE page_namespace =
CREATE INDEX page_random_idx ON page (page_random);
CREATE INDEX page_len_idx ON page (page_len);
--- Create a dummy page to satisfy fk contraints where a page_id of "0" is added
-INSERT INTO page (page_id,page_namespace,page_title,page_random,page_latest,page_len)
-VALUES (0,0,'',0.0,0,0);
+CREATE FUNCTION page_deleted() RETURNS TRIGGER LANGUAGE plpgsql AS
+$mw$
+BEGIN
+DELETE FROM recentchanges WHERE rc_namespace = OLD.page_namespace AND rc_title = OLD.page_title;
+RETURN NULL;
+END;
+$mw$;
+CREATE TRIGGER page_deleted AFTER DELETE ON page
+ FOR EACH ROW EXECUTE PROCEDURE page_deleted();
CREATE SEQUENCE rev_rev_id_val;
CREATE TABLE revision (
rev_id INTEGER NOT NULL UNIQUE DEFAULT nextval('rev_rev_id_val'),
- rev_page INTEGER NULL REFERENCES page (page_id) ON DELETE SET NULL,
+ rev_page INTEGER NULL REFERENCES page (page_id) ON DELETE CASCADE,
rev_text_id INTEGER NULL, -- FK
rev_comment TEXT,
- rev_user INTEGER NOT NULL REFERENCES "user"(user_id),
+ rev_user INTEGER NOT NULL REFERENCES mwuser(user_id),
rev_user_text TEXT NOT NULL,
rev_timestamp TIMESTAMPTZ NOT NULL,
rev_minor_edit CHAR NOT NULL DEFAULT '0',
@@ -93,19 +99,19 @@ CREATE INDEX rev_user_text_idx ON revision (rev_user_text);
CREATE SEQUENCE text_old_id_val;
-CREATE TABLE "text" (
+CREATE TABLE pagecontent ( -- replaces reserved word 'text'
old_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('text_old_id_val'),
old_text TEXT,
old_flags TEXT
);
-CREATE TABLE archive (
+CREATE TABLE archive2 (
ar_namespace SMALLINT NOT NULL,
ar_title TEXT NOT NULL,
ar_text TEXT,
ar_comment TEXT,
- ar_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL,
+ ar_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL,
ar_user_text TEXT NOT NULL,
ar_timestamp TIMESTAMPTZ NOT NULL,
ar_minor_edit CHAR NOT NULL DEFAULT '0',
@@ -113,7 +119,22 @@ CREATE TABLE archive (
ar_rev_id INTEGER,
ar_text_id INTEGER
);
-CREATE INDEX archive_name_title_timestamp ON archive (ar_namespace,ar_title,ar_timestamp);
+CREATE INDEX archive_name_title_timestamp ON archive2 (ar_namespace,ar_title,ar_timestamp);
+
+-- This is the easiest way to work around the char(15) timestamp hack without modifying PHP code
+CREATE VIEW archive AS
+SELECT
+ ar_namespace, ar_title, ar_text, ar_comment, ar_user, ar_user_text,
+ ar_minor_edit, ar_flags, ar_rev_id, ar_text_id,
+ TO_CHAR(ar_timestamp, 'YYYYMMDDHH24MISS') AS ar_timestamp
+FROM archive2;
+
+CREATE RULE archive_insert AS ON INSERT TO archive
+DO INSTEAD INSERT INTO archive2 VALUES (
+ NEW.ar_namespace, NEW.ar_title, NEW.ar_text, NEW.ar_comment, NEW.ar_user, NEW.ar_user_text,
+ TO_DATE(NEW.ar_timestamp, 'YYYYMMDDHH24MISS'),
+ NEW.ar_minor_edit, NEW.ar_flags, NEW.ar_rev_id, NEW.ar_text_id
+);
CREATE TABLE pagelinks (
@@ -121,7 +142,7 @@ CREATE TABLE pagelinks (
pl_namespace SMALLINT NOT NULL,
pl_title TEXT NOT NULL
);
-CREATE UNIQUE INDEX pagelink_unique ON pagelinks (pl_namespace,pl_title,pl_from);
+CREATE UNIQUE INDEX pagelink_unique ON pagelinks (pl_from,pl_namespace,pl_title);
CREATE TABLE templatelinks (
tl_from INTEGER NOT NULL REFERENCES page(page_id) ON DELETE CASCADE,
@@ -180,16 +201,18 @@ CREATE TABLE hitcounter (
CREATE SEQUENCE ipblocks_ipb_id_val;
CREATE TABLE ipblocks (
- ipb_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('ipblocks_ipb_id_val'),
- ipb_address CIDR NULL,
- ipb_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL,
- ipb_by INTEGER NOT NULL REFERENCES "user"(user_id) ON DELETE CASCADE,
- ipb_reason TEXT NOT NULL,
- ipb_timestamp TIMESTAMPTZ NOT NULL,
- ipb_auto CHAR NOT NULL DEFAULT '0',
- ipb_expiry TIMESTAMPTZ NOT NULL,
- ipb_range_start TEXT,
- ipb_range_end TEXT
+ ipb_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('ipblocks_ipb_id_val'),
+ ipb_address CIDR NULL,
+ ipb_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL,
+ ipb_by INTEGER NOT NULL REFERENCES mwuser(user_id) ON DELETE CASCADE,
+ ipb_reason TEXT NOT NULL,
+ ipb_timestamp TIMESTAMPTZ NOT NULL,
+ ipb_auto CHAR NOT NULL DEFAULT '0',
+ ipb_anon_only CHAR NOT NULL DEFAULT '0',
+ ipb_create_account CHAR NOT NULL DEFAULT '1',
+ ipb_expiry TIMESTAMPTZ NOT NULL,
+ ipb_range_start TEXT,
+ ipb_range_end TEXT
);
CREATE INDEX ipb_address ON ipblocks (ipb_address);
CREATE INDEX ipb_user ON ipblocks (ipb_user);
@@ -198,16 +221,16 @@ CREATE INDEX ipb_range ON ipblocks (ipb_range_start,ipb_range_end);
CREATE TABLE image (
img_name TEXT NOT NULL PRIMARY KEY,
- img_size SMALLINT NOT NULL,
- img_width SMALLINT NOT NULL,
- img_height SMALLINT NOT NULL,
+ img_size INTEGER NOT NULL,
+ img_width INTEGER NOT NULL,
+ img_height INTEGER NOT NULL,
img_metadata TEXT,
img_bits SMALLINT,
img_media_type TEXT,
img_major_mime TEXT DEFAULT 'unknown',
img_minor_mime TEXT DEFAULT 'unknown',
img_description TEXT NOT NULL,
- img_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL,
+ img_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL,
img_user_text TEXT NOT NULL,
img_timestamp TIMESTAMPTZ
);
@@ -217,12 +240,12 @@ CREATE INDEX img_timestamp_idx ON image (img_timestamp);
CREATE TABLE oldimage (
oi_name TEXT NOT NULL REFERENCES image(img_name),
oi_archive_name TEXT NOT NULL,
- oi_size SMALLINT NOT NULL,
- oi_width SMALLINT NOT NULL,
- oi_height SMALLINT NOT NULL,
+ oi_size INTEGER NOT NULL,
+ oi_width INTEGER NOT NULL,
+ oi_height INTEGER NOT NULL,
oi_bits SMALLINT NOT NULL,
oi_description TEXT,
- oi_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL,
+ oi_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL,
oi_user_text TEXT NOT NULL,
oi_timestamp TIMESTAMPTZ NOT NULL
);
@@ -235,7 +258,7 @@ CREATE TABLE filearchive (
fa_archive_name TEXT,
fa_storage_group VARCHAR(16),
fa_storage_key CHAR(64),
- fa_deleted_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL,
+ fa_deleted_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL,
fa_deleted_timestamp TIMESTAMPTZ NOT NULL,
fa_deleted_reason TEXT,
fa_size SMALLINT NOT NULL,
@@ -247,7 +270,7 @@ CREATE TABLE filearchive (
fa_major_mime TEXT DEFAULT 'unknown',
fa_minor_mime TEXT DEFAULT 'unknown',
fa_description TEXT NOT NULL,
- fa_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL,
+ fa_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL,
fa_user_text TEXT NOT NULL,
fa_timestamp TIMESTAMPTZ
);
@@ -262,7 +285,7 @@ CREATE TABLE recentchanges (
rc_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('rc_rc_id_seq'),
rc_timestamp TIMESTAMPTZ NOT NULL,
rc_cur_time TIMESTAMPTZ NOT NULL,
- rc_user INTEGER NULL REFERENCES "user"(user_id) ON DELETE SET NULL,
+ rc_user INTEGER NULL REFERENCES mwuser(user_id) ON DELETE SET NULL,
rc_user_text TEXT NOT NULL,
rc_namespace SMALLINT NOT NULL,
rc_title TEXT NOT NULL,
@@ -270,7 +293,7 @@ CREATE TABLE recentchanges (
rc_minor CHAR NOT NULL DEFAULT '0',
rc_bot CHAR NOT NULL DEFAULT '0',
rc_new CHAR NOT NULL DEFAULT '0',
- rc_cur_id INTEGER NOT NULL REFERENCES page(page_id),
+ rc_cur_id INTEGER NULL REFERENCES page(page_id) ON DELETE SET NULL,
rc_this_oldid INTEGER NOT NULL,
rc_last_oldid INTEGER NOT NULL,
rc_type CHAR NOT NULL DEFAULT '0',
@@ -287,7 +310,7 @@ CREATE INDEX rc_ip ON recentchanges (rc_ip);
CREATE TABLE watchlist (
- wl_user INTEGER NOT NULL REFERENCES "user"(user_id) ON DELETE CASCADE,
+ wl_user INTEGER NOT NULL REFERENCES mwuser(user_id) ON DELETE CASCADE,
wl_namespace SMALLINT NOT NULL DEFAULT 0,
wl_title TEXT NOT NULL,
wl_notificationtimestamp TIMESTAMPTZ
@@ -343,7 +366,7 @@ CREATE TABLE logging (
log_type TEXT NOT NULL,
log_action TEXT NOT NULL,
log_timestamp TIMESTAMPTZ NOT NULL,
- log_user INTEGER REFERENCES "user"(user_id) ON DELETE SET NULL,
+ log_user INTEGER REFERENCES mwuser(user_id) ON DELETE SET NULL,
log_namespace SMALLINT NOT NULL,
log_title TEXT NOT NULL,
log_comment TEXT,
@@ -383,38 +406,71 @@ CREATE FUNCTION ts2_page_title() RETURNS TRIGGER LANGUAGE plpgsql AS
$mw$
BEGIN
IF TG_OP = 'INSERT' THEN
- NEW.titlevector = to_tsvector(NEW.page_title);
+ NEW.titlevector = to_tsvector('default',NEW.page_title);
ELSIF NEW.page_title != OLD.page_title THEN
- NEW.titlevector := to_tsvector(NEW.page_title);
+ NEW.titlevector := to_tsvector('default',NEW.page_title);
END IF;
RETURN NEW;
END;
$mw$;
CREATE TRIGGER ts2_page_title BEFORE INSERT OR UPDATE ON page
-FOR EACH ROW EXECUTE PROCEDURE ts2_page_title();
+ FOR EACH ROW EXECUTE PROCEDURE ts2_page_title();
-ALTER TABLE text ADD textvector tsvector;
-CREATE INDEX ts2_page_text ON text USING gist(textvector);
+ALTER TABLE pagecontent ADD textvector tsvector;
+CREATE INDEX ts2_page_text ON pagecontent USING gist(textvector);
CREATE FUNCTION ts2_page_text() RETURNS TRIGGER LANGUAGE plpgsql AS
$mw$
BEGIN
IF TG_OP = 'INSERT' THEN
- NEW.textvector = to_tsvector(NEW.old_text);
+ NEW.textvector = to_tsvector('default',NEW.old_text);
ELSIF NEW.old_text != OLD.old_text THEN
- NEW.textvector := to_tsvector(NEW.old_text);
+ NEW.textvector := to_tsvector('default',NEW.old_text);
END IF;
RETURN NEW;
END;
$mw$;
-CREATE TRIGGER ts2_page_text BEFORE INSERT OR UPDATE ON text
-FOR EACH ROW EXECUTE PROCEDURE ts2_page_text();
+CREATE TRIGGER ts2_page_text BEFORE INSERT OR UPDATE ON pagecontent
+ FOR EACH ROW EXECUTE PROCEDURE ts2_page_text();
-CREATE OR REPLACE FUNCTION add_interwiki (TEXT,INT,CHAR) RETURNS INT LANGUAGE SQL AS
+CREATE FUNCTION add_interwiki (TEXT,INT,CHAR) RETURNS INT LANGUAGE SQL AS
$mw$
INSERT INTO interwiki (iw_prefix, iw_url, iw_local) VALUES ($1,$2,$3);
SELECT 1;
$mw$;
+
+-- This table is not used unless profiling is turned on
+CREATE TABLE profiling (
+ pf_count INTEGER NOT NULL DEFAULT 0,
+ pf_time NUMERIC(18,10) NOT NULL DEFAULT 0,
+ pf_name TEXT NOT NULL,
+ pf_server TEXT NULL
+);
+CREATE UNIQUE INDEX pf_name_server ON profiling (pf_name, pf_server);
+
+
+CREATE TABLE mediawiki_version (
+ type TEXT NOT NULL,
+ mw_version TEXT NOT NULL,
+ notes TEXT NULL,
+
+ pg_version TEXT NULL,
+ pg_dbname TEXT NULL,
+ pg_user TEXT NULL,
+ pg_port TEXT NULL,
+ mw_schema TEXT NULL,
+ ts2_schema TEXT NULL,
+ ctype TEXT NULL,
+
+ sql_version TEXT NULL,
+ sql_date TEXT NULL,
+ cdate TIMESTAMPTZ NOT NULL DEFAULT now()
+);
+
+INSERT INTO mediawiki_version (type,mw_version,sql_version,sql_date)
+ VALUES ('Creation','??','$LastChangedRevision: 16747 $','$LastChangedDate: 2006-10-02 17:55:26 -0700 (Mon, 02 Oct 2006) $');
+
+
COMMIT;
diff --git a/maintenance/postgres/wp_mysql2postgres.pl b/maintenance/postgres/wp_mysql2postgres.pl
new file mode 100644
index 00000000..788d9e0b
--- /dev/null
+++ b/maintenance/postgres/wp_mysql2postgres.pl
@@ -0,0 +1,400 @@
+#!/usr/bin/perl
+
+## Convert data from a MySQL mediawiki database into a Postgres mediawiki database
+## svn: $Id: wp_mysql2postgres.pl 16088 2006-08-16 01:12:20Z greg $
+
+use strict;
+use warnings;
+use Data::Dumper;
+use Getopt::Long;
+
+use vars qw(%table %tz %special @torder $COM);
+my $VERSION = "1.0";
+
+## The following options can be changed via command line arguments:
+my $MYSQLDB = 'wikidb';
+my $MYSQLUSER = 'wikiuser';
+
+## If the following are zero-length, we omit their arguments entirely:
+my $MYSQLHOST = '';
+my $MYSQLPASSWORD = '';
+my $MYSQLSOCKET = '';
+
+## Name of the dump file created
+my $MYSQLDUMPFILE = "mediawiki_upgrade.pg";
+
+## How verbose should this script be (0, 1, or 2)
+my $verbose = 0;
+
+my $USAGE = "
+Usage: $0 [OPTION]...
+Convert a MediaWiki schema from MySQL to Postgres
+Example: $0 --db=wikidb --user=wikiuser --pass=sushi
+Options:
+ db Name of the MySQL database
+ user MySQL database username
+ pass MySQL database password
+ host MySQL database host
+ socket MySQL database socket
+ verbose Verbosity, increases with multiple uses
+";
+
+GetOptions
+ (
+ "db=s" => \$MYSQLDB,
+ "user=s" => \$MYSQLUSER,
+ "pass=s" => \$MYSQLPASSWORD,
+ "host=s" => \$MYSQLHOST,
+ "socket=s" => \$MYSQLSOCKET,
+ "verbose+" => \$verbose
+ );
+
+## The Postgres schema file: should not be changed
+my $PG_SCHEMA = "tables.sql";
+
+## What version we default to when we can't parse the old schema
+my $MW_DEFAULT_VERSION = '1.8';
+
+## Try and find a working version of mysqldump
+$verbose and warn "Locating the mysqldump executable\n";
+my @MYSQLDUMP = ("/usr/local/bin/mysqldump", "/usr/bin/mysqldump");
+my $MYSQLDUMP;
+for my $mytry (@MYSQLDUMP) {
+ next if ! -e $mytry;
+ -x $mytry or die qq{Not an executable file: "$mytry"\n};
+ my $version = qx{$mytry -V};
+ $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n};
+ $MYSQLDUMP = $mytry;
+}
+$MYSQLDUMP or die qq{Could not find the mysqldump program\n};
+
+## Flags we use for mysqldump
+my @MYSQLDUMPARGS = qw(
+--skip-lock-tables
+--complete-insert
+--skip-extended-insert
+--skip-add-drop-table
+--skip-add-locks
+--skip-disable-keys
+--skip-set-charset
+--skip-comments
+--skip-quote-names
+);
+
+
+$verbose and warn "Checking that mysqldump can handle our flags\n";
+## Make sure this version can handle all the flags we want.
+## Combine with user dump below
+my $MYSQLDUMPARGS = join " " => @MYSQLDUMPARGS;
+## Argh. Any way to make this work on Win32?
+my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1};
+if ($version =~ /unknown option/) {
+ die qq{Sorry, you need to use a newer version of the mysqldump program than the one at "$MYSQLDUMP"\n};
+}
+
+push @MYSQLDUMPARGS, "--user=$MYSQLUSER";
+length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD";
+length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST";
+
+## Open the dump file to hold the mysqldump output
+open my $mdump, "+>", $MYSQLDUMPFILE or die qq{Could not open "$MYSQLDUMPFILE": $!\n};
+$verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n};
+
+open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, "--no-data", $MYSQLDB;
+my $oldselect = select $mdump;
+
+print while <$mfork2>;
+
+## Slurp in the current schema
+my $current_schema;
+seek $mdump, 0, 0;
+{
+ local $/;
+ $current_schema = <$mdump>;
+}
+seek $mdump, 0, 0;
+truncate $mdump, 0;
+
+warn qq{Trying to determine database version...\n} if $verbose;
+
+my $current_version = 0;
+if ($current_schema =~ /CREATE TABLE \S+cur /) {
+ $current_version = '1.3';
+}
+elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) {
+ $current_version = '1.4';
+}
+elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) {
+ $current_version = '1.5';
+}
+elsif ($current_schema !~ /CREATE TABLE \S+validate /) {
+ $current_version = '1.6';
+}
+elsif ($current_schema !~ /ipb_auto tinyint/) {
+ $current_version = '1.7';
+}
+else {
+ $current_version = '1.8';
+}
+
+if (!$current_version) {
+ warn qq{WARNING! Could not figure out the old version, assuming MediaWiki $MW_DEFAULT_VERSION\n};
+ $current_version = $MW_DEFAULT_VERSION;
+}
+
+## Check for a table prefix:
+my $table_prefix = '';
+if ($current_version =~ /CREATE TABLE (\S+)archive /) {
+ $table_prefix = $1;
+}
+
+warn qq{Old schema is from MediaWiki version $current_version\n} if $verbose;
+warn qq{Table prefix is "$table_prefix"\n} if $verbose and length $table_prefix;
+
+$verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n};
+my $now = scalar localtime();
+my $conninfo = '';
+$MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST";
+$MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET";
+
+print qq{
+-- Dump of MySQL Mediawiki tables for import into a Postgres Mediawiki schema
+-- Performed by the program: $0
+-- Version: $VERSION (subversion }.q{$LastChangedRevision: 16088 $}.qq{)
+-- Author: Greg Sabino Mullane <greg\@turnstep.com> Comments welcome
+--
+-- This file was created: $now
+-- Executable used: $MYSQLDUMP
+-- Connection information:
+-- database: $MYSQLDB
+-- user: $MYSQLUSER$conninfo
+
+-- This file can be imported manually with psql like so:
+-- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename
+-- This will overwrite any existing MediaWiki information, so be careful
+
+
+};
+
+warn qq{Reading in the Postgres schema information\n} if $verbose;
+open my $schema, "<", $PG_SCHEMA
+ or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n};
+my $t;
+while (<$schema>) {
+ if (/CREATE TABLE\s+(\S+)/) {
+ $t = $1;
+ $table{$t}={};
+ }
+ elsif (/^ +(\w+)\s+TIMESTAMP/) {
+ $tz{$t}{$1}++;
+ }
+ elsif (/REFERENCES\s*([^( ]+)/) {
+ my $ref = $1;
+ exists $table{$ref} or die qq{No parent table $ref found for $t\n};
+ $table{$t}{$ref}++;
+ }
+}
+close $schema;
+
+## Read in special cases and table/version information
+$verbose and warn qq{Reading in schema exception information\n};
+my %version_tables;
+while (<DATA>) {
+ if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) {
+ my $list = join '|' => split /\s+/ => $2;
+ $version_tables{$1} = qr{\b$list\b};
+ next;
+ }
+ next unless /^(\w+)\s*(.*)/;
+ $special{$1} = $2||'';
+ $special{$2} = $1 if length $2;
+}
+
+## Determine the order of tables based on foreign key constraints
+$verbose and warn qq{Figuring out order of tables to dump\n};
+my %dumped;
+my $bail = 0;
+{
+ my $found=0;
+ T: for my $t (sort keys %table) {
+ next if exists $dumped{$t} and $dumped{$t} >= 1;
+ $found=1;
+ for my $dep (sort keys %{$table{$t}}) {
+ next T if ! exists $dumped{$dep} or $dumped{$dep} < 0;
+ }
+ $dumped{$t} = -1 if ! exists $dumped{$t};
+ ## Skip certain tables that are not imported
+ next if exists $special{$t} and !$special{$t};
+ push @torder, $special{$t} || $t;
+ }
+ last if !$found;
+ push @torder, "---";
+ for (values %dumped) { $_+=2; }
+ die "Too many loops!\n" if $bail++ > 1000;
+ redo;
+}
+
+## Prepare the Postgres database for the move
+$verbose and warn qq{Writing Postgres transformation information\n};
+
+print "\n-- Empty out all existing tables\n";
+$verbose and warn qq{Writing truncates to empty existing tables\n};
+for my $t (@torder) {
+ next if $t eq '---';
+ my $tname = $special{$t}||$t;
+ printf qq{TRUNCATE TABLE %-18s CASCADE;\n}, qq{"$tname"};
+}
+print "\n\n";
+
+print qq{-- Rename the "text" table\n};
+print qq{ALTER TABLE pagecontent RENAME TO "text";\n\n};
+
+print qq{-- Allow rc_ip to contain empty string, will convert at end\n};
+print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n};
+
+print "-- Changing all timestamp fields to handle raw integers\n";
+for my $t (sort keys %tz) {
+ next if $t eq "archive2";
+ for my $c (sort keys %{$tz{$t}}) {
+ printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c;
+ }
+}
+print "\n";
+
+print qq{
+INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10);
+};
+
+## If we have a table _prefix, we need to temporarily rename all of our Postgres
+## tables temporarily for the import. Perhaps consider making this an auto-schema
+## thing in the future.
+if (length $table_prefix) {
+ print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix "$table_prefix"\n\n};
+ for my $t (@torder) {
+ next if $t eq '---';
+ my $tname = $special{$t}||$t;
+ printf qq{ALTER TABLE %-18s RENAME TO "${table_prefix}$tname"\n}, qq{"$tname"};
+ }
+}
+
+
+## Try and dump the ill-named "user" table:
+## We do this table alone because "user" is a reserved word.
+print qq{
+
+SET escape_string_warning TO 'off';
+\\o /dev/null
+
+-- Postgres uses a table name of "mwuser" instead of "user"
+
+-- Create a dummy user to satisfy fk contraints especially with revisions
+SELECT setval('user_user_id_seq',0,'false');
+INSERT INTO mwuser
+ VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());
+
+};
+
+push @MYSQLDUMPARGS, "--no-create-info";
+
+$verbose and warn qq{Dumping "user" table\n};
+$verbose > 2 and warn Dumper \@MYSQLDUMPARGS;
+my $usertable = "${table_prefix}user";
+open my $mfork, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable;
+## Unfortunately, there is no easy way to catch errors
+my $numusers = 0;
+while (<$mfork>) {
+ ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/;
+}
+close $mfork;
+if ($numusers < 1) {
+ warn qq{No users found, probably a connection error.\n};
+ print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n};
+ close $mdump;
+ exit;
+}
+print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n";
+
+warn qq{Dumping all other tables from the MySQL schema\n} if $verbose;
+
+## Dump the rest of the tables, in chunks based on constraints
+## We do not need the user table:
+my @dumplist = grep { $_ ne 'user'} @torder;
+my @alist;
+{
+ undef @alist;
+ PICKATABLE: {
+ my $tname = shift @dumplist;
+ ## XXX Make this dynamic below
+ for my $ver (sort {$b <=> $a } keys %version_tables) {
+ redo PICKATABLE if $tname =~ $version_tables{$ver};
+ }
+ $tname = "${table_prefix}$tname" if length $table_prefix;
+ push @alist, $tname;
+ pop @alist and last if index($alist[-1],'---') >= 0;
+ redo if @dumplist;
+ }
+
+ ## Dump everything else
+ open my $mfork2, "-|" or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist;
+ print while <$mfork2>;
+ close $mfork2;
+ warn qq{Finished dumping from MySQL\n} if $verbose;
+
+ redo if @dumplist;
+}
+
+warn qq{Writing information to return Postgres database to normal\n} if $verbose;
+print qq{ALTER TABLE "${table_prefix}text" RENAME TO pagecontent;\n};
+print qq{ALTER TABLE ${table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n};
+print qq{ CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n};
+
+## Return tables to their original names if a table prefix was used.
+if (length $table_prefix) {
+ print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n};
+ my $maxsize = 18;
+ for (@torder) {
+ $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize;
+ }
+ for my $t (@torder) {
+ next if $t eq '---' or $t eq 'text';
+ my $tname = $special{$t}||$t;
+ printf qq{ALTER TABLE %*s RENAME TO "$tname"\n}, $maxsize+1, qq{"${table_prefix}$tname"};
+ }
+}
+
+print qq{\n\n--Returning timestamps to normal\n};
+for my $t (sort keys %tz) {
+ next if $t eq "archive2";
+ for my $c (sort keys %{$tz{$t}}) {
+ printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n".
+ " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c;
+ }
+}
+
+## Finally, make a record in the mediawiki_version table about this import
+print qq{
+INSERT INTO mediawiki_version (type,mw_version,notes) VALUES ('MySQL import','??',
+'Imported from file created on $now. Old version: $current_version');
+};
+
+
+print "\\o\n\n-- End of dump\n\n";
+select $oldselect;
+close $mdump;
+exit;
+
+
+__DATA__
+## Known remappings: either indicate the MySQL name,
+## or leave blank if it should be skipped
+pagecontent text
+mwuser user
+mediawiki_version
+archive2
+profiling
+objectcache
+
+## Which tables to ignore depending on the version
+VERSION 1.5: trackback
+VERSION 1.6: externallinks job templatelinks transcache
+VERSION 1.7: filearchive langlinks querycache_info