summaryrefslogtreecommitdiff
path: root/maintenance/postgres/compare_schemas.pl
diff options
context:
space:
mode:
Diffstat (limited to 'maintenance/postgres/compare_schemas.pl')
-rw-r--r--maintenance/postgres/compare_schemas.pl221
1 files changed, 220 insertions, 1 deletions
diff --git a/maintenance/postgres/compare_schemas.pl b/maintenance/postgres/compare_schemas.pl
index 297e3af2..6f639eca 100644
--- a/maintenance/postgres/compare_schemas.pl
+++ b/maintenance/postgres/compare_schemas.pl
@@ -112,6 +112,8 @@ sub parse_sql {
}
elsif (/^ (\w+) $datatype$typeval$typeval2{0,3},?$/) {
$info{$table}{column}{$1} = $2;
+ my $extra = $3 || '';
+ $info{$table}{columnfull}{$1} = "$2$extra";
}
elsif (/^ ($indextype)(?: (\w+))? \(([\w, \(\)]+)\),?$/) {
$info{$table}{lc $1.'_name'} = $2 ? $2 : '';
@@ -128,6 +130,46 @@ sub parse_sql {
} ## end of parse_sql
+## Read in the parser test information
+my $parsefile = '../parserTests.inc';
+open my $pfh, '<', $parsefile or die qq{Could not open "$parsefile": $!\n};
+my $stat = 0;
+my %ptable;
+while (<$pfh>) {
+ if (!$stat) {
+ if (/function listTables/) {
+ $stat = 1;
+ }
+ next;
+ }
+ $ptable{$1}=2 while /'(\w+)'/g;
+ last if /\);/;
+}
+close $pfh;
+
+my $OK_NOT_IN_PTABLE = '
+filearchive
+logging
+profiling
+querycache_info
+searchindex
+trackbacks
+transcache
+user_newtalk
+';
+
+## Make sure all tables in main tables.sql are accounted for int the parsertest.
+for my $table (sort keys %{$old{'../tables.sql'}}) {
+ $ptable{$table}++;
+ next if $ptable{$table} > 2;
+ next if $OK_NOT_IN_PTABLE =~ /\b$table\b/;
+ print qq{Table "$table" is in the schema, but not used inside of parserTest.inc\n};
+}
+## Any that are used in ptables but no longer exist in the schema?
+for my $table (sort grep { $ptable{$_} == 2 } keys %ptable) {
+ print qq{Table "$table" ($ptable{$table}) used in parserTest.inc, but not found in schema\n};
+}
+
for my $oldfile (@old) {
## Begin non-standard indent
@@ -209,6 +251,171 @@ while (<$newfh>) {
}
}
+## Which column types are okay to map from mysql to postgres?
+my $COLMAP = q{
+## INTS:
+tinyint SMALLINT
+int INTEGER SERIAL
+bigint BIGINT
+real NUMERIC
+float NUMERIC
+
+## TEXT:
+varchar(32) TEXT
+varchar(70) TEXT
+varchar(255) TEXT
+varchar TEXT
+text TEXT
+tinytext TEXT
+ENUM TEXT
+
+## TIMESTAMPS:
+varbinary(14) TIMESTAMPTZ
+binary(14) TIMESTAMPTZ
+datetime TIMESTAMPTZ
+timestamp TIMESTAMPTZ
+
+## BYTEA:
+mediumblob BYTEA
+
+## OTHER:
+bool CHAR # Sigh
+
+};
+## Allow specific exceptions to the above
+my $COLMAPOK = q{
+## User inputted text strings:
+ar_comment tinyblob TEXT
+fa_description tinyblob TEXT
+img_description tinyblob TEXT
+ipb_reason tinyblob TEXT
+log_action varbinary(10) TEXT
+oi_description tinyblob TEXT
+rev_comment tinyblob TEXT
+rc_log_action varbinary(255) TEXT
+rc_log_type varbinary(255) TEXT
+
+## Simple text-only strings:
+ar_flags tinyblob TEXT
+fa_minor_mime varbinary(32) TEXT
+fa_storage_group varbinary(16) TEXT # Just 'deleted' for now, should stay plain text
+fa_storage_key varbinary(64) TEXT # sha1 plus text extension
+ipb_address tinyblob TEXT # IP address or username
+ipb_range_end tinyblob TEXT # hexadecimal
+ipb_range_start tinyblob TEXT # hexadecimal
+img_minor_mime varbinary(32) TEXT
+img_sha1 varbinary(32) TEXT
+job_cmd varbinary(60) TEXT # Should we limit to 60 as well?
+keyname varbinary(255) TEXT # No tablename prefix (objectcache)
+ll_lang varbinary(20) TEXT # Language code
+log_params blob TEXT # LF separated list of args
+log_type varbinary(10) TEXT
+oi_minor_mime varbinary(32) TEXT
+oi_sha1 varbinary(32) TEXT
+old_flags tinyblob TEXT
+old_text mediumblob TEXT
+page_restrictions tinyblob TEXT # CSV string
+pf_server varchar(30) TEXT
+pr_level varbinary(60) TEXT
+pr_type varbinary(60) TEXT
+pt_create_perm varbinary(60) TEXT
+pt_reason tinyblob TEXT
+qc_type varbinary(32) TEXT
+qcc_type varbinary(32) TEXT
+qci_type varbinary(32) TEXT
+rc_params blob TEXT
+ug_group varbinary(16) TEXT
+user_email_token binary(32) TEXT
+user_ip varbinary(40) TEXT
+user_newpassword tinyblob TEXT
+user_options blob TEXT
+user_password tinyblob TEXT
+user_token binary(32) TEXT
+
+## Text URLs:
+el_index blob TEXT
+el_to blob TEXT
+iw_url blob TEXT
+tb_url blob TEXT
+tc_url varbinary(255) TEXT
+
+## Deprecated or not yet used:
+ar_text mediumblob TEXT
+job_params blob TEXT
+log_deleted tinyint INTEGER # Not used yet, but keep it INTEGER for safety
+rc_type tinyint CHAR
+
+## Number tweaking:
+fa_bits int SMALLINT # bits per pixel
+fa_height int SMALLINT
+fa_width int SMALLINT # Hope we don't see an image this wide...
+hc_id int BIGINT # Odd that site_stats is all bigint...
+img_bits int SMALLINT # bits per image should stay sane
+oi_bits int SMALLINT
+
+## True binary fields, usually due to gzdeflate and/or serialize:
+math_inputhash varbinary(16) BYTEA
+math_outputhash varbinary(16) BYTEA
+
+## Namespaces: not need for such a high range
+ar_namespace int SMALLINT
+job_namespace int SMALLINT
+log_namespace int SMALLINT
+page_namespace int SMALLINT
+pl_namespace int SMALLINT
+pt_namespace int SMALLINT
+qc_namespace int SMALLINT
+rc_namespace int SMALLINT
+rd_namespace int SMALLINT
+tl_namespace int SMALLINT
+wl_namespace int SMALLINT
+
+## "Bools"
+ar_minor_edit tinyint CHAR
+iw_trans tinyint CHAR
+page_is_new tinyint CHAR
+page_is_redirect tinyint CHAR
+rc_bot tinyint CHAR
+rc_deleted tinyint CHAR
+rc_minor tinyint CHAR
+rc_new tinyint CHAR
+rc_patrolled tinyint CHAR
+rev_deleted tinyint CHAR
+rev_minor_edit tinyint CHAR
+
+## Easy enough to change if a wiki ever does grow this big:
+ss_good_articles bigint INTEGER
+ss_total_edits bigint INTEGER
+ss_total_pages bigint INTEGER
+ss_total_views bigint INTEGER
+ss_users bigint INTEGER
+
+## True IP - keep an eye on these, coders tend to make textual assumptions
+rc_ip varbinary(40) CIDR # Want to keep an eye on this
+
+## Others:
+tc_time int TIMESTAMPTZ
+
+
+};
+
+my %colmap;
+for (split /\n/ => $COLMAP) {
+ next unless /^\w/;
+ s/(.*?)#.*/$1/;
+ my ($col,@maps) = split / +/, $_;
+ for (@maps) {
+ $colmap{$col}{$_} = 1;
+ }
+}
+
+my %colmapok;
+for (split /\n/ => $COLMAPOK) {
+ next unless /^\w/;
+ my ($col,$old,$new) = split / +/, $_;
+ $colmapok{$col}{$old}{$new} = 1;
+}
+
## Old but not new
for my $t (sort keys %{$old{$oldfile}}) {
if (!exists $new{$t} and !exists $ok{OLD}{$t}) {
@@ -218,6 +425,7 @@ for my $t (sort keys %{$old{$oldfile}}) {
next if exists $ok{OLD}{$t} and !$ok{OLD}{$t};
my $newt = exists $ok{OLD}{$t} ? $ok{OLD}{$t} : $t;
my $oldcol = $old{$oldfile}{$t}{column};
+ my $oldcolfull = $old{$oldfile}{$t}{columnfull};
my $newcol = $new{$newt}{column};
for my $c (keys %$oldcol) {
if (!exists $newcol->{$c}) {
@@ -225,11 +433,22 @@ for my $t (sort keys %{$old{$oldfile}}) {
next;
}
}
- for my $c (keys %$newcol) {
+ for my $c (sort keys %$newcol) {
if (!exists $oldcol->{$c}) {
print "Column $t.$c not in $oldfile\n";
next;
}
+ ## Column types (roughly) match up?
+ my $new = $newcol->{$c};
+ my $old = $oldcolfull->{$c};
+
+ ## Known exceptions:
+ next if exists $colmapok{$c}{$old}{$new};
+
+ $old =~ s/ENUM.*/ENUM/;
+ if (! exists $colmap{$old}{$new}) {
+ print "Column types for $t.$c do not match: $old does not map to $new\n";
+ }
}
}
## New but not old: