Diffstat (limited to 'includes/api/ApiQueryDuplicateFiles.php')
-rw-r--r--  includes/api/ApiQueryDuplicateFiles.php | 143
1 file changed, 94 insertions(+), 49 deletions(-)
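
This change replaces the module's SQL self-join on the image table with RepoGroup-based hash lookups and adds a localonly flag. A hypothetical request exercising the new flag (assuming the module's usual 'df' parameter prefix, under which 'localonly' would surface as dflocalonly):

    api.php?action=query&prop=duplicatefiles&titles=File:Example.jpg&dflimit=10&dflocalonly=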
diff --git a/includes/api/ApiQueryDuplicateFiles.php b/includes/api/ApiQueryDuplicateFiles.php
index beca5879..8f0fd3be 100644
--- a/includes/api/ApiQueryDuplicateFiles.php
+++ b/includes/api/ApiQueryDuplicateFiles.php
@@ -4,7 +4,7 @@
*
* Created on Sep 27, 2008
*
- * Copyright © 2008 Roan Kattouw <Firstname>.<Lastname>@gmail.com
+ * Copyright © 2008 Roan Kattouw "<Firstname>.<Lastname>@gmail.com"
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -59,67 +59,99 @@ class ApiQueryDuplicateFiles extends ApiQueryGeneratorBase {
}
$images = $namespaces[NS_FILE];
- $this->addTables( 'image', 'i1' );
- $this->addTables( 'image', 'i2' );
- $this->addFields( array(
- 'i1.img_name AS orig_name',
- 'i2.img_name AS dup_name',
- 'i2.img_user_text AS dup_user_text',
- 'i2.img_timestamp AS dup_timestamp'
- ) );
-
- $this->addWhere( array(
- 'i1.img_name' => array_keys( $images ),
- 'i1.img_sha1 = i2.img_sha1',
- 'i1.img_name != i2.img_name',
- ) );
+ if ( $params['dir'] == 'descending' ) {
+ $images = array_reverse( $images );
+ }
+ $skipUntilThisDup = false;
if ( isset( $params['continue'] ) ) {
$cont = explode( '|', $params['continue'] );
if ( count( $cont ) != 2 ) {
$this->dieUsage( 'Invalid continue param. You should pass the ' .
'original value returned by the previous query', '_badcontinue' );
}
- $orig = $this->getDB()->strencode( $this->titleToKey( $cont[0] ) );
- $dup = $this->getDB()->strencode( $this->titleToKey( $cont[1] ) );
- $this->addWhere(
- "i1.img_name > '$orig' OR " .
- "(i1.img_name = '$orig' AND " .
- "i2.img_name >= '$dup')"
- );
+ $fromImage = $cont[0];
+ $skipUntilThisDup = $cont[1];
+ // Filter out any images before $fromImage
+ foreach ( $images as $image => $pageId ) {
+ if ( $image < $fromImage ) {
+ unset( $images[$image] );
+ } else {
+ break;
+ }
+ }
}
- $dir = ( $params['dir'] == 'descending' ? ' DESC' : '' );
- $this->addOption( 'ORDER BY', 'i1.img_name' . $dir );
- $this->addOption( 'LIMIT', $params['limit'] + 1 );
+ $filesToFind = array_keys( $images );
+ if ( $params['localonly'] ) {
+ $files = RepoGroup::singleton()->getLocalRepo()->findFiles( $filesToFind );
+ } else {
+ $files = RepoGroup::singleton()->findFiles( $filesToFind );
+ }
- $res = $this->select( __METHOD__ );
+ $fit = true;
$count = 0;
$titles = array();
- foreach ( $res as $row ) {
- if ( ++$count > $params['limit'] ) {
- // We've reached the one extra which shows that
- // there are additional pages to be had. Stop here...
- $this->setContinueEnumParameter( 'continue',
- $this->keyToTitle( $row->orig_name ) . '|' .
- $this->keyToTitle( $row->dup_name ) );
- break;
+
+ $sha1s = array();
+ foreach ( $files as $file ) {
+ $sha1s[$file->getName()] = $file->getSha1();
+ }
+
+ // find all files with those hashes; result format: array( hash => array( dup1, dup2 ), hash1 => ... )
+ $filesToFindBySha1s = array_unique( array_values( $sha1s ) );
+ if ( $params['localonly'] ) {
+ $filesBySha1s = RepoGroup::singleton()->getLocalRepo()->findBySha1s( $filesToFindBySha1s );
+ } else {
+ $filesBySha1s = RepoGroup::singleton()->findBySha1s( $filesToFindBySha1s );
+ }
+
+ // iterate over $images to handle the continue param correctly
+ foreach ( $images as $image => $pageId ) {
+ if ( !isset( $sha1s[$image] ) ) {
+ continue; // file does not exist
+ }
+ $sha1 = $sha1s[$image];
+ $dupFiles = $filesBySha1s[$sha1];
+ if ( $params['dir'] == 'descending' ) {
+ $dupFiles = array_reverse( $dupFiles );
}
- if ( !is_null( $resultPageSet ) ) {
- $titles[] = Title::makeTitle( NS_FILE, $row->dup_name );
- } else {
- $r = array(
- 'name' => $row->dup_name,
- 'user' => $row->dup_user_text,
- 'timestamp' => wfTimestamp( TS_ISO_8601, $row->dup_timestamp )
- );
- $fit = $this->addPageSubItem( $images[$row->orig_name], $r );
- if ( !$fit ) {
- $this->setContinueEnumParameter( 'continue',
- $this->keyToTitle( $row->orig_name ) . '|' .
- $this->keyToTitle( $row->dup_name ) );
+ foreach ( $dupFiles as $dupFile ) {
+ $dupName = $dupFile->getName();
+ if ( $image == $dupName && $dupFile->isLocal() ) {
+ continue; // ignore the local file itself
+ }
+ if ( $skipUntilThisDup !== false && $dupName < $skipUntilThisDup ) {
+ continue; // skip to the position after the image from the continue param
+ }
+ $skipUntilThisDup = false;
+ if ( ++$count > $params['limit'] ) {
+ $fit = false; // break the outer loop
+ // We're one over the limit, which shows that
+ // there are additional images to be had. Stop here...
+ $this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
break;
}
+ if ( !is_null( $resultPageSet ) ) {
+ $titles[] = $dupFile->getTitle();
+ } else {
+ $r = array(
+ 'name' => $dupName,
+ 'user' => $dupFile->getUser( 'text' ),
+ 'timestamp' => wfTimestamp( TS_ISO_8601, $dupFile->getTimestamp() )
+ );
+ if ( !$dupFile->isLocal() ) {
+ $r['shared'] = '';
+ }
+ $fit = $this->addPageSubItem( $pageId, $r );
+ if ( !$fit ) {
+ $this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
+ break;
+ }
+ }
+ }
+ if ( !$fit ) {
+ break;
}
}
if ( !is_null( $resultPageSet ) ) {
@@ -144,19 +176,32 @@ class ApiQueryDuplicateFiles extends ApiQueryGeneratorBase {
'descending'
)
),
+ 'localonly' => false,
);
}
public function getParamDescription() {
return array(
- 'limit' => 'How many files to return',
+ 'limit' => 'How many duplicate files to return',
'continue' => 'When more results are available, use this to continue',
'dir' => 'The direction in which to list',
+ 'localonly' => 'Look only for files in the local repository',
+ );
+ }
+
+ public function getResultProperties() {
+ return array(
+ '' => array(
+ 'name' => 'string',
+ 'user' => 'string',
+ 'timestamp' => 'timestamp',
+ 'shared' => 'boolean',
+ )
);
}
public function getDescription() {
- return 'List all files that are duplicates of the given file(s)';
+ return 'List all files that are duplicates of the given file(s) based on hash values';
}
public function getPossibleErrors() {
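
For reference, a minimal sketch of the two-step, hash-based lookup pattern the patch adopts. It assumes a MediaWiki runtime where RepoGroup is loaded; $wanted and $dupsByName are illustrative names, not identifiers from the patch itself.

// Step 1: resolve the requested names to File objects and record each SHA-1
$wanted = array( 'Example.jpg', 'Other.png' );
$repoGroup = RepoGroup::singleton();
$files = $repoGroup->findFiles( $wanted );
$sha1s = array();
foreach ( $files as $file ) {
	$sha1s[$file->getName()] = $file->getSha1();
}

// Step 2: one batched lookup of every file sharing those hashes;
// result format: array( sha1 => array( File, File, ... ), ... )
$filesBySha1s = $repoGroup->findBySha1s( array_unique( array_values( $sha1s ) ) );

// Step 3: a file's duplicates are the other files carrying its hash
// (the local file itself is skipped, as in the patch)
$dupsByName = array();
foreach ( $sha1s as $name => $sha1 ) {
	foreach ( $filesBySha1s[$sha1] as $dupFile ) {
		if ( $name == $dupFile->getName() && $dupFile->isLocal() ) {
			continue;
		}
		$dupsByName[$name][] = $dupFile->getName();
	}
}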