Refactored batch query.

This commit is contained in:
Qiang Xue
2014-02-15 11:22:48 -05:00
parent 94576de926
commit 9a068f50f4
5 changed files with 127 additions and 113 deletions

View File

@ -117,15 +117,15 @@ Batch query is also supported when working with Active Record. For example,
```php
// fetch 10 customers at a time
foreach (Customer::find()->batch() as $customers) {
foreach (Customer::find()->batch(10) as $customers) {
// $customers is an array of 10 or fewer Customer objects
}
// fetch customers one by one
foreach (Customer::find()->each() as $customer) {
// fetch 10 customers at a time and iterate over them one by one
foreach (Customer::find()->each(10) as $customer) {
// $customer is a Customer object
}
// batch query with eager loading
foreach (Customer::find()->with('orders')->batch() as $customers) {
foreach (Customer::find()->with('orders')->each() as $customer) {
}
```

View File

@ -351,41 +351,25 @@ $query = (new Query)
->from('tbl_user')
->orderBy('id');
foreach ($query->batch(10) as $users) {
// $users is an array of 10 or fewer rows from the user table
foreach ($query->batch() as $users) {
// $users is an array of 100 or fewer rows from the user table
}
```
The method [[yii\db\Query::batch()]] returns a [[yii\db\BatchQueryResult]] object which implements
the `Iterator` interface and thus can be used in the `foreach` construct. In each iteration,
it returns an array of query results. The size of the array is determined by the so-called batch
size, which is the first parameter (defaults to 100) to the method.
Compared to the `$query->all()` call, the above code only loads 10 rows of data at a time into the memory.
If you process the data and then discard it right away, the batch query can help keep the memory usage under a limit.
Note that in the special case when you specify the batch size as 1, each iteration of the batch query
only returns a single row of data, rather than an array of a row. In this case, you may also use
the shortcut method [[yii\db\Query::each()]]. For example,
```php
use yii\db\Query;
$query = (new Query)
->from('tbl_user')
->orderBy('id');
// or, if you want to iterate over the rows one by one
foreach ($query->each() as $user) {
// $user represents a row from the user table
}
// the above code is equivalent to the following:
foreach ($query->batch(1) as $user) {
// $user represents a row from the user table
// $user represents one row of data from the user table
}
```
The methods [[yii\db\Query::batch()]] and [[yii\db\Query::each()]] return a [[yii\db\BatchQueryResult]] object
which implements the `Iterator` interface and thus can be used in the `foreach` construct.
During the first iteration, a SQL query is made to the database. From then on, data is fetched in batches
during the subsequent iterations. By default, the batch size is 100, meaning 100 rows of data are fetched in each batch.
You can change the batch size by passing the first parameter to the `batch()` or `each()` method.
Compared to [[yii\db\Query::all()]], the batch query loads only 100 rows of data at a time into memory.
If you process the data and then discard it right away, the batch query can help keep the memory usage under a limit.
If you specify the query result to be indexed by some column via [[yii\db\Query::indexBy()]], the batch query
will still keep the proper index. For example,
@ -396,7 +380,7 @@ $query = (new Query)
->from('tbl_user')
->indexBy('username');
foreach ($query->batch(10) as $users) {
foreach ($query->batch() as $users) {
// $users is indexed by the "username" column
}

View File

@ -21,6 +21,8 @@ use yii\base\Object;
* foreach ($query->batch() as $i => $users) {
* // $users represents the rows in the $i-th batch
* }
* foreach ($query->each() as $user) {
* }
* ```
*
* @author Qiang Xue <qiang.xue@gmail.com>
@ -38,19 +40,31 @@ class BatchQueryResult extends Object implements \Iterator
* Do not modify this property directly unless [[reset()]] has been called explicitly beforehand.
*/
public $query;
/**
* @var DataReader the data reader associated with this batch query.
* Do not modify this property directly unless after [[reset()]] is called explicitly.
*/
public $dataReader;
/**
* @var integer the number of rows to be returned in each batch.
*/
public $batchSize = 100;
private $_data;
/**
* @var boolean whether to return a single row during each iteration.
* If false, a whole batch of rows will be returned in each iteration.
*/
public $each = false;
/**
* @var DataReader the data reader associated with this batch query.
*/
private $_dataReader;
/**
* @var array the data retrieved in the current batch
*/
private $_batch;
/**
* @var mixed the value for the current iteration
*/
private $_value;
/**
* @var string|integer the key for the current iteration
*/
private $_key;
private $_index = -1;
/**
* Destructor.
@ -67,12 +81,13 @@ class BatchQueryResult extends Object implements \Iterator
*/
public function reset()
{
if ($this->dataReader !== null) {
$this->dataReader->close();
if ($this->_dataReader !== null) {
$this->_dataReader->close();
}
$this->dataReader = null;
$this->_data = null;
$this->_index = -1;
$this->_dataReader = null;
$this->_batch = null;
$this->_value = null;
$this->_key = null;
}
/**
@ -85,6 +100,50 @@ class BatchQueryResult extends Object implements \Iterator
$this->next();
}
/**
 * Moves the internal pointer to the next dataset.
 * This method is required by the interface Iterator.
 *
 * In batch mode ([[each]] is false) every call fetches a new batch, which becomes the
 * current value; the key is the zero-based number of the batch.
 *
 * In row mode ([[each]] is true) the pointer advances within the current batch and a new
 * batch is fetched only after the current one is exhausted. The current value is a single
 * row. The key is the row's key in the batch when the query uses `indexBy`, otherwise a
 * zero-based counter that keeps counting across batches. Once no more rows are available
 * the key becomes null.
 */
public function next()
{
    // Fetch when nothing has been loaded yet, on every step in batch mode, or when the
    // row pointer has just moved past the last row of the current batch.
    if ($this->_batch === null || !$this->each || next($this->_batch) === false) {
        $this->_batch = $this->fetchData();
        // Defensive: start at the first row regardless of where the array pointer
        // of the freshly prepared result happens to be.
        reset($this->_batch);
    }

    if (!$this->each) {
        $this->_value = $this->_batch;
        $this->_key = $this->_key === null ? 0 : $this->_key + 1;
        return;
    }

    $this->_value = current($this->_batch);
    $batchKey = key($this->_batch);
    if ($this->query->indexBy !== null) {
        $this->_key = $batchKey;
    } elseif ($batchKey !== null) {
        // Do not use "$this->_key++" here: incrementing null yields 1 in PHP, which
        // would make the row keys start at 1 instead of 0.
        $this->_key = $this->_key === null ? 0 : $this->_key + 1;
    } else {
        // key() is null only when the batch is empty, i.e. the data is exhausted.
        $this->_key = null;
    }
}
/**
 * Reads the next batch of rows from the database.
 * The data reader is created on the first call by executing the query, and is reused by
 * later calls. At most [[batchSize]] rows are read per call; reading stops early as soon
 * as the reader yields no further row.
 * @return array the rows read, after being passed through the query's `prepareResult()`
 */
protected function fetchData()
{
    if ($this->_dataReader === null) {
        $command = $this->query->createCommand($this->db);
        $this->_dataReader = $command->query();
    }

    $batch = [];
    for ($read = 0; $read < $this->batchSize; $read++) {
        $row = $this->_dataReader->read();
        if (!$row) {
            // the reader has no more rows
            break;
        }
        $batch[] = $row;
    }

    return $this->query->prepareResult($batch);
}
/**
* Returns the index of the current dataset.
* This method is required by the interface Iterator.
@ -92,7 +151,7 @@ class BatchQueryResult extends Object implements \Iterator
*/
public function key()
{
return $this->batchSize == 1 ? $this->_key : $this->_index;
return $this->_key;
}
/**
@ -102,37 +161,7 @@ class BatchQueryResult extends Object implements \Iterator
*/
public function current()
{
return $this->_data;
}
/**
* Moves the internal pointer to the next dataset.
* This method is required by the interface Iterator.
*/
public function next()
{
if ($this->dataReader === null) {
$this->dataReader = $this->query->createCommand($this->db)->query();
$this->_index = 0;
} else {
$this->_index++;
}
$rows = [];
$count = 0;
while ($count++ < $this->batchSize && ($row = $this->dataReader->read())) {
$rows[] = $row;
}
if (empty($rows)) {
$this->_data = null;
} else {
$this->_data = $this->query->prepareResult($rows);
if ($this->batchSize == 1) {
$row = reset($this->_data);
$this->_key = key($this->_data);
$this->_data = $row;
}
}
return $this->_value;
}
/**
@ -142,6 +171,6 @@ class BatchQueryResult extends Object implements \Iterator
*/
public function valid()
{
return $this->_data !== null;
return !empty($this->_batch);
}
}

View File

@ -139,31 +139,47 @@ class Query extends Component implements QueryInterface
* }
* ```
*
* @param integer $size the number of records to be fetched in each batch.
* @param integer $batchSize the number of records to be fetched in each batch.
* @param Connection $db the database connection. If not set, the "db" application component will be used.
* @return BatchQueryResult the batch query result. It implements the `Iterator` interface
* and can be traversed to retrieve the data in batches.
*/
public function batch($size = 100, $db = null)
public function batch($batchSize = 100, $db = null)
{
return Yii::createObject([
'class' => BatchQueryResult::className(),
'query' => $this,
'batchSize' => $size,
'batchSize' => $batchSize,
'db' => $db,
'each' => false,
]);
}
/**
* Starts a batch query and retrieves data row by row.
* This method is a shortcut to [[batch()]] with batch size fixed to be 1.
* This method is similar to [[batch()]] except that in each iteration of the result,
* only one row of data is returned. For example,
*
* ```php
* $query = (new Query)->from('tbl_user');
* foreach ($query->each() as $row) {
* }
* ```
*
* @param integer $batchSize the number of records to be fetched in each batch.
* @param Connection $db the database connection. If not set, the "db" application component will be used.
* @return BatchQueryResult the batch query result. It implements the `Iterator` interface
* and can be traversed to retrieve the data row by row.
*/
public function each($db = null)
public function each($batchSize = 100, $db = null)
{
return $this->batch(1, $db);
return Yii::createObject([
'class' => BatchQueryResult::className(),
'query' => $this,
'batchSize' => $batchSize,
'db' => $db,
'each' => true,
]);
}
/**

View File

@ -35,7 +35,6 @@ class BatchQueryResultTest extends DatabaseTestCase
$result = $query->batch(2, $db);
$this->assertTrue($result instanceof BatchQueryResult);
$this->assertEquals(2, $result->batchSize);
$this->assertNull($result->dataReader);
$this->assertTrue($result->query === $query);
// normal query
@ -58,7 +57,16 @@ class BatchQueryResultTest extends DatabaseTestCase
$this->assertEquals(3, count($allRows));
// reset
$batch->reset();
$this->assertNull($batch->dataReader);
// empty query
$query = new Query();
$query->from('tbl_customer')->where(['id' => 100]);
$allRows = [];
$batch = $query->batch(2, $db);
foreach ($batch as $rows) {
$allRows = array_merge($allRows, $rows);
}
$this->assertEquals(0, count($allRows));
// query with index
$query = new Query();
@ -72,23 +80,11 @@ class BatchQueryResultTest extends DatabaseTestCase
$this->assertEquals('address2', $allRows['user2']['address']);
$this->assertEquals('address3', $allRows['user3']['address']);
// query in batch 1
$query = new Query();
$query->from('tbl_customer')->orderBy('id');
$allRows = [];
foreach ($query->batch(1, $db) as $rows) {
$allRows[] = $rows;
}
$this->assertEquals(3, count($allRows));
$this->assertEquals('user1', $allRows[0]['name']);
$this->assertEquals('user2', $allRows[1]['name']);
$this->assertEquals('user3', $allRows[2]['name']);
// each
$query = new Query();
$query->from('tbl_customer')->orderBy('id');
$allRows = [];
foreach ($query->each($db) as $rows) {
foreach ($query->each(100, $db) as $rows) {
$allRows[] = $rows;
}
$this->assertEquals(3, count($allRows));
@ -100,7 +96,7 @@ class BatchQueryResultTest extends DatabaseTestCase
$query = new Query();
$query->from('tbl_customer')->orderBy('id')->indexBy('name');
$allRows = [];
foreach ($query->each($db) as $key => $row) {
foreach ($query->each(100, $db) as $key => $row) {
$allRows[$key] = $row;
}
$this->assertEquals(3, count($allRows));
@ -123,17 +119,6 @@ class BatchQueryResultTest extends DatabaseTestCase
$this->assertEquals('user2', $customers[1]->name);
$this->assertEquals('user3', $customers[2]->name);
// query in batch 1
$query = Customer::find()->orderBy('id');
$customers = [];
foreach ($query->batch(1, $db) as $model) {
$customers[] = $model;
}
$this->assertEquals(3, count($customers));
$this->assertEquals('user1', $customers[0]->name);
$this->assertEquals('user2', $customers[1]->name);
$this->assertEquals('user3', $customers[2]->name);
// batch with eager loading
$query = Customer::find()->with('orders')->orderBy('id');
$customers = [];