improvements in importer class and documentation

This commit is contained in:
Leo Germani 2018-03-30 16:34:37 -03:00
parent 993fe321ba
commit d474711708
4 changed files with 182 additions and 88 deletions

48
docs/importer-flow.md Normal file
View File

@ -0,0 +1,48 @@
Create an instance of the Importer
First of all, the user will be asked to choose the destination collection.
If called from inside a collection, this step is skipped and the current collection is set as destination.
If the importer has the attribute `$import_structure_and_mapping` set to `true`, and the importer is called from repository level,
they will also be able to choose to create a new collection.
In that case, a new collection is created with the status of `auto-draft`, and the importer must implement a method called `create_fields_and_mapping()`, which, as the name says, will create all the collection's fields and set the mapping for the importer.
Set options
Now it's time to set the importer options. Each importer may have its own set of options, which will be used during the import process. It could be anything, from the delimiter character in a CSV importer to a query parameter for an importer that fetches something from an API.
The importer handles options by implementing three methods: `get_options()`, `get_default_options()` and `options_form()`.
Fetch file
Next, the user will choose the source. Each importer declares the kinds of source it accepts: URL, file, or both.
If it's a file, it is uploaded. If it's a URL, it is fetched into a file.
By default, the importer will fetch any given URL to a file. However, each importer may override the `fetch_from_remote()` method
and do whatever it wants to create the file. For example, it could make several paged requests.
Mapping
If the importer has the attribute `$import_structure_and_mapping` set to `true`, the importer will automatically create all the fields and set the mapping for the collection. This is done by calling the `create_fields_and_mapping()` method of the importer.
If not, the user will be asked to map the fields that were identified in the source file (by calling the `get_fields()` method) with the fields present in the chosen collection.
Run importer
Finally, everything is ready. The importer runs.
The `run()` method is called, the importer runs a step of the import process, and returns the number of items imported so far. The client (browser) will repeat this request as many times as necessary to complete the process and will give feedback to the user about the progress.
In order to allow this, the importer must implement the `get_total_items()` method, which will inform the total number of items present in the source file.
All the steps and the insertion are handled by the Importer super class. The importer class only has to implement one method (`process_item()`) to handle one single item. It will receive the index of this item and it must return the item in the format of a mapped array, where the key is the identifier of the source field (the same used in the mapping array), and the value is the field value.
In the end, a report is generated with all the logs generated in the process. If the collection was set to `auto-draft`, it is now published.

View File

@ -5,33 +5,22 @@ use Tainacan;
class CSV extends Importer { class CSV extends Importer {
public $delimiter = ',';
public function __construct() { public function __construct() {
parent::__construct(); parent::__construct();
}
/** $this->set_default_options([
* @return string $delimiter value that divides each column 'delimiter' => ','
*/ ]);
public function get_delimiter(){
return $this->delimiter;
}
/**
* @param $delimiter
*/
public function set_delimiter( $delimiter ){
$this->delimiter = $delimiter;
} }
/** /**
* @inheritdoc * @inheritdoc
*/ */
public function get_fields_source(){ public function get_fields(){
$file = new \SplFileObject( $this->tmp_file, 'r' ); $file = new \SplFileObject( $this->tmp_file, 'r' );
$file->seek(0 ); $file->seek(0 );
return $file->fgetcsv( $this->get_delimiter() ); return $file->fgetcsv( $this->get_option('delimiter') );
} }
@ -40,7 +29,7 @@ class CSV extends Importer {
*/ */
public function process_item( $index ){ public function process_item( $index ){
$processedItem = []; $processedItem = [];
$headers = $this->get_fields_source(); $headers = $this->get_fields();
// search the index in the file and get values // search the index in the file and get values
$file = new \SplFileObject( $this->tmp_file, 'r' ); $file = new \SplFileObject( $this->tmp_file, 'r' );
@ -49,9 +38,9 @@ class CSV extends Importer {
if( $index === 0 ){ if( $index === 0 ){
$file->current(); $file->current();
$file->next(); $file->next();
$values = $file->fgetcsv( $this->get_delimiter() ); $values = $file->fgetcsv( $this->get_option('delimiter') );
}else{ }else{
$values = $file->fgetcsv( $this->get_delimiter() ); $values = $file->fgetcsv( $this->get_option('delimiter') );
} }
if( count( $headers ) !== count( $values ) ){ if( count( $headers ) !== count( $values ) ){
@ -65,13 +54,6 @@ class CSV extends Importer {
return $processedItem; return $processedItem;
} }
/**
* @return mixed
*/
public function get_options(){
// TODO: Implement get_options() method.
}
/** /**
* @inheritdoc * @inheritdoc
*/ */

View File

@ -5,29 +5,80 @@ use Tainacan;
abstract class Importer { abstract class Importer {
private $id; private $id;
private $processed_items; private $processed_items = [];
private $last_index;
/**
* Indicates whether this importer will create all the collection fields and set the mapping
* without user interaction
*
* if set to true, user will have the ability to choose to create a new collection upon importing.
*
* The importer will have to implement the create_fields_and_mapping() method.
*
* @var bool
*/
public $import_structure_and_mapping = false;
/**
* The collection the items are going to be imported to.
*
* @var \Tainacan\Entities\Collection
*/
public $collection; public $collection;
/**
* The mapping from the source metadata structure to the Field Ids of the destination collection
*
* The format is an array where the keys are the field IDs of the destination collection and the
* values are the identifiers from the source. This could be an ID or a string or whatever the importer finds appropriate to handle
*
* @var array
*/
public $mapping; public $mapping;
/**
* The path to the temporary file created when user uploads a file
* @var string
*/
public $tmp_file; public $tmp_file;
public $total_items;
public $limit_query; /**
public $start; * The total number of items to be imported.
public $end; * @var int
*/
protected $total_items;
/**
* The number of items to be processed in each step
* @var int
*/
private $items_per_step = 100;
/**
* The index of the item to start the import in the next step.
*
* (items are imported in a series of steps, via ajax, to avoid timeout)
* @var int
*/
private $start = 0;
/**
* The log with everything that happened during the import process. It generates a report afterwards
* @var array
*/
public $logs = []; public $logs = [];
private $options = [];
private $default_options = [];
public function __construct() { public function __construct() {
if (!session_id()) { if (!session_id()) {
@session_start(); @session_start();
} }
$this->id = uniqid(); $this->id = uniqid();
$this->limit_query = 100; $_SESSION['tainacan_importer'][$this->get_id()] = $this;
$this->start = 0;
$this->end = $this->start + $this->limit_query;
$this->processed_items = [];
$_SESSION['tainacan_importer'][$this->id] = $this;
} }
/** /**
@ -52,13 +103,6 @@ abstract class Importer {
return $this->processed_items; return $this->processed_items;
} }
/**
* @return mixed the last index from source
*/
public function get_last_index(){
return $this->last_index;
}
/** /**
* @return array the last index from source * @return array the last index from source
*/ */
@ -83,12 +127,12 @@ abstract class Importer {
} }
/** /**
* set the limit of query to be processed * set how many items should be processes in each step
* *
* @param $size The total of items * @param $size The total of items
*/ */
public function set_limit_query( $size ){ public function set_items_per_step( $size ){
$this->limit_query = $size; $this->items_per_step = $size;
} }
/** /**
@ -98,13 +142,6 @@ abstract class Importer {
$this->start = $start; $this->start = $start;
} }
/**
* @param mixed $end the last index in process
*/
public function set_end( $end ){
$this->end = $end;
}
/** /**
* @param $file File to be managed by importer * @param $file File to be managed by importer
* @return bool * @return bool
@ -158,20 +195,13 @@ abstract class Importer {
} }
} }
/**
* @return mixed
*/
public function get_collection_fields(){
return $this->collection;
}
/** /**
* get the fields of file/url to allow mapping * get the fields of file/url to allow mapping
* should returns an array * should return an array
* *
* @return array $fields_source the fields from the source * @return array $fields_source the fields from the source
*/ */
abstract public function get_fields_source(); abstract public function get_fields();
/** /**
* get values for a single item * get values for a single item
@ -184,11 +214,6 @@ abstract class Importer {
*/ */
abstract public function process_item( $index ); abstract public function process_item( $index );
/**
* @return mixed
*/
abstract public function get_options();
/** /**
* return the all items found * return the all items found
* *
@ -197,13 +222,45 @@ abstract class Importer {
abstract public function get_total_items(); abstract public function get_total_items();
/** /**
* Gets the options for this importer, including default values for options
* that were not set yet.
* @return array Importer options
*/
public function get_options() {
    // Explicitly set options win over the defaults: array_merge() lets values
    // from the later array overwrite entries with the same string key.
    $defaults  = $this->default_options;
    $overrides = $this->options;

    return array_merge($defaults, $overrides);
}
/**
* Gets one option from the options array.
*
* Checks if the option exists or if it has a default value. Otherwise returns an empty string
*
* @param string $key the desired option
* @return mixed the option value, the default value or an empty string
*/
public function get_option($key) {
    $all_options = $this->get_options();

    // isset() is also false for a stored null, so a null-valued option
    // falls through to the empty-string fallback below.
    if ( isset($all_options[$key]) ) {
        return $all_options[$key];
    }

    return '';
}
/**
 * Sets the default options for this importer.
 *
 * Replaces the whole default options array. These values are the ones
 * get_options() falls back to for keys that were not set via set_options().
 * The method is protected, so it can only be called from the importer
 * classes themselves (e.g. from a subclass constructor).
 *
 * @param array $options the default options, keyed by option name
 */
protected function set_default_options($options) {
$this->default_options = $options;
}
/**
 * Sets the options for this importer.
 *
 * Replaces the whole options array; it is not merged with previously set
 * options. Keys missing here are still filled in from the default options
 * when read through get_options().
 *
 * @param array $options the options, keyed by option name
 */
public function set_options($options) {
$this->options = $options;
}
/**
* process a limited size of items * process a limited size of items
* *
* @param $start init index * @param int $start the index of the item to start processing from
* @param $end last index
*/ */
public function process( $start, $end ){ public function process( $start ){
while ( $start < $end && count( $this->get_processed_items() ) <= $this->get_total_items() ){
$end = $start + $this->items_per_step;
while ( $start < $end && count( $this->get_processed_items() ) <= $this->get_total_items() ) {
$processed_item = $this->process_item( $start ); $processed_item = $this->process_item( $start );
if( $processed_item) { if( $processed_item) {
$this->insert( $start, $processed_item ); $this->insert( $start, $processed_item );
@ -213,6 +270,8 @@ abstract class Importer {
} }
$start++; $start++;
} }
$this->set_start($start);
} }
/** /**
@ -258,7 +317,7 @@ abstract class Importer {
if( $item->validate() ){ if( $item->validate() ){
$insertedItem = $Tainacan_Items->insert( $item ); $insertedItem = $Tainacan_Items->insert( $item );
} else { } else {
$this->add_log( 'error', 'Item ' . $index . ': '. $item->get_errors() ); $this->add_log( 'error', 'Item ' . $index . ': ' ); // TODO add the $item->get_errors() array
return false; return false;
} }
@ -287,9 +346,6 @@ abstract class Importer {
// inserted the id on processed item with its index as array index // inserted the id on processed item with its index as array index
$this->processed_items[ $index ] = $item->get_id(); $this->processed_items[ $index ] = $item->get_id();
// set the last index
$this->last_index = $index;
$Tainacan_Items->update( $item ); $Tainacan_Items->update( $item );
return $item; return $item;
} else { } else {
@ -303,6 +359,7 @@ abstract class Importer {
* run the process * run the process
*/ */
public function run(){ public function run(){
$this->process( $this->start, $this->end ); $this->process( $this->start );
return sizeof($this->get_processed_items());
} }
} }

View File

@ -43,6 +43,8 @@ class ImporterTests extends TAINACAN_UnitTestCase {
$csv_importer = new Importer\CSV(); $csv_importer = new Importer\CSV();
$id = $csv_importer->get_id(); $id = $csv_importer->get_id();
$_SESSION['tainacan_importer'][$id]->set_items_per_step(2);
// open the file "demosaved.csv" for writing // open the file "demosaved.csv" for writing
$file = fopen('demosaved.csv', 'w'); $file = fopen('demosaved.csv', 'w');
@ -75,7 +77,7 @@ class ImporterTests extends TAINACAN_UnitTestCase {
$this->assertEquals( 5, $_SESSION['tainacan_importer'][$id]->get_total_items() ); $this->assertEquals( 5, $_SESSION['tainacan_importer'][$id]->get_total_items() );
// get fields to mapping // get fields to mapping
$headers = $_SESSION['tainacan_importer'][$id]->get_fields_source(); $headers = $_SESSION['tainacan_importer'][$id]->get_fields();
$this->assertEquals( $headers[4], 'Column 5' ); $this->assertEquals( $headers[4], 'Column 5' );
// inserting the collection // inserting the collection
@ -109,7 +111,12 @@ class ImporterTests extends TAINACAN_UnitTestCase {
$this->assertEquals( $_SESSION['tainacan_importer'][$id]->get_mapping(), $map ); $this->assertEquals( $_SESSION['tainacan_importer'][$id]->get_mapping(), $map );
//execute the process //execute the process
$_SESSION['tainacan_importer'][$id]->run();
$this->assertEquals(2, $_SESSION['tainacan_importer'][$id]->run(), 'first step should import 2 items');
$this->assertEquals(4, $_SESSION['tainacan_importer'][$id]->run(), 'second step should import 2 items');
$this->assertEquals(5, $_SESSION['tainacan_importer'][$id]->run(), 'third step should import 3 items');
$this->assertEquals(5, $_SESSION['tainacan_importer'][$id]->run(), 'if call run again after finish, do nothing');
$items = $Tainacan_Items->fetch( [], $collection, 'OBJECT' ); $items = $Tainacan_Items->fetch( [], $collection, 'OBJECT' );