Merge pull request #739 from tainacan/feature/738

feat: refactor general tainacan search
This commit is contained in:
Vinícius Nunes Medeiros 2022-11-11 11:05:43 -03:00 committed by GitHub
commit c6dd0a237a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 213 additions and 166 deletions

View File

@ -93,7 +93,8 @@ class REST_Controller extends \WP_REST_Controller {
'hierarchical' => 'hierarchical',
'exclude' => 'post__not_in',
'excludetree' => 'exclude_tree',
'include' => 'include'
'include' => 'include',
'sentence' => 'sentence'
];
$meta_query = [
@ -312,6 +313,12 @@ class REST_Controller extends \WP_REST_Controller {
'validate_callback' => 'rest_validate_request_arg',
);
// Phrase search is enabled by default: clients that want the search string
// split into separate keywords must send sentence=false explicitly.
$query_params['sentence'] = array(
	'description' => __( 'Whether to search by phrase. Default true.', 'tainacan' ),
	'type'        => 'boolean',
	'default'     => true,
);
$query_params['authorid'] = array(
'description' => __("Limit result set to objects assigned to specific authors by id.", 'tainacan'),
'type' => 'integer',

View File

@ -53,9 +53,21 @@ class Elastic_Press {
//https://www.elasticpress.io/blog/2019/02/custom-search-with-elasticpress-how-to-limit-results-to-full-text-matches/
if ( ! empty( $formatted_args['query']['bool']['should'] ) ) {
$formatted_args['query']['bool']['must'] = $formatted_args['query']['bool']['should'];
$formatted_args['query']['bool']['must'][0]['multi_match']['operator'] = 'AND';
$formatted_args["query"]["bool"]["must"][0]["multi_match"]["type"] = "phrase_prefix";
// $formatted_args['query']['bool']['must'][0]['multi_match']['operator'] = 'AND';
if ( isset($formatted_args['query']['bool']['must'][0]['multi_match']['operator']))
unset($formatted_args['query']['bool']['must'][0]['multi_match']['operator']);
if ( isset($formatted_args['query']['bool']['must'][1]['multi_match']['operator']))
unset($formatted_args['query']['bool']['must'][1]['multi_match']['operator']);
if ( isset($formatted_args['query']['bool']['must'][2]['multi_match']['operator']))
unset($formatted_args['query']['bool']['must'][2]['multi_match']['operator']);
if ( isset($formatted_args['query']['bool']['must'][2]) ) {
$formatted_args['query']['bool']['must'][2]['multi_match']['analyzer'] = 'default';
}
unset( $formatted_args['query']['bool']['should'] );
unset( $formatted_args["query"]["bool"]["must"][0]["multi_match"]["type"] );
}
/**
@ -63,22 +75,22 @@ class Elastic_Press {
* Elasticsearch is not good at substring matching the way SQL LIKE is.
* Here we replace `match_phrase` with `wildcard`, but this is not an efficient operation.
*/
if ( ! empty( $formatted_args['post_filter']['bool']['must'] ) ) {
$array_must = $formatted_args['post_filter']['bool']['must'];
for($i = 0; $i < count($array_must); $i++ ) {
$el_must = $array_must[$i];
if( ! empty($el_must['bool']['must']) ) {
$array_must_nested = $el_must['bool']['must'];
for($j = 0; $j < count($array_must_nested); $j++ ) {
if ( isset ($array_must_nested[$j]['match_phrase'] ) ) {
$formatted_args['post_filter']['bool']['must'][$i]['bool']['must'][$j]['match_phrase_prefix'] =
array_map( function($match) { return "$match"; } ,$array_must_nested[$j]['match_phrase']);
unset($formatted_args['post_filter']['bool']['must'][$i]['bool']['must'][$j]['match_phrase']);
}
}
}
}
}
// if ( ! empty( $formatted_args['post_filter']['bool']['must'] ) ) {
// $array_must = $formatted_args['post_filter']['bool']['must'];
// for($i = 0; $i < count($array_must); $i++ ) {
// $el_must = $array_must[$i];
// if( ! empty($el_must['bool']['must']) ) {
// $array_must_nested = $el_must['bool']['must'];
// for($j = 0; $j < count($array_must_nested); $j++ ) {
// if ( isset ($array_must_nested[$j]['match_phrase'] ) ) {
// $formatted_args['post_filter']['bool']['must'][$i]['bool']['must'][$j]['match_phrase_prefix'] =
// array_map( function($match) { return "$match"; } ,$array_must_nested[$j]['match_phrase']);
// unset($formatted_args['post_filter']['bool']['must'][$i]['bool']['must'][$j]['match_phrase']);
// }
// }
// }
// }
// }
return $formatted_args;
} );

View File

@ -39,26 +39,19 @@ class Search_Engine {
function search_hooks() {
add_filter( 'posts_join', array( &$this, 'terms_join' ) );
add_filter( 'posts_join', array( &$this, 'search_metadata_join' ) );
// add_filter( 'posts_join', array( &$this, 'terms_join' ) );
// add_filter( 'posts_join', array( &$this, 'search_metadata_join' ) );
add_filter( 'posts_join', array( &$this, 'relationships_join' ) );
//add_filter( 'posts_where', array( &$this, 'search_attachments' ) );
add_filter( 'posts_join', array( &$this, 'search_authors_join' ) );
add_filter( 'posts_search', array( &$this, 'search_where' ), 10, 2 );
add_filter( 'posts_request', array( &$this, 'distinct' ) );
//add_filter( 'posts_request', array( &$this, 'log_query' ), 10, 2 );
}
// creates the list of search keywords from the 's' parameters.
function get_search_terms() {
global $wpdb;
$s = isset( $this->query_instance->query_vars['s'] ) ? $this->query_instance->query_vars['s'] : '';
$sentence = isset( $this->query_instance->query_vars['sentence'] ) ? $this->query_instance->query_vars['sentence'] : false;
$search_terms = array();
@ -67,7 +60,7 @@ class Search_Engine {
// added slashes screw with quote grouping when done early, so done later
$s = stripslashes( $s );
if ( $sentence ) {
$search_terms = array( $s );
$search_terms = array( trim($s, " '\"\n\r\t\v\0") );
} else {
preg_match_all( '/".*?("|$)|((?<=[\\s",+])|^)[^\\s",+]+/', $s, $matches );
@ -123,6 +116,78 @@ class Search_Engine {
}
}
/**
 * Builds the WHERE fragment that matches the search terms against the post
 * title and content, and additionally against the title and content of the
 * joined `p2` row when relationship metadata is configured.
 *
 * Each search term yields one parenthesised group of `column LIKE term`
 * comparisons joined with OR; the groups themselves are also joined with OR.
 * When the `exact` query var is empty the term is wrapped in `%` wildcards.
 *
 * @return string|false Parenthesised SQL fragment, or false when there are no search terms.
 */
function get_where_to_title_and_content() {
	global $wpdb;
	$not_exact = empty($this->query_instance->query_vars['exact']);

	$columns = ["$wpdb->posts.post_title", "$wpdb->posts.post_content"];
	if ( !empty( $this->relationships ) ) {
		// `p2` is a plain SQL alias, not a $wpdb property: the previous
		// `p2->posts.post_title` was emitted literally and produced invalid SQL.
		// NOTE(review): assumes the relationships join exposes the related post as `p2` - confirm.
		$columns[] = 'p2.post_title';
		$columns[] = 'p2.post_content';
	}

	$groups = [];
	foreach ( $this->get_search_terms() as $term ) {
		// NOTE(review): LIKE wildcards (% and _) inside $term are not neutralised here;
		// consider $wpdb->esc_like() if get_search_terms() does not already handle them.
		$esc_term = $wpdb->prepare("%s", $not_exact ? "%".$term."%" : $term);
		$groups[] = '(' . implode(" LIKE {$esc_term} OR ", $columns) . " LIKE {$esc_term})";
	}

	$searchQuery = implode(' OR ', $groups);
	return empty($searchQuery) ? false : "($searchQuery)";
}
/**
 * Builds an EXISTS sub-query that matches the search terms against the names
 * of the terms attached to the current post, limited to the taxonomies listed
 * in $this->taxonomies.
 *
 * When the `exact` query var is empty each term is wrapped in `%` wildcards.
 *
 * @return string SQL fragment, or '' when this is not a Tainacan search, no
 *                taxonomies are configured, or there are no search terms.
 */
function get_where_to_term_taxonomies() {
	if ( !$this->is_tainacan_search || empty( $this->taxonomies ) ) {
		return '';
	}
	global $wpdb;
	$not_exact = empty($this->query_instance->query_vars['exact']);

	$name_clauses = [];
	foreach ( $this->get_search_terms() as $term ) {
		$esc_term = $wpdb->prepare("%s", $not_exact ? "%".$term."%" : $term);
		$name_clauses[] = "(tter.name LIKE {$esc_term})";
	}
	if ( empty($name_clauses) ) return '';
	$search_tax_query = implode(' OR ', $name_clauses);

	// Taxonomy names are spliced into the statement as quoted literals, so they
	// are escaped first (the JOIN-based code this replaces escaped them as well).
	$taxonomies = \esc_sql( $this->taxonomies );
	$tax_where = ' ttax.taxonomy IN ( \'' . implode( '\',\'', $taxonomies ) . '\' ) ';

	return "EXISTS (
SELECT trel.object_id
FROM
$wpdb->term_relationships AS trel
INNER JOIN $wpdb->term_taxonomy AS ttax ON trel.term_taxonomy_id = ttax.term_taxonomy_id
INNER JOIN $wpdb->terms AS tter ON ttax.term_id = tter.term_id
WHERE
$wpdb->posts.ID = trel.object_id AND $tax_where AND ( $search_tax_query )
)";
}
/**
 * Builds an EXISTS sub-query that matches the search terms against the
 * meta values stored for the current post.
 *
 * When the `exact` query var is empty each term is wrapped in `%` wildcards.
 * For visitors that are not logged in, the sub-query additionally joins the
 * posts table on `m.meta_key = pmeta.ID` and requires that row to be published.
 *
 * @return string SQL fragment, or '' when this is not a Tainacan search or
 *                there are no search terms.
 */
function get_where_to_metadatas() {
	if ( !$this->is_tainacan_search ) {
		return '';
	}

	global $wpdb;
	$partial_match = empty($this->query_instance->query_vars['exact']);

	$value_clauses = [];
	foreach ( $this->get_search_terms() as $keyword ) {
		$pattern = $wpdb->prepare("%s", $partial_match ? "%".$keyword."%" : $keyword);
		$value_clauses[] = "(m.meta_value LIKE {$pattern})";
	}
	$search_meta_query = implode(' OR ', $value_clauses);
	if ( empty($search_meta_query) ) {
		return '';
	}

	// Only anonymous visitors get the extra published-only restriction.
	if ( \is_user_logged_in() ) {
		$status_join = '';
	} else {
		$status_join = " INNER JOIN $wpdb->posts pmeta ON m.meta_key = pmeta.ID AND pmeta.post_status = 'publish'";
	}

	return "EXISTS (
SELECT m.post_id
FROM $wpdb->postmeta m $status_join
WHERE ( $wpdb->posts.ID = m.post_id AND ($search_meta_query) )
)";
}
// add where clause to the search query
function search_where( $where, $wp_query ) {
@ -136,47 +201,22 @@ class Search_Engine {
if ( !$this->is_tainacan_search && !$this->ajax_request)
return $where;
global $wpdb;
$search_query = $this->get_where_to_title_and_content();
$search_tax_query = $this->get_where_to_term_taxonomies();
$search_meta_query = $this->get_where_to_metadatas();
$search_query = "($search_query) ";
if(!empty($search_tax_query)) $search_query .= " OR ($search_tax_query) ";
if(!empty($search_meta_query)) $search_query .= " OR ($search_meta_query) ";
$search_query_fields = array();
$search_query_fields = array_merge($search_query_fields, $this->search_default());
$search_query_fields = array_merge($search_query_fields, $this->build_search_categories());
$search_query_fields = array_merge($search_query_fields, $this->build_search_metadata());
$search_query_fields = array_merge($search_query_fields, $this->build_search_relationships());
$search_query_fields = array_merge($search_query_fields, $this->search_authors());
$searchQuery = '(';
$seperator = '';
$not_exact = empty($this->query_instance->query_vars['exact']);
$terms = $this->get_search_terms();
$fields = implode(", ", $search_query_fields);
foreach ( $terms as $term ) {
$esc_term = $wpdb->prepare("%s", $not_exact ? "%".$term."%" : $term);
$searchQuery .= "{$seperator}CONCAT_WS(' || ', $fields ) LIKE $esc_term";
$seperator = ' OR ';
}
$searchQuery .= ')';
if ( $searchQuery != '' && $searchQuery != '()' ) {
if ( $search_query != '' && $search_query != '()' ) {
// lets use _OUR_ query instead of WP's, as we have posts already included in our query as well(assuming it's not empty which we check for)
$where = " AND ((" . $searchQuery . ")) ";
$where = " AND ((" . $search_query . ")) ";
}
return $where;
}
// search for terms in default locations like title and content
// replacing the old search terms seems to be the best way to
// avoid issue with multiple terms
function search_default() {
global $wpdb;
return ["$wpdb->posts.post_title", "$wpdb->posts.post_content"];
}
//Duplicate fix provided by Tiago.Pocinho
function distinct( $query ) {
global $wpdb;
if ( !empty( $this->query_instance->query_vars['s'] ) ) {
if ( strstr( $query, 'DISTINCT' ) ) {}
else {
@ -186,82 +226,6 @@ class Search_Engine {
return $query;
}
//search attachments
function search_attachments( $where ) {
global $wpdb;
if ( !empty( $this->query_instance->query_vars['s'] ) ) {
$where = str_replace( '"', '\'', $where );
if ( !$this->wp_ver28 ) {
$where = str_replace( " AND (post_status = 'publish'", " AND (post_status = 'publish' OR post_type = 'attachment'", $where );
$where = str_replace( "AND post_type != 'attachment'", "", $where );
}
else {
$where = str_replace( " AND ($wpdb->posts.post_status = 'publish'", " AND ($wpdb->posts.post_status = 'publish' OR $wpdb->posts.post_type = 'attachment'", $where );
$where = str_replace( "AND $wpdb->posts.post_type != 'attachment'", "", $where );
}
}
return $where;
}
// Build the author search
function search_authors() {
return ["u.display_name"];
}
function build_search_relationships(){
if ( empty( $this->relationships ) ) {
return [];
}
return ['p2.post_title'];
}
// create the search meta data query
function build_search_metadata() {
return ["m.meta_value"];
}
// create the search categories query
function build_search_categories() {
if (empty($this->taxonomies)) {
return [];
}
return ["tter.name", "ttax.description"];
}
//join for searching authors
function search_authors_join( $join ) {
if ($this->is_inner_query) {
return $join;
}
global $wpdb;
if ( $this->is_tainacan_search ) {
$join .= " LEFT JOIN $wpdb->users AS u ON ($wpdb->posts.post_author = u.ID) ";
}
return $join;
}
//join for searching metadata
function search_metadata_join( $join ) {
if ($this->is_inner_query) {
return $join;
}
global $wpdb;
if ( $this->is_tainacan_search ) {
$join .= " LEFT JOIN $wpdb->postmeta AS m ON ($wpdb->posts.ID = m.post_id) ";
}
return $join;
}
// join for relationship metadata
function relationships_join( $join ) {
@ -279,26 +243,74 @@ class Search_Engine {
return $join;
}
//join for searching tags
function terms_join( $join ) {
// //search attachments
// function search_attachments( $where ) {
// global $wpdb;
// if ( !empty( $this->query_instance->query_vars['s'] ) ) {
// $where = str_replace( '"', '\'', $where );
// if ( !$this->wp_ver28 ) {
// $where = str_replace( " AND (post_status = 'publish'", " AND (post_status = 'publish' OR post_type = 'attachment'", $where );
// $where = str_replace( "AND post_type != 'attachment'", "", $where );
// }
// else {
// $where = str_replace( " AND ($wpdb->posts.post_status = 'publish'", " AND ($wpdb->posts.post_status = 'publish' OR $wpdb->posts.post_type = 'attachment'", $where );
// $where = str_replace( "AND $wpdb->posts.post_type != 'attachment'", "", $where );
// }
// }
// return $where;
// }
// //join for searching metadata
// function search_metadata_join( $join ) {
if ($this->is_inner_query) {
return $join;
}
// if ($this->is_inner_query) {
// return $join;
// }
global $wpdb;
// global $wpdb;
// if ( $this->is_tainacan_search ) {
// $searchMetaQuery = $this->build_search_terms_query('meta_terms');
// $join .= <<<EOF
// LEFT JOIN
// (
// SELECT
// m.post_id, true as contains
// FROM
// $wpdb->postmeta m
// WHERE
// ( $searchMetaQuery )
// ) AS metas ON $wpdb->posts.ID = metas.post_id
// EOF;
// }
// return $join;
// }
if ( $this->is_tainacan_search && !empty( $this->taxonomies ) ) {
// //join for searching taxonomies terms
// function terms_join( $join ) {
// if ($this->is_inner_query) {
// return $join;
// }
// global $wpdb;
// $searchTaxQuery = $this->build_search_terms_query('tax_terms');
// if ( $this->is_tainacan_search && !empty( $this->taxonomies ) ) {
// $tax_where = ' ttax.taxonomy IN ( \'' . implode( '\',\'', $this->taxonomies ) . '\' ) ';
// $join .= <<<EOF
// LEFT JOIN (
// SELECT DISTINCT
// trel.object_id as post_id,
// true as contains
// FROM
// $wpdb->term_relationships AS trel
// INNER JOIN $wpdb->term_taxonomy AS ttax ON trel.term_taxonomy_id = ttax.term_taxonomy_id
// INNER JOIN $wpdb->terms AS tter ON ttax.term_id = tter.term_id
// WHERE
// $tax_where AND ( $searchTaxQuery )
// ) tax_terms ON $wpdb->posts.ID = tax_terms.post_id
// EOF;
// }
// return $join;
// }
foreach ( $this->taxonomies as $taxonomy ) {
$on[] = "ttax.taxonomy = '" . addslashes( $taxonomy )."'";
}
// build our final string
$on = ' ( ' . implode( ' OR ', $on ) . ' ) ';
$join .= " LEFT JOIN $wpdb->term_relationships AS trel ON ($wpdb->posts.ID = trel.object_id) LEFT JOIN $wpdb->term_taxonomy AS ttax ON ( " . $on . " AND trel.term_taxonomy_id = ttax.term_taxonomy_id) LEFT JOIN $wpdb->terms AS tter ON (ttax.term_id = tter.term_id) ";
}
return $join;
}
} // END
}

View File

@ -172,7 +172,7 @@ class TAINACAN_REST_Queries extends TAINACAN_UnitApiTestCase {
$this->assertEquals($collectionB->get_name(), $data1[0]['name']);
// Search collection with a specific keyword and not other keyword
$search_query = ['search' => 'Collection -A'];
$search_query = ['search' => 'Collection -A', 'sentence' => false];
$search_request = new \WP_REST_Request('GET', $this->namespace . '/collections');

View File

@ -217,7 +217,15 @@ class TAINACAN_REST_Search extends TAINACAN_UnitApiTestCase {
public function test_search() {
$search_collection_poemas = new \WP_REST_Request('GET', $this->namespace . '/collection/' . $this->collection_poemas->get_id() . '/items');
$search_query = ['search' => '"Vinícius de Moraes"'];
$search_query = ['search' => '"Vinícius de Moraes"', 'sentence' => false];
$search_collection_poemas->set_query_params($search_query);
$search_response = $this->server->dispatch($search_collection_poemas);
$items = $search_response->get_data()['items'];
$this->assertCount(2, $items);
$search_collection_poemas = new \WP_REST_Request('GET', $this->namespace . '/collection/' . $this->collection_poemas->get_id() . '/items');
$search_query = ['search' => 'Vinícius de Moraes', 'sentence' => true];
$search_collection_poemas->set_query_params($search_query);
$search_response = $this->server->dispatch($search_collection_poemas);
$items = $search_response->get_data()['items'];
@ -225,7 +233,15 @@ class TAINACAN_REST_Search extends TAINACAN_UnitApiTestCase {
$this->assertCount(2, $items);
$search_collection_frase = new \WP_REST_Request('GET', $this->namespace . '/collection/' . $this->collection_frases->get_id() . '/items');
$search_query = ['search' => '"Guimarães Rosa"'];
$search_query = ['search' => '"Guimarães Rosa"', 'sentence' => false];
$search_collection_frase->set_query_params($search_query);
$search_response = $this->server->dispatch($search_collection_frase);
$items = $search_response->get_data()['items'];
$this->assertCount(2, $items);
$search_collection_frase = new \WP_REST_Request('GET', $this->namespace . '/collection/' . $this->collection_frases->get_id() . '/items');
$search_query = ['search' => 'Guimarães Rosa', 'sentence' => true];
$search_collection_frase->set_query_params($search_query);
$search_response = $this->server->dispatch($search_collection_frase);
$items = $search_response->get_data()['items'];
@ -234,7 +250,7 @@ class TAINACAN_REST_Search extends TAINACAN_UnitApiTestCase {
$search_items = new \WP_REST_Request('GET', $this->namespace . '/items');
$search_query = ['search' => 'texto'];
$search_query = ['search' => 'texto', 'sentence' => false];
$search_items->set_query_params($search_query);
$search_response = $this->server->dispatch($search_items);
$items = $search_response->get_data()['items'];
@ -242,7 +258,7 @@ class TAINACAN_REST_Search extends TAINACAN_UnitApiTestCase {
$this->assertCount(4, $items);
$search_items = new \WP_REST_Request('GET', $this->namespace . '/items');
$search_query = ['search' => 'texto poesia'];
$search_query = ['search' => 'texto poesia', 'sentence' => false];
$search_items->set_query_params($search_query);
$search_response = $this->server->dispatch($search_items);
$items = $search_response->get_data()['items'];
@ -250,7 +266,7 @@ class TAINACAN_REST_Search extends TAINACAN_UnitApiTestCase {
$this->assertCount(4, $items);
$search_items = new \WP_REST_Request('GET', $this->namespace . '/items');
$search_query = ['search' => '"texto poesia"'];
$search_query = ['search' => '"texto poesia"', 'sentence' => false];
$search_items->set_query_params($search_query);
$search_response = $this->server->dispatch($search_items);
$items = $search_response->get_data()['items'];
@ -258,15 +274,15 @@ class TAINACAN_REST_Search extends TAINACAN_UnitApiTestCase {
$this->assertCount(1, $items);
$search_items = new \WP_REST_Request('GET', $this->namespace . '/items');
$search_query = ['search' => '"texto poesia" sagarana'];
$search_query = ['search' => '"texto poesia" sagarana', 'sentence' => false];
$search_items->set_query_params($search_query);
$search_response = $this->server->dispatch($search_items);
$items = $search_response->get_data()['items'];
$this->assertCount(2, $items);
$search_items = new \WP_REST_Request('GET', $this->namespace . '/items');
$search_query = ['search' => 'infinito dure'];
$search_query = ['search' => 'infinito dure', 'sentence' => false];
$search_items->set_query_params($search_query);
$search_response = $this->server->dispatch($search_items);
$items = $search_response->get_data()['items'];