--- a/inc/functions_search.php +++ b/inc/functions_search.php @@ -1802,3 +1802,555 @@ function perform_search_mysql_ft($search) ); } + +/** + * Clean search keywords for PostgreSQL fulltext searching, converting user input + * into valid tsquery syntax. + * + * Translates boolean operators: + * "and" => &, "or" => |, "not" => ! + * Handles quoted phrases using the followed-by operator (<->). + * Supports wildcard prefix matching with :* suffix. + * Falls back gracefully on malformed input. + * + * @param string $keywords The raw keywords from user input + * @return string|bool The cleaned tsquery string or false on failure + */ +function clean_keywords_ft_pgsql($keywords) +{ + if(!$keywords) + { + return false; + } + + $keywords = my_strtolower($keywords); + $keywords = preg_replace("#\*{2,}#s", "*", $keywords); + // Remove characters that are problematic in tsquery syntax + $keywords = preg_replace("#([\[\]\|\.\,\:\;\!\&\(\)])#s", " ", $keywords); + $keywords = preg_replace("#\s+#s", " ", $keywords); + $keywords = trim($keywords); + + if(!$keywords) + { + return false; + } + + global $mybb; + + $min_word_length = (int)$mybb->settings['minsearchword']; + if($min_word_length <= 0) + { + $min_word_length = 3; + } + + // Split on double quotes to identify phrases vs. free terms + $segments = explode('"', $keywords); + $inquote = false; + $tsquery_parts = array(); + $pending_operator = '&'; // default boolean between terms + + foreach($segments as $segment) + { + $segment = trim($segment); + if($segment === '') + { + $inquote = !$inquote; + continue; + } + + if($inquote) + { + // Quoted phrase: join words with the followed-by operator <-> + $phrase_words = preg_split("#\s+#", $segment); + $valid_phrase_words = array(); + foreach($phrase_words as $pw) + { + $pw = preg_replace("#[^a-z0-9_*]#", '', $pw); + if($pw === '' || my_strlen(str_replace('*', '', $pw)) < $min_word_length) + { + continue; + } + // Convert trailing wildcard * to PostgreSQL prefix match :* + if(substr($pw, -1) === '*') + { + $pw = substr($pw, 0, -1).':*'; + } + $valid_phrase_words[] = "'" . $pw . "'"; + } + if(!empty($valid_phrase_words)) + { + $phrase_tsquery = implode(' <-> ', $valid_phrase_words); + if(count($valid_phrase_words) > 1) + { + $phrase_tsquery = '(' . $phrase_tsquery . ')'; + } + if(!empty($tsquery_parts)) + { + $tsquery_parts[] = $pending_operator; + $pending_operator = '&'; + } + $tsquery_parts[] = $phrase_tsquery; + } + } + else + { + // Non-quoted segment: split into words and handle boolean operators + $words = preg_split("#\s+#", $segment); + foreach($words as $word) + { + $word = trim($word); + if($word === '') + { + continue; + } + + // Check for boolean operators + if($word === 'and') + { + $pending_operator = '&'; + continue; + } + elseif($word === 'or') + { + $pending_operator = '|'; + continue; + } + elseif($word === 'not') + { + // NOT modifies the next term + if(!empty($tsquery_parts)) + { + $tsquery_parts[] = '&'; + } + $tsquery_parts[] = '!'; + $pending_operator = ''; + continue; + } + + // Check for leading +/- operators from user input + $negate = false; + if(substr($word, 0, 1) === '-') + { + $negate = true; + $word = substr($word, 1); + } + elseif(substr($word, 0, 1) === '+') + { + $word = substr($word, 1); + } + + // Strip any remaining special characters + $word = preg_replace("#[^a-z0-9_*]#", '', $word); + $bare_word = str_replace('*', '', $word); + + if($bare_word === '' || my_strlen($bare_word) < $min_word_length) + { + continue; + } + + // Convert trailing wildcard * to PostgreSQL prefix match :* + if(substr($word, -1) === '*') + { + $word = substr($word, 0, -1).':*'; + } + + $term = "'" . $word . "'"; + + if($negate) + { + if(!empty($tsquery_parts) && $pending_operator !== '') + { + $tsquery_parts[] = $pending_operator; + } + elseif(!empty($tsquery_parts)) + { + $tsquery_parts[] = '&'; + } + $tsquery_parts[] = '!' . $term; + $pending_operator = '&'; + } + else + { + if(!empty($tsquery_parts) && $pending_operator !== '') + { + $tsquery_parts[] = $pending_operator; + } + elseif(!empty($tsquery_parts)) + { + $tsquery_parts[] = '&'; + } + $tsquery_parts[] = $term; + $pending_operator = '&'; + } + } + } + + $inquote = !$inquote; + } + + if(empty($tsquery_parts)) + { + return false; + } + + $tsquery = implode(' ', $tsquery_parts); + + // Clean up any malformed operator sequences (e.g., "& &", "& |", trailing operators) + $tsquery = preg_replace("#(\s*[&|!]\s*){2,}#", ' & ', $tsquery); + $tsquery = preg_replace("#^\s*[&|]\s*#", '', $tsquery); + $tsquery = preg_replace("#\s*[&|]\s*$#", '', $tsquery); + $tsquery = trim($tsquery); + + if(!$tsquery || $tsquery === '!' || $tsquery === '&' || $tsquery === '|') + { + return false; + } + + return $tsquery; +} + +/** + * Perform a thread and post search under PostgreSQL using native fulltext search + * capabilities (tsvector/tsquery with GIN indexes). + * + * This function mirrors the structure of perform_search_mysql_ft() but generates + * PostgreSQL-compatible fulltext query clauses using to_tsvector() @@ to_tsquery(). + * + * The text search configuration defaults to 'english' but can be overridden via + * the $mybb->settings['pgsql_fts_config'] setting. + * + * Recommended GIN indexes for performance: + * CREATE INDEX idx_posts_message_fts ON mybb_posts USING gin(to_tsvector('english', message)); + * CREATE INDEX idx_posts_subject_fts ON mybb_posts USING gin(to_tsvector('english', subject)); + * CREATE INDEX idx_threads_subject_fts ON mybb_threads USING gin(to_tsvector('english', subject)); + * + * @param array $search Array of search data + * @return array Array of search data with results mixed in + */ +function perform_search_pgsql_ft($search) +{ + global $mybb, $db, $lang; + + $keywords = clean_keywords_ft_pgsql($search['keywords']); + + if($mybb->settings['minsearchword'] < 1) + { + $mybb->settings['minsearchword'] = 3; + } + + // Determine the PostgreSQL text search configuration to use + $ts_config = 'english'; + if(!empty($mybb->settings['pgsql_fts_config'])) + { + // Sanitize: only allow alphanumeric and underscore + $ts_config = preg_replace("#[^a-z0-9_]#i", '', $mybb->settings['pgsql_fts_config']); + if(empty($ts_config)) + { + $ts_config = 'english'; + } + } + + $message_lookin = $subject_lookin = ''; + if($keywords) + { + // Validate keyword lengths (same approach as MySQL FT) + $keywords_for_check = preg_replace("#[':*!&|<>()\-\s]#", ' ', $keywords); + $check_words = preg_split("#\s+#", trim($keywords_for_check)); + $all_too_short = true; + foreach($check_words as $word) + { + if($word !== '' && my_strlen($word) >= $mybb->settings['minsearchword']) + { + $all_too_short = false; + break; + } + } + + if($all_too_short) + { + $lang->error_minsearchlength = $lang->sprintf($lang->error_minsearchlength, $mybb->settings['minsearchword']); + error($lang->error_minsearchlength); + } + + $escaped_keywords = $db->escape_string($keywords); + $message_lookin = "AND to_tsvector('{$ts_config}', p.message) @@ to_tsquery('{$ts_config}', '{$escaped_keywords}')"; + $subject_lookin = "AND to_tsvector('{$ts_config}', t.subject) @@ to_tsquery('{$ts_config}', '{$escaped_keywords}')"; + } + + $post_usersql = ''; + $thread_usersql = ''; + if(!empty($search['author'])) + { + $userids = array(); + $search['author'] = my_strtolower($search['author']); + if($search['matchusername']) + { + $user = get_user_by_username($search['author']); + if($user) + { + $userids[] = $user['uid']; + } + } + else + { + $query = $db->simple_select("users", "uid", "LOWER(username) LIKE '%".$db->escape_string_like($search['author'])."%'"); + while($user = $db->fetch_array($query)) + { + $userids[] = $user['uid']; + } + } + + if(count($userids) < 1) + { + error($lang->error_nosearchresults); + } + else + { + $userids = implode(',', $userids); + $post_usersql = " AND p.uid IN (".$userids.")"; + $thread_usersql = " AND t.uid IN (".$userids.")"; + } + } + + $datecut = $thread_datecut = $post_datecut = ''; + if(!empty($search['postdate'])) + { + if($search['pddir'] == 0) + { + $datecut = "<="; + } + else + { + $datecut = ">="; + } + $now = TIME_NOW; + $datelimit = $now-(86400 * $search['postdate']); + $datecut .= "'$datelimit'"; + $post_datecut = " AND p.dateline $datecut"; + $thread_datecut = " AND t.dateline $datecut"; + } + + $thread_replycut = ''; + if(!empty($search['numreplies']) && $search['findthreadst']) + { + if((int)$search['findthreadst'] == 1) + { + $thread_replycut = " AND t.replies >= '".(int)$search['numreplies']."'"; + } + else + { + $thread_replycut = " AND t.replies <= '".(int)$search['numreplies']."'"; + } + } + + $thread_prefixcut = ''; + $prefixlist = array(); + if(!empty($search['threadprefix']) && $search['threadprefix'][0] != 'any') + { + foreach($search['threadprefix'] as $threadprefix) + { + $threadprefix = (int)$threadprefix; + $prefixlist[] = $threadprefix; + } + } + if(count($prefixlist) == 1) + { + $thread_prefixcut .= " AND t.prefix='$threadprefix' "; + } + else + { + if(count($prefixlist) > 1) + { + $thread_prefixcut = " AND t.prefix IN (".implode(',', $prefixlist).")"; + } + } + + $forumin = ''; + $fidlist = array(); + if(!empty($search['forums']) && (!is_array($search['forums']) || $search['forums'][0] != "all")) + { + if(!is_array($search['forums'])) + { + $search['forums'] = array((int)$search['forums']); + } + foreach($search['forums'] as $forum) + { + $forum = (int)$forum; + if($forum > 0) + { + $fidlist[] = $forum; + $child_list = get_child_list($forum); + if(is_array($child_list)) + { + $fidlist = array_merge($fidlist, $child_list); + } + } + } + $fidlist = array_unique($fidlist); + if(count($fidlist) >= 1) + { + $forumin = " AND t.fid IN (".implode(',', $fidlist).")"; + } + } + + $permsql = ""; + $onlyusfids = array(); + + // Check group permissions if we can't view threads not started by us + $group_permissions = forum_permissions(); + foreach($group_permissions as $fid => $forum_permissions) + { + if(isset($forum_permissions['canonlyviewownthreads']) && $forum_permissions['canonlyviewownthreads'] == 1) + { + $onlyusfids[] = $fid; + } + } + if(!empty($onlyusfids)) + { + $permsql .= "AND ((t.fid IN(".implode(',', $onlyusfids).") AND t.uid='{$mybb->user['uid']}') OR t.fid NOT IN(".implode(',', $onlyusfids)."))"; + } + + $unsearchforums = get_unsearchable_forums(); + if($unsearchforums) + { + $permsql .= " AND t.fid NOT IN ($unsearchforums)"; + } + $inactiveforums = get_inactive_forums(); + if($inactiveforums) + { + $permsql .= " AND t.fid NOT IN ($inactiveforums)"; + } + + $visiblesql = $post_visiblesql = $plain_post_visiblesql = ""; + if(isset($search['visible'])) + { + if($search['visible'] == 1) + { + $visiblesql = " AND t.visible = '1'"; + if($search['postthread'] == 1) + { + $post_visiblesql = " AND p.visible = '1'"; + $plain_post_visiblesql = " AND visible = '1'"; + } + } + elseif($search['visible'] == -1) + { + $visiblesql = " AND t.visible = '-1'"; + if($search['postthread'] == 1) + { + $post_visiblesql = " AND p.visible = '-1'"; + $plain_post_visiblesql = " AND visible = '-1'"; + } + } + else + { + $visiblesql = " AND t.visible != '1'"; + if($search['postthread'] == 1) + { + $post_visiblesql = " AND p.visible != '1'"; + $plain_post_visiblesql = " AND visible != '1'"; + } + } + } + + // Moderators can view unapproved threads and deleted threads from forums they moderate + $unapproved_where_t = get_visible_where('t'); + $unapproved_where_p = get_visible_where('p'); + + // Searching a specific thread? + $tidsql = ''; + if(!empty($search['tid'])) + { + $tidsql = " AND t.tid='".(int)$search['tid']."'"; + } + + $limitsql = ''; + if((int)$mybb->settings['searchhardlimit'] > 0) + { + $limitsql = "LIMIT ".(int)$mybb->settings['searchhardlimit']; + } + + // Searching both posts and thread titles + $threads = array(); + $posts = array(); + $firstposts = array(); + if($search['postthread'] == 1) + { + // No need to search subjects when looking for results within a specific thread + if(empty($search['tid'])) + { + $query = $db->query(" + SELECT t.tid, t.firstpost + FROM ".TABLE_PREFIX."threads t + WHERE 1=1 {$thread_datecut} {$thread_replycut} {$thread_prefixcut} {$forumin} {$thread_usersql} {$permsql} {$visiblesql} AND ({$unapproved_where_t}) AND t.closed NOT LIKE 'moved|%' {$subject_lookin} + {$limitsql} + "); + while($thread = $db->fetch_array($query)) + { + $threads[$thread['tid']] = $thread['tid']; + if($thread['firstpost']) + { + $posts[$thread['tid']] = $thread['firstpost']; + } + } + } + + $query = $db->query(" + SELECT p.pid, p.tid + FROM ".TABLE_PREFIX."posts p + LEFT JOIN ".TABLE_PREFIX."threads t ON (t.tid=p.tid) + WHERE 1=1 {$post_datecut} {$thread_replycut} {$thread_prefixcut} {$forumin} {$post_usersql} {$permsql} {$tidsql} {$post_visiblesql} {$visiblesql} AND ({$unapproved_where_t}) AND ({$unapproved_where_p}) AND t.closed NOT LIKE 'moved|%' {$message_lookin} + {$limitsql} + "); + while($post = $db->fetch_array($query)) + { + $posts[$post['pid']] = $post['pid']; + $threads[$post['tid']] = $post['tid']; + } + if(count($posts) < 1 && count($threads) < 1) + { + error($lang->error_nosearchresults); + } + $threads = implode(',', $threads); + $posts = implode(',', $posts); + } + // Searching only thread titles + else + { + $query = $db->query(" + SELECT t.tid, t.firstpost + FROM ".TABLE_PREFIX."threads t + WHERE 1=1 {$thread_datecut} {$thread_replycut} {$thread_prefixcut} {$forumin} {$thread_usersql} {$permsql} {$visiblesql} AND ({$unapproved_where_t}) {$subject_lookin} + {$limitsql} + "); + while($thread = $db->fetch_array($query)) + { + $threads[$thread['tid']] = $thread['tid']; + if($thread['firstpost']) + { + $firstposts[$thread['tid']] = $thread['firstpost']; + } + } + if(count($threads) < 1) + { + error($lang->error_nosearchresults); + } + + $threads = implode(',', $threads); + $firstposts = implode(',', $firstposts); + if($firstposts) + { + $query = $db->simple_select("posts", "pid", "pid IN ($firstposts) {$plain_post_visiblesql} {$limitsql}"); + while($post = $db->fetch_array($query)) + { + $posts[$post['pid']] = $post['pid']; + } + $posts = implode(',', $posts); + } + } + return array( + "threads" => $threads, + "posts" => $posts, + "querycache" => '' + ); +} --- a/search.php +++ b/search.php @@ -1148,7 +1148,11 @@ if($db->can_search == true) { - if($mybb->settings['searchtype'] == "fulltext" && $db->supports_fulltext_boolean("posts") && $db->is_fulltext("posts")) + if($db->type == 'pgsql' && $mybb->settings['searchtype'] == "fulltext") + { + $search_results = perform_search_pgsql_ft($search_data); + } + elseif($mybb->settings['searchtype'] == "fulltext" && $db->supports_fulltext_boolean("posts") && $db->is_fulltext("posts")) { $search_results = perform_search_mysql_ft($search_data); } --- a/inc/db/PostgresPdoDbDriver.php +++ b/inc/db/PostgresPdoDbDriver.php @@ -XXX,8 +XXX,34 @@ function is_fulltext($table, $index = "") { - return false; + $tableName = $this->escape_string("{$this->table_prefix}{$table}"); + + if($index) + { + $index = $this->escape_string($index); + $query = $this->write_query(" + SELECT 1 + FROM pg_indexes + WHERE tablename = '{$tableName}' + AND indexname = '{$index}' + AND indexdef LIKE '%gin%tsvector%' + LIMIT 1 + "); + } + else + { + $query = $this->write_query(" + SELECT 1 + FROM pg_indexes + WHERE tablename = '{$tableName}' + AND indexdef LIKE '%gin%tsvector%' + LIMIT 1 + "); + } + + $exists = $this->num_rows($query); + return $exists > 0; } public function supports_fulltext($table) { - return false; + return true; } @@ -XXX,8 +XXX,36 @@ public function supports_fulltext_boolean($table) { - return false; + return true; } public function create_fulltext_index($table, $column, $name = "") { - return false; + global $mybb; + + $tableName = $this->escape_string("{$this->table_prefix}{$table}"); + $column = $this->escape_string($column); + + if(!$name) + { + $name = "ft_{$table}_{$column}"; + } + $name = $this->escape_string($name); + + // Determine the text search configuration + $ts_config = 'english'; + if(!empty($mybb->settings['pgsql_fts_config'])) + { + $ts_config = preg_replace("#[^a-z0-9_]#i", '', $mybb->settings['pgsql_fts_config']); + if(empty($ts_config)) + { + $ts_config = 'english'; + } + } + + $this->write_query(" + CREATE INDEX {$name} + ON {$tableName} + USING gin(to_tsvector('{$ts_config}', {$column})) + "); + + return true; } @@ -XXX,6 +XXX,11 @@ $this->write_query(" ALTER TABLE {$this->table_prefix}{$table} DROP INDEX {$name} "); } + + public function drop_fulltext_index($table, $name) + { + $this->write_query("DROP INDEX IF EXISTS {$this->escape_string($name)}"); + }