Files
openGauss-server/src/test/regress/sql/ts_utf8.sql
dengxuyue 1567043064 同步source code
日期: 12-26
    revision: ee5b054c
2020-12-28 22:19:21 +08:00

253 lines
14 KiB
SQL

CREATE DATABASE db_utf8 TEMPLATE template0 encoding 'UTF8';
\c db_utf8
------------------------------ basic synax for text search configuration -------------------------------
--
--- Pound
--
-- CREATE TEXT SEARCH CONFIGURATION
create text search configuration pound_utf8(parser=pound) with (split_flag = '#');
ALTER TEXT SEARCH CONFIGURATION pound_utf8 ADD MAPPING FOR zh_words, en_word, numeric, alnum, grapsymbol, multisymbol WITH simple;
select cfgname, cfoptions from pg_ts_config where cfgname = 'pound_utf8';
SELECT to_tsvector('pound_utf8', '京东#淘宝#滴滴#爱奇艺#芒果TV');
-- SET THE SPLIT FLAG to '@'
alter text search configuration pound_utf8 set (split_flag = '@');
select cfgname, cfoptions from pg_ts_config where cfgname = 'pound_utf8';
SELECT to_tsvector('pound_utf8', '京东@淘宝@滴滴@爱奇艺@芒果TV');
-- SET THE SPLIT FLAG to '$'
alter text search configuration pound_utf8 set (split_flag = '$');
select cfgname, cfoptions from pg_ts_config where cfgname = 'pound_utf8';
SELECT to_tsvector('pound_utf8', '京东$淘宝$滴滴$爱奇艺$芒果TV');
-- SET THE SPLIT FLAG to '%'
alter text search configuration pound_utf8 set (split_flag = '%');
select cfgname, cfoptions from pg_ts_config where cfgname = 'pound_utf8';
SELECT to_tsvector('pound_utf8', '京东%淘宝%滴滴%爱奇艺%芒果TV');
-- SET THE SPLIT FLAG to '/'
alter text search configuration pound_utf8 set (split_flag = '/');
select cfgname, cfoptions from pg_ts_config where cfgname = 'pound_utf8';
SELECT to_tsvector('pound_utf8', '京东/淘宝/滴滴/爱奇艺/芒果TV');
-- FOR UNSUPPORTED SPLPT_FLAG, REPORT ERROR
alter text search configuration pound_utf8 set (split_flag = ',');
-- WHEN SPLIT FLAG IS NOT SINGLE CHARACTER, REPORT ERROR
alter text search configuration pound_utf8 set (split_flag = '#@');
-- WHEN SPLIT FLAG IS NULL CHARACTER, REPORT ERROR
alter text search configuration pound_utf8 set (split_flag = '');
-- WHEN SINGLE TOKEN EXCEED 256 CHARACTERS, SPLIT DIRECTLY EVEN NO SPLIT FLAG IS FOUND
select to_tsvector('pound_utf8','1981年8月26日,美国宇宙飞船“旅行者2号”飞过土星,取得了一系列探测成果,其中包括发现了土星的第17颗卫星——土卫17。所以,今天我们就来说一说“旅行者2号”上的“地球之音”唱片。1977年8月和9月,人类成功发射了“旅行者1号”和“旅行者2号”探测器,再次向外星人作了更详细的自我介绍。这次,它们各自携带了一张称为“地球之音”的唱片,上面录制了丰富的地球信息。这两张唱片都是镀金铜质的,直径为30.5厘米。唱片上录有115幅照片和图表,35种各类声音,近60种语言的问候语和27首世界著名乐曲等。115幅照片中包括我国八达岭长城');
select to_tsvector('pound_utf8','1981年8月26日,美国宇宙飞船“旅行者2号”飞过土星,取得了一系列探测成果,其中包括发现了土星的第17颗卫星——土卫17。所以,今天我们就来说一说“旅行者2号”上的“地球之音”唱片。1977年8月和9月,人类成功发射了“旅行者1号”和“旅行者2号”探测器,再次向外星人作了更详细的自我介绍。这次,它们各自携带了一张称为“地球之音”的唱片,上面录制了丰富的地球信息。这两张唱片都是镀金铜质的,直径为30.5厘米。唱片上录有115幅照片和图表,35种各类声音,近60种语言的问候语和27首世界著名乐曲等。115幅///照片中包括我国八达岭长城');
drop text search configuration pound_utf8;
create text search configuration pound_utf8(parser=pound) with (split_flag = '#');
ALTER TEXT SEARCH CONFIGURATION pound_utf8 ADD MAPPING FOR zh_words, en_word, numeric, alnum, grapsymbol, multisymbol WITH simple;
select cfgname, cfoptions from pg_ts_config where cfgname = 'pound_utf8';
SELECT to_tsvector('pound_utf8', '京东#淘宝#滴滴#爱奇艺#芒果TV');
--
--- N-gram
--
-- CREATE TEXT SEARCH CONFIGURATION
create text search configuration ngram1(parser=ngram) with (punctuation_ignore = on, gram_size = 2, grapsymbol_ignore = on);
alter text search configuration ngram1 ADD MAPPING FOR zh_words, en_word, numeric, alnum, grapsymbol, multisymbol with simple;
select cfgname, cfoptions from pg_ts_config where cfgname = 'ngram1';
SELECT to_tsvector('ngram1', '画,龙&点睛');
--punctuation_ignore
alter text search configuration ngram1 set (punctuation_ignore = off);
SELECT to_tsvector('ngram1', '画,龙&点睛');
--punctuation_ignore
alter text search configuration ngram1 set (grapsymbol_ignore = off);
SELECT to_tsvector('ngram1', '画,龙&点睛');
--punctuation_ignore
alter text search configuration ngram1 set (gram_size = 3);
SELECT to_tsvector('ngram1', '画,龙&点睛');
drop text search configuration ngram1;
create text search configuration ngram1(parser=ngram);
alter text search configuration ngram1 ADD MAPPING FOR zh_words, en_word, numeric, alnum, grapsymbol, multisymbol with simple;
select cfgname, cfoptions from pg_ts_config where cfgname = 'ngram1';
SELECT to_tsvector('ngram1', '画,龙&点睛');
--------------------------- basic operation for text search parser ----------------------------------
--
---- N-gram parser
--
set ngram_gram_size = 2;
set ngram_grapsymbol_ignore = off;
set ngram_punctuation_ignore = off;
select ts_parse('ngram', 'Uber提供了电脑版、手机版&平板电脑等版本');
-- configuration parameter: ngram.punctuation_ignore
set ngram_punctuation_ignore = on;
select ts_parse('ngram', 'Uber提供了电脑版、手机版&平板电脑等版本');
-- configuration parameter: ngram.grapsymbol_ignore
set ngram_grapsymbol_ignore = on;
select ts_parse('ngram', 'Uber提供了电脑版、手机版&平板电脑等版本');
-- configuration parameter: ngram.grapsymbol_ignore
set ngram_gram_size = 4;
select ts_parse('ngram', 'Uber提供了电脑版、手机版&平板电脑等版本');
select ts_token_type('ngram');
------------------------------- basic operation for text search datatype -----------------------
--
--- pound
--
select to_tsvector('pound_utf8', 'Uber#滴滴#爱奇艺#芒果TV');
select to_tsquery('pound_utf8','芒果TV');
--
---- N-gram
--
select to_tsvector('ngram1','辽GQQ360');
select to_tsquery('ngram1','360');
------------------------------- basic operation for text search operator -----------------------
--
--- pound
--
select to_tsvector('pound_utf8','Uber#滴滴#爱奇艺#芒果TV') @@ to_tsquery('pound_utf8','芒果TV'); -- true
select to_tsvector('pound_utf8','Uber#滴滴#爱奇艺#芒果TV') @@@ to_tsquery('pound_utf8','芒果TV'); -- true
select to_tsvector('pound_utf8','芒果TV') @@ to_tsquery('pound_utf8','QQ'); -- false
select (to_tsvector('pound_utf8','滴滴') || to_tsvector('pound_utf8','爱奇艺')) @@ to_tsquery('pound_utf8','滴滴'); -- true
select to_tsquery('pound_utf8','淘宝') && to_tsquery('pound_utf8','爱奇艺');
select to_tsquery('pound_utf8','淘宝') || to_tsquery('pound_utf8','爱奇艺');
select to_tsvector('pound_utf8','Uber#滴滴#爱奇艺#芒果TV') @@ (to_tsquery('pound_utf8','淘宝')); -- false
select to_tsvector('pound_utf8','Uber#滴滴#爱奇艺#芒果TV') @@ (to_tsquery('pound_utf8','爱奇艺')); -- true
select to_tsvector('pound_utf8','Uber#滴滴#爱奇艺#芒果TV') @@ (to_tsquery('pound_utf8','淘宝') || to_tsquery('pound_utf8','爱奇艺')); -- true
select to_tsvector('pound_utf8','Uber#滴滴#爱奇艺#芒果TV') @@ (to_tsquery('pound_utf8','淘宝') && to_tsquery('pound_utf8','爱奇艺')); --false
select !!to_tsquery('pound_utf8','爱奇艺');
select to_tsvector('pound_utf8','Uber#滴滴#爱奇艺#芒果TV') @@ !!to_tsquery('pound_utf8','爱奇艺'); -- false
select to_tsvector('pound_utf8','Uber#滴滴#爱奇艺#芒果TV') @@ !!to_tsquery('pound_utf8','淘宝'); --true
select to_tsquery('pound_utf8','爱奇艺') @> to_tsquery('pound_utf8','Uber#滴滴#爱奇艺#芒果TV'); -- false
select to_tsquery('pound_utf8','Uber#滴滴#爱奇艺#芒果TV') @> to_tsquery('pound_utf8','爱奇艺'); -- true
select to_tsquery('pound_utf8','爱奇艺') <@ to_tsquery('pound_utf8','Uber#滴滴#爱奇艺#芒果TV'); -- true
select to_tsquery('pound_utf8','Uber#滴滴#爱奇艺#芒果TV') <@ to_tsquery('pound_utf8','爱奇艺'); -- false
--
---- N-gram
--
select to_tsvector('ngram1','辽GQQ360') @@ to_tsquery('ngram1','360'); -- true
select to_tsvector('ngram1','辽GQQ360') @@@ to_tsquery('ngram1','360'); -- true
select to_tsvector('ngram1','360') @@ to_tsquery('ngram1','QQ'); -- false
select (to_tsvector('ngram1','辽GQQ') || to_tsvector('ngram1','360')) @@ to_tsquery('ngram1','QQ'); -- true
select to_tsquery('ngram1','480') && to_tsquery('ngram1','辽GQQ');
select to_tsquery('ngram1','480') || to_tsquery('ngram1','辽GQQ');
select to_tsvector('ngram1','辽GQQ360') @@ (to_tsquery('ngram1','480')); -- false
select to_tsvector('ngram1','辽GQQ360') @@ (to_tsquery('ngram1','辽GQQ')); -- true
select to_tsvector('ngram1','辽GQQ360') @@ (to_tsquery('ngram1','480') || to_tsquery('ngram1','辽GQQ')); -- true
select to_tsvector('ngram1','辽GQQ360') @@ (to_tsquery('ngram1','480') && to_tsquery('ngram1','辽GQQ')); -- false
select !!to_tsquery('ngram1','360') ;
select to_tsvector('ngram1','辽GQQ360') @@ !!to_tsquery('ngram1','360'); -- false
select to_tsvector('ngram1','辽GQQ360') @@ !!to_tsquery('ngram1','480'); -- true
select to_tsquery('ngram1','360') @> to_tsquery('ngram1','辽GQQ360'); -- false
select to_tsquery('ngram1','辽GQQ360') @> to_tsquery('ngram1','360'); -- true
select to_tsquery('ngram1','360') <@ to_tsquery('ngram1','辽GQQ360'); -- true
select to_tsquery('ngram1','辽GQQ360') <@ to_tsquery('ngram1','360'); -- false
------------------------------- basic operation for text search function -----------------------
-- pound
select get_current_ts_config(); -- english
select length(to_tsvector('pound_utf8','Uber#滴滴#爱奇艺#芒果TV')); -- 4
select length(to_tsvector('pound_utf8','Uber#滴滴#爱奇艺#芒果TV#QQ')); -- 5
select numnode(to_tsquery('pound_utf8','Uber#滴滴#爱奇艺#芒果TV'));
select numnode(to_tsquery('pound_utf8','芒果TV') || to_tsquery('pound_utf8','滴滴'));
-- select plainto_tsquery('pound_utf8', 'Uber##滴滴# 爱奇艺##芒果TV');
-- select plainto_tsquery('pound_utf8', 'Uber#滴滴#爱奇艺#芒果TV');
select querytree(to_tsquery('pound_utf8','Uber#滴滴#爱奇艺#芒果TV'));
select querytree(to_tsquery('pound_utf8','滴滴') && to_tsquery('pound_utf8','爱奇艺'));
select querytree(to_tsquery('pound_utf8','滴滴') || to_tsquery('pound_utf8','爱奇艺'));
select strip(to_tsvector('pound_utf8','Uber#滴滴#爱奇艺#芒果TV'));
select ts_headline('pound_utf8', 'Uber#滴滴#爱奇艺#芒果TV', to_tsquery('pound_utf8','Uber'));
select ts_rank(to_tsvector('pound_utf8', 'Uber#滴滴#爱奇艺#芒果TV'), to_tsquery('pound_utf8','Uber#滴滴#爱奇艺#芒果TV'));
select ts_rank(to_tsvector('pound_utf8', 'Uber#滴滴#爱奇艺#芒果TV'), to_tsquery('pound_utf8','Uber'));
select ts_rank(to_tsvector('pound_utf8', 'Uber#滴滴#爱奇艺#芒果TV'), to_tsquery('pound_utf8','滴滴'));
select ts_rank_cd(to_tsvector('pound_utf8', 'Uber#滴滴#爱奇艺#芒果TV'), to_tsquery('pound_utf8','Uber#滴滴#爱奇艺#芒果TV'));
select ts_rank_cd(to_tsvector('pound_utf8', 'Uber#滴滴#爱奇艺#芒果TV'), to_tsquery('pound_utf8','Uber'));
select ts_rank_cd(to_tsvector('pound_utf8', 'Uber#滴滴#爱奇艺#芒果TV'), to_tsquery('pound_utf8','滴滴'));
select ts_rewrite(to_tsquery('pound_utf8','Uber#滴滴#爱奇艺#芒果TV'), to_tsquery('pound_utf8','Uber'), to_tsquery('pound_utf8','爱奇艺'));
--N-gram
select get_current_ts_config(); -- english
select length(to_tsvector('ngram1','辽GQQ360')); -- 6
select length(to_tsvector('ngram1','辽GQQ888')); -- 5
select numnode(to_tsquery('ngram1','辽GQQ360')); -- 11
select numnode(to_tsquery('ngram1','480') || to_tsquery('ngram1','辽GQQ')); -- 9
alter text search configuration ngram1 set (grapsymbol_ignore = off);
alter text search configuration ngram1 set (punctuation_ignore = on);
alter text search configuration ngram1 set (gram_size = 2);
select cfgname, cfoptions from pg_ts_config where cfgname = 'ngram1';
select plainto_tsquery('ngram1', '车,牌,号');
select plainto_tsquery('ngram1', '车,牌,号');
select querytree(to_tsquery('ngram1','中国人民银行'));
select querytree(to_tsquery('ngram1','中国人') && to_tsquery('ngram1','民银行'));
select querytree(to_tsquery('ngram1','中国人') || to_tsquery('ngram1','民银行'));
select strip(to_tsvector('ngram1','中国人民银行'));
select ts_headline('ngram1', '中国人民银行', to_tsquery('ngram1','中国人'));
select ts_rank(to_tsvector('ngram1', '中国人民银行'), to_tsquery('ngram1','中国人民银行'));
select ts_rank(to_tsvector('ngram1', '中国人民银行'), to_tsquery('ngram1','中国人'));
select ts_rank(to_tsvector('ngram1', '中国人民银行'), to_tsquery('ngram1','中人'));
select ts_rank_cd(to_tsvector('ngram1', '中国人民银行'), to_tsquery('ngram1','中国人民银行'));
select ts_rank_cd(to_tsvector('ngram1', '中国人民银行'), to_tsquery('ngram1','中国人'));
select ts_rank_cd(to_tsvector('ngram1', '中国人民银行'), to_tsquery('ngram1','中人'));
select ts_rewrite(to_tsquery('ngram1','中国人民银行'), to_tsquery('ngram1','银行'), to_tsquery('ngram1','政府'));
create table t1(a int);
drop table t1 cascade;
drop text search configuration ngram1;
drop text search configuration pound_utf8;
CREATE TEXT SEARCH CONFIGURATION ntestf(parser=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA);
CREATE TEXT SEARCH CONFIGURATION ntestf(parser=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA.B);
CREATE TEXT SEARCH CONFIGURATION ntestf(parser=B.AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA);
CREATE TEXT SEARCH CONFIGURATION ntestf(parser=A.B.C);
CREATE TEXT SEARCH CONFIGURATION ntestf(parser='A.B.C');
\c postgres
DROP DATABASE db_utf8;