Files
openGauss-server/src/test/regress/sql/ts_gbk.sql
dengxuyue 1567043064 同步source code
日期: 12-26
    revision: ee5b054c
2020-12-28 22:19:21 +08:00

255 lines
14 KiB
SQL

CREATE DATABASE db_gbk TEMPLATE template0 encoding 'GBK' lc_ctype 'zh_CN.GBK' lc_collate 'zh_CN.GBK';
\c db_gbk
set client_encoding='UTF8';
show client_encoding;
show server_encoding;
------------------------------ basic synax for text search configuration -------------------------------
--
--- Pound
--
-- CREATE TEXT SEARCH CONFIGURATION
create text search configuration pound_gbk(parser=pound) with (split_flag = '#');
ALTER TEXT SEARCH CONFIGURATION pound_gbk ADD MAPPING FOR zh_words, en_word, numeric, alnum, grapsymbol, multisymbol WITH simple;
select cfgname, cfoptions from pg_ts_config where cfgname = 'pound_gbk';
SELECT to_tsvector('pound_gbk', '京东#淘宝#滴滴#爱奇艺#芒果TV');
-- SET THE SPLIT FLAG to '@'
alter text search configuration pound_gbk set (split_flag = '@');
select cfgname, cfoptions from pg_ts_config where cfgname = 'pound_gbk';
SELECT to_tsvector('pound_gbk', '京东@淘宝@滴滴@爱奇艺@芒果TV');
-- SET THE SPLIT FLAG to '$'
alter text search configuration pound_gbk set (split_flag = '$');
select cfgname, cfoptions from pg_ts_config where cfgname = 'pound_gbk';
SELECT to_tsvector('pound_gbk', '京东$淘宝$滴滴$爱奇艺$芒果TV');
-- SET THE SPLIT FLAG to '%'
alter text search configuration pound_gbk set (split_flag = '%');
select cfgname, cfoptions from pg_ts_config where cfgname = 'pound_gbk';
SELECT to_tsvector('pound_gbk', '京东%淘宝%滴滴%爱奇艺%芒果TV');
-- SET THE SPLIT FLAG to '/'
alter text search configuration pound_gbk set (split_flag = '/');
select cfgname, cfoptions from pg_ts_config where cfgname = 'pound_gbk';
SELECT to_tsvector('pound_gbk', '京东/淘宝/滴滴/爱奇艺/芒果TV');
-- FOR UNSUPPORTED SPLPT_FLAG, REPORT ERROR
alter text search configuration pound_gbk set (split_flag = ',');
-- WHEN SPLIT FLAG IS NOT SINGLE CHARACTER, REPORT ERROR
alter text search configuration pound_gbk set (split_flag = '#@');
-- WHEN SPLIT FLAG IS NULL CHARACTER, REPORT ERROR
alter text search configuration pound_gbk set (split_flag = '');
-- WHEN SINGLE TOKEN EXCEED 256 CHARACTERS, SPLIT DIRECTLY EVEN NO SPLIT FLAG IS FOUND
select to_tsvector('pound_gbk','1981年8月26日,美国宇宙飞船“旅行者2号”飞过土星,取得了一系列探测成果,其中包括发现了土星的第17颗卫星——土卫17。所以,今天我们就来说一说“旅行者2号”上的“地球之音”唱片。1977年8月和9月,人类成功发射了“旅行者1号”和“旅行者2号”探测器,再次向外星人作了更详细的自我介绍。这次,它们各自携带了一张称为“地球之音”的唱片,上面录制了丰富的地球信息。这两张唱片都是镀金铜质的,直径为30.5厘米。唱片上录有115幅照片和图表,35种各类声音,近60种语言的问候语和27首世界著名乐曲等。115幅照片中包括我国八达岭长城');
select to_tsvector('pound_gbk','1981年8月26日,美国宇宙飞船“旅行者2号”飞过土星,取得了一系列探测成果,其中包括发现了土星的第17颗卫星——土卫17。所以,今天我们就来说一说“旅行者2号”上的“地球之音”唱片。1977年8月和9月,人类成功发射了“旅行者1号”和“旅行者2号”探测器,再次向外星人作了更详细的自我介绍。这次,它们各自携带了一张称为“地球之音”的唱片,上面录制了丰富的地球信息。这两张唱片都是镀金铜质的,直径为30.5厘米。唱片上录有115幅照片和图表,35种各类声音,近60种语言的问候语和27首世界著名乐曲等。115幅///照片中包括我国八达岭长城');
drop text search configuration pound_gbk;
create text search configuration pound_gbk(parser=pound) with (split_flag = '#');
ALTER TEXT SEARCH CONFIGURATION pound_gbk ADD MAPPING FOR zh_words, en_word, numeric, alnum, grapsymbol, multisymbol WITH simple;
select cfgname, cfoptions from pg_ts_config where cfgname = 'pound_gbk';
SELECT to_tsvector('pound_gbk', '京东#淘宝#滴滴#爱奇艺#芒果TV');
--
--- N-gram
--
-- CREATE TEXT SEARCH CONFIGURATION
create text search configuration ngram1(parser=ngram) with (punctuation_ignore = on, gram_size = 2, grapsymbol_ignore = on);
alter text search configuration ngram1 ADD MAPPING FOR zh_words, en_word, numeric, alnum, grapsymbol, multisymbol with simple;
select cfgname, cfoptions from pg_ts_config where cfgname = 'ngram1';
SELECT to_tsvector('ngram1', '画,龙&点睛');
--punctuation_ignore
alter text search configuration ngram1 set (punctuation_ignore = off);
SELECT to_tsvector('ngram1', '画,龙&点睛');
--punctuation_ignore
alter text search configuration ngram1 set (grapsymbol_ignore = off);
SELECT to_tsvector('ngram1', '画,龙&点睛');
--punctuation_ignore
alter text search configuration ngram1 set (gram_size = 3);
SELECT to_tsvector('ngram1', '画,龙&点睛');
drop text search configuration ngram1;
create text search configuration ngram1(parser=ngram);
alter text search configuration ngram1 ADD MAPPING FOR zh_words, en_word, numeric, alnum, grapsymbol, multisymbol with simple;
select cfgname, cfoptions from pg_ts_config where cfgname = 'ngram1';
SELECT to_tsvector('ngram1', '画,龙&点睛');
--------------------------- basic operation for text search parser ----------------------------------
--
---- N-gram parser
--
set ngram_gram_size = 2;
set ngram_grapsymbol_ignore = off;
set ngram_punctuation_ignore = off;
select ts_parse('ngram', 'Uber提供了电脑版、手机版&平板电脑等版本');
-- configuration parameter: ngram.punctuation_ignore
set ngram_punctuation_ignore = on;
select ts_parse('ngram', 'Uber提供了电脑版、手机版&平板电脑等版本');
-- configuration parameter: ngram.grapsymbol_ignore
set ngram_grapsymbol_ignore = on;
select ts_parse('ngram', 'Uber提供了电脑版、手机版&平板电脑等版本');
-- configuration parameter: ngram.grapsymbol_ignore
set ngram_gram_size = 4;
select ts_parse('ngram', 'Uber提供了电脑版、手机版&平板电脑等版本');
select ts_token_type('ngram');
------------------------------- basic operation for text search datatype -----------------------
--
--- pound
--
select to_tsvector('pound_gbk', 'Uber#滴滴#爱奇艺#芒果TV');
select to_tsquery('pound_gbk','芒果TV');
--
---- N-gram
--
select to_tsvector('ngram1','辽GQQ360');
select to_tsquery('ngram1','360');
------------------------------- basic operation for text search operator -----------------------
--
--- pound
--
select to_tsvector('pound_gbk','Uber#滴滴#爱奇艺#芒果TV') @@ to_tsquery('pound_gbk','芒果TV'); -- true
select to_tsvector('pound_gbk','Uber#滴滴#爱奇艺#芒果TV') @@@ to_tsquery('pound_gbk','芒果TV'); -- true
select to_tsvector('pound_gbk','芒果TV') @@ to_tsquery('pound_gbk','QQ'); -- false
select (to_tsvector('pound_gbk','滴滴') || to_tsvector('pound_gbk','爱奇艺')) @@ to_tsquery('pound_gbk','滴滴'); -- true
select to_tsquery('pound_gbk','淘宝') && to_tsquery('pound_gbk','爱奇艺');
select to_tsquery('pound_gbk','淘宝') || to_tsquery('pound_gbk','爱奇艺');
select to_tsvector('pound_gbk','Uber#滴滴#爱奇艺#芒果TV') @@ (to_tsquery('pound_gbk','淘宝')); -- false
select to_tsvector('pound_gbk','Uber#滴滴#爱奇艺#芒果TV') @@ (to_tsquery('pound_gbk','爱奇艺')); -- true
select to_tsvector('pound_gbk','Uber#滴滴#爱奇艺#芒果TV') @@ (to_tsquery('pound_gbk','淘宝') || to_tsquery('pound_gbk','爱奇艺')); -- true
select to_tsvector('pound_gbk','Uber#滴滴#爱奇艺#芒果TV') @@ (to_tsquery('pound_gbk','淘宝') && to_tsquery('pound_gbk','爱奇艺')); --false
select !!to_tsquery('pound_gbk','爱奇艺');
select to_tsvector('pound_gbk','Uber#滴滴#爱奇艺#芒果TV') @@ !!to_tsquery('pound_gbk','爱奇艺'); -- false
select to_tsvector('pound_gbk','Uber#滴滴#爱奇艺#芒果TV') @@ !!to_tsquery('pound_gbk','淘宝'); --true
select to_tsquery('pound_gbk','爱奇艺') @> to_tsquery('pound_gbk','Uber#滴滴#爱奇艺#芒果TV'); -- false
select to_tsquery('pound_gbk','Uber#滴滴#爱奇艺#芒果TV') @> to_tsquery('pound_gbk','爱奇艺'); -- true
select to_tsquery('pound_gbk','爱奇艺') <@ to_tsquery('pound_gbk','Uber#滴滴#爱奇艺#芒果TV'); -- true
select to_tsquery('pound_gbk','Uber#滴滴#爱奇艺#芒果TV') <@ to_tsquery('pound_gbk','爱奇艺'); -- false
--
---- N-gram
--
select to_tsvector('ngram1','辽GQQ360') @@ to_tsquery('ngram1','360'); -- true
select to_tsvector('ngram1','辽GQQ360') @@@ to_tsquery('ngram1','360'); -- true
select to_tsvector('ngram1','360') @@ to_tsquery('ngram1','QQ'); -- false
select (to_tsvector('ngram1','辽GQQ') || to_tsvector('ngram1','360')) @@ to_tsquery('ngram1','QQ'); -- true
select to_tsquery('ngram1','480') && to_tsquery('ngram1','辽GQQ');
select to_tsquery('ngram1','480') || to_tsquery('ngram1','辽GQQ');
select to_tsvector('ngram1','辽GQQ360') @@ (to_tsquery('ngram1','480')); -- false
select to_tsvector('ngram1','辽GQQ360') @@ (to_tsquery('ngram1','辽GQQ')); -- true
select to_tsvector('ngram1','辽GQQ360') @@ (to_tsquery('ngram1','480') || to_tsquery('ngram1','辽GQQ')); -- true
select to_tsvector('ngram1','辽GQQ360') @@ (to_tsquery('ngram1','480') && to_tsquery('ngram1','辽GQQ')); -- false
select !!to_tsquery('ngram1','360') ;
select to_tsvector('ngram1','辽GQQ360') @@ !!to_tsquery('ngram1','360'); -- false
select to_tsvector('ngram1','辽GQQ360') @@ !!to_tsquery('ngram1','480'); -- true
select to_tsquery('ngram1','360') @> to_tsquery('ngram1','辽GQQ360'); -- false
select to_tsquery('ngram1','辽GQQ360') @> to_tsquery('ngram1','360'); -- true
select to_tsquery('ngram1','360') <@ to_tsquery('ngram1','辽GQQ360'); -- true
select to_tsquery('ngram1','辽GQQ360') <@ to_tsquery('ngram1','360'); -- false
------------------------------- basic operation for text search function -----------------------
-- pound
select get_current_ts_config(); -- english
select length(to_tsvector('pound_gbk','Uber#滴滴#爱奇艺#芒果TV')); -- 4
select length(to_tsvector('pound_gbk','Uber#滴滴#爱奇艺#芒果TV#QQ')); -- 5
select numnode(to_tsquery('pound_gbk','Uber#滴滴#爱奇艺#芒果TV'));
select numnode(to_tsquery('pound_gbk','芒果TV') || to_tsquery('pound_gbk','滴滴'));
-- select plainto_tsquery('pound_gbk', 'Uber##滴滴# 爱奇艺##芒果TV');
-- select plainto_tsquery('pound_gbk', 'Uber#滴滴#爱奇艺#芒果TV');
select querytree(to_tsquery('pound_gbk','Uber#滴滴#爱奇艺#芒果TV'));
select querytree(to_tsquery('pound_gbk','滴滴') && to_tsquery('pound_gbk','爱奇艺'));
select querytree(to_tsquery('pound_gbk','滴滴') || to_tsquery('pound_gbk','爱奇艺'));
select strip(to_tsvector('pound_gbk','Uber#滴滴#爱奇艺#芒果TV'));
select ts_headline('pound_gbk', 'Uber#滴滴#爱奇艺#芒果TV', to_tsquery('pound_gbk','Uber'));
select ts_rank(to_tsvector('pound_gbk', 'Uber#滴滴#爱奇艺#芒果TV'), to_tsquery('pound_gbk','Uber#滴滴#爱奇艺#芒果TV'));
select ts_rank(to_tsvector('pound_gbk', 'Uber#滴滴#爱奇艺#芒果TV'), to_tsquery('pound_gbk','Uber'));
select ts_rank(to_tsvector('pound_gbk', 'Uber#滴滴#爱奇艺#芒果TV'), to_tsquery('pound_gbk','滴滴'));
select ts_rank_cd(to_tsvector('pound_gbk', 'Uber#滴滴#爱奇艺#芒果TV'), to_tsquery('pound_gbk','Uber#滴滴#爱奇艺#芒果TV'));
select ts_rank_cd(to_tsvector('pound_gbk', 'Uber#滴滴#爱奇艺#芒果TV'), to_tsquery('pound_gbk','Uber'));
select ts_rank_cd(to_tsvector('pound_gbk', 'Uber#滴滴#爱奇艺#芒果TV'), to_tsquery('pound_gbk','滴滴'));
select ts_rewrite(to_tsquery('pound_gbk','Uber#滴滴#爱奇艺#芒果TV'), to_tsquery('pound_gbk','Uber'), to_tsquery('pound_gbk','爱奇艺'));
--N-gram
select get_current_ts_config(); -- english
select length(to_tsvector('ngram1','辽GQQ360')); -- 6
select length(to_tsvector('ngram1','辽GQQ888')); -- 5
select numnode(to_tsquery('ngram1','辽GQQ360')); -- 11
select numnode(to_tsquery('ngram1','480') || to_tsquery('ngram1','辽GQQ')); -- 9
alter text search configuration ngram1 set (grapsymbol_ignore = off);
alter text search configuration ngram1 set (punctuation_ignore = on);
alter text search configuration ngram1 set (gram_size = 2);
select cfgname, cfoptions from pg_ts_config where cfgname = 'ngram1';
select plainto_tsquery('ngram1', '车,牌,号');
select plainto_tsquery('ngram1', '车,牌,号');
select querytree(to_tsquery('ngram1','中国人民银行'));
select querytree(to_tsquery('ngram1','中国人') && to_tsquery('ngram1','民银行'));
select querytree(to_tsquery('ngram1','中国人') || to_tsquery('ngram1','民银行'));
select strip(to_tsvector('ngram1','中国人民银行'));
select ts_headline('ngram1', '中国人民银行', to_tsquery('ngram1','中国人'));
select ts_rank(to_tsvector('ngram1', '中国人民银行'), to_tsquery('ngram1','中国人民银行'));
select ts_rank(to_tsvector('ngram1', '中国人民银行'), to_tsquery('ngram1','中国人'));
select ts_rank(to_tsvector('ngram1', '中国人民银行'), to_tsquery('ngram1','中人'));
select ts_rank_cd(to_tsvector('ngram1', '中国人民银行'), to_tsquery('ngram1','中国人民银行'));
select ts_rank_cd(to_tsvector('ngram1', '中国人民银行'), to_tsquery('ngram1','中国人'));
select ts_rank_cd(to_tsvector('ngram1', '中国人民银行'), to_tsquery('ngram1','中人'));
select ts_rewrite(to_tsquery('ngram1','中国人民银行'), to_tsquery('ngram1','银行'), to_tsquery('ngram1','政府'));
-------------------------------- add test cases for deprecated zhparser -------------------------
select to_tsvector('zhparser', '城乡建设');
select ts_parse('zhparser', '版权所有 CopyRight 2006-2015 酷狗音乐');
select to_tsquery('zhparser','中国人民银行');
select * from ts_token_type('zhparser');
drop text search configuration ngram1;
drop text search configuration pound_gbk;
\c postgres
DROP DATABASE db_gbk;