Files
MaxScale/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3.c
Johan Wikman d115a54038 All changes 1.4.3...2.0.0
b709e29 Fix URL typo in release notes
01f203c Update release notes
c49810a Update COPYRIGHT
e327526 Add BSL version number to LICENSE.TXT
07e3a4e Remove superfluous COPURIGHT.md and LICENSE.md
54c3310 Replace Dynamic Data Routing Platform with Database Proxy
305d02f Remove *.cmake wildcard from .gitignore
b0b5208 Cleanup of spaces
aeca6d0 Extend maxscaled error messages
817d74c Document where the CDC users are stored
9a569db Update license
ff8697a MXS-716: Fix table level privilege detection
2071a8c Only check replies of slaves that are in use
f8dfa42 Fix possible hangs in CDC python scripts
fa1d99e Removed "filestem option" from example
009b549 Removed "filestem option" from example
8d515c2 Add example Kafka producer script for Python
64e976b Fix sporadic SSL authentication failures
5a655dc MXS-814: Check service/monitor permissions on all servers
2a7f596 Add note about galeramon priority to Galera tutorials
b90b5a6 Fixed SHOW SLAVE STATUS in binlog router
e22fe39 Changed column size for SHOW SLAVE STATUS
ae97b18 Fix avrorouter build failure with older sqlite libraries
56ef8b7 Replace GPL license with BSL license in scripts and tests
552836f Initialize all fields when MySQL users are loaded from cache
bf42947 Update all licensing related files
b29db9d Remove optimize_wildcard parameter from use
5170844 Make readwritesplit diagnostic output more clear
262ffb1 Fix crash when a config parameter has no section
33ac9e6 Add note about LEAST_BEHIND_MASTER and server weights
e13e860 Fix a memory leak when backend authentication fails
75d00c2 MXS-801: Set the default value of strip_db_esc to true
bd5f2db MXS-800: Add a log message about the working directory
4b1dd8c Update MySQL Monitor documentation on detect_replication_lag
559bc99 Fix installation of license file
b057587 Change LICENSE to LICENSE.TXT
223fa43 Remove null characters from log messages
36fd05b Fix fatal signal handler log message
053dc8a Fix typos in documentation
371dc87 Fix Galera text in Master-Slave tutorial
30b6265 Disable adding of new objects at runtime
db92311 Update the documentation on configuration reloading
0923d40 Update Connector-C version
c4738b5 Add define for avro-conversion.ini
196e6ac Update license from GPL to BSL.
e612366 Correctly calculate the number of bytes read in dcb_read
93a2a03 Update rotate documentation in admin tutorial
c5eb854 MXS-585: Fix authentication handling regression
6330070 Fix schemarouter memory leak
aa5827e Fix CDC authentication memory leak
a5af4ad Fix avro memory leaks
627d73f Fix Avro scripts
0ff7556 Add build instructions to avrorouter documentation
734a1c8 Fix doxygen mainpage
e51ce09 Add licence text to avro scripts
4d27c14 Update Avro documentation and fix installation directories
a58a330 Fix readconnroute error message about router_options
22b138c MXS-772: Fix postinstall script
a9960b7 Fix function declaration in mysql_backend.c
cbe1704 Add missing newline
09d76ee Fix avro documentation default values
1d3f8f3 Added refresh users on START SLAVE
880db34 Make router configuration errors fatal
3bad5ca Update documentation and add logging to avrorouter
10f3384 Disable SSLv3
ca8d902 Fix rwsplit error reporting when master is lost
e816d65 Fix MaxScale Tutorial
deca3e1 Update MaxScale man page
f1735b7 Update release notes
9238714 qc: Change type of DEALLOCATE PREPARE
0b77c3b dbfwfilter: Do not require a trailing new line
1152ca9 Remove copyright message
a038a85 Remove debug assertion on ERANGE error in galeramon
12ab235 Fix comparison error for connections limit.
5de1a8f qc_sqlite: Correct string recognition
b63d754 Fix links in documentation contents
05d457e CDC protocol link fix
50676ef Fix monitor code formatting
218ba09 Remove MaxScale-and-SSL.md
0d6845b Add images to Avro documentation and tutorial
8dd2c9b Update MaxScale-2.0.0-Release-Notes.md
6d4b593 Change avrorouter default transaction grouping
4c629de Add notes about monitor changes to upgrading and release notes
267d0dc Update Binlogrouter.md
c624781 Update Replication-Proxy-Binlog-Router-Tutorial.md
f3261bc CDC users
1368797 Format authenticator and module headers
ab01749 Format filters
8b05d32 Format core and include files
f3974e5 Add GPL LICENSE to qc_mysqlembedded
bfec36a astyle rabbitmq_consumer/consumer.c
54b960a Check that the Avro directory is writable
3d4cd2e Fix cdc_users using the wrong path for service users cache
1e738dd Add CDC client program documentation
f6809fd Remove superfluous rabbitmq_consumer/LICENSE
6b5e667 Update license text in files
9bfcb46 Change CDC protocol documentation formatting
607f25c  REQUEST-DATA formatting
8175ab4 CDC protocol update
d5ca272 CDC protocol update
6c91764 Only check wsrep_local_index if node is joined
f12e2c2 Do not use SSL for monitors and services
6d2cd99 Fix TestAdminUsers
f4ae50d Apply astyle to server/core/test/*.c
7cc2824 Update build instructions
cf8e2b5 Update release notes
03c7a6c Remove wrong function prototypes
5a11eed Revert "Remove duplicate functions"
80ed488 Remove duplicate functions
bb0de8d Add info on SSL and throttling to release notes for 2.0.
0934aee Update MaxAdmin reference guide
2a3fe9b Update source URL in release notes
e575cf0 Merge branch 'MXS-177-develop' into develop
cc8c88d Change header for BSL
ecde266 Change header for BSL
890b208 Log a message when a script is executed
9c365df Added information on server side SSL to config guide.
aa3e002 Remove obsolete heading
79dd73a Make dprintAllSessions use dprintSession
1fc0db9 Align output of "show services"
1b9d301 Make monitorShowAll use monitorShow
983615e Adjust output of 'show modules'
436badd qc_sqlite: The module is now beta
a7cbbe5 Update Upgrade document
71ac13f Remove obsolete user/password from example
eb20ff8 Fix and clean up Avrorouter diagnostics code
31d4052 Change MaxScale to MariaDB MaxScale
e6e4858 Fix `source` parameter not working with `router_options`
d8de99a Update module version numbers
eb81add Merge remote-tracking branch 'origin/develop' into MXS-177-develop
daba563 Merge remote-tracking branch 'origin/MXS-651-develop-merge' into develop
678f417 Changes in response to reviews.
410fb81 Changes in response to reviews.
60135e5 Add initial release notes about Avrorouter
7400ecc qc_sqlite: Remove uninitialized read
536962c Update version number
018f044 Fix debug assertion in client command processing
51f0804 Prevent 'show monitor' from crashing with failed monitor
559347e Fix "Too many connections" message; add comments.
01d3929 Add printf format checking to dcb_printf
fbd49a6 dbfwfilter: Require complete parsing only when needed
1885863 Add information to release notes about readwritesplit changes
73b56a2 Update MaxScale section in release notes.
0a2f56f MaxAdmin: Remove debug information from 'show users'
3cf3279 MaxAdmin: Report SSL information as well
29c2b66 Always use SSL if server configured with SSL
7d6b335 dprintAllServers should use dprintServer
02a5246 qc_sqlite: Correctly detect parsing context
469419b compare: Add strict mode
8c5b3d3 compare: Allow the comparison of a single statement
4691514 Add Upgrade to 2.0 document
38b3ecb Expand the checks done before a monitor is stopped
8e2cfb9 Add backend name to authentication error message
9600a07 Fix MaxInfo crash
91c58b0 Removed log message for duplicate entry while adding an user
40392fe Fixed log message priority
0ec35b8 maxadmin: Allow the last user to be removed
5a0ebed maxadmin: Change name of user file
87aa8f1 maxadmin: Always allow root to connect
bf37751 Fix COM_QUIT packet detection
7c93ee4 Update avrorouter documentation and tutorial
95ce463 Fix wrong directory in avrorouter log message
cfe54c7 Update ChangeLog
d69562c Fix LOAD DATA LOCAL INFILE data size tracking
24e7cd6 MXS-584: added support for SET @@session.autocommit
d6f6f76 Fixes, correct too many connections message
efeb924 Update release notes for 2.0.0
8f71a87 qc_sqlite: Adjust error messages
b967d60 Remove copy of enum enum_server_command
822b7e3 Update package license
b58301a Update MaxScale License for overlooked files
c09ee47 Update MaxScale License
49f46fa Tidy up. Comment out config items not yet supported.
f5c3470 Updated and simplified the Building from Source document
98b98e2 Add note about master failure modes to documentation
e036f2c Update Limitations document
62219a5 Merge remote-tracking branch 'origin/drain-writeq' into develop
5caf667 Invoke DCB_REASON_DRAINED more frequently.
77b107b qc_sqlite: Add support for LOAD DATA INFILE
8e70f26 compare: Optionally print out the parse result
ad750e6 Merge remote-tracking branch 'origin/MXS-651-develop-merge' into develop
ef85779 Merge remote-tracking branch 'origin/develop' into MXS-651-develop-merge
ea9fdda MXS-477: Add LONGBLOB support for readconnroute
eae6d42 qc_sqlite: Remove superfluous columnname definition
8fe2b21 Add binlog source to avrorouter
b25cc37 qc_sqlite: Add missing destructors
8a749e7 qc_sqlite: Reduce number of keywords
5f4bb8b compare: Output query immediately
2456e52 dbfwfilter: Reject queries that cannot be parsed
5f1fbbd qc_sqlite: Extend SET grammar
b8d8418 dbfwfilter: Remove 'allow' from firewall filter rule
0bd2a44 MXS-741 When no events are read from binlog file, ...
a07c491 Remove duplicated function (merge error, probably)
b237008 Save conflict resolution, missed last time.
a0c0b40 Merge remote-tracking branch 'origin/develop' into MXS-651-develop
385d47d Change SSL logic, fix large read problem.
b93b5e0 Remove false debug assertion
b953b1f Turn off SSL read ahead.
e0d46a5 Fix error messages and remove commented code
49b4655 MXS-739: Fix invalid JSON in Maxinfo
0c30692 qc_sqlite: Handle GROUP_CONCAT arguments
54e48a1 qc_sqlite: Consider \ as an escape character in strings
713a5d6 qc_sqlite: Add test cases
20d1b51 qc_sqlite: Handle qualified names in CREATE VIEW
1019313 qc_sqlite: Make QUERY_TYPE_WRITE default for SHOW
059c14e qc_sqlite: Accept qualified function names in SELECT
db34989 qc_sqlite: Accept qualified function names
b93e2f1 qc_sqlite: Add limited support for GRANT and REVOKE
678672d qc_sqlite: Cleanup copying of database and table names
9b744b9 qc_sqlite: Update table and database names at the same time
db75e61 qc: Support getting the qualified table names
1f867f4 qc: Add join.test
9c7e02a qc_sqlite: Accept "...from NATURAL straight_join..."
93d1c31 qc_sqlite: Both " and ' can enclose a string literal
8055b21 qc_sqlite: Set more information based upon tokens
37e3663 qc_sqlite: Do not blindly add affected fields
50f1360 qc: Correctly collect affected fields
71c234e qc_sqlite: Recognize CREATE TABLE ... UNION
01803f1 qc_sqlite: Recognize {DEALLOCATE|DROP} PREPARE ...
6ecd4b3 qc_sqlite: Parse index hints
0bdab01 qc: Compare sets of tables
b908c8f Fix double freeing of internal DCBs
8903556 qc_sqlite: Recognize LEFT, INSERT and REPLACE
266e6c0 qc: Log all problems by default (compare program)
7b54cac qc_sqlite: Fix logging bug
9566e9f qc_sqlite: Plug a leak
b0a860d qc: Run compare a specified number of times
050d698 qc_sqlite: Simplified argument handling
97c56b8 qc: Allow arguments to be passed to the query classifier
09a46e0 qc_sqlite: Add argument log_unrecognized_statements
fd98153 qc: Allow arguments to be provided to the query classifier
313aa7e Fix Problems SSL assertion; non SSL connect to SSL
1d721e6 Fix DEB packaging errors
96bdc39 Fix RPM packaging failures on CentOS 7
6ba900d qc_sqlite: Recognize more SHOW commands
2869d0b qc_sqlite: Exclude support for sqlite's PRAGMA
0be68a3 qc_sqlite: Enhance SELECT syntax
28f3e1a Merge branch 'develop' into MXS-729
e18bd41 qc: Expose the result of the parsing
5896085 Add BUILD_AVRO to the CMake cache
daeb896 Remove changes to blr_master.c memory handling
d523821 Add comments
4eb9a66 Empty admin users file is now handled
52b46c6 qc: Update create.test
db09711 qc_sqlite: Ignore case when looking for test language keywords
f042a1d qc_sqlite: Extend CREATE TABLE syntax
177d2de qc_sqlite: Extend CREATE TABLE syntax
d3ca8e6 qc_sqlite: Add some support for HANDLER
86c6a96 qc_sqlite: Recognize RENAME TABLE
471594f qc_sqlite: Accept more table options at CREATE TABLE
3da6cde qc_sqlite: Remove unused keywords
bd89662 Fix crash on corrupted passwd file
b5d1764 MXS-733: Always print session states
043e2db Remove unused CMake variables
5604fe2 Restore missing line, fixes logic error.
66d15a5 Added log message warning for old users found
5be9fca Changes in response to review by Johan
899e0e2 Removed password parameter from admin_user_add and admin_remove_user
a2d9302 Merge branch 'develop' into MXS-729
bcaf82f Code review update
e61c716 Nagios plugin update with Maxadmin using UNIX socket only
d7150a2 qc_sqlite: Extend column syntax
3b78df0 qc_sqlite: Accept VALUE in addition to VALUES
85a705b qc_sqlite: Accept CHARSET in addition to CHARACTER SET
db9cec8 qc_sqlite: Accept qualified column  names in CREATE TABLE
a9cabb0 qc_sqlite: Extend SELECT syntax
f5e9878 qc_sqlite: Add set type
675cb93 qc_sqlite: Allow BINARY to turn into an identifier
b04a760 qc_sqlite: Accept DROP TABLES
1075d9c qc_sqlite: Allow qualified name with LIKE in CREATE
420ac56 qc_sqlite: Extend EXPLAIN grammar
727d626 Add missing error message to readwritesplit
f4fd09e Change templates and testing configurations to use sockets
1ef2e06 Add configurable default admin user
a723731 Remove wrong file
7c3b02b Maxadmin/maxscaled UNIX socket update
eed78d4 qc_sqlite: Pick out more information from select when CREATEing
267f091 qc_sqlite: Recognise DROP TEMPORARY TABLE
54fc29f qc_sqlite: Accept $ as a valid character in identifiers
afa2ec9 qc_sqlite: Allow keywords to be used in qualified name
db0427d MXS-729 code review update
a3b3000 Merge branch 'develop' into MXS-729
e73d66c qc_sqlite: Identify qualified identifiers
5bacade Trailing space fix
3bc9be3 MXS-729 socket=default support in maxscale.cnf
1a5c23c Code review update for MXS-729
d6665c7 qc_sqlite: Extend CREATE TABLE grammar
91725ce qc_sqlite: Dequote table and database names
cd7a022 qc: Add create test
1aa4e6b qc: Update test files
762b0c2 qc_mysqlembedded: Do not return "*" as table name
cd9968f qc_sqlite: Update delete.test
f16703d qc_sqlite: Add support for CALL
e3ca9b4 qc_mysqlembedded: Do not return an array of empty strings
5878a22 qc_sqlite: Support selects in DELETE
1cf0444 qc_sqlite: Fix bug in DELETE grammar
0bf39a1 qc_sqlite: Add support for CHECK TABLE
4a8feca qc_sqlite: Add helper for catenating SrcLists
ab299b3 qc_sqlite: Extend DELETE syntax
5778856 qc_sqlite: Extract database name as well
99901f1 qc_sqlite: Extend DELETE syntax
63396f8 qc_sqlite: Match "autocommit" caseinsensitively
e804dd3 qc_sqlite: Add support for LOCK/UNLOCK
c23e442 qc_sqlite: Extend DELETE syntax
5460e31 qc: Add delete test
ab392ad qc_sqlite: Free unused data
598c6f0 qc: Measure time of parsing
2fa3844 qc_sqlite: Put all changes behind {%|#}ifdefs
1b43992 qc_sqlite: Modified update.test
1676ea4 qc_sqlite: LEFT|RIGHT are not required with JOIN
224ebd3 qc_sqlite: Extend UPDATE grammar
dbecca9 qc_sqlite: Extend UPDATE grammar
b6ca3b3 MaxAdmin security modification MXS-729
8fb47dd Remove copying of MariaDB embedded library files
22e1257 Normalize whitespace when canonicalizing queries
269cff2 MXS-697: Fix dbfwfilter logging for matched queries
6344f6f Ignore Apple .DS_Store files.
d606977 Improve comments in response to code review.
619aa13 Add configuration check flag to MaxScale
27c860b Drain write queue improvements.
33d4154 Read only one configuration file
d104c19 Format more core files
83fdead Format part of core source code
311d5de Format gateway.c and config.c with Astyle
8cbb48e Don't build maxavro library if BUILD_AVRO is not defined
32bb77a Merge branch 'MXS-483' into develop
db72c5d Format CDC/Avro related files
3c26e27 qc_sqlite: Use SrcList instead of Expr
f96ad6a Merge branch 'develop' into MXS-729
0728734 Fix query canonical form tests
e68262d Merge remote-tracking branch 'gpl-maxscale/master' into develop
65460dc Fix missing symbols from MySQLAuth
791c821 MaxAdmin listens on UNIX socket only and maxadmin can connect
89afed6 MXS-66: All configuration errors are fatal errors
d613053 Add more details to galeramon documentation
22f4f57 qc: Add support for multi UPDATE
0dba25a Added default port to blr_make_registration
9d8248c qc_sqlite: Plug leaks and access errors
057551a qc_sqlite: Fix too small an allocation
1f73820 qc_sqlite: Free memory allocated during parsing
93fefb9 qc: Enable compare to run the same test repeatedly
e52c578 qc_sqlite: Handle last_insert_id()
929e02a qc_sqlite: Extend UPDATE grammar
de3b9f7 qc_sqlite: Defines provided when running cmake and make
4d5c3b2 qc_sqlite: Add support for multiple-table DELETE FROM
36a4296 qc_mysqlembedded: Handle SQLCOM_DELETE_MULTI
41f613a Fix DCB and SESSION removal from free object pools
00f2ddd Move some common code used in only one protocol into protocol.
6fbd0b0 Format Go source with gofmt
abfbbcb Fix build failures and internal test suite
31de74a Merge branch 'develop' into MXS-483
20d461d Remove uniqueness constrain on oneshot tasks
6c09288 Add missing error message to converter task
0c2c389 Merge branch 'develop' into MXS-483
fa0accc Set freed memory to NULL after authentication failure
63f24e4 Install cdc_schema.go
5123c21 Fix ALTER TABLE parsing
004acc2 Merge branch 'develop' into MXS-483
f69a671 Remove array foreach macro use
a159cd9 qc_sqlite: Add support for SHOW DATABASES
31a2118 Make qc_mysqlembedded optional
27ef30e Changed the default query classifier
359010d Add -h flag as the alias for --host
bebc086 Fix minor bugs
c7ca253 qc_sqlite: Recognize START [TRANSACTION]
240f8bf qc_sqlite: Collect info from nested SELECTs
93ea393 qc_sqlite: Pass along the relative position of a token
cc960af qc_sqlite: Fix incorrect assignment
22a6fef Fix `gtid` avro index table
4c38bef qc_sqlite: STATUS is not a registered word
cace998 qc_sqlite: Include all fields of UPDATE
997b19c qc: Add update tests
7601b3f qc_sqlite: Parse "INSERT INTO t VALUES (), ();" correctly
ca426f1 qc_sqlite: Handle CREATE TRIGGER
f54f963 qc_sqlite: Allow INSERT without INTO
e4a1b6d Remove foreign keys from Avro index
e4501a2 Merge branch 'develop' into MXS-483
82b9585 Fix MMMon never assigning master status
a45a709 qc_mysqlembedded: Find the leaf name
2f3ca8f qc_mysqlembedded: Recognize SQLCOM_REPLACE
cc7ad83 qc_mysqlembedded: Pick up fields for INSERT SELECT as well
0e6b39e qc: Cleanup of select.test
9113f4f qc_sqlite: Pickup more fields from INSERT
4d58f98 Dummy query classifier
dfe824f Document `query_classifier` option
4aa329b MXS-718: Collect fields of INSERT
53818f2 Modify packet number for SSL backend connection
346f973 qc_sqlite: Accept qualified column names
8a83616 Fix in-memory SQLite table structure
6f2c884 Further backend SSL development
4444e92 qc_sqlite: Extend INSERT grammar
2aebcab qc_sqlite: Add support for TRUNCATE
1a6742e qc_sqlite: Accept DEFAULT as value in INSERT
07dec05 qc_sqlite: Crude classification made based on seen keywords
a90a579 Add missing function documentation
72bd0cf qc_sqlite: Change CREATE TABLE grammar
6e04bc8 qc: Add INSERT tests
3666bda qc_sqlite: Add SELECT test
d27e173 Add server/mysql-test/t/select.test to query_classifier
562d004 qc_sqlite: Cleanup error logging.
819cacb Merge branch 'develop' into MXS-483
0d3a789 Add warnings and comments to Avro row event processing
2fab570 Added support for SET autocommit=1
1aa83cf Code review fix
c999f0a Addition of SELECT USER()
8c723da Clean up avro_client.c and avro_file.c
eb21ee8 Clean up avro.c
946a284 Added Avro schema to table metadata processing
72f90be qc_sqlite: Add support for CREATE {FUNCTION|PROCEDURE} ...
4a4ab49 qc: Update line number also when skipping a block
ffddb2a qc_sqlite: Allow queries using INTERVAL
b8b03bd qc_sqlite: Add support for SELECT * FROM tbl2 = tbl1;
77a261a qc_sqlite: Add support for GROUP BY ... WITH ROLLUP
0ead41e cdc_schema now generates lowercase JSON
66e327a Classifier has to be specified explicitly
9074b91 Updated Avrorouter documentation
cf06c7a qc_sqlite: Some comments added.
f579eff Added simple Go based Avro schema generator
f448e90 MXS-419: Added ulimit calls to init scripts
b4ad257 Added FindAvro.cmake
56cc9b9 Added the last transaction script to installations
2d52da2 Added temporary avro-alpha package name
6ada071 Fixed cdc_users script
61f0206 Renaming and minor fixes to CDC Python scripts
9d77c32 Moved GTID table tracking to an in-memory database
8ae7cb0 MXS-704: Fixed `which` usage in post-install scripts
195e118 Readwritesplit sessions aren't created if master is down
2be91da Added affected tables to avro diagnostics
b185320 QUERY-LAST-TRANSACTION now returns proper table names
90860b5 Log stale master message only once
4859c60 Table name to GTID mapping
f77bd23 First steps to backend SSL, not yet working.
68b5bf0 qc_sqlite: Don't treat TRUE and FALSE as identifiers
fca8e59 qc_sqlite: Collect database names as well
6b0e04d qc_sqlite: Add support for SHOW CREATE VIEW
77f4b34 qc_mysqlembedded: Report more, rather than less
a73e033 qc_sqlite: Extend builtin functions
9d9650e qc_sqlite: SQL_BUFFER_RESULT must decay to an id
83fe99d qc_sqlite: Support INSERT IGNORE
9d1c9ca Added avrorouter limitations and tutorial
8dd094d qc_sqlite: Recognize builtin functions
2edc3d6 Moved write operations of the maxavro library to a different file
1364e54 Added more comments to the Avro RBR handling code
f711588 Added warning about unsupported field types
df0d250 Added SQLite3 based indexing to avrorouter
0c55706 Added GTID event flag check in AVRO processing
bfe28dd qc_sqlite: Accept SET GLOBAL|SESSION ...
a8d2068 qc_mysqlembedded: Exclude code that won't compile on 5.5.42
16ea0b3 qc_sqlite: Add support for DROP FUNCTION
1c0f1fc qc: Report stats after comparison
02345b2 qc_sqlite: Recognize builtin readonly functions
c7a5e75 qc_sqlite: Recognize :=
0aa849d qc: Make compare understand the delimiter command
fb0a877 qc_mysqlembedded: Examine Item::SUBSELECT_ITEMs
045cf8d qc: Add missing mtl commands
e5c6f45 qc_sqlite: Relax qc_get_type comparison
ac3b2df qc_sqlite: Add support for SHOW STATUS
73a34fb qc_sqlite: Add initial support for FLUSH
4ffbe79 qc_sqlite: Extend CREATE TABLE syntax
009af81 qc_sqlite: Add support for SHOW WARNINGS
001de97 qc: Ignore mysqltest constructs
128307d Merge branch 'release-1.4.3' into gpl-master
5e8a06a SET NAMES XXX added
3ca12ba MXS-685: MMMon clears server state before setting it
dc4d2b0 Further steps to connection limit, non-working.
ef70257 MXS-636: Master failures are no longer fatal errors
99f4c64 Updated QUERY-LAST-TRANSACTION format
d1ff157 Changed QUERY-LAST-TRANSACTION format to JSON
8b2f1ac Fixed formatting of the avrorouter
61543df Added QUERY-LAST-TRANSACTION command
c10d10b qc_sqlite: Add support for SHOW CREATE TABLE
106a38f qc_sqlite: Add support for DROP INDEX
2a85421 qc_sqlite: Extend what can be stated about a table
794cd1c qc_sqlite: Add support for MATCH ... AGAINST
dd7b747 qc_sqlite: Accept FULLTEXT and SPATIAL in CREATE TABLE
a13d6ce qc_sqlite: Add support for PREPARE and EXECUTE
0c5d29f qc_sqlite: Add support for ANALYZE
a6cd32b qc_sqlite: Extend SET syntax
5d47704 qc_sqlite: Pick out fields from UPDATE t SET i = ...
0e05735 qc: Understand --error in server test files
8535975 qc_sqlite: Extend CREATE VIEW syntax
b35e638 qc: Ignore read type bit if write bit is on
818a814 qc_sqlite: Add support for SHOW VARIABLES
1aa877b qc_sqlite: Add initial support for DO
e92913a qc_sqlite: Add support for CREATE VIEW
d53a46d qc_sqlite: Recognize bit field literals b'1010'
1fb7977 Added GTID event timestamp into struct gtid_pos
8f95b10 Added new fields in AVRO diagnostics
cb4db54 Added tests with large SQL packets to modutil tests
e4dbd6b MXS-621: More fixes to log messages at startup
4f1e9ee qc: compare tester can now read server MySQL tests
cd8154b qc_sqlite: Allow CHARACTER SET to be specified for column
6f8d053 Added MariaDB 10.1 check for new flags in GTID event
71c471b qc_mysqlembedded: Fix type bits setting
26b00a7 qc_sqlite: Extend ALTER grammar
ea6057c qc_sqlite: Handle also pInto when dupping a struct select
2271559 qc_sqlite: Add support for SHOW TABLE STATUS
9caaf27 qc_sqlite: Add support for CREATE ... LIKE oldtable
cd19932 Merge tag '1.4.2' into master
9e9e4d8 Merge branch 'develop' of https://github.com/mariadb-corporation/maxscale-bsl into develop
267cb60 qc_mysqlembedded: Look into parenthesized comma expressions
77c6ca9 qc_sqlite: Recognize token "<=>"
5ca9a9f qc_sqlite: Allow comma expressions in where clause
b08e910 qc_sqlite: Add SELECT options
d11e581 qc_sqlite: Some recursion cleanup
d53d063 Add but don't invoke connection queue functionality.
6818104 Fix logic error in connections limiter
3c61605 qc_sqlite: Find more affected fields
9af8dfd Allow the classifiers to be specified on the command line
5d4a134 Activate call to protocol for max connections error message.
16638e7 Fix another mistake
234f9e6 Fix mistake
843a6fc Fix mistake.
2c6e9ad Fix errors in config.c; enable call to protocol on connection limit.
fd27849 Introduce configuration items for Maximum and Queued Service connections
60d198d Implement very simple connection limit.
84d8f0f Merge remote-tracking branch 'origin/develop' into MXS-177
8a58e63 Merge remote-tracking branch 'origin/develop' into develop
08487cd Add assertion to enforce number of free DCBs not being negative.
f73af2f Added MariaDB 10.1 check for new flags in GTID event
23898ec Fix wrong sprintf specifier, trailing white space.
ea6cfa3 readwritesplit: Cleaned up routeQuery
3858df0 Cleaned up select_connect_backend_servers
c38ee13 Added more buffer tests
48816df Added more modutils tests
537eac2 Added tests for modutil_get_complete_packets
22a6095 MXS-669: modutil_get_complete_packets no longer makes the buffer contiguous
51af97e qc_sqlite: Add support for CREATE INDEX
e50c573 qc_sqlite: Dig out fields for IN
f58c6df qc_sqlite: Dequote table name
319422b qc_sqlite: Accept ENUM as type for a column
5d6a45c qc_sqlite: Allow UNSIGNED to fallback to an id
16a5f20 qc_sqlite: Extend CREATE TABLE syntax
d6268da qc_sqlite: Accept RIGHT and FULL OUTER joins
2207415 qc_sqlite: Allow STRAIGHT_JOIN in SELECT
6fee546 qc_sqlite: Pick up more table names
9de5f84 Remove trailing white space.
758f84d Improve comments and messages in dcb.c and session.c re recycle memory.
1c2de21 Merge remote-tracking branch 'origin/develop' into dcb-optimise
6614944 DCB code tidying. Fix missing spinlock release; remove redundant variables
ecd5e5c Remove extra code introduced by merge.
877127a Merge commit '3c0d3e5ab6ddde59da764ec904b517759074a31e' into develop
4275bbe Updated the Connector-C version to 2.2.3
c71042b Some tentative list management code; provide for counting service clients.
ad0c8a6 qc_sqlite: Allow empty insert statement
72e75e5 qc_sqlite: Add support for SELECT ... INTO
cc553fa qc_sqlite: MAXSCALE define can now be used everywhere
3305c6e qc_sqlite: Handle CASE in SELECT
702f62e qc_sqlite: Extend CREATE TABLE grammar
941c212 qc_sqlite: Add support for SHOW [INDEX|INDEXES|KEYS]
6a79136 qc_sqlite: Extend grammar for SHOW TABLES and SHOW COLUMNS
f175d2d qc_sqlite: Add SHOW COLUMNS support
6e47951 qc_sqlite: Add support for SHOW TABLES
bcfa0e7 qc_mysqlembedded: Return the actual name and not as-name
3e19f2e Fixed qlafilter build failure
810b24e MXS-675: Standardized qlafilter output
be92173 qc_sqlite: Exclude alias names from affected fields
9479280 qc_sqlite: Add support for explain EXTENDED
13b0e10 qc_sqlite: Add support for DELETE
a6ccfea qc_mysqlembedded: Look at all conditional items
b428041 qc_sqlite: Extend SELECT options
83f829f query_classifier: Correctly calculate the length of a GWBUF
2ddb24c query_classifier: Ensure that -- comments are handled
fa7a746 qc_sqlite: Allow STRAIGHT_JOIN in SELECTS
6f47819 FindLibUUID update
5ed897b Added FindLibUUID cmake file
16e02bb Added FindLibUUID cmake file
aff63e0 MXS-680: qc_mysqlembedded does not look into functions
8a0eeb4 query_classifier: Improve output of compare
6f08185 Query classifier can now convert enums to strings
124e2b9 MXS-679: Correctly collect fields of WHERE
353c97c transaction_safety default is off
896e37b qc_sqlite: Invert stop logic and be more verbose
7a44d4d qc_sqlite: Extend what is accepted in CREATE TABLE
4dbf499 qc_sqlite: Accept FIRST in ALTER TABLE
3f655c0 qc_sqlite: Update table and affected fields for INSERT
8e1e275 qc_sqlite: Make AS optional in CREATE statement
5f2084b qc_sqlite: Add support for ENGINE when creating a table
242f183 qc_sqlite: CREATE parameters handled in the correct place
8ed2e25 qc_sqlite: Trace only when needed
63d4531 qc_sqlite: Update affected fields also from functions
118cdc3 qc_sqlite: Allow multiple index names in USE|IGNORE INDEX
912da76 qc_sqlite: Add initial support for ...IGNORE INDEX...
0aa7de6 qc_sqlite: Log detailed message on error
3e3bf1a qc_sqlite: Extend create syntax.
c4a4572 qc_sqlite: Exclude quoted values
1621f49 Removed MYSQL_EMBEDDED_LIBRARIES
d3e324c UUID generation now comes from libuuid
e8fe678 qc_sqlite: Enable conditional compilation
a9522ba qc_sqlite: Handle X.Y selects
9bc9770 qc_sqlite: Use same stream when outputting padding
366257a qc_sqlite: Add support for UNSIGNED and ZEROFILL
d4d90ff qc_sqlite: Add support for DROP VIEW
d0519bd qc_sqlite: Extend DROP TABLE syntax
c1e4894 qc_sqlite: Add flag to compare for stopping at first error
9fd6344 MXS-674: Maxinfo generates invalid JSON
3c0d3e5 Fix stupid errors.
9d32b2d Include read queue in buffer provided by dcb_read; changes to match.
b690797 Fix double spinlock release in random_jkiss.
6a4328f Fix problems of memory not being freed in some error cases.
2112e56 Change DCB and Session handling to recycle memory; fix bug in random_jkiss.
3912f72 MXS-631, MXS-632: Cleaned up default value CMake files
383ccb8 Fixed build failure on MariaDB 5.5
a60bca5 Merge branch '1.2.1-binlog_router_trx' into develop
3c2a062 Fix to crashes in embedded library with MariaDB 10.0
d3fe938 MXS-662: Service protocol check no longer ignores bind address
c3da49b qc_sqlite: Update affected fields from everywhere
7a0fab8 qc_sqlite: Allow verbosity of compare test to be controlled
81d6822 qc_sqlite: Cleanup handling of select columns
13e5c59 qc_sqlite: Introduce custom allocation functions
026f27d qc_sqlite: Add support for "USE database"
99079df qc_sqlite: Ignore duplicates when comparing affected fields
ca45cd6 qc_sqlite: Add initial support for qc_get_database_names
75970b6 qc_sqlite: Add support for DROP TABLE.
b97e45d qc_sqlite: Move get_affected_fields() to other helpers
cb0fa96 qc_sqlite: Collect table names of INSERT
3a7c513 qc_mysqlembedded: Only look for created name if CREATE
308b0a4 qc_sqlite: Add support for gc_get_created_table_name.
0dc4af2 qc_sqlite: Add qc_has_clause() handling to update
e9f2d1d qc_sqlite: Update now also provides table names
c3192e4 qc_sqlite: Add initial support for get_table_names
c51eafd qc_sqlite: Add support for qc_has_clause
f318fb2 qc_mysqlembedded: Work around embedded lib bug
4ba2e11 qc_sqlite: Add initial support for qc_get_affected_fields
080dea5 qc_sqlite: Support is_read_query
3f94df1 Fixed compare.cc build failure
868a712 Updated freeing of buffer chains in readwritesplit
9bf7fca Formatted readwritesplit source code
de4da2b Add assertion to spinlock release to detect release of already released spinlock.
d30955a qc_sqlite: Handle the default case of affected fields.
5d02b3f qc_sqlite: Set operation when creating table
94a334d Add test for comparing qc-output
aa6f5d6 Allow a specific query classifier to be loaded explicitly
c799d37 Test both qc_mysqlembedded and qc_sqlite
f8d9aa1 qc_sqlite: Enable "set @user_var=@@system_var"
f190bdc qc_sqlite: Recognize /*!-comments
b694b55 Fixed binary Avro format streaming
c95fa86 qc_sqlite: Report correctly the type of set autocommit
9cb236c qc_sqlite: Add test case
77b4e62 Ensure classify test checks all types
962039e Change return type of qc_get_type
ae00df8 qc_sqlite: Add initial support for the SET statement.
88253c5 qc_sqlite: Rename functions
fa48043 Rework of MySQL backend protocol clean up to fix fault.
3851064 qc_sqlite: Correct recognition of system variables (@@xyz).
9d86f7f qc_sqlite: Detect user and system variables.
a683297 qc_sqlite: Recognize and accept system variables (@@xyz).
a4f64dd qc_sqlite: Add initial support for CREATE [TEMPORARY] TABLE
f834b4f MXS-661: Only COM_QUERY packets are parsed
30077c1 CMake policies set only for correct versions
a166f34 Suppress warning about unknown CMake target
1412730 Added more variables to launchable monitor scripts
358c194 MXS-656: Galera nodes with index 0 can be master again
842aec5 qc_sqlite: Add support for BEGIN, COMMIT, ROLLBACK
b9cad6d Add initial support for UPDATE.
95741cb Add initial support for insert.
3796158 Re-install sqlite whenever parse.y has changed
5bcd8cf Ensure that the query is the one passed
cf05533 Add support for obtaining the type of a query
400d8b4 Always log the outcome
45cf632 Fixed resource leaks and minor bugs
fa9e970 Printout the query when there is a mismatch.
263cb07 All classify to be used with any query classifier
ea381b9 Further cleanup of classify.c
23f6f30 Merge pull request #107 from godmodelabs/typo-dpkg
8c2a64e Fixed classify build failure
0c3070b Fixed binlog to Avro conversion bugs
b827ba9 MXS-653: Silence maxpasswd
30d981c MXS-654: Add test for checking maxpasswd
984039b Rearrange classify.c
837e46d Add log initialization
1cc7a6e Reformat query_classifier/test/classify.c
065a4e5 Merge branch 'develop' into develop-MXS-544-b-merge
ca27f13 Fixed binlog build failure
fb81be2 fixed typo dpgk <-> dpkg
1e88d5d Added python based CDC user creation script
040bbdd MXS-633: Monitor permission checks moved to modules
cde7595 Master-Slave clusters are now robust by default
158c776 Cleaned up core test suite
94c6e66 Fixed bugs resulting from merge
a491e11 Merge remote-tracking branch 'origin/MXS-544-b' into develop-MXS-544-b-merge
30f9f25 Cleaned up avro.c
6286f64 Merge branch 'release-1.4.1' into develop
00206ac MXS-194: Added support for more query types to dbfwfilter
267832b Fixed diagnostic output
a64b694 Fixed bugs in avrorouter
8faaba1 Fixed a bug in GTID seeking
a5fafb7 Fixed typos in avrorouter documentation
8080379 Added avrorouter documentation
fa07d8a Fixed dbfwfilter rule parser build failure
744ce0d Constraints are ignored in DDL statement processing
50808c6 Cleaned up avrorouter
47f6032 Merge branch '1.2.1-binlog_router_trx_lint' into develop
caa0956 Added missing dependencies to maxscale-core
92df61a Remove parallel make from travis coverity builds
fa2b2b4 Added more error logging to Avro record reading
9a98e8b Support for GTID requests and data bursts
c2a787b Small diagnostic fix
c4cee7e Added format output requested by client
50483c7 Cleaning up of Avro code
d485379 Added support for binary Avro protocol
c22cdbb Converted Avro GTID from string to integer representation
5795ca9 Added coverity notification email to .travis.yml
a06e44d Added coverity_scan to Travis
6b94384 Fixed memory leak in avro_schema.c
a11096c Support for db.table request for Avrorouter
4e5cbbf Fixed bugs in Avro record reading
a99e427 Fixed minor bugs in avrorouter
01db8ae Fixed errors with CREATE TABLE statements
f5f3d7a Diagnostic routine update
209324f Added missing include for log_manager.h
e62f764 Added sending of schemas and file rotation
8c8fcbb Added missing log_manager.h include
b13942d Changed printf calls in maxavro library to MXS_ERROR
1168962 More lint inspired changes, mainly in blr_master.c and blr_slave.c
ced8f2f Fixed directory checks in avrorouter
a8ae6be Minor fix to string processing
fbd2d95 Fixed typo in dbfwfilter's CMakeLists.txt
29c3cf4 Merge pull request #106 from mariadb-corporation/willfong-patch-1
854d4e9 Add password column name to example
2f956df Moved server state change logging to a common function
007121f Fixed truncated string values
782892b Fix lint errors and warnings for blr_file.c
4f99fc5 Added Avro testing script
2820980 Small fix to help clear lint problems in blr.c
3afeda4 Fixed errors and warnings located by lint
ecfff82 Fix most lint problems in blr.c
223689c Added ALTER TABLE support
80bc935 Fix final lint problems with mysql_common protocol functions.
e068310 Added preliminary alter table parsing
8c723f1 Lint monitor modules
fdb5620 Fix lint issues in authenticators.
84f0e04 Added function documentation and renamed files
365d9f5 Tidy up, mainly for lint
2ff3005 Added update rows event processing and event types to avro records
2ae0371 Fixed failing regex and improved data streaming
f19206a Renamed avrorouter header
aa7174b Moved relpacement and storage of ddl statements to a separate function
0c10be8 Improved client notification and added Avro block size managemet
91405a7 Cleaned up instance creation
dd97485 Removed useless vars
af64e9e Added CDC authentication with a db file
b73a118 Streamline and lint MySQL backend protocol.
65034ce Merge branch 'release-1.4.0' into develop
28f7e4e Added callback for AVRO client async data transmission
628c27a Added MAXAVRO_FILE struct to AVRO_CLIENT
32b3645 Fixed slavelag build failure
7b15542 Added default authentication method for CDC protocol
5f8e20f Renamed maxavro types and formatted files that use them
882cf84 Added more function documentation to maxavro library
9532f0b Fixed CDC protocol build failure
35a1d3a Added support for offsets in client requests
94577ac Fixed, formatted and refactored CDC protocol
da9bcad Use the maxavro library to read data from Avro files
3ececee Added low level error checking to maxavro library
01b0b8b Tidy and lint mysql_client.c
943f0a7 Added handling of Avro boolean data types to maxavro library
4c781f7 Cleaned up maxavro library and avrorouter
6b2e85d Renamed functions more consistently and cleaned up code
e07158a Moved query event handling to its own function
df7d4c0 Added avro_ prefix to rbr.c
fcbfceb Added seeking to a position in an Avro file
068243a CDC auth decoding
3584d54 Add checks to simplify downstream logic.
9b2c323 Removed useless fprintf
bd5cd52 Added missing authfunc setup
e4aff59 Added record value processing
5cc8615 Added value length functions
7921ecc Merge branch 'MXS-615' into MXS-483
4b09cca Added Travis status to readme.md
cca3a48 Simplify interface to max admin authentication.
4739838 Authenticator API update
233505f Maxavrocheck now accepts multiple files
3fdd137 Improved the Avro file handling
a6ba913 Merge from MXS-615
417d742 Added maxavrocheck
014f9cf Remove obsolete second parameter from authenticate function in authenticators.
ece7ece MaxAdmin authentication converted to a module. Fix quirk in SSL setup.
7c8b37e Moved contents of avro_schema.h into mxs_avro.h
d6660cf Improvements to type handling
71ed0cf Protocol API to have entry point for obtaining default authenticator name.
9d35de2 Fixed transaction tracking
5be02a2 Avrorouter internal state is now stored in the Avro file directory
9293464 Added new info to avro diagnostics
06e0e93 Protocol modules can still handle the authentication outside authenticator modules
6d7108b Added JSON output when Requesting an avro file
6188211 Added new CDC protocol state
c8af27f CDC authentication uses its own authenticator
6590f94 Factor out qc_get_qtype_str
b7880f1 Fix qc_sqlite CMakeLists.txt
bd4ff43 Fixed connector-c being updated and built after every make invokation
0d9e57b Fixed non-MariaDB connectors being used in builds
3d3b779 FIX BUG IN CLIENT DCB SHUTDOWN THAT CAN CAUSE CRASHES
e45ba33 Fixed Connector-C .cmake files
c130189 Fixed connector-c being updated and built after every make invokation
7f3cdf3 Fixed errors on binlog rotation
9d3c83a Remove qc_sqlite
15e8ba5 CDC protocol is now compliant with new protocol structure
4460869 Merge branch 'release-1.4.0' into MXS-483
ea40812 Cleaned up the binlog processing loop
cb646ca Add minimal select recognition to qc_sqlite
ac1a9c5 Fixed binlogrouter test
85dd227 Re-route sqlite's sqlite3Select.
7a2e6f3 Update CMakeLists.txt for qc_sqlite
7a751c3 Added timestamps to records and fixed minor bugs
f73bdde Avrorouter state storage to disk
fcf0488 Fixed Connector-C .cmake files
48b8e4e Merge branch 'MXS-615' into MXS-615-binlog-merge
7c8e19f Add missing dependencies for qc_sqlite
bb9b667 Improvements to type handling and binlog position tracking
dc66b74 Client UUID added
f12fce4 AVRO registration is now handled by avro router
575b809 Add skeleton sqlite-based query classifier.
d09d5fc Build sqlite
146d1f9 Fixed BLOB type handling and refined error messages
6e9e521 Added client user to diagnostics
4538bb8 Merge pull request #104 from rasmushoj/develop
7e18d95 Avro router diagnostics routine update
01e3f75 reverted changes in CMakeLists.txt
52f7c78 reverted changes in postinst.in
eaed577 Added sqlite 3110100
a58cdda Travis configuration for MaxScale. ...
38b452d MIGRATE FREE CLIENT DATA TO AUTH MODULE; BUG FIXES; TIDY UP
6e64506 Fixed minor bugs
aff2411 Enabled CDC protocol
f669100 Fixed NULL Avro value being assigned to a field which cannot be NULL
8f6b16a Added row event processing to avrorouter
2939fe0 Updated Avro schema management to use actual column names
9e3b0cb Removed use of RBR related functions in binlogrouter
d674903 Formatted avro files
fe028d1 DEVELOPMENT OF AUTHENTICATION AS MODULE - WILL NOT WORK YET
977aded Added authenticator modules to the build
a2b384f MOVE MYSQL AUTH CODE INTO AUTHENTICATOR MODULES DIRECTORY
a5d7484 PRELIMINARY CHANGES TO CREATE AUTHENTICATORS AS MODULES
66cf802 Merge remote-tracking branch 'origin/develop' into MXS-615
bca0a7d MINOR CHANGES TO SATISFY LINT
5a9e397 Added Avrorouter binlog file walking
fbc737f Fixed binlogrouter test
3c7c9d7 Added avrorouter main event handling loop
07ad81b Moved common binlogrouter code to a separate file
8c605ed Fixed avrorouter build failures
aa1ba05 Moved binlog definitions to a separate header and fixed build failures
eee7c55 Added create table statement detection
e52b27e Added AVRO_INSTANCE and AVRO_CLIENT
0830caa Change test for client DCB to use role being DCB_ROLE_CLIENT_HANDLER. ...
997bbca Change protocols to continue looping if an accept fails; ...
522e42d Make use of dcb_accept and dcb_listen in httpd and telnetd protocols.
4e692b0 Generalise dcb_listen to tailor log messages to different protocols. ...
52c431d Remove support for passing default port number when handling ...
afe5abc Fix bug in creation of SSL listener structure; fix bugs in ...
0bd6c77 Merge remote-tracking branch 'origin/MXS-544' into MXS-544-a ...
7598597 Add dcb_listen function to make a given DCB into a listener, ...
a275d89 Maxbinlogcheck avro version can detect proper end of file
9bb55a5 Moved row event and table map event handling to a separate file
b7d9e09 Add/improve comments, fix mistake with premature return.
c598770 First attempt at extracting general code into dcb_accept, ...
f20f28f Testing with maxbinlogcheck
b3c60b7 Added mysql_binlog files
0ff9971 Added MariaDB/MySQL binary data processing functions
124560c Merge branch '1.2.1-binlog_router_trx' into MXS-483
4deccff New router fro cdc client
2c11434 Fixed test compiler errors
c1f7d24 Obliged to merge remote-tracking branch 'origin/develop' ...
1775599 Merge remote-tracking branch 'origin/MXS-544' into Test-dev-544-merge
c5317da Small modifications in comments
11c0666 Code cleanup
64a5e9a Merge branch 'release-1.3.0' into MXS-483
2c11e89 First Implementation of CDC
2016-08-11 12:44:07 +03:00

5925 lines
196 KiB
C

/*
** 2006 Oct 10
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is an SQLite module implementing full-text search.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS3 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS3 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
*/
/* The full-text index is stored in a series of b+tree (-like)
** structures called segments which map terms to doclists. The
** structures are like b+trees in layout, but are constructed from the
** bottom up in optimal fashion and are not updatable. Since trees
** are built from the bottom up, things will be described from the
** bottom up.
**
**
**** Varints ****
** The basic unit of encoding is a variable-length integer called a
** varint. We encode variable-length integers in little-endian order
** using seven bits per byte as follows:
**
** KEY:
** A = 0xxxxxxx 7 bits of data and one flag bit
** B = 1xxxxxxx 7 bits of data and one flag bit
**
** 7 bits - A
** 14 bits - BA
** 21 bits - BBA
** and so on.
**
** This is similar in concept to how sqlite encodes "varints" but
** the encoding is not the same. SQLite varints are big-endian
** and are limited to 9 bytes in length whereas FTS3 varints are
** little-endian and can be up to 10 bytes in length (in theory).
**
** Example encodings:
**
** 1: 0x01
** 127: 0x7f
** 128: 0x80 0x01
**
**
**** Document lists ****
** A doclist (document list) holds a docid-sorted list of hits for a
** given term. Doclists hold docids and associated token positions.
** A docid is the unique integer identifier for a single document.
** A position is the index of a word within the document. The first
** word of the document has a position of 0.
**
** FTS3 used to optionally store character offsets using a compile-time
** option. But that functionality is no longer supported.
**
** A doclist is stored like this:
**
** array {
** varint docid; (delta from previous doclist)
** array { (position list for column 0)
** varint position; (2 more than the delta from previous position)
** }
** array {
** varint POS_COLUMN; (marks start of position list for new column)
** varint column; (index of new column)
** array {
** varint position; (2 more than the delta from previous position)
** }
** }
** varint POS_END; (marks end of positions for this document.)
** }
**
** Here, array { X } means zero or more occurrences of X, adjacent in
** memory. A "position" is an index of a token in the token stream
** generated by the tokenizer. Note that POS_END and POS_COLUMN occur
** in the same logical place as the position element, and act as sentinels
** ending a position list array. POS_END is 0. POS_COLUMN is 1.
** The positions numbers are not stored literally but rather as two more
** than the difference from the prior position, or just the position plus
** 2 for the first position. Example:
**
** label: A B C D E F G H I J K
** value: 123 5 9 1 1 14 35 0 234 72 0
**
** The 123 value is the first docid. For column zero in this document
** there are two matches at positions 3 and 10 (5-2 and 9-2+3). The 1
** at D signals the start of a new column; the 1 at E indicates that the
** new column is column number 1. There are two positions at 12 and 45
** (14-2 and 35-2+12). The 0 at H indicates the end-of-document. The
** 234 at I is the delta to next docid (357). It has one position 70
** (72-2) and then terminates with the 0 at K.
**
** A "position-list" is the list of positions for multiple columns for
** a single docid. A "column-list" is the set of positions for a single
** column. Hence, a position-list consists of one or more column-lists,
** a document record consists of a docid followed by a position-list and
** a doclist consists of one or more document records.
**
** A bare doclist omits the position information, becoming an
** array of varint-encoded docids.
**
**** Segment leaf nodes ****
** Segment leaf nodes store terms and doclists, ordered by term. Leaf
** nodes are written using LeafWriter, and read using LeafReader (to
** iterate through a single leaf node's data) and LeavesReader (to
** iterate through a segment's entire leaf layer). Leaf nodes have
** the format:
**
** varint iHeight; (height from leaf level, always 0)
** varint nTerm; (length of first term)
** char pTerm[nTerm]; (content of first term)
** varint nDoclist; (length of term's associated doclist)
** char pDoclist[nDoclist]; (content of doclist)
** array {
** (further terms are delta-encoded)
** varint nPrefix; (length of prefix shared with previous term)
** varint nSuffix; (length of unshared suffix)
** char pTermSuffix[nSuffix];(unshared suffix of next term)
** varint nDoclist; (length of term's associated doclist)
** char pDoclist[nDoclist]; (content of doclist)
** }
**
** Here, array { X } means zero or more occurrences of X, adjacent in
** memory.
**
** Leaf nodes are broken into blocks which are stored contiguously in
** the %_segments table in sorted order. This means that when the end
** of a node is reached, the next term is in the node with the next
** greater node id.
**
** New data is spilled to a new leaf node when the current node
** exceeds LEAF_MAX bytes (default 2048). New data which itself is
** larger than STANDALONE_MIN (default 1024) is placed in a standalone
** node (a leaf node with a single term and doclist). The goal of
** these settings is to pack together groups of small doclists while
** making it efficient to directly access large doclists. The
** assumption is that large doclists represent terms which are more
** likely to be query targets.
**
** TODO(shess) It may be useful for blocking decisions to be more
** dynamic. For instance, it may make more sense to have a 2.5k leaf
** node rather than splitting into 2k and .5k nodes. My intuition is
** that this might extend through 2x or 4x the pagesize.
**
**
**** Segment interior nodes ****
** Segment interior nodes store blockids for subtree nodes and terms
** to describe what data is stored by each subtree. Interior
** nodes are written using InteriorWriter, and read using
** InteriorReader. InteriorWriters are created as needed when
** SegmentWriter creates new leaf nodes, or when an interior node
** itself grows too big and must be split. The format of interior
** nodes:
**
** varint iHeight; (height from leaf level, always >0)
** varint iBlockid; (block id of node's leftmost subtree)
** optional {
** varint nTerm; (length of first term)
** char pTerm[nTerm]; (content of first term)
** array {
** (further terms are delta-encoded)
** varint nPrefix; (length of shared prefix with previous term)
** varint nSuffix; (length of unshared suffix)
** char pTermSuffix[nSuffix]; (unshared suffix of next term)
** }
** }
**
** Here, optional { X } means an optional element, while array { X }
** means zero or more occurrences of X, adjacent in memory.
**
** An interior node encodes n terms separating n+1 subtrees. The
** subtree blocks are contiguous, so only the first subtree's blockid
** is encoded. The subtree at iBlockid will contain all terms less
** than the first term encoded (or all terms if no term is encoded).
** Otherwise, for terms greater than or equal to pTerm[i] but less
** than pTerm[i+1], the subtree for that term will be rooted at
** iBlockid+i. Interior nodes only store enough term data to
** distinguish adjacent children (if the rightmost term of the left
** child is "something", and the leftmost term of the right child is
** "wicked", only "w" is stored).
**
** New data is spilled to a new interior node at the same height when
** the current node exceeds INTERIOR_MAX bytes (default 2048).
** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing
** interior nodes and making the tree too skinny. The interior nodes
** at a given height are naturally tracked by interior nodes at
** height+1, and so on.
**
**
**** Segment directory ****
** The segment directory in table %_segdir stores meta-information for
** merging and deleting segments, and also the root node of the
** segment's tree.
**
** The root node is the top node of the segment's tree after encoding
** the entire segment, restricted to ROOT_MAX bytes (default 1024).
** This could be either a leaf node or an interior node. If the top
** node requires more than ROOT_MAX bytes, it is flushed to %_segments
** and a new root interior node is generated (which should always fit
** within ROOT_MAX because it only needs space for 2 varints, the
** height and the blockid of the previous root).
**
** The meta-information in the segment directory is:
** level - segment level (see below)
** idx - index within level
** - (level,idx uniquely identify a segment)
** start_block - first leaf node
** leaves_end_block - last leaf node
** end_block - last block (including interior nodes)
** root - contents of root node
**
** If the root node is a leaf node, then start_block,
** leaves_end_block, and end_block are all 0.
**
**
**** Segment merging ****
** To amortize update costs, segments are grouped into levels and
** merged in batches. Each increase in level represents exponentially
** more documents.
**
** New documents (actually, document updates) are tokenized and
** written individually (using LeafWriter) to a level 0 segment, with
** incrementing idx. When idx reaches MERGE_COUNT (default 16), all
** level 0 segments are merged into a single level 1 segment. Level 1
** is populated like level 0, and eventually MERGE_COUNT level 1
** segments are merged to a single level 2 segment (representing
** MERGE_COUNT^2 updates), and so on.
**
** A segment merge traverses all segments at a given level in
** parallel, performing a straightforward sorted merge. Since segment
** leaf nodes are written in to the %_segments table in order, this
** merge traverses the underlying sqlite disk structures efficiently.
** After the merge, all segment blocks from the merged level are
** deleted.
**
** MERGE_COUNT controls how often we merge segments. 16 seems to be
** somewhat of a sweet spot for insertion performance. 32 and 64 show
** very similar performance numbers to 16 on insertion, though they're
** a tiny bit slower (perhaps due to more overhead in merge-time
** sorting). 8 is about 20% slower than 16, 4 about 50% slower than
** 16, 2 about 66% slower than 16.
**
** At query time, high MERGE_COUNT increases the number of segments
** which need to be scanned and merged. For instance, with 100k docs
** inserted:
**
** MERGE_COUNT segments
** 16 25
** 8 12
** 4 10
** 2 6
**
** This appears to have only a moderate impact on queries for very
** frequent terms (which are somewhat dominated by segment merge
** costs), and infrequent and non-existent terms still seem to be fast
** even with many segments.
**
** TODO(shess) That said, it would be nice to have a better query-side
** argument for MERGE_COUNT of 16. Also, it is possible/likely that
** optimizations to things like doclist merging will swing the sweet
** spot around.
**
**
**
**** Handling of deletions and updates ****
** Since we're using a segmented structure, with no docid-oriented
** index into the term index, we clearly cannot simply update the term
** index when a document is deleted or updated. For deletions, we
** write an empty doclist (varint(docid) varint(POS_END)), for updates
** we simply write the new doclist. Segment merges overwrite older
** data for a particular docid with newer data, so deletes or updates
** will eventually overtake the earlier data and knock it out. The
** query logic likewise merges doclists so that newer data knocks out
** older data.
*/
#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
#if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE)
# define SQLITE_CORE 1
#endif
#include <assert.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include "fts3.h"
#ifndef SQLITE_CORE
# include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#endif
/*
** Forward declarations of file-local (static) functions. Their definitions
** are not in this part of the file.
*/
static int fts3EvalNext(Fts3Cursor *pCsr);
static int fts3EvalStart(Fts3Cursor *pCsr);
static int fts3TermSegReaderCursor(
Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **);
#ifndef SQLITE_AMALGAMATION
# if defined(SQLITE_DEBUG)
/* Debug-build helper: assert that b is true, then return b unchanged. */
int sqlite3Fts3Always(int b){
  assert( b );
  return b;
}
/* Debug-build helper: assert that b is false, then return b unchanged. */
int sqlite3Fts3Never(int b){
  assert( !b );
  return b;
}
# endif
#endif
/*
** Encode the 64-bit value v as a varint into the buffer starting at p[0].
** The value is emitted seven bits at a time, least-significant group first.
** Every byte except the last has its 0x80 bit set; the last byte has it
** clear.
**
** Returns the number of bytes written, which is at least 1 and is asserted
** to be no more than FTS3_VARINT_MAX.
*/
int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){
  unsigned char *aOut = (unsigned char *)p;
  sqlite_uint64 uRem = v;             /* Bits of v not yet written */
  int nByte = 0;                      /* Number of bytes written so far */

  for(;;){
    unsigned char c = (unsigned char)(uRem & 0x7f);
    uRem >>= 7;
    if( uRem==0 ){
      /* Final byte: leave the continuation (0x80) bit clear. */
      aOut[nByte++] = c;
      break;
    }
    aOut[nByte++] = (unsigned char)(c | 0x80);
  }

  assert( nByte <= FTS3_VARINT_MAX );
  return nByte;
}
/*
** Helper macros used to decode the leading bytes of a varint.
**
** GETVARINT_STEP keeps the bits of v selected by mask1, ORs in the next
** byte read through ptr shifted left by `shift`, and advances ptr by one.
** If none of the bits in mask2 are set in v afterwards, it assigns v to
** `var` and executes `return ret` in the ENCLOSING function.
**
** GETVARINT_INIT does the same for the first byte, except that v is simply
** loaded from *ptr. Its `shift` and `mask1` arguments are accepted but not
** used by the expansion.
**
** Both macros expand to two statements (an assignment followed by an `if`),
** so they must not be used as the unbraced body of a control statement.
*/
#define GETVARINT_STEP(v, ptr, shift, mask1, mask2, var, ret) \
v = (v & mask1) | ( (*ptr++) << shift ); \
if( (v & mask2)==0 ){ var = v; return ret; }
#define GETVARINT_INIT(v, ptr, shift, mask1, mask2, var, ret) \
v = (*ptr++); \
if( (v & mask2)==0 ){ var = v; return ret; }
/*
** Read a 64-bit variable-length integer from memory starting at p[0] and
** store the decoded value in *v.
**
** Returns the number of bytes consumed. Decoding stops at the first byte
** whose 0x80 bit is clear, or after 10 bytes, whichever comes first, so the
** return value is always between 1 and 10. No bounds checking is performed
** on the input buffer.
**
** The input is read through an unsigned char pointer. On platforms where
** plain char is signed, reading bytes >= 0x80 as char yields negative
** values, and left-shifting those in GETVARINT_STEP is undefined behaviour.
*/
int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){
  const unsigned char *z = (const unsigned char *)p;
  const unsigned char *zStart = z;
  u32 a;
  u64 b;
  int shift;

  /* Fast path: varints of up to four bytes fit in the 32-bit accumulator.
  ** Each macro returns from this function if the varint ends there. */
  GETVARINT_INIT(a, z, 0,  0x00,     0x80, *v, 1);
  GETVARINT_STEP(a, z, 7,  0x7F,     0x4000, *v, 2);
  GETVARINT_STEP(a, z, 14, 0x3FFF,   0x200000, *v, 3);
  GETVARINT_STEP(a, z, 21, 0x1FFFFF, 0x10000000, *v, 4);
  b = (a & 0x0FFFFFFF );

  /* Slow path: accumulate the remaining 7-bit groups in 64 bits. */
  for(shift=28; shift<=63; shift+=7){
    u64 c = *z++;
    b += (c&0x7F) << shift;
    if( (c & 0x80)==0 ) break;
  }
  *v = b;
  return (int)(z - zStart);
}
/*
** Similar to sqlite3Fts3GetVarint(), except that the output is truncated to a
** 32-bit integer before it is stored in *pi.
**
** Returns the number of bytes consumed, between 1 and 5. If the first four
** bytes all have their 0x80 bit set, the low four bits of the fifth byte
** supply bits 28..31 of the result and 5 is returned, whether or not the
** fifth byte has its own continuation bit set.
**
** The input is read through an unsigned char pointer. On platforms where
** plain char is signed, reading bytes >= 0x80 as char yields negative
** values, and left-shifting those in GETVARINT_STEP is undefined behaviour.
*/
int sqlite3Fts3GetVarint32(const char *p, int *pi){
  const unsigned char *z = (const unsigned char *)p;
  u32 a;

#ifndef fts3GetVarint32
  GETVARINT_INIT(a, z, 0,  0x00,     0x80, *pi, 1);
#else
  /* NOTE(review): when fts3GetVarint32 is defined, the assert below shows
  ** this function expects a first byte with the 0x80 bit set; presumably the
  ** fts3GetVarint32 wrapper handles single-byte varints itself. Confirm
  ** against its definition. */
  a = (*z++);
  assert( a & 0x80 );
#endif

  GETVARINT_STEP(a, z, 7,  0x7F,     0x4000, *pi, 2);
  GETVARINT_STEP(a, z, 14, 0x3FFF,   0x200000, *pi, 3);
  GETVARINT_STEP(a, z, 21, 0x1FFFFF, 0x10000000, *pi, 4);
  a = (a & 0x0FFFFFFF );
  *pi = (int)(a | ((u32)(*z & 0x0F) << 28));
  return 5;
}
/*
** Return the number of bytes needed to encode v as a varint: one byte for
** each group of seven bits, with a minimum of one byte (for v==0).
*/
int sqlite3Fts3VarintLen(sqlite3_uint64 v){
  int nByte = 1;
  for(v >>= 7; v!=0; v >>= 7){
    nByte++;
  }
  return nByte;
}
/*
** Strip SQL-style quoting from the nul-terminated string z, in place.
**
** If z does not start with one of the quote characters ", ', [ or `, the
** string is left untouched. Otherwise the opening quote is dropped and
** characters are copied down until the closing quote (']' for an opening
** '[', otherwise the same character as the opening quote) or the end of the
** string. A doubled closing quote inside the string is an escape and is
** copied as a single character. Anything after the closing quote is
** discarded.
**
** Examples:
**
**     "abc"     becomes   abc
**     'xyz'     becomes   xyz
**     [pqr]     becomes   pqr
**     `mno`     becomes   mno
**     'it''s'   becomes   it's
*/
void sqlite3Fts3Dequote(char *z){
  char cClose = z[0];       /* Character that terminates the quoted text */
  const char *zIn;          /* Next byte to read */
  char *zOut;               /* Next byte to write */

  switch( cClose ){
    case '[':
      cClose = ']';
      break;
    case '\'':
    case '"':
    case '`':
      break;
    default:
      return;               /* Not quoted: nothing to do */
  }

  zOut = z;
  zIn = z + 1;
  while( *zIn ){
    if( *zIn!=cClose ){
      *zOut++ = *zIn++;
    }else if( zIn[1]==cClose ){
      /* Doubled close-quote: emit one and skip both. */
      zIn += 2;
      *zOut++ = cClose;
    }else{
      break;                /* Unescaped close-quote ends the string */
    }
  }
  *zOut = '\0';
}
/*
** Decode one varint from the buffer at *pp, add its value to *pVal, and
** advance *pp past the bytes that were consumed.
*/
static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){
  sqlite3_int64 iDelta;
  int nByte = sqlite3Fts3GetVarint(*pp, &iDelta);
  *pp += nByte;
  *pVal += iDelta;
}
/*
** On entry, *pp points to the first byte following a varint that is part of
** a doclist (or position-list, or any other list of varints). This function
** moves *pp back to the first byte of that varint and sets *pVal to the
** varint's value.
**
** Argument pStart points to the first byte of the doclist that the varint
** is part of; the backwards scan does not read before it.
*/
static void fts3GetReverseVarint(
  char **pp,
  char *pStart,
  sqlite3_int64 *pVal
){
  sqlite3_int64 iDecoded;
  char *pScan = (*pp) - 2;

  /* (*pp)[-1] is the final byte of the varint of interest, so unless the
  ** doclist is corrupt its 0x80 bit is clear. Starting one byte earlier,
  ** walk backwards over bytes that have the 0x80 bit set; these belong to
  ** the same varint. The scan stops on the last byte of the preceding
  ** varint, or just before pStart. */
  while( pScan>=pStart && (*pScan & 0x80) ){
    pScan--;
  }
  pScan++;

  *pp = pScan;
  sqlite3Fts3GetVarint(pScan, &iDecoded);
  *pVal = iDecoded;
}
/*
** The xDisconnect() virtual table method.
**
** Finalizes every prepared statement in the table's aStmt[] array, frees
** the strings owned by the table, invokes the tokenizer's xDestroy method
** and finally frees the Fts3Table structure itself. Always returns
** SQLITE_OK.
*/
static int fts3DisconnectMethod(sqlite3_vtab *pVtab){
  Fts3Table *pTab = (Fts3Table *)pVtab;
  int iStmt = 0;

  /* The table is asserted to have no pending data and no segments blob. */
  assert( pTab->nPendingData==0 );
  assert( pTab->pSegments==0 );

  /* Release all prepared statements held by the table. */
  while( iStmt<SizeofArray(pTab->aStmt) ){
    sqlite3_finalize(pTab->aStmt[iStmt]);
    iStmt++;
  }

  /* Release the strings owned by the table. */
  sqlite3_free(pTab->zSegmentsTbl);
  sqlite3_free(pTab->zReadExprlist);
  sqlite3_free(pTab->zWriteExprlist);
  sqlite3_free(pTab->zContentTbl);
  sqlite3_free(pTab->zLanguageid);

  /* Let the tokenizer module destroy the tokenizer instance. */
  pTab->pTokenizer->pModule->xDestroy(pTab->pTokenizer);

  sqlite3_free(pTab);
  return SQLITE_OK;
}
/*
** Replace the error message at *pzErr. The existing message (if any) is
** freed with sqlite3_free() first, then *pzErr is set to a new string built
** by sqlite3_vmprintf() from zFormat and the trailing arguments.
*/
void sqlite3Fts3ErrMsg(char **pzErr, const char *zFormat, ...){
  va_list args;
  char *zMsg;

  sqlite3_free(*pzErr);
  va_start(args, zFormat);
  zMsg = sqlite3_vmprintf(zFormat, args);
  va_end(args);
  *pzErr = zMsg;
}
/*
** Build SQL text from the printf-style format zFormat and its arguments,
** run it against db with sqlite3_exec(), and store the result code in *pRc.
**
** If *pRc is already non-zero on entry, nothing is done; this lets callers
** chain several calls and check the outcome once at the end. If the SQL
** text cannot be allocated, *pRc is set to SQLITE_NOMEM.
*/
static void fts3DbExec(
  int *pRc,                 /* IN/OUT: result code */
  sqlite3 *db,              /* Database in which to run the SQL */
  const char *zFormat,      /* Format string for the SQL */
  ...                       /* Arguments to the format string */
){
  if( *pRc==0 ){
    va_list args;
    char *zStmt;

    va_start(args, zFormat);
    zStmt = sqlite3_vmprintf(zFormat, args);
    va_end(args);

    if( zStmt ){
      *pRc = sqlite3_exec(db, zStmt, 0, 0, 0);
      sqlite3_free(zStmt);
    }else{
      *pRc = SQLITE_NOMEM;
    }
  }
}
/*
** The xDestroy() virtual table method.
**
** Drops the shadow tables belonging to the FTS table. The %_content table
** is only dropped when the table has no zContentTbl set. If every DROP
** succeeds, the in-memory table object is released via
** fts3DisconnectMethod() and its result is returned; otherwise the first
** SQLite error code encountered is returned.
*/
static int fts3DestroyMethod(sqlite3_vtab *pVtab){
  Fts3Table *pTab = (Fts3Table *)pVtab;
  sqlite3 *db = pTab->db;             /* Database handle */
  const char *zDb = pTab->zDb;        /* Name of database (e.g. "main", "temp") */
  int rc = SQLITE_OK;                 /* Return code */

  /* Drop the shadow tables. fts3DbExec() is a no-op once rc is non-zero. */
  if( pTab->zContentTbl==0 ){
    fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", zDb, pTab->zName);
  }
  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", zDb, pTab->zName);
  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", zDb, pTab->zName);
  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", zDb, pTab->zName);
  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", zDb, pTab->zName);

  if( rc!=SQLITE_OK ){
    return rc;
  }
  /* Everything worked: free the memory associated with the Fts3Table. */
  return fts3DisconnectMethod(pVtab);
}
/*
** Declare the schema of FTS3 table p to SQLite via sqlite3_declare_vtab().
** The comment this replaces states that this is done as part of the
** xConnect() and xCreate() methods.
**
** The declared table has one column per entry in p->azColumn[], followed
** by three HIDDEN columns: one named after the table itself, "docid", and
** the language-id column (p->zLanguageid, or "__langid" if that is NULL).
**
** If *pRc is not SQLITE_OK on entry, this function is a no-op. Otherwise
** *pRc is set to the outcome: SQLITE_NOMEM if a string could not be
** allocated, else the return value of sqlite3_declare_vtab().
*/
static void fts3DeclareVtab(int *pRc, Fts3Table *p){
  int iCol;                 /* Column iterator */
  int rc;                   /* Result to hand back through *pRc */
  char *zSchema;            /* CREATE TABLE text passed to declare_vtab() */
  char *zColList;           /* Comma-terminated list of user columns */
  const char *zLangCol;     /* Name of the language-id column */

  if( *pRc!=SQLITE_OK ) return;

  zLangCol = p->zLanguageid;
  if( zLangCol==0 ){
    zLangCol = "__langid";
  }
  sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);

  /* Build the list of user columns. The %z conversion consumes (frees) the
  ** previous list on each iteration; the loop stops early on OOM. */
  zColList = sqlite3_mprintf("%Q, ", p->azColumn[0]);
  iCol = 1;
  while( zColList && iCol<p->nColumn ){
    zColList = sqlite3_mprintf("%z%Q, ", zColList, p->azColumn[iCol]);
    iCol++;
  }

  /* Build the whole "CREATE TABLE" statement to pass to SQLite. */
  zSchema = sqlite3_mprintf(
      "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)",
      zColList, p->zName, zLangCol
  );

  if( zColList && zSchema ){
    rc = sqlite3_declare_vtab(p->db, zSchema);
  }else{
    rc = SQLITE_NOMEM;
  }

  sqlite3_free(zSchema);
  sqlite3_free(zColList);
  *pRc = rc;
}
/*
** Create the %_stat shadow table for FTS table p if it does not already
** exist. The statement runs through fts3DbExec(), so nothing is executed
** when *pRc is already non-zero on entry. If *pRc is SQLITE_OK afterwards,
** p->bHasStat is set to 1.
*/
void sqlite3Fts3CreateStatTable(int *pRc, Fts3Table *p){
  fts3DbExec(pRc, p->db,
      "CREATE TABLE IF NOT EXISTS %Q.'%q_stat'"
      "(id INTEGER PRIMARY KEY, value BLOB);",
      p->zDb, p->zName
  );
  if( (*pRc)!=SQLITE_OK ){
    return;
  }
  p->bHasStat = 1;
}
/*
** Create the backing store tables (%_content, %_segments and %_segdir)
** required by the FTS3 table passed as the only argument. This is done
** as part of the vtab xCreate() method.
**
** The %_content table is only created when no content= table is in use
** (p->zContentTbl==0). Its columns are "docid INTEGER PRIMARY KEY", then
** one column per user column named 'c<N><name>', and - if a languageid=
** option was specified - a final column that is always literally named
** "langid" (the user-supplied languageid name is not used here).
**
** If the p->bHasDocsize boolean is true (indicating that this is an
** FTS4 table, not an FTS3 table) then also create the %_docsize table,
** and if p->bHasStat is true the %_stat table.
**
** Returns SQLITE_OK on success or an SQLite error code. Once rc holds an
** error the remaining fts3DbExec() calls are expected to be no-ops, so the
** first error is the one returned.
*/
static int fts3CreateTables(Fts3Table *p){
  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* Iterator variable */
  sqlite3 *db = p->db;            /* The database connection */

  if( p->zContentTbl==0 ){
    char *zContentCols;           /* Columns of %_content table */

    /* Create a list of user columns for the content table */
    zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
    for(i=0; zContentCols && i<p->nColumn; i++){
      char *z = p->azColumn[i];
      zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
    }
    if( p->zLanguageid && zContentCols ){
      /* The format consumes exactly one argument (%z); the column name is
      ** the fixed string "langid". */
      zContentCols = sqlite3_mprintf("%z, langid", zContentCols);
    }
    if( zContentCols==0 ) rc = SQLITE_NOMEM;

    /* Create the content table */
    fts3DbExec(&rc, db,
       "CREATE TABLE %Q.'%q_content'(%s)",
       p->zDb, p->zName, zContentCols
    );
    sqlite3_free(zContentCols);
  }

  /* Create other tables */
  fts3DbExec(&rc, db,
      "CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);",
      p->zDb, p->zName
  );
  fts3DbExec(&rc, db,
      "CREATE TABLE %Q.'%q_segdir'("
        "level INTEGER,"
        "idx INTEGER,"
        "start_block INTEGER,"
        "leaves_end_block INTEGER,"
        "end_block INTEGER,"
        "root BLOB,"
        "PRIMARY KEY(level, idx)"
      ");",
      p->zDb, p->zName
  );
  if( p->bHasDocsize ){
    fts3DbExec(&rc, db,
        "CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);",
        p->zDb, p->zName
    );
  }
  assert( p->bHasStat==p->bFts4 );
  if( p->bHasStat ){
    sqlite3Fts3CreateStatTable(&rc, p);
  }
  return rc;
}
/*
** Query the page size of database p->zDb with "PRAGMA <db>.page_size" and
** store the result, in bytes, in p->nPgsz.
**
** If preparing the PRAGMA fails with SQLITE_AUTH, a page size of 1024 is
** assumed and the error is suppressed.
**
** This is a no-op if *pRc is not SQLITE_OK on entry. Otherwise *pRc is
** set to SQLITE_OK on success or to an SQLite error code on failure.
*/
static void fts3DatabasePageSize(int *pRc, Fts3Table *p){
  int rc;                         /* Result stored in *pRc */
  char *zPragma;                  /* Text of the PRAGMA statement */

  if( *pRc!=SQLITE_OK ) return;

  zPragma = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb);
  if( zPragma==0 ){
    rc = SQLITE_NOMEM;
  }else{
    sqlite3_stmt *pPragma;        /* Compiled PRAGMA statement */
    rc = sqlite3_prepare(p->db, zPragma, -1, &pPragma, 0);
    switch( rc ){
      case SQLITE_OK:
        /* Run the PRAGMA and read the single integer it produces. */
        sqlite3_step(pPragma);
        p->nPgsz = sqlite3_column_int(pPragma, 0);
        rc = sqlite3_finalize(pPragma);
        break;

      case SQLITE_AUTH:
        /* Not allowed to run the PRAGMA: fall back to a fixed value. */
        p->nPgsz = 1024;
        rc = SQLITE_OK;
        break;

      default:
        break;
    }
  }

  assert( p->nPgsz>0 || rc!=SQLITE_OK );
  sqlite3_free(zPragma);
  *pRc = rc;
}
/*
** "Special" FTS4 arguments are column specifications of the following form:
**
**   <key> = <value>
**
** There may not be whitespace surrounding the "=" character. The <value>
** term may be quoted, but the <key> may not.
**
** If string z contains no '=' character, 0 is returned and the output
** parameters are not written. Otherwise 1 is returned, *pnKey is set to
** the number of bytes preceding the first '=', and *pzValue is set to a
** copy of the text following it, obtained from sqlite3_mprintf() and passed
** through sqlite3Fts3Dequote(). *pzValue is NULL if that allocation
** failed; the caller owns the returned buffer.
*/
static int fts3IsSpecialColumn(
  const char *z,
  int *pnKey,
  char **pzValue
){
  const char *zEq = strchr(z, '=');   /* First '=' in z, if any */
  char *zCopy;                        /* Dequoted copy of the value */

  if( zEq==0 ) return 0;

  *pnKey = (int)(zEq-z);
  zCopy = sqlite3_mprintf("%s", &zEq[1]);
  if( zCopy ) sqlite3Fts3Dequote(zCopy);
  *pzValue = zCopy;
  return 1;
}
/*
** Format zFormat and its arguments printf() style and append the result to
** the string *pz, which is replaced by a newly allocated buffer.
**
** If *pRc is not SQLITE_OK on entry this is a no-op. If *pz is NULL the
** formatted text simply becomes the new string. If an allocation fails,
** *pRc is set to SQLITE_NOMEM, the previous buffer is freed and *pz is
** set to NULL.
*/
static void fts3Appendf(
  int *pRc,                       /* IN/OUT: Error code */
  char **pz,                      /* IN/OUT: Pointer to string buffer */
  const char *zFormat,            /* Printf format string to append */
  ...                             /* Arguments for printf format string */
){
  va_list ap;
  char *zNew;                     /* Text that will replace *pz */

  if( *pRc!=SQLITE_OK ) return;

  va_start(ap, zFormat);
  zNew = sqlite3_vmprintf(zFormat, ap);
  va_end(ap);

  if( zNew!=0 && *pz!=0 ){
    /* Existing content: concatenate old and new text into a fresh buffer. */
    char *zJoined = sqlite3_mprintf("%s%s", *pz, zNew);
    sqlite3_free(zNew);
    zNew = zJoined;
  }
  if( zNew==0 ){
    *pRc = SQLITE_NOMEM;
  }

  sqlite3_free(*pz);
  *pz = zNew;
}
/*
** Return a copy of input string zInput enclosed in double-quotes (") and
** with every embedded double-quote character doubled. For example the
** 8-byte input
**
**     un "zip"
**
** produces the 12-byte string
**
**     "un ""zip"""
**
** The pointer returned points to memory obtained from sqlite3_malloc(). It
** is the callers responsibility to call sqlite3_free() to release this
** memory.
**
** NULL is returned if the allocation fails, or if zInput is so long that
** the worst-case output size (2*strlen(zInput)+3 bytes) cannot be
** represented as the int that sqlite3_malloc() accepts. Callers must treat
** a NULL return as an out-of-memory condition.
*/
static char *fts3QuoteId(char const *zInput){
  size_t nInput = strlen(zInput); /* Length of the input in bytes */
  char *zRet;                     /* Buffer to return */

  /* Worst case every byte is a '"' and is doubled, plus two enclosing
  ** quotes and the nul-terminator. Refuse sizes that would overflow int
  ** rather than allocate a buffer that is too small. */
  if( nInput>(size_t)((0x7fffffff - 3)/2) ) return 0;

  zRet = sqlite3_malloc((int)(2*nInput + 3));
  if( zRet ){
    size_t i;
    char *z = zRet;
    *(z++) = '"';
    for(i=0; i<nInput; i++){
      if( zInput[i]=='"' ) *(z++) = '"';
      *(z++) = zInput[i];
    }
    *(z++) = '"';
    *(z++) = '\0';
  }
  return zRet;
}
/*
** Return a list of comma separated SQL expressions and a FROM clause that
** could be used in a SELECT statement such as the following:
**
**     SELECT <list of expressions> FROM %_content AS x ...
**
** to return the docid, followed by each column of text data in order
** from left to right, followed by the language-id column if the table has
** one.
**
** For a table without a content= option, if parameter zFunc is not NULL
** each column of text data is passed to the SQL function named zFunc
** (quoted with fts3QuoteId()) instead of being returned directly. For
** example, if zFunc is "unzip" and table "t" in database "main" has the
** three user-defined columns "a", "b", and "c", the following string is
** returned:
**
**   docid,"unzip"(x.'c0a'),"unzip"(x.'c1b'),"unzip"(x.'c2c')
**       FROM 'main'.'t_content' AS x
**
** (on a single line). For a content= table zFunc is ignored and the list
** is "rowid" followed by the user column names, read from the content
** table itself.
**
** The pointer returned points to a buffer allocated by sqlite3_malloc(). It
** is the responsibility of the caller to eventually free it.
**
** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and
** a NULL pointer is returned). Otherwise, if an OOM error is encountered
** by this function - including while quoting zFunc - NULL is returned and
** *pRc is set to SQLITE_NOMEM. If no error occurs, *pRc is left unmodified.
*/
static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){
  char *zRet = 0;                 /* String under construction */
  char *zFree = 0;                /* Quoted function name to free, if any */
  char *zFunction;                /* Text placed before each "(x.'col')" */
  int i;

  if( p->zContentTbl==0 ){
    if( !zFunc ){
      zFunction = "";
    }else{
      zFree = zFunction = fts3QuoteId(zFunc);
      /* A NULL here means the quoted name could not be allocated. Without
      ** this check the NULL would be formatted as an empty string and the
      ** function wrapper silently dropped from the expression list. An
      ** earlier error already stored in *pRc is left in place. */
      if( zFunction==0 && *pRc==SQLITE_OK ) *pRc = SQLITE_NOMEM;
    }
    fts3Appendf(pRc, &zRet, "docid");
    for(i=0; i<p->nColumn; i++){
      fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
    }
    if( p->zLanguageid ){
      fts3Appendf(pRc, &zRet, ", x.%Q", "langid");
    }
    sqlite3_free(zFree);
  }else{
    fts3Appendf(pRc, &zRet, "rowid");
    for(i=0; i<p->nColumn; i++){
      fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
    }
    if( p->zLanguageid ){
      fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid);
    }
  }
  fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x",
      p->zDb,
      (p->zContentTbl ? p->zContentTbl : p->zName),
      (p->zContentTbl ? "" : "_content")
  );
  return zRet;
}
/*
** Return a list of N comma separated question marks, where N is the number
** of columns in the %_content table (one for the docid, one for each
** user-defined text column, and one more if the table has a language-id
** column).
**
** If argument zFunc is not NULL, then each question mark that corresponds
** to a user-defined text column is preceded by the quoted function name
** and an open bracket, and followed by a closed bracket. For example, if
** zFunc is "zip" and the FTS table has three user-defined text columns and
** no language-id column, the following string is returned:
**
**     ?,"zip"(?),"zip"(?),"zip"(?)
**
** The pointer returned points to a buffer allocated by sqlite3_malloc(). It
** is the responsibility of the caller to eventually free it.
**
** If *pRc is not SQLITE_OK when this function is called, it is a no-op (and
** a NULL pointer is returned). Otherwise, if an OOM error is encountered
** by this function - including while quoting zFunc - NULL is returned and
** *pRc is set to SQLITE_NOMEM. If no error occurs, *pRc is left unmodified.
*/
static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){
  char *zRet = 0;                 /* String under construction */
  char *zFree = 0;                /* Quoted function name to free, if any */
  char *zFunction;                /* Text placed before each "(?)" */
  int i;

  if( !zFunc ){
    zFunction = "";
  }else{
    zFree = zFunction = fts3QuoteId(zFunc);
    /* A NULL here means the quoted name could not be allocated. Without
    ** this check the NULL would be formatted as an empty string and the
    ** function wrapper silently dropped from the expression list. An
    ** earlier error already stored in *pRc is left in place. */
    if( zFunction==0 && *pRc==SQLITE_OK ) *pRc = SQLITE_NOMEM;
  }
  fts3Appendf(pRc, &zRet, "?");
  for(i=0; i<p->nColumn; i++){
    fts3Appendf(pRc, &zRet, ",%s(?)", zFunction);
  }
  if( p->zLanguageid ){
    fts3Appendf(pRc, &zRet, ", ?");
  }
  sqlite3_free(zFree);
  return zRet;
}
/*
** Interpret the text at (*pp) as a non-negative decimal integer. Only the
** characters '0'..'9' are accepted as part of the value.
**
** If *pp does not begin with a decimal digit, SQLITE_ERROR is returned and
** neither *pnOut nor *pp is modified. Otherwise SQLITE_OK is returned,
** *pnOut is set to the value read and *pp is advanced past the digits
** consumed.
**
** If the accumulated value exceeds 10000000, scanning stops at that point:
** *pnOut is set to 0 and *pp is left pointing at the digit that pushed the
** value over the limit.
**
** This function is used when parsing the "prefix=" FTS4 parameter.
*/
static int fts3GobbleInt(const char **pp, int *pnOut){
  const int MAX_NPREFIX = 10000000;
  const char *zStart = *pp;       /* First byte of the input */
  const char *z = zStart;         /* Current read position */
  int nVal = 0;                   /* Value accumulated so far */

  while( z[0]>='0' && z[0]<='9' ){
    nVal = nVal*10 + (z[0]-'0');
    if( nVal>MAX_NPREFIX ){
      nVal = 0;
      break;
    }
    z++;
  }

  if( z==zStart ) return SQLITE_ERROR;
  *pnOut = nVal;
  *pp = z;
  return SQLITE_OK;
}
/*
** Allocate the array of Fts3Index structures that describes the indexes
** maintained by an FTS table. Entry 0 of the array is always present and
** is left zeroed (the main "terms" index). One further entry is added for
** each non-zero prefix length listed in zParam, the value of the "prefix="
** option from the CREATE VIRTUAL TABLE statement, or NULL if the option
** was not specified.
**
** The array is sized from the number of commas in zParam. Each value is
** read with fts3GobbleInt(), after which one separator byte is skipped.
** Values that come back as 0 do not produce an array entry.
**
** If no error occurs, SQLITE_OK is returned, *apIndex is set to point to
** the allocated array and *pnIndex to the number of entries in use.
** SQLITE_NOMEM is returned if the allocation fails (*apIndex is then
** NULL), and SQLITE_ERROR if a value in zParam cannot be parsed. *pnIndex
** is not written on error.
**
** Regardless of whether or not an error is returned, it is the responsibility
** of the caller to call sqlite3_free() on the output array to free it.
*/
static int fts3PrefixParameter(
  const char *zParam,             /* ABC in prefix=ABC parameter to parse */
  int *pnIndex,                   /* OUT: size of *apIndex[] array */
  struct Fts3Index **apIndex      /* OUT: Array of indexes for this table */
){
  struct Fts3Index *aNew;         /* Allocated array */
  int nEntry = 1;                 /* Number of entries in array */

  /* One entry for the terms index, plus one per comma-separated value. */
  if( zParam && zParam[0] ){
    const char *z;
    nEntry++;
    for(z=zParam; *z; z++){
      if( *z==',' ) nEntry++;
    }
  }

  aNew = sqlite3_malloc(sizeof(struct Fts3Index) * nEntry);
  *apIndex = aNew;
  if( !aNew ){
    return SQLITE_NOMEM;
  }
  memset(aNew, 0, sizeof(struct Fts3Index) * nEntry);

  if( zParam ){
    const char *z = zParam;
    int iSlot = 1;                /* Next array slot to fill */
    while( iSlot<nEntry ){
      int nPrefix = 0;
      if( fts3GobbleInt(&z, &nPrefix) ) return SQLITE_ERROR;
      assert( nPrefix>=0 );
      if( nPrefix==0 ){
        /* A zero-length prefix index is dropped; shrink the array. */
        nEntry--;
      }else{
        aNew[iSlot].nPrefix = nPrefix;
        iSlot++;
      }
      z++;                        /* Step over the separator byte */
    }
  }

  *pnIndex = nEntry;
  return SQLITE_OK;
}
/*
** This function is called when initializing an FTS4 table that uses the
** content=xxx option. It determines the number of and names of the columns
** of the new FTS4 table.
**
** Argument zTbl is the value passed to the content=xxx option (i.e.
** "xxx"). This function prepares "SELECT * FROM <zDb>.<zTbl>" to discover
** the columns of that table. If the statement can be prepared, the output
** variables are populated as follows:
**
**   *pnCol:   Set to the number of columns table xxx has,
**
**   *pnStr:   Set to the total amount of space required to store a copy
**             of each columns name, including the nul-terminator.
**
**   *pazCol:  Set to point to an array of *pnCol strings. Each string is
**             the name of the corresponding column in table xxx. The array
**             and its contents are allocated using a single allocation. It
**             is the responsibility of the caller to free this allocation
**             by eventually passing the *pazCol value to sqlite3_free().
**
** If the statement cannot be prepared (for example because the table does
** not exist), an error code is returned, the database error message is
** copied into *pzErr and the output variables are not written. If an OOM
** is encountered - including sqlite3_column_name() returning NULL -
** SQLITE_NOMEM is returned; in that case *pazCol is either not written or
** set to NULL, and the other outputs must not be relied upon.
*/
static int fts3ContentColumns(
  sqlite3 *db,                    /* Database handle */
  const char *zDb,                /* Name of db (i.e. "main", "temp" etc.) */
  const char *zTbl,               /* Name of content table */
  const char ***pazCol,           /* OUT: Malloc'd array of column names */
  int *pnCol,                     /* OUT: Size of array *pazCol */
  int *pnStr,                     /* OUT: Bytes of string content */
  char **pzErr                    /* OUT: error message */
){
  int rc = SQLITE_OK;             /* Return code */
  char *zSql;                     /* "SELECT *" statement on zTbl */
  sqlite3_stmt *pStmt = 0;        /* Compiled version of zSql */

  zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zTbl);
  if( !zSql ){
    rc = SQLITE_NOMEM;
  }else{
    rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
    if( rc!=SQLITE_OK ){
      sqlite3Fts3ErrMsg(pzErr, "%s", sqlite3_errmsg(db));
    }
  }
  sqlite3_free(zSql);

  if( rc==SQLITE_OK ){
    const char **azCol = 0;       /* Output array */
    int nStr = 0;                 /* Size of all column names (incl. 0x00) */
    int nCol;                     /* Number of table columns */
    int i;                        /* Used to iterate through columns */

    /* Loop through the returned columns. Set nStr to the number of bytes of
    ** space required to store a copy of each column name, including the
    ** nul-terminator byte. sqlite3_column_name() returns NULL if a memory
    ** allocation fails, so that case must be caught before strlen(). */
    nCol = sqlite3_column_count(pStmt);
    for(i=0; i<nCol; i++){
      const char *zCol = sqlite3_column_name(pStmt, i);
      if( zCol==0 ){
        rc = SQLITE_NOMEM;
        break;
      }
      nStr += (int)strlen(zCol) + 1;
    }

    /* Allocate and populate the array to return. The nCol pointers come
    ** first, immediately followed by the nStr bytes of string data. */
    if( rc==SQLITE_OK ){
      azCol = (const char **)sqlite3_malloc(sizeof(char *) * nCol + nStr);
      if( azCol==0 ){
        rc = SQLITE_NOMEM;
      }else{
        char *p = (char *)&azCol[nCol];
        for(i=0; i<nCol; i++){
          const char *zCol = sqlite3_column_name(pStmt, i);
          int n;
          if( zCol==0 ){
            rc = SQLITE_NOMEM;
            break;
          }
          n = (int)strlen(zCol)+1;
          memcpy(p, zCol, n);
          azCol[i] = p;
          p += n;
        }
        if( rc!=SQLITE_OK ){
          /* Do not hand a half-populated array back to the caller. */
          sqlite3_free((void *)azCol);
          azCol = 0;
        }
      }
    }
    sqlite3_finalize(pStmt);

    /* Set the output variables. */
    *pnCol = nCol;
    *pnStr = nStr;
    *pazCol = azCol;
  }

  return rc;
}
/*
** This function is the implementation of both the xConnect and xCreate
** methods of the FTS3 virtual table.
**
** The argv[] array contains the following:
**
**   argv[0]   -> module name  ("fts3" or "fts4")
**   argv[1]   -> database name
**   argv[2]   -> table name
**   argv[...] -> "column name" and other module argument fields.
**
** The function parses the module arguments (column names, a tokenizer
** specification and, for FTS4, key=value options), allocates a single
** Fts3Table object that also holds the column-name, index and
** not-indexed arrays, creates the backing tables when isCreate is true,
** and finally declares the table schema to SQLite.
**
** On success SQLITE_OK is returned and *ppVTab is set to the new table.
** On failure an SQLite error code is returned, *ppVTab is not written, a
** partially built table (or a tokenizer not yet owned by a table) is
** released, and an error message may have been stored in *pzErr.
*/
static int fts3InitVtab(
  int isCreate,                   /* True for xCreate, false for xConnect */
  sqlite3 *db,                    /* The SQLite database connection */
  void *pAux,                     /* Hash table containing tokenizers */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVTab,          /* Write the resulting vtab structure here */
  char **pzErr                    /* Write any error message here */
){
  Fts3Hash *pHash = (Fts3Hash *)pAux;
  Fts3Table *p = 0;               /* Pointer to allocated vtab */
  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* Iterator variable */
  int nByte;                      /* Size of allocation used for *p */
  int iCol;                       /* Column index */
  int nString = 0;                /* Bytes required to hold all column names */
  int nCol = 0;                   /* Number of columns in the FTS table */
  char *zCsr;                     /* Space for holding column names */
  int nDb;                        /* Bytes required to hold database name */
  int nName;                      /* Bytes required to hold table name */
  int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */
  const char **aCol;              /* Array of column names */
  sqlite3_tokenizer *pTokenizer = 0;        /* Tokenizer for this table */

  int nIndex = 0;                 /* Size of aIndex[] array */
  struct Fts3Index *aIndex = 0;   /* Array of indexes for this table */

  /* The results of parsing supported FTS4 key=value options: */
  int bNoDocsize = 0;             /* True to omit %_docsize table */
  int bDescIdx = 0;               /* True to store descending indexes */
  char *zPrefix = 0;              /* Prefix parameter value (or NULL) */
  char *zCompress = 0;            /* compress=? parameter (or NULL) */
  char *zUncompress = 0;          /* uncompress=? parameter (or NULL) */
  char *zContent = 0;             /* content=? parameter (or NULL) */
  char *zLanguageid = 0;          /* languageid=? parameter (or NULL) */
  char **azNotindexed = 0;        /* The set of notindexed= columns */
  int nNotindexed = 0;            /* Size of azNotindexed[] array */

  assert( strlen(argv[0])==4 );
  assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
       || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4)
  );

  nDb = (int)strlen(argv[1]) + 1;
  nName = (int)strlen(argv[2]) + 1;

  /* Both aCol[] and azNotindexed[] are sized for the worst case in which
  ** every module argument lands in that array. */
  nByte = sizeof(const char *) * (argc-2);
  aCol = (const char **)sqlite3_malloc(nByte);
  if( aCol ){
    memset((void*)aCol, 0, nByte);
    azNotindexed = (char **)sqlite3_malloc(nByte);
  }
  if( azNotindexed ){
    memset(azNotindexed, 0, nByte);
  }
  if( !aCol || !azNotindexed ){
    rc = SQLITE_NOMEM;
    goto fts3_init_out;
  }

  /* Loop through all of the arguments passed by the user to the FTS3/4
  ** module (i.e. all the column names and special arguments). This loop
  ** does the following:
  **
  **   + Figures out the number of columns the FTSX table will have, and
  **     the number of bytes of space that must be allocated to store copies
  **     of the column names.
  **
  **   + If there is a tokenizer specification included in the arguments,
  **     initializes the tokenizer pTokenizer.
  */
  for(i=3; rc==SQLITE_OK && i<argc; i++){
    char const *z = argv[i];
    int nKey;
    char *zVal;

    /* Check if this is a tokenizer specification */
    if( !pTokenizer
     && strlen(z)>8
     && 0==sqlite3_strnicmp(z, "tokenize", 8)
     && 0==sqlite3Fts3IsIdChar(z[8])
    ){
      rc = sqlite3Fts3InitTokenizer(pHash, &z[9], &pTokenizer, pzErr);
    }

    /* Check if it is an FTS4 special argument. */
    else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){
      struct Fts4Option {
        const char *zOpt;
        int nOpt;
      } aFts4Opt[] = {
        { "matchinfo",   9 },     /* 0 -> MATCHINFO */
        { "prefix",      6 },     /* 1 -> PREFIX */
        { "compress",    8 },     /* 2 -> COMPRESS */
        { "uncompress", 10 },     /* 3 -> UNCOMPRESS */
        { "order",       5 },     /* 4 -> ORDER */
        { "content",     7 },     /* 5 -> CONTENT */
        { "languageid", 10 },     /* 6 -> LANGUAGEID */
        { "notindexed", 10 }      /* 7 -> NOTINDEXED */
      };

      int iOpt;
      if( !zVal ){
        rc = SQLITE_NOMEM;
      }else{
        for(iOpt=0; iOpt<SizeofArray(aFts4Opt); iOpt++){
          struct Fts4Option *pOp = &aFts4Opt[iOpt];
          if( nKey==pOp->nOpt && !sqlite3_strnicmp(z, pOp->zOpt, pOp->nOpt) ){
            break;
          }
        }
        if( iOpt==SizeofArray(aFts4Opt) ){
          sqlite3Fts3ErrMsg(pzErr, "unrecognized parameter: %s", z);
          rc = SQLITE_ERROR;
        }else{
          /* In the cases below, setting zVal to 0 transfers ownership of
          ** the value string to the variable it was assigned to, so that
          ** the sqlite3_free(zVal) following the switch does not free it. */
          switch( iOpt ){
            case 0:               /* MATCHINFO */
              if( strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "fts3", 4) ){
                sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo: %s", zVal);
                rc = SQLITE_ERROR;
              }
              bNoDocsize = 1;
              break;

            case 1:               /* PREFIX */
              sqlite3_free(zPrefix);
              zPrefix = zVal;
              zVal = 0;
              break;

            case 2:               /* COMPRESS */
              sqlite3_free(zCompress);
              zCompress = zVal;
              zVal = 0;
              break;

            case 3:               /* UNCOMPRESS */
              sqlite3_free(zUncompress);
              zUncompress = zVal;
              zVal = 0;
              break;

            case 4:               /* ORDER */
              if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3))
               && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 4))
              ){
                sqlite3Fts3ErrMsg(pzErr, "unrecognized order: %s", zVal);
                rc = SQLITE_ERROR;
              }
              bDescIdx = (zVal[0]=='d' || zVal[0]=='D');
              break;

            case 5:              /* CONTENT */
              sqlite3_free(zContent);
              zContent = zVal;
              zVal = 0;
              break;

            case 6:              /* LANGUAGEID */
              assert( iOpt==6 );
              sqlite3_free(zLanguageid);
              zLanguageid = zVal;
              zVal = 0;
              break;

            case 7:              /* NOTINDEXED */
              azNotindexed[nNotindexed++] = zVal;
              zVal = 0;
              break;
          }
        }
        sqlite3_free(zVal);
      }
    }

    /* Otherwise, the argument is a column name. */
    else {
      nString += (int)(strlen(z) + 1);
      aCol[nCol++] = z;
    }
  }

  /* If a content=xxx option was specified, the following:
  **
  **   1. Ignore any compress= and uncompress= options.
  **
  **   2. If no column names were specified as part of the CREATE VIRTUAL
  **      TABLE statement, use all columns from the content table.
  */
  if( rc==SQLITE_OK && zContent ){
    sqlite3_free(zCompress);
    sqlite3_free(zUncompress);
    zCompress = 0;
    zUncompress = 0;
    if( nCol==0 ){
      sqlite3_free((void*)aCol);
      aCol = 0;
      rc = fts3ContentColumns(db, argv[1], zContent,&aCol,&nCol,&nString,pzErr);

      /* If a languageid= option was specified, remove the language id
      ** column from the aCol[] array. */
      if( rc==SQLITE_OK && zLanguageid ){
        int j;
        for(j=0; j<nCol; j++){
          if( sqlite3_stricmp(zLanguageid, aCol[j])==0 ){
            int k;
            /* Shift the following entries down by one. Only nCol pointers
            ** exist, so the last source slot read is aCol[nCol-1]; the
            ** loop must therefore stop at k==nCol-2. */
            for(k=j; k<nCol-1; k++) aCol[k] = aCol[k+1];
            nCol--;
            break;
          }
        }
      }
    }
  }
  if( rc!=SQLITE_OK ) goto fts3_init_out;

  if( nCol==0 ){
    assert( nString==0 );
    aCol[0] = "content";
    nString = 8;
    nCol = 1;
  }

  if( pTokenizer==0 ){
    rc = sqlite3Fts3InitTokenizer(pHash, "simple", &pTokenizer, pzErr);
    if( rc!=SQLITE_OK ) goto fts3_init_out;
  }
  assert( pTokenizer );

  rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex);
  if( rc==SQLITE_ERROR ){
    assert( zPrefix );
    sqlite3Fts3ErrMsg(pzErr, "error parsing prefix parameter: %s", zPrefix);
  }
  if( rc!=SQLITE_OK ) goto fts3_init_out;

  /* Allocate and populate the Fts3Table structure. */
  nByte = sizeof(Fts3Table) +                  /* Fts3Table */
          nCol * sizeof(char *) +              /* azColumn */
          nIndex * sizeof(struct Fts3Index) +  /* aIndex */
          nCol * sizeof(u8) +                  /* abNotindexed */
          nName +                              /* zName */
          nDb +                                /* zDb */
          nString;                             /* Space for azColumn strings */
  p = (Fts3Table*)sqlite3_malloc(nByte);
  if( p==0 ){
    rc = SQLITE_NOMEM;
    goto fts3_init_out;
  }
  memset(p, 0, nByte);
  p->db = db;
  p->nColumn = nCol;
  p->nPendingData = 0;
  p->azColumn = (char **)&p[1];
  p->pTokenizer = pTokenizer;
  p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
  p->bHasDocsize = (isFts4 && bNoDocsize==0);
  p->bHasStat = isFts4;
  p->bFts4 = isFts4;
  p->bDescIdx = bDescIdx;
  p->nAutoincrmerge = 0xff;   /* 0xff means setting unknown */
  /* Ownership of these two strings moves to the table object. */
  p->zContentTbl = zContent;
  p->zLanguageid = zLanguageid;
  zContent = 0;
  zLanguageid = 0;
  TESTONLY( p->inTransaction = -1 );
  TESTONLY( p->mxSavepoint = -1 );

  p->aIndex = (struct Fts3Index *)&p->azColumn[nCol];
  memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex);
  p->nIndex = nIndex;
  for(i=0; i<nIndex; i++){
    fts3HashInit(&p->aIndex[i].hPending, FTS3_HASH_STRING, 1);
  }
  p->abNotindexed = (u8 *)&p->aIndex[nIndex];

  /* Fill in the zName and zDb fields of the vtab structure. */
  zCsr = (char *)&p->abNotindexed[nCol];
  p->zName = zCsr;
  memcpy(zCsr, argv[2], nName);
  zCsr += nName;
  p->zDb = zCsr;
  memcpy(zCsr, argv[1], nDb);
  zCsr += nDb;

  /* Fill in the azColumn array */
  for(iCol=0; iCol<nCol; iCol++){
    char *z;
    int n = 0;
    z = (char *)sqlite3Fts3NextToken(aCol[iCol], &n);
    memcpy(zCsr, z, n);
    zCsr[n] = '\0';
    sqlite3Fts3Dequote(zCsr);
    p->azColumn[iCol] = zCsr;
    zCsr += n+1;
    assert( zCsr <= &((char *)p)[nByte] );
  }

  /* Fill in the abNotindexed array. Each notindexed= value that matches a
  ** column name is consumed (freed and cleared); any value left over
  ** afterwards names a column that does not exist. */
  for(iCol=0; iCol<nCol; iCol++){
    int n = (int)strlen(p->azColumn[iCol]);
    for(i=0; i<nNotindexed; i++){
      char *zNot = azNotindexed[i];
      if( zNot && n==(int)strlen(zNot)
       && 0==sqlite3_strnicmp(p->azColumn[iCol], zNot, n)
      ){
        p->abNotindexed[iCol] = 1;
        sqlite3_free(zNot);
        azNotindexed[i] = 0;
      }
    }
  }
  for(i=0; i<nNotindexed; i++){
    if( azNotindexed[i] ){
      sqlite3Fts3ErrMsg(pzErr, "no such column: %s", azNotindexed[i]);
      rc = SQLITE_ERROR;
    }
  }

  if( rc==SQLITE_OK && (zCompress==0)!=(zUncompress==0) ){
    char const *zMiss = (zCompress==0 ? "compress" : "uncompress");
    rc = SQLITE_ERROR;
    sqlite3Fts3ErrMsg(pzErr, "missing %s parameter in fts4 constructor", zMiss);
  }
  p->zReadExprlist = fts3ReadExprList(p, zUncompress, &rc);
  p->zWriteExprlist = fts3WriteExprList(p, zCompress, &rc);
  if( rc!=SQLITE_OK ) goto fts3_init_out;

  /* If this is an xCreate call, create the underlying tables in the
  ** database. TODO: For xConnect(), it could verify that said tables exist.
  */
  if( isCreate ){
    rc = fts3CreateTables(p);
  }

  /* Check to see if a legacy fts3 table has been "upgraded" by the
  ** addition of a %_stat table so that it can use incremental merge.
  */
  if( !isFts4 && !isCreate ){
    p->bHasStat = 2;
  }

  /* Figure out the page-size for the database. This is required in order to
  ** estimate the cost of loading large doclists from the database.  */
  fts3DatabasePageSize(&rc, p);
  p->nNodeSize = p->nPgsz-35;

  /* Declare the table schema to SQLite. */
  fts3DeclareVtab(&rc, p);

fts3_init_out:
  sqlite3_free(zPrefix);
  sqlite3_free(aIndex);
  sqlite3_free(zCompress);
  sqlite3_free(zUncompress);
  sqlite3_free(zContent);
  sqlite3_free(zLanguageid);
  for(i=0; i<nNotindexed; i++) sqlite3_free(azNotindexed[i]);
  sqlite3_free((void *)aCol);
  sqlite3_free((void *)azNotindexed);

  if( rc!=SQLITE_OK ){
    if( p ){
      fts3DisconnectMethod((sqlite3_vtab *)p);
    }else if( pTokenizer ){
      pTokenizer->pModule->xDestroy(pTokenizer);
    }
  }else{
    assert( p->pSegments==0 );
    *ppVTab = &p->base;
  }
  return rc;
}
/*
** The xConnect() method for the virtual table. All the work is done by
** fts3InitVtab(), which is invoked here with its isCreate flag clear.
*/
static int fts3ConnectMethod(
  sqlite3 *db,                    /* Database connection */
  void *pAux,                     /* Pointer to tokenizer hash table */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
){
  const int isCreate = 0;         /* Connecting to an existing table */
  int rc = fts3InitVtab(isCreate, db, pAux, argc, argv, ppVtab, pzErr);
  return rc;
}
/*
** The xCreate() method for the virtual table. All the work is done by
** fts3InitVtab(), which is invoked here with its isCreate flag set.
*/
static int fts3CreateMethod(
  sqlite3 *db,                    /* Database connection */
  void *pAux,                     /* Pointer to tokenizer hash table */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
){
  const int isCreate = 1;         /* Creating a new table */
  int rc = fts3InitVtab(isCreate, db, pAux, argc, argv, ppVtab, pzErr);
  return rc;
}
/*
** Store nRow in pIdxInfo->estimatedRows.
**
** The assignment is compiled in only when the headers are from SQLite
** 3.8.2 or later, and is performed only when the library found at run time
** is also 3.8.2 or later. In every other case this function is a no-op.
*/
static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){
#if SQLITE_VERSION_NUMBER>=3008002
  if( sqlite3_libversion_number()<3008002 ) return;
  pIdxInfo->estimatedRows = nRow;
#endif
}
/*
** Set the SQLITE_INDEX_SCAN_UNIQUE bit in pIdxInfo->idxFlags.
**
** The assignment is compiled in only when the headers are from SQLite
** 3.8.12 or later, and is performed only when the library found at run
** time is also 3.8.12 or later. In every other case this function is a
** no-op.
*/
static void fts3SetUniqueFlag(sqlite3_index_info *pIdxInfo){
#if SQLITE_VERSION_NUMBER>=3008012
  if( sqlite3_libversion_number()<3008012 ) return;
  pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE;
#endif
}
/*
** Implementation of the xBestIndex method for FTS3 tables. The strategy
** is reported in pInfo->idxNum and is one of:
**
**   1. FTS3_DOCID_SEARCH:    direct lookup by rowid or docid (cost 1.0).
**   2. FTS3_FULLTEXT_SEARCH: full-text search using a MATCH operator on a
**                            non-docid column (cost 2.0). The column number
**                            is added to the idxNum value.
**   3. FTS3_FULLSCAN_SEARCH: linear scan of the %_content table
**                            (cost 5000000). This is the default.
**
** A usable MATCH constraint replaces any strategy chosen earlier in the
** constraint scan. An unusable MATCH constraint causes an immediate return
** with a cost of 1e50 so that the planner avoids the plan.
**
** In addition, an equality constraint on the language-id column and range
** constraints on the docid are passed through to xFilter, flagged with the
** FTS3_HAVE_LANGID, FTS3_HAVE_DOCID_GE and FTS3_HAVE_DOCID_LE bits of
** idxNum. A single ORDER BY term on rowid/docid is consumed, with idxStr
** set to "ASC" or "DESC".
**
** Always returns SQLITE_OK.
*/
static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
  Fts3Table *pTab = (Fts3Table *)pVTab;
  const int iDocidCol = pTab->nColumn+1;    /* Column number of "docid" */
  const int iLangidCol = pTab->nColumn+2;   /* Column number of langid */
  int ii;                         /* Constraint iterator */
  int iPrimary = -1;              /* Constraint driving the main strategy */
  int iLangid = -1;               /* langid=x constraint, if present */
  int iLower = -1;                /* docid>=x or docid>x, if present */
  int iUpper = -1;                /* docid<=x or docid<x, if present */
  int iArg = 1;                   /* Next argvIndex value to hand out */

  /* Start from the expensive full-table scan, then look through the
  ** constraints for something better. */
  pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
  pInfo->estimatedCost = 5000000;

  for(ii=0; ii<pInfo->nConstraint; ii++){
    struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[ii];
    int isMatch = (pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH);
    int isEq;                     /* True for an == constraint */
    int isDocid;                  /* True if the constraint is on docid */

    if( pCons->usable==0 ){
      if( !isMatch ) continue;
      /* An unusable MATCH constraint. If the planner used this plan the
      ** user would see an "unable to use function MATCH in the requested
      ** context" error, so make the plan look extremely expensive. */
      pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
      pInfo->estimatedCost = 1e50;
      fts3SetEstimatedRows(pInfo, ((sqlite3_int64)1) << 50);
      return SQLITE_OK;
    }

    isEq = (pCons->op==SQLITE_INDEX_CONSTRAINT_EQ);
    isDocid = (pCons->iColumn<0 || pCons->iColumn==iDocidCol);

    /* Direct lookup on rowid/docid; only taken if nothing was chosen yet. */
    if( isEq && isDocid && iPrimary<0 ){
      pInfo->idxNum = FTS3_DOCID_SEARCH;
      pInfo->estimatedCost = 1.0;
      iPrimary = ii;
    }

    /* A usable MATCH constraint always takes over as the main strategy,
    ** even from a rowid/docid lookup: choosing the lookup instead would
    ** lead to the "unable to use function MATCH" error. */
    if( isMatch && pCons->iColumn>=0 && pCons->iColumn<=pTab->nColumn ){
      pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn;
      pInfo->estimatedCost = 2.0;
      iPrimary = ii;
    }

    /* Equality constraint on the langid column */
    if( isEq && pCons->iColumn==iLangidCol ){
      iLangid = ii;
    }

    /* Range constraints on the docid */
    if( isDocid ){
      if( pCons->op==SQLITE_INDEX_CONSTRAINT_GE
       || pCons->op==SQLITE_INDEX_CONSTRAINT_GT
      ){
        iLower = ii;
      }else if( pCons->op==SQLITE_INDEX_CONSTRAINT_LE
             || pCons->op==SQLITE_INDEX_CONSTRAINT_LT
      ){
        iUpper = ii;
      }
    }
  }

  /* If using a docid=? or rowid=? strategy, set the UNIQUE flag. */
  if( pInfo->idxNum==FTS3_DOCID_SEARCH ){
    fts3SetUniqueFlag(pInfo);
  }

  /* Hand out argvIndex values in a fixed order: main constraint, langid,
  ** lower docid bound, upper docid bound. */
  if( iPrimary>=0 ){
    pInfo->aConstraintUsage[iPrimary].argvIndex = iArg++;
    pInfo->aConstraintUsage[iPrimary].omit = 1;
  }
  if( iLangid>=0 ){
    pInfo->idxNum |= FTS3_HAVE_LANGID;
    pInfo->aConstraintUsage[iLangid].argvIndex = iArg++;
  }
  if( iLower>=0 ){
    pInfo->idxNum |= FTS3_HAVE_DOCID_GE;
    pInfo->aConstraintUsage[iLower].argvIndex = iArg++;
  }
  if( iUpper>=0 ){
    pInfo->idxNum |= FTS3_HAVE_DOCID_LE;
    pInfo->aConstraintUsage[iUpper].argvIndex = iArg++;
  }

  /* Regardless of the strategy selected, FTS can deliver rows in rowid (or
  ** docid) order. Both ascending and descending are possible. */
  if( pInfo->nOrderBy==1 ){
    struct sqlite3_index_orderby *pTerm = &pInfo->aOrderBy[0];
    if( pTerm->iColumn<0 || pTerm->iColumn==iDocidCol ){
      pInfo->idxStr = pTerm->desc ? "DESC" : "ASC";
      pInfo->orderByConsumed = 1;
    }
  }

  assert( pTab->pSegments==0 );
  return SQLITE_OK;
}
/*
** Implementation of xOpen method.
**
** Allocates a zeroed Fts3Cursor and stores it in *ppCsr. Returns SQLITE_OK
** on success. If the allocation fails, *ppCsr is set to NULL and
** SQLITE_NOMEM is returned. The pVTab argument is not used.
*/
static int fts3OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
  sqlite3_vtab_cursor *pNew;      /* Newly allocated cursor */

  UNUSED_PARAMETER(pVTab);

  /* The buffer is sized for the full Fts3Cursor structure, not just the
  ** sqlite3_vtab_cursor type through which it is returned. */
  pNew = (sqlite3_vtab_cursor *)sqlite3_malloc(sizeof(Fts3Cursor));
  *ppCsr = pNew;
  if( pNew==0 ){
    return SQLITE_NOMEM;
  }
  memset(pNew, 0, sizeof(Fts3Cursor));
  return SQLITE_OK;
}
/*
** Implementation of the xClose method: release every resource held by the
** cursor (its prepared statement, parsed expression, deferred tokens,
** doclist buffer and matchinfo buffer) and then the cursor itself.
** Always returns SQLITE_OK.
*/
static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){
  Fts3Cursor *pCur = (Fts3Cursor *)pCursor;

  assert( ((Fts3Table *)pCur->base.pVtab)->pSegments==0 );

  /* Release the resources owned by the cursor. */
  sqlite3_finalize(pCur->pStmt);
  sqlite3Fts3ExprFree(pCur->pExpr);
  sqlite3Fts3FreeDeferredTokens(pCur);
  sqlite3_free(pCur->aDoclist);
  sqlite3Fts3MIBufferFree(pCur->pMIBuffer);

  assert( ((Fts3Table *)pCur->base.pVtab)->pSegments==0 );

  /* Finally release the cursor object itself. */
  sqlite3_free(pCur);
  return SQLITE_OK;
}
/*
** Ensure that pCsr->pStmt holds a prepared statement of the form:
**
**    "SELECT <columns> FROM %_content WHERE rowid = ?"
**
** (or the equivalent for a content=xxx table), where the column list and
** FROM clause come from the table's zReadExprlist string. The statement
** is prepared only if pCsr->pStmt is currently NULL.
**
** Returns SQLITE_NOMEM, without writing *ppStmt, if the SQL text cannot be
** allocated. Otherwise *ppStmt is set to pCsr->pStmt and the result of
** preparing the statement is returned (SQLITE_OK if it already existed).
*/
static int fts3CursorSeekStmt(Fts3Cursor *pCsr, sqlite3_stmt **ppStmt){
  Fts3Table *pTab;                /* Table the cursor belongs to */
  char *zSelect;                  /* Text of the SELECT statement */
  int rc;                         /* Result of sqlite3_prepare_v2() */

  /* Already prepared by an earlier call: just hand it back. */
  if( pCsr->pStmt!=0 ){
    *ppStmt = pCsr->pStmt;
    return SQLITE_OK;
  }

  pTab = (Fts3Table *)pCsr->base.pVtab;
  zSelect = sqlite3_mprintf("SELECT %s WHERE rowid = ?", pTab->zReadExprlist);
  if( zSelect==0 ){
    return SQLITE_NOMEM;
  }
  rc = sqlite3_prepare_v2(pTab->db, zSelect, -1, &pCsr->pStmt, 0);
  sqlite3_free(zSelect);

  *ppStmt = pCsr->pStmt;
  return rc;
}
/*
** If the cursor is flagged as requiring a seek, position pCsr->pStmt on
** the content row whose rowid is pCsr->iPrevId and clear the flag. If no
** seek is required this function does nothing.
**
** Returns SQLITE_OK on success. If no row is found, the statement is reset
** without error, and the table has no content= option, the index refers to
** a row that is missing from %_content: FTS_CORRUPT_VTAB is returned and
** the cursor is marked as being at EOF. For a content= table a missing row
** is not treated as an error.
**
** If an error is returned and pContext is not NULL, the error code is also
** reported through sqlite3_result_error_code().
*/
static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){
  int rc;                         /* Return code */
  sqlite3_stmt *pSeek = 0;        /* Receives the seek statement */

  if( !pCsr->isRequireSeek ){
    return SQLITE_OK;
  }

  rc = fts3CursorSeekStmt(pCsr, &pSeek);
  if( rc==SQLITE_OK ){
    sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId);
    pCsr->isRequireSeek = 0;
    if( sqlite3_step(pCsr->pStmt)==SQLITE_ROW ){
      return SQLITE_OK;
    }

    /* No row was returned. sqlite3_reset() reports any error from the
    ** step; if there was none, decide whether the absence is corruption. */
    rc = sqlite3_reset(pCsr->pStmt);
    if( rc==SQLITE_OK ){
      Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
      if( pTab->zContentTbl==0 ){
        /* The %_content table is missing a row that is present in the
        ** full-text index. The data structures are corrupt. */
        rc = FTS_CORRUPT_VTAB;
        pCsr->isEof = 1;
      }
    }
  }

  if( rc!=SQLITE_OK && pContext ){
    sqlite3_result_error_code(pContext, rc);
  }
  return rc;
}
/*
** This function is used to process a single interior node when searching
** a b-tree for a term or term prefix. The node data is passed to this
** function via the zNode/nNode parameters. The term to search for is
** passed in zTerm/nTerm.
**
** If piFirst is not NULL, then this function sets *piFirst to the blockid
** of the child node that heads the sub-tree that may contain the term.
**
** If piLast is not NULL, then *piLast is set to the right-most child node
** that heads a sub-tree that may contain a term for which zTerm/nTerm is
** a prefix.
**
** Return values:
**
**   SQLITE_OK          The node was scanned and the outputs were set.
**   SQLITE_NOMEM       The term buffer could not be allocated.
**   FTS_CORRUPT_VTAB   The node content is malformed: a term extends past
**                      the end of the node, a term has an empty suffix, or
**                      a term claims to share more bytes with its
**                      predecessor than the predecessor contains.
**
** The output variables are not written if an error is returned.
*/
static int fts3ScanInteriorNode(
  const char *zTerm,              /* Term to select leaves for */
  int nTerm,                      /* Size of term zTerm in bytes */
  const char *zNode,              /* Buffer containing segment interior node */
  int nNode,                      /* Size of buffer at zNode */
  sqlite3_int64 *piFirst,         /* OUT: Selected child node */
  sqlite3_int64 *piLast           /* OUT: Selected child node */
){
  int rc = SQLITE_OK;             /* Return code */
  const char *zCsr = zNode;       /* Cursor to iterate through node */
  const char *zEnd = &zCsr[nNode];/* End of interior node buffer */
  char *zBuffer = 0;              /* Buffer to load terms into */
  int nAlloc = 0;                 /* Size of allocated buffer */
  int nPrevTerm = 0;              /* Size of the term currently in zBuffer */
  int isFirstTerm = 1;            /* True when processing first term on page */
  sqlite3_int64 iChild;           /* Block id of child node to descend to */

  /* Skip over the 'height' varint that occurs at the start of every
  ** interior node. Then load the blockid of the left-child of the b-tree
  ** node into variable iChild.
  **
  ** Even if the data structure on disk is corrupted, this (reading two
  ** varints from the buffer) does not risk an overread. If zNode is a
  ** root node, then the buffer comes from a SELECT statement. SQLite does
  ** not make this guarantee explicitly, but in practice there are always
  ** either more than 20 bytes of allocated space following the nNode bytes of
  ** contents, or two zero bytes. Or, if the node is read from the %_segments
  ** table, then there are always 20 bytes of zeroed padding following the
  ** nNode bytes of content (see sqlite3Fts3ReadBlock() for details).
  */
  zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
  zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
  if( zCsr>zEnd ){
    return FTS_CORRUPT_VTAB;
  }

  while( zCsr<zEnd && (piFirst || piLast) ){
    int cmp;                      /* memcmp() result */
    int nSuffix;                  /* Size of term suffix */
    int nPrefix = 0;              /* Size of term prefix */
    int nBuffer;                  /* Total term size */

    /* Load the next term on the node into zBuffer. Use realloc() to expand
    ** the size of zBuffer if required. */
    if( !isFirstTerm ){
      zCsr += fts3GetVarint32(zCsr, &nPrefix);
    }
    isFirstTerm = 0;
    zCsr += fts3GetVarint32(zCsr, &nSuffix);

    /* Validate the sizes before doing any arithmetic with them:
    **
    **   * The prefix is reused from the previous term, so it cannot be
    **     longer than that term. Without this check a corrupt node could
    **     cause uninitialized buffer bytes to be compared, or make
    **     (nPrefix+nSuffix) overflow so that the buffer is never grown.
    **   * Every term contributes at least one new byte.
    **   * The suffix must lie entirely inside the node. The comparison is
    **     done on lengths (not on &zCsr[nSuffix]) so that no out-of-range
    **     pointer is ever formed. It also rejects the case where reading
    **     the varints above moved zCsr past zEnd.
    **
    ** Since the suffixes occupy disjoint byte ranges of the node, these
    ** checks guarantee (nPrefix+nSuffix)<=nNode, so the sum fits in an int.
    */
    if( nPrefix<0 || nPrefix>nPrevTerm
     || nSuffix<=0 || (sqlite3_int64)nSuffix>(sqlite3_int64)(zEnd - zCsr)
    ){
      rc = FTS_CORRUPT_VTAB;
      goto finish_scan;
    }
    nBuffer = nPrefix + nSuffix;

    if( nBuffer>nAlloc ){
      char *zNew;
      /* Double the required size to amortize reallocations, computing the
      ** new size in 64 bits so that it cannot wrap. */
      sqlite3_int64 nNew = (sqlite3_int64)nBuffer * 2;
      if( nNew>0x7fffffff ) nNew = nBuffer;
      zNew = (char *)sqlite3_realloc(zBuffer, (int)nNew);
      if( !zNew ){
        rc = SQLITE_NOMEM;
        goto finish_scan;
      }
      zBuffer = zNew;
      nAlloc = (int)nNew;
    }
    assert( zBuffer );
    memcpy(&zBuffer[nPrefix], zCsr, nSuffix);
    zCsr += nSuffix;
    nPrevTerm = nBuffer;

    /* Compare the term we are searching for with the term just loaded from
    ** the interior node. If the specified term is greater than or equal
    ** to the term from the interior node, then all terms on the sub-tree
    ** headed by node iChild are smaller than zTerm. No need to search
    ** iChild.
    **
    ** If the interior node term is larger than the specified term, then
    ** the tree headed by iChild may contain the specified term.
    */
    cmp = memcmp(zTerm, zBuffer, (nBuffer>nTerm ? nTerm : nBuffer));
    if( piFirst && (cmp<0 || (cmp==0 && nBuffer>nTerm)) ){
      *piFirst = iChild;
      piFirst = 0;
    }

    if( piLast && cmp<0 ){
      *piLast = iChild;
      piLast = 0;
    }

    iChild++;
  };

  /* Any output not yet set refers to the right-most child of the node. */
  if( piFirst ) *piFirst = iChild;
  if( piLast ) *piLast = iChild;

 finish_scan:
  sqlite3_free(zBuffer);
  return rc;
}
/*
** The buffer pointed to by argument zNode (size nNode bytes) contains an
** interior node of a b-tree segment. The zTerm buffer (size nTerm bytes)
** contains a term. This function searches the sub-tree headed by the zNode
** node for the range of leaf nodes that may contain the specified term
** or terms for which the specified term is a prefix.
**
** If piLeaf is not NULL, then *piLeaf is set to the blockid of the
** left-most leaf node in the tree that may contain the specified term.
** If piLeaf2 is not NULL, then *piLeaf2 is set to the blockid of the
** right-most leaf node that may contain a term for which the specified
** term is a prefix.
**
** It is possible that the range of returned leaf nodes does not contain
** the specified term or any terms for which it is a prefix. However, if the
** segment does contain any such terms, they are stored within the identified
** range. Because this function only inspects interior segment nodes (and
** never loads leaf nodes into memory), it is not possible to be sure.
**
** If an error occurs, an error code other than SQLITE_OK is returned.
*/
static int fts3SelectLeaf(
  Fts3Table *p,                   /* Virtual table handle */
  const char *zTerm,              /* Term to select leaves for */
  int nTerm,                      /* Size of term zTerm in bytes */
  const char *zNode,              /* Buffer containing segment interior node */
  int nNode,                      /* Size of buffer at zNode */
  sqlite3_int64 *piLeaf,          /* Selected leaf node */
  sqlite3_int64 *piLeaf2          /* Selected leaf node */
){
  int rc = SQLITE_OK;             /* Return code */
  int iHeight;                    /* Height of this node in tree */

  assert( piLeaf || piLeaf2 );

  /* The first varint of an interior node is its height in the tree. */
  fts3GetVarint32(zNode, &iHeight);
  rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2);
  assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) );

  /* A node of height 1 has leaves as children, so the block ids found by
  ** the scan are the answer. For taller nodes, descend into the selected
  ** child node(s). */
  if( rc==SQLITE_OK && iHeight>1 ){
    char *zBlob = 0;              /* Blob read from %_segments table */
    int nBlob = 0;                /* Size of zBlob in bytes */

    /* If both ends of the range were requested and they lie in different
    ** sub-trees, resolve the left end on its own first. The recursive call
    ** passes 0 for piLeaf2, so it always continues through the
    ** height-checked descent below rather than through this branch. */
    if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){
      rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob, 0);
      if( rc==SQLITE_OK ){
        rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, 0);
      }
      sqlite3_free(zBlob);
      piLeaf = 0;
      zBlob = 0;
    }

    if( rc==SQLITE_OK ){
      rc = sqlite3Fts3ReadBlock(p, piLeaf?*piLeaf:*piLeaf2, &zBlob, &nBlob, 0);
    }
    if( rc==SQLITE_OK ){
      /* A child must be strictly lower in the tree than its parent. If the
      ** stored height says otherwise the segment is corrupt, and following
      ** it could recurse without bound (for example when a node names
      ** itself as its own child). */
      int iChildHeight = 0;
      fts3GetVarint32(zBlob, &iChildHeight);
      if( iChildHeight>=iHeight ){
        rc = FTS_CORRUPT_VTAB;
      }else{
        rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2);
      }
    }
    sqlite3_free(zBlob);
  }

  return rc;
}
/*
** This function is used to create delta-encoded serialized lists of FTS3
** varints. Each call to this function appends a single varint to a list.
*/
static void fts3PutDeltaVarint(
  char **pp,                      /* IN/OUT: Output pointer */
  sqlite3_int64 *piPrev,          /* IN/OUT: Previous value written to list */
  sqlite3_int64 iVal              /* Write this value to the list */
){
  char *pOut = *pp;               /* Where the varint is written */

  /* Values must be strictly increasing, except that a leading zero may be
  ** written to an empty list. */
  assert( iVal-*piPrev > 0 || (*piPrev==0 && iVal==0) );

  /* Store the difference from the previous value, then remember iVal as
  ** the new previous value. */
  pOut += sqlite3Fts3PutVarint(pOut, iVal-*piPrev);
  *pp = pOut;
  *piPrev = iVal;
}
/*
** When this function is called, *ppPoslist is assumed to point to the
** start of a position-list. After it returns, *ppPoslist points to the
** first byte after the position-list.
**
** A position list is a list of positions (delta encoded) and columns for
** a single document record of a doclist. So, in other words, this
** routine advances *ppPoslist so that it points to the next docid in
** the doclist, or to the first byte past the end of the doclist.
**
** If pp is not NULL, then the contents of the position list are copied
** to *pp. *pp is set to point to the first byte past the last byte copied
** before this function returns.
*/
static void fts3PoslistCopy(char **pp, char **ppPoslist){
  char *pStart = *ppPoslist;      /* First byte of the position-list */
  char *pScan = pStart;           /* Scans forward to the terminator */
  char cont = 0;                  /* 0x80 bit of the byte before pScan */

  /* The end of a position list is marked by a zero encoded as an FTS3
  ** varint: a single POS_END (0) byte. But a 0 byte that follows a byte
  ** with the 0x80 bit set is not a varint 0, it is the tail of some other
  ** multi-byte value.
  **
  ** So scan until pScan points at a zero byte whose predecessor does not
  ** have the 0x80 bit set.
  */
  for(;;){
    if( (*pScan | cont)==0 ) break;
    cont = *pScan & 0x80;
    pScan++;
    testcase( cont!=0 && (*pScan)==0 );
  }
  pScan++;  /* Step over the POS_END terminator byte */

  /* Optionally copy the list, terminator included, to the output. */
  if( pp ){
    int nByte = (int)(pScan - pStart);
    char *pOut = *pp;
    memcpy(pOut, pStart, nByte);
    *pp = pOut + nByte;
  }
  *ppPoslist = pScan;
}
/*
** When this function is called, *ppPoslist is assumed to point to the
** start of a column-list. After it returns, *ppPoslist points to the
** terminator (POS_COLUMN or POS_END) byte of the column-list.
**
** A column-list is a list of delta-encoded positions for a single column
** within a single document within a doclist.
**
** The column-list is terminated either by a POS_COLUMN varint (1) or
** a POS_END varint (0). This routine leaves *ppPoslist pointing to
** the POS_COLUMN or POS_END that terminates the column-list.
**
** If pp is not NULL, then the contents of the column-list are copied
** to *pp. *pp is set to point to the first byte past the last byte copied
** before this function returns. The POS_COLUMN or POS_END terminator
** is not copied into *pp.
*/
static void fts3ColumnlistCopy(char **pp, char **ppPoslist){
  char *pStart = *ppPoslist;      /* First byte of the column-list */
  char *pScan = pStart;           /* Scans forward to the terminator */
  char cont = 0;                  /* 0x80 bit of the byte before pScan */

  /* A column-list is terminated by either a 0x01 or 0x00 byte that is
  ** not part of a multi-byte varint. Masking with 0xFE treats both
  ** terminator values alike.
  */
  while( ((*pScan | cont) & 0xFE)!=0 ){
    cont = *pScan & 0x80;
    pScan++;
    testcase( cont!=0 && ((*pScan)&0xfe)==0 );
  }

  /* Optionally copy the list to the output. The terminator byte itself
  ** is not copied, and pScan is left pointing at it. */
  if( pp ){
    int nByte = (int)(pScan - pStart);
    char *pOut = *pp;
    memcpy(pOut, pStart, nByte);
    *pp = pOut + nByte;
  }
  *ppPoslist = pScan;
}
/*
** Sentinel value used to signify the end of a position-list. This is safe
** because it is not possible to have a document with 2^31 terms, so no
** genuine position can be equal to it.
**
** fts3ReadNextPos() stores this value in its output once the list is
** exhausted, and fts3PoslistMerge() also uses it as the "column number"
** of a list that has no further columns.
*/
#define POSITION_LIST_END 0x7fffffff
/*
** This function is used to help parse position-lists. When this function is
** called, *pp may point to the start of the next varint in the position-list
** being parsed, or it may point to 1 byte past the end of the position-list
** (in which case **pp will be a terminator byte: POS_END (0) or
** POS_COLUMN (1)).
**
** If *pp points past the end of the current position-list, set *pi to
** POSITION_LIST_END and return. Otherwise, read the next varint from *pp,
** increment the current value of *pi by the value read, and set *pp to
** point to the next value before returning.
**
** Before calling this routine *pi must be initialized to the value of
** the previous position, or zero if we are reading the first position
** in the position-list. Because positions are delta-encoded, the value
** of the previous position is needed in order to compute the value of
** the next position.
*/
static void fts3ReadNextPos(
  char **pp,                      /* IN/OUT: Pointer into position-list buffer */
  sqlite3_int64 *pi               /* IN/OUT: Value read from position-list */
){
  /* A byte of 0x00 or 0x01 here is a list terminator, not a position. */
  if( ((**pp)&0xFE)==0 ){
    *pi = POSITION_LIST_END;
    return;
  }

  /* Add the next delta to *pi, then remove the bias of 2 that is added to
  ** every stored position delta. */
  fts3GetDeltaVarint(pp, pi);
  *pi -= 2;
}
/*
** If parameter iCol is not 0, write a POS_COLUMN (1) byte followed by
** the value of iCol encoded as a varint to *pp. This will start a new
** column list.
**
** Set *pp to point to the byte just after the last byte written before
** returning (do not modify it if iCol==0). Return the total number of bytes
** written (0 if iCol==0).
*/
static int fts3PutColNumber(char **pp, int iCol){
  char *pOut;                     /* Output pointer */
  int nVarint;                    /* Bytes used by the column-number varint */

  /* Column 0 is implicit: nothing is written and *pp is not modified. */
  if( iCol==0 ) return 0;

  /* Emit the 0x01 marker byte followed by the column number. */
  pOut = *pp;
  pOut[0] = 0x01;
  nVarint = sqlite3Fts3PutVarint(pOut+1, iCol);
  *pp = pOut + 1 + nVarint;
  return 1 + nVarint;
}
/*
** Compute the union of two position lists. The output written
** into *pp contains all positions of both *pp1 and *pp2 in sorted
** order and with any duplicates removed. All pointers are
** updated appropriately. The caller is responsible for insuring
** that there is enough space in *pp to hold the complete output.
*/
static void fts3PoslistMerge(
char **pp, /* Output buffer */
char **pp1, /* Left input list */
char **pp2 /* Right input list */
){
char *p = *pp;
char *p1 = *pp1;
char *p2 = *pp2;
/* Loop until both inputs are positioned on their POS_END (0) terminator.
** Each iteration handles one column-list from one or both inputs. */
while( *p1 || *p2 ){
int iCol1; /* The current column index in pp1 */
int iCol2; /* The current column index in pp2 */
/* Work out which column each input is currently positioned on: an explicit
** column number follows a POS_COLUMN byte, any other non-terminator byte
** means the implicit column 0. An exhausted input is assigned the
** sentinel POSITION_LIST_END so that it compares greater than any column
** number read from the other input. */
if( *p1==POS_COLUMN ) fts3GetVarint32(&p1[1], &iCol1);
else if( *p1==POS_END ) iCol1 = POSITION_LIST_END;
else iCol1 = 0;
if( *p2==POS_COLUMN ) fts3GetVarint32(&p2[1], &iCol2);
else if( *p2==POS_END ) iCol2 = POSITION_LIST_END;
else iCol2 = 0;
if( iCol1==iCol2 ){
sqlite3_int64 i1 = 0; /* Last position from pp1 */
sqlite3_int64 i2 = 0; /* Last position from pp2 */
sqlite3_int64 iPrev = 0;
/* n is the size of the column header written to the output. Both inputs
** are advanced by the same number of bytes, which relies on their headers
** for this column being encoded in that same number of bytes. */
int n = fts3PutColNumber(&p, iCol1);
p1 += n;
p2 += n;
/* At this point, both p1 and p2 point to the start of column-lists
** for the same column (the column with index iCol1 and iCol2).
** A column-list is a list of non-negative delta-encoded varints, each
** incremented by 2 before being stored. Each list is terminated by a
** POS_END (0) or POS_COLUMN (1). The following block merges the two lists
** and writes the results to buffer p. p is left pointing to the byte
** after the list written. No terminator (POS_END or POS_COLUMN) is
** written to the output.
*/
fts3GetDeltaVarint(&p1, &i1);
fts3GetDeltaVarint(&p2, &i2);
do {
/* Emit the smaller of the two current values. fts3PutDeltaVarint() sets
** iPrev to the value written; subtracting 2 afterwards means the next
** delta written again includes the +2 bias. */
fts3PutDeltaVarint(&p, &iPrev, (i1<i2) ? i1 : i2);
iPrev -= 2;
/* Advance whichever input(s) supplied the value just written. A value
** present in both inputs is written once only. */
if( i1==i2 ){
fts3ReadNextPos(&p1, &i1);
fts3ReadNextPos(&p2, &i2);
}else if( i1<i2 ){
fts3ReadNextPos(&p1, &i1);
}else{
fts3ReadNextPos(&p2, &i2);
}
}while( i1!=POSITION_LIST_END || i2!=POSITION_LIST_END );
}else if( iCol1<iCol2 ){
/* Only the left input has entries for this column: copy them verbatim. */
p1 += fts3PutColNumber(&p, iCol1);
fts3ColumnlistCopy(&p, &p1);
}else{
/* Only the right input has entries for this column: copy them verbatim. */
p2 += fts3PutColNumber(&p, iCol2);
fts3ColumnlistCopy(&p, &p2);
}
}
/* Terminate the output list, and leave each input pointer one byte past
** the POS_END terminator of its list. */
*p++ = POS_END;
*pp = p;
*pp1 = p1 + 1;
*pp2 = p2 + 1;
}
/*
** This function is used to merge two position lists into one. When it is
** called, *pp1 and *pp2 must both point to position lists. A position-list is
** the part of a doclist that follows each document id. For example, if a row
** contains:
**
** 'a b c'|'x y z'|'a b b a'
**
** Then the position list for this row for token 'b' would consist of:
**
** 0x02 0x01 0x02 0x03 0x03 0x00
**
** When this function returns, both *pp1 and *pp2 are left pointing to the
** byte following the 0x00 terminator of their respective position lists.
**
** If isSaveLeft is 0, an entry is added to the output position list for
** each position in *pp2 for which there exists one or more positions in
** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e.
** when the *pp1 token appears before the *pp2 token, but not more than nToken
** slots before it.
**
** e.g. nToken==1 searches for adjacent positions.
*/
/* Returns 1 if at least one position was written to the output (in which
** case the output is 0x00-terminated and *pp is advanced past it), or 0 if
** nothing was written (in which case *pp is left unchanged). */
static int fts3PoslistPhraseMerge(
char **pp, /* IN/OUT: Preallocated output buffer */
int nToken, /* Maximum difference in token positions */
int isSaveLeft, /* Save the left position */
int isExact, /* If *pp1 is exactly nTokens before *pp2 */
char **pp1, /* IN/OUT: Left input list */
char **pp2 /* IN/OUT: Right input list */
){
char *p = *pp;
char *p1 = *pp1;
char *p2 = *pp2;
int iCol1 = 0;
int iCol2 = 0;
/* Never set both isSaveLeft and isExact for the same invocation. */
assert( isSaveLeft==0 || isExact==0 );
assert( p!=0 && *p1!=0 && *p2!=0 );
/* If an input begins with an explicit column header, consume it. Otherwise
** the input starts in column 0. */
if( *p1==POS_COLUMN ){
p1++;
p1 += fts3GetVarint32(p1, &iCol1);
}
if( *p2==POS_COLUMN ){
p2++;
p2 += fts3GetVarint32(p2, &iCol2);
}
while( 1 ){
if( iCol1==iCol2 ){
/* pSave remembers where the output stood before this column's header was
** written. It is cleared as soon as a position is emitted; if it is still
** set after the column has been processed, the header is discarded. */
char *pSave = p;
sqlite3_int64 iPrev = 0;
sqlite3_int64 iPos1 = 0;
sqlite3_int64 iPos2 = 0;
if( iCol1 ){
*p++ = POS_COLUMN;
p += sqlite3Fts3PutVarint(p, iCol1);
}
assert( *p1!=POS_END && *p1!=POS_COLUMN );
assert( *p2!=POS_END && *p2!=POS_COLUMN );
/* Read the first position from each column-list, removing the bias of 2
** that is applied to stored positions. */
fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
while( 1 ){
/* A match is either an exact distance of nToken, or (when isExact is 0)
** any right position that follows the left one by at most nToken. */
if( iPos2==iPos1+nToken
|| (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken)
){
sqlite3_int64 iSave;
iSave = isSaveLeft ? iPos1 : iPos2;
fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2;
pSave = 0;
assert( p );
}
/* Decide which input to advance. Stop once the chosen input has reached
** the terminator (0x00 or 0x01) of its column-list. */
if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){
if( (*p2&0xFE)==0 ) break;
fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
}else{
if( (*p1&0xFE)==0 ) break;
fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
}
}
if( pSave ){
assert( pp && p );
p = pSave;
}
/* Skip whatever remains of both column-lists, then move on to the next
** column of each input, or stop if either input is exhausted. */
fts3ColumnlistCopy(0, &p1);
fts3ColumnlistCopy(0, &p2);
assert( (*p1&0xFE)==0 && (*p2&0xFE)==0 );
if( 0==*p1 || 0==*p2 ) break;
p1++;
p1 += fts3GetVarint32(p1, &iCol1);
p2++;
p2 += fts3GetVarint32(p2, &iCol2);
}
/* Advance pointer p1 or p2 (whichever corresponds to the smaller of
** iCol1 and iCol2) so that it points to either the 0x00 that marks the
** end of the position list, or the 0x01 that precedes the next
** column-number in the position list.
*/
else if( iCol1<iCol2 ){
fts3ColumnlistCopy(0, &p1);
if( 0==*p1 ) break;
p1++;
p1 += fts3GetVarint32(p1, &iCol1);
}else{
fts3ColumnlistCopy(0, &p2);
if( 0==*p2 ) break;
p2++;
p2 += fts3GetVarint32(p2, &iCol2);
}
}
/* Move both input pointers past the end of their position lists. */
fts3PoslistCopy(0, &p2);
fts3PoslistCopy(0, &p1);
*pp1 = p1;
*pp2 = p2;
/* If the output pointer never moved, nothing matched. */
if( *pp==p ){
return 0;
}
*p++ = 0x00;
*pp = p;
return 1;
}
/*
** Merge two position-lists as required by the NEAR operator. The argument
** position lists correspond to the left and right phrases of an expression
** like:
**
** "phrase 1" NEAR "phrase number 2"
**
** Position list *pp1 corresponds to the left-hand side of the NEAR
** expression and *pp2 to the right. As usual, the indexes in the position
** lists are the offsets of the last token in each phrase (tokens "1" and "2"
** in the example above).
**
** The output position list - written to *pp - is a copy of *pp2 with those
** entries that are not sufficiently NEAR entries in *pp1 removed.
*/
static int fts3PoslistNearMerge(
  char **pp,                      /* Output buffer */
  char *aTmp,                     /* Temporary buffer space */
  int nRight,                     /* Maximum difference in token positions */
  int nLeft,                      /* Maximum difference in token positions */
  char **pp1,                     /* IN/OUT: Left input list */
  char **pp2                      /* IN/OUT: Right input list */
){
  char *pLeftStart = *pp1;        /* Start of the left input list */
  char *pRightStart = *pp2;       /* Start of the right input list */
  char *pFwdEnd = aTmp;           /* End of the first merge result */
  char *aBwd;                     /* Start of the second merge result */
  char *pBwdEnd;                  /* End of the second merge result */
  int bFwd;                       /* True if the first merge wrote output */
  int bBwd;                       /* True if the second merge wrote output */

  /* First pass: left list against right list, results go to the start of
  ** the temporary buffer. */
  fts3PoslistPhraseMerge(&pFwdEnd, nRight, 0, 0, pp1, pp2);

  /* Second pass: rewind both inputs and run the merge with the lists
  ** swapped and isSaveLeft set. Its results are written directly after
  ** those of the first pass. */
  aBwd = pFwdEnd;
  pBwdEnd = aBwd;
  *pp1 = pLeftStart;
  *pp2 = pRightStart;
  fts3PoslistPhraseMerge(&pBwdEnd, nLeft, 1, 0, pp2, pp1);

  /* Combine whichever passes produced output into *pp. */
  bFwd = (pFwdEnd!=aTmp);
  bBwd = (pBwdEnd!=aBwd);
  if( bFwd && bBwd ){
    fts3PoslistMerge(pp, &aTmp, &aBwd);
  }else if( bFwd ){
    fts3PoslistCopy(pp, &aTmp);
  }else if( bBwd ){
    fts3PoslistCopy(pp, &aBwd);
  }else{
    /* Neither pass matched anything: nothing is written to *pp. */
    return 0;
  }
  return 1;
}
/*
** An instance of this structure is used to merge together the (potentially
** large number of) doclists for each term that matches a prefix query.
** See function fts3TermSelectMerge() for details.
**
** The two arrays are parallel: anOutput[i] is the size of the doclist held
** in aaOutput[i]. A NULL entry in aaOutput[] is an unused slot.
*/
typedef struct TermSelect TermSelect;
struct TermSelect {
char *aaOutput[16]; /* Malloc'd output buffers */
int anOutput[16]; /* Size of each output buffer in bytes */
};
/*
** This function is used to read a single varint from a buffer. Parameter
** pEnd points 1 byte past the end of the buffer. When this function is
** called, if *pp points to pEnd or greater, then the end of the buffer
** has been reached. In this case *pp is set to 0 and the function returns.
**
** If *pp does not point to or past pEnd, then a single varint is read
** from *pp. *pp is then set to point 1 byte past the end of the read varint.
**
** If bDescIdx is false, the value read is added to *pVal before returning.
** If it is true, the value read is subtracted from *pVal before this
** function returns.
*/
static void fts3GetDeltaVarint3(
  char **pp,                      /* IN/OUT: Point to read varint from */
  char *pEnd,                     /* End of buffer */
  int bDescIdx,                   /* True if docids are descending */
  sqlite3_int64 *pVal             /* IN/OUT: Integer value */
){
  sqlite3_int64 iDelta;           /* Varint read from the buffer */

  /* End of buffer: signal this by clearing *pp. *pVal is left as it was. */
  if( *pp>=pEnd ){
    *pp = 0;
    return;
  }

  /* Read one varint and apply it to *pVal in the direction of the index. */
  *pp += sqlite3Fts3GetVarint(*pp, &iDelta);
  if( bDescIdx ){
    *pVal -= iDelta;
  }else{
    *pVal += iDelta;
  }
}
/*
** This function is used to write a single varint to a buffer. The varint
** is written to *pp. Before returning, *pp is set to point 1 byte past the
** end of the value written.
**
** If *pbFirst is zero when this function is called, the value written to
** the buffer is that of parameter iVal.
**
** If *pbFirst is non-zero when this function is called, then the value
** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal)
** (if bDescIdx is non-zero).
**
** Before returning, this function always sets *pbFirst to 1 and *piPrev
** to the value of parameter iVal.
*/
static void fts3PutDeltaVarint3(
  char **pp,                      /* IN/OUT: Output pointer */
  int bDescIdx,                   /* True for descending docids */
  sqlite3_int64 *piPrev,          /* IN/OUT: Previous value written to list */
  int *pbFirst,                   /* IN/OUT: True after first int written */
  sqlite3_int64 iVal              /* Write this value to the list */
){
  /* Only a non-initial value in a descending list is stored as
  ** (previous - current). Every other case stores (current - previous),
  ** which for the first value is iVal itself since *piPrev is then 0. */
  int bReverse = (bDescIdx!=0 && *pbFirst!=0);
  sqlite3_int64 iDelta = bReverse ? (*piPrev - iVal) : (iVal - *piPrev);

  assert( *pbFirst || *piPrev==0 );
  assert( *pbFirst==0 || iDelta>0 );

  *pp += sqlite3Fts3PutVarint(*pp, iDelta);
  *pbFirst = 1;
  *piPrev = iVal;
}
/*
** This macro is used by various functions that merge doclists. The two
** arguments are 64-bit docid values. The macro evaluates to an integer
** that is negative, zero or positive according to whether i1 sorts before,
** equal to, or after i2 in the doclist order. If the value of the stack
** variable bDescDoclist is 0 when this macro is invoked, that order is
** ascending. Otherwise, it is descending.
**
** Only the sign of the result is meaningful. The comparison is done with
** relational operators rather than by computing (i1-i2), because the
** subtraction overflows (undefined behavior, and typically the wrong
** sign) when the two docids are more than 2^63-1 apart.
**
** Both arguments may be evaluated more than once, so they must not have
** side effects.
**
** Using this makes it easier to write code that can merge doclists that are
** sorted in either ascending or descending order.
*/
#define DOCID_CMP(i1, i2) \
  ((bDescDoclist?-1:1) * ((i1)>(i2) ? 1 : ((i1)==(i2) ? 0 : -1)))
/*
** This function does an "OR" merge of two doclists (output contains all
** positions contained in either argument doclist). If the docids in the
** input doclists are sorted in ascending order, parameter bDescDoclist
** should be false. If they are sorted in descending order, it should be
** passed a non-zero value.
**
** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer
** containing the output doclist and SQLITE_OK is returned. In this case
** *pnOut is set to the number of bytes in the output doclist.
**
** If an error occurs, an SQLite error code is returned. The output values
** are undefined in this case.
*/
static int fts3DoclistOrMerge(
int bDescDoclist, /* True if arguments are desc */
char *a1, int n1, /* First doclist */
char *a2, int n2, /* Second doclist */
char **paOut, int *pnOut /* OUT: Malloc'd doclist */
){
sqlite3_int64 i1 = 0;
sqlite3_int64 i2 = 0;
sqlite3_int64 iPrev = 0;
char *pEnd1 = &a1[n1];
char *pEnd2 = &a2[n2];
char *p1 = a1;
char *p2 = a2;
char *p;
char *aOut;
int bFirstOut = 0;
/* Clear the outputs up front. They are only set to real values once the
** merge has completed. */
*paOut = 0;
*pnOut = 0;
/* Allocate space for the output. Both the input and output doclists
** are delta encoded. If they are in ascending order (bDescDoclist==0),
** then the first docid in each list is simply encoded as a varint. For
** each subsequent docid, the varint stored is the difference between the
** current and previous docid (a positive number - since the list is in
** ascending order).
**
** The first docid written to the output is therefore encoded using the
** same number of bytes as it is in whichever of the input lists it is
** read from. And each subsequent docid read from the same input list
** consumes either the same or less bytes as it did in the input (since
** the difference between it and the previous value in the output must
** be a positive value less than or equal to the delta value read from
** the input list). The same argument applies to all but the first docid
** read from the 'other' list. And to the contents of all position lists
** that will be copied and merged from the input to the output.
**
** However, if the first docid copied to the output is a negative number,
** then the encoding of the first docid from the 'other' input list may
** be larger in the output than it was in the input (since the delta value
** may be a larger positive integer than the actual docid).
**
** The space required to store the output is therefore the sum of the
** sizes of the two inputs, plus enough space for exactly one of the input
** docids to grow.
**
** A symmetric argument may be made if the doclists are in descending
** order.
*/
aOut = sqlite3_malloc(n1+n2+FTS3_VARINT_MAX-1);
if( !aOut ) return SQLITE_NOMEM;
p = aOut;
/* Read the first docid from each input. The first value of a doclist is
** always added to the zero-initialized accumulator, hence the 0 passed
** for the "descending" argument here. fts3GetDeltaVarint3() sets the
** pointer to NULL once its input is exhausted. */
fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
while( p1 || p2 ){
sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
if( p2 && p1 && iDiff==0 ){
/* Same docid in both inputs: write it once and merge the position lists. */
fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
fts3PoslistMerge(&p, &p1, &p2);
fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
}else if( !p2 || (p1 && iDiff<0) ){
/* The next docid comes from the first input only. */
fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
fts3PoslistCopy(&p, &p1);
fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
}else{
/* The next docid comes from the second input only. */
fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2);
fts3PoslistCopy(&p, &p2);
fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
}
}
*paOut = aOut;
*pnOut = (int)(p-aOut);
assert( *pnOut<=n1+n2+FTS3_VARINT_MAX-1 );
return SQLITE_OK;
}
/*
** This function does a "phrase" merge of two doclists. In a phrase merge,
** the output contains a copy of each position from the right-hand input
** doclist for which there is a position in the left-hand input doclist
** exactly nDist tokens before it.
**
** If the docids in the input doclists are sorted in ascending order,
** parameter bDescDoclist should be false. If they are sorted in descending
** order, it should be passed a non-zero value.
**
** The right-hand input doclist is overwritten by this function.
*/
/* Returns SQLITE_OK, or SQLITE_NOMEM if the output buffer needed for a
** descending doclist cannot be allocated. */
static int fts3DoclistPhraseMerge(
int bDescDoclist, /* True if arguments are desc */
int nDist, /* Distance from left to right (1=adjacent) */
char *aLeft, int nLeft, /* Left doclist */
char **paRight, int *pnRight /* IN/OUT: Right/output doclist */
){
sqlite3_int64 i1 = 0;
sqlite3_int64 i2 = 0;
sqlite3_int64 iPrev = 0;
char *aRight = *paRight;
char *pEnd1 = &aLeft[nLeft];
char *pEnd2 = &aRight[*pnRight];
char *p1 = aLeft;
char *p2 = aRight;
char *p;
int bFirstOut = 0;
char *aOut;
assert( nDist>0 );
/* For an ascending doclist the output is written in place over the right
** input. For a descending doclist a separate buffer, FTS3_VARINT_MAX bytes
** larger than the right input, is allocated instead. */
if( bDescDoclist ){
aOut = sqlite3_malloc(*pnRight + FTS3_VARINT_MAX);
if( aOut==0 ) return SQLITE_NOMEM;
}else{
aOut = aRight;
}
p = aOut;
/* Read the first docid from each input. fts3GetDeltaVarint3() sets the
** pointer to NULL once its input is exhausted, which ends the loop. */
fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
while( p1 && p2 ){
sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
if( iDiff==0 ){
/* Both inputs contain this docid. Write the docid tentatively, then try
** the position-list merge. If that yields no positions, restore the
** saved output state so that the docid is dropped from the output. */
char *pSave = p;
sqlite3_int64 iPrevSave = iPrev;
int bFirstOutSave = bFirstOut;
fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){
p = pSave;
iPrev = iPrevSave;
bFirstOut = bFirstOutSave;
}
fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
}else if( iDiff<0 ){
/* Docid present only in the left input: skip its position list. */
fts3PoslistCopy(0, &p1);
fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
}else{
/* Docid present only in the right input: skip its position list. */
fts3PoslistCopy(0, &p2);
fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
}
}
*pnRight = (int)(p - aOut);
/* If a separate output buffer was used, it replaces the caller's right-hand
** buffer, which is freed here. */
if( bDescDoclist ){
sqlite3_free(aRight);
*paRight = aOut;
}
return SQLITE_OK;
}
/*
** Argument pList points to a position list nList bytes in size. This
** function checks to see if the position list contains any entries for
** a token in position 0 (of any column). If so, it writes argument iDelta
** to the output buffer pOut, followed by a position list consisting only
** of the entries from pList at position 0, and terminated by an 0x00 byte.
** The value returned is the number of bytes written to pOut (if any).
*/
int sqlite3Fts3FirstFilter(
  sqlite3_int64 iDelta,           /* Varint that may be written to pOut */
  char *pList,                    /* Position list (no 0x00 term) */
  int nList,                      /* Size of pList in bytes */
  char *pOut                      /* Write output here */
){
  char *pWrite = pOut;            /* Next byte of pOut to write to */
  char *pRead = pList;            /* Next byte of pList to read */
  char *pListEnd = pList + nList; /* One past the last byte of pList */
  int bDeltaDone = 0;             /* True once iDelta has been written */

  /* Deal with the implicit column 0, if the list starts with it. A stored
  ** value of 0x02 is position 0, since positions carry a bias of 2. */
  if( *pRead!=0x01 ){
    if( *pRead==0x02 ){
      pWrite += sqlite3Fts3PutVarint(pWrite, iDelta);
      *pWrite++ = 0x02;
      bDeltaDone = 1;
    }
    fts3ColumnlistCopy(0, &pRead);
  }

  /* Now visit each explicitly numbered column. */
  while( pRead<pListEnd && *pRead==0x01 ){
    sqlite3_int64 iCol;
    pRead++;
    pRead += sqlite3Fts3GetVarint(pRead, &iCol);
    if( *pRead==0x02 ){
      /* iDelta is emitted at most once, ahead of the first entry kept. */
      if( !bDeltaDone ){
        pWrite += sqlite3Fts3PutVarint(pWrite, iDelta);
        bDeltaDone = 1;
      }
      *pWrite++ = 0x01;
      pWrite += sqlite3Fts3PutVarint(pWrite, iCol);
      *pWrite++ = 0x02;
    }
    fts3ColumnlistCopy(0, &pRead);
  }

  /* Terminate the output only if something was written. */
  if( bDeltaDone ){
    *pWrite++ = 0x00;
  }
  return (int)(pWrite - pOut);
}
/*
** Merge all doclists in the TermSelect.aaOutput[] array into a single
** doclist stored in TermSelect.aaOutput[0]. If successful, delete all
** other doclists (except the aaOutput[0] one) and return SQLITE_OK.
**
** If an OOM error occurs, return SQLITE_NOMEM. In this case it is
** the responsibility of the caller to free any doclists left in the
** TermSelect.aaOutput[] array.
*/
static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){
  char *aAccum = 0;               /* Doclist accumulated so far */
  int nAccum = 0;                 /* Size of aAccum in bytes */
  int iSlot;                      /* Index into the aaOutput[] array */

  /* Visit every occupied slot, folding its doclist into the accumulator. */
  for(iSlot=0; iSlot<SizeofArray(pTS->aaOutput); iSlot++){
    char *aMerged;                /* Result of merging slot and accumulator */
    int nMerged;                  /* Size of aMerged in bytes */
    int rc;

    if( pTS->aaOutput[iSlot]==0 ) continue;

    if( aAccum==0 ){
      /* First doclist found: take ownership of it, nothing to merge yet. */
      aAccum = pTS->aaOutput[iSlot];
      nAccum = pTS->anOutput[iSlot];
      pTS->aaOutput[iSlot] = 0;
      continue;
    }

    rc = fts3DoclistOrMerge(p->bDescIdx,
        pTS->aaOutput[iSlot], pTS->anOutput[iSlot], aAccum, nAccum,
        &aMerged, &nMerged
    );
    if( rc!=SQLITE_OK ){
      /* The accumulator is no longer referenced from pTS, so free it here.
      ** The slot's own doclist stays in the array. */
      sqlite3_free(aAccum);
      return rc;
    }

    /* Both inputs have been consumed by the merge. */
    sqlite3_free(pTS->aaOutput[iSlot]);
    sqlite3_free(aAccum);
    pTS->aaOutput[iSlot] = 0;
    aAccum = aMerged;
    nAccum = nMerged;
  }

  pTS->aaOutput[0] = aAccum;
  pTS->anOutput[0] = nAccum;
  return SQLITE_OK;
}
/*
** Merge the doclist aDoclist/nDoclist into the TermSelect object passed
** as the first argument. The merge is an "OR" merge (see function
** fts3DoclistOrMerge() for details).
**
** This function is called with the doclist for each term that matches
** a queried prefix. It merges all these doclists into one, the doclist
** for the specified prefix. Since there can be a very large number of
** doclists to merge, the merging is done pair-wise using the TermSelect
** object.
**
** This function returns SQLITE_OK if the merge is successful, or an
** SQLite error code (SQLITE_NOMEM) if an error occurs.
*/
static int fts3TermSelectMerge(
  Fts3Table *p,                   /* FTS table handle */
  TermSelect *pTS,                /* TermSelect object to merge into */
  char *aDoclist,                 /* Pointer to doclist */
  int nDoclist                    /* Size of aDoclist in bytes */
){
  char *aCarry;                   /* Doclist being carried up the slots */
  int nCarry;                     /* Size of aCarry in bytes */
  int iSlot;                      /* Index into the aaOutput[] array */

  if( pTS->aaOutput[0]==0 ){
    /* This is the first term selected: copy the doclist to a new buffer
    ** using memcpy().
    **
    ** FTS3_VARINT_MAX bytes of unused space are added to the end of the
    ** allocation. This ensures that the buffer is big enough to hold the
    ** current doclist AND'd with any other doclist. If the doclists are
    ** stored in order=ASC order, this padding would not be required (since
    ** the size of [doclistA AND doclistB] is always less than or equal to
    ** the size of [doclistA] in that case). But this is not true for
    ** order=DESC. For example, a doclist containing (1, -1) may be smaller
    ** than (-1), as in the first example the -1 may be stored as a
    ** single-byte delta, whereas in the second it must be stored as a
    ** FTS3_VARINT_MAX byte varint.
    **
    ** Similar padding is added in the fts3DoclistOrMerge() function.
    */
    char *aCopy = sqlite3_malloc(nDoclist + FTS3_VARINT_MAX + 1);
    pTS->aaOutput[0] = aCopy;
    pTS->anOutput[0] = nDoclist;
    if( aCopy==0 ){
      return SQLITE_NOMEM;
    }
    memcpy(aCopy, aDoclist, nDoclist);
    return SQLITE_OK;
  }

  /* Otherwise, walk up the slots. Each occupied slot is merged into the
  ** carried doclist and emptied. The carry comes to rest in the first empty
  ** slot or, failing that, in the final slot. */
  aCarry = aDoclist;
  nCarry = nDoclist;
  for(iSlot=0; iSlot<SizeofArray(pTS->aaOutput); iSlot++){
    char *aMerged;                /* Result of merging carry and slot */
    int nMerged;                  /* Size of aMerged in bytes */
    int rc;

    if( pTS->aaOutput[iSlot]==0 ){
      assert( iSlot>0 );
      pTS->aaOutput[iSlot] = aCarry;
      pTS->anOutput[iSlot] = nCarry;
      break;
    }

    rc = fts3DoclistOrMerge(p->bDescIdx, aCarry, nCarry,
        pTS->aaOutput[iSlot], pTS->anOutput[iSlot], &aMerged, &nMerged
    );

    /* The carry is finished with whether or not the merge worked. It is
    ** freed unless it is still the caller's buffer. */
    if( aCarry!=aDoclist ) sqlite3_free(aCarry);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    sqlite3_free(pTS->aaOutput[iSlot]);
    pTS->aaOutput[iSlot] = 0;
    aCarry = aMerged;
    nCarry = nMerged;

    if( (iSlot+1)==SizeofArray(pTS->aaOutput) ){
      /* No slot left to carry into: keep the result in the last one. */
      pTS->aaOutput[iSlot] = aCarry;
      pTS->anOutput[iSlot] = nCarry;
    }
  }
  return SQLITE_OK;
}
/*
** Append SegReader object pNew to the end of the pCsr->apSegment[] array.
*/
static int fts3SegReaderCursorAppend(
  Fts3MultiSegReader *pCsr,
  Fts3SegReader *pNew
){
  int nSeg = pCsr->nSegment;      /* Number of entries currently in array */

  /* The array is grown 16 entries at a time, so more space is needed
  ** exactly when the current count is a multiple of 16. */
  if( (nSeg%16)==0 ){
    int nByte = (nSeg + 16)*sizeof(Fts3SegReader*);
    Fts3SegReader **apGrown;
    apGrown = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte);
    if( apGrown==0 ){
      /* The new seg-reader cannot be stored, so it is freed here rather
      ** than being left for the caller. */
      sqlite3Fts3SegReaderFree(pNew);
      return SQLITE_NOMEM;
    }
    pCsr->apSegment = apGrown;
  }

  pCsr->apSegment[nSeg] = pNew;
  pCsr->nSegment = nSeg + 1;
  return SQLITE_OK;
}
/*
** Add seg-reader objects to the Fts3MultiSegReader object passed as the
** 9th argument.
**
** This function returns SQLITE_OK if successful, or an SQLite error code
** otherwise.
*/
static int fts3SegReaderCursor(
  Fts3Table *p,                   /* FTS3 table handle */
  int iLangid,                    /* Language id */
  int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
  int iLevel,                     /* Level of segments to scan */
  const char *zTerm,              /* Term to query for */
  int nTerm,                      /* Size of zTerm in bytes */
  int isPrefix,                   /* True for a prefix search */
  int isScan,                     /* True to scan from zTerm to EOF */
  Fts3MultiSegReader *pCsr        /* Cursor object to populate */
){
  const int bExactTerm = (isPrefix==0 && isScan==0);  /* Plain term lookup */
  sqlite3_stmt *pSegdir = 0;      /* Iterates over the matching segments */
  int rc = SQLITE_OK;             /* Return code */
  int rcReset;                    /* Result of sqlite3_reset() */

  /* A negative iLevel asks for the pending-terms data to be included. This
  ** is done only when p->aIndex is set: according to the original note,
  ** callers from the fts4aux module leave the structures needed by
  ** sqlite3Fts3SegReaderPending() unpopulated, so they are filtered here. */
  if( iLevel<0 && p->aIndex ){
    Fts3SegReader *pPending = 0;
    rc = sqlite3Fts3SegReaderPending(
        p, iIndex, zTerm, nTerm, isPrefix||isScan, &pPending
    );
    if( rc==SQLITE_OK && pPending ){
      rc = fts3SegReaderCursorAppend(pCsr, pPending);
    }
  }

  if( iLevel!=FTS3_SEGCURSOR_PENDING ){
    if( rc==SQLITE_OK ){
      rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pSegdir);
    }

    /* One iteration per row returned by the segment-directory statement.
    ** Leaving the loop with rc==SQLITE_DONE is the normal end of data. */
    while( rc==SQLITE_OK ){
      Fts3SegReader *pSeg = 0;
      sqlite3_int64 iStartBlock;
      sqlite3_int64 iLeavesEndBlock;
      sqlite3_int64 iEndBlock;
      int nRoot;
      char const *zRoot;

      rc = sqlite3_step(pSegdir);
      if( rc!=SQLITE_ROW ) break;

      /* Copy the row into locals. */
      iStartBlock = sqlite3_column_int64(pSegdir, 1);
      iLeavesEndBlock = sqlite3_column_int64(pSegdir, 2);
      iEndBlock = sqlite3_column_int64(pSegdir, 3);
      nRoot = sqlite3_column_bytes(pSegdir, 4);
      zRoot = sqlite3_column_blob(pSegdir, 4);

      /* When a term is supplied and the segment is not held entirely on its
      ** root node, narrow the range of leaves that will be scanned. */
      if( iStartBlock && zTerm ){
        sqlite3_int64 *piLeafEnd = 0;
        if( isPrefix ) piLeafEnd = &iLeavesEndBlock;
        rc = fts3SelectLeaf(
            p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, piLeafEnd
        );
        if( rc!=SQLITE_OK ) break;
        if( bExactTerm ) iLeavesEndBlock = iStartBlock;
      }

      rc = sqlite3Fts3SegReaderNew(
          pCsr->nSegment+1, bExactTerm,
          iStartBlock, iLeavesEndBlock, iEndBlock,
          zRoot, nRoot, &pSeg
      );
      if( rc!=SQLITE_OK ) break;
      rc = fts3SegReaderCursorAppend(pCsr, pSeg);
    }
  }

  /* Common exit: always reset the statement. If iteration simply ran out of
  ** rows, report whatever the reset returned instead of SQLITE_DONE. */
  rcReset = sqlite3_reset(pSegdir);
  if( rc==SQLITE_DONE ) rc = rcReset;
  return rc;
}
/*
** Set up a cursor object for iterating through a full-text index or a
** single level therein.
*/
int sqlite3Fts3SegReaderCursor(
  Fts3Table *p,                   /* FTS3 table handle */
  int iLangid,                    /* Language-id to search */
  int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
  int iLevel,                     /* Level of segments to scan */
  const char *zTerm,              /* Term to query for */
  int nTerm,                      /* Size of zTerm in bytes */
  int isPrefix,                   /* True for a prefix search */
  int isScan,                     /* True to scan from zTerm to EOF */
  Fts3MultiSegReader *pCsr        /* Cursor object to populate */
){
  /* Argument sanity checks. Both special level codes are negative, so any
  ** non-negative iLevel names a real level. */
  assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 );
  assert( iLevel>=0
       || iLevel==FTS3_SEGCURSOR_ALL
       || iLevel==FTS3_SEGCURSOR_PENDING
  );
  assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
  assert( iIndex>=0 && iIndex<p->nIndex );
  assert( isScan==0 || isPrefix==0 );   /* Never both at once */

  /* Start from an all-zero cursor, then let the worker populate it. */
  memset(pCsr, 0, sizeof(*pCsr));
  return fts3SegReaderCursor(
      p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
  );
}
/*
** In addition to its current configuration, have the Fts3MultiSegReader
** passed as the 5th argument (pCsr) also scan the doclist for term zTerm/nTerm.
**
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
*/
static int fts3SegReaderCursorAddZero(
  Fts3Table *p,                   /* FTS virtual table handle */
  int iLangid,                    /* Language id */
  const char *zTerm,              /* Term to scan doclist of */
  int nTerm,                      /* Number of bytes in zTerm */
  Fts3MultiSegReader *pCsr        /* Fts3MultiSegReader to modify */
){
  /* Goes through the static worker rather than the public entry point, so
  ** pCsr is not zeroed first and any readers it already holds are kept.
  ** The lookup is on index 0, all levels, exact term (no prefix, no scan). */
  int rc;
  rc = fts3SegReaderCursor(
      p, iLangid, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pCsr
  );
  return rc;
}
/*
** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or,
** if isPrefix is true, to scan the doclist for all terms for which
** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write
** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return
** an SQLite error code.
**
** It is the responsibility of the caller to free this object by eventually
** passing it to fts3SegReaderCursorFree()
**
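** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
** Output parameter *ppSegcsr is set to 0 only if the object itself cannot
** be allocated. If a later step fails, *ppSegcsr still receives the
** allocated object, which the caller must free as described above.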
*/
static int fts3TermSegReaderCursor(
  Fts3Cursor *pCsr,               /* Virtual table cursor handle */
  const char *zTerm,              /* Term to query for */
  int nTerm,                      /* Size of zTerm in bytes */
  int isPrefix,                   /* True for a prefix search */
  Fts3MultiSegReader **ppSegcsr   /* OUT: Allocated seg-reader cursor */
){
  int rc = SQLITE_NOMEM;          /* Stays NOMEM if the allocation fails */
  Fts3MultiSegReader *pReader;    /* Object to allocate and return */

  pReader = sqlite3_malloc(sizeof(Fts3MultiSegReader));
  if( pReader ){
    Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
    int bFound = 0;               /* Set once a prefix index has been used */
    int i;

    if( isPrefix ){
      /* First choice: a prefix index whose prefix length equals nTerm. The
      ** query term can then be looked up in it as an ordinary term. */
      for(i=1; i<pTab->nIndex; i++){
        if( pTab->aIndex[i].nPrefix!=nTerm ) continue;
        bFound = 1;
        rc = sqlite3Fts3SegReaderCursor(pTab, pCsr->iLangid,
            i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pReader
        );
        pReader->bLookup = 1;
        break;
      }

      /* Second choice: a prefix index one byte longer than the term. It is
      ** queried as a prefix search, and the exact term is then added from
      ** index 0 via fts3SegReaderCursorAddZero(). */
      if( bFound==0 ){
        for(i=1; i<pTab->nIndex; i++){
          if( pTab->aIndex[i].nPrefix!=nTerm+1 ) continue;
          bFound = 1;
          rc = sqlite3Fts3SegReaderCursor(pTab, pCsr->iLangid,
              i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pReader
          );
          if( rc==SQLITE_OK ){
            rc = fts3SegReaderCursorAddZero(
                pTab, pCsr->iLangid, zTerm, nTerm, pReader
            );
          }
          break;
        }
      }
    }

    /* Fallback: no usable prefix index (or not a prefix query), so use the
    ** main index 0 directly. */
    if( !bFound ){
      rc = sqlite3Fts3SegReaderCursor(pTab, pCsr->iLangid,
          0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pReader
      );
      pReader->bLookup = !isPrefix;
    }
  }

  /* The pointer is handed back whatever rc is; it is 0 only when the
  ** allocation above failed. */
  *ppSegcsr = pReader;
  return rc;
}
/*
** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor().
*/
static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){
/* Two steps: sqlite3Fts3SegReaderFinish() is called on the reader first,
** then the Fts3MultiSegReader structure itself is freed. */
sqlite3Fts3SegReaderFinish(pSegcsr);
sqlite3_free(pSegcsr);
}
/*
** This function retrieves the doclist for the specified term (or term
** prefix) from the database.
*/
static int fts3TermSelect(
  Fts3Table *p,                   /* Virtual table handle */
  Fts3PhraseToken *pTok,          /* Token to query for */
  int iColumn,                    /* Column to query (or -ve for all columns) */
  int *pnOut,                     /* OUT: Size of buffer at *ppOut */
  char **ppOut                    /* OUT: Malloced result buffer */
){
  Fts3MultiSegReader *pReader = pTok->pSegcsr;  /* Reader for this term */
  Fts3SegFilter filter;           /* Segment term filter configuration */
  TermSelect merge;               /* Accumulates the merged doclists */
  int flags;                      /* Filter flags being assembled */
  int rc;                         /* Return code */

  memset(&merge, 0, sizeof(TermSelect));

  /* Build the filter: empty entries are ignored and positions are always
  ** required; the remaining flags depend on the token and the column. */
  flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS;
  if( pTok->isPrefix ) flags |= FTS3_SEGMENT_PREFIX;
  if( pTok->bFirst ) flags |= FTS3_SEGMENT_FIRST;
  if( iColumn<p->nColumn ) flags |= FTS3_SEGMENT_COLUMN_FILTER;
  filter.flags = flags;
  filter.iCol = iColumn;
  filter.zTerm = pTok->z;
  filter.nTerm = pTok->n;

  /* Feed every doclist the reader produces into the merge object. The loop
  ** ends on the first result that is not SQLITE_ROW / SQLITE_OK. */
  rc = sqlite3Fts3SegReaderStart(p, pReader, &filter);
  while( rc==SQLITE_OK ){
    rc = sqlite3Fts3SegReaderStep(p, pReader);
    if( rc!=SQLITE_ROW ) break;
    rc = fts3TermSelectMerge(p, &merge, pReader->aDoclist, pReader->nDoclist);
  }

  if( rc==SQLITE_OK ){
    rc = fts3TermSelectFinishMerge(p, &merge);
  }

  if( rc!=SQLITE_OK ){
    /* Failure: release every partial output buffer. */
    int i;
    for(i=0; i<SizeofArray(merge.aaOutput); i++){
      sqlite3_free(merge.aaOutput[i]);
    }
  }else{
    /* Success: the final result is in slot 0 and is passed to the caller. */
    *ppOut = merge.aaOutput[0];
    *pnOut = merge.anOutput[0];
  }

  /* The reader is consumed in both cases; detach it from the token. */
  fts3SegReaderCursorFree(pReader);
  pTok->pSegcsr = 0;
  return rc;
}
/*
** This function counts the total number of docids in the doclist stored
** in buffer aList[], size nList bytes.
**
** The doclist is assumed to contain a position-list following each docid:
** for every entry the docid varint is skipped and the position-list that
** follows it is then skipped as well. (The function takes no isPoslist
** argument.)
*/
static int fts3DoclistCountDocids(char *aList, int nList){
  int nDoc = 0;                   /* Number of docids seen so far */

  if( aList ){
    char *pEnd = aList + nList;   /* One byte past the end of the buffer */
    char *pCur;                   /* Read position */

    /* Each iteration consumes one entry: a docid varint followed by its
    ** position list. */
    for(pCur=aList; pCur<pEnd; nDoc++){
      while( (*pCur++)&0x80 );      /* Step over the docid varint */
      fts3PoslistCopy(0, &pCur);    /* Step over the position list */
    }
  }
  return nDoc;
}
/*
** Advance the cursor to the next row in the %_content table that
** matches the search criteria. For a MATCH search, this will be
** the next row that matches. For a full-table scan, this will be
** simply the next row in the %_content table. For a docid lookup,
** this routine simply sets the EOF flag.
**
** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned
** even if we reach end-of-file. The fts3EofMethod() will be called
** subsequently to determine whether or not an EOF was hit.
*/
static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){
  Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
  int rc;

  if( pCsr->eSearch!=FTS3_DOCID_SEARCH
   && pCsr->eSearch!=FTS3_FULLSCAN_SEARCH
  ){
    /* Full-text query: the expression evaluator advances the cursor. */
    rc = fts3EvalNext(pCsr);
  }else if( sqlite3_step(pCsr->pStmt)==SQLITE_ROW ){
    /* Docid lookup or full scan with another row: remember its docid,
    ** which is read from column 0 of the statement. */
    pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0);
    rc = SQLITE_OK;
  }else{
    /* No further row. Flag EOF; the reset supplies the result code. */
    pCsr->isEof = 1;
    rc = sqlite3_reset(pCsr->pStmt);
  }

  assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
  return rc;
}
/*
** The following are copied from sqliteInt.h.
**
** Constants for the largest and smallest possible 64-bit signed integers.
** These macros are designed to work correctly on both 32-bit and 64-bit
** compilers.
*/
/* NOTE(review): presumably the amalgamation build already sees these two
** macros from sqliteInt.h, hence the guard - confirm. */
#ifndef SQLITE_AMALGAMATION
/* Largest signed 64-bit value: 0x7fffffff in the upper 32 bits combined
** with 0xffffffff in the lower 32 bits. */
# define LARGEST_INT64 (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32))
/* Smallest signed 64-bit value, computed as (-1 - LARGEST_INT64). */
# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64)
#endif
/*
** If the numeric type of argument pVal is "integer", then return it
** converted to a 64-bit signed integer. Otherwise, return a copy of
** the second parameter, iDefault.
*/
static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){
  /* No value supplied: use the default bound. */
  if( pVal==0 ) return iDefault;

  /* Only a value whose numeric type is INTEGER overrides the default. */
  if( sqlite3_value_numeric_type(pVal)!=SQLITE_INTEGER ) return iDefault;

  return sqlite3_value_int64(pVal);
}
/*
** This is the xFilter interface for the virtual table. See
** the virtual table xFilter method documentation for additional
** information.
**
** If idxNum==FTS3_FULLSCAN_SEARCH then do a full table scan against
** the %_content table.
**
** If idxNum==FTS3_DOCID_SEARCH then do a docid lookup for a single entry
** in the %_content table.
**
** If idxNum>=FTS3_FULLTEXT_SEARCH then use the full text index. The
** column on the left-hand side of the MATCH operator is column
** number idxNum-FTS3_FULLTEXT_SEARCH, 0 indexed. argv[0] is the right-hand
** side of the MATCH operator.
*/
static int fts3FilterMethod(
sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
int idxNum, /* Strategy index */
const char *idxStr, /* If not NULL, a leading 'D' selects descending order */
int nVal, /* Number of elements in apVal */
sqlite3_value **apVal /* Arguments for the indexing scheme */
){
int rc = SQLITE_OK;
char *zSql; /* SQL statement used to access %_content */
int eSearch; /* Search type: the low 16 bits of idxNum */
Fts3Table *p = (Fts3Table *)pCursor->pVtab;
Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
sqlite3_value *pCons = 0; /* The MATCH or rowid constraint, if any */
sqlite3_value *pLangid = 0; /* The "langid = ?" constraint, if any */
sqlite3_value *pDocidGe = 0; /* The "docid >= ?" constraint, if any */
sqlite3_value *pDocidLe = 0; /* The "docid <= ?" constraint, if any */
int iIdx; /* Index of the next unread entry in apVal[] */
/* NOTE: idxStr is read further down despite this marker; nVal is only
** read inside an assert(). */
UNUSED_PARAMETER(idxStr);
UNUSED_PARAMETER(nVal);
/* The search type lives in the low 16 bits of idxNum. The FTS3_HAVE_* bits
** of idxNum are tested separately below. */
eSearch = (idxNum & 0x0000FFFF);
assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
assert( p->pSegments==0 );
/* Collect arguments into local variables. apVal[] is consumed in a fixed
** order: MATCH/rowid constraint (absent for a full scan), then langid,
** then "docid >=", then "docid <=". */
iIdx = 0;
if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++];
if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++];
if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++];
if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++];
assert( iIdx==nVal );
/* In case the cursor has been used before, clear it now. After releasing
** the owned resources, every Fts3Cursor field located after the leading
** sqlite3_vtab_cursor is zeroed. */
sqlite3_finalize(pCsr->pStmt);
sqlite3_free(pCsr->aDoclist);
sqlite3Fts3MIBufferFree(pCsr->pMIBuffer);
sqlite3Fts3ExprFree(pCsr->pExpr);
memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor));
/* Set the lower and upper bounds on docids to return. A missing or
** non-integer constraint leaves the bound at the extreme value. */
pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64);
pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64);
/* Sort order: taken from idxStr when present, otherwise from the table's
** bDescIdx setting. */
if( idxStr ){
pCsr->bDesc = (idxStr[0]=='D');
}else{
pCsr->bDesc = p->bDescIdx;
}
pCsr->eSearch = (i16)eSearch;
/* Any search type other than docid lookup and full scan is a full-text
** query: parse the MATCH expression and start evaluating it. */
if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){
int iCol = eSearch-FTS3_FULLTEXT_SEARCH;
const char *zQuery = (const char *)sqlite3_value_text(pCons);
/* A NULL text pointer for a value that is not SQL NULL is reported as
** an out-of-memory condition. */
if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){
return SQLITE_NOMEM;
}
pCsr->iLangid = 0;
if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid);
assert( p->base.zErrMsg==0 );
rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr,
&p->base.zErrMsg
);
if( rc!=SQLITE_OK ){
return rc;
}
rc = fts3EvalStart(pCsr);
/* sqlite3Fts3SegmentsClose() is called whether or not fts3EvalStart()
** succeeded. */
sqlite3Fts3SegmentsClose(p);
if( rc!=SQLITE_OK ) return rc;
pCsr->pNextId = pCsr->aDoclist;
pCsr->iPrevId = 0;
}
/* Compile a SELECT statement for this cursor. For a full-table-scan, the
** statement loops through all rows of the %_content table. For a docid
** lookup, the statement retrieves a single row by docid. No statement is
** prepared in this function for a full-text query.
*/
if( eSearch==FTS3_FULLSCAN_SEARCH ){
if( pDocidGe || pDocidLe ){
zSql = sqlite3_mprintf(
"SELECT %s WHERE rowid BETWEEN %lld AND %lld ORDER BY rowid %s",
p->zReadExprlist, pCsr->iMinDocid, pCsr->iMaxDocid,
(pCsr->bDesc ? "DESC" : "ASC")
);
}else{
zSql = sqlite3_mprintf("SELECT %s ORDER BY rowid %s",
p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC")
);
}
if( zSql ){
rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
sqlite3_free(zSql);
}else{
rc = SQLITE_NOMEM;
}
}else if( eSearch==FTS3_DOCID_SEARCH ){
rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt);
if( rc==SQLITE_OK ){
rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons);
}
}
if( rc!=SQLITE_OK ) return rc;
/* Finish by advancing once, using the same routine as xNext. */
return fts3NextMethod(pCursor);
}
/*
** This is the xEof method of the virtual table. SQLite calls this
** routine to find out if it has reached the end of a result set.
*/
static int fts3EofMethod(sqlite3_vtab_cursor *pCursor){
  /* The flag is maintained elsewhere (for example by fts3NextMethod());
  ** this method only reports it. */
  Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
  return pCsr->isEof;
}
/*
** This is the xRowid method. The SQLite core calls this routine to
** retrieve the rowid for the current row of the result set. fts3
** exposes %_content.docid as the rowid for the virtual table. The
** rowid should be written to *pRowid.
*/
static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
  /* The docid of the current row is cached in the cursor as iPrevId. */
  *pRowid = ((Fts3Cursor *)pCursor)->iPrevId;
  return SQLITE_OK;
}
/*
** This is the xColumn method, called by SQLite to request a value from
** the row that the supplied cursor currently points to.
**
** If:
**
** (iCol < p->nColumn) -> The value of the iCol'th user column.
** (iCol == p->nColumn) -> Magic column with the same name as the table.
** (iCol == p->nColumn+1) -> Docid column
** (iCol == p->nColumn+2) -> Langid column
*/
static int fts3ColumnMethod(
  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
  int iCol                        /* Index of column to read value from */
){
  Fts3Table *p = (Fts3Table *)pCursor->pVtab;
  Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
  int rc = SQLITE_OK;             /* Return Code */

  /* The column value supplied by SQLite must be in range. */
  assert( iCol>=0 && iCol<=p->nColumn+2 );

  if( iCol==p->nColumn ){
    /* The hidden column named after the table: the result is a blob whose
    ** content is the cursor pointer itself. */
    sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
  }else if( iCol==p->nColumn+1 ){
    /* The docid column: the current docid is cached in the cursor. */
    sqlite3_result_int64(pCtx, pCsr->iPrevId);
  }else if( iCol==p->nColumn+2 && pCsr->pExpr ){
    /* The langid column while an expression is attached to the cursor:
    ** answer from the cursor without seeking. */
    sqlite3_result_int64(pCtx, pCsr->iLangid);
  }else{
    /* A user column, or the langid column when no expression is attached.
    ** Either way the content row has to be loaded first. */
    rc = fts3CursorSeek(0, pCsr);
    if( rc==SQLITE_OK ){
      if( iCol==p->nColumn+2 ){
        int iLangid = p->zLanguageid
            ? sqlite3_column_int(pCsr->pStmt, p->nColumn+1)
            : 0;
        sqlite3_result_int(pCtx, iLangid);
      }else if( sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){
        /* Statement column 0 is the docid, so user column iCol is found
        ** at statement column iCol+1. */
        sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
      }
    }
  }

  assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
  return rc;
}
/*
** This function is the implementation of the xUpdate callback used by
** FTS3 virtual tables. It is invoked by SQLite each time a row is to be
** inserted, updated or deleted.
*/
static int fts3UpdateMethod(
  sqlite3_vtab *pVtab,            /* Virtual table handle */
  int nArg,                       /* Size of argument array */
  sqlite3_value **apVal,          /* Array of arguments */
  sqlite_int64 *pRowid            /* OUT: The affected (or effected) rowid */
){
  /* Pure forwarder: the work is done by sqlite3Fts3UpdateMethod(). */
  int rc = sqlite3Fts3UpdateMethod(pVtab, nArg, apVal, pRowid);
  return rc;
}
/*
** Implementation of xSync() method. Flush the contents of the pending-terms
** hash-table to the database.
*/
static int fts3SyncMethod(sqlite3_vtab *pVtab){
  /* Why there is a minimum amount of merge work:
  **
  ** After an incremental-merge, input segments that were not completely
  ** consumed (the usual case) are updated in place to drop the entries
  ** already merged. That means rewriting the leaf block holding the
  ** smallest unmerged entry plus each block (if any) between that leaf and
  ** the root node. With input b-trees of height N merged eight at a time,
  ** this costs (8*(1+N)) block writes. N is usually small - often between
  ** 0 and 2 - so the overhead is somewhere between 8 and 24 blocks. To
  ** keep that overhead from dwarfing the productive work, an incremental
  ** merge is attempted only if it will write at least 64 leaf blocks.
  ** Hence nMinMerge.
  **
  ** Updating the input segments also deletes a number of blocks from the
  ** segments table, but that is not counted as overhead because a
  ** crisis-merge over the same input segments would need it too.
  */
  const u32 nMinMerge = 64;       /* Minimum amount of incr-merge work to do */
  Fts3Table *pTab = (Fts3Table*)pVtab;
  int rc;

  /* Step 1: write the pending terms out. */
  rc = sqlite3Fts3PendingTermsFlush(pTab);

  /* Step 2: optionally run an automatic incremental merge. It requires a
  ** successful flush, enough leaves added, and an nAutoincrmerge setting
  ** that is neither 0 nor 0xff. */
  if( rc==SQLITE_OK && pTab->nLeafAdd>(nMinMerge/16) ){
    if( pTab->nAutoincrmerge && pTab->nAutoincrmerge!=0xff ){
      int mxLevel = 0;            /* Maximum relative level value in db */
      int nWork;                  /* Incr-merge parameter A */

      rc = sqlite3Fts3MaxLevel(pTab, &mxLevel);
      assert( rc==SQLITE_OK || mxLevel==0 );
      nWork = pTab->nLeafAdd * mxLevel;
      nWork += (nWork/2);
      if( nWork>(int)nMinMerge ){
        rc = sqlite3Fts3Incrmerge(pTab, nWork, pTab->nAutoincrmerge);
      }
    }
  }

  sqlite3Fts3SegmentsClose(pTab);
  return rc;
}
/*
** If it is currently unknown whether or not the FTS table has an %_stat
** table (if p->bHasStat==2), attempt to determine this (set p->bHasStat
** to 0 or 1). Return SQLITE_OK if successful, or an SQLite error code
** if an error occurs.
*/
static int fts3SetHasStat(Fts3Table *p){
  const char *zFmt ="SELECT 1 FROM %Q.sqlite_master WHERE tbl_name='%q_stat'";
  sqlite3_stmt *pQuery = 0;
  char *zSql;
  int rc;

  /* Nothing to do unless the answer is still unknown (value 2). */
  if( p->bHasStat!=2 ) return SQLITE_OK;

  zSql = sqlite3_mprintf(zFmt, p->zDb, p->zName);
  if( zSql==0 ) return SQLITE_NOMEM;

  rc = sqlite3_prepare_v2(p->db, zSql, -1, &pQuery, 0);
  if( rc==SQLITE_OK ){
    /* A row means the %_stat table is listed in sqlite_master. The flag is
    ** stored only if finalizing the statement reports no error. */
    int bFound = (sqlite3_step(pQuery)==SQLITE_ROW);
    rc = sqlite3_finalize(pQuery);
    if( rc==SQLITE_OK ) p->bHasStat = bFound;
  }
  sqlite3_free(zSql);
  return rc;
}
/*
** Implementation of xBegin() method.
*/
static int fts3BeginMethod(sqlite3_vtab *pVtab){
  Fts3Table *pTab = (Fts3Table*)pVtab;
  UNUSED_PARAMETER(pVtab);

  /* Expected state on entry: not already inside a transaction, no pending
  ** data and no open segments handle. */
  assert( pTab->inTransaction!=1 );
  assert( pTab->nPendingData==0 );
  assert( pTab->pSegments==0 );

  /* Transaction bookkeeping wrapped in TESTONLY(). */
  TESTONLY( pTab->inTransaction = 1 );
  TESTONLY( pTab->mxSavepoint = -1; );

  /* Restart the count of leaves added, which fts3SyncMethod() reads. */
  pTab->nLeafAdd = 0;
  return fts3SetHasStat(pTab);
}
/*
** Implementation of xCommit() method. This is a no-op. The contents of
** the pending-terms hash-table have already been flushed into the database
** by fts3SyncMethod().
*/
static int fts3CommitMethod(sqlite3_vtab *pVtab){
  /* pTab exists only where TESTONLY() expands to its argument; every use
  ** below is inside assert() or TESTONLY(). */
  TESTONLY( Fts3Table *pTab = (Fts3Table*)pVtab );
  UNUSED_PARAMETER(pVtab);

  assert( pTab->inTransaction!=0 );
  assert( pTab->nPendingData==0 );
  assert( pTab->pSegments==0 );

  TESTONLY( pTab->inTransaction = 0 );
  TESTONLY( pTab->mxSavepoint = -1; );
  return SQLITE_OK;
}
/*
** Implementation of xRollback(). Discard the contents of the pending-terms
** hash-table. Any changes made to the database are reverted by SQLite.
*/
static int fts3RollbackMethod(sqlite3_vtab *pVtab){
  Fts3Table *pTab = (Fts3Table*)pVtab;

  /* Discard whatever is buffered in the pending-terms hash tables. */
  sqlite3Fts3PendingTermsClear(pTab);

  assert( pTab->inTransaction!=0 );
  TESTONLY( pTab->inTransaction = 0 );
  TESTONLY( pTab->mxSavepoint = -1; );
  return SQLITE_OK;
}
/*
** When called, *ppPoslist must point to the byte immediately following the
** end of a position-list. i.e. ( (*ppPoslist)[-1]==POS_END ). This function
** moves *ppPoslist so that it instead points to the first byte of the
** same position list.
*/
static void fts3ReversePoslist(char *pStart, char **ppPoslist){
  char *pCur = &(*ppPoslist)[-2];   /* Byte before the list terminator */
  char prev = 0;                    /* Byte most recently stepped over */
  char b;                           /* Current byte of the final varint */

  /* Step backwards past any trailing 0x00 bytes added by NearTrim(). The
  ** first non-zero byte is consumed as well and remembered in prev. */
  while( pCur>pStart ){
    prev = *pCur--;
    if( prev!=0 ) break;
  }

  /* Search backwards for a varint with value zero (the end of the previous
  ** poslist): a 0x00 byte preceded by a byte without the 0x80 bit set. The
  ** walk continues while the current byte has the 0x80 bit set or the byte
  ** stepped over last was non-zero. */
  while( pCur>pStart && ((*pCur & 0x80) | prev) ){
    prev = *pCur--;
  }
  assert( pCur==pStart || prev==0 );

  /* Usually pCur now points at that preceding byte, so the poslist is found
  ** by skipping forward 2 bytes and then over one varint.
  **
  ** The exception is when the walk stopped at pStart because the poslist to
  ** return is the first one in the doclist; then the 2 bytes are not
  ** skipped. The extra test (prev==0 && *ppPoslist>&pCur[2]) covers a
  ** doclist whose first entry has an empty position list, where the walk
  ** also ends at pStart but the skip is still needed. For example, if the
  ** first docid is 10, a doclist that begins with:
  **
  **   0x0A 0x00 <next docid delta varint>
  */
  if( pCur>pStart || (prev==0 && *ppPoslist>&pCur[2]) ){
    pCur = &pCur[2];
  }

  /* Skip one varint: advance until a byte without the 0x80 bit has been
  ** consumed. */
  do{
    b = *pCur++;
  }while( b & 0x80 );

  *ppPoslist = pCur;
}
/*
** Helper function used by the implementation of the overloaded snippet(),
** offsets() and optimize() SQL functions.
**
** If the value passed as the third argument is a blob of size
** sizeof(Fts3Cursor*), then the blob contents are copied to the
** output variable *ppCsr and SQLITE_OK is returned. Otherwise, an error
** message is written to context pContext and SQLITE_ERROR returned. The
** string passed via zFunc is used as part of the error message.
*/
static int fts3FunctionArg(
  sqlite3_context *pContext,      /* SQL function call context */
  const char *zFunc,              /* Function name */
  sqlite3_value *pVal,            /* argv[0] passed to function */
  Fts3Cursor **ppCsr              /* OUT: Store cursor handle here */
){
  char *zErr;                     /* Error message, if one is needed */

  /* Accept only a blob that is exactly one pointer wide. The size is
  ** examined only when the value really is a blob. */
  if( sqlite3_value_type(pVal)==SQLITE_BLOB
   && sqlite3_value_bytes(pVal)==sizeof(Fts3Cursor *)
  ){
    Fts3Cursor *pCursor;
    /* Copy the bytes out rather than dereferencing the blob as a pointer
    ** to a pointer. */
    memcpy(&pCursor, sqlite3_value_blob(pVal), sizeof(Fts3Cursor *));
    *ppCsr = pCursor;
    return SQLITE_OK;
  }

  /* Anything else is rejected with an error naming the SQL function. */
  zErr = sqlite3_mprintf("illegal first argument to %s", zFunc);
  sqlite3_result_error(pContext, zErr, -1);
  sqlite3_free(zErr);
  return SQLITE_ERROR;
}
/*
** Implementation of the snippet() function for FTS3
*/
static void fts3SnippetFunc(
  sqlite3_context *pContext,      /* SQLite function call context */
  int nVal,                       /* Size of apVal[] array */
  sqlite3_value **apVal           /* Array of arguments */
){
  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
  const char *zStart = "<b>";     /* Text placed before each match */
  const char *zEnd = "</b>";      /* Text placed after each match */
  const char *zEllipsis = "<b>...</b>";
  int iCol = -1;                  /* Column argument; -1 unless supplied */
  int nToken = 15;                /* Default number of tokens in snippet */

  /* There must be at least one argument passed to this function (otherwise
  ** the non-overloaded version would have been called instead of this one).
  */
  assert( nVal>=1 );

  if( nVal>6 ){
    sqlite3_result_error(pContext,
        "wrong number of arguments to function snippet()", -1);
    return;
  }
  if( fts3FunctionArg(pContext, "snippet", apVal[0], &pCsr) ) return;

  /* Optional arguments override the defaults. Each test includes all of the
  ** lower-numbered arguments as well, and they are read from the last one
  ** down to the first. */
  if( nVal>=6 ) nToken = sqlite3_value_int(apVal[5]);
  if( nVal>=5 ) iCol = sqlite3_value_int(apVal[4]);
  if( nVal>=4 ) zEllipsis = (const char*)sqlite3_value_text(apVal[3]);
  if( nVal>=3 ) zEnd = (const char*)sqlite3_value_text(apVal[2]);
  if( nVal>=2 ) zStart = (const char*)sqlite3_value_text(apVal[1]);

  /* A NULL text pointer for any of the three strings is reported as an
  ** out-of-memory error. */
  if( zStart==0 || zEnd==0 || zEllipsis==0 ){
    sqlite3_result_error_nomem(pContext);
    return;
  }

  /* A request for zero tokens yields the empty string. */
  if( nToken==0 ){
    sqlite3_result_text(pContext, "", -1, SQLITE_STATIC);
    return;
  }

  if( fts3CursorSeek(pContext, pCsr)==SQLITE_OK ){
    sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken);
  }
}
/*
** Implementation of the offsets() function for FTS3
*/
static void fts3OffsetsFunc(
  sqlite3_context *pContext,      /* SQLite function call context */
  int nVal,                       /* Size of argument array */
  sqlite3_value **apVal           /* Array of arguments */
){
  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */

  UNUSED_PARAMETER(nVal);
  assert( nVal==1 );

  /* On a bad first argument fts3FunctionArg() has already set the error. */
  if( fts3FunctionArg(pContext, "offsets", apVal[0], &pCsr)!=SQLITE_OK ){
    return;
  }
  assert( pCsr );

  /* The offsets are produced only if the cursor seek succeeds. */
  if( fts3CursorSeek(pContext, pCsr)!=SQLITE_OK ) return;
  sqlite3Fts3Offsets(pContext, pCsr);
}
/*
** Implementation of the special optimize() function for FTS3. This
** function merges all segments in the database to a single segment.
** Example usage is:
**
** SELECT optimize(t) FROM t LIMIT 1;
**
** where 't' is the name of an FTS3 table.
*/
static void fts3OptimizeFunc(
  sqlite3_context *pContext,      /* SQLite function call context */
  int nVal,                       /* Size of argument array */
  sqlite3_value **apVal           /* Array of arguments */
){
  Fts3Cursor *pCursor;            /* Cursor handle passed through apVal[0] */
  Fts3Table *pTab;                /* Virtual table handle */
  int rc;                         /* Return code */

  UNUSED_PARAMETER(nVal);
  assert( nVal==1 );
  if( fts3FunctionArg(pContext, "optimize", apVal[0], &pCursor) ) return;

  pTab = (Fts3Table *)pCursor->base.pVtab;
  assert( pTab );

  /* Map the outcome of the optimize call to the SQL result: SQLITE_OK and
  ** SQLITE_DONE each produce a fixed message, anything else is an error. */
  rc = sqlite3Fts3Optimize(pTab);
  if( rc==SQLITE_OK ){
    sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC);
  }else if( rc==SQLITE_DONE ){
    sqlite3_result_text(pContext, "Index already optimal", -1, SQLITE_STATIC);
  }else{
    sqlite3_result_error_code(pContext, rc);
  }
}
/*
** Implementation of the matchinfo() function for FTS3
*/
static void fts3MatchinfoFunc(
  sqlite3_context *pContext,      /* SQLite function call context */
  int nVal,                       /* Size of argument array */
  sqlite3_value **apVal           /* Array of arguments */
){
  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
  const char *zArg = 0;           /* Optional second argument, else NULL */

  assert( nVal==1 || nVal==2 );

  /* On a bad first argument fts3FunctionArg() has already set the error. */
  if( fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr)!=SQLITE_OK ){
    return;
  }

  if( nVal>1 ){
    zArg = (const char *)sqlite3_value_text(apVal[1]);
  }
  sqlite3Fts3Matchinfo(pContext, pCsr, zArg);
}
/*
** This routine implements the xFindFunction method for the FTS3
** virtual table.
*/
static int fts3FindFunctionMethod(
  sqlite3_vtab *pVtab,            /* Virtual table handle */
  int nArg,                       /* Number of SQL function arguments */
  const char *zName,              /* Name of SQL function */
  void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
  void **ppArg                    /* Unused */
){
  /* Table of the SQL functions this module overloads. */
  struct Overloaded {
    const char *zName;
    void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
  } aOverload[] = {
    { "snippet", fts3SnippetFunc },
    { "offsets", fts3OffsetsFunc },
    { "optimize", fts3OptimizeFunc },
    { "matchinfo", fts3MatchinfoFunc },
  };
  int iEntry = 0;                 /* Index into aOverload[] */

  UNUSED_PARAMETER(pVtab);
  UNUSED_PARAMETER(nArg);
  UNUSED_PARAMETER(ppArg);

  /* Exact, case-sensitive comparison against each entry. On a match the
  ** implementation is handed back and 1 is returned. */
  while( iEntry<SizeofArray(aOverload) ){
    if( 0==strcmp(zName, aOverload[iEntry].zName) ){
      *pxFunc = aOverload[iEntry].xFunc;
      return 1;
    }
    iEntry++;
  }

  /* No function of the specified name was found. Return 0. */
  return 0;
}
/*
** Implementation of FTS3 xRename method. Rename an fts3 table.
*/
static int fts3RenameMethod(
  sqlite3_vtab *pVtab,            /* Virtual table handle */
  const char *zName               /* New name of table */
){
  Fts3Table *pTab = (Fts3Table *)pVtab;
  sqlite3 *db = pTab->db;         /* Database connection */
  int rc;                         /* Return Code */

  /* At this point it must be known if the %_stat table exists or not.
  ** So bHasStat may not be 2. */
  rc = fts3SetHasStat(pTab);

  /* As it happens, the pending terms table is always empty here. This is
  ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction
  ** always opens a savepoint transaction. And the xSavepoint() method
  ** flushes the pending terms table. But leave the (no-op) call to
  ** PendingTermsFlush() in, in case that changes.
  */
  assert( pTab->nPendingData==0 );
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts3PendingTermsFlush(pTab);
  }

  /* Rename the shadow tables. The first three are conditional: %_content
  ** only when no external content table is configured, %_docsize and
  ** %_stat only when the table has them. rc is passed by address to every
  ** fts3DbExec() call. */
  if( pTab->zContentTbl==0 ){
    fts3DbExec(&rc, db,
        "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';",
        pTab->zDb, pTab->zName, zName
    );
  }
  if( pTab->bHasDocsize ){
    fts3DbExec(&rc, db,
        "ALTER TABLE %Q.'%q_docsize' RENAME TO '%q_docsize';",
        pTab->zDb, pTab->zName, zName
    );
  }
  if( pTab->bHasStat ){
    fts3DbExec(&rc, db,
        "ALTER TABLE %Q.'%q_stat' RENAME TO '%q_stat';",
        pTab->zDb, pTab->zName, zName
    );
  }

  /* %_segments and %_segdir are always renamed. */
  fts3DbExec(&rc, db,
      "ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';",
      pTab->zDb, pTab->zName, zName
  );
  fts3DbExec(&rc, db,
      "ALTER TABLE %Q.'%q_segdir' RENAME TO '%q_segdir';",
      pTab->zDb, pTab->zName, zName
  );
  return rc;
}
/*
** The xSavepoint() method.
**
** Flush the contents of the pending-terms table to disk.
*/
static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
  Fts3Table *pTab = (Fts3Table *)pVtab;
  UNUSED_PARAMETER(iSavepoint);

  assert( pTab->inTransaction );
  assert( pTab->mxSavepoint < iSavepoint );
  TESTONLY( pTab->mxSavepoint = iSavepoint );

  /* The flush is skipped while bIgnoreSavepoint is set. */
  if( pTab->bIgnoreSavepoint ) return SQLITE_OK;
  return fts3SyncMethod(pVtab);
}
/*
** The xRelease() method.
**
** This is a no-op.
*/
static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
  /* pTab exists only where TESTONLY() expands to its argument; every use
  ** below is inside assert() or TESTONLY(). */
  TESTONLY( Fts3Table *pTab = (Fts3Table*)pVtab );
  UNUSED_PARAMETER(pVtab);
  UNUSED_PARAMETER(iSavepoint);

  assert( pTab->inTransaction );
  assert( pTab->mxSavepoint >= iSavepoint );
  TESTONLY( pTab->mxSavepoint = iSavepoint-1 );
  return SQLITE_OK;
}
/*
** The xRollbackTo() method.
**
** Discard the contents of the pending terms table.
*/
static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
  Fts3Table *pTab = (Fts3Table*)pVtab;
  UNUSED_PARAMETER(iSavepoint);

  assert( pTab->inTransaction );
  assert( pTab->mxSavepoint >= iSavepoint );
  TESTONLY( pTab->mxSavepoint = iSavepoint );

  /* Drop everything buffered in the pending-terms hash tables. */
  sqlite3Fts3PendingTermsClear(pTab);
  return SQLITE_OK;
}
/*
** Method table for the FTS3 virtual table module. Entries are positional,
** so each one is labelled with the sqlite3_module field it fills. iVersion
** is 2, and the table supplies the xSavepoint, xRelease and xRollbackTo
** methods after xRename.
*/
static const sqlite3_module fts3Module = {
/* iVersion */ 2,
/* xCreate */ fts3CreateMethod,
/* xConnect */ fts3ConnectMethod,
/* xBestIndex */ fts3BestIndexMethod,
/* xDisconnect */ fts3DisconnectMethod,
/* xDestroy */ fts3DestroyMethod,
/* xOpen */ fts3OpenMethod,
/* xClose */ fts3CloseMethod,
/* xFilter */ fts3FilterMethod,
/* xNext */ fts3NextMethod,
/* xEof */ fts3EofMethod,
/* xColumn */ fts3ColumnMethod,
/* xRowid */ fts3RowidMethod,
/* xUpdate */ fts3UpdateMethod,
/* xBegin */ fts3BeginMethod,
/* xSync */ fts3SyncMethod,
/* xCommit */ fts3CommitMethod,
/* xRollback */ fts3RollbackMethod,
/* xFindFunction */ fts3FindFunctionMethod,
/* xRename */ fts3RenameMethod,
/* xSavepoint */ fts3SavepointMethod,
/* xRelease */ fts3ReleaseMethod,
/* xRollbackTo */ fts3RollbackToMethod,
};
/*
** This function is registered as the module destructor (called when an
** FTS3 enabled database connection is closed). It frees the memory
** allocated for the tokenizer hash table.
*/
static void hashDestroy(void *p){
  /* Empty the table first, then free the Fts3Hash structure itself. */
  sqlite3Fts3HashClear((Fts3Hash *)p);
  sqlite3_free(p);
}
/*
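** The fts3 built-in tokenizers - "simple", "porter", "unicode61" and "icu" -
** are implemented in separate source files (fts3_tokenizer1.c, fts3_porter.c
** and fts3_icu.c for "simple", "porter" and "icu" respectively). The
** following forward declarations are for functions defined in those files
** and used to retrieve the respective implementations. The unicode
** declaration is compiled only when SQLITE_DISABLE_FTS3_UNICODE is not
** defined, and the ICU declaration only when SQLITE_ENABLE_ICU is defined.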
**
** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed
** to by the argument to point to the "simple" tokenizer implementation.
** And so on.
*/
void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#ifndef SQLITE_DISABLE_FTS3_UNICODE
void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule);
#endif
#ifdef SQLITE_ENABLE_ICU
void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#endif
/*
** Initialize the fts3 extension. If this extension is built as part
** of the sqlite library, then this function is called directly by
** SQLite. If fts3 is built as a dynamically loadable extension, this
** function is called by the sqlite3_extension_init() entry point.
**
** The function builds a hash table mapping tokenizer names to tokenizer
** implementations, then registers the "fts3" and "fts4" virtual table
** modules (both backed by fts3Module and sharing that hash table) with
** database handle db.
**
** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
*/
int sqlite3Fts3Init(sqlite3 *db){
int rc = SQLITE_OK;
Fts3Hash *pHash = 0;
const sqlite3_tokenizer_module *pSimple = 0;
const sqlite3_tokenizer_module *pPorter = 0;
#ifndef SQLITE_DISABLE_FTS3_UNICODE
const sqlite3_tokenizer_module *pUnicode = 0;
#endif
#ifdef SQLITE_ENABLE_ICU
const sqlite3_tokenizer_module *pIcu = 0;
sqlite3Fts3IcuTokenizerModule(&pIcu);
#endif
#ifndef SQLITE_DISABLE_FTS3_UNICODE
sqlite3Fts3UnicodeTokenizer(&pUnicode);
#endif
#ifdef SQLITE_TEST
rc = sqlite3Fts3InitTerm(db);
if( rc!=SQLITE_OK ) return rc;
#endif
/* Nothing has been allocated by this function yet, so the early returns
** above and below need no cleanup. */
rc = sqlite3Fts3InitAux(db);
if( rc!=SQLITE_OK ) return rc;
sqlite3Fts3SimpleTokenizerModule(&pSimple);
sqlite3Fts3PorterTokenizerModule(&pPorter);
/* Allocate and initialize the hash-table used to store tokenizers. */
pHash = sqlite3_malloc(sizeof(Fts3Hash));
if( !pHash ){
rc = SQLITE_NOMEM;
}else{
sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
}
/* Load the built-in tokenizers into the hash table. The key length passed
** with each name is one more than its string length, i.e. it counts the
** nul-terminator. A non-zero return from any insert is reported as
** SQLITE_NOMEM. The "icu" entry is only inserted if an implementation
** pointer was actually obtained. */
if( rc==SQLITE_OK ){
if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)
|| sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter)
#ifndef SQLITE_DISABLE_FTS3_UNICODE
|| sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode)
#endif
#ifdef SQLITE_ENABLE_ICU
|| (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu))
#endif
){
rc = SQLITE_NOMEM;
}
}
#ifdef SQLITE_TEST
if( rc==SQLITE_OK ){
rc = sqlite3Fts3ExprInitTestInterface(db);
}
#endif
/* Create the virtual table wrapper around the hash-table and overload
** the scalar functions snippet(), offsets(), matchinfo() (with one and
** with two arguments) and optimize(). If this is successful, register
** the modules with sqlite.
*/
if( SQLITE_OK==rc
&& SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1))
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 1))
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 2))
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1))
){
/* Only the "fts3" registration carries the hashDestroy destructor. The
** "fts4" registration shares the same pHash but passes no destructor, so
** hashDestroy is associated with the table exactly once. */
rc = sqlite3_create_module_v2(
db, "fts3", &fts3Module, (void *)pHash, hashDestroy
);
if( rc==SQLITE_OK ){
rc = sqlite3_create_module_v2(
db, "fts4", &fts3Module, (void *)pHash, 0
);
}
if( rc==SQLITE_OK ){
rc = sqlite3Fts3InitTok(db, (void *)pHash);
}
/* NOTE(review): pHash is not freed here even if rc is an error code -
** presumably because ownership passed to the "fts3" module registration
** along with hashDestroy. Confirm against the sqlite3_create_module_v2()
** documentation. */
return rc;
}
/* An error has occurred. Delete the hash table and return the error code. */
assert( rc!=SQLITE_OK );
if( pHash ){
sqlite3Fts3HashClear(pHash);
sqlite3_free(pHash);
}
return rc;
}
/*
** Walk the expression tree headed by pExpr and allocate an
** Fts3MultiSegReader (stored in Fts3PhraseToken.pSegcsr) for every token
** of every phrase found.
**
** An Fts3SegReader object is a cursor that can seek or scan a range of
** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple
** Fts3SegReader objects internally to provide an interface to seek or scan
** within the union of all segments of a b-tree. Hence the name.
**
** If the allocated Fts3MultiSegReader just seeks to a single entry in a
** segment b-tree (if the term is not a prefix or it is a prefix for which
** there exists prefix b-tree of the right length) then it may be traversed
** and merged incrementally. Otherwise, it has to be merged into an in-memory
** doclist and then traversed.
**
** Output *pnToken is incremented by the number of tokens in each phrase
** visited and *pnOr by one for each FTSQUERY_OR node visited. The function
** is a no-op if pExpr is NULL or *pRc is not SQLITE_OK on entry. If a
** reader cannot be allocated, *pRc is set to the error code and the walk of
** the current phrase stops immediately.
*/
static void fts3EvalAllocateReaders(
  Fts3Cursor *pCsr,               /* FTS cursor handle */
  Fts3Expr *pExpr,                /* Allocate readers for this expression */
  int *pnToken,                   /* OUT: Total number of tokens in phrase. */
  int *pnOr,                      /* OUT: Total number of OR nodes in expr. */
  int *pRc                        /* IN/OUT: Error code */
){
  Fts3Phrase *pPhrase;
  int nTok;
  int iTok;

  if( pExpr==0 || *pRc!=SQLITE_OK ) return;

  if( pExpr->eType!=FTSQUERY_PHRASE ){
    /* Interior node: count it if it is an OR, then descend both sides. */
    if( pExpr->eType==FTSQUERY_OR ){
      *pnOr += 1;
    }
    fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc);
    fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc);
    return;
  }

  /* Leaf node: one reader per phrase token. */
  pPhrase = pExpr->pPhrase;
  nTok = pPhrase->nToken;
  *pnToken += nTok;
  for(iTok=0; iTok<nTok; iTok++){
    Fts3PhraseToken *pTok = &pPhrase->aToken[iTok];
    int rc = fts3TermSegReaderCursor(
        pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pSegcsr
    );
    if( rc!=SQLITE_OK ){
      *pRc = rc;
      return;
    }
  }

  /* No token doclist has been merged into the phrase doclist yet. */
  assert( pPhrase->iDoclistToken==0 );
  pPhrase->iDoclistToken = -1;
}
/*
** Arguments pList/nList contain the doclist for token iToken of phrase p.
** It is merged into the main doclist stored in p->doclist.aAll/nAll.
**
** This function assumes that pList points to a buffer allocated using
** sqlite3_malloc(). This function takes responsibility for eventually
** freeing the buffer.
**
** Four cases are handled:
**
**   * pList is NULL: the phrase doclist is discarded and left empty.
**   * No token has been merged yet (p->iDoclistToken<0): pList simply
**     becomes the phrase doclist.
**   * The phrase doclist is already empty: pList is freed.
**   * Otherwise the two lists are phrase-merged, with whichever list
**     belongs to the earlier token acting as the left-hand side.
**
** Afterwards p->iDoclistToken is raised to iToken if iToken is larger.
**
** SQLITE_OK is returned if successful, or SQLITE_NOMEM if an error occurs.
*/
static int fts3EvalPhraseMergeToken(
  Fts3Table *pTab,                /* FTS Table pointer */
  Fts3Phrase *p,                  /* Phrase to merge pList/nList into */
  int iToken,                     /* Token pList/nList corresponds to */
  char *pList,                    /* Pointer to doclist */
  int nList                       /* Number of bytes in pList */
){
  Fts3Doclist *pDL = &p->doclist;
  int rc = SQLITE_OK;

  assert( iToken!=p->iDoclistToken );

  if( pList==0 ){
    sqlite3_free(pDL->aAll);
    pDL->aAll = 0;
    pDL->nAll = 0;
  }else if( p->iDoclistToken<0 ){
    pDL->aAll = pList;
    pDL->nAll = nList;
  }else if( pDL->aAll==0 ){
    sqlite3_free(pList);
  }else{
    /* True if the existing doclist belongs to a token left of iToken. */
    int bExistingIsLeft = (p->iDoclistToken<iToken);
    char *aLeft = bExistingIsLeft ? pDL->aAll : pList;
    int nLeft = bExistingIsLeft ? pDL->nAll : nList;
    char *aRight = bExistingIsLeft ? pList : pDL->aAll;
    int nRight = bExistingIsLeft ? nList : pDL->nAll;
    int nGap = bExistingIsLeft
        ? (iToken - p->iDoclistToken)
        : (p->iDoclistToken - iToken);

    rc = fts3DoclistPhraseMerge(
        pTab->bDescIdx, nGap, aLeft, nLeft, &aRight, &nRight
    );

    /* The left-hand buffer is released; the right-hand pointer/size pair,
    ** as left by the merge, becomes the phrase doclist. */
    sqlite3_free(aLeft);
    pDL->aAll = aRight;
    pDL->nAll = nRight;
  }

  if( p->iDoclistToken<iToken ){
    p->iDoclistToken = iToken;
  }
  return rc;
}
/*
** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist
** does not take deferred tokens into account.
**
** Each token that still has a segment cursor (pSegcsr) has its doclist
** fetched with fts3TermSelect() and folded into the phrase doclist with
** fts3EvalPhraseMergeToken(). Processing stops at the first error.
**
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
*/
static int fts3EvalPhraseLoad(
  Fts3Cursor *pCsr,               /* FTS Cursor handle */
  Fts3Phrase *p                   /* Phrase object */
){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int rc = SQLITE_OK;
  int iToken = 0;

  while( rc==SQLITE_OK && iToken<p->nToken ){
    Fts3PhraseToken *pTok = &p->aToken[iToken];

    /* A token is never both deferred and backed by a segment cursor. */
    assert( pTok->pDeferred==0 || pTok->pSegcsr==0 );

    if( pTok->pSegcsr!=0 ){
      char *aTokenList = 0;
      int nTokenList = 0;

      rc = fts3TermSelect(pTab, pTok, p->iColumn, &nTokenList, &aTokenList);
      if( rc==SQLITE_OK ){
        rc = fts3EvalPhraseMergeToken(pTab, p, iToken, aTokenList, nTokenList);
      }
    }

    assert( pTok->pSegcsr==0 );
    iToken++;
  }

  return rc;
}
/*
** This function is called on each phrase after the position lists for
** any deferred tokens have been loaded into memory. It updates the phrases
** current position list to include only those positions that are really
** instances of the phrase (after considering deferred tokens). If this
** means that the phrase does not appear in the current row, doclist.pList
** and doclist.nList are both zeroed.
**
** The work is done in two steps: first the position lists of all deferred
** tokens are phrase-merged with each other into aPoslist; then, if the
** phrase also has non-deferred tokens, aPoslist is phrase-merged with the
** position list already stored in pPhrase->doclist.pList.
**
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
*/
static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
int iToken; /* Used to iterate through phrase tokens */
char *aPoslist = 0; /* Position list for deferred tokens */
int nPoslist = 0; /* Number of bytes in aPoslist */
int iPrev = -1; /* Token number of previous deferred token */
assert( pPhrase->doclist.bFreeList==0 );
/* Step 1: combine the position lists of all deferred tokens. */
for(iToken=0; iToken<pPhrase->nToken; iToken++){
Fts3PhraseToken *pToken = &pPhrase->aToken[iToken];
Fts3DeferredToken *pDeferred = pToken->pDeferred;
if( pDeferred ){
char *pList;
int nList;
int rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList);
/* NOTE(review): aPoslist is not freed on this error return - confirm
** whether a buffer accumulated from earlier tokens can be held here. */
if( rc!=SQLITE_OK ) return rc;
if( pList==0 ){
/* No position list for this deferred token: the phrase is reported as
** not present in the current row. */
sqlite3_free(aPoslist);
pPhrase->doclist.pList = 0;
pPhrase->doclist.nList = 0;
return SQLITE_OK;
}else if( aPoslist==0 ){
/* First deferred token: adopt its list as the running result. */
aPoslist = pList;
nPoslist = nList;
}else{
/* Merge the running result (p1) with this token's list (p2). The output
** pointer aOut starts at pList, so the merged list overwrites pList in
** place; pList then replaces the old running buffer. */
char *aOut = pList;
char *p1 = aPoslist;
char *p2 = aOut;
assert( iPrev>=0 );
fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2);
sqlite3_free(aPoslist);
aPoslist = pList;
nPoslist = (int)(aOut - aPoslist);
if( nPoslist==0 ){
/* Merge produced no output: no match in this row. */
sqlite3_free(aPoslist);
pPhrase->doclist.pList = 0;
pPhrase->doclist.nList = 0;
return SQLITE_OK;
}
}
iPrev = iToken;
}
}
/* Step 2: iPrev>=0 means at least one deferred token was processed. */
if( iPrev>=0 ){
int nMaxUndeferred = pPhrase->iDoclistToken;
if( nMaxUndeferred<0 ){
/* iDoclistToken is negative, i.e. no token doclist was merged into the
** phrase doclist. The deferred-token result is the phrase position list,
** and the phrase takes ownership of the buffer (bFreeList). */
pPhrase->doclist.pList = aPoslist;
pPhrase->doclist.nList = nPoslist;
pPhrase->doclist.iDocid = pCsr->iPrevId;
pPhrase->doclist.bFreeList = 1;
}else{
int nDistance;
char *p1;
char *p2;
char *aOut;
/* Order the two inputs so that the list belonging to the earlier token
** position is passed as p1, and compute the token distance between the
** two positions. */
if( nMaxUndeferred>iPrev ){
p1 = aPoslist;
p2 = pPhrase->doclist.pList;
nDistance = nMaxUndeferred - iPrev;
}else{
p1 = pPhrase->doclist.pList;
p2 = aPoslist;
nDistance = iPrev - nMaxUndeferred;
}
/* NOTE(review): the output buffer is sized from nPoslist plus 8 bytes of
** slack; the reason for the specific slack value is not visible here. */
aOut = (char *)sqlite3_malloc(nPoslist+8);
if( !aOut ){
sqlite3_free(aPoslist);
return SQLITE_NOMEM;
}
pPhrase->doclist.pList = aOut;
if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){
/* aOut now points past the end of the merged output. */
pPhrase->doclist.bFreeList = 1;
pPhrase->doclist.nList = (int)(aOut - pPhrase->doclist.pList);
}else{
/* NOTE(review): this frees aOut rather than the saved start pointer; it
** relies on fts3PoslistPhraseMerge() leaving aOut unchanged when it
** returns zero - confirm against that function. */
sqlite3_free(aOut);
pPhrase->doclist.pList = 0;
pPhrase->doclist.nList = 0;
}
sqlite3_free(aPoslist);
}
}
return SQLITE_OK;
}
/*
** Maximum number of tokens a phrase may have to be considered for the
** incremental doclists strategy. fts3EvalPhraseStart() checks the phrase
** size against this limit, and fts3EvalIncrPhraseNext() uses it as the
** size of its on-stack array of per-token iterators.
*/
#define MAX_INCR_PHRASE_TOKENS 4
/*
** This function is called for each Fts3Phrase in a full-text query
** expression to initialize the mechanism for returning rows. Once this
** function has been called successfully on an Fts3Phrase, it may be
** used with fts3EvalPhraseNext() to iterate through the matching docids.
**
** If parameter bOptOk is true, then the phrase may (or may not) use the
** incremental loading strategy. Otherwise, the entire doclist is loaded into
** memory within this call.
**
** On return p->bIncr is 1 if the incremental strategy was selected and 0
** if the full doclist was loaded.
**
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
*/
static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int rc = SQLITE_OK;             /* Error code */
  int iTok;                       /* Iterator over phrase tokens */
  int bAnySegcsr = 0;             /* True if some token has a segment cursor */
  int bIncremental;               /* True if incremental loading is allowed */

  /* Determine if doclists may be loaded from disk incrementally. This is
  ** possible if the bOptOk argument is true, the FTS doclists will be
  ** scanned in forward order, and the phrase consists of
  ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are "^first"
  ** tokens or prefix tokens that cannot use a prefix-index. */
  bIncremental = (bOptOk
      && pCsr->bDesc==pTab->bDescIdx
      && p->nToken>0 && p->nToken<=MAX_INCR_PHRASE_TOKENS
#ifdef SQLITE_TEST
      && pTab->bNoIncrDoclist==0
#endif
  );

  /* Inspect tokens until one of them rules the incremental strategy out. */
  for(iTok=0; bIncremental==1 && iTok<p->nToken; iTok++){
    Fts3PhraseToken *pTok = &p->aToken[iTok];
    Fts3MultiSegReader *pReader = pTok->pSegcsr;

    if( pTok->bFirst ){
      bIncremental = 0;
    }else if( pReader!=0 && !pReader->bLookup ){
      bIncremental = 0;
    }
    if( pReader ){
      bAnySegcsr = 1;
    }
  }

  if( !bIncremental || !bAnySegcsr ){
    /* Load the full doclist for the phrase into memory. */
    rc = fts3EvalPhraseLoad(pCsr, p);
    p->bIncr = 0;
  }else{
    /* Use the incremental approach. A column number at or beyond the table's
    ** column count is passed on as -1. */
    int iCol = p->iColumn;
    if( iCol>=pTab->nColumn ){
      iCol = -1;
    }
    for(iTok=0; rc==SQLITE_OK && iTok<p->nToken; iTok++){
      Fts3PhraseToken *pTok = &p->aToken[iTok];
      Fts3MultiSegReader *pReader = pTok->pSegcsr;
      if( pReader ){
        rc = sqlite3Fts3MsrIncrStart(pTab, pReader, iCol, pTok->z, pTok->n);
      }
    }
    p->bIncr = 1;
  }

  assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr );
  return rc;
}
/*
** This function is used to iterate backwards (from the end to start)
** through doclists. It is used by this module to iterate through phrase
** doclists in reverse and by the fts3_write.c module to iterate through
** pending-terms lists when writing to databases with "order=desc".
**
** The doclist may be sorted in ascending (parameter bDescIdx==0) or
** descending (parameter bDescIdx==1) order of docid. Regardless, this
** function iterates from the end of the doclist to the beginning.
**
** On the first call *ppIter must be NULL and *piDocid zero. The function
** then positions the iterator on the last entry of the doclist. On each
** subsequent call it steps back one entry, or sets *pbEof to 1 if the
** iterator was already on the first entry.
*/
void sqlite3Fts3DoclistPrev(
int bDescIdx, /* True if the doclist is desc */
char *aDoclist, /* Pointer to entire doclist */
int nDoclist, /* Length of aDoclist in bytes */
char **ppIter, /* IN/OUT: Iterator pointer */
sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */
int *pnList, /* OUT: List length pointer */
u8 *pbEof /* OUT: End-of-file flag */
){
char *p = *ppIter;
assert( nDoclist>0 );
assert( *pbEof==0 );
assert( p || *piDocid==0 );
assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) );
if( p==0 ){
/* First call. Scan forwards through the whole doclist, accumulating docid
** deltas, to find the start of the final position list (pNext) and the
** docid it belongs to. */
sqlite3_int64 iDocid = 0;
char *pNext = 0;
char *pDocid = aDoclist;
char *pEnd = &aDoclist[nDoclist];
/* The first varint is always added as-is; the sign applied to later
** deltas depends on the sort order (set at the bottom of the loop). */
int iMul = 1;
while( pDocid<pEnd ){
sqlite3_int64 iDelta;
pDocid += sqlite3Fts3GetVarint(pDocid, &iDelta);
iDocid += (iMul * iDelta);
pNext = pDocid;
fts3PoslistCopy(0, &pDocid);
/* Skip any 0x00 bytes that follow the position list. */
while( pDocid<pEnd && *pDocid==0 ) pDocid++;
iMul = (bDescIdx ? -1 : 1);
}
*pnList = (int)(pEnd - pNext);
*ppIter = pNext;
*piDocid = iDocid;
}else{
/* Subsequent call. p points at the current position list; read the docid
** delta stored immediately before it and undo that delta. */
int iMul = (bDescIdx ? -1 : 1);
sqlite3_int64 iDelta;
fts3GetReverseVarint(&p, aDoclist, &iDelta);
*piDocid -= (iMul * iDelta);
if( p==aDoclist ){
/* The varint just read was the first one in the doclist. */
*pbEof = 1;
}else{
/* Step back over the previous entry's position list; its length is the
** distance travelled. */
char *pSave = p;
fts3ReversePoslist(aDoclist, &p);
*pnList = (int)(pSave - p);
}
*ppIter = p;
}
}
/*
** Iterate forwards through a doclist.
**
** On the first call *ppIter must be NULL and *piDocid zero; the first docid
** is then read directly into *piDocid. On later calls the iterator skips
** the current position list (and any 0x00 bytes following it) and either
** applies the next docid delta to *piDocid - negated if bDescIdx is true -
** or sets *pbEof to 1 if the end of the doclist has been reached.
*/
void sqlite3Fts3DoclistNext(
  int bDescIdx,                   /* True if the doclist is desc */
  char *aDoclist,                 /* Pointer to entire doclist */
  int nDoclist,                   /* Length of aDoclist in bytes */
  char **ppIter,                  /* IN/OUT: Iterator pointer */
  sqlite3_int64 *piDocid,         /* IN/OUT: Docid pointer */
  u8 *pbEof                       /* OUT: End-of-file flag */
){
  char *pCur = *ppIter;
  char *pEnd = &aDoclist[nDoclist];

  assert( nDoclist>0 );
  assert( *pbEof==0 );
  assert( pCur || *piDocid==0 );
  assert( !pCur || (pCur>=aDoclist && pCur<=&aDoclist[nDoclist]) );

  if( pCur==0 ){
    pCur = aDoclist;
    pCur += sqlite3Fts3GetVarint(pCur, piDocid);
  }else{
    fts3PoslistCopy(0, &pCur);
    while( pCur<pEnd && *pCur==0 ){
      pCur++;
    }
    if( pCur<pEnd ){
      int iSign = (bDescIdx ? -1 : 1);
      sqlite3_int64 iDelta;
      pCur += sqlite3Fts3GetVarint(pCur, &iDelta);
      *piDocid += (iSign * iDelta);
    }else{
      *pbEof = 1;
    }
  }

  *ppIter = pCur;
}
/*
** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof
** to true if EOF is reached. Otherwise *pbEof is set to false and the
** iDocid, pList, nList and pNextDocid fields of pDL are updated to describe
** the new entry.
*/
static void fts3EvalDlPhraseNext(
  Fts3Table *pTab,
  Fts3Doclist *pDL,
  u8 *pbEof
){
  char *pEnd = &pDL->aAll[pDL->nAll];      /* 1 byte past end of aAll */
  char *pCur;                              /* Used to iterate through aAll */
  sqlite3_int64 iDelta;

  /* Resume from the saved position, or start from the top on first use. */
  pCur = pDL->pNextDocid ? pDL->pNextDocid : pDL->aAll;

  if( pCur>=pEnd ){
    /* We have already reached the end of this doclist. EOF. */
    *pbEof = 1;
    return;
  }

  pCur += sqlite3Fts3GetVarint(pCur, &iDelta);
  if( pTab->bDescIdx!=0 && pDL->pNextDocid!=0 ){
    /* Descending index and not the first entry: deltas are subtracted. */
    pDL->iDocid -= iDelta;
  }else{
    pDL->iDocid += iDelta;
  }

  pDL->pList = pCur;
  fts3PoslistCopy(0, &pCur);
  pDL->nList = (int)(pCur - pDL->pList);

  /* pCur now points just past the 0x00 that terminates the position-
  ** list for document pDL->iDocid. However, if this position-list was
  ** edited in place by fts3EvalNearTrim(), then pCur may not actually
  ** point to the start of the next docid value. The following loop deals
  ** with this case by advancing pCur past the zero-padding added by
  ** fts3EvalNearTrim(). */
  while( pCur<pEnd && *pCur==0 ){
    pCur++;
  }

  pDL->pNextDocid = pCur;
  assert( pCur>=&pDL->aAll[pDL->nAll] || *pCur );
  *pbEof = 0;
}
/*
** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext().
** One instance describes the current entry of the iterator for a single
** token of a phrase.
*/
typedef struct TokenDoclist TokenDoclist;
struct TokenDoclist {
int bIgnore; /* Set by incrPhraseTokenNext() if token has no iterator */
sqlite3_int64 iDocid; /* Docid of the current entry */
char *pList; /* Position list of the current entry */
int nList; /* Size of pList in bytes */
};
/*
** Token iToken of pPhrase is part of a multi-token phrase that is being
** evaluated incrementally. Advance it to the next matching document in the
** database and populate output variable *p with the details of the new
** entry. Or, if the iterator has reached EOF, set *pbEof to true.
**
** Three kinds of token are distinguished:
**
**   * The token whose doclist is held in pPhrase->doclist
**     (iToken==pPhrase->iDoclistToken): advanced via fts3EvalDlPhraseNext().
**   * A token with an incremental segment cursor: advanced via
**     sqlite3Fts3MsrIncrNext().
**   * Any other token: p->bIgnore is set and nothing else is done.
**
** If an error occurs, return an SQLite error code. Otherwise, return
** SQLITE_OK.
*/
static int incrPhraseTokenNext(
  Fts3Table *pTab,                /* Virtual table handle */
  Fts3Phrase *pPhrase,            /* Phrase to advance token of */
  int iToken,                     /* Specific token to advance */
  TokenDoclist *p,                /* OUT: Docid and doclist for new entry */
  u8 *pbEof                       /* OUT: True if iterator is at EOF */
){
  Fts3PhraseToken *pTok = &pPhrase->aToken[iToken];
  int rc;

  if( iToken==pPhrase->iDoclistToken ){
    Fts3Doclist *pDL = &pPhrase->doclist;
    assert( p->bIgnore==0 );
    assert( pTok->pSegcsr==0 );
    fts3EvalDlPhraseNext(pTab, pDL, pbEof);
    p->pList = pDL->pList;
    p->nList = pDL->nList;
    p->iDocid = pDL->iDocid;
    return SQLITE_OK;
  }

  assert( pTok->pDeferred==0 );
  assert( pTok->pSegcsr || pPhrase->iDoclistToken>=0 );

  if( pTok->pSegcsr==0 ){
    p->bIgnore = 1;
    return SQLITE_OK;
  }

  assert( p->bIgnore==0 );
  rc = sqlite3Fts3MsrIncrNext(
      pTab, pTok->pSegcsr, &p->iDocid, &p->pList, &p->nList
  );
  /* A NULL position list is the end-of-data signal. */
  if( p->pList==0 ){
    *pbEof = 1;
  }
  return rc;
}
/*
** The phrase iterator passed as the second argument:
**
** * features at least one token that uses an incremental doclist, and
**
** * does not contain any deferred tokens.
**
** Advance it to the next matching document in the database and populate
** the Fts3Doclist.pList and nList fields.
**
** If there is no "next" entry and no error occurs, then *pbEof is set to
** 1 before returning. Otherwise, if no error occurs and the iterator is
** successfully advanced, *pbEof is set to 0.
**
** If an error occurs, return an SQLite error code. Otherwise, return
** SQLITE_OK.
*/
static int fts3EvalIncrPhraseNext(
Fts3Cursor *pCsr, /* FTS Cursor handle */
Fts3Phrase *p, /* Phrase object to advance to next docid */
u8 *pbEof /* OUT: Set to 1 if EOF */
){
int rc = SQLITE_OK;
Fts3Doclist *pDL = &p->doclist;
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
u8 bEof = 0;
/* This is only called if it is guaranteed that the phrase has at least
** one incremental token. In which case the bIncr flag is set. */
assert( p->bIncr==1 );
if( p->nToken==1 && p->bIncr ){
/* Single-token phrase: the token's incremental reader provides the
** docid and position list directly. A NULL position list means EOF. */
rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr,
&pDL->iDocid, &pDL->pList, &pDL->nList
);
if( pDL->pList==0 ) bEof = 1;
}else{
/* NOTE(review): bDescDoclist is not referenced again within this
** function body; it may be used by the DOCID_CMP() macro - confirm. */
int bDescDoclist = pCsr->bDesc;
struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS];
memset(a, 0, sizeof(a));
assert( p->nToken<=MAX_INCR_PHRASE_TOKENS );
assert( p->iDoclistToken<MAX_INCR_PHRASE_TOKENS );
/* Each iteration of this loop finds the next docid common to all token
** iterators and then tests whether that row really contains the phrase.
** The loop exits on EOF or via "break" once a match has been found. */
while( bEof==0 ){
int bMaxSet = 0;
sqlite3_int64 iMax = 0; /* Largest docid for all iterators */
int i; /* Used to iterate through tokens */
/* Advance the iterator for each token in the phrase once. */
for(i=0; rc==SQLITE_OK && i<p->nToken && bEof==0; i++){
rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof);
if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){
iMax = a[i].iDocid;
bMaxSet = 1;
}
}
assert( rc!=SQLITE_OK || (p->nToken>=1 && a[p->nToken-1].bIgnore==0) );
assert( rc!=SQLITE_OK || bMaxSet );
/* Keep advancing iterators until they all point to the same document */
for(i=0; i<p->nToken; i++){
while( rc==SQLITE_OK && bEof==0
&& a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0
){
rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof);
if( DOCID_CMP(a[i].iDocid, iMax)>0 ){
/* This iterator overshot the target. Raise the target and restart the
** scan from token 0; the enclosing while-condition is re-evaluated
** with i==0 before the for-loop increments i. */
iMax = a[i].iDocid;
i = 0;
}
}
}
/* Check if the current entries really are a phrase match */
if( bEof==0 ){
int nList = 0;
int nByte = a[p->nToken-1].nList;
/* Work on a private copy of the last token's position list; the merges
** below write their output into this buffer in place. */
char *aDoclist = sqlite3_malloc(nByte+1);
/* NOTE(review): this early return leaves *pbEof unwritten. */
if( !aDoclist ) return SQLITE_NOMEM;
memcpy(aDoclist, a[p->nToken-1].pList, nByte+1);
for(i=0; i<(p->nToken-1); i++){
if( a[i].bIgnore==0 ){
char *pL = a[i].pList;
char *pR = aDoclist;
char *pOut = aDoclist;
/* Distance in tokens between token i and the last token. */
int nDist = p->nToken-1-i;
int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR);
if( res==0 ) break;
nList = (int)(pOut - aDoclist);
}
}
if( i==(p->nToken-1) ){
/* No merge failed: the row is a match. The doclist object takes
** ownership of aDoclist (bFreeList). */
pDL->iDocid = iMax;
pDL->pList = aDoclist;
pDL->nList = nList;
pDL->bFreeList = 1;
break;
}
sqlite3_free(aDoclist);
}
}
}
*pbEof = bEof;
return rc;
}
/*
** Attempt to move the phrase iterator to point to the next matching docid.
** If an error occurs, return an SQLite error code. Otherwise, return
** SQLITE_OK.
**
** If there is no "next" entry and no error occurs, then *pbEof is set to
** 1 before returning. Otherwise, if no error occurs and the iterator is
** successfully advanced, *pbEof is set to 0.
**
** Incremental phrases are advanced by fts3EvalIncrPhraseNext(). For fully
** loaded doclists, the in-memory list is walked backwards when the cursor
** direction differs from the index order (and the list is not empty), and
** forwards otherwise.
*/
static int fts3EvalPhraseNext(
  Fts3Cursor *pCsr,               /* FTS Cursor handle */
  Fts3Phrase *p,                  /* Phrase object to advance to next docid */
  u8 *pbEof                       /* OUT: Set to 1 if EOF */
){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  Fts3Doclist *pDL = &p->doclist;

  if( p->bIncr ){
    return fts3EvalIncrPhraseNext(pCsr, p, pbEof);
  }

  if( pDL->nAll && pCsr->bDesc!=pTab->bDescIdx ){
    sqlite3Fts3DoclistPrev(
        pTab->bDescIdx, pDL->aAll, pDL->nAll,
        &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof
    );
    pDL->pList = pDL->pNextDocid;
  }else{
    fts3EvalDlPhraseNext(pTab, pDL, pbEof);
  }
  return SQLITE_OK;
}
/*
** If *pRc is not SQLITE_OK when this function is called, or if pExpr is
** NULL, it is a no-op. Otherwise, fts3EvalPhraseStart() is called on all
** phrases within the expression. Also the Fts3Expr.bDeferred variable is
** set to true for any expressions for which all descendent tokens are
** deferred.
**
** fts3EvalPhraseStart() is always invoked here with bOptOk set to 1, so
** one or more tokens within the expression may be loaded incrementally,
** meaning Fts3Phrase.doclist.aAll/nAll is not available for them.
**
** If an error occurs within this function, *pRc is set to an SQLite error
** code before returning.
*/
static void fts3EvalStartReaders(
  Fts3Cursor *pCsr,               /* FTS Cursor handle */
  Fts3Expr *pExpr,                /* Expression to initialize phrases in */
  int *pRc                        /* IN/OUT: Error code */
){
  if( pExpr==0 || *pRc!=SQLITE_OK ) return;

  if( pExpr->eType==FTSQUERY_PHRASE ){
    Fts3Phrase *pPhrase = pExpr->pPhrase;
    int nToken = pPhrase->nToken;

    if( nToken!=0 ){
      /* Find the first token that is not deferred, if there is one. */
      int iTok = 0;
      while( iTok<nToken && pPhrase->aToken[iTok].pDeferred!=0 ){
        iTok++;
      }
      pExpr->bDeferred = (iTok==nToken);
    }
    *pRc = fts3EvalPhraseStart(pCsr, 1, pPhrase);
  }else{
    fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc);
    fts3EvalStartReaders(pCsr, pExpr->pRight, pRc);
    /* An interior node is deferred only if both of its children are. */
    pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
  }
}
/*
** An array of the following structures is assembled as part of the process
** of selecting tokens to defer before the query starts executing (as part
** of the xFilter() method). There is one element in the array for each
** token in the FTS expression.
**
** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong
** to phrases that are connected only by AND and NEAR operators (not OR or
** NOT). When determining tokens to defer, each AND/NEAR cluster is considered
** separately. The root of a token's AND/NEAR cluster is stored in
** Fts3TokenAndCost.pRoot.
**
** The array is populated by fts3EvalTokenCosts(). fts3EvalSelectDeferred()
** sets pToken to NULL once it has dealt with an entry, so a NULL pToken
** marks an entry as already processed.
*/
typedef struct Fts3TokenAndCost Fts3TokenAndCost;
struct Fts3TokenAndCost {
Fts3Phrase *pPhrase; /* The phrase the token belongs to */
int iToken; /* Position of token in phrase */
Fts3PhraseToken *pToken; /* The token itself (NULL once processed) */
Fts3Expr *pRoot; /* Root of NEAR/AND cluster */
int nOvfl; /* Number of overflow pages to load doclist */
int iCol; /* The column the token must match */
};
/*
** This function is used to populate an allocated Fts3TokenAndCost array.
**
** For a phrase node, one entry is appended at *(*ppTC)++ for each token,
** tagged with the current cluster root pRoot and with the overflow-page
** count reported by sqlite3Fts3MsrOvfl(). For an OR node, each child
** starts a new cluster: the child is appended at *(*ppOr)++ and becomes
** the root passed down its own subtree. AND and NEAR nodes pass pRoot
** through unchanged. NOT nodes are not descended into.
**
** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
** Otherwise, if an error occurs during execution, *pRc is set to an
** SQLite error code.
*/
static void fts3EvalTokenCosts(
  Fts3Cursor *pCsr,               /* FTS Cursor handle */
  Fts3Expr *pRoot,                /* Root of current AND/NEAR cluster */
  Fts3Expr *pExpr,                /* Expression to consider */
  Fts3TokenAndCost **ppTC,        /* Write new entries to *(*ppTC)++ */
  Fts3Expr ***ppOr,               /* Write new OR root to *(*ppOr)++ */
  int *pRc                        /* IN/OUT: Error code */
){
  int bIsOr;

  if( *pRc!=SQLITE_OK ) return;

  if( pExpr->eType==FTSQUERY_PHRASE ){
    Fts3Phrase *pPhrase = pExpr->pPhrase;
    int iTok = 0;
    while( *pRc==SQLITE_OK && iTok<pPhrase->nToken ){
      Fts3TokenAndCost *pEntry = *ppTC;
      *ppTC = pEntry + 1;
      pEntry->pPhrase = pPhrase;
      pEntry->iToken = iTok;
      pEntry->pRoot = pRoot;
      pEntry->pToken = &pPhrase->aToken[iTok];
      pEntry->iCol = pPhrase->iColumn;
      *pRc = sqlite3Fts3MsrOvfl(pCsr, pEntry->pToken->pSegcsr, &pEntry->nOvfl);
      iTok++;
    }
    return;
  }

  if( pExpr->eType==FTSQUERY_NOT ) return;

  assert( pExpr->eType==FTSQUERY_OR
       || pExpr->eType==FTSQUERY_AND
       || pExpr->eType==FTSQUERY_NEAR
  );
  assert( pExpr->pLeft && pExpr->pRight );
  bIsOr = (pExpr->eType==FTSQUERY_OR);

  /* Left subtree. The OR root is recorded before descending so that the
  ** order of entries in the OR array follows the order of the walk. */
  if( bIsOr ){
    pRoot = pExpr->pLeft;
    *(*ppOr)++ = pRoot;
  }
  fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc);

  /* Right subtree. */
  if( bIsOr ){
    pRoot = pExpr->pRight;
    *(*ppOr)++ = pRoot;
  }
  fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc);
}
/*
** Determine the average document (row) size in pages. If successful,
** write this value to *pnPage and return SQLITE_OK. Otherwise, return
** an SQLite error code.
**
** The average document size in pages is calculated by first determining
** the average size in bytes, B. If B is less than the amount of data that
** will fit on a single leaf page of an intkey table in this database, then
** the average docsize is 1. Otherwise, it is 1 plus the number of overflow
** pages consumed by a record B bytes in size.
**
** The result is cached in pCsr->nRowAvg (and the document count in
** pCsr->nDoc), so the %_stat table is only read on the first call for a
** given cursor.
**
** FTS_CORRUPT_VTAB is returned if the stat record is empty or decodes to
** a zero document count or a zero byte count.
*/
static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
  if( pCsr->nRowAvg==0 ){
    /* The average document size, which is required to calculate the cost
    ** of each doclist, has not yet been determined. Read the required
    ** data from the %_stat table to calculate it.
    **
    ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3
    ** varints, where nCol is the number of columns in the FTS3 table.
    ** The first varint is the number of documents currently stored in
    ** the table. The following nCol varints contain the total amount of
    ** data stored in all rows of each column of the table, from left
    ** to right.
    */
    int rc;
    Fts3Table *p = (Fts3Table*)pCsr->base.pVtab;
    sqlite3_stmt *pStmt;
    sqlite3_int64 nDoc = 0;
    sqlite3_int64 nByte = 0;
    const char *pEnd;
    const char *a;

    rc = sqlite3Fts3SelectDoctotal(p, &pStmt);
    if( rc!=SQLITE_OK ) return rc;

    /* sqlite3_column_blob() returns NULL for a zero-length blob, which can
    ** happen if the stat record has been overwritten with X''. Do not try
    ** to decode anything in that case: nDoc and nByte stay zero and the
    ** check below reports the table as corrupt. */
    a = sqlite3_column_blob(pStmt, 0);
    if( a ){
      pEnd = &a[sqlite3_column_bytes(pStmt, 0)];
      a += sqlite3Fts3GetVarint(a, &nDoc);
      /* Only the last varint read is retained in nByte. */
      while( a<pEnd ){
        a += sqlite3Fts3GetVarint(a, &nByte);
      }
    }
    if( nDoc==0 || nByte==0 ){
      sqlite3_reset(pStmt);
      return FTS_CORRUPT_VTAB;
    }

    pCsr->nDoc = nDoc;
    /* Average bytes per document, rounded up to whole pages (always >= 1). */
    pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz);
    assert( pCsr->nRowAvg>0 );
    rc = sqlite3_reset(pStmt);
    if( rc!=SQLITE_OK ) return rc;
  }

  *pnPage = pCsr->nRowAvg;
  return SQLITE_OK;
}
/*
** This function is called to select the tokens (if any) that will be
** deferred. The array aTC[] has already been populated when this is
** called.
**
** This function is called once for each AND/NEAR cluster in the
** expression. Each invocation determines which tokens to defer within
** the cluster with root node pRoot. See comments above the definition
** of struct Fts3TokenAndCost for more details.
**
** As each aTC[] entry belonging to the cluster is processed, its pToken
** field is set to NULL so that it is not selected again.
**
** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken()
** called on each token to defer. Otherwise, an SQLite error code is
** returned.
*/
static int fts3EvalSelectDeferred(
Fts3Cursor *pCsr, /* FTS Cursor handle */
Fts3Expr *pRoot, /* Consider tokens with this root node */
Fts3TokenAndCost *aTC, /* Array of expression tokens and costs */
int nTC /* Number of entries in aTC[] */
){
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
int nDocSize = 0; /* Number of pages per doc loaded */
int rc = SQLITE_OK; /* Return code */
int ii; /* Iterator variable for various purposes */
int nOvfl = 0; /* Total overflow pages used by doclists */
int nToken = 0; /* Total number of tokens in cluster */
int nMinEst = 0; /* The minimum count for any phrase so far. */
int nLoad4 = 1; /* 4^(tokens not deferred so far), capped at 4^12 */
/* Tokens are never deferred for FTS tables created using the content=xxx
** option. The reason being that it is not guaranteed that the content
** table actually contains the same data as the index. To prevent this from
** causing any problems, the deferred token optimization is completely
** disabled for content=xxx tables. */
if( pTab->zContentTbl ){
return SQLITE_OK;
}
/* Count the tokens in this AND/NEAR cluster. If none of the doclists
** associated with the tokens spill onto overflow pages, or if there is
** only 1 token, exit early. No tokens to defer in this case. */
for(ii=0; ii<nTC; ii++){
if( aTC[ii].pRoot==pRoot ){
nOvfl += aTC[ii].nOvfl;
nToken++;
}
}
if( nOvfl==0 || nToken<2 ) return SQLITE_OK;
/* Obtain the average docsize (in pages). */
rc = fts3EvalAverageDocsize(pCsr, &nDocSize);
assert( rc!=SQLITE_OK || nDocSize>0 );
/* Iterate through all tokens in this AND/NEAR cluster, in ascending order
** of the number of overflow pages that will be loaded by the pager layer
** to retrieve the entire doclist for the token from the full-text index.
** Load the doclists for tokens that are either:
**
** a. The cheapest token in the entire query (i.e. the one visited by the
** first iteration of this loop), or
**
** b. Part of a multi-token phrase.
**
** After each token doclist is loaded, merge it with the others from the
** same phrase and count the number of documents that the merged doclist
** contains. Set variable "nMinEst" to the smallest number of documents in
** any phrase doclist for which 1 or more token doclists have been loaded.
** Let nOther be the number of other phrases for which it is certain that
** one or more tokens will not be deferred.
**
** Then, for each token, defer it if loading the doclist would result in
** loading N or more overflow pages into memory, where N is computed as:
**
** (nMinEst + 4^nOther - 1) / (4^nOther)
**
** The loop is skipped entirely if obtaining the average docsize failed.
*/
for(ii=0; ii<nToken && rc==SQLITE_OK; ii++){
int iTC; /* Used to iterate through aTC[] array. */
Fts3TokenAndCost *pTC = 0; /* Set to cheapest remaining token. */
/* Set pTC to point to the cheapest remaining token. Entries that have
** already been processed have pToken==0 and are skipped. */
for(iTC=0; iTC<nTC; iTC++){
if( aTC[iTC].pToken && aTC[iTC].pRoot==pRoot
&& (!pTC || aTC[iTC].nOvfl<pTC->nOvfl)
){
pTC = &aTC[iTC];
}
}
assert( pTC );
/* The threshold below is the formula above scaled by nDocSize. nLoad4 was
** already multiplied by 4 for the next iteration when the previous token
** was kept, hence the division by 4 here. The "ii &&" guard means the
** first (cheapest) token is never deferred. */
if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){
/* The number of overflow pages to load for this (and therefore all
** subsequent) tokens is greater than the estimated number of pages
** that will be loaded if all subsequent tokens are deferred.
*/
Fts3PhraseToken *pToken = pTC->pToken;
rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
/* A deferred token no longer needs its segment cursor. */
fts3SegReaderCursorFree(pToken->pSegcsr);
pToken->pSegcsr = 0;
}else{
/* Set nLoad4 to the value of (4^nOther) for the next iteration of the
** for-loop. Except, limit the value to 2^24 to prevent it from
** overflowing the 32-bit integer it is stored in. */
if( ii<12 ) nLoad4 = nLoad4*4;
if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){
/* Either this is the cheapest token in the entire query, or it is
** part of a multi-token phrase. Either way, the entire doclist will
** (eventually) be loaded into memory. It may as well be now. */
Fts3PhraseToken *pToken = pTC->pToken;
int nList = 0;
char *pList = 0;
rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
assert( rc==SQLITE_OK || pList==0 );
if( rc==SQLITE_OK ){
rc = fts3EvalPhraseMergeToken(
pTab, pTC->pPhrase, pTC->iToken,pList,nList
);
}
if( rc==SQLITE_OK ){
/* Track the smallest document count seen in any loaded phrase doclist. */
int nCount;
nCount = fts3DoclistCountDocids(
pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll
);
if( ii==0 || nCount<nMinEst ) nMinEst = nCount;
}
}
}
/* Mark this entry as processed. */
pTC->pToken = 0;
}
return rc;
}
/*
** This function is called from within the xFilter method. It initializes
** the full-text query currently stored in pCsr->pExpr. To iterate through
** the results of a query, the caller does:
**
**    fts3EvalStart(pCsr);
**    while( 1 ){
**      fts3EvalNext(pCsr);
**      if( pCsr->bEof ) break;
**      ... return row pCsr->iPrevId to the caller ...
**    }
**
** The function first allocates a segment reader for every token, then
** (unless SQLITE_DISABLE_FTS4_DEFERRED is defined) decides which tokens to
** defer for FTS4 tables with more than one token, and finally starts the
** per-phrase readers.
**
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
*/
static int fts3EvalStart(Fts3Cursor *pCsr){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int rc = SQLITE_OK;
  int nToken = 0;
  int nOr = 0;

  /* Allocate a MultiSegReader for each token in the expression. */
  fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc);

  /* Determine which, if any, tokens in the expression should be deferred. */
#ifndef SQLITE_DISABLE_FTS4_DEFERRED
  if( rc==SQLITE_OK && nToken>1 && pTab->bFts4 ){
    Fts3TokenAndCost *aTC;

    /* A single allocation holds the token/cost array followed by the array
    ** of OR cluster roots (two slots per OR node, one for each child). */
    aTC = (Fts3TokenAndCost *)sqlite3_malloc(
        sizeof(Fts3TokenAndCost) * nToken
      + sizeof(Fts3Expr *) * nOr * 2
    );
    if( !aTC ){
      rc = SQLITE_NOMEM;
    }else{
      int ii;
      Fts3TokenAndCost *pTC = aTC;
      /* The OR-root array starts right after the last token entry. This
      ** address is only computed once aTC is known to be non-NULL. */
      Fts3Expr **apOr = (Fts3Expr **)&aTC[nToken];
      Fts3Expr **ppOr = apOr;

      fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc);
      /* Use the number of entries actually written from here on. */
      nToken = (int)(pTC-aTC);
      nOr = (int)(ppOr-apOr);

      if( rc==SQLITE_OK ){
        /* Root 0 is the cluster of tokens not beneath any OR node; then
        ** each recorded OR child is handled as a cluster of its own. */
        rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken);
        for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){
          rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken);
        }
      }

      sqlite3_free(aTC);
    }
  }
#endif

  /* This is a no-op if rc already holds an error code. */
  fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc);
  return rc;
}
/*
** Discard the current position list of phrase pPhrase.
**
** The buffer at doclist.pList is released only when doclist.bFreeList is
** set. In either case the pList, nList and bFreeList fields are all
** cleared before returning.
*/
static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){
  /* Remember the buffer only if this phrase is responsible for freeing it.
  ** sqlite3_free() is a harmless no-op when passed a NULL pointer. */
  char *pOwned = pPhrase->doclist.bFreeList ? pPhrase->doclist.pList : 0;

  pPhrase->doclist.bFreeList = 0;
  pPhrase->doclist.nList = 0;
  pPhrase->doclist.pList = 0;
  sqlite3_free(pOwned);
}
/*
** This function is called to edit the position list associated with
** the phrase object passed as the fifth argument according to a NEAR
** condition. For example:
**
**     abc NEAR/5 "def ghi"
**
** Parameter nNear is passed the NEAR distance of the expression (5 in
** the example above). When this function is called, *paPoslist points to
** the position list, and *pnToken is the number of phrase tokens in, the
** phrase on the other side of the NEAR operator to pPhrase. For example,
** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to
** the position list associated with phrase "abc".
**
** All positions in the pPhrase position list that are not sufficiently
** close to a position in the *paPoslist position list are removed. If this
** leaves 0 positions, zero is returned. Otherwise, non-zero.
**
** If non-zero is returned, then before returning *paPoslist is set to point
** to the position list associated with pPhrase, and *pnToken is set to the
** number of tokens in pPhrase. If zero is returned, neither output is
** modified by this function itself.
**
** The trimmed length is validated before the tail of the buffer is zeroed.
** If the length computed from the merge output is outside the range
** 0..doclist.nList, the buffer and doclist.nList are left as they are
** rather than passing a negative length to memset().
*/
static int fts3EvalNearTrim(
  int nNear,                      /* NEAR distance. As in "NEAR/nNear". */
  char *aTmp,                     /* Temporary space to use */
  char **paPoslist,               /* IN/OUT: Position list */
  int *pnToken,                   /* IN/OUT: Tokens in phrase of *paPoslist */
  Fts3Phrase *pPhrase             /* The phrase object to trim the doclist of */
){
  int nParam1 = nNear + pPhrase->nToken;
  int nParam2 = nNear + *pnToken;
  int nNew;
  char *p2;
  char *pOut;
  int res;

  assert( pPhrase->doclist.pList );

  /* The merge reads from and writes to the same buffer: p2 is the read
  ** cursor and pOut the write cursor, both starting at doclist.pList. */
  p2 = pOut = pPhrase->doclist.pList;
  res = fts3PoslistNearMerge(
    &pOut, aTmp, nParam1, nParam2, paPoslist, &p2
  );
  if( res ){
    /* New length of the list, excluding the final byte written. */
    nNew = (int)(pOut - pPhrase->doclist.pList) - 1;
    assert( nNew<=pPhrase->doclist.nList && nNew>0 );
    if( nNew>=0 && nNew<=pPhrase->doclist.nList ){
      assert( pPhrase->doclist.pList[nNew]=='\0' );
      /* Zero whatever remains of the old list beyond the new end. */
      memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew);
      pPhrase->doclist.nList = nNew;
    }
    *paPoslist = pPhrase->doclist.pList;
    *pnToken = pPhrase->nToken;
  }

  return res;
}
/*
** This function is a no-op if *pRc is other than SQLITE_OK when it is called.
** Otherwise, it advances the expression passed as the second argument to
** point to the next matching row in the database. Expressions iterate through
** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero,
** or descending if it is non-zero.
**
** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if
** successful, the following variables in pExpr are set:
**
** Fts3Expr.bEof (non-zero if EOF - there is no next row)
** Fts3Expr.iDocid (valid if bEof==0. The docid of the next row)
**
** If the expression is of type FTSQUERY_PHRASE, and the expression is not
** at EOF, then the following variables are populated with the position list
** for the phrase for the visited row:
**
** Fts3Expr.pPhrase->doclist.nList (length of pList in bytes)
** Fts3Expr.pPhrase->doclist.pList (pointer to position list)
**
** It says above that this function advances the expression to the next
** matching row. This is usually true, but there are the following exceptions:
**
** 1. Deferred tokens are not taken into account. If a phrase consists
** entirely of deferred tokens, it is assumed to match every row in
** the db. In this case the position-list is not populated at all.
**
** Or, if a phrase contains one or more deferred tokens and one or
** more non-deferred tokens, then the expression is advanced to the
** next possible match, considering only non-deferred tokens. In other
** words, if the phrase is "A B C", and "B" is deferred, the expression
** is advanced to the next row that contains an instance of "A * C",
** where "*" may match any single token. The position list in this case
** is populated as for "A * C" before returning.
**
** 2. NEAR is treated as AND. If the expression is "x NEAR y", it is
** advanced to point to the next row that matches "x AND y".
**
** 3. For a NOT node, iDocid and bEof simply mirror the left-hand child.
** Rows that also appear on the right-hand side are not skipped here.
**
** See sqlite3Fts3EvalTestDeferred() for details on testing if a row is
** really a match, taking into account deferred tokens and NEAR operators.
*/
static void fts3EvalNextRow(
Fts3Cursor *pCsr, /* FTS Cursor handle */
Fts3Expr *pExpr, /* Expr. to advance to next matching row */
int *pRc /* IN/OUT: Error code */
){
if( *pRc==SQLITE_OK ){
int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */
assert( pExpr->bEof==0 );
/* Record that iteration of this node has begun. */
pExpr->bStart = 1;
switch( pExpr->eType ){
case FTSQUERY_NEAR:
case FTSQUERY_AND: {
Fts3Expr *pLeft = pExpr->pLeft;
Fts3Expr *pRight = pExpr->pRight;
assert( !pLeft->bDeferred || !pRight->bDeferred );
if( pLeft->bDeferred ){
/* LHS is entirely deferred. So we assume it matches every row.
** Advance the RHS iterator to find the next row visited. */
fts3EvalNextRow(pCsr, pRight, pRc);
pExpr->iDocid = pRight->iDocid;
pExpr->bEof = pRight->bEof;
}else if( pRight->bDeferred ){
/* RHS is entirely deferred. So we assume it matches every row.
** Advance the LHS iterator to find the next row visited. */
fts3EvalNextRow(pCsr, pLeft, pRc);
pExpr->iDocid = pLeft->iDocid;
pExpr->bEof = pLeft->bEof;
}else{
/* Neither the RHS nor the LHS is deferred. Step both children once,
** then keep stepping whichever one DOCID_CMP() reports as behind until
** they agree on a docid, one of them reaches EOF, or an error occurs. */
fts3EvalNextRow(pCsr, pLeft, pRc);
fts3EvalNextRow(pCsr, pRight, pRc);
while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){
sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
if( iDiff==0 ) break;
if( iDiff<0 ){
fts3EvalNextRow(pCsr, pLeft, pRc);
}else{
fts3EvalNextRow(pCsr, pRight, pRc);
}
}
pExpr->iDocid = pLeft->iDocid;
pExpr->bEof = (pLeft->bEof || pRight->bEof);
/* A NEAR node has just hit EOF. For each child that is a phrase with a
** doclist.aAll buffer, run that child to its own EOF as well, zeroing the
** position list of the row it is on before each step.
** NOTE(review): presumably this stops stale position data for the
** remaining rows being observed later - confirm against callers. */
if( pExpr->eType==FTSQUERY_NEAR && pExpr->bEof ){
if( pRight->pPhrase && pRight->pPhrase->doclist.aAll ){
Fts3Doclist *pDl = &pRight->pPhrase->doclist;
while( *pRc==SQLITE_OK && pRight->bEof==0 ){
memset(pDl->pList, 0, pDl->nList);
fts3EvalNextRow(pCsr, pRight, pRc);
}
}
if( pLeft->pPhrase && pLeft->pPhrase->doclist.aAll ){
Fts3Doclist *pDl = &pLeft->pPhrase->doclist;
while( *pRc==SQLITE_OK && pLeft->bEof==0 ){
memset(pDl->pList, 0, pDl->nList);
fts3EvalNextRow(pCsr, pLeft, pRc);
}
}
}
}
break;
}
case FTSQUERY_OR: {
Fts3Expr *pLeft = pExpr->pLeft;
Fts3Expr *pRight = pExpr->pRight;
sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid );
assert( pRight->bStart || pLeft->iDocid==pRight->iDocid );
/* Step the child that is behind (or the only one not at EOF). If both
** are live and on the same docid, step both. */
if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
fts3EvalNextRow(pCsr, pLeft, pRc);
}else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){
fts3EvalNextRow(pCsr, pRight, pRc);
}else{
fts3EvalNextRow(pCsr, pLeft, pRc);
fts3EvalNextRow(pCsr, pRight, pRc);
}
/* The OR node is at EOF only once both children are. Its docid is taken
** from the left child if the right is at EOF or the left compares as
** earlier, and from the right child otherwise. */
pExpr->bEof = (pLeft->bEof && pRight->bEof);
iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
pExpr->iDocid = pLeft->iDocid;
}else{
pExpr->iDocid = pRight->iDocid;
}
break;
}
case FTSQUERY_NOT: {
Fts3Expr *pLeft = pExpr->pLeft;
Fts3Expr *pRight = pExpr->pRight;
/* Make sure the right-hand side has been stepped at least once. */
if( pRight->bStart==0 ){
fts3EvalNextRow(pCsr, pRight, pRc);
assert( *pRc!=SQLITE_OK || pRight->bStart );
}
fts3EvalNextRow(pCsr, pLeft, pRc);
/* Bring the right-hand side forward until it is no longer behind the
** left-hand side (or it reaches EOF, or an error occurs). */
if( pLeft->bEof==0 ){
while( !*pRc
&& !pRight->bEof
&& DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0
){
fts3EvalNextRow(pCsr, pRight, pRc);
}
}
pExpr->iDocid = pLeft->iDocid;
pExpr->bEof = pLeft->bEof;
break;
}
default: {
/* Any other node type is handled as a phrase: drop the position list of
** the previous row, step the phrase, and copy its docid to the node. */
Fts3Phrase *pPhrase = pExpr->pPhrase;
fts3EvalInvalidatePoslist(pPhrase);
*pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof);
pExpr->iDocid = pPhrase->doclist.iDocid;
break;
}
}
}
}
/*
** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR
** cluster, or if pExpr is at EOF, then this function returns 1 immediately.
**
** Otherwise, it checks if the current row really does match the NEAR
** expression, using the data currently stored in the position lists
** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression.
**
** If the current row is a match, the position list associated with each
** phrase in the NEAR expression is edited in place to contain only those
** phrase instances sufficiently close to their peers to satisfy all NEAR
** constraints. In this case it returns 1. If the NEAR expression does not
** match the current row, 0 is returned. The position lists may or may not
** be edited if 0 is returned.
**
** 0 is also returned if the combined length of all position lists is zero,
** or if the temporary buffer cannot be allocated. In the latter case *pRc
** is set to SQLITE_NOMEM.
*/
static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
int res = 1;
/* The following block runs if pExpr is the root of a NEAR query.
** For example, the query:
**
** "w" NEAR "x" NEAR "y" NEAR "z"
**
** which is represented in tree form as:
**
** |
** +--NEAR--+ <-- root of NEAR query
** | |
** +--NEAR--+ "z"
** | |
** +--NEAR--+ "y"
** | |
** "w" "x"
**
** The right-hand child of a NEAR node is always a phrase. The
** left-hand child may be either a phrase or a NEAR node. There are
** no exceptions to this - it's the way the parser in fts3_expr.c works.
*/
if( *pRc==SQLITE_OK
&& pExpr->eType==FTSQUERY_NEAR
&& pExpr->bEof==0
&& (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
){
Fts3Expr *p;
int nTmp = 0; /* Bytes of temp space */
char *aTmp; /* Temp space for PoslistNearMerge() */
/* Allocate temporary working space. Walk down the left spine summing the
** position-list size of each right-hand phrase. The loop ends with p on
** the left-most phrase, whose size is then added as well. */
for(p=pExpr; p->pLeft; p=p->pLeft){
nTmp += p->pRight->pPhrase->doclist.nList;
}
nTmp += p->pPhrase->doclist.nList;
if( nTmp==0 ){
res = 0;
}else{
/* Twice the combined size of all position lists is requested. */
aTmp = sqlite3_malloc(nTmp*2);
if( !aTmp ){
*pRc = SQLITE_NOMEM;
res = 0;
}else{
/* Pass 1: start from the left-most phrase and walk up towards the root,
** trimming the right-hand phrase of each NEAR node against the list
** produced by the previous step. */
char *aPoslist = p->pPhrase->doclist.pList;
int nToken = p->pPhrase->nToken;
for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
Fts3Phrase *pPhrase = p->pRight->pPhrase;
int nNear = p->nNear;
res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
}
/* Pass 2: start from the root's right-hand phrase and walk back down the
** left spine, trimming each phrase against its right-hand neighbour using
** the NEAR distance stored in the parent node. */
aPoslist = pExpr->pRight->pPhrase->doclist.pList;
nToken = pExpr->pRight->pPhrase->nToken;
for(p=pExpr->pLeft; p && res; p=p->pLeft){
int nNear;
Fts3Phrase *pPhrase;
assert( p->pParent && p->pParent->pLeft==p );
nNear = p->pParent->nNear;
pPhrase = (
p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
);
res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
}
}
/* aTmp is either a valid allocation or NULL at this point. */
sqlite3_free(aTmp);
}
}
return res;
}
/*
** This function is a helper function for sqlite3Fts3EvalTestDeferred().
** Assuming no error occurs or has occurred, it returns non-zero if the
** expression passed as the second argument matches the row that pCsr
** currently points to (pCsr->iPrevId), or zero if it does not.
**
** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
** If an error occurs during execution of this function, *pRc is set to
** the appropriate SQLite error code. In this case the returned value is
** undefined.
*/
static int fts3EvalTestExpr(
Fts3Cursor *pCsr, /* FTS cursor handle */
Fts3Expr *pExpr, /* Expr to test. May or may not be root. */
int *pRc /* IN/OUT: Error code */
){
int bHit = 1; /* Return value */
if( *pRc==SQLITE_OK ){
switch( pExpr->eType ){
case FTSQUERY_NEAR:
case FTSQUERY_AND:
/* Both children must match. The && operators short-circuit, so the
** right child and the NEAR test only run if everything before them hit. */
bHit = (
fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
&& fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
&& fts3EvalNearTest(pExpr, pRc)
);
/* If the NEAR expression does not match any rows, zero the doclist for
** all phrases involved in the NEAR. This is because the snippet(),
** offsets() and matchinfo() functions are not supposed to recognize
** any instances of phrases that are part of unmatched NEAR queries.
** For example if this expression:
**
** ... MATCH 'a OR (b NEAR c)'
**
** is matched against a row containing:
**
** 'a b d e'
**
** then any snippet() should only highlight the "a" term, not the "b"
** (as "b" is part of a non-matching NEAR clause).
*/
if( bHit==0
&& pExpr->eType==FTSQUERY_NEAR
&& (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
){
Fts3Expr *p;
/* Walk down the left spine of the NEAR cluster. Only phrases currently
** positioned on this row have their position list discarded. */
for(p=pExpr; p->pPhrase==0; p=p->pLeft){
if( p->pRight->iDocid==pCsr->iPrevId ){
fts3EvalInvalidatePoslist(p->pRight->pPhrase);
}
}
if( p->iDocid==pCsr->iPrevId ){
fts3EvalInvalidatePoslist(p->pPhrase);
}
}
break;
case FTSQUERY_OR: {
/* Both children are always evaluated; there is no short-circuit here.
** NOTE(review): presumably so that the side effects of testing happen
** in both subtrees - confirm. */
int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc);
int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc);
bHit = bHit1 || bHit2;
break;
}
case FTSQUERY_NOT:
/* A hit requires the left side to match and the right side not to. The
** right side is only tested if the left side matched. */
bHit = (
fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
&& !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
);
break;
default: {
#ifndef SQLITE_DISABLE_FTS4_DEFERRED
/* The cursor has deferred tokens, and this phrase is either positioned on
** the current row or is itself deferred. Call fts3EvalDeferredPhrase()
** for the phrase; it is a hit if a position list exists afterwards. */
if( pCsr->pDeferred
&& (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred)
){
Fts3Phrase *pPhrase = pExpr->pPhrase;
assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 );
if( pExpr->bDeferred ){
fts3EvalInvalidatePoslist(pPhrase);
}
*pRc = fts3EvalDeferredPhrase(pCsr, pPhrase);
bHit = (pPhrase->doclist.pList!=0);
pExpr->iDocid = pCsr->iPrevId;
}else
#endif
{
/* Otherwise the phrase matches if it is not at EOF and is positioned on
** the current row. */
bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId);
}
break;
}
}
}
return bHit;
}
/*
** This function is called as the second part of each xNext operation when
** iterating through the results of a full-text query. When it is called,
** the cursor points at a candidate row that was selected while:
**
**   * treating every "NEAR" operator in the expression as "AND", and
**
**   * ignoring deferred tokens altogether.
**
** The job of this function is to decide whether the candidate survives
** once NEAR operators and deferred tokens are taken into account.
**
** Return value:
**
**   1  if no error has occurred and the current row turns out NOT to
**      match the query (the caller should skip it).
**
**   0  if the row does match, or if *pRc was not SQLITE_OK on entry, or
**      if an error occurs inside this function.
**
** If *pRc is SQLITE_OK on entry, it is overwritten with the result code of
** this function before returning. Otherwise it is left untouched.
*/
int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc){
  int rcLocal = *pRc;
  int bMatch;

  /* Nothing to do if an error has already occurred. */
  if( rcLocal!=SQLITE_OK ) return 0;

  /* If there are deferred tokens, seek to the current row and build the
  ** position list of each deferred token for it. Neither deferred tokens
  ** nor NEAR operators were considered by fts3EvalNextRow(). */
  if( pCsr->pDeferred ){
    rcLocal = fts3CursorSeek(0, pCsr);
    if( rcLocal==SQLITE_OK ) rcLocal = sqlite3Fts3CacheDeferredDoclists(pCsr);
  }

  bMatch = fts3EvalTestExpr(pCsr, pCsr->pExpr, &rcLocal);

  /* Release the position lists accumulated for the deferred tokens. */
  sqlite3Fts3FreeDeferredDoclists(pCsr);
  *pRc = rcLocal;

  return (rcLocal==SQLITE_OK && !bMatch) ? 1 : 0;
}
/*
** Move the cursor on to the next document that matches the FTS expression
** stored in Fts3Cursor.pExpr.
**
** On return pCsr->isEof is set if there are no more matching rows, if the
** cursor has no expression at all, or if the row reached lies beyond the
** docid range given by Fts3Cursor.iMinDocid/iMaxDocid. Otherwise
** pCsr->iPrevId holds the docid of the row found. The return value is
** SQLITE_OK or an SQLite error code.
*/
static int fts3EvalNext(Fts3Cursor *pCsr){
  Fts3Expr *pRoot = pCsr->pExpr;      /* Root of the query expression */
  int rc = SQLITE_OK;                 /* Return Code */

  assert( pCsr->isEof==0 );

  if( pRoot ){
    for(;;){
      if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt);
      assert( sqlite3_data_count(pCsr->pStmt)==0 );

      /* Step to the next candidate row and mirror its state in the cursor. */
      fts3EvalNextRow(pCsr, pRoot, &rc);
      pCsr->isEof = pRoot->bEof;
      pCsr->isRequireSeek = 1;
      pCsr->isMatchinfoNeeded = 1;
      pCsr->iPrevId = pRoot->iDocid;

      /* Stop at EOF, or as soon as a candidate is not rejected. */
      if( pCsr->isEof ) break;
      if( !sqlite3Fts3EvalTestDeferred(pCsr, &rc) ) break;
    }
  }else{
    pCsr->isEof = 1;
  }

  /* Check if the cursor is past the end of the docid range specified
  ** by Fts3Cursor.iMinDocid/iMaxDocid. If so, set the EOF flag. */
  if( rc==SQLITE_OK ){
    int bPastEnd;
    if( pCsr->bDesc ){
      bPastEnd = (pCsr->iPrevId < pCsr->iMinDocid);
    }else{
      bPastEnd = (pCsr->iPrevId > pCsr->iMaxDocid);
    }
    if( bPastEnd ) pCsr->isEof = 1;
  }

  return rc;
}
/*
** Rewind the expression tree rooted at pExpr so that iteration starts over
** and the next step visits the first row again. Incremental loading or
** merging of phrase doclists is not allowed for the new iteration.
**
** This function does nothing if pExpr is NULL or if *pRc is not SQLITE_OK
** when it is called. If an error occurs, *pRc is set to an SQLite error
** code before returning.
*/
static void fts3EvalRestart(
  Fts3Cursor *pCsr,
  Fts3Expr *pExpr,
  int *pRc
){
  Fts3Phrase *pPhrase;

  if( pExpr==0 || *pRc!=SQLITE_OK ) return;

  pPhrase = pExpr->pPhrase;
  if( pPhrase ){
    fts3EvalInvalidatePoslist(pPhrase);

    if( pPhrase->bIncr ){
      /* Rewind the segment cursor of every token that has one, then start
      ** the phrase again with the second argument set to 0. */
      int iTok;
      for(iTok=0; iTok<pPhrase->nToken; iTok++){
        Fts3PhraseToken *pTok = &pPhrase->aToken[iTok];
        assert( pTok->pDeferred==0 );
        if( pTok->pSegcsr ) sqlite3Fts3MsrIncrRestart(pTok->pSegcsr);
      }
      *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
    }

    /* Forget the phrase's current position within its doclist. */
    pPhrase->pOrPoslist = 0;
    pPhrase->doclist.iDocid = 0;
    pPhrase->doclist.pNextDocid = 0;
  }

  /* Reset the node itself, then both subtrees. */
  pExpr->bStart = 0;
  pExpr->bEof = 0;
  pExpr->iDocid = 0;

  fts3EvalRestart(pCsr, pExpr->pLeft, pRc);
  fts3EvalRestart(pCsr, pExpr->pRight, pRc);
}
/*
** After allocating the Fts3Expr.aMI[] array for each phrase in the
** expression rooted at pExpr, the cursor iterates through all rows matched
** by pExpr, calling this function for each row. This function increments
** the values in Fts3Expr.aMI[] according to the position-list currently
** found in Fts3Expr.pPhrase->doclist.pList for each of the phrase
** expression nodes.
**
** Nodes that are NULL, have no phrase, or whose phrase has no current
** position list contribute nothing. The function recurses into both the
** left and right children of every non-NULL node.
*/
static void fts3EvalUpdateCounts(Fts3Expr *pExpr){
if( pExpr ){
Fts3Phrase *pPhrase = pExpr->pPhrase;
if( pPhrase && pPhrase->doclist.pList ){
int iCol = 0;
char *p = pPhrase->doclist.pList;
assert( *p );
/* Each pass of this loop handles the entries for one column. */
while( 1 ){
u8 c = 0;
int iCnt = 0;
/* Count the varints in this column's list. Variable c holds the 0x80 bit
** of the previous byte, so a byte starts a new varint exactly when c is
** clear. The scan stops on a byte with no bits other than 0x01 set (that
** is, 0x00 or 0x01) that does not continue a varint. */
while( 0xFE & (*p | c) ){
if( (c&0x80)==0 ) iCnt++;
c = *p++ & 0x80;
}
/* aMI[iCol*3 + 1] = Number of occurrences
** aMI[iCol*3 + 2] = Number of rows containing at least one instance
*/
pExpr->aMI[iCol*3 + 1] += iCnt;
pExpr->aMI[iCol*3 + 2] += (iCnt>0);
/* A 0x00 byte ends the whole position list. Otherwise step over the
** current byte and read the next column number, stored as a varint. */
if( *p==0x00 ) break;
p++;
p += fts3GetVarint32(p, &iCol);
}
}
fts3EvalUpdateCounts(pExpr->pLeft);
fts3EvalUpdateCounts(pExpr->pRight);
}
}
/*
** Expression pExpr must be of type FTSQUERY_PHRASE.
**
** If it is not already allocated and populated, this function allocates and
** populates the Fts3Expr.aMI[] array for expression pExpr. If pExpr is part
** of a NEAR expression, then it also allocates and populates the same array
** for all other phrases that are part of the NEAR expression.
**
** To populate the arrays, the expression is restarted and iterated over
** every matching row. Afterwards pCsr->iPrevId and the position of the
** expression are restored to what they were on entry, and pCsr->isEof is
** cleared.
**
** SQLITE_OK is returned if the aMI[] array is successfully allocated and
** populated. Otherwise, if an error occurs, an SQLite error code is returned.
*/
static int fts3EvalGatherStats(
Fts3Cursor *pCsr, /* Cursor object */
Fts3Expr *pExpr /* FTSQUERY_PHRASE expression */
){
int rc = SQLITE_OK; /* Return code */
assert( pExpr->eType==FTSQUERY_PHRASE );
if( pExpr->aMI==0 ){
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
Fts3Expr *pRoot; /* Root of NEAR expression */
Fts3Expr *p; /* Iterator used for several purposes */
/* State saved here so that it can be restored once the scan is done. */
sqlite3_int64 iPrevId = pCsr->iPrevId;
sqlite3_int64 iDocid;
u8 bEof;
/* Find the root of the NEAR expression */
pRoot = pExpr;
while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){
pRoot = pRoot->pParent;
}
iDocid = pRoot->iDocid;
bEof = pRoot->bEof;
assert( pRoot->bStart );
/* Allocate space for the aMI[] array of each FTSQUERY_PHRASE node. Each
** array holds three zero-initialised u32 values per table column. */
for(p=pRoot; p; p=p->pLeft){
Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight);
assert( pE->aMI==0 );
pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32));
if( !pE->aMI ) return SQLITE_NOMEM;
memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32));
}
/* Rewind pRoot and visit every row it matches, accumulating counts. */
fts3EvalRestart(pCsr, pRoot, &rc);
while( pCsr->isEof==0 && rc==SQLITE_OK ){
do {
/* Ensure the %_content statement is reset. */
if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt);
assert( sqlite3_data_count(pCsr->pStmt)==0 );
/* Advance to the next document */
fts3EvalNextRow(pCsr, pRoot, &rc);
pCsr->isEof = pRoot->bEof;
pCsr->isRequireSeek = 1;
pCsr->isMatchinfoNeeded = 1;
pCsr->iPrevId = pRoot->iDocid;
/* The NEAR/deferred re-check is only applied when pRoot is a NEAR node. */
}while( pCsr->isEof==0
&& pRoot->eType==FTSQUERY_NEAR
&& sqlite3Fts3EvalTestDeferred(pCsr, &rc)
);
if( rc==SQLITE_OK && pCsr->isEof==0 ){
fts3EvalUpdateCounts(pRoot);
}
}
/* Restore the cursor state saved on entry. */
pCsr->isEof = 0;
pCsr->iPrevId = iPrevId;
if( bEof ){
pRoot->bEof = bEof;
}else{
/* Caution: pRoot may iterate through docids in ascending or descending
** order. For this reason, even though it seems more defensive, the
** do loop can not be written:
**
** do {...} while( pRoot->iDocid<iDocid && rc==SQLITE_OK );
*/
fts3EvalRestart(pCsr, pRoot, &rc);
do {
fts3EvalNextRow(pCsr, pRoot, &rc);
assert( pRoot->bEof==0 );
}while( pRoot->iDocid!=iDocid && rc==SQLITE_OK );
}
}
return rc;
}
/*
** This function is used by the matchinfo() module to obtain two statistics
** for a phrase expression node, for each column of the FTS table:
**
**   1. The total number of occurrences of the phrase in the column,
**      considering all rows, and
**
**   2. The number of rows in which the column contains at least one
**      instance of the phrase.
**
** If no error occurs, SQLITE_OK is returned and the values for column iCol
** are stored in the output array as follows:
**
**   aiOut[iCol*3 + 1] = Number of occurrences
**   aiOut[iCol*3 + 2] = Number of rows containing at least one instance
**
** Element aiOut[iCol*3 + 0] is never written by this function. If an error
** occurs, an SQLite error code is returned and aiOut[] is left untouched.
**
** Caveats:
**
**   * If a phrase consists entirely of deferred tokens, then all output
**     values are set to the number of documents in the table. In other
**     words it is assumed that very common tokens occur exactly once in
**     each column of each row of the table.
**
**   * If a phrase contains some deferred tokens (and some non-deferred
**     tokens), the potential occurrences identified by considering only
**     the non-deferred tokens are counted instead of actual phrase
**     occurrences.
**
**   * If the phrase is part of a NEAR expression, then only phrase
**     instances that meet the NEAR constraint are included in the counts.
*/
int sqlite3Fts3EvalPhraseStats(
  Fts3Cursor *pCsr,               /* FTS cursor handle */
  Fts3Expr *pExpr,                /* Phrase expression */
  u32 *aiOut                      /* Array to write results into (see above) */
){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int i;                          /* Column index */
  int rc;                         /* Result of gathering the statistics */

  /* Entirely deferred phrase outside of a NEAR: report the document count
  ** for every column without scanning anything. */
  if( pExpr->bDeferred && pExpr->pParent->eType!=FTSQUERY_NEAR ){
    assert( pCsr->nDoc>0 );
    for(i=0; i<pTab->nColumn; i++){
      u32 *aCol = &aiOut[i*3];
      aCol[1] = (u32)pCsr->nDoc;
      aCol[2] = (u32)pCsr->nDoc;
    }
    return SQLITE_OK;
  }

  /* Otherwise make sure pExpr->aMI[] is populated and copy it out. */
  rc = fts3EvalGatherStats(pCsr, pExpr);
  if( rc!=SQLITE_OK ) return rc;

  assert( pExpr->aMI );
  for(i=0; i<pTab->nColumn; i++){
    u32 *aCol = &aiOut[i*3];
    aCol[1] = pExpr->aMI[i*3 + 1];
    aCol[2] = pExpr->aMI[i*3 + 2];
  }
  return SQLITE_OK;
}
/*
** The expression pExpr passed as the second argument to this function
** must be of type FTSQUERY_PHRASE.
**
** On return, *ppOut is either NULL or a pointer to a buffer containing
** a position-list indicating the occurrences of the phrase in column iCol
** of the current row. The function itself returns SQLITE_OK, or an SQLite
** error code if one of the iteration steps it performs fails.
**
** More specifically, the returned buffer contains 1 varint for each
** occurrence of the phrase in the column, stored using the normal (delta+2)
** compression and is terminated by either an 0x01 or 0x00 byte. For example,
** if the requested column contains "a b X c d X X" and the position-list
** for 'X' is requested, the buffer returned may contain:
**
** 0x04 0x05 0x03 0x01 or 0x04 0x05 0x03 0x00
**
** This function works regardless of whether or not the phrase is deferred,
** incremental, or neither.
*/
int sqlite3Fts3EvalPhrasePoslist(
Fts3Cursor *pCsr, /* FTS3 cursor object */
Fts3Expr *pExpr, /* Phrase to return doclist for */
int iCol, /* Column to return position list for */
char **ppOut /* OUT: Pointer to position list */
){
Fts3Phrase *pPhrase = pExpr->pPhrase;
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
char *pIter;
int iThis;
sqlite3_int64 iDocid;
/* If this phrase applies specifically to some column other than
** column iCol, return a NULL pointer. */
*ppOut = 0;
assert( iCol>=0 && iCol<pTab->nColumn );
if( (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol) ){
return SQLITE_OK;
}
iDocid = pExpr->iDocid;
pIter = pPhrase->doclist.pList;
/* The phrase is not positioned on the cursor's current row, so its current
** position list cannot be used directly. */
if( iDocid!=pCsr->iPrevId || pExpr->bEof ){
int rc = SQLITE_OK;
int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */
int bOr = 0;
u8 bTreeEof = 0;
Fts3Expr *p; /* Used to iterate from pExpr to root */
Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */
int bMatch;
/* Check if this phrase descends from an OR expression node. If not,
** return NULL. Otherwise, the entry that corresponds to docid
** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the
** tree that the node is part of has been marked as EOF, but the node
** itself is not EOF, then it may point to an earlier entry. */
pNear = pExpr;
for(p=pExpr->pParent; p; p=p->pParent){
if( p->eType==FTSQUERY_OR ) bOr = 1;
if( p->eType==FTSQUERY_NEAR ) pNear = p;
if( p->bEof ) bTreeEof = 1;
}
if( bOr==0 ) return SQLITE_OK;
/* This is the descendent of an OR node. In this case we cannot use
** an incremental phrase. Load the entire doclist for the phrase
** into memory in this case. The restart is followed by stepping pNear
** until it reaches EOF or, if it was not at EOF beforehand, until it is
** back on the docid it was on when this function was entered. */
if( pPhrase->bIncr ){
int bEofSave = pNear->bEof;
fts3EvalRestart(pCsr, pNear, &rc);
while( rc==SQLITE_OK && !pNear->bEof ){
fts3EvalNextRow(pCsr, pNear, &rc);
if( bEofSave==0 && pNear->iDocid==iDocid ) break;
}
assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );
}
/* If some ancestor is at EOF, run pNear through to its own EOF too. */
if( bTreeEof ){
while( rc==SQLITE_OK && !pNear->bEof ){
fts3EvalNextRow(pCsr, pNear, &rc);
}
}
if( rc!=SQLITE_OK ) return rc;
/* For every phrase in the NEAR cluster (or just pExpr if there is none),
** move its private pOrPoslist/iOrDocid position within the full doclist
** towards the entry for pCsr->iPrevId. The row counts as a match only if
** every phrase has an entry for exactly that docid. */
bMatch = 1;
for(p=pNear; p; p=p->pLeft){
u8 bEof = 0;
Fts3Expr *pTest = p;
Fts3Phrase *pPh;
assert( pTest->eType==FTSQUERY_NEAR || pTest->eType==FTSQUERY_PHRASE );
if( pTest->eType==FTSQUERY_NEAR ) pTest = pTest->pRight;
assert( pTest->eType==FTSQUERY_PHRASE );
pPh = pTest->pPhrase;
pIter = pPh->pOrPoslist;
iDocid = pPh->iOrDocid;
if( pCsr->bDesc==bDescDoclist ){
/* Cursor order and doclist order agree: scan forwards. */
bEof = !pPh->doclist.nAll ||
(pIter >= (pPh->doclist.aAll + pPh->doclist.nAll));
while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){
sqlite3Fts3DoclistNext(
bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll,
&pIter, &iDocid, &bEof
);
}
}else{
/* Cursor order is the reverse of doclist order: scan backwards. */
bEof = !pPh->doclist.nAll || (pIter && pIter<=pPh->doclist.aAll);
while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){
int dummy;
sqlite3Fts3DoclistPrev(
bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll,
&pIter, &iDocid, &dummy, &bEof
);
}
}
/* Remember where the scan stopped so the next call can resume there. */
pPh->pOrPoslist = pIter;
pPh->iOrDocid = iDocid;
if( bEof || iDocid!=pCsr->iPrevId ) bMatch = 0;
}
if( bMatch ){
pIter = pPhrase->pOrPoslist;
}else{
pIter = 0;
}
}
if( pIter==0 ) return SQLITE_OK;
/* A leading 0x01 byte means the list starts with an explicit column
** number. Otherwise the first entries belong to column 0. */
if( *pIter==0x01 ){
pIter++;
pIter += fts3GetVarint32(pIter, &iThis);
}else{
iThis = 0;
}
/* Step over the lists of columns that precede iCol. A 0x00 byte means the
** position list ended without reaching column iCol.
** NOTE(review): fts3ColumnlistCopy() with a NULL destination presumably
** just advances pIter past one column list - confirm. */
while( iThis<iCol ){
fts3ColumnlistCopy(0, &pIter);
if( *pIter==0x00 ) return SQLITE_OK;
pIter++;
pIter += fts3GetVarint32(pIter, &iThis);
}
if( *pIter==0x00 ){
pIter = 0;
}
/* Only hand back the list if the column reached is exactly iCol. */
*ppOut = ((iCol==iThis)?pIter:0);
return SQLITE_OK;
}
/*
** Release everything inside the Fts3Phrase structure that was allocated by
** the eval module, namely:
**
**   * the contents of pPhrase->doclist (which is zeroed afterwards), and
**   * any Fts3MultiSegReader objects held by the phrase tokens.
**
** The Fts3Phrase object itself is not freed. Passing a NULL pointer is a
** harmless no-op.
*/
void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){
  int iTok;

  if( pPhrase==0 ) return;

  /* Free the full doclist and the current position list, then wipe the
  ** whole doclist structure. */
  sqlite3_free(pPhrase->doclist.aAll);
  fts3EvalInvalidatePoslist(pPhrase);
  memset(&pPhrase->doclist, 0, sizeof(pPhrase->doclist));

  /* Free the segment reader belonging to each token of the phrase. */
  iTok = 0;
  while( iTok<pPhrase->nToken ){
    fts3SegReaderCursorFree(pPhrase->aToken[iTok].pSegcsr);
    pPhrase->aToken[iTok].pSegcsr = 0;
    iTok++;
  }
}
/*
** Return SQLITE_CORRUPT_VTAB.
**
** This function is only compiled when SQLITE_DEBUG is defined. It takes no
** arguments, so it is declared with an explicit (void) parameter list to
** give the definition a proper prototype.
*/
#ifdef SQLITE_DEBUG
int sqlite3Fts3Corrupt(void){
  return SQLITE_CORRUPT_VTAB;
}
#endif
#if !SQLITE_CORE
/*
** Entry point used when FTS3 is not compiled as part of the SQLite core
** (this block is only compiled when SQLITE_CORE evaluates to zero).
**
** Initialize the API pointer table, if required, from pApi and then
** register the module with database handle db by calling sqlite3Fts3Init().
** The value returned by sqlite3Fts3Init() is returned to the caller.
** Parameter pzErrMsg is not used by this function.
*/
#ifdef _WIN32
__declspec(dllexport)
#endif
int sqlite3_fts3_init(
sqlite3 *db,
char **pzErrMsg,
const sqlite3_api_routines *pApi
){
/* The macro is written without a trailing semicolon, as in the original. */
SQLITE_EXTENSION_INIT2(pApi)
return sqlite3Fts3Init(db);
}
#endif
#endif