
/*
** 2014 May 31
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** Low level access to the FTS index stored in the database file. The
** routines in this file implement all read and write access to the
** %_data table. Other parts of the system access this functionality via
** the interface defined in fts5Int.h.
*/

#include "fts5Int.h"

/*
** Overview:
**
** The %_data table contains all the FTS indexes for an FTS5 virtual table.
** As well as the main term index, there may be up to 31 prefix indexes.
** The format is similar to FTS3/4, except that:
**
**   * all segment b-tree leaf data is stored in fixed size page records
**     (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
**     taken to ensure it is possible to iterate in either direction through
**     the entries in a doclist, or to seek to a specific entry within a
**     doclist, without loading it into memory.
**
**   * large doclists that span many pages have associated "doclist index"
**     records that contain a copy of the first rowid on each page spanned by
**     the doclist. This is used to speed up seek operations, and merges of
**     large doclists with very small doclists.
**
**   * extra fields in the "structure record" record the state of ongoing
**     incremental merge operations.
**
*/

#define FTS5_OPT_WORK_UNIT  1000    /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64      /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4       /* Add dlidx if this many empty pages */

#define FTS5_MAIN_PREFIX '0'

#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif

/*
** Details:
**
** The %_data table managed by this module,
**
**     CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
**
** , contains the following 5 types of records. See the comments surrounding
** the FTS5_*_ROWID macros below for a description of how %_data rowids are
** assigned to each of them.
**
** 1. Structure Records:
**
**   The set of segments that make up an index - the index structure - are
**   recorded in a single record within the %_data table. The record consists
**   of a single 32-bit configuration cookie value followed by a list of
**   SQLite varints. If the FTS table features more than one index (because
**   there are one or more prefix indexes), it is guaranteed that all share
**   the same cookie value.
**
**   Immediately following the configuration cookie, the record begins with
**   three varints:
**
**     + number of levels,
**     + total number of segments on all levels,
**     + value of write counter.
**
**   Then, for each level from 0 to nMax:
**
**     + number of input segments in ongoing merge.
**     + total number of segments in level.
**     + for each segment from oldest to newest:
**         + segment id (always > 0)
**         + first leaf page number (often 1, always greater than 0)
**         + final leaf page number
**
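**   As an illustration (values chosen arbitrarily, not taken from any real
**   index), a structure with a single level holding two completed segments,
**   ids 1 and 2, each spanning leaf pages 1 to 10, with a write counter of
**   20, would be encoded as the 32-bit cookie followed by the varints:
**
**     1 2 20          (1 level, 2 segments in total, write counter 20)
**     0 2             (level 0: no segments in an ongoing merge, 2 segments)
**     1 1 10          (segment id 1, first leaf page 1, final leaf page 10)
**     2 1 10          (segment id 2, first leaf page 1, final leaf page 10)
**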
** 2. The Averages Record:
**
**   A single record within the %_data table. The data is a list of varints.
**   The first value is the number of rows in the index. Then, for each column
**   from left to right, the total number of tokens in the column for all
**   rows of the table.
**
** 3. Segment leaves:
**
**   TERM/DOCLIST FORMAT:
**
**     Most of each segment leaf is taken up by term/doclist data. The
**     general format of term/doclist, starting with the first term
**     on the leaf page, is:
**
**         varint : size of first term
**         blob:    first term data
**         doclist: first doclist
**         zero-or-more {
**           varint:  number of bytes in common with previous term
**           varint:  number of bytes of new term data (nNew)
**           blob:    nNew bytes of new term data
**           doclist: next doclist
**         }
**
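**     For example (an illustration only), if the term "abcde" is followed on
**     the same page by the term "abxyz", the second term is stored as the
**     varints 2 (bytes in common with the previous term) and 3 (new bytes),
**     followed by the three-byte blob "xyz".
**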
**     doclist format:
**
**         varint:  first rowid
**         poslist: first poslist
**         zero-or-more {
**           varint:  rowid delta (always > 0)
**           poslist: next poslist
**         }
**
**     poslist format:
**
**         varint: size of poslist in bytes multiplied by 2, not including
**                 this field. Plus 1 if this entry carries the "delete" flag.
**         collist: collist for column 0
**         zero-or-more {
**           0x01 byte
**           varint: column number (I)
**           collist: collist for column I
**         }
**
**     collist format:
**
**         varint: first offset + 2
**         zero-or-more {
**           varint: offset delta + 2
**         }
**
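**     For example (an illustration only), a poslist whose data after the size
**     field occupies 5 bytes and that does not carry the "delete" flag begins
**     with the size varint 10 (5*2); with the "delete" flag set it would be
**     11. A collist for a term occurring at offsets 0 and 7 within a column
**     is encoded as the two varints 2 (0+2) and 9 (offset delta 7, plus 2).
**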
**   PAGE FORMAT
**
**     Each leaf page begins with a 4-byte header containing 2 16-bit
**     unsigned integer fields in big-endian format. They are:
**
**       * The byte offset of the first rowid on the page, if it exists
**         and occurs before the first term (otherwise 0).
**
**       * The byte offset of the start of the page footer. If the page
**         footer is 0 bytes in size, then this field is the same as the
**         size of the leaf page in bytes.
**
**     The page footer consists of a single varint for each term located
**     on the page. Each varint is the byte offset of the current term
**     within the page, delta-compressed against the previous value. In
**     other words, the first varint in the footer is the byte offset of
**     the first term, the second is the byte offset of the second less that
**     of the first, and so on.
**
**     The term/doclist format described above is accurate if the entire
**     term/doclist data fits on a single leaf page. If this is not the case,
**     the format is changed in two ways:
**
**       + if the first rowid on a page occurs before the first term, it
**         is stored as a literal value:
**
**             varint: first rowid
**
**       + the first term on each page is stored in the same way as the
**         very first term of the segment:
**
**             varint : size of first term
**             blob: first term data
**
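**     For example (an illustration only), a leaf page whose first rowid is
**     stored at byte offset 16 and occurs before the first term, and whose
**     page footer starts at byte offset 1000, begins with the four header
**     bytes 0x00 0x10 0x03 0xE8.
**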
** 5. Segment doclist indexes:
**
**   Doclist indexes are themselves b-trees, however they usually consist of
**   a single leaf record only. The format of each doclist index leaf page
**   is:
**
**     * Flags byte. Bits are:
**         0x01: Clear if leaf is also the root page, otherwise set.
**
**     * Page number of fts index leaf page. As a varint.
**
**     * First rowid on page indicated by previous field. As a varint.
**
**     * A list of varints, one for each subsequent termless page. A
**       positive delta if the termless page contains at least one rowid,
**       or an 0x00 byte otherwise.
**
**   Internal doclist index nodes are:
**
**     * Flags byte. Bits are:
**         0x01: Clear for root page, otherwise set.
**
**     * Page number of first child page. As a varint.
**
**     * Copy of first rowid on page indicated by previous field. As a varint.
**
**     * A list of delta-encoded varints - the first rowid on each subsequent
**       child page.
**
*/

/*
** Rowids for the averages and structure records in the %_data table.
*/
#define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID   10    /* The structure record */

/*
** Macros determining the rowids used by segment leaves and dlidx leaves
** and nodes. All nodes and leaves are stored in the %_data table with large
** positive rowids.
**
** Each segment has a unique non-zero 16-bit id.
**
** The rowid for each segment leaf is found by passing the segment id and
** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
** sequentially starting from 1.
*/
#define FTS5_DATA_ID_B     16     /* Max seg id number 65535 */
#define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B  5     /* Max dlidx tree height of 32 */
#define FTS5_DATA_PAGE_B   31     /* Max page number of 2147483648 */

#define fts5_dri(segid, dlidx, height, pgno) (                               \
  ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +  \
  ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                \
  ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                    \
  ((i64)(pgno))                                                              \
)

#define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)

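/*
** Illustrative example (values chosen arbitrarily): with the bit widths
** defined above, the segment id is shifted left by
** FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B + FTS5_DATA_DLI_B = 37 bits, so,
** for instance:
**
**   FTS5_SEGMENT_ROWID(3, 2)  == ((i64)3 << 37) + 2 == 412316860418
**   FTS5_DLIDX_ROWID(3, 0, 2) == ((i64)3 << 37) + ((i64)1 << 36) + 2
**
** i.e. the dlidx pages of a segment occupy a rowid range disjoint from the
** rowids used by the segment's own leaf pages.
*/
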
/*
** Maximum segments permitted in a single index
*/
#define FTS5_MAX_SEGMENT 2000

#ifdef SQLITE_DEBUG
int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
#endif


/*
** Each time a blob is read from the %_data table, it is padded with this
** many zero bytes. This makes it easier to decode the various record formats
** without overreading if the records are corrupt.
*/
#define FTS5_DATA_ZERO_PADDING 8
#define FTS5_DATA_PADDING 20

typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;

struct Fts5Data {
  u8 *p;                          /* Pointer to buffer containing record */
  int nn;                         /* Size of record in bytes */
  int szLeaf;                     /* Size of leaf without page-index */
};

/*
** One object per %_data table.
*/
struct Fts5Index {
  Fts5Config *pConfig;            /* Virtual table configuration */
  char *zDataTbl;                 /* Name of %_data table */
  int nWorkUnit;                  /* Leaf pages in a "unit" of work */

  /*
  ** Variables related to the accumulation of tokens and doclists within the
  ** in-memory hash tables before they are flushed to disk.
  */
  Fts5Hash *pHash;                /* Hash table for in-memory data */
  int nPendingData;               /* Current bytes of pending data */
  i64 iWriteRowid;                /* Rowid for current doc being written */
  int bDelete;                    /* Current write is a delete */

  /* Error state. */
  int rc;                         /* Current error code */

  /* State used by the fts5DataXXX() functions. */
  sqlite3_blob *pReader;          /* RO incr-blob open on %_data table */
  sqlite3_stmt *pWriter;          /* "INSERT ... %_data VALUES(?,?)" */
  sqlite3_stmt *pDeleter;         /* "DELETE FROM %_data ... id>=? AND id<=?" */
  sqlite3_stmt *pIdxWriter;       /* "INSERT ... %_idx VALUES(?,?,?,?)" */
  sqlite3_stmt *pIdxDeleter;      /* "DELETE FROM %_idx WHERE segid=?" */
  sqlite3_stmt *pIdxSelect;
  int nRead;                      /* Total number of blocks read */
};

struct Fts5DoclistIter {
  u8 *aEof;                       /* Pointer to 1 byte past end of doclist */

  /* Output variables. aPoslist==0 at EOF */
  i64 iRowid;
  u8 *aPoslist;
  int nPoslist;
  int nSize;
};

/*
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory, which uses instances of the
** other Fts5StructureXXX types as components.
*/
struct Fts5StructureSegment {
  int iSegid;                     /* Segment id */
  int pgnoFirst;                  /* First leaf page number in segment */
  int pgnoLast;                   /* Last leaf page number in segment */
};
struct Fts5StructureLevel {
  int nMerge;                     /* Number of segments in incr-merge */
  int nSeg;                       /* Total number of segments on level */
  Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
};
struct Fts5Structure {
  int nRef;                       /* Object reference count */
  u64 nWriteCounter;              /* Total leaves written to level 0 */
  int nSegment;                   /* Total segments in this structure */
  int nLevel;                     /* Number of levels in this index */
  Fts5StructureLevel aLevel[1];   /* Array of nLevel level objects */
};

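/*
** Note (an assumption for illustration, not stated above): aLevel[1] is
** declared with a single element but is used as a variable-length tail, so
** an Fts5Structure holding nLevel levels is expected to be allocated as one
** block of roughly sizeof(Fts5Structure) + (nLevel-1)*sizeof(Fts5StructureLevel)
** bytes, with each level's aSeg[] array allocated separately.
*/
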
/*
** An object of type Fts5SegWriter is used to write to segments.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  int iPrevPgidx;                 /* Previous value written into pgidx */
  Fts5Buffer buf;                 /* Buffer containing leaf data */
  Fts5Buffer pgidx;               /* Buffer containing page-index */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
  i64 iPrev;                      /* Previous rowid value written to page */
  Fts5Buffer buf;                 /* Buffer containing page data */
};
struct Fts5SegWriter {
  int iSegid;                     /* Segid to write to */
  Fts5PageWriter writer;          /* PageWriter object */
  i64 iPrevRowid;                 /* Previous rowid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */

  int nDlidx;                     /* Allocated size of aDlidx[] array */
  Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */

  /* Values to insert into the %_idx table */
  Fts5Buffer btterm;              /* Next term to insert into %_idx table */
  int iBtPage;                    /* Page number corresponding to btterm */
};

typedef struct Fts5CResult Fts5CResult;
struct Fts5CResult {
  u16 iFirst;                     /* aSeg[] index of firstest iterator */
  u8 bTermEq;                     /* True if the terms are equal */
};

/*
** Object for iterating through a single segment, visiting each term/rowid
** pair in the segment.
**
** pSeg:
**   The segment to iterate through.
**
** iLeafPgno:
**   Current leaf page number within segment.
**
** iLeafOffset:
**   Byte offset within the current leaf that is the first byte of the
**   position list data (one byte past the position-list size field).
**
** pLeaf:
**   Buffer containing current leaf page data. Set to NULL at EOF.
**
** iTermLeafPgno, iTermLeafOffset:
**   Leaf page number containing the last term read from the segment, and
**   the offset immediately following the term data.
**
** flags:
**   Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
**
**   FTS5_SEGITER_ONETERM:
**     If set, set the iterator to point to EOF after the current doclist
**     has been exhausted. Do not proceed to the next term in the segment.
**
**   FTS5_SEGITER_REVERSE:
**     This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
**     it is set, iterate through rowids in descending order instead of the
**     default ascending order.
**
** iRowidOffset/nRowidOffset/aRowidOffset:
**   These are used if the FTS5_SEGITER_REVERSE flag is set.
**
**   For each rowid on the page corresponding to the current term, the
**   corresponding aRowidOffset[] entry is set to the byte offset of the
**   start of the "position-list-size" field within the page.
**
** iTermIdx:
**   Index of current term on iTermLeafPgno.
*/

struct Fts5SegIter {
  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
  int flags;                      /* Mask of configuration flags */
  int iLeafPgno;                  /* Current leaf page number */
  Fts5Data *pLeaf;                /* Current leaf data */
  Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
  int iLeafOffset;                /* Byte offset within current leaf */

  /* Next method */
  void (*xNext)(Fts5Index*, Fts5SegIter*, int*);

  /* The page and offset from which the current term was read. The offset
  ** is the offset of the first rowid in the current doclist. */
  int iTermLeafPgno;
  int iTermLeafOffset;

  int iPgidxOff;                  /* Next offset in pgidx */
  int iEndofDoclist;

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offsets to rowid fields */

  Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */

  /* Variables populated based on current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
  int nPos;                       /* Number of bytes in current position list */
  u8 bDel;                        /* True if the delete flag is set */
};

/*
** Argument is a pointer to an Fts5Data structure that contains a
** leaf page.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)

#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02

/*
** Argument is a pointer to an Fts5Data structure that contains a leaf
** page. This macro evaluates to true if the leaf contains no terms, or
** false if it contains at least one term.
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))

/*
** Object for iterating through the merged results of one or more segments,
** visiting each term/rowid pair in the merged data.
**
** nSeg is always a power of two greater than or equal to the number of
** segments that this object is merging data from. Both the aSeg[] and
** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
** with zeroed objects - these are handled as if they were iterators opened
** on empty segments.
**
** The result of comparing segments aSeg[N] and aSeg[N+1], where N is an
** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
** comparison in this context is the index of the iterator that currently
** points to the smaller term/rowid combination. Iterators at EOF are
** considered to be greater than all other iterators.
**
** aFirst[1] contains the index in aSeg[] of the iterator that points to
** the smallest key overall. aFirst[0] is unused.
**
** poslist:
**   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
**   There is no way to tell if this is populated or not.
*/
struct Fts5Iter {
  Fts5IndexIter base;             /* Base class containing output vars */

  Fts5Index *pIndex;              /* Index that owns this iterator */
  Fts5Structure *pStruct;         /* Database structure for this iterator */
  Fts5Buffer poslist;             /* Buffer containing current poslist */
  Fts5Colset *pColset;            /* Restrict matches to these columns */

  /* Invoked to set output variables. */
  void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);

  int nSeg;                       /* Size of aSeg[] array */
  int bRev;                       /* True to iterate in reverse order */
  u8 bSkipEmpty;                  /* True to skip deleted entries */

  i64 iSwitchRowid;               /* First rowid held by any iterator other than aFirst[1] */
  Fts5CResult *aFirst;            /* Current merge state (see above) */
  Fts5SegIter aSeg[1];            /* Array of segment iterators */
};

/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
**
** pData:
**   Record containing the doclist-index data.
**
** bEof:
**   Set to true once iterator has reached EOF.
**
** iOff:
**   Set to the current offset within record pData.
*/
struct Fts5DlidxLvl {
  Fts5Data *pData;                /* Data for current page of this level */
  int iOff;                       /* Current offset into pData */
  int bEof;                       /* At EOF already */
  int iFirstOff;                  /* Used by reverse iterators */

  /* Output variables */
  int iLeafPgno;                  /* Page number of current leaf page */
  i64 iRowid;                     /* First rowid on leaf iLeafPgno */
};
struct Fts5DlidxIter {
  int nLvl;
  int iSegid;
  Fts5DlidxLvl aLvl[1];
};

static void fts5PutU16(u8 *aOut, u16 iVal){
  aOut[0] = (iVal>>8);
  aOut[1] = (iVal&0xFF);
}

static u16 fts5GetU16(const u8 *aIn){
  return ((u16)aIn[0] << 8) + aIn[1];
}

/*
** Allocate and return a buffer at least nByte bytes in size.
**
** If an OOM error is encountered, return NULL and set the error code in
** the Fts5Index handle passed as the first argument.
*/
static void *fts5IdxMalloc(Fts5Index *p, int nByte){
  return sqlite3Fts5MallocZero(&p->rc, nByte);
}

/*
** Compare the contents of the pLeft buffer with the pRight/nRight blob.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  int res = memcmp(pLeft->p, pRight, nCmp);
  return (res==0 ? (pLeft->n - nRight) : res);
}
#endif

/*
** Compare the contents of the two buffers using memcmp(). If one buffer
** is a prefix of the other, it is considered the lesser.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
*/
static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
  int nCmp = MIN(pLeft->n, pRight->n);
  int res = memcmp(pLeft->p, pRight->p, nCmp);
  return (res==0 ? (pLeft->n - pRight->n) : res);
}

/*
** Return the byte offset of the first term on leaf page pLeaf, as recorded
** in the first entry of the page's page-index.
*/
static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
  int ret;
  fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
  return ret;
}

/*
** Close the read-only blob handle, if it is open.
*/
static void fts5CloseReader(Fts5Index *p){
  if( p->pReader ){
    sqlite3_blob *pReader = p->pReader;
    p->pReader = 0;
    sqlite3_blob_close(pReader);
  }
}
|
|
|
|
|
|
/*
|
|
** Retrieve a record from the %_data table.
|
|
**
|
|
** If an error occurs, NULL is returned and an error left in the
|
|
** Fts5Index object.
|
|
*/
|
|
static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
|
|
Fts5Data *pRet = 0;
|
|
if( p->rc==SQLITE_OK ){
|
|
int rc = SQLITE_OK;
|
|
|
|
if( p->pReader ){
|
|
/* This call may return SQLITE_ABORT if there has been a savepoint
|
|
** rollback since it was last used. In this case a new blob handle
|
|
** is required. */
|
|
sqlite3_blob *pBlob = p->pReader;
|
|
p->pReader = 0;
|
|
rc = sqlite3_blob_reopen(pBlob, iRowid);
|
|
assert( p->pReader==0 );
|
|
p->pReader = pBlob;
|
|
if( rc!=SQLITE_OK ){
|
|
fts5CloseReader(p);
|
|
}
|
|
if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
|
|
}
|
|
|
|
/* If the blob handle is not open at this point, open it and seek
|
|
** to the requested entry. */
|
|
if( p->pReader==0 && rc==SQLITE_OK ){
|
|
Fts5Config *pConfig = p->pConfig;
|
|
rc = sqlite3_blob_open(pConfig->db,
|
|
pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
|
|
);
|
|
}
|
|
|
|
/* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
|
|
** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
|
|
** All the reasons those functions might return SQLITE_ERROR - missing
|
|
** table, missing row, non-blob/text in block column - indicate
|
|
** backing store corruption. */
|
|
if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
|
|
|
|
if( rc==SQLITE_OK ){
|
|
u8 *aOut = 0; /* Read blob data into this buffer */
|
|
int nByte = sqlite3_blob_bytes(p->pReader);
|
|
int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
|
|
pRet = (Fts5Data*)sqlite3_malloc(nAlloc);
|
|
if( pRet ){
|
|
pRet->nn = nByte;
|
|
aOut = pRet->p = (u8*)&pRet[1];
|
|
}else{
|
|
rc = SQLITE_NOMEM;
|
|
}
|
|
|
|
if( rc==SQLITE_OK ){
|
|
rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
|
|
}
|
|
if( rc!=SQLITE_OK ){
|
|
sqlite3_free(pRet);
|
|
pRet = 0;
|
|
}else{
|
|
/* TODO1: Fix this */
|
|
pRet->szLeaf = fts5GetU16(&pRet->p[2]);
|
|
}
|
|
}
|
|
p->rc = rc;
|
|
p->nRead++;
|
|
}
|
|
|
|
assert( (pRet==0)==(p->rc!=SQLITE_OK) );
|
|
return pRet;
|
|
}
|
|
|
|
|
|
/*
|
|
** Release a reference to a data record returned by an earlier call to
|
|
** fts5DataRead().
|
|
*/
|
|
static void fts5DataRelease(Fts5Data *pData){
|
|
sqlite3_free(pData);
|
|
}
|
|
|
|
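/*
** If no error has already occurred, prepare the SQL in buffer zSql against
** the database handle and store the resulting statement in *ppStmt. If
** zSql is NULL, the error code is set to SQLITE_NOMEM. In all cases zSql
** is passed to sqlite3_free() before returning. Return the current error
** code.
*/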
static int fts5IndexPrepareStmt(
|
|
Fts5Index *p,
|
|
sqlite3_stmt **ppStmt,
|
|
char *zSql
|
|
){
|
|
if( p->rc==SQLITE_OK ){
|
|
if( zSql ){
|
|
p->rc = sqlite3_prepare_v2(p->pConfig->db, zSql, -1, ppStmt, 0);
|
|
}else{
|
|
p->rc = SQLITE_NOMEM;
|
|
}
|
|
}
|
|
sqlite3_free(zSql);
|
|
return p->rc;
|
|
}
|
|
|
|
|
|
/*
|
|
** INSERT OR REPLACE a record into the %_data table.
|
|
*/
|
|
static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
|
|
if( p->rc!=SQLITE_OK ) return;
|
|
|
|
if( p->pWriter==0 ){
|
|
Fts5Config *pConfig = p->pConfig;
|
|
fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
|
|
"REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
|
|
pConfig->zDb, pConfig->zName
|
|
));
|
|
if( p->rc ) return;
|
|
}
|
|
|
|
sqlite3_bind_int64(p->pWriter, 1, iRowid);
|
|
sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
|
|
sqlite3_step(p->pWriter);
|
|
p->rc = sqlite3_reset(p->pWriter);
|
|
}
|
|
|
|
/*
|
|
** Execute the following SQL:
|
|
**
|
|
** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
|
|
*/
|
|
static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
|
|
if( p->rc!=SQLITE_OK ) return;
|
|
|
|
if( p->pDeleter==0 ){
|
|
int rc;
|
|
Fts5Config *pConfig = p->pConfig;
|
|
char *zSql = sqlite3_mprintf(
|
|
"DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
|
|
pConfig->zDb, pConfig->zName
|
|
);
|
|
if( zSql==0 ){
|
|
rc = SQLITE_NOMEM;
|
|
}else{
|
|
rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pDeleter, 0);
|
|
sqlite3_free(zSql);
|
|
}
|
|
if( rc!=SQLITE_OK ){
|
|
p->rc = rc;
|
|
return;
|
|
}
|
|
}
|
|
|
|
sqlite3_bind_int64(p->pDeleter, 1, iFirst);
|
|
sqlite3_bind_int64(p->pDeleter, 2, iLast);
|
|
sqlite3_step(p->pDeleter);
|
|
p->rc = sqlite3_reset(p->pDeleter);
|
|
}
|
|
|
|
/*
|
|
** Remove all records associated with segment iSegid.
|
|
*/
|
|
static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
|
|
i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
|
|
i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
|
|
fts5DataDelete(p, iFirst, iLast);
|
|
if( p->pIdxDeleter==0 ){
|
|
Fts5Config *pConfig = p->pConfig;
|
|
fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
|
|
"DELETE FROM '%q'.'%q_idx' WHERE segid=?",
|
|
pConfig->zDb, pConfig->zName
|
|
));
|
|
}
|
|
if( p->rc==SQLITE_OK ){
|
|
sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
|
|
sqlite3_step(p->pIdxDeleter);
|
|
p->rc = sqlite3_reset(p->pIdxDeleter);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Release a reference to an Fts5Structure object returned by an earlier
|
|
** call to fts5StructureRead() or fts5StructureDecode().
|
|
*/
|
|
static void fts5StructureRelease(Fts5Structure *pStruct){
|
|
if( pStruct && 0>=(--pStruct->nRef) ){
|
|
int i;
|
|
assert( pStruct->nRef==0 );
|
|
for(i=0; i<pStruct->nLevel; i++){
|
|
sqlite3_free(pStruct->aLevel[i].aSeg);
|
|
}
|
|
sqlite3_free(pStruct);
|
|
}
|
|
}
|
|
|
|
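/*
** Increment the reference count of index structure object pStruct.
*/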
static void fts5StructureRef(Fts5Structure *pStruct){
|
|
pStruct->nRef++;
|
|
}
|
|
|
|
/*
|
|
** Deserialize and return the structure record currently stored in serialized
|
|
** form within buffer pData/nData.
|
|
**
|
|
** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
|
|
** are over-allocated by one slot. This allows the structure contents
|
|
** to be more easily edited.
|
|
**
|
|
** If an error occurs, *ppOut is set to NULL and an SQLite error code
|
|
** returned. Otherwise, *ppOut is set to point to the new object and
|
|
** SQLITE_OK returned.
|
|
*/
|
|
static int fts5StructureDecode(
|
|
const u8 *pData, /* Buffer containing serialized structure */
|
|
int nData, /* Size of buffer pData in bytes */
|
|
int *piCookie, /* Configuration cookie value */
|
|
Fts5Structure **ppOut /* OUT: Deserialized object */
|
|
){
|
|
int rc = SQLITE_OK;
|
|
int i = 0;
|
|
int iLvl;
|
|
int nLevel = 0;
|
|
int nSegment = 0;
|
|
int nByte; /* Bytes of space to allocate at pRet */
|
|
Fts5Structure *pRet = 0; /* Structure object to return */
|
|
|
|
/* Grab the cookie value */
|
|
if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
|
|
i = 4;
|
|
|
|
/* Read the total number of levels and segments from the start of the
|
|
** structure record. */
|
|
i += fts5GetVarint32(&pData[i], nLevel);
|
|
i += fts5GetVarint32(&pData[i], nSegment);
|
|
nByte = (
|
|
sizeof(Fts5Structure) + /* Main structure */
|
|
sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */
|
|
);
|
|
pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
|
|
|
|
if( pRet ){
|
|
pRet->nRef = 1;
|
|
pRet->nLevel = nLevel;
|
|
pRet->nSegment = nSegment;
|
|
i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
|
|
|
|
for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
|
|
Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
|
|
int nTotal;
|
|
int iSeg;
|
|
|
|
if( i>=nData ){
|
|
rc = FTS5_CORRUPT;
|
|
}else{
|
|
i += fts5GetVarint32(&pData[i], pLvl->nMerge);
|
|
i += fts5GetVarint32(&pData[i], nTotal);
|
|
assert( nTotal>=pLvl->nMerge );
|
|
pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
|
|
nTotal * sizeof(Fts5StructureSegment)
|
|
);
|
|
}
|
|
|
|
if( rc==SQLITE_OK ){
|
|
pLvl->nSeg = nTotal;
|
|
for(iSeg=0; iSeg<nTotal; iSeg++){
|
|
if( i>=nData ){
|
|
rc = FTS5_CORRUPT;
|
|
break;
|
|
}
|
|
i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
|
|
i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
|
|
i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
|
|
}
|
|
}
|
|
}
|
|
if( rc!=SQLITE_OK ){
|
|
fts5StructureRelease(pRet);
|
|
pRet = 0;
|
|
}
|
|
}
|
|
|
|
*ppOut = pRet;
|
|
return rc;
|
|
}
|
|
|
|
/*
** Add a new, empty, level to the end of the aLevel[] array of structure
** object (*ppStruct). If an OOM error occurs, set *pRc to SQLITE_NOMEM
** and leave *ppStruct unmodified.
*/
|
|
static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
|
|
if( *pRc==SQLITE_OK ){
|
|
Fts5Structure *pStruct = *ppStruct;
|
|
int nLevel = pStruct->nLevel;
|
|
int nByte = (
|
|
sizeof(Fts5Structure) + /* Main structure */
|
|
sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */
|
|
);
|
|
|
|
pStruct = sqlite3_realloc(pStruct, nByte);
|
|
if( pStruct ){
|
|
memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
|
|
pStruct->nLevel++;
|
|
*ppStruct = pStruct;
|
|
}else{
|
|
*pRc = SQLITE_NOMEM;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Extend level iLvl so that there is room for at least nExtra more
|
|
** segments.
|
|
*/
|
|
static void fts5StructureExtendLevel(
|
|
int *pRc,
|
|
Fts5Structure *pStruct,
|
|
int iLvl,
|
|
int nExtra,
|
|
int bInsert
|
|
){
|
|
if( *pRc==SQLITE_OK ){
|
|
Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
|
|
Fts5StructureSegment *aNew;
|
|
int nByte;
|
|
|
|
nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
|
|
aNew = sqlite3_realloc(pLvl->aSeg, nByte);
|
|
if( aNew ){
|
|
if( bInsert==0 ){
|
|
memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
|
|
}else{
|
|
int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
|
|
memmove(&aNew[nExtra], aNew, nMove);
|
|
memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
|
|
}
|
|
pLvl->aSeg = aNew;
|
|
}else{
|
|
*pRc = SQLITE_NOMEM;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Read, deserialize and return the structure record.
|
|
**
|
|
** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
|
|
** are over-allocated as described for function fts5StructureDecode()
|
|
** above.
|
|
**
|
|
** If an error occurs, NULL is returned and an error code left in the
|
|
** Fts5Index handle. If an error has already occurred when this function
|
|
** is called, it is a no-op.
|
|
*/
|
|
static Fts5Structure *fts5StructureRead(Fts5Index *p){
|
|
Fts5Config *pConfig = p->pConfig;
|
|
Fts5Structure *pRet = 0; /* Object to return */
|
|
int iCookie; /* Configuration cookie */
|
|
Fts5Data *pData;
|
|
|
|
pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
|
|
if( p->rc ) return 0;
|
|
/* TODO: Do we need this if the leaf-index is appended? Probably... */
|
|
memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
|
|
p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
|
|
if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
|
|
p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
|
|
}
|
|
|
|
fts5DataRelease(pData);
|
|
if( p->rc!=SQLITE_OK ){
|
|
fts5StructureRelease(pRet);
|
|
pRet = 0;
|
|
}
|
|
return pRet;
|
|
}
|
|
|
|
/*
|
|
** Return the total number of segments in index structure pStruct. This
|
|
** function is only ever used as part of assert() conditions.
|
|
*/
|
|
#ifdef SQLITE_DEBUG
|
|
static int fts5StructureCountSegments(Fts5Structure *pStruct){
|
|
int nSegment = 0; /* Total number of segments */
|
|
if( pStruct ){
|
|
int iLvl; /* Used to iterate through levels */
|
|
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
|
|
nSegment += pStruct->aLevel[iLvl].nSeg;
|
|
}
|
|
}
|
|
|
|
return nSegment;
|
|
}
|
|
#endif
|
|
|
|
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \
|
|
assert( (pBuf)->nSpace>=((pBuf)->n+nBlob) ); \
|
|
memcpy(&(pBuf)->p[(pBuf)->n], pBlob, nBlob); \
|
|
(pBuf)->n += nBlob; \
|
|
}
|
|
|
|
#define fts5BufferSafeAppendVarint(pBuf, iVal) { \
|
|
(pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal)); \
|
|
assert( (pBuf)->nSpace>=(pBuf)->n ); \
|
|
}
|
|
|
|
|
|
/*
|
|
** Serialize and store the "structure" record.
|
|
**
|
|
** If an error occurs, leave an error code in the Fts5Index object. If an
|
|
** error has already occurred, this function is a no-op.
|
|
*/
|
|
static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
|
|
if( p->rc==SQLITE_OK ){
|
|
Fts5Buffer buf; /* Buffer to serialize record into */
|
|
int iLvl; /* Used to iterate through levels */
|
|
int iCookie; /* Cookie value to store */
|
|
|
|
assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
|
|
memset(&buf, 0, sizeof(Fts5Buffer));
|
|
|
|
/* Append the current configuration cookie */
|
|
iCookie = p->pConfig->iCookie;
|
|
if( iCookie<0 ) iCookie = 0;
|
|
|
|
if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){
|
|
sqlite3Fts5Put32(buf.p, iCookie);
|
|
buf.n = 4;
|
|
fts5BufferSafeAppendVarint(&buf, pStruct->nLevel);
|
|
fts5BufferSafeAppendVarint(&buf, pStruct->nSegment);
|
|
fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter);
|
|
}
|
|
|
|
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
|
|
int iSeg; /* Used to iterate through segments */
|
|
Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
|
|
fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
|
|
fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
|
|
assert( pLvl->nMerge<=pLvl->nSeg );
|
|
|
|
for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
|
|
fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
|
|
fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
|
|
fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
|
|
}
|
|
}
|
|
|
|
fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
|
|
fts5BufferFree(&buf);
|
|
}
|
|
}
|
|
|
|
#if 0
|
|
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
|
|
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
|
|
int rc = SQLITE_OK;
|
|
Fts5Buffer buf;
|
|
memset(&buf, 0, sizeof(buf));
|
|
fts5DebugStructure(&rc, &buf, pStruct);
|
|
fprintf(stdout, "%s: %s\n", zCaption, buf.p);
|
|
fflush(stdout);
|
|
fts5BufferFree(&buf);
|
|
}
|
|
#else
|
|
# define fts5PrintStructure(x,y)
|
|
#endif
|
|
|
|
static int fts5SegmentSize(Fts5StructureSegment *pSeg){
|
|
return 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
|
|
}
|
|
|
|
/*
** Promote as many segments as possible from levels greater than iPromote
** to level iPromote of index structure pStruct. Only segments of szPromote
** pages or fewer are promoted. If an error occurs, an error code is left
** in the Fts5Index handle passed as the first argument.
*/
|
|
static void fts5StructurePromoteTo(
|
|
Fts5Index *p,
|
|
int iPromote,
|
|
int szPromote,
|
|
Fts5Structure *pStruct
|
|
){
|
|
int il, is;
|
|
Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
|
|
|
|
if( pOut->nMerge==0 ){
|
|
for(il=iPromote+1; il<pStruct->nLevel; il++){
|
|
Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
|
|
if( pLvl->nMerge ) return;
|
|
for(is=pLvl->nSeg-1; is>=0; is--){
|
|
int sz = fts5SegmentSize(&pLvl->aSeg[is]);
|
|
if( sz>szPromote ) return;
|
|
fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
|
|
if( p->rc ) return;
|
|
memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
|
|
pOut->nSeg++;
|
|
pLvl->nSeg--;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** A new segment has just been written to level iLvl of index structure
|
|
** pStruct. This function determines if any segments should be promoted
|
|
** as a result. Segments are promoted in two scenarios:
|
|
**
|
|
** a) If the segment just written is smaller than one or more segments
|
|
** within the previous populated level, it is promoted to the previous
|
|
** populated level.
|
|
**
|
|
** b) If the segment just written is larger than the newest segment on
|
|
** the next populated level, then that segment, and any other adjacent
|
|
** segments that are also smaller than the one just written, are
|
|
** promoted.
|
|
**
|
|
** If one or more segments are promoted, the structure object is updated
|
|
** to reflect this.
|
|
*/
|
|
static void fts5StructurePromote(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
int iLvl, /* Index level just updated */
|
|
Fts5Structure *pStruct /* Index structure */
|
|
){
|
|
if( p->rc==SQLITE_OK ){
|
|
int iTst;
|
|
int iPromote = -1;
|
|
int szPromote = 0; /* Promote anything this size or smaller */
|
|
Fts5StructureSegment *pSeg; /* Segment just written */
|
|
int szSeg; /* Size of segment just written */
|
|
int nSeg = pStruct->aLevel[iLvl].nSeg;
|
|
|
|
if( nSeg==0 ) return;
|
|
pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
|
|
szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
|
|
|
|
/* Check for condition (a) */
|
|
for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
|
|
if( iTst>=0 ){
|
|
int i;
|
|
int szMax = 0;
|
|
Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
|
|
assert( pTst->nMerge==0 );
|
|
for(i=0; i<pTst->nSeg; i++){
|
|
int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
|
|
if( sz>szMax ) szMax = sz;
|
|
}
|
|
if( szMax>=szSeg ){
|
|
/* Condition (a) is true. Promote the newest segment on level
|
|
** iLvl to level iTst. */
|
|
iPromote = iTst;
|
|
szPromote = szMax;
|
|
}
|
|
}
|
|
|
|
/* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
|
|
** is a no-op if it is not. */
|
|
if( iPromote<0 ){
|
|
iPromote = iLvl;
|
|
szPromote = szSeg;
|
|
}
|
|
fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
** Advance the iterator passed as the only argument. If the end of the
|
|
** doclist-index page is reached, return non-zero.
|
|
*/
|
|
static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
|
|
Fts5Data *pData = pLvl->pData;
|
|
|
|
if( pLvl->iOff==0 ){
|
|
assert( pLvl->bEof==0 );
|
|
pLvl->iOff = 1;
|
|
pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
|
|
pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
|
|
pLvl->iFirstOff = pLvl->iOff;
|
|
}else{
|
|
int iOff;
|
|
for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
|
|
if( pData->p[iOff] ) break;
|
|
}
|
|
|
|
if( iOff<pData->nn ){
|
|
i64 iVal;
|
|
pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
|
|
iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
|
|
pLvl->iRowid += iVal;
|
|
pLvl->iOff = iOff;
|
|
}else{
|
|
pLvl->bEof = 1;
|
|
}
|
|
}
|
|
|
|
return pLvl->bEof;
|
|
}
|
|
|
|
/*
** Advance level iLvl of the doclist-index iterator passed as the second
** argument. If the current page of that level is exhausted, advance the
** parent level and load a new page for level iLvl from it. Return non-zero
** if level 0 of the iterator is at EOF when this function returns.
*/
|
|
static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
|
|
Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
|
|
|
|
assert( iLvl<pIter->nLvl );
|
|
if( fts5DlidxLvlNext(pLvl) ){
|
|
if( (iLvl+1) < pIter->nLvl ){
|
|
fts5DlidxIterNextR(p, pIter, iLvl+1);
|
|
if( pLvl[1].bEof==0 ){
|
|
fts5DataRelease(pLvl->pData);
|
|
memset(pLvl, 0, sizeof(Fts5DlidxLvl));
|
|
pLvl->pData = fts5DataRead(p,
|
|
FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
|
|
);
|
|
if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
|
|
}
|
|
}
|
|
}
|
|
|
|
return pIter->aLvl[0].bEof;
|
|
}
|
|
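/*
** Advance doclist-index iterator pIter to the next entry. Return non-zero
** if the iterator is at EOF afterwards.
*/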
static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
|
|
return fts5DlidxIterNextR(p, pIter, 0);
|
|
}
|
|
|
|
/*
|
|
** The iterator passed as the first argument has the following fields set
|
|
** as follows. This function sets up the rest of the iterator so that it
|
|
** points to the first rowid in the doclist-index.
|
|
**
|
|
** pData:
|
|
** pointer to doclist-index record,
|
|
**
|
|
** When this function is called pIter->iLeafPgno is the page number the
|
|
** doclist is associated with (the one featuring the term).
|
|
*/
|
|
static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
|
|
int i;
|
|
for(i=0; i<pIter->nLvl; i++){
|
|
fts5DlidxLvlNext(&pIter->aLvl[i]);
|
|
}
|
|
return pIter->aLvl[0].bEof;
|
|
}
|
|
|
|
|
|
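/*
** Return true if doclist-index iterator pIter is at EOF, or if an error
** has already occurred.
*/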
static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
|
|
return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof;
|
|
}
|
|
|
|
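/*
** Move doclist-index iterator pIter so that it points to the last entry
** in the doclist-index.
*/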
static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
|
|
int i;
|
|
|
|
/* Advance each level to the last entry on the last page */
|
|
for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
|
|
Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
|
|
while( fts5DlidxLvlNext(pLvl)==0 );
|
|
pLvl->bEof = 0;
|
|
|
|
if( i>0 ){
|
|
Fts5DlidxLvl *pChild = &pLvl[-1];
|
|
fts5DataRelease(pChild->pData);
|
|
memset(pChild, 0, sizeof(Fts5DlidxLvl));
|
|
pChild->pData = fts5DataRead(p,
|
|
FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Move the iterator passed as the only argument to the previous entry.
|
|
*/
|
|
static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
|
|
int iOff = pLvl->iOff;
|
|
|
|
assert( pLvl->bEof==0 );
|
|
if( iOff<=pLvl->iFirstOff ){
|
|
pLvl->bEof = 1;
|
|
}else{
|
|
u8 *a = pLvl->pData->p;
|
|
i64 iVal;
|
|
int iLimit;
|
|
int ii;
|
|
int nZero = 0;
|
|
|
|
    /* Currently iOff points to the first byte of a varint. This block
    ** decrements iOff until it points to the first byte of the previous
    ** varint, taking care not to read any memory locations that occur
    ** before the buffer in memory. */
|
|
iLimit = (iOff>9 ? iOff-9 : 0);
|
|
for(iOff--; iOff>iLimit; iOff--){
|
|
if( (a[iOff-1] & 0x80)==0 ) break;
|
|
}
|
|
|
|
fts5GetVarint(&a[iOff], (u64*)&iVal);
|
|
pLvl->iRowid -= iVal;
|
|
pLvl->iLeafPgno--;
|
|
|
|
/* Skip backwards past any 0x00 varints. */
|
|
for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
|
|
nZero++;
|
|
}
|
|
if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
|
|
/* The byte immediately before the last 0x00 byte has the 0x80 bit
|
|
** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
|
|
** bytes before a[ii]. */
|
|
int bZero = 0; /* True if last 0x00 counts */
|
|
if( (ii-8)>=pLvl->iFirstOff ){
|
|
int j;
|
|
for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
|
|
bZero = (j>8);
|
|
}
|
|
if( bZero==0 ) nZero--;
|
|
}
|
|
pLvl->iLeafPgno -= nZero;
|
|
pLvl->iOff = iOff - nZero;
|
|
}
|
|
|
|
return pLvl->bEof;
|
|
}
|
|
|
|
static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
|
|
Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
|
|
|
|
assert( iLvl<pIter->nLvl );
|
|
if( fts5DlidxLvlPrev(pLvl) ){
|
|
if( (iLvl+1) < pIter->nLvl ){
|
|
fts5DlidxIterPrevR(p, pIter, iLvl+1);
|
|
if( pLvl[1].bEof==0 ){
|
|
fts5DataRelease(pLvl->pData);
|
|
memset(pLvl, 0, sizeof(Fts5DlidxLvl));
|
|
pLvl->pData = fts5DataRead(p,
|
|
FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
|
|
);
|
|
if( pLvl->pData ){
|
|
while( fts5DlidxLvlNext(pLvl)==0 );
|
|
pLvl->bEof = 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return pIter->aLvl[0].bEof;
|
|
}
|
|
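/*
** Move doclist-index iterator pIter to the previous entry. Return non-zero
** if the iterator is at EOF afterwards.
*/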
static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
|
|
return fts5DlidxIterPrevR(p, pIter, 0);
|
|
}
|
|
|
|
/*
|
|
** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
|
|
*/
|
|
static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
|
|
if( pIter ){
|
|
int i;
|
|
for(i=0; i<pIter->nLvl; i++){
|
|
fts5DataRelease(pIter->aLvl[i].pData);
|
|
}
|
|
sqlite3_free(pIter);
|
|
}
|
|
}
|
|
|
|
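/*
** Allocate and return an iterator for the doclist-index associated with
** leaf page iLeafPg of segment iSegid. If bRev is false the iterator is
** positioned at the first entry, otherwise at the last. If an error
** occurs, NULL is returned and an error code left in the Fts5Index handle.
*/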
static Fts5DlidxIter *fts5DlidxIterInit(
|
|
Fts5Index *p, /* Fts5 Backend to iterate within */
|
|
  int bRev,                       /* True to iterate in reverse (ORDER BY DESC) order */
|
|
int iSegid, /* Segment id */
|
|
int iLeafPg /* Leaf page number to load dlidx for */
|
|
){
|
|
Fts5DlidxIter *pIter = 0;
|
|
int i;
|
|
int bDone = 0;
|
|
|
|
for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
|
|
int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
|
|
Fts5DlidxIter *pNew;
|
|
|
|
pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte);
|
|
if( pNew==0 ){
|
|
p->rc = SQLITE_NOMEM;
|
|
}else{
|
|
i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
|
|
Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
|
|
pIter = pNew;
|
|
memset(pLvl, 0, sizeof(Fts5DlidxLvl));
|
|
pLvl->pData = fts5DataRead(p, iRowid);
|
|
if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
|
|
bDone = 1;
|
|
}
|
|
pIter->nLvl = i+1;
|
|
}
|
|
}
|
|
|
|
if( p->rc==SQLITE_OK ){
|
|
pIter->iSegid = iSegid;
|
|
if( bRev==0 ){
|
|
fts5DlidxIterFirst(pIter);
|
|
}else{
|
|
fts5DlidxIterLast(p, pIter);
|
|
}
|
|
}
|
|
|
|
if( p->rc!=SQLITE_OK ){
|
|
fts5DlidxIterFree(pIter);
|
|
pIter = 0;
|
|
}
|
|
|
|
return pIter;
|
|
}
|
|
|
|
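/*
** Return the first rowid on the leaf page that doclist-index iterator
** pIter currently points to.
*/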
static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
|
|
return pIter->aLvl[0].iRowid;
|
|
}
|
|
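/*
** Return the page number of the leaf page that doclist-index iterator
** pIter currently points to.
*/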
static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
|
|
return pIter->aLvl[0].iLeafPgno;
|
|
}
|
|
|
|
/*
|
|
** Load the next leaf page into the segment iterator.
|
|
*/
|
|
static void fts5SegIterNextPage(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
Fts5SegIter *pIter /* Iterator to advance to next page */
|
|
){
|
|
Fts5Data *pLeaf;
|
|
Fts5StructureSegment *pSeg = pIter->pSeg;
|
|
fts5DataRelease(pIter->pLeaf);
|
|
pIter->iLeafPgno++;
|
|
if( pIter->pNextLeaf ){
|
|
pIter->pLeaf = pIter->pNextLeaf;
|
|
pIter->pNextLeaf = 0;
|
|
}else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
|
|
pIter->pLeaf = fts5DataRead(p,
|
|
FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
|
|
);
|
|
}else{
|
|
pIter->pLeaf = 0;
|
|
}
|
|
pLeaf = pIter->pLeaf;
|
|
|
|
if( pLeaf ){
|
|
pIter->iPgidxOff = pLeaf->szLeaf;
|
|
if( fts5LeafIsTermless(pLeaf) ){
|
|
pIter->iEndofDoclist = pLeaf->nn+1;
|
|
}else{
|
|
pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
|
|
pIter->iEndofDoclist
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Argument p points to a buffer containing a varint to be interpreted as a
|
|
** position list size field. Read the varint and return the number of bytes
|
|
** read. Before returning, set *pnSz to the number of bytes in the position
|
|
** list, and *pbDel to true if the delete flag is set, or false otherwise.
|
|
*/
|
|
static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
|
|
int nSz;
|
|
int n = 0;
|
|
fts5FastGetVarint32(p, n, nSz);
|
|
assert_nc( nSz>=0 );
|
|
*pnSz = nSz/2;
|
|
*pbDel = nSz & 0x0001;
|
|
return n;
|
|
}
|
|
|
|
/*
|
|
** Fts5SegIter.iLeafOffset currently points to the first byte of a
|
|
** position-list size field. Read the value of the field and store it
|
|
** in the following variables:
|
|
**
|
|
** Fts5SegIter.nPos
|
|
** Fts5SegIter.bDel
|
|
**
|
|
** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
|
|
** position list content (if any).
|
|
*/
|
|
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
|
|
if( p->rc==SQLITE_OK ){
|
|
int iOff = pIter->iLeafOffset; /* Offset to read at */
|
|
ASSERT_SZLEAF_OK(pIter->pLeaf);
|
|
if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
|
|
int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
|
|
pIter->bDel = 0;
|
|
pIter->nPos = 1;
|
|
if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
|
|
pIter->bDel = 1;
|
|
iOff++;
|
|
if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
|
|
pIter->nPos = 1;
|
|
iOff++;
|
|
}else{
|
|
pIter->nPos = 0;
|
|
}
|
|
}
|
|
}else{
|
|
int nSz;
|
|
fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
|
|
pIter->bDel = (nSz & 0x0001);
|
|
pIter->nPos = nSz>>1;
|
|
assert_nc( pIter->nPos>=0 );
|
|
}
|
|
pIter->iLeafOffset = iOff;
|
|
}
|
|
}
|
|
|
|
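/*
** Fts5SegIter.iLeafOffset currently points to the first byte of the rowid
** field of the next entry. Read the rowid into Fts5SegIter.iRowid and
** advance iLeafOffset past it, loading the next leaf page first if the
** offset lies beyond the end of the current leaf's content area.
*/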
static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
|
|
u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
|
|
int iOff = pIter->iLeafOffset;
|
|
|
|
ASSERT_SZLEAF_OK(pIter->pLeaf);
|
|
if( iOff>=pIter->pLeaf->szLeaf ){
|
|
fts5SegIterNextPage(p, pIter);
|
|
if( pIter->pLeaf==0 ){
|
|
if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
|
|
return;
|
|
}
|
|
iOff = 4;
|
|
a = pIter->pLeaf->p;
|
|
}
|
|
iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
|
|
pIter->iLeafOffset = iOff;
|
|
}
|
|
|
|
/*
|
|
** Fts5SegIter.iLeafOffset currently points to the first byte of the
|
|
** "nSuffix" field of a term. Function parameter nKeep contains the value
|
|
** of the "nPrefix" field (if there was one - it is passed 0 if this is
|
|
** the first term in the segment).
|
|
**
|
|
** This function populates:
|
|
**
|
|
** Fts5SegIter.term
|
|
** Fts5SegIter.rowid
|
|
**
|
|
** accordingly, and leaves Fts5SegIter.iLeafOffset pointing to the start of
** the first position list in the doclist - the one belonging to document
** Fts5SegIter.iRowid.
*/
|
|
static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
|
|
u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
|
|
int iOff = pIter->iLeafOffset; /* Offset to read at */
|
|
int nNew; /* Bytes of new data */
|
|
|
|
iOff += fts5GetVarint32(&a[iOff], nNew);
|
|
if( iOff+nNew>pIter->pLeaf->nn ){
|
|
p->rc = FTS5_CORRUPT;
|
|
return;
|
|
}
|
|
pIter->term.n = nKeep;
|
|
fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
|
|
iOff += nNew;
|
|
pIter->iTermLeafOffset = iOff;
|
|
pIter->iTermLeafPgno = pIter->iLeafPgno;
|
|
pIter->iLeafOffset = iOff;
|
|
|
|
if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
|
|
pIter->iEndofDoclist = pIter->pLeaf->nn+1;
|
|
}else{
|
|
int nExtra;
|
|
pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
|
|
pIter->iEndofDoclist += nExtra;
|
|
}
|
|
|
|
fts5SegIterLoadRowid(p, pIter);
|
|
}
|
|
|
|
static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
|
|
static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
|
|
static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
|
|
|
|
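/*
** Set the xNext method of segment iterator pIter, based on the iterator
** flags and the detail= mode of the index.
*/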
static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
|
|
if( pIter->flags & FTS5_SEGITER_REVERSE ){
|
|
pIter->xNext = fts5SegIterNext_Reverse;
|
|
}else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
|
|
pIter->xNext = fts5SegIterNext_None;
|
|
}else{
|
|
pIter->xNext = fts5SegIterNext;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Initialize the iterator object pIter to iterate through the entries in
|
|
** segment pSeg. The iterator is left pointing to the first entry when
|
|
** this function returns.
|
|
**
|
|
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
|
|
** an error has already occurred when this function is called, it is a no-op.
|
|
*/
|
|
static void fts5SegIterInit(
|
|
Fts5Index *p, /* FTS index object */
|
|
Fts5StructureSegment *pSeg, /* Description of segment */
|
|
Fts5SegIter *pIter /* Object to populate */
|
|
){
|
|
if( pSeg->pgnoFirst==0 ){
|
|
/* This happens if the segment is being used as an input to an incremental
|
|
** merge and all data has already been "trimmed". See function
|
|
** fts5TrimSegments() for details. In this case leave the iterator empty.
|
|
** The caller will see the (pIter->pLeaf==0) and assume the iterator is
|
|
** at EOF already. */
|
|
assert( pIter->pLeaf==0 );
|
|
return;
|
|
}
|
|
|
|
if( p->rc==SQLITE_OK ){
|
|
memset(pIter, 0, sizeof(*pIter));
|
|
fts5SegIterSetNext(p, pIter);
|
|
pIter->pSeg = pSeg;
|
|
pIter->iLeafPgno = pSeg->pgnoFirst-1;
|
|
fts5SegIterNextPage(p, pIter);
|
|
}
|
|
|
|
if( p->rc==SQLITE_OK ){
|
|
pIter->iLeafOffset = 4;
|
|
assert_nc( pIter->pLeaf->nn>4 );
|
|
assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
|
|
pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
|
|
fts5SegIterLoadTerm(p, pIter, 0);
|
|
fts5SegIterLoadNPos(p, pIter);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** This function is only ever called on iterators created by calls to
|
|
** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
|
|
**
|
|
** The iterator is in an unusual state when this function is called: the
|
|
** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
|
|
** the position-list size field for the first relevant rowid on the page.
|
|
** Fts5SegIter.rowid is set, but nPos and bDel are not.
|
|
**
|
|
** This function advances the iterator so that it points to the last
|
|
** relevant rowid on the page and, if necessary, initializes the
|
|
** aRowidOffset[] and iRowidOffset variables. At this point the iterator
|
|
** is in its regular state - Fts5SegIter.iLeafOffset points to the first
|
|
** byte of the position list content associated with said rowid.
|
|
*/
|
|
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
|
|
int eDetail = p->pConfig->eDetail;
|
|
int n = pIter->pLeaf->szLeaf;
|
|
int i = pIter->iLeafOffset;
|
|
u8 *a = pIter->pLeaf->p;
|
|
int iRowidOffset = 0;
|
|
|
|
if( n>pIter->iEndofDoclist ){
|
|
n = pIter->iEndofDoclist;
|
|
}
|
|
|
|
ASSERT_SZLEAF_OK(pIter->pLeaf);
|
|
while( 1 ){
|
|
i64 iDelta = 0;
|
|
|
|
if( eDetail==FTS5_DETAIL_NONE ){
|
|
/* todo */
|
|
if( i<n && a[i]==0 ){
|
|
i++;
|
|
if( i<n && a[i]==0 ) i++;
|
|
}
|
|
}else{
|
|
int nPos;
|
|
int bDummy;
|
|
i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
|
|
i += nPos;
|
|
}
|
|
if( i>=n ) break;
|
|
i += fts5GetVarint(&a[i], (u64*)&iDelta);
|
|
pIter->iRowid += iDelta;
|
|
|
|
/* If necessary, grow the pIter->aRowidOffset[] array. */
|
|
if( iRowidOffset>=pIter->nRowidOffset ){
|
|
int nNew = pIter->nRowidOffset + 8;
|
|
int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
|
|
if( aNew==0 ){
|
|
p->rc = SQLITE_NOMEM;
|
|
break;
|
|
}
|
|
pIter->aRowidOffset = aNew;
|
|
pIter->nRowidOffset = nNew;
|
|
}
|
|
|
|
pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
|
|
pIter->iLeafOffset = i;
|
|
}
|
|
pIter->iRowidOffset = iRowidOffset;
|
|
fts5SegIterLoadNPos(p, pIter);
|
|
}
|
|
|
|
/*
** The reverse (FTS5_SEGITER_REVERSE) iterator passed as the second argument
** has finished with the rowids on its current page. Move it to the closest
** preceding page of the current doclist that contains at least one rowid,
** or set Fts5SegIter.pLeaf to NULL if there is no such page (the iterator
** is then at EOF).
*/
|
|
static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
|
|
assert( pIter->flags & FTS5_SEGITER_REVERSE );
|
|
assert( pIter->flags & FTS5_SEGITER_ONETERM );
|
|
|
|
fts5DataRelease(pIter->pLeaf);
|
|
pIter->pLeaf = 0;
|
|
while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
|
|
Fts5Data *pNew;
|
|
pIter->iLeafPgno--;
|
|
pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
|
|
pIter->pSeg->iSegid, pIter->iLeafPgno
|
|
));
|
|
if( pNew ){
|
|
/* iTermLeafOffset may be equal to szLeaf if the term is the last
|
|
** thing on the page - i.e. the first rowid is on the following page.
|
|
** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
|
|
if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
|
|
assert( pIter->pLeaf==0 );
|
|
if( pIter->iTermLeafOffset<pNew->szLeaf ){
|
|
pIter->pLeaf = pNew;
|
|
pIter->iLeafOffset = pIter->iTermLeafOffset;
|
|
}
|
|
}else{
|
|
int iRowidOff;
|
|
iRowidOff = fts5LeafFirstRowidOff(pNew);
|
|
if( iRowidOff ){
|
|
pIter->pLeaf = pNew;
|
|
pIter->iLeafOffset = iRowidOff;
|
|
}
|
|
}
|
|
|
|
if( pIter->pLeaf ){
|
|
u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
|
|
pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
|
|
break;
|
|
}else{
|
|
fts5DataRelease(pNew);
|
|
}
|
|
}
|
|
}
|
|
|
|
if( pIter->pLeaf ){
|
|
pIter->iEndofDoclist = pIter->pLeaf->nn+1;
|
|
fts5SegIterReverseInitPage(p, pIter);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Return true if the iterator passed as the second argument currently
|
|
** points to a delete marker. A delete marker is an entry with a 0 byte
|
|
** position-list.
|
|
*/
|
|
static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
|
|
Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
|
|
return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
|
|
}
|
|
|
|
/*
|
|
** Advance iterator pIter to the next entry.
|
|
**
|
|
** This version of fts5SegIterNext() is only used by reverse iterators.
|
|
*/
|
|
static void fts5SegIterNext_Reverse(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
Fts5SegIter *pIter, /* Iterator to advance */
|
|
int *pbUnused /* Unused */
|
|
){
|
|
assert( pIter->flags & FTS5_SEGITER_REVERSE );
|
|
assert( pIter->pNextLeaf==0 );
|
|
UNUSED_PARAM(pbUnused);
|
|
|
|
if( pIter->iRowidOffset>0 ){
|
|
u8 *a = pIter->pLeaf->p;
|
|
int iOff;
|
|
i64 iDelta;
|
|
|
|
pIter->iRowidOffset--;
|
|
pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
|
|
fts5SegIterLoadNPos(p, pIter);
|
|
iOff = pIter->iLeafOffset;
|
|
if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
|
|
iOff += pIter->nPos;
|
|
}
|
|
fts5GetVarint(&a[iOff], (u64*)&iDelta);
|
|
pIter->iRowid -= iDelta;
|
|
}else{
|
|
fts5SegIterReverseNewPage(p, pIter);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Advance iterator pIter to the next entry.
|
|
**
|
|
** This version of fts5SegIterNext() is only used if detail=none and the
|
|
** iterator is not a reverse direction iterator.
|
|
*/
|
|
static void fts5SegIterNext_None(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
Fts5SegIter *pIter, /* Iterator to advance */
|
|
int *pbNewTerm /* OUT: Set for new term */
|
|
){
|
|
int iOff;
|
|
|
|
assert( p->rc==SQLITE_OK );
|
|
assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
|
|
assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );
|
|
|
|
ASSERT_SZLEAF_OK(pIter->pLeaf);
|
|
iOff = pIter->iLeafOffset;
|
|
|
|
/* Next entry is on the next page */
|
|
if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
|
|
fts5SegIterNextPage(p, pIter);
|
|
if( p->rc || pIter->pLeaf==0 ) return;
|
|
pIter->iRowid = 0;
|
|
iOff = 4;
|
|
}
|
|
|
|
if( iOff<pIter->iEndofDoclist ){
|
|
/* Next entry is on the current page */
|
|
i64 iDelta;
|
|
iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
|
|
pIter->iLeafOffset = iOff;
|
|
pIter->iRowid += iDelta;
|
|
}else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
|
|
if( pIter->pSeg ){
|
|
int nKeep = 0;
|
|
if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
|
|
iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
|
|
}
|
|
pIter->iLeafOffset = iOff;
|
|
fts5SegIterLoadTerm(p, pIter, nKeep);
|
|
}else{
|
|
const u8 *pList = 0;
|
|
const char *zTerm = 0;
|
|
int nList;
|
|
sqlite3Fts5HashScanNext(p->pHash);
|
|
sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
|
|
if( pList==0 ) goto next_none_eof;
|
|
pIter->pLeaf->p = (u8*)pList;
|
|
pIter->pLeaf->nn = nList;
|
|
pIter->pLeaf->szLeaf = nList;
|
|
pIter->iEndofDoclist = nList;
|
|
sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm);
|
|
pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
|
|
}
|
|
|
|
if( pbNewTerm ) *pbNewTerm = 1;
|
|
}else{
|
|
goto next_none_eof;
|
|
}
|
|
|
|
fts5SegIterLoadNPos(p, pIter);
|
|
|
|
return;
|
|
next_none_eof:
|
|
fts5DataRelease(pIter->pLeaf);
|
|
pIter->pLeaf = 0;
|
|
}
|
|
|
|
|
|
/*
|
|
** Advance iterator pIter to the next entry.
|
|
**
|
|
** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
|
|
** is not considered an error if the iterator reaches EOF. If an error has
|
|
** already occurred when this function is called, it is a no-op.
|
|
*/
|
|
static void fts5SegIterNext(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
Fts5SegIter *pIter, /* Iterator to advance */
|
|
int *pbNewTerm /* OUT: Set for new term */
|
|
){
|
|
Fts5Data *pLeaf = pIter->pLeaf;
|
|
int iOff;
|
|
int bNewTerm = 0;
|
|
int nKeep = 0;
|
|
u8 *a;
|
|
int n;
|
|
|
|
assert( pbNewTerm==0 || *pbNewTerm==0 );
|
|
assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
|
|
|
|
/* Search for the end of the position list within the current page. */
|
|
a = pLeaf->p;
|
|
n = pLeaf->szLeaf;
|
|
|
|
ASSERT_SZLEAF_OK(pLeaf);
|
|
iOff = pIter->iLeafOffset + pIter->nPos;
|
|
|
|
if( iOff<n ){
|
|
/* The next entry is on the current page. */
|
|
assert_nc( iOff<=pIter->iEndofDoclist );
|
|
if( iOff>=pIter->iEndofDoclist ){
|
|
bNewTerm = 1;
|
|
if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
|
|
iOff += fts5GetVarint32(&a[iOff], nKeep);
|
|
}
|
|
}else{
|
|
u64 iDelta;
|
|
iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
|
|
pIter->iRowid += iDelta;
|
|
assert_nc( iDelta>0 );
|
|
}
|
|
pIter->iLeafOffset = iOff;
|
|
|
|
}else if( pIter->pSeg==0 ){
|
|
const u8 *pList = 0;
|
|
const char *zTerm = 0;
|
|
int nList = 0;
|
|
assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
|
|
if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
|
|
sqlite3Fts5HashScanNext(p->pHash);
|
|
sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
|
|
}
|
|
if( pList==0 ){
|
|
fts5DataRelease(pIter->pLeaf);
|
|
pIter->pLeaf = 0;
|
|
}else{
|
|
pIter->pLeaf->p = (u8*)pList;
|
|
pIter->pLeaf->nn = nList;
|
|
pIter->pLeaf->szLeaf = nList;
|
|
pIter->iEndofDoclist = nList+1;
|
|
sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
|
|
(u8*)zTerm);
|
|
pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
|
|
*pbNewTerm = 1;
|
|
}
|
|
}else{
|
|
iOff = 0;
|
|
/* Next entry is not on the current page */
|
|
while( iOff==0 ){
|
|
fts5SegIterNextPage(p, pIter);
|
|
pLeaf = pIter->pLeaf;
|
|
if( pLeaf==0 ) break;
|
|
ASSERT_SZLEAF_OK(pLeaf);
|
|
if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
|
|
iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
|
|
pIter->iLeafOffset = iOff;
|
|
|
|
if( pLeaf->nn>pLeaf->szLeaf ){
|
|
pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
|
|
&pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
|
|
);
|
|
}
|
|
|
|
}
|
|
else if( pLeaf->nn>pLeaf->szLeaf ){
|
|
pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
|
|
&pLeaf->p[pLeaf->szLeaf], iOff
|
|
);
|
|
pIter->iLeafOffset = iOff;
|
|
pIter->iEndofDoclist = iOff;
|
|
bNewTerm = 1;
|
|
}
|
|
assert_nc( iOff<pLeaf->szLeaf );
|
|
if( iOff>pLeaf->szLeaf ){
|
|
p->rc = FTS5_CORRUPT;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Check if the iterator is now at EOF. If so, return early. */
|
|
if( pIter->pLeaf ){
|
|
if( bNewTerm ){
|
|
if( pIter->flags & FTS5_SEGITER_ONETERM ){
|
|
fts5DataRelease(pIter->pLeaf);
|
|
pIter->pLeaf = 0;
|
|
}else{
|
|
fts5SegIterLoadTerm(p, pIter, nKeep);
|
|
fts5SegIterLoadNPos(p, pIter);
|
|
if( pbNewTerm ) *pbNewTerm = 1;
|
|
}
|
|
}else{
|
|
/* The following could be done by calling fts5SegIterLoadNPos(). But
|
|
** this block is particularly performance critical, so equivalent
|
|
** code is inlined.
|
|
**
|
|
** Later: Switched back to fts5SegIterLoadNPos() because it supports
|
|
** detail=none mode. Not ideal.
|
|
*/
|
|
int nSz;
|
|
assert( p->rc==SQLITE_OK );
|
|
fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
|
|
pIter->bDel = (nSz & 0x0001);
|
|
pIter->nPos = nSz>>1;
|
|
assert_nc( pIter->nPos>=0 );
|
|
}
|
|
}
|
|
}
|
|
|
|
#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
|
|
|
|
#define fts5IndexSkipVarint(a, iOff) { \
|
|
int iEnd = iOff+9; \
|
|
while( (a[iOff++] & 0x80) && iOff<iEnd ); \
|
|
}
|
|
|
|
/*
|
|
** Iterator pIter currently points to the first rowid in a doclist. This
|
|
** function sets the iterator up so that it iterates in reverse order through
|
|
** the doclist.
|
|
*/
|
|
static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
|
|
Fts5DlidxIter *pDlidx = pIter->pDlidx;
|
|
Fts5Data *pLast = 0;
|
|
int pgnoLast = 0;
|
|
|
|
if( pDlidx ){
|
|
int iSegid = pIter->pSeg->iSegid;
|
|
pgnoLast = fts5DlidxIterPgno(pDlidx);
|
|
pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
|
|
}else{
|
|
Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
|
|
|
|
/* Currently, Fts5SegIter.iLeafOffset points to the first byte of
|
|
** position-list content for the current rowid. Back it up so that it
|
|
** points to the start of the position-list size field. */
|
|
int iPoslist;
|
|
if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
|
|
iPoslist = pIter->iTermLeafOffset;
|
|
}else{
|
|
iPoslist = 4;
|
|
}
|
|
fts5IndexSkipVarint(pLeaf->p, iPoslist);
|
|
pIter->iLeafOffset = iPoslist;
|
|
|
|
/* If this condition is true then the largest rowid for the current
|
|
** term may not be stored on the current page. So search forward to
|
|
** see where said rowid really is. */
|
|
if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
|
|
int pgno;
|
|
Fts5StructureSegment *pSeg = pIter->pSeg;
|
|
|
|
/* The last rowid in the doclist may not be on the current page. Search
|
|
** forward to find the page containing the last rowid. */
|
|
for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
|
|
i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
|
|
Fts5Data *pNew = fts5DataRead(p, iAbs);
|
|
if( pNew ){
|
|
int iRowid, bTermless;
|
|
iRowid = fts5LeafFirstRowidOff(pNew);
|
|
bTermless = fts5LeafIsTermless(pNew);
|
|
if( iRowid ){
|
|
SWAPVAL(Fts5Data*, pNew, pLast);
|
|
pgnoLast = pgno;
|
|
}
|
|
fts5DataRelease(pNew);
|
|
if( bTermless==0 ) break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* If pLast is NULL at this point, then the last rowid for this doclist
|
|
** lies on the page currently indicated by the iterator. In this case
|
|
** pIter->iLeafOffset is already set to point to the position-list size
|
|
** field associated with the first relevant rowid on the page.
|
|
**
|
|
** Or, if pLast is non-NULL, then it is the page that contains the last
|
|
** rowid. In this case configure the iterator so that it points to the
|
|
** first rowid on this page.
|
|
*/
|
|
if( pLast ){
|
|
int iOff;
|
|
fts5DataRelease(pIter->pLeaf);
|
|
pIter->pLeaf = pLast;
|
|
pIter->iLeafPgno = pgnoLast;
|
|
iOff = fts5LeafFirstRowidOff(pLast);
|
|
iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
|
|
pIter->iLeafOffset = iOff;
|
|
|
|
if( fts5LeafIsTermless(pLast) ){
|
|
pIter->iEndofDoclist = pLast->nn+1;
|
|
}else{
|
|
pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
|
|
}
|
|
|
|
}
|
|
|
|
fts5SegIterReverseInitPage(p, pIter);
|
|
}
|
|
|
|
/*
|
|
** Iterator pIter currently points to the first rowid of a doclist.
|
|
** There is a doclist-index associated with the final term on the current
|
|
** page. If the current term is the last term on the page, load the
|
|
** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
|
|
*/
|
|
static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
|
|
int iSeg = pIter->pSeg->iSegid;
|
|
int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
|
|
Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
|
|
|
|
assert( pIter->flags & FTS5_SEGITER_ONETERM );
|
|
assert( pIter->pDlidx==0 );
|
|
|
|
/* Check if the current doclist ends on this page. If it does, return
|
|
** early without loading the doclist-index (as it belongs to a different
|
|
** term). */
|
|
if( pIter->iTermLeafPgno==pIter->iLeafPgno
|
|
&& pIter->iEndofDoclist<pLeaf->szLeaf
|
|
){
|
|
return;
|
|
}
|
|
|
|
pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
|
|
}
|
|
|
|
/*
|
|
** The iterator object passed as the second argument currently contains
|
|
** no valid values except for the Fts5SegIter.pLeaf member variable. This
|
|
** function searches the leaf page for a term matching (pTerm/nTerm).
|
|
**
|
|
** If the specified term is found on the page, then the iterator is left
|
|
** pointing to it. If argument bGe is zero and the term is not found,
|
|
** the iterator is left pointing at EOF.
|
|
**
|
|
** If bGe is non-zero and the specified term is not found, then the
|
|
** iterator is left pointing to the smallest term in the segment that
|
|
** is larger than the specified term, even if this term is not on the
|
|
** current page.
|
|
*/
|
|
static void fts5LeafSeek(
|
|
Fts5Index *p, /* Leave any error code here */
|
|
int bGe, /* True for a >= search */
|
|
Fts5SegIter *pIter, /* Iterator to seek */
|
|
const u8 *pTerm, int nTerm /* Term to search for */
|
|
){
|
|
int iOff;
|
|
const u8 *a = pIter->pLeaf->p;
|
|
int szLeaf = pIter->pLeaf->szLeaf;
|
|
int n = pIter->pLeaf->nn;
|
|
|
|
int nMatch = 0;
|
|
int nKeep = 0;
|
|
int nNew = 0;
|
|
int iTermOff;
|
|
int iPgidx; /* Current offset in pgidx */
|
|
int bEndOfPage = 0;
|
|
|
|
assert( p->rc==SQLITE_OK );
|
|
|
|
iPgidx = szLeaf;
|
|
iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
|
|
iOff = iTermOff;
|
|
if( iOff>n ){
|
|
p->rc = FTS5_CORRUPT;
|
|
return;
|
|
}
|
|
|
|
while( 1 ){
|
|
|
|
/* Figure out how many new bytes are in this term */
|
|
fts5FastGetVarint32(a, iOff, nNew);
|
|
if( nKeep<nMatch ){
|
|
goto search_failed;
|
|
}
|
|
|
|
assert( nKeep>=nMatch );
|
|
if( nKeep==nMatch ){
|
|
int nCmp;
|
|
int i;
|
|
nCmp = MIN(nNew, nTerm-nMatch);
|
|
for(i=0; i<nCmp; i++){
|
|
if( a[iOff+i]!=pTerm[nMatch+i] ) break;
|
|
}
|
|
nMatch += i;
|
|
|
|
if( nTerm==nMatch ){
|
|
if( i==nNew ){
|
|
goto search_success;
|
|
}else{
|
|
goto search_failed;
|
|
}
|
|
}else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
|
|
goto search_failed;
|
|
}
|
|
}
|
|
|
|
if( iPgidx>=n ){
|
|
bEndOfPage = 1;
|
|
break;
|
|
}
|
|
|
|
iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
|
|
iTermOff += nKeep;
|
|
iOff = iTermOff;
|
|
|
|
/* Read the nKeep field of the next term. */
|
|
fts5FastGetVarint32(a, iOff, nKeep);
|
|
}
|
|
|
|
search_failed:
|
|
if( bGe==0 ){
|
|
fts5DataRelease(pIter->pLeaf);
|
|
pIter->pLeaf = 0;
|
|
return;
|
|
}else if( bEndOfPage ){
|
|
do {
|
|
fts5SegIterNextPage(p, pIter);
|
|
if( pIter->pLeaf==0 ) return;
|
|
a = pIter->pLeaf->p;
|
|
if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
|
|
iPgidx = pIter->pLeaf->szLeaf;
|
|
iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
|
|
if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
|
|
p->rc = FTS5_CORRUPT;
|
|
}else{
|
|
nKeep = 0;
|
|
iTermOff = iOff;
|
|
n = pIter->pLeaf->nn;
|
|
iOff += fts5GetVarint32(&a[iOff], nNew);
|
|
break;
|
|
}
|
|
}
|
|
}while( 1 );
|
|
}
|
|
|
|
search_success:
|
|
|
|
pIter->iLeafOffset = iOff + nNew;
|
|
pIter->iTermLeafOffset = pIter->iLeafOffset;
|
|
pIter->iTermLeafPgno = pIter->iLeafPgno;
|
|
|
|
fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
|
|
fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
|
|
|
|
if( iPgidx>=n ){
|
|
pIter->iEndofDoclist = pIter->pLeaf->nn+1;
|
|
}else{
|
|
int nExtra;
|
|
iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
|
|
pIter->iEndofDoclist = iTermOff + nExtra;
|
|
}
|
|
pIter->iPgidxOff = iPgidx;
|
|
|
|
fts5SegIterLoadRowid(p, pIter);
|
|
fts5SegIterLoadNPos(p, pIter);
|
|
}
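/* Illustrative sketch, not part of the original file and excluded from the
** build: decoding the page-index ("pgidx") that fts5LeafSeek() walks. The
** pgidx occupies bytes [szLeaf..nn) of a leaf page and consists of varints;
** the first is the absolute offset of the first term on the page and each
** subsequent value is the delta to the next term's offset. The helper name
** and the single-byte "varint" are assumptions made for brevity - the real
** code uses fts5GetVarint32(). */
#if 0
static int examplePgidxDecode(
  const unsigned char *aPage,     /* Complete leaf page image */
  int szLeaf,                     /* Offset of the pgidx within aPage */
  int nPage,                      /* Total size of aPage in bytes */
  int *aTermOff,                  /* OUT: absolute term offsets */
  int nMax                        /* Number of slots in aTermOff[] */
){
  int i = szLeaf;                 /* Read cursor within the pgidx */
  int iOff = 0;                   /* Accumulated term offset */
  int nTerm = 0;                  /* Number of offsets decoded */
  while( i<nPage && nTerm<nMax ){
    /* Assumes each pgidx entry fits in one byte (value<128). Real pages
    ** store multi-byte SQLite varints here. */
    iOff += (int)aPage[i++];
    aTermOff[nTerm++] = iOff;
  }
  return nTerm;
}
#endif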
|
|
|
|
/*
|
|
** Initialize the object pIter to point to term pTerm/nTerm within segment
|
|
** pSeg. If there is no such term in the index, the iterator is set to EOF.
|
|
**
|
|
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
|
|
** an error has already occurred when this function is called, it is a no-op.
|
|
*/
|
|
static void fts5SegIterSeekInit(
|
|
Fts5Index *p, /* FTS5 backend */
|
|
const u8 *pTerm, int nTerm, /* Term to seek to */
|
|
int flags, /* Mask of FTS5INDEX_XXX flags */
|
|
Fts5StructureSegment *pSeg, /* Description of segment */
|
|
Fts5SegIter *pIter /* Object to populate */
|
|
){
|
|
int iPg = 1;
|
|
int bGe = (flags & FTS5INDEX_QUERY_SCAN);
|
|
int bDlidx = 0; /* True if there is a doclist-index */
|
|
|
|
assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
|
|
assert( pTerm && nTerm );
|
|
memset(pIter, 0, sizeof(*pIter));
|
|
pIter->pSeg = pSeg;
|
|
|
|
/* This block sets stack variable iPg to the leaf page number that may
|
|
** contain term (pTerm/nTerm), if it is present in the segment. */
|
|
if( p->pIdxSelect==0 ){
|
|
Fts5Config *pConfig = p->pConfig;
|
|
fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
|
|
"SELECT pgno FROM '%q'.'%q_idx' WHERE "
|
|
"segid=? AND term<=? ORDER BY term DESC LIMIT 1",
|
|
pConfig->zDb, pConfig->zName
|
|
));
|
|
}
|
|
if( p->rc ) return;
|
|
sqlite3_bind_int(p->pIdxSelect, 1, pSeg->iSegid);
|
|
sqlite3_bind_blob(p->pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
|
|
if( SQLITE_ROW==sqlite3_step(p->pIdxSelect) ){
|
|
i64 val = sqlite3_column_int(p->pIdxSelect, 0);
|
|
iPg = (int)(val>>1);
|
|
bDlidx = (val & 0x0001);
|
|
}
|
|
p->rc = sqlite3_reset(p->pIdxSelect);
|
|
|
|
if( iPg<pSeg->pgnoFirst ){
|
|
iPg = pSeg->pgnoFirst;
|
|
bDlidx = 0;
|
|
}
|
|
|
|
pIter->iLeafPgno = iPg - 1;
|
|
fts5SegIterNextPage(p, pIter);
|
|
|
|
if( pIter->pLeaf ){
|
|
fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
|
|
}
|
|
|
|
if( p->rc==SQLITE_OK && bGe==0 ){
|
|
pIter->flags |= FTS5_SEGITER_ONETERM;
|
|
if( pIter->pLeaf ){
|
|
if( flags & FTS5INDEX_QUERY_DESC ){
|
|
pIter->flags |= FTS5_SEGITER_REVERSE;
|
|
}
|
|
if( bDlidx ){
|
|
fts5SegIterLoadDlidx(p, pIter);
|
|
}
|
|
if( flags & FTS5INDEX_QUERY_DESC ){
|
|
fts5SegIterReverse(p, pIter);
|
|
}
|
|
}
|
|
}
|
|
|
|
fts5SegIterSetNext(p, pIter);
|
|
|
|
/* Either:
|
|
**
|
|
** 1) an error has occurred, or
|
|
** 2) the iterator points to EOF, or
|
|
** 3) the iterator points to an entry with term (pTerm/nTerm), or
|
|
** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
|
|
** to an entry with a term greater than or equal to (pTerm/nTerm).
|
|
*/
|
|
assert( p->rc!=SQLITE_OK /* 1 */
|
|
|| pIter->pLeaf==0 /* 2 */
|
|
|| fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */
|
|
|| (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */
|
|
);
|
|
}
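/* Illustrative sketch, not part of the original file and excluded from the
** build: the "pgno" column of the %_idx table packs two values into one
** integer - the leaf page number in the upper bits and, in bit 0, a flag
** indicating whether a doclist-index exists for the term. This mirrors the
** encoding written by fts5WriteFlushBtree() and decoded above. The helper
** names are assumptions. */
#if 0
static long long examplePackIdxPgno(int pgno, int bDlidx){
  return ((long long)pgno << 1) + (bDlidx ? 1 : 0);
}
static void exampleUnpackIdxPgno(long long iVal, int *pPgno, int *pbDlidx){
  *pPgno = (int)(iVal >> 1);        /* Leaf page number */
  *pbDlidx = (int)(iVal & 0x0001);  /* True if a doclist-index exists */
}
#endif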
|
|
|
|
/*
|
|
** Initialize the object pIter to point to term pTerm/nTerm within the
|
|
** in-memory hash table. If there is no such term in the hash-table, the
|
|
** iterator is set to EOF.
|
|
**
|
|
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
|
|
** an error has already occurred when this function is called, it is a no-op.
|
|
*/
|
|
static void fts5SegIterHashInit(
|
|
Fts5Index *p, /* FTS5 backend */
|
|
const u8 *pTerm, int nTerm, /* Term to seek to */
|
|
int flags, /* Mask of FTS5INDEX_XXX flags */
|
|
Fts5SegIter *pIter /* Object to populate */
|
|
){
|
|
const u8 *pList = 0;
|
|
int nList = 0;
|
|
const u8 *z = 0;
|
|
int n = 0;
|
|
|
|
assert( p->pHash );
|
|
assert( p->rc==SQLITE_OK );
|
|
|
|
if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
|
|
p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
|
|
sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList);
|
|
n = (z ? (int)strlen((const char*)z) : 0);
|
|
}else{
|
|
pIter->flags |= FTS5_SEGITER_ONETERM;
|
|
sqlite3Fts5HashQuery(p->pHash, (const char*)pTerm, nTerm, &pList, &nList);
|
|
z = pTerm;
|
|
n = nTerm;
|
|
}
|
|
|
|
if( pList ){
|
|
Fts5Data *pLeaf;
|
|
sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
|
|
pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
|
|
if( pLeaf==0 ) return;
|
|
pLeaf->p = (u8*)pList;
|
|
pLeaf->nn = pLeaf->szLeaf = nList;
|
|
pIter->pLeaf = pLeaf;
|
|
pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
|
|
pIter->iEndofDoclist = pLeaf->nn;
|
|
|
|
if( flags & FTS5INDEX_QUERY_DESC ){
|
|
pIter->flags |= FTS5_SEGITER_REVERSE;
|
|
fts5SegIterReverseInitPage(p, pIter);
|
|
}else{
|
|
fts5SegIterLoadNPos(p, pIter);
|
|
}
|
|
}
|
|
|
|
fts5SegIterSetNext(p, pIter);
|
|
}
|
|
|
|
/*
** Zero the iterator passed as the only argument.
*/
static void fts5SegIterClear(Fts5SegIter *pIter){
  fts5BufferFree(&pIter->term);
  fts5DataRelease(pIter->pLeaf);
  fts5DataRelease(pIter->pNextLeaf);
  fts5DlidxIterFree(pIter->pDlidx);
  sqlite3_free(pIter->aRowidOffset);
  memset(pIter, 0, sizeof(Fts5SegIter));
}
|
|
|
|
#ifdef SQLITE_DEBUG
|
|
|
|
/*
|
|
** This function is used as part of the big assert() procedure implemented by
|
|
** fts5AssertMultiIterSetup(). It ensures that the result currently stored
|
|
** in *pRes is the correct result of comparing the current positions of the
|
|
** two iterators.
|
|
*/
|
|
static void fts5AssertComparisonResult(
|
|
Fts5Iter *pIter,
|
|
Fts5SegIter *p1,
|
|
Fts5SegIter *p2,
|
|
Fts5CResult *pRes
|
|
){
|
|
int i1 = p1 - pIter->aSeg;
|
|
int i2 = p2 - pIter->aSeg;
|
|
|
|
if( p1->pLeaf || p2->pLeaf ){
|
|
if( p1->pLeaf==0 ){
|
|
assert( pRes->iFirst==i2 );
|
|
}else if( p2->pLeaf==0 ){
|
|
assert( pRes->iFirst==i1 );
|
|
}else{
|
|
int nMin = MIN(p1->term.n, p2->term.n);
|
|
int res = memcmp(p1->term.p, p2->term.p, nMin);
|
|
if( res==0 ) res = p1->term.n - p2->term.n;
|
|
|
|
if( res==0 ){
|
|
assert( pRes->bTermEq==1 );
|
|
assert( p1->iRowid!=p2->iRowid );
|
|
res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
|
|
}else{
|
|
assert( pRes->bTermEq==0 );
|
|
}
|
|
|
|
if( res<0 ){
|
|
assert( pRes->iFirst==i1 );
|
|
}else{
|
|
assert( pRes->iFirst==i2 );
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** This function is a no-op unless SQLITE_DEBUG is defined when this module
|
|
** is compiled. In that case, this function is essentially an assert()
|
|
** statement used to verify that the contents of the pIter->aFirst[] array
|
|
** are correct.
|
|
*/
|
|
static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
|
|
if( p->rc==SQLITE_OK ){
|
|
Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
|
|
int i;
|
|
|
|
assert( (pFirst->pLeaf==0)==pIter->base.bEof );
|
|
|
|
/* Check that pIter->iSwitchRowid is set correctly. */
|
|
for(i=0; i<pIter->nSeg; i++){
|
|
Fts5SegIter *p1 = &pIter->aSeg[i];
|
|
assert( p1==pFirst
|
|
|| p1->pLeaf==0
|
|
|| fts5BufferCompare(&pFirst->term, &p1->term)
|
|
|| p1->iRowid==pIter->iSwitchRowid
|
|
|| (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
|
|
);
|
|
}
|
|
|
|
for(i=0; i<pIter->nSeg; i+=2){
|
|
Fts5SegIter *p1 = &pIter->aSeg[i];
|
|
Fts5SegIter *p2 = &pIter->aSeg[i+1];
|
|
Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
|
|
fts5AssertComparisonResult(pIter, p1, p2, pRes);
|
|
}
|
|
|
|
for(i=1; i<(pIter->nSeg / 2); i+=2){
|
|
Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
|
|
Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
|
|
Fts5CResult *pRes = &pIter->aFirst[i];
|
|
fts5AssertComparisonResult(pIter, p1, p2, pRes);
|
|
}
|
|
}
|
|
}
|
|
#else
|
|
# define fts5AssertMultiIterSetup(x,y)
|
|
#endif
|
|
|
|
/*
|
|
** Do the comparison necessary to populate pIter->aFirst[iOut].
|
|
**
|
|
** If the returned value is non-zero, then it is the index of an entry
|
|
** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
|
|
** to a key that is a duplicate of another, higher priority,
|
|
** segment-iterator in the pSeg->aSeg[] array.
|
|
*/
|
|
static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
|
|
int i1; /* Index of left-hand Fts5SegIter */
|
|
int i2; /* Index of right-hand Fts5SegIter */
|
|
int iRes;
|
|
Fts5SegIter *p1; /* Left-hand Fts5SegIter */
|
|
Fts5SegIter *p2; /* Right-hand Fts5SegIter */
|
|
Fts5CResult *pRes = &pIter->aFirst[iOut];
|
|
|
|
assert( iOut<pIter->nSeg && iOut>0 );
|
|
assert( pIter->bRev==0 || pIter->bRev==1 );
|
|
|
|
if( iOut>=(pIter->nSeg/2) ){
|
|
i1 = (iOut - pIter->nSeg/2) * 2;
|
|
i2 = i1 + 1;
|
|
}else{
|
|
i1 = pIter->aFirst[iOut*2].iFirst;
|
|
i2 = pIter->aFirst[iOut*2+1].iFirst;
|
|
}
|
|
p1 = &pIter->aSeg[i1];
|
|
p2 = &pIter->aSeg[i2];
|
|
|
|
pRes->bTermEq = 0;
|
|
if( p1->pLeaf==0 ){ /* If p1 is at EOF */
|
|
iRes = i2;
|
|
}else if( p2->pLeaf==0 ){ /* If p2 is at EOF */
|
|
iRes = i1;
|
|
}else{
|
|
int res = fts5BufferCompare(&p1->term, &p2->term);
|
|
if( res==0 ){
|
|
assert( i2>i1 );
|
|
assert( i2!=0 );
|
|
pRes->bTermEq = 1;
|
|
if( p1->iRowid==p2->iRowid ){
|
|
p1->bDel = p2->bDel;
|
|
return i2;
|
|
}
|
|
res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
|
|
}
|
|
assert( res!=0 );
|
|
if( res<0 ){
|
|
iRes = i1;
|
|
}else{
|
|
iRes = i2;
|
|
}
|
|
}
|
|
|
|
pRes->iFirst = (u16)iRes;
|
|
return 0;
|
|
}
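/* Illustrative sketch, not part of the original file and excluded from the
** build: aFirst[] implements a tournament tree over the (power-of-two
** sized) aSeg[] array. Entry i for i>=nSeg/2 compares the pair of segment
** iterators (i-nSeg/2)*2 and (i-nSeg/2)*2+1; entries below nSeg/2 compare
** the winners of their two children; aFirst[1] holds the overall winner.
** The sketch applies the same index arithmetic to plain integer keys; the
** helper name is an assumption. */
#if 0
static int exampleTournamentWinner(const int *aKey, int nSeg, int *aFirst){
  int i;
  assert( nSeg>=2 && (nSeg & (nSeg-1))==0 );  /* nSeg is a power of two */
  for(i=nSeg-1; i>0; i--){
    int i1, i2;
    if( i>=(nSeg/2) ){
      i1 = (i - nSeg/2) * 2;        /* Leaf level: compare two segments */
      i2 = i1 + 1;
    }else{
      i1 = aFirst[i*2];             /* Internal level: compare two winners */
      i2 = aFirst[i*2+1];
    }
    aFirst[i] = (aKey[i1]<=aKey[i2]) ? i1 : i2;
  }
  return aFirst[1];                 /* Index of the smallest key */
}
#endif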
|
|
|
|
/*
|
|
** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
|
|
** It is an error if leaf iLeafPgno does not exist or contains no rowids.
|
|
*/
|
|
static void fts5SegIterGotoPage(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
Fts5SegIter *pIter, /* Iterator to advance */
|
|
int iLeafPgno
|
|
){
|
|
assert( iLeafPgno>pIter->iLeafPgno );
|
|
|
|
if( iLeafPgno>pIter->pSeg->pgnoLast ){
|
|
p->rc = FTS5_CORRUPT;
|
|
}else{
|
|
fts5DataRelease(pIter->pNextLeaf);
|
|
pIter->pNextLeaf = 0;
|
|
pIter->iLeafPgno = iLeafPgno-1;
|
|
fts5SegIterNextPage(p, pIter);
|
|
assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
|
|
|
|
if( p->rc==SQLITE_OK ){
|
|
int iOff;
|
|
u8 *a = pIter->pLeaf->p;
|
|
int n = pIter->pLeaf->szLeaf;
|
|
|
|
iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
|
|
if( iOff<4 || iOff>=n ){
|
|
p->rc = FTS5_CORRUPT;
|
|
}else{
|
|
iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
|
|
pIter->iLeafOffset = iOff;
|
|
fts5SegIterLoadNPos(p, pIter);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Advance the iterator passed as the second argument until it is at or
|
|
** past rowid iFrom. Regardless of the value of iFrom, the iterator is
|
|
** always advanced at least once.
|
|
*/
|
|
static void fts5SegIterNextFrom(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
Fts5SegIter *pIter, /* Iterator to advance */
|
|
i64 iMatch /* Advance iterator at least this far */
|
|
){
|
|
int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
|
|
Fts5DlidxIter *pDlidx = pIter->pDlidx;
|
|
int iLeafPgno = pIter->iLeafPgno;
|
|
int bMove = 1;
|
|
|
|
assert( pIter->flags & FTS5_SEGITER_ONETERM );
|
|
assert( pIter->pDlidx );
|
|
assert( pIter->pLeaf );
|
|
|
|
if( bRev==0 ){
|
|
while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
|
|
iLeafPgno = fts5DlidxIterPgno(pDlidx);
|
|
fts5DlidxIterNext(p, pDlidx);
|
|
}
|
|
assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
|
|
if( iLeafPgno>pIter->iLeafPgno ){
|
|
fts5SegIterGotoPage(p, pIter, iLeafPgno);
|
|
bMove = 0;
|
|
}
|
|
}else{
|
|
assert( pIter->pNextLeaf==0 );
|
|
assert( iMatch<pIter->iRowid );
|
|
while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
|
|
fts5DlidxIterPrev(p, pDlidx);
|
|
}
|
|
iLeafPgno = fts5DlidxIterPgno(pDlidx);
|
|
|
|
assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
|
|
|
|
if( iLeafPgno<pIter->iLeafPgno ){
|
|
pIter->iLeafPgno = iLeafPgno+1;
|
|
fts5SegIterReverseNewPage(p, pIter);
|
|
bMove = 0;
|
|
}
|
|
}
|
|
|
|
do{
|
|
if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
|
|
if( pIter->pLeaf==0 ) break;
|
|
if( bRev==0 && pIter->iRowid>=iMatch ) break;
|
|
if( bRev!=0 && pIter->iRowid<=iMatch ) break;
|
|
bMove = 1;
|
|
}while( p->rc==SQLITE_OK );
|
|
}
|
|
|
|
|
|
/*
|
|
** Free the iterator object passed as the second argument.
|
|
*/
|
|
static void fts5MultiIterFree(Fts5Iter *pIter){
|
|
if( pIter ){
|
|
int i;
|
|
for(i=0; i<pIter->nSeg; i++){
|
|
fts5SegIterClear(&pIter->aSeg[i]);
|
|
}
|
|
fts5StructureRelease(pIter->pStruct);
|
|
fts5BufferFree(&pIter->poslist);
|
|
sqlite3_free(pIter);
|
|
}
|
|
}
|
|
|
|
static void fts5MultiIterAdvanced(
|
|
Fts5Index *p, /* FTS5 backend to iterate within */
|
|
Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
|
|
int iChanged, /* Index of sub-iterator just advanced */
|
|
int iMinset /* Minimum entry in aFirst[] to set */
|
|
){
|
|
int i;
|
|
for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
|
|
int iEq;
|
|
if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
|
|
Fts5SegIter *pSeg = &pIter->aSeg[iEq];
|
|
assert( p->rc==SQLITE_OK );
|
|
pSeg->xNext(p, pSeg, 0);
|
|
i = pIter->nSeg + iEq;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Sub-iterator iChanged of iterator pIter has just been advanced. It still
|
|
** points to the same term though - just a different rowid. This function
|
|
** attempts to update the contents of the pIter->aFirst[] array accordingly.
|
|
** If it does so successfully, 0 is returned. Otherwise 1.
|
|
**
|
|
** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
|
|
** on the iterator instead. That function does the same as this one, except
|
|
** that it deals with more complicated cases as well.
|
|
*/
|
|
static int fts5MultiIterAdvanceRowid(
|
|
Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
|
|
int iChanged, /* Index of sub-iterator just advanced */
|
|
Fts5SegIter **ppFirst
|
|
){
|
|
Fts5SegIter *pNew = &pIter->aSeg[iChanged];
|
|
|
|
if( pNew->iRowid==pIter->iSwitchRowid
|
|
|| (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
|
|
){
|
|
int i;
|
|
Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
|
|
pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
|
|
for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
|
|
Fts5CResult *pRes = &pIter->aFirst[i];
|
|
|
|
assert( pNew->pLeaf );
|
|
assert( pRes->bTermEq==0 || pOther->pLeaf );
|
|
|
|
if( pRes->bTermEq ){
|
|
if( pNew->iRowid==pOther->iRowid ){
|
|
return 1;
|
|
}else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
|
|
pIter->iSwitchRowid = pOther->iRowid;
|
|
pNew = pOther;
|
|
}else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
|
|
pIter->iSwitchRowid = pOther->iRowid;
|
|
}
|
|
}
|
|
pRes->iFirst = (u16)(pNew - pIter->aSeg);
|
|
if( i==1 ) break;
|
|
|
|
pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
|
|
}
|
|
}
|
|
|
|
*ppFirst = pNew;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
** Set the pIter->bEof variable based on the state of the sub-iterators.
|
|
*/
|
|
static void fts5MultiIterSetEof(Fts5Iter *pIter){
|
|
Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
|
|
pIter->base.bEof = pSeg->pLeaf==0;
|
|
pIter->iSwitchRowid = pSeg->iRowid;
|
|
}
|
|
|
|
/*
|
|
** Move the iterator to the next entry.
|
|
**
|
|
** If an error occurs, an error code is left in Fts5Index.rc. It is not
|
|
** considered an error if the iterator reaches EOF, or if it is already at
|
|
** EOF when this function is called.
|
|
*/
|
|
static void fts5MultiIterNext(
|
|
Fts5Index *p,
|
|
Fts5Iter *pIter,
|
|
int bFrom, /* True if argument iFrom is valid */
|
|
i64 iFrom /* Advance at least as far as this */
|
|
){
|
|
int bUseFrom = bFrom;
|
|
while( p->rc==SQLITE_OK ){
|
|
int iFirst = pIter->aFirst[1].iFirst;
|
|
int bNewTerm = 0;
|
|
Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
|
|
assert( p->rc==SQLITE_OK );
|
|
if( bUseFrom && pSeg->pDlidx ){
|
|
fts5SegIterNextFrom(p, pSeg, iFrom);
|
|
}else{
|
|
pSeg->xNext(p, pSeg, &bNewTerm);
|
|
}
|
|
|
|
if( pSeg->pLeaf==0 || bNewTerm
|
|
|| fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
|
|
){
|
|
fts5MultiIterAdvanced(p, pIter, iFirst, 1);
|
|
fts5MultiIterSetEof(pIter);
|
|
pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
|
|
if( pSeg->pLeaf==0 ) return;
|
|
}
|
|
|
|
fts5AssertMultiIterSetup(p, pIter);
|
|
assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
|
|
if( pIter->bSkipEmpty==0 || pSeg->nPos ){
|
|
pIter->xSetOutputs(pIter, pSeg);
|
|
return;
|
|
}
|
|
bUseFrom = 0;
|
|
}
|
|
}
|
|
|
|
static void fts5MultiIterNext2(
|
|
Fts5Index *p,
|
|
Fts5Iter *pIter,
|
|
int *pbNewTerm /* OUT: True if *might* be new term */
|
|
){
|
|
assert( pIter->bSkipEmpty );
|
|
if( p->rc==SQLITE_OK ){
|
|
do {
|
|
int iFirst = pIter->aFirst[1].iFirst;
|
|
Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
|
|
int bNewTerm = 0;
|
|
|
|
assert( p->rc==SQLITE_OK );
|
|
pSeg->xNext(p, pSeg, &bNewTerm);
|
|
if( pSeg->pLeaf==0 || bNewTerm
|
|
|| fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
|
|
){
|
|
fts5MultiIterAdvanced(p, pIter, iFirst, 1);
|
|
fts5MultiIterSetEof(pIter);
|
|
*pbNewTerm = 1;
|
|
}else{
|
|
*pbNewTerm = 0;
|
|
}
|
|
fts5AssertMultiIterSetup(p, pIter);
|
|
|
|
}while( fts5MultiIterIsEmpty(p, pIter) );
|
|
}
|
|
}
|
|
|
|
static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){
|
|
UNUSED_PARAM2(pUnused1, pUnused2);
|
|
}
|
|
|
|
static Fts5Iter *fts5MultiIterAlloc(
|
|
Fts5Index *p, /* FTS5 backend to iterate within */
|
|
int nSeg
|
|
){
|
|
Fts5Iter *pNew;
|
|
int nSlot; /* Power of two >= nSeg */
|
|
|
|
for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
|
|
pNew = fts5IdxMalloc(p,
|
|
sizeof(Fts5Iter) + /* pNew */
|
|
sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */
|
|
sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */
|
|
);
|
|
if( pNew ){
|
|
pNew->nSeg = nSlot;
|
|
pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
|
|
pNew->pIndex = p;
|
|
pNew->xSetOutputs = fts5IterSetOutputs_Noop;
|
|
}
|
|
return pNew;
|
|
}
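/* Illustrative sketch, not part of the original file and excluded from the
** build: fts5MultiIterAlloc() rounds the segment count up to a power of
** two so that aFirst[] can form a complete tournament tree, then carves
** both trailing arrays out of a single allocation. The helper below shows
** just the rounding step; its name is an assumption. */
#if 0
static int exampleRoundUpPow2(int nSeg){
  int nSlot;
  for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
  return nSlot;                   /* Smallest power of two >= nSeg (min 2) */
}
#endif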
|
|
|
|
static void fts5PoslistCallback(
|
|
Fts5Index *pUnused,
|
|
void *pContext,
|
|
const u8 *pChunk, int nChunk
|
|
){
|
|
UNUSED_PARAM(pUnused);
|
|
assert_nc( nChunk>=0 );
|
|
if( nChunk>0 ){
|
|
fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
|
|
}
|
|
}
|
|
|
|
typedef struct PoslistCallbackCtx PoslistCallbackCtx;
|
|
struct PoslistCallbackCtx {
|
|
Fts5Buffer *pBuf; /* Append to this buffer */
|
|
Fts5Colset *pColset; /* Restrict matches to this column */
|
|
int eState; /* See above */
|
|
};
|
|
|
|
typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
|
|
struct PoslistOffsetsCtx {
|
|
Fts5Buffer *pBuf; /* Append to this buffer */
|
|
Fts5Colset *pColset; /* Restrict matches to this column */
|
|
int iRead;
|
|
int iWrite;
|
|
};
|
|
|
|
/*
|
|
** TODO: Make this more efficient!
|
|
*/
|
|
static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
|
|
int i;
|
|
for(i=0; i<pColset->nCol; i++){
|
|
if( pColset->aiCol[i]==iCol ) return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void fts5PoslistOffsetsCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );
  if( nChunk>0 ){
    int i = 0;
    while( i<nChunk ){
      int iVal;
      i += fts5GetVarint32(&pChunk[i], iVal);
      iVal += pCtx->iRead - 2;
      pCtx->iRead = iVal;
      if( fts5IndexColsetTest(pCtx->pColset, iVal) ){
        fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite);
        pCtx->iWrite = iVal;
      }
    }
  }
}
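/* Illustrative sketch, not part of the original file and excluded from the
** build: in detail=col mode a position list is simply a list of column
** numbers, each stored as a varint holding (delta from the previous
** column)+2, which is exactly the encoding the callback above reverses.
** A single-byte varint and the helper name are assumptions. */
#if 0
static int exampleDecodeColList(
  const unsigned char *a, int n,    /* Encoded detail=col position list */
  int *aCol, int nMax               /* OUT: decoded column numbers */
){
  int i = 0;
  int iCol = 0;                     /* Previous column number */
  int nOut = 0;
  while( i<n && nOut<nMax ){
    iCol += (int)a[i++] - 2;        /* Each entry is a (delta+2) value */
    aCol[nOut++] = iCol;
  }
  return nOut;
}
#endif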
|
|
|
|
static void fts5PoslistFilterCallback(
|
|
Fts5Index *pUnused,
|
|
void *pContext,
|
|
const u8 *pChunk, int nChunk
|
|
){
|
|
PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
|
|
UNUSED_PARAM(pUnused);
|
|
assert_nc( nChunk>=0 );
|
|
if( nChunk>0 ){
|
|
    /* Search through to find the first varint with value 1. This is the
    ** start of the next column's hits. */
|
|
int i = 0;
|
|
int iStart = 0;
|
|
|
|
if( pCtx->eState==2 ){
|
|
int iCol;
|
|
fts5FastGetVarint32(pChunk, i, iCol);
|
|
if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
|
|
pCtx->eState = 1;
|
|
fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
|
|
}else{
|
|
pCtx->eState = 0;
|
|
}
|
|
}
|
|
|
|
do {
|
|
while( i<nChunk && pChunk[i]!=0x01 ){
|
|
while( pChunk[i] & 0x80 ) i++;
|
|
i++;
|
|
}
|
|
if( pCtx->eState ){
|
|
fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
|
|
}
|
|
if( i<nChunk ){
|
|
int iCol;
|
|
iStart = i;
|
|
i++;
|
|
if( i>=nChunk ){
|
|
pCtx->eState = 2;
|
|
}else{
|
|
fts5FastGetVarint32(pChunk, i, iCol);
|
|
pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
|
|
if( pCtx->eState ){
|
|
fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
|
|
iStart = i;
|
|
}
|
|
}
|
|
}
|
|
}while( i<nChunk );
|
|
}
|
|
}
|
|
|
|
static void fts5ChunkIterate(
|
|
Fts5Index *p, /* Index object */
|
|
Fts5SegIter *pSeg, /* Poslist of this iterator */
|
|
void *pCtx, /* Context pointer for xChunk callback */
|
|
void (*xChunk)(Fts5Index*, void*, const u8*, int)
|
|
){
|
|
int nRem = pSeg->nPos; /* Number of bytes still to come */
|
|
Fts5Data *pData = 0;
|
|
u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
|
|
int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
|
|
int pgno = pSeg->iLeafPgno;
|
|
int pgnoSave = 0;
|
|
|
|
  /* This function does not work with detail=none databases. */
|
|
assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
|
|
|
|
if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
|
|
pgnoSave = pgno+1;
|
|
}
|
|
|
|
while( 1 ){
|
|
xChunk(p, pCtx, pChunk, nChunk);
|
|
nRem -= nChunk;
|
|
fts5DataRelease(pData);
|
|
if( nRem<=0 ){
|
|
break;
|
|
}else{
|
|
pgno++;
|
|
pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
|
|
if( pData==0 ) break;
|
|
pChunk = &pData->p[4];
|
|
nChunk = MIN(nRem, pData->szLeaf - 4);
|
|
if( pgno==pgnoSave ){
|
|
assert( pSeg->pNextLeaf==0 );
|
|
pSeg->pNextLeaf = pData;
|
|
pData = 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Iterator pIter currently points to a valid entry (not EOF). This
|
|
** function appends the position list data for the current entry to
|
|
** buffer pBuf. It does not make a copy of the position-list size
|
|
** field.
|
|
*/
|
|
static void fts5SegiterPoslist(
|
|
Fts5Index *p,
|
|
Fts5SegIter *pSeg,
|
|
Fts5Colset *pColset,
|
|
Fts5Buffer *pBuf
|
|
){
|
|
if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){
|
|
if( pColset==0 ){
|
|
fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
|
|
}else{
|
|
if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
|
|
PoslistCallbackCtx sCtx;
|
|
sCtx.pBuf = pBuf;
|
|
sCtx.pColset = pColset;
|
|
sCtx.eState = fts5IndexColsetTest(pColset, 0);
|
|
assert( sCtx.eState==0 || sCtx.eState==1 );
|
|
fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
|
|
}else{
|
|
PoslistOffsetsCtx sCtx;
|
|
memset(&sCtx, 0, sizeof(sCtx));
|
|
sCtx.pBuf = pBuf;
|
|
sCtx.pColset = pColset;
|
|
fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** IN/OUT parameter (*pa) points to a position list n bytes in size. If
|
|
** the position list contains entries for column iCol, then (*pa) is set
|
|
** to point to the sub-position-list for that column and the number of
|
|
** bytes in it returned. Or, if the argument position list does not
|
|
** contain any entries for column iCol, return 0.
|
|
*/
|
|
static int fts5IndexExtractCol(
|
|
const u8 **pa, /* IN/OUT: Pointer to poslist */
|
|
int n, /* IN: Size of poslist in bytes */
|
|
int iCol /* Column to extract from poslist */
|
|
){
|
|
int iCurrent = 0; /* Anything before the first 0x01 is col 0 */
|
|
const u8 *p = *pa;
|
|
const u8 *pEnd = &p[n]; /* One byte past end of position list */
|
|
|
|
while( iCol>iCurrent ){
|
|
/* Advance pointer p until it points to pEnd or an 0x01 byte that is
|
|
** not part of a varint. Note that it is not possible for a negative
|
|
** or extremely large varint to occur within an uncorrupted position
|
|
** list. So the last byte of each varint may be assumed to have a clear
|
|
** 0x80 bit. */
|
|
while( *p!=0x01 ){
|
|
while( *p++ & 0x80 );
|
|
if( p>=pEnd ) return 0;
|
|
}
|
|
*pa = p++;
|
|
iCurrent = *p++;
|
|
if( iCurrent & 0x80 ){
|
|
p--;
|
|
p += fts5GetVarint32(p, iCurrent);
|
|
}
|
|
}
|
|
if( iCol!=iCurrent ) return 0;
|
|
|
|
/* Advance pointer p until it points to pEnd or an 0x01 byte that is
|
|
** not part of a varint */
|
|
while( p<pEnd && *p!=0x01 ){
|
|
while( *p++ & 0x80 );
|
|
}
|
|
|
|
return p - (*pa);
|
|
}
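/* Illustrative sketch, not part of the original file and excluded from the
** build: in detail=full mode a position list is a sequence of varints in
** which the byte 0x01 acts as a column separator followed by a varint
** column number; everything before the first separator belongs to column
** 0. The helper counts the columns present by skipping varints the same
** way fts5IndexExtractCol() does; its name is an assumption. */
#if 0
static int exampleCountPoslistColumns(const unsigned char *a, int n){
  const unsigned char *p = a;
  const unsigned char *pEnd = &a[n];
  int nCol = (n>0);                 /* Column 0, if the list is non-empty */
  while( p<pEnd ){
    if( *p==0x01 ){
      p++;                          /* Skip the 0x01 separator */
      while( p<pEnd && (*p++ & 0x80) );  /* Skip the column-number varint */
      nCol++;
    }else{
      while( p<pEnd && (*p++ & 0x80) );  /* Skip a position-delta varint */
    }
  }
  return nCol;
}
#endif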
|
|
|
|
static int fts5IndexExtractColset (
|
|
Fts5Colset *pColset, /* Colset to filter on */
|
|
const u8 *pPos, int nPos, /* Position list */
|
|
Fts5Buffer *pBuf /* Output buffer */
|
|
){
|
|
int rc = SQLITE_OK;
|
|
int i;
|
|
|
|
fts5BufferZero(pBuf);
|
|
for(i=0; i<pColset->nCol; i++){
|
|
const u8 *pSub = pPos;
|
|
int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]);
|
|
if( nSub ){
|
|
fts5BufferAppendBlob(&rc, pBuf, nSub, pSub);
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** xSetOutputs callback used by detail=none tables.
|
|
*/
|
|
static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
|
|
assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
|
|
pIter->base.iRowid = pSeg->iRowid;
|
|
pIter->base.nData = pSeg->nPos;
|
|
}
|
|
|
|
/*
|
|
** xSetOutputs callback used by detail=full and detail=col tables when no
|
|
** column filters are specified.
|
|
*/
|
|
static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
|
|
pIter->base.iRowid = pSeg->iRowid;
|
|
pIter->base.nData = pSeg->nPos;
|
|
|
|
assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
|
|
assert( pIter->pColset==0 );
|
|
|
|
if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
|
|
/* All data is stored on the current page. Populate the output
|
|
** variables to point into the body of the page object. */
|
|
pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
|
|
}else{
|
|
/* The data is distributed over two or more pages. Copy it into the
|
|
** Fts5Iter.poslist buffer and then set the output pointer to point
|
|
** to this buffer. */
|
|
fts5BufferZero(&pIter->poslist);
|
|
fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
|
|
pIter->base.pData = pIter->poslist.p;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** xSetOutputs callback used by detail=col when there is a column filter
|
|
** and there are 100 or more columns. Also called as a fallback from
|
|
** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
|
|
*/
|
|
static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
|
|
fts5BufferZero(&pIter->poslist);
|
|
fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
|
|
pIter->base.iRowid = pSeg->iRowid;
|
|
pIter->base.pData = pIter->poslist.p;
|
|
pIter->base.nData = pIter->poslist.n;
|
|
}
|
|
|
|
/*
|
|
** xSetOutputs callback used when:
|
|
**
|
|
** * detail=col,
|
|
** * there is a column filter, and
|
|
** * the table contains 100 or fewer columns.
|
|
**
|
|
** The last point is to ensure all column numbers are stored as
|
|
** single-byte varints.
|
|
*/
|
|
static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
|
|
|
|
assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
|
|
assert( pIter->pColset );
|
|
|
|
if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
|
|
fts5IterSetOutputs_Col(pIter, pSeg);
|
|
}else{
|
|
u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
|
|
u8 *pEnd = (u8*)&a[pSeg->nPos];
|
|
int iPrev = 0;
|
|
int *aiCol = pIter->pColset->aiCol;
|
|
int *aiColEnd = &aiCol[pIter->pColset->nCol];
|
|
|
|
u8 *aOut = pIter->poslist.p;
|
|
int iPrevOut = 0;
|
|
|
|
pIter->base.iRowid = pSeg->iRowid;
|
|
|
|
while( a<pEnd ){
|
|
iPrev += (int)a++[0] - 2;
|
|
while( *aiCol<iPrev ){
|
|
aiCol++;
|
|
if( aiCol==aiColEnd ) goto setoutputs_col_out;
|
|
}
|
|
if( *aiCol==iPrev ){
|
|
*aOut++ = (iPrev - iPrevOut) + 2;
|
|
iPrevOut = iPrev;
|
|
}
|
|
}
|
|
|
|
setoutputs_col_out:
|
|
pIter->base.pData = pIter->poslist.p;
|
|
pIter->base.nData = aOut - pIter->poslist.p;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** xSetOutputs callback used by detail=full when there is a column filter.
|
|
*/
|
|
static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
|
|
Fts5Colset *pColset = pIter->pColset;
|
|
pIter->base.iRowid = pSeg->iRowid;
|
|
|
|
assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
|
|
assert( pColset );
|
|
|
|
if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
|
|
/* All data is stored on the current page. Populate the output
|
|
** variables to point into the body of the page object. */
|
|
const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
|
|
if( pColset->nCol==1 ){
|
|
pIter->base.nData = fts5IndexExtractCol(&a, pSeg->nPos,pColset->aiCol[0]);
|
|
pIter->base.pData = a;
|
|
}else{
|
|
fts5BufferZero(&pIter->poslist);
|
|
fts5IndexExtractColset(pColset, a, pSeg->nPos, &pIter->poslist);
|
|
pIter->base.pData = pIter->poslist.p;
|
|
pIter->base.nData = pIter->poslist.n;
|
|
}
|
|
}else{
|
|
/* The data is distributed over two or more pages. Copy it into the
|
|
** Fts5Iter.poslist buffer and then set the output pointer to point
|
|
** to this buffer. */
|
|
fts5BufferZero(&pIter->poslist);
|
|
fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
|
|
pIter->base.pData = pIter->poslist.p;
|
|
pIter->base.nData = pIter->poslist.n;
|
|
}
|
|
}
|
|
|
|
static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
|
|
if( *pRc==SQLITE_OK ){
|
|
Fts5Config *pConfig = pIter->pIndex->pConfig;
|
|
if( pConfig->eDetail==FTS5_DETAIL_NONE ){
|
|
pIter->xSetOutputs = fts5IterSetOutputs_None;
|
|
}
|
|
|
|
else if( pIter->pColset==0 ){
|
|
pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
|
|
}
|
|
|
|
else if( pConfig->eDetail==FTS5_DETAIL_FULL ){
|
|
pIter->xSetOutputs = fts5IterSetOutputs_Full;
|
|
}
|
|
|
|
else{
|
|
assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
|
|
if( pConfig->nCol<=100 ){
|
|
pIter->xSetOutputs = fts5IterSetOutputs_Col100;
|
|
sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
|
|
}else{
|
|
pIter->xSetOutputs = fts5IterSetOutputs_Col;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
** Allocate a new Fts5Iter object.
|
|
**
|
|
** The new object will be used to iterate through data in structure pStruct.
|
|
** If iLevel is negative, then all data in all segments is merged. Or, if iLevel
|
|
** is zero or greater, data from the first nSegment segments on level iLevel
|
|
** is merged.
|
|
**
|
|
** The iterator initially points to the first term/rowid entry in the
|
|
** iterated data.
|
|
*/
|
|
static void fts5MultiIterNew(
|
|
Fts5Index *p, /* FTS5 backend to iterate within */
|
|
Fts5Structure *pStruct, /* Structure of specific index */
|
|
int flags, /* FTS5INDEX_QUERY_XXX flags */
|
|
Fts5Colset *pColset, /* Colset to filter on (or NULL) */
|
|
const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */
|
|
int iLevel, /* Level to iterate (-1 for all) */
|
|
int nSegment, /* Number of segments to merge (iLevel>=0) */
|
|
Fts5Iter **ppOut /* New object */
|
|
){
|
|
int nSeg = 0; /* Number of segment-iters in use */
|
|
int iIter = 0; /* */
|
|
int iSeg; /* Used to iterate through segments */
|
|
Fts5StructureLevel *pLvl;
|
|
Fts5Iter *pNew;
|
|
|
|
assert( (pTerm==0 && nTerm==0) || iLevel<0 );
|
|
|
|
/* Allocate space for the new multi-seg-iterator. */
|
|
if( p->rc==SQLITE_OK ){
|
|
if( iLevel<0 ){
|
|
assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
|
|
nSeg = pStruct->nSegment;
|
|
nSeg += (p->pHash ? 1 : 0);
|
|
}else{
|
|
nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
|
|
}
|
|
}
|
|
*ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
|
|
if( pNew==0 ) return;
|
|
pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
|
|
pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
|
|
pNew->pStruct = pStruct;
|
|
pNew->pColset = pColset;
|
|
fts5StructureRef(pStruct);
|
|
if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
|
|
fts5IterSetOutputCb(&p->rc, pNew);
|
|
}
|
|
|
|
/* Initialize each of the component segment iterators. */
|
|
if( p->rc==SQLITE_OK ){
|
|
if( iLevel<0 ){
|
|
Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
|
|
if( p->pHash ){
|
|
/* Add a segment iterator for the current contents of the hash table. */
|
|
Fts5SegIter *pIter = &pNew->aSeg[iIter++];
|
|
fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
|
|
}
|
|
for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
|
|
for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
|
|
Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
|
|
Fts5SegIter *pIter = &pNew->aSeg[iIter++];
|
|
if( pTerm==0 ){
|
|
fts5SegIterInit(p, pSeg, pIter);
|
|
}else{
|
|
fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
|
|
}
|
|
}
|
|
}
|
|
}else{
|
|
pLvl = &pStruct->aLevel[iLevel];
|
|
for(iSeg=nSeg-1; iSeg>=0; iSeg--){
|
|
fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
|
|
}
|
|
}
|
|
assert( iIter==nSeg );
|
|
}
|
|
|
|
  /* If the above was successful, each component iterator now points
  ** to the first entry in its segment. In this case initialize the
  ** aFirst[] array. Or, if an error has occurred, free the iterator
  ** object and set the output variable to NULL. */
|
|
if( p->rc==SQLITE_OK ){
|
|
for(iIter=pNew->nSeg-1; iIter>0; iIter--){
|
|
int iEq;
|
|
if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
|
|
Fts5SegIter *pSeg = &pNew->aSeg[iEq];
|
|
if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
|
|
fts5MultiIterAdvanced(p, pNew, iEq, iIter);
|
|
}
|
|
}
|
|
fts5MultiIterSetEof(pNew);
|
|
fts5AssertMultiIterSetup(p, pNew);
|
|
|
|
if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
|
|
fts5MultiIterNext(p, pNew, 0, 0);
|
|
}else if( pNew->base.bEof==0 ){
|
|
Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
|
|
pNew->xSetOutputs(pNew, pSeg);
|
|
}
|
|
|
|
}else{
|
|
fts5MultiIterFree(pNew);
|
|
*ppOut = 0;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Create an Fts5Iter that iterates through the doclist provided
|
|
** as the second argument.
|
|
*/
|
|
static void fts5MultiIterNew2(
|
|
Fts5Index *p, /* FTS5 backend to iterate within */
|
|
Fts5Data *pData, /* Doclist to iterate through */
|
|
int bDesc, /* True for descending rowid order */
|
|
Fts5Iter **ppOut /* New object */
|
|
){
|
|
Fts5Iter *pNew;
|
|
pNew = fts5MultiIterAlloc(p, 2);
|
|
if( pNew ){
|
|
Fts5SegIter *pIter = &pNew->aSeg[1];
|
|
|
|
pIter->flags = FTS5_SEGITER_ONETERM;
|
|
if( pData->szLeaf>0 ){
|
|
pIter->pLeaf = pData;
|
|
pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
|
|
pIter->iEndofDoclist = pData->nn;
|
|
pNew->aFirst[1].iFirst = 1;
|
|
if( bDesc ){
|
|
pNew->bRev = 1;
|
|
pIter->flags |= FTS5_SEGITER_REVERSE;
|
|
fts5SegIterReverseInitPage(p, pIter);
|
|
}else{
|
|
fts5SegIterLoadNPos(p, pIter);
|
|
}
|
|
pData = 0;
|
|
}else{
|
|
pNew->base.bEof = 1;
|
|
}
|
|
fts5SegIterSetNext(p, pIter);
|
|
|
|
*ppOut = pNew;
|
|
}
|
|
|
|
fts5DataRelease(pData);
|
|
}
|
|
|
|
/*
|
|
** Return true if the iterator is at EOF or if an error has occurred.
|
|
** False otherwise.
|
|
*/
|
|
static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
|
|
assert( p->rc
|
|
|| (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
|
|
);
|
|
return (p->rc || pIter->base.bEof);
|
|
}
|
|
|
|
/*
|
|
** Return the rowid of the entry that the iterator currently points
|
|
** to. If the iterator points to EOF when this function is called the
|
|
** results are undefined.
|
|
*/
|
|
static i64 fts5MultiIterRowid(Fts5Iter *pIter){
|
|
assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
|
|
return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
|
|
}
|
|
|
|
/*
|
|
** Move the iterator to the next entry at or following iMatch.
|
|
*/
|
|
static void fts5MultiIterNextFrom(
|
|
Fts5Index *p,
|
|
Fts5Iter *pIter,
|
|
i64 iMatch
|
|
){
|
|
while( 1 ){
|
|
i64 iRowid;
|
|
fts5MultiIterNext(p, pIter, 1, iMatch);
|
|
if( fts5MultiIterEof(p, pIter) ) break;
|
|
iRowid = fts5MultiIterRowid(pIter);
|
|
if( pIter->bRev==0 && iRowid>=iMatch ) break;
|
|
if( pIter->bRev!=0 && iRowid<=iMatch ) break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Return a pointer to a buffer containing the term associated with the
|
|
** entry that the iterator currently points to.
|
|
*/
|
|
static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
|
|
Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
|
|
*pn = p->term.n;
|
|
return p->term.p;
|
|
}
|
|
|
|
/*
|
|
** Allocate a new segment-id for the structure pStruct. The new segment
|
|
** id must be between 1 and 65535 inclusive, and must not be used by
|
|
** any currently existing segment. If a free segment id cannot be found,
|
|
** SQLITE_FULL is returned.
|
|
**
|
|
** If an error has already occurred, this function is a no-op. 0 is
|
|
** returned in this case.
|
|
*/
|
|
static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
|
|
int iSegid = 0;
|
|
|
|
if( p->rc==SQLITE_OK ){
|
|
if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
|
|
p->rc = SQLITE_FULL;
|
|
}else{
|
|
while( iSegid==0 ){
|
|
int iLvl, iSeg;
|
|
sqlite3_randomness(sizeof(u32), (void*)&iSegid);
|
|
iSegid = iSegid & ((1 << FTS5_DATA_ID_B)-1);
|
|
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
|
|
for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
|
|
if( iSegid==pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ){
|
|
iSegid = 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return iSegid;
|
|
}
|
|
|
|
/*
|
|
** Discard all data currently cached in the hash-tables.
|
|
*/
|
|
static void fts5IndexDiscardData(Fts5Index *p){
|
|
assert( p->pHash || p->nPendingData==0 );
|
|
if( p->pHash ){
|
|
sqlite3Fts5HashClear(p->pHash);
|
|
p->nPendingData = 0;
|
|
}
|
|
}
|
|
|
|
/*
** Return the size of the prefix, in bytes, that buffer
** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
**
** Buffer (pNew/<length-unknown>) is guaranteed to be greater
** than buffer (pOld/nOld).
*/
static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
  int i;
  for(i=0; i<nOld; i++){
    if( pOld[i]!=pNew[i] ) break;
  }
  return i;
}
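/* Illustrative sketch, not part of the original file and excluded from the
** build: fts5WriteAppendTerm() below uses fts5PrefixCompress() to store
** every term after the first on a page as (nPrefix, nSuffix, suffix
** bytes), where nPrefix is the number of leading bytes shared with the
** previous term. The function and values below are an example only. */
#if 0
static void examplePrefixEncode(void){
  const u8 aPrev[] = "linux";       /* Previous term on the page */
  const u8 aNew[]  = "linuxes";     /* Term being appended */
  int nPrefix = fts5PrefixCompress(5, aPrev, aNew);   /* == 5 */
  int nSuffix = 7 - nPrefix;                          /* == 2, i.e. "es" */
  /* Only the varints 5 and 2 plus the two bytes "es" reach the page,
  ** rather than the full seven-byte term. */
  (void)nSuffix;
}
#endif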
|
|
|
|
static void fts5WriteDlidxClear(
|
|
Fts5Index *p,
|
|
Fts5SegWriter *pWriter,
|
|
int bFlush /* If true, write dlidx to disk */
|
|
){
|
|
int i;
|
|
assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
|
|
for(i=0; i<pWriter->nDlidx; i++){
|
|
Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
|
|
if( pDlidx->buf.n==0 ) break;
|
|
if( bFlush ){
|
|
assert( pDlidx->pgno!=0 );
|
|
fts5DataWrite(p,
|
|
FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
|
|
pDlidx->buf.p, pDlidx->buf.n
|
|
);
|
|
}
|
|
sqlite3Fts5BufferZero(&pDlidx->buf);
|
|
pDlidx->bPrevValid = 0;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
|
|
** Any new array elements are zeroed before returning.
|
|
*/
|
|
static int fts5WriteDlidxGrow(
|
|
Fts5Index *p,
|
|
Fts5SegWriter *pWriter,
|
|
int nLvl
|
|
){
|
|
if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
|
|
Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc(
|
|
pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
|
|
);
|
|
if( aDlidx==0 ){
|
|
p->rc = SQLITE_NOMEM;
|
|
}else{
|
|
int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
|
|
memset(&aDlidx[pWriter->nDlidx], 0, nByte);
|
|
pWriter->aDlidx = aDlidx;
|
|
pWriter->nDlidx = nLvl;
|
|
}
|
|
}
|
|
return p->rc;
|
|
}
|
|
|
|
/*
|
|
** If the current doclist-index accumulating in pWriter->aDlidx[] is large
|
|
** enough, flush it to disk and return 1. Otherwise discard it and return
|
|
** zero.
|
|
*/
|
|
static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
|
|
int bFlag = 0;
|
|
|
|
/* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
|
|
** to the database, also write the doclist-index to disk. */
|
|
if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
|
|
bFlag = 1;
|
|
}
|
|
fts5WriteDlidxClear(p, pWriter, bFlag);
|
|
pWriter->nEmpty = 0;
|
|
return bFlag;
|
|
}
|
|
|
|
/*
|
|
** This function is called whenever processing of the doclist for the
|
|
** last term on leaf page (pWriter->iBtPage) is completed.
|
|
**
|
|
** The doclist-index for that term is currently stored in-memory within the
|
|
** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
|
|
** writes it out to disk. Or, if it is too small to bother with, discards
|
|
** it.
|
|
**
|
|
** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
|
|
*/
|
|
static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
|
|
int bFlag;
|
|
|
|
assert( pWriter->iBtPage || pWriter->nEmpty==0 );
|
|
if( pWriter->iBtPage==0 ) return;
|
|
bFlag = fts5WriteFlushDlidx(p, pWriter);
|
|
|
|
if( p->rc==SQLITE_OK ){
|
|
const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
|
|
/* The following was already done in fts5WriteInit(): */
|
|
/* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
|
|
sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
|
|
sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
|
|
sqlite3_step(p->pIdxWriter);
|
|
p->rc = sqlite3_reset(p->pIdxWriter);
|
|
}
|
|
pWriter->iBtPage = 0;
|
|
}
|
|
|
|
/*
|
|
** This is called once for each leaf page except the first that contains
|
|
** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
|
|
** is larger than all terms written to earlier leaves, and equal to or
|
|
** smaller than the first term on the new leaf.
|
|
**
|
|
** If an error occurs, an error code is left in Fts5Index.rc. If an error
|
|
** has already occurred when this function is called, it is a no-op.
|
|
*/
|
|
static void fts5WriteBtreeTerm(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
Fts5SegWriter *pWriter, /* Writer object */
|
|
int nTerm, const u8 *pTerm /* First term on new page */
|
|
){
|
|
fts5WriteFlushBtree(p, pWriter);
|
|
fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
|
|
pWriter->iBtPage = pWriter->writer.pgno;
|
|
}
|
|
|
|
/*
|
|
** This function is called when flushing a leaf page that contains no
|
|
** terms at all to disk.
|
|
*/
|
|
static void fts5WriteBtreeNoTerm(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
Fts5SegWriter *pWriter /* Writer object */
|
|
){
|
|
/* If there were no rowids on the leaf page either and the doclist-index
|
|
** has already been started, append an 0x00 byte to it. */
|
|
if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
|
|
Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
|
|
assert( pDlidx->bPrevValid );
|
|
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
|
|
}
|
|
|
|
/* Increment the "number of sequential leaves without a term" counter. */
|
|
pWriter->nEmpty++;
|
|
}
|
|
|
|
static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
|
|
i64 iRowid;
|
|
int iOff;
|
|
|
|
iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
|
|
fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
|
|
return iRowid;
|
|
}
|
|
|
|
/*
|
|
** Rowid iRowid has just been appended to the current leaf page. It is the
|
|
** first on the page. This function appends an appropriate entry to the current
|
|
** doclist-index.
|
|
*/
|
|
static void fts5WriteDlidxAppend(
|
|
Fts5Index *p,
|
|
Fts5SegWriter *pWriter,
|
|
i64 iRowid
|
|
){
|
|
int i;
|
|
int bDone = 0;
|
|
|
|
for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
|
|
i64 iVal;
|
|
Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
|
|
|
|
if( pDlidx->buf.n>=p->pConfig->pgsz ){
|
|
/* The current doclist-index page is full. Write it to disk and push
|
|
** a copy of iRowid (which will become the first rowid on the next
|
|
** doclist-index leaf page) up into the next level of the b-tree
|
|
** hierarchy. If the node being flushed is currently the root node,
|
|
** also push its first rowid upwards. */
|
|
pDlidx->buf.p[0] = 0x01; /* Not the root node */
|
|
fts5DataWrite(p,
|
|
FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
|
|
pDlidx->buf.p, pDlidx->buf.n
|
|
);
|
|
fts5WriteDlidxGrow(p, pWriter, i+2);
|
|
pDlidx = &pWriter->aDlidx[i];
|
|
if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
|
|
i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
|
|
|
|
/* This was the root node. Push its first rowid up to the new root. */
|
|
pDlidx[1].pgno = pDlidx->pgno;
|
|
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
|
|
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
|
|
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
|
|
pDlidx[1].bPrevValid = 1;
|
|
pDlidx[1].iPrev = iFirst;
|
|
}
|
|
|
|
sqlite3Fts5BufferZero(&pDlidx->buf);
|
|
pDlidx->bPrevValid = 0;
|
|
pDlidx->pgno++;
|
|
}else{
|
|
bDone = 1;
|
|
}
|
|
|
|
if( pDlidx->bPrevValid ){
|
|
iVal = iRowid - pDlidx->iPrev;
|
|
}else{
|
|
i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
|
|
assert( pDlidx->buf.n==0 );
|
|
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
|
|
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
|
|
iVal = iRowid;
|
|
}
|
|
|
|
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
|
|
pDlidx->bPrevValid = 1;
|
|
pDlidx->iPrev = iRowid;
|
|
}
|
|
}
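/* Illustrative sketch, not part of the original file and excluded from the
** build: a doclist-index page as assembled above starts with a varint
** flag (0x00 for the root node, 0x01 otherwise), then the page number the
** first entry refers to, then the absolute first rowid. Each subsequent
** varint covers one further page and is either 0x00 (the page holds no
** rowids, see fts5WriteBtreeNoTerm()) or the delta from the previously
** recorded rowid. Single-byte varints and the helper name are assumed. */
#if 0
static void exampleDecodeDlidxPage(const unsigned char *a, int n){
  int i = 0;
  int bNotRoot = a[i++];            /* 0x00 only on the root node */
  int pgno = a[i++];                /* Page the first rowid belongs to */
  long long iRowid = a[i++];        /* First rowid, stored in full */
  while( i<n ){
    int iVal = a[i++];
    pgno++;                         /* Each entry covers the next page */
    if( iVal ) iRowid += iVal;      /* 0x00 means "page has no rowids" */
  }
  (void)bNotRoot;
}
#endif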
|
|
|
|
static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
|
|
static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
|
|
Fts5PageWriter *pPage = &pWriter->writer;
|
|
i64 iRowid;
|
|
|
|
assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
|
|
|
|
/* Set the szLeaf header field. */
|
|
assert( 0==fts5GetU16(&pPage->buf.p[2]) );
|
|
fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);
|
|
|
|
if( pWriter->bFirstTermInPage ){
|
|
/* No term was written to this page. */
|
|
assert( pPage->pgidx.n==0 );
|
|
fts5WriteBtreeNoTerm(p, pWriter);
|
|
}else{
|
|
/* Append the pgidx to the page buffer. Set the szLeaf header field. */
|
|
fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
|
|
}
|
|
|
|
/* Write the page out to disk */
|
|
iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
|
|
fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
|
|
|
|
/* Initialize the next page. */
|
|
fts5BufferZero(&pPage->buf);
|
|
fts5BufferZero(&pPage->pgidx);
|
|
fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
|
|
pPage->iPrevPgidx = 0;
|
|
pPage->pgno++;
|
|
|
|
/* Increase the leaves written counter */
|
|
pWriter->nLeafWritten++;
|
|
|
|
/* The new leaf holds no terms or rowids */
|
|
pWriter->bFirstTermInPage = 1;
|
|
pWriter->bFirstRowidInPage = 1;
|
|
}
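/* Illustrative sketch, not part of the original file and excluded from the
** build: every leaf page begins with a 4-byte header. Bytes 0-1 hold the
** offset of the first rowid on the page (0 if there is none, see
** fts5WriteAppendRowid()) and bytes 2-3 hold "szLeaf", the offset of the
** page-index, which is what the code above fills in before the page is
** written. The 16-bit fields are assumed to be big-endian, matching
** fts5PutU16()/fts5GetU16(). */
#if 0
static void exampleReadLeafHeader(
  const unsigned char *aPage,       /* Leaf page image */
  int *piRowidOff,                  /* OUT: offset of first rowid, or 0 */
  int *piSzLeaf                     /* OUT: offset of the page-index */
){
  *piRowidOff = (aPage[0] << 8) | aPage[1];
  *piSzLeaf   = (aPage[2] << 8) | aPage[3];
}
#endif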
|
|
|
|
/*
|
|
** Append term pTerm/nTerm to the segment being written by the writer passed
|
|
** as the second argument.
|
|
**
|
|
** If an error occurs, set the Fts5Index.rc error code. If an error has
|
|
** already occurred, this function is a no-op.
|
|
*/
|
|
static void fts5WriteAppendTerm(
|
|
Fts5Index *p,
|
|
Fts5SegWriter *pWriter,
|
|
int nTerm, const u8 *pTerm
|
|
){
|
|
int nPrefix; /* Bytes of prefix compression for term */
|
|
Fts5PageWriter *pPage = &pWriter->writer;
|
|
Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
|
|
|
|
assert( p->rc==SQLITE_OK );
|
|
assert( pPage->buf.n>=4 );
|
|
assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
|
|
|
|
/* If the current leaf page is full, flush it to disk. */
|
|
if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
|
|
if( pPage->buf.n>4 ){
|
|
fts5WriteFlushLeaf(p, pWriter);
|
|
}
|
|
fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
|
|
}
|
|
|
|
/* TODO1: Updating pgidx here. */
|
|
pPgidx->n += sqlite3Fts5PutVarint(
|
|
&pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
|
|
);
|
|
pPage->iPrevPgidx = pPage->buf.n;
|
|
#if 0
|
|
fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
|
|
pPgidx->n += 2;
|
|
#endif
|
|
|
|
if( pWriter->bFirstTermInPage ){
|
|
nPrefix = 0;
|
|
if( pPage->pgno!=1 ){
|
|
/* This is the first term on a leaf that is not the leftmost leaf in
|
|
** the segment b-tree. In this case it is necessary to add a term to
|
|
** the b-tree hierarchy that is (a) larger than the largest term
|
|
** already written to the segment and (b) smaller than or equal to
|
|
** this term. In other words, a prefix of (pTerm/nTerm) that is one
|
|
** byte longer than the longest prefix (pTerm/nTerm) shares with the
|
|
** previous term.
|
|
**
|
|
** Usually, the previous term is available in pPage->term. The exception
|
|
** is if this is the first term written in an incremental-merge step.
|
|
** In this case the previous term is not available, so just write a
|
|
** copy of (pTerm/nTerm) into the parent node. This is slightly
|
|
** inefficient, but still correct. */
|
|
int n = nTerm;
|
|
if( pPage->term.n ){
|
|
n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
|
|
}
|
|
fts5WriteBtreeTerm(p, pWriter, n, pTerm);
|
|
pPage = &pWriter->writer;
|
|
}
|
|
}else{
|
|
nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
|
|
fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
|
|
}
|
|
|
|
/* Append the number of bytes of new data, then the term data itself
|
|
** to the page. */
|
|
fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
|
|
fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
|
|
|
|
/* Update the Fts5PageWriter.term field. */
|
|
fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
|
|
pWriter->bFirstTermInPage = 0;
|
|
|
|
pWriter->bFirstRowidInPage = 0;
|
|
pWriter->bFirstRowidInDoclist = 1;
|
|
|
|
assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
|
|
pWriter->aDlidx[0].pgno = pPage->pgno;
|
|
}
|
|
|
|
/*
|
|
** Append a rowid and position-list size field to the writer's output.
|
|
*/
|
|
static void fts5WriteAppendRowid(
|
|
Fts5Index *p,
|
|
Fts5SegWriter *pWriter,
|
|
i64 iRowid
|
|
){
|
|
if( p->rc==SQLITE_OK ){
|
|
Fts5PageWriter *pPage = &pWriter->writer;
|
|
|
|
if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
|
|
fts5WriteFlushLeaf(p, pWriter);
|
|
}
|
|
|
|
/* If this is to be the first rowid written to the page, set the
|
|
** rowid-pointer in the page-header. Also append a value to the dlidx
|
|
** buffer, in case a doclist-index is required. */
|
|
if( pWriter->bFirstRowidInPage ){
|
|
fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
|
|
fts5WriteDlidxAppend(p, pWriter, iRowid);
|
|
}
|
|
|
|
/* Write the rowid. */
|
|
if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
|
|
fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
|
|
}else{
|
|
assert( p->rc || iRowid>pWriter->iPrevRowid );
|
|
fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
|
|
}
|
|
pWriter->iPrevRowid = iRowid;
|
|
pWriter->bFirstRowidInDoclist = 0;
|
|
pWriter->bFirstRowidInPage = 0;
|
|
}
|
|
}
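/* Illustrative sketch, not part of the original file and excluded from the
** build: within a doclist the first rowid (and the first rowid on each new
** page) is stored in full, and every later rowid is stored as the positive
** delta from its predecessor, exactly as the function above decides. The
** simplified encoder below writes one value per array slot instead of
** SQLite varints; its name is an assumption. */
#if 0
static void exampleEncodeRowids(
  const long long *aRowid, int n,   /* Ascending rowids to encode */
  long long *aOut                   /* OUT: values as they would be stored */
){
  int i;
  for(i=0; i<n; i++){
    aOut[i] = (i==0) ? aRowid[i] : (aRowid[i] - aRowid[i-1]);
  }
}
#endif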
|
|
|
|
static void fts5WriteAppendPoslistData(
|
|
Fts5Index *p,
|
|
Fts5SegWriter *pWriter,
|
|
const u8 *aData,
|
|
int nData
|
|
){
|
|
Fts5PageWriter *pPage = &pWriter->writer;
|
|
const u8 *a = aData;
|
|
int n = nData;
|
|
|
|
assert( p->pConfig->pgsz>0 );
|
|
while( p->rc==SQLITE_OK
|
|
&& (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
|
|
){
|
|
int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
|
|
int nCopy = 0;
|
|
while( nCopy<nReq ){
|
|
i64 dummy;
|
|
nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
|
|
}
|
|
fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
|
|
a += nCopy;
|
|
n -= nCopy;
|
|
fts5WriteFlushLeaf(p, pWriter);
|
|
}
|
|
if( n>0 ){
|
|
fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Flush any data cached by the writer object to the database. Free any
|
|
** allocations associated with the writer.
|
|
*/
|
|
static void fts5WriteFinish(
|
|
Fts5Index *p,
|
|
Fts5SegWriter *pWriter, /* Writer object */
|
|
int *pnLeaf /* OUT: Number of leaf pages in b-tree */
|
|
){
|
|
int i;
|
|
Fts5PageWriter *pLeaf = &pWriter->writer;
|
|
if( p->rc==SQLITE_OK ){
|
|
assert( pLeaf->pgno>=1 );
|
|
if( pLeaf->buf.n>4 ){
|
|
fts5WriteFlushLeaf(p, pWriter);
|
|
}
|
|
*pnLeaf = pLeaf->pgno-1;
|
|
fts5WriteFlushBtree(p, pWriter);
|
|
}
|
|
fts5BufferFree(&pLeaf->term);
|
|
fts5BufferFree(&pLeaf->buf);
|
|
fts5BufferFree(&pLeaf->pgidx);
|
|
fts5BufferFree(&pWriter->btterm);
|
|
|
|
for(i=0; i<pWriter->nDlidx; i++){
|
|
sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
|
|
}
|
|
sqlite3_free(pWriter->aDlidx);
|
|
}
|
|
|
|
static void fts5WriteInit(
|
|
Fts5Index *p,
|
|
Fts5SegWriter *pWriter,
|
|
int iSegid
|
|
){
|
|
const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;
|
|
|
|
memset(pWriter, 0, sizeof(Fts5SegWriter));
|
|
pWriter->iSegid = iSegid;
|
|
|
|
fts5WriteDlidxGrow(p, pWriter, 1);
|
|
pWriter->writer.pgno = 1;
|
|
pWriter->bFirstTermInPage = 1;
|
|
pWriter->iBtPage = 1;
|
|
|
|
assert( pWriter->writer.buf.n==0 );
|
|
assert( pWriter->writer.pgidx.n==0 );
|
|
|
|
/* Grow the two buffers to pgsz + padding bytes in size. */
|
|
sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
|
|
sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);
|
|
|
|
if( p->pIdxWriter==0 ){
|
|
Fts5Config *pConfig = p->pConfig;
|
|
fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
|
|
"INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
|
|
pConfig->zDb, pConfig->zName
|
|
));
|
|
}
|
|
|
|
if( p->rc==SQLITE_OK ){
|
|
/* Initialize the 4-byte leaf-page header to 0x00. */
|
|
memset(pWriter->writer.buf.p, 0, 4);
|
|
pWriter->writer.buf.n = 4;
|
|
|
|
/* Bind the current output segment id to the index-writer. This is an
|
|
** optimization over binding the same value over and over as rows are
|
|
** inserted into %_idx by the current writer. */
|
|
sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Iterator pIter was used to iterate through the input segments of an
|
|
** incremental merge operation. This function is called if the incremental
|
|
** merge step has finished but the input has not been completely exhausted.
|
|
*/
|
|
static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
|
|
int i;
|
|
Fts5Buffer buf;
|
|
memset(&buf, 0, sizeof(Fts5Buffer));
|
|
for(i=0; i<pIter->nSeg; i++){
|
|
Fts5SegIter *pSeg = &pIter->aSeg[i];
|
|
if( pSeg->pSeg==0 ){
|
|
/* no-op */
|
|
}else if( pSeg->pLeaf==0 ){
|
|
      /* All keys from this input segment have been transferred to the output.
|
|
** Set both the first and last page-numbers to 0 to indicate that the
|
|
** segment is now empty. */
|
|
pSeg->pSeg->pgnoLast = 0;
|
|
pSeg->pSeg->pgnoFirst = 0;
|
|
}else{
|
|
int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */
|
|
i64 iLeafRowid;
|
|
Fts5Data *pData;
|
|
int iId = pSeg->pSeg->iSegid;
|
|
u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
|
|
|
|
iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
|
|
pData = fts5DataRead(p, iLeafRowid);
|
|
if( pData ){
|
|
fts5BufferZero(&buf);
|
|
fts5BufferGrow(&p->rc, &buf, pData->nn);
|
|
fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
|
|
fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
|
|
fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
|
|
fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]);
|
|
if( p->rc==SQLITE_OK ){
|
|
/* Set the szLeaf field */
|
|
fts5PutU16(&buf.p[2], (u16)buf.n);
|
|
}
|
|
|
|
/* Set up the new page-index array */
|
|
fts5BufferAppendVarint(&p->rc, &buf, 4);
|
|
if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
|
|
&& pSeg->iEndofDoclist<pData->szLeaf
|
|
){
|
|
int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
|
|
fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
|
|
fts5BufferAppendBlob(&p->rc, &buf,
|
|
pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
|
|
);
|
|
}
|
|
|
|
fts5DataRelease(pData);
|
|
pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
|
|
fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
|
|
fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
|
|
}
|
|
}
|
|
}
|
|
fts5BufferFree(&buf);
|
|
}
|
|
|
|
static void fts5MergeChunkCallback(
|
|
Fts5Index *p,
|
|
void *pCtx,
|
|
const u8 *pChunk, int nChunk
|
|
){
|
|
Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
|
|
fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
|
|
}
|
|
|
|
/*
** Merge the input segments on level iLvl of structure *ppStruct into a
** single output segment on level iLvl+1. If pnRem is not NULL, writing
** stops soon after *pnRem output leaf pages have been produced, and *pnRem
** is decremented by the number of leaves actually written. If the input is
** completely merged, the input segments are removed from the structure.
*/
static void fts5IndexMergeLevel(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Structure of index */
  int iLvl,                       /* Level to read input from */
  int *pnRem                      /* Write up to this many output leaves */
){
  Fts5Structure *pStruct = *ppStruct;
  Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  Fts5StructureLevel *pLvlOut;
  Fts5Iter *pIter = 0;            /* Iterator to read input data */
  int nRem = pnRem ? *pnRem : 0;  /* Output leaf pages left to write */
  int nInput;                     /* Number of input segments */
  Fts5SegWriter writer;           /* Writer object */
  Fts5StructureSegment *pSeg;     /* Output segment */
  Fts5Buffer term;
  int bOldest;                    /* True if the output segment is the oldest */
  int eDetail = p->pConfig->eDetail;
  const int flags = FTS5INDEX_QUERY_NOOUTPUT;

  assert( iLvl<pStruct->nLevel );
  assert( pLvl->nMerge<=pLvl->nSeg );

  memset(&writer, 0, sizeof(Fts5SegWriter));
  memset(&term, 0, sizeof(Fts5Buffer));
  if( pLvl->nMerge ){
    pLvlOut = &pStruct->aLevel[iLvl+1];
    assert( pLvlOut->nSeg>0 );
    nInput = pLvl->nMerge;
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];

    fts5WriteInit(p, &writer, pSeg->iSegid);
    writer.writer.pgno = pSeg->pgnoLast+1;
    writer.iBtPage = 0;
  }else{
    int iSegid = fts5AllocateSegid(p, pStruct);

    /* Extend the Fts5Structure object as required to ensure the output
    ** segment exists. */
    if( iLvl==pStruct->nLevel-1 ){
      fts5StructureAddLevel(&p->rc, ppStruct);
      pStruct = *ppStruct;
    }
    fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
    if( p->rc ) return;
    pLvl = &pStruct->aLevel[iLvl];
    pLvlOut = &pStruct->aLevel[iLvl+1];

    fts5WriteInit(p, &writer, iSegid);

    /* Add the new segment to the output level */
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
    pLvlOut->nSeg++;
    pSeg->pgnoFirst = 1;
    pSeg->iSegid = iSegid;
    pStruct->nSegment++;

    /* Read input from all segments in the input level */
    nInput = pLvl->nSeg;
  }
  bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);

  assert( iLvl>=0 );
  for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    int nPos;                     /* position-list size field value */
    int nTerm;
    const u8 *pTerm;

    /* Check for key annihilation. */
    if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;

    pTerm = fts5MultiIterTerm(pIter, &nTerm);
    if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
      if( pnRem && writer.nLeafWritten>nRem ){
        break;
      }

      /* This is a new term. Append a term to the output segment. */
      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));

    if( eDetail==FTS5_DETAIL_NONE ){
      if( pSegIter->bDel ){
        fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        if( pSegIter->nPos>0 ){
          fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        }
      }
    }else{
      /* Append the position-list data to the output */
      nPos = pSegIter->nPos*2 + pSegIter->bDel;
      fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
      fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
    }
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time. */
  fts5WriteFinish(p, &writer, &pSeg->pgnoLast);

  if( fts5MultiIterEof(p, pIter) ){
    int i;

    /* Remove the redundant segments from the %_data table */
    for(i=0; i<nInput; i++){
      fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
    }

    /* Remove the redundant segments from the input level */
    if( pLvl->nSeg!=nInput ){
      int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
      memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
    }
    pStruct->nSegment -= nInput;
    pLvl->nSeg -= nInput;
    pLvl->nMerge = 0;
    if( pSeg->pgnoLast==0 ){
      pLvlOut->nSeg--;
      pStruct->nSegment--;
    }
  }else{
    assert( pSeg->pgnoLast>0 );
    fts5TrimSegments(p, pIter);
    pLvl->nMerge = nInput;
  }

  fts5MultiIterFree(pIter);
  fts5BufferFree(&term);
  if( pnRem ) *pnRem -= writer.nLeafWritten;
}

/*
** Do up to nPg pages of automerge work on the index.
*/
static void fts5IndexMerge(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
  int nPg                         /* Pages of work to do */
){
  int nRem = nPg;
  Fts5Structure *pStruct = *ppStruct;
  while( nRem>0 && p->rc==SQLITE_OK ){
    int iLvl;                     /* To iterate through levels */
    int iBestLvl = 0;             /* Level offering the most input segments */
    int nBest = 0;                /* Number of input segments on best level */

    /* Set iBestLvl to the level to read input segments from. */
    assert( pStruct->nLevel>0 );
    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
      if( pLvl->nMerge ){
        if( pLvl->nMerge>nBest ){
          iBestLvl = iLvl;
          nBest = pLvl->nMerge;
        }
        break;
      }
      if( pLvl->nSeg>nBest ){
        nBest = pLvl->nSeg;
        iBestLvl = iLvl;
      }
    }

    /* If nBest is still 0, then the index must be empty. */
#ifdef SQLITE_DEBUG
    for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
      assert( pStruct->aLevel[iLvl].nSeg==0 );
    }
#endif

    if( nBest<p->pConfig->nAutomerge
     && pStruct->aLevel[iBestLvl].nMerge==0
    ){
      break;
    }
    fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
    if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
      fts5StructurePromote(p, iBestLvl+1, pStruct);
    }
  }
  *ppStruct = pStruct;
}

/*
** A total of nLeaf leaf pages of data has just been flushed to a level-0
** segment. This function updates the write-counter accordingly and, if
** necessary, performs incremental merge work.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5IndexAutomerge(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
  int nLeaf                       /* Number of output leaves just written */
){
  if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){
    Fts5Structure *pStruct = *ppStruct;
    u64 nWrite;                   /* Initial value of write-counter */
    int nWork;                    /* Number of work-quanta to perform */
    int nRem;                     /* Number of leaf pages left to write */

    /* Update the write-counter. While doing so, set nWork. */
    nWrite = pStruct->nWriteCounter;
    nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
    pStruct->nWriteCounter += nLeaf;
    nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);

    fts5IndexMerge(p, ppStruct, nRem);
  }
}

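/*
** A "crisis merge" is forced whenever a level of the structure accumulates
** nCrisisMerge or more segments. Starting at level 0, while the current
** level holds at least nCrisisMerge segments, this function merges the
** entire level into a single segment on the next level and then moves on
** to that level.
*/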
static void fts5IndexCrisismerge(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct        /* IN/OUT: Current structure of index */
){
  const int nCrisis = p->pConfig->nCrisisMerge;
  Fts5Structure *pStruct = *ppStruct;
  int iLvl = 0;

  assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
  while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
    fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
    assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
    fts5StructurePromote(p, iLvl+1, pStruct);
    iLvl++;
  }
  *ppStruct = pStruct;
}

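/*
** Return the error code currently stored in Fts5Index.rc and reset the
** stored value to SQLITE_OK. The public sqlite3Fts5Index*() entry points
** use this to report and clear any deferred error before returning.
*/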
static int fts5IndexReturn(Fts5Index *p){
  int rc = p->rc;
  p->rc = SQLITE_OK;
  return rc;
}

typedef struct Fts5FlushCtx Fts5FlushCtx;
struct Fts5FlushCtx {
  Fts5Index *pIdx;
  Fts5SegWriter writer;
};

/*
** Buffer aBuf[] contains a list of varints, all small enough to fit
** in a 32-bit integer. Return the size of the largest prefix of this
** list nMax bytes or less in size.
*/
static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
  int ret;
  u32 dummy;
  ret = fts5GetVarint32(aBuf, dummy);
  if( ret<nMax ){
    while( 1 ){
      int i = fts5GetVarint32(&aBuf[ret], dummy);
      if( (ret + i) > nMax ) break;
      ret += i;
    }
  }
  return ret;
}

/*
** Flush the contents of the in-memory hash table to a new level-0
** segment on disk. Also update the corresponding structure record.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p){
  Fts5Hash *pHash = p->pHash;
  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;               /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment. */
  pStruct = fts5StructureRead(p);
  iSegid = fts5AllocateSegid(p, pStruct);

  if( iSegid ){
    const int pgsz = p->pConfig->pgsz;
    int eDetail = p->pConfig->eDetail;
    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
    Fts5Buffer *pPgidx;           /* Buffer in which to assemble pgidx */

    Fts5SegWriter writer;
    fts5WriteInit(p, &writer, iSegid);

    pBuf = &writer.writer.buf;
    pPgidx = &writer.writer.pgidx;

    /* fts5WriteInit() should have initialized the buffers to (most likely)
    ** the maximum space required. */
    assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
    assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );

    /* Begin scanning through hash table entries. This loop runs once for each
    ** term/doclist currently stored within the hash table. */
    if( p->rc==SQLITE_OK ){
      p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
    }
    while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;          /* Buffer containing term */
      const u8 *pDoclist;         /* Pointer to doclist for this term */
      int nDoclist;               /* Size of doclist in bytes */

      /* Write the term for this entry to disk. */
      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm);

      assert( writer.bFirstRowidInPage==0 );
      if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
      }else{
        i64 iRowid = 0;
        i64 iDelta = 0;
        int iOff = 0;

        /* The entire doclist will not fit on this leaf. The following
        ** loop iterates through the poslists that make up the current
        ** doclist. */
        while( p->rc==SQLITE_OK && iOff<nDoclist ){
          iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta);
          iRowid += iDelta;

          if( writer.bFirstRowidInPage ){
            fts5PutU16(&pBuf->p[0], (u16)pBuf->n);   /* first rowid on page */
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
            writer.bFirstRowidInPage = 0;
            fts5WriteDlidxAppend(p, &writer, iRowid);
          }else{
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
          }
          assert( pBuf->n<=pBuf->nSpace );

          if( eDetail==FTS5_DETAIL_NONE ){
            if( iOff<nDoclist && pDoclist[iOff]==0 ){
              pBuf->p[pBuf->n++] = 0;
              iOff++;
              if( iOff<nDoclist && pDoclist[iOff]==0 ){
                pBuf->p[pBuf->n++] = 0;
                iOff++;
              }
            }
            if( (pBuf->n + pPgidx->n)>=pgsz ){
              fts5WriteFlushLeaf(p, &writer);
            }
          }else{
            int bDummy;
            int nPos;
            int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
            nCopy += nPos;
            if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
              /* The entire poslist will fit on the current leaf. So copy
              ** it in one go. */
              fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
            }else{
              /* The entire poslist will not fit on this leaf. So it needs
              ** to be broken into sections. The only qualification being
              ** that each varint must be stored contiguously. */
              const u8 *pPoslist = &pDoclist[iOff];
              int iPos = 0;
              while( p->rc==SQLITE_OK ){
                int nSpace = pgsz - pBuf->n - pPgidx->n;
                int n = 0;
                if( (nCopy - iPos)<=nSpace ){
                  n = nCopy - iPos;
                }else{
                  n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
                }
                assert( n>0 );
                fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
                iPos += n;
                if( (pBuf->n + pPgidx->n)>=pgsz ){
                  fts5WriteFlushLeaf(p, &writer);
                }
                if( iPos>=nCopy ) break;
              }
            }
            iOff += nCopy;
          }
        }
      }

      /* TODO2: Doclist terminator written here. */
      /* pBuf->p[pBuf->n++] = '\0'; */
      assert( pBuf->n<=pBuf->nSpace );
      sqlite3Fts5HashScanNext(pHash);
    }
    sqlite3Fts5HashClear(pHash);
    fts5WriteFinish(p, &writer, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureWrite() call below. */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
    if( p->rc==SQLITE_OK ){
      pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
      pSeg->iSegid = iSegid;
      pSeg->pgnoFirst = 1;
      pSeg->pgnoLast = pgnoLast;
      pStruct->nSegment++;
    }
    fts5StructurePromote(p, 0, pStruct);
  }

  fts5IndexAutomerge(p, &pStruct, pgnoLast);
  fts5IndexCrisismerge(p, &pStruct);
  fts5StructureWrite(p, pStruct);
  fts5StructureRelease(pStruct);
}

/*
** Flush any data stored in the in-memory hash tables to the database.
*/
static void fts5IndexFlush(Fts5Index *p){
  /* Unless it is empty, flush the hash table to disk */
  if( p->nPendingData ){
    assert( p->pHash );
    p->nPendingData = 0;
    fts5FlushOneHash(p);
  }
}

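/*
** Merge together all segments in the index into a single segment, flushing
** any pending in-memory data to disk first. This implements the special
** 'optimize' command.
*/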
int sqlite3Fts5IndexOptimize(Fts5Index *p){
|
|
Fts5Structure *pStruct;
|
|
Fts5Structure *pNew = 0;
|
|
int nSeg = 0;
|
|
|
|
assert( p->rc==SQLITE_OK );
|
|
fts5IndexFlush(p);
|
|
pStruct = fts5StructureRead(p);
|
|
|
|
if( pStruct ){
|
|
assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
|
|
nSeg = pStruct->nSegment;
|
|
if( nSeg>1 ){
|
|
int nByte = sizeof(Fts5Structure);
|
|
nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
|
|
pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
|
|
}
|
|
}
|
|
if( pNew ){
|
|
Fts5StructureLevel *pLvl;
|
|
int nByte = nSeg * sizeof(Fts5StructureSegment);
|
|
pNew->nLevel = pStruct->nLevel+1;
|
|
pNew->nRef = 1;
|
|
pNew->nWriteCounter = pStruct->nWriteCounter;
|
|
pLvl = &pNew->aLevel[pStruct->nLevel];
|
|
pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
|
|
if( pLvl->aSeg ){
|
|
int iLvl, iSeg;
|
|
int iSegOut = 0;
|
|
/* Iterate through all segments, from oldest to newest. Add them to
|
|
** the new Fts5Level object so that pLvl->aSeg[0] is the oldest
|
|
** segment in the data structure. */
|
|
for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
|
|
for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
|
|
pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
|
|
iSegOut++;
|
|
}
|
|
}
|
|
pNew->nSegment = pLvl->nSeg = nSeg;
|
|
}else{
|
|
sqlite3_free(pNew);
|
|
pNew = 0;
|
|
}
|
|
}
|
|
|
|
if( pNew ){
|
|
int iLvl = pNew->nLevel-1;
|
|
while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
|
|
int nRem = FTS5_OPT_WORK_UNIT;
|
|
fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
|
|
}
|
|
|
|
fts5StructureWrite(p, pNew);
|
|
fts5StructureRelease(pNew);
|
|
}
|
|
|
|
fts5StructureRelease(pStruct);
|
|
return fts5IndexReturn(p);
|
|
}
|
|
|
|
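/*
** Perform nMerge pages of incremental merge work on the index, then write
** the modified structure record back to the database. If the index
** structure is empty, this is a no-op.
*/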
int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
|
|
Fts5Structure *pStruct;
|
|
|
|
pStruct = fts5StructureRead(p);
|
|
if( pStruct && pStruct->nLevel ){
|
|
fts5IndexMerge(p, &pStruct, nMerge);
|
|
fts5StructureWrite(p, pStruct);
|
|
}
|
|
fts5StructureRelease(pStruct);
|
|
|
|
return fts5IndexReturn(p);
|
|
}
|
|
|
|
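/*
** The next two functions are the xAppend callbacks used by
** fts5SetupPrefixIter(). fts5AppendRowid() appends only the delta-encoded
** rowid (detail=none mode), while fts5AppendPoslist() appends the rowid
** delta followed by the position-list size field and position-list data
** for the iterator's current entry.
*/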
static void fts5AppendRowid(
|
|
Fts5Index *p,
|
|
i64 iDelta,
|
|
Fts5Iter *pUnused,
|
|
Fts5Buffer *pBuf
|
|
){
|
|
UNUSED_PARAM(pUnused);
|
|
fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
|
|
}
|
|
|
|
static void fts5AppendPoslist(
|
|
Fts5Index *p,
|
|
i64 iDelta,
|
|
Fts5Iter *pMulti,
|
|
Fts5Buffer *pBuf
|
|
){
|
|
int nData = pMulti->base.nData;
|
|
assert( nData>0 );
|
|
if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nData+9+9) ){
|
|
fts5BufferSafeAppendVarint(pBuf, iDelta);
|
|
fts5BufferSafeAppendVarint(pBuf, nData*2);
|
|
fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData);
|
|
}
|
|
}
|
|
|
|
|
|
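/*
** Advance doclist iterator pIter to the next entry in its doclist, or set
** pIter->aPoslist to NULL if the end of the doclist has been reached. The
** rowid delta and position-list size field of the new entry are decoded as
** part of the advance.
*/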
static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
|
|
u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
|
|
|
|
assert( pIter->aPoslist );
|
|
if( p>=pIter->aEof ){
|
|
pIter->aPoslist = 0;
|
|
}else{
|
|
i64 iDelta;
|
|
|
|
p += fts5GetVarint(p, (u64*)&iDelta);
|
|
pIter->iRowid += iDelta;
|
|
|
|
/* Read position list size */
|
|
if( p[0] & 0x80 ){
|
|
int nPos;
|
|
pIter->nSize = fts5GetVarint32(p, nPos);
|
|
pIter->nPoslist = (nPos>>1);
|
|
}else{
|
|
pIter->nPoslist = ((int)(p[0])) >> 1;
|
|
pIter->nSize = 1;
|
|
}
|
|
|
|
pIter->aPoslist = p;
|
|
}
|
|
}
|
|
|
|
static void fts5DoclistIterInit(
|
|
Fts5Buffer *pBuf,
|
|
Fts5DoclistIter *pIter
|
|
){
|
|
memset(pIter, 0, sizeof(*pIter));
|
|
pIter->aPoslist = pBuf->p;
|
|
pIter->aEof = &pBuf->p[pBuf->n];
|
|
fts5DoclistIterNext(pIter);
|
|
}
|
|
|
|
#if 0
|
|
/*
|
|
** Append a doclist to buffer pBuf.
|
|
**
|
|
** This function assumes that space within the buffer has already been
|
|
** allocated.
|
|
*/
|
|
static void fts5MergeAppendDocid(
|
|
Fts5Buffer *pBuf, /* Buffer to write to */
|
|
i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */
|
|
i64 iRowid /* Rowid to append */
|
|
){
|
|
assert( pBuf->n!=0 || (*piLastRowid)==0 );
|
|
fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
|
|
*piLastRowid = iRowid;
|
|
}
|
|
#endif
|
|
|
|
#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \
|
|
assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \
|
|
fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid)); \
|
|
(iLastRowid) = (iRowid); \
|
|
}
|
|
|
|
/*
|
|
** Swap the contents of buffer *p1 with that of *p2.
|
|
*/
|
|
static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
|
|
Fts5Buffer tmp = *p1;
|
|
*p1 = *p2;
|
|
*p2 = tmp;
|
|
}
|
|
|
|
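/*
** Read the next delta-encoded rowid from buffer pBuf, starting at offset
** *piOff. The decoded delta is added to *piRowid and *piOff is advanced
** past the varint, or set to -1 if the end of the buffer has been reached.
*/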
static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
|
|
int i = *piOff;
|
|
if( i>=pBuf->n ){
|
|
*piOff = -1;
|
|
}else{
|
|
u64 iVal;
|
|
*piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal);
|
|
*piRowid += iVal;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
|
|
** In this case the buffers consist of a delta-encoded list of rowids only.
|
|
*/
|
|
static void fts5MergeRowidLists(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
Fts5Buffer *p1, /* First list to merge */
|
|
Fts5Buffer *p2 /* Second list to merge */
|
|
){
|
|
int i1 = 0;
|
|
int i2 = 0;
|
|
i64 iRowid1 = 0;
|
|
i64 iRowid2 = 0;
|
|
i64 iOut = 0;
|
|
|
|
Fts5Buffer out;
|
|
memset(&out, 0, sizeof(out));
|
|
sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
|
|
if( p->rc ) return;
|
|
|
|
fts5NextRowid(p1, &i1, &iRowid1);
|
|
fts5NextRowid(p2, &i2, &iRowid2);
|
|
while( i1>=0 || i2>=0 ){
|
|
if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
|
|
assert( iOut==0 || iRowid1>iOut );
|
|
fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
|
|
iOut = iRowid1;
|
|
fts5NextRowid(p1, &i1, &iRowid1);
|
|
}else{
|
|
assert( iOut==0 || iRowid2>iOut );
|
|
fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
|
|
iOut = iRowid2;
|
|
if( i1>=0 && iRowid1==iRowid2 ){
|
|
fts5NextRowid(p1, &i1, &iRowid1);
|
|
}
|
|
fts5NextRowid(p2, &i2, &iRowid2);
|
|
}
|
|
}
|
|
|
|
fts5BufferSwap(&out, p1);
|
|
fts5BufferFree(&out);
|
|
}
|
|
|
|
/*
|
|
** Buffers p1 and p2 contain doclists. This function merges the content
|
|
** of the two doclists together and sets buffer p1 to the result before
|
|
** returning.
|
|
**
|
|
** If an error occurs, an error code is left in p->rc. If an error has
|
|
** already occurred, this function is a no-op.
|
|
*/
|
|
static void fts5MergePrefixLists(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
Fts5Buffer *p1, /* First list to merge */
|
|
Fts5Buffer *p2 /* Second list to merge */
|
|
){
|
|
if( p2->n ){
|
|
i64 iLastRowid = 0;
|
|
Fts5DoclistIter i1;
|
|
Fts5DoclistIter i2;
|
|
Fts5Buffer out = {0, 0, 0};
|
|
Fts5Buffer tmp = {0, 0, 0};
|
|
|
|
if( sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n) ) return;
|
|
fts5DoclistIterInit(p1, &i1);
|
|
fts5DoclistIterInit(p2, &i2);
|
|
|
|
while( 1 ){
|
|
if( i1.iRowid<i2.iRowid ){
|
|
/* Copy entry from i1 */
|
|
fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
|
|
fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.nPoslist+i1.nSize);
|
|
fts5DoclistIterNext(&i1);
|
|
if( i1.aPoslist==0 ) break;
|
|
}
|
|
else if( i2.iRowid!=i1.iRowid ){
|
|
/* Copy entry from i2 */
|
|
fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
|
|
fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.nPoslist+i2.nSize);
|
|
fts5DoclistIterNext(&i2);
|
|
if( i2.aPoslist==0 ) break;
|
|
}
|
|
else{
|
|
/* Merge the two position lists. */
|
|
i64 iPos1 = 0;
|
|
i64 iPos2 = 0;
|
|
int iOff1 = 0;
|
|
int iOff2 = 0;
|
|
u8 *a1 = &i1.aPoslist[i1.nSize];
|
|
u8 *a2 = &i2.aPoslist[i2.nSize];
|
|
|
|
i64 iPrev = 0;
|
|
Fts5PoslistWriter writer;
|
|
memset(&writer, 0, sizeof(writer));
|
|
|
|
fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
|
|
fts5BufferZero(&tmp);
|
|
sqlite3Fts5BufferSize(&p->rc, &tmp, i1.nPoslist + i2.nPoslist);
|
|
if( p->rc ) break;
|
|
|
|
sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
|
|
sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
|
|
assert( iPos1>=0 && iPos2>=0 );
|
|
|
|
if( iPos1<iPos2 ){
|
|
sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
|
|
sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
|
|
}else{
|
|
sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
|
|
sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
|
|
}
|
|
|
|
if( iPos1>=0 && iPos2>=0 ){
|
|
while( 1 ){
|
|
if( iPos1<iPos2 ){
|
|
if( iPos1!=iPrev ){
|
|
sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
|
|
}
|
|
sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
|
|
if( iPos1<0 ) break;
|
|
}else{
|
|
assert( iPos2!=iPrev );
|
|
sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
|
|
sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
|
|
if( iPos2<0 ) break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if( iPos1>=0 ){
|
|
if( iPos1!=iPrev ){
|
|
sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
|
|
}
|
|
fts5BufferSafeAppendBlob(&tmp, &a1[iOff1], i1.nPoslist-iOff1);
|
|
}else{
|
|
assert( iPos2>=0 && iPos2!=iPrev );
|
|
sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
|
|
fts5BufferSafeAppendBlob(&tmp, &a2[iOff2], i2.nPoslist-iOff2);
|
|
}
|
|
|
|
/* WRITEPOSLISTSIZE */
|
|
fts5BufferSafeAppendVarint(&out, tmp.n * 2);
|
|
fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
|
|
fts5DoclistIterNext(&i1);
|
|
fts5DoclistIterNext(&i2);
|
|
if( i1.aPoslist==0 || i2.aPoslist==0 ) break;
|
|
}
|
|
}
|
|
|
|
if( i1.aPoslist ){
|
|
fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
|
|
fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.aEof - i1.aPoslist);
|
|
}
|
|
else if( i2.aPoslist ){
|
|
fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
|
|
fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.aEof - i2.aPoslist);
|
|
}
|
|
|
|
fts5BufferSet(&p->rc, p1, out.n, out.p);
|
|
fts5BufferFree(&tmp);
|
|
fts5BufferFree(&out);
|
|
}
|
|
}
|
|
|
|
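/*
** Set up an iterator for a prefix query that cannot be satisfied by a
** prefix index. All terms in the main index that start with pToken/nToken
** are scanned and their doclists accumulated (using the xAppend/xMerge
** callbacks above) into a single in-memory doclist, which the new iterator
** returned via *ppIter then traverses.
*/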
static void fts5SetupPrefixIter(
|
|
Fts5Index *p, /* Index to read from */
|
|
int bDesc, /* True for "ORDER BY rowid DESC" */
|
|
const u8 *pToken, /* Buffer containing prefix to match */
|
|
int nToken, /* Size of buffer pToken in bytes */
|
|
Fts5Colset *pColset, /* Restrict matches to these columns */
|
|
Fts5Iter **ppIter /* OUT: New iterator */
|
|
){
|
|
Fts5Structure *pStruct;
|
|
Fts5Buffer *aBuf;
|
|
const int nBuf = 32;
|
|
|
|
void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*);
|
|
void (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Buffer*);
|
|
if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
|
|
xMerge = fts5MergeRowidLists;
|
|
xAppend = fts5AppendRowid;
|
|
}else{
|
|
xMerge = fts5MergePrefixLists;
|
|
xAppend = fts5AppendPoslist;
|
|
}
|
|
|
|
aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
|
|
pStruct = fts5StructureRead(p);
|
|
|
|
if( aBuf && pStruct ){
|
|
const int flags = FTS5INDEX_QUERY_SCAN
|
|
| FTS5INDEX_QUERY_SKIPEMPTY
|
|
| FTS5INDEX_QUERY_NOOUTPUT;
|
|
int i;
|
|
i64 iLastRowid = 0;
|
|
Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
|
|
Fts5Data *pData;
|
|
Fts5Buffer doclist;
|
|
int bNewTerm = 1;
|
|
|
|
memset(&doclist, 0, sizeof(doclist));
|
|
fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
|
|
fts5IterSetOutputCb(&p->rc, p1);
|
|
for( /* no-op */ ;
|
|
fts5MultiIterEof(p, p1)==0;
|
|
fts5MultiIterNext2(p, p1, &bNewTerm)
|
|
){
|
|
Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
|
|
int nTerm = pSeg->term.n;
|
|
const u8 *pTerm = pSeg->term.p;
|
|
p1->xSetOutputs(p1, pSeg);
|
|
|
|
assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
|
|
if( bNewTerm ){
|
|
if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
|
|
}
|
|
|
|
if( p1->base.nData==0 ) continue;
|
|
|
|
if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
|
|
for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
|
|
assert( i<nBuf );
|
|
if( aBuf[i].n==0 ){
|
|
fts5BufferSwap(&doclist, &aBuf[i]);
|
|
fts5BufferZero(&doclist);
|
|
}else{
|
|
xMerge(p, &doclist, &aBuf[i]);
|
|
fts5BufferZero(&aBuf[i]);
|
|
}
|
|
}
|
|
iLastRowid = 0;
|
|
}
|
|
|
|
xAppend(p, p1->base.iRowid-iLastRowid, p1, &doclist);
|
|
iLastRowid = p1->base.iRowid;
|
|
}
|
|
|
|
for(i=0; i<nBuf; i++){
|
|
if( p->rc==SQLITE_OK ){
|
|
xMerge(p, &doclist, &aBuf[i]);
|
|
}
|
|
fts5BufferFree(&aBuf[i]);
|
|
}
|
|
fts5MultiIterFree(p1);
|
|
|
|
pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
|
|
if( pData ){
|
|
pData->p = (u8*)&pData[1];
|
|
pData->nn = pData->szLeaf = doclist.n;
|
|
memcpy(pData->p, doclist.p, doclist.n);
|
|
fts5MultiIterNew2(p, pData, bDesc, ppIter);
|
|
}
|
|
fts5BufferFree(&doclist);
|
|
}
|
|
|
|
fts5StructureRelease(pStruct);
|
|
sqlite3_free(aBuf);
|
|
}
|
|
|
|
|
|
/*
|
|
** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
|
|
** to the document with rowid iRowid.
|
|
*/
|
|
int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
|
|
assert( p->rc==SQLITE_OK );
|
|
|
|
/* Allocate the hash table if it has not already been allocated */
|
|
if( p->pHash==0 ){
|
|
p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
|
|
}
|
|
|
|
/* Flush the hash table to disk if required */
|
|
if( iRowid<p->iWriteRowid
|
|
|| (iRowid==p->iWriteRowid && p->bDelete==0)
|
|
|| (p->nPendingData > p->pConfig->nHashSize)
|
|
){
|
|
fts5IndexFlush(p);
|
|
}
|
|
|
|
p->iWriteRowid = iRowid;
|
|
p->bDelete = bDelete;
|
|
return fts5IndexReturn(p);
|
|
}
|
|
|
|
/*
|
|
** Commit data to disk.
|
|
*/
|
|
int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){
|
|
assert( p->rc==SQLITE_OK );
|
|
fts5IndexFlush(p);
|
|
if( bCommit ) fts5CloseReader(p);
|
|
return fts5IndexReturn(p);
|
|
}
|
|
|
|
/*
|
|
** Discard any data stored in the in-memory hash tables. Do not write it
|
|
** to the database. Additionally, assume that the contents of the %_data
|
|
** table may have changed on disk. So any in-memory caches of %_data
|
|
** records must be invalidated.
|
|
*/
|
|
int sqlite3Fts5IndexRollback(Fts5Index *p){
|
|
fts5CloseReader(p);
|
|
fts5IndexDiscardData(p);
|
|
/* assert( p->rc==SQLITE_OK ); */
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** The %_data table is completely empty when this function is called. This
|
|
** function populates it with the initial structure objects for each index,
|
|
** and the initial version of the "averages" record (a zero-byte blob).
|
|
*/
|
|
int sqlite3Fts5IndexReinit(Fts5Index *p){
|
|
Fts5Structure s;
|
|
memset(&s, 0, sizeof(Fts5Structure));
|
|
fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
|
|
fts5StructureWrite(p, &s);
|
|
return fts5IndexReturn(p);
|
|
}
|
|
|
|
/*
|
|
** Open a new Fts5Index handle. If the bCreate argument is true, create
|
|
** and initialize the underlying %_data table.
|
|
**
|
|
** If successful, set *pp to point to the new object and return SQLITE_OK.
|
|
** Otherwise, set *pp to NULL and return an SQLite error code.
|
|
*/
|
|
int sqlite3Fts5IndexOpen(
|
|
Fts5Config *pConfig,
|
|
int bCreate,
|
|
Fts5Index **pp,
|
|
char **pzErr
|
|
){
|
|
int rc = SQLITE_OK;
|
|
Fts5Index *p; /* New object */
|
|
|
|
*pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
|
|
if( rc==SQLITE_OK ){
|
|
p->pConfig = pConfig;
|
|
p->nWorkUnit = FTS5_WORK_UNIT;
|
|
p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
|
|
if( p->zDataTbl && bCreate ){
|
|
rc = sqlite3Fts5CreateTable(
|
|
pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
|
|
);
|
|
if( rc==SQLITE_OK ){
|
|
rc = sqlite3Fts5CreateTable(pConfig, "idx",
|
|
"segid, term, pgno, PRIMARY KEY(segid, term)",
|
|
1, pzErr
|
|
);
|
|
}
|
|
if( rc==SQLITE_OK ){
|
|
rc = sqlite3Fts5IndexReinit(p);
|
|
}
|
|
}
|
|
}
|
|
|
|
assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
|
|
if( rc ){
|
|
sqlite3Fts5IndexClose(p);
|
|
*pp = 0;
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
|
|
*/
|
|
int sqlite3Fts5IndexClose(Fts5Index *p){
|
|
int rc = SQLITE_OK;
|
|
if( p ){
|
|
assert( p->pReader==0 );
|
|
sqlite3_finalize(p->pWriter);
|
|
sqlite3_finalize(p->pDeleter);
|
|
sqlite3_finalize(p->pIdxWriter);
|
|
sqlite3_finalize(p->pIdxDeleter);
|
|
sqlite3_finalize(p->pIdxSelect);
|
|
sqlite3Fts5HashFree(p->pHash);
|
|
sqlite3_free(p->zDataTbl);
|
|
sqlite3_free(p);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Argument p points to a buffer containing utf-8 text that is n bytes in
|
|
** size. Return the number of bytes in the nChar character prefix of the
|
|
** buffer, or 0 if there are fewer than nChar characters in total.
|
|
*/
|
|
int sqlite3Fts5IndexCharlenToBytelen(
|
|
const char *p,
|
|
int nByte,
|
|
int nChar
|
|
){
|
|
int n = 0;
|
|
int i;
|
|
for(i=0; i<nChar; i++){
|
|
if( n>=nByte ) return 0; /* Input contains fewer than nChar chars */
|
|
if( (unsigned char)p[n++]>=0xc0 ){
|
|
while( (p[n] & 0xc0)==0x80 ) n++;
|
|
}
|
|
}
|
|
return n;
|
|
}
|
|
|
|
/*
|
|
** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
|
|
** unicode characters in the string.
|
|
*/
|
|
static int fts5IndexCharlen(const char *pIn, int nIn){
|
|
int nChar = 0;
|
|
int i = 0;
|
|
while( i<nIn ){
|
|
if( (unsigned char)pIn[i++]>=0xc0 ){
|
|
while( i<nIn && (pIn[i] & 0xc0)==0x80 ) i++;
|
|
}
|
|
nChar++;
|
|
}
|
|
return nChar;
|
|
}
|
|
|
|
/*
|
|
** Insert or remove data to or from the index. Each time a document is
|
|
** added to or removed from the index, this function is called one or more
|
|
** times.
|
|
**
|
|
** For an insert, it must be called once for each token in the new document.
|
|
** If the operation is a delete, it must be called (at least) once for each
|
|
** unique token in the document with an iCol value less than zero. The iPos
|
|
** argument is ignored for a delete.
|
|
*/
|
|
int sqlite3Fts5IndexWrite(
|
|
Fts5Index *p, /* Index to write to */
|
|
int iCol, /* Column token appears in (-ve -> delete) */
|
|
int iPos, /* Position of token within column */
|
|
const char *pToken, int nToken /* Token to add or remove to or from index */
|
|
){
|
|
int i; /* Used to iterate through indexes */
|
|
int rc = SQLITE_OK; /* Return code */
|
|
Fts5Config *pConfig = p->pConfig;
|
|
|
|
assert( p->rc==SQLITE_OK );
|
|
assert( (iCol<0)==p->bDelete );
|
|
|
|
/* Add the entry to the main terms index. */
|
|
rc = sqlite3Fts5HashWrite(
|
|
p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
|
|
);
|
|
|
|
for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
|
|
const int nChar = pConfig->aPrefix[i];
|
|
int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
|
|
if( nByte ){
|
|
rc = sqlite3Fts5HashWrite(p->pHash,
|
|
p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
|
|
nByte
|
|
);
|
|
}
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Open a new iterator to iterate through all rowids that match the
|
|
** specified token or token prefix.
|
|
*/
|
|
int sqlite3Fts5IndexQuery(
|
|
Fts5Index *p, /* FTS index to query */
|
|
const char *pToken, int nToken, /* Token (or prefix) to query for */
|
|
int flags, /* Mask of FTS5INDEX_QUERY_X flags */
|
|
Fts5Colset *pColset, /* Match these columns only */
|
|
Fts5IndexIter **ppIter /* OUT: New iterator object */
|
|
){
|
|
Fts5Config *pConfig = p->pConfig;
|
|
Fts5Iter *pRet = 0;
|
|
Fts5Buffer buf = {0, 0, 0};
|
|
|
|
/* If the QUERY_SCAN flag is set, all other flags must be clear. */
|
|
assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
|
|
|
|
if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
|
|
int iIdx = 0; /* Index to search */
|
|
memcpy(&buf.p[1], pToken, nToken);
|
|
|
|
/* Figure out which index to search and set iIdx accordingly. If this
|
|
** is a prefix query for which there is no prefix index, set iIdx to
|
|
** greater than pConfig->nPrefix to indicate that the query will be
|
|
** satisfied by scanning multiple terms in the main index.
|
|
**
|
|
** If the QUERY_TEST_NOIDX flag was specified, then this must be a
|
|
** prefix-query. Instead of using a prefix-index (if one exists),
|
|
** evaluate the prefix query using the main FTS index. This is used
|
|
** for internal sanity checking by the integrity-check in debug
|
|
** mode only. */
|
|
#ifdef SQLITE_DEBUG
|
|
if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
|
|
assert( flags & FTS5INDEX_QUERY_PREFIX );
|
|
iIdx = 1+pConfig->nPrefix;
|
|
}else
|
|
#endif
|
|
if( flags & FTS5INDEX_QUERY_PREFIX ){
|
|
int nChar = fts5IndexCharlen(pToken, nToken);
|
|
for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
|
|
if( pConfig->aPrefix[iIdx-1]==nChar ) break;
|
|
}
|
|
}
|
|
|
|
if( iIdx<=pConfig->nPrefix ){
|
|
/* Straight index lookup */
|
|
Fts5Structure *pStruct = fts5StructureRead(p);
|
|
buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
|
|
if( pStruct ){
|
|
fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
|
|
pColset, buf.p, nToken+1, -1, 0, &pRet
|
|
);
|
|
fts5StructureRelease(pStruct);
|
|
}
|
|
}else{
|
|
/* Scan multiple terms in the main index */
|
|
int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
|
|
buf.p[0] = FTS5_MAIN_PREFIX;
|
|
fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet);
|
|
assert( p->rc!=SQLITE_OK || pRet->pColset==0 );
|
|
fts5IterSetOutputCb(&p->rc, pRet);
|
|
if( p->rc==SQLITE_OK ){
|
|
Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
|
|
if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
|
|
}
|
|
}
|
|
|
|
if( p->rc ){
|
|
sqlite3Fts5IterClose(&pRet->base);
|
|
pRet = 0;
|
|
fts5CloseReader(p);
|
|
}
|
|
|
|
*ppIter = &pRet->base;
|
|
sqlite3Fts5BufferFree(&buf);
|
|
}
|
|
return fts5IndexReturn(p);
|
|
}
|
|
|
|
/*
|
|
** Return true if the iterator passed as the only argument is at EOF.
|
|
*/
|
|
/*
|
|
** Move to the next matching rowid.
|
|
*/
|
|
int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
|
|
Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
|
|
assert( pIter->pIndex->rc==SQLITE_OK );
|
|
fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
|
|
return fts5IndexReturn(pIter->pIndex);
|
|
}
|
|
|
|
/*
|
|
** Move to the next matching term/rowid. Used by the fts5vocab module.
|
|
*/
|
|
int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
|
|
Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
|
|
Fts5Index *p = pIter->pIndex;
|
|
|
|
assert( pIter->pIndex->rc==SQLITE_OK );
|
|
|
|
fts5MultiIterNext(p, pIter, 0, 0);
|
|
if( p->rc==SQLITE_OK ){
|
|
Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
|
|
if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
|
|
fts5DataRelease(pSeg->pLeaf);
|
|
pSeg->pLeaf = 0;
|
|
pIter->base.bEof = 1;
|
|
}
|
|
}
|
|
|
|
return fts5IndexReturn(pIter->pIndex);
|
|
}
|
|
|
|
/*
|
|
** Move to the next matching rowid that occurs at or after iMatch. The
|
|
** definition of "at or after" depends on whether this iterator iterates
|
|
** in ascending or descending rowid order.
|
|
*/
|
|
int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
|
|
Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
|
|
fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
|
|
return fts5IndexReturn(pIter->pIndex);
|
|
}
|
|
|
|
/*
|
|
** Return the current term.
|
|
*/
|
|
const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
|
|
int n;
|
|
const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
|
|
*pn = n-1;
|
|
return &z[1];
|
|
}
|
|
|
|
/*
|
|
** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
|
|
*/
|
|
void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
|
|
if( pIndexIter ){
|
|
Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
|
|
Fts5Index *pIndex = pIter->pIndex;
|
|
fts5MultiIterFree(pIter);
|
|
fts5CloseReader(pIndex);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Read and decode the "averages" record from the database.
|
|
**
|
|
** Parameter anSize must point to an array of size nCol, where nCol is
|
|
** the number of user defined columns in the FTS table.
|
|
*/
|
|
int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
|
|
int nCol = p->pConfig->nCol;
|
|
Fts5Data *pData;
|
|
|
|
*pnRow = 0;
|
|
memset(anSize, 0, sizeof(i64) * nCol);
|
|
pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
|
|
if( p->rc==SQLITE_OK && pData->nn ){
|
|
int i = 0;
|
|
int iCol;
|
|
i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
|
|
for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
|
|
i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
|
|
}
|
|
}
|
|
|
|
fts5DataRelease(pData);
|
|
return fts5IndexReturn(p);
|
|
}
|
|
|
|
/*
|
|
** Replace the current "averages" record with the contents of the buffer
|
|
** supplied as the second argument.
|
|
*/
|
|
int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
|
|
assert( p->rc==SQLITE_OK );
|
|
fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
|
|
return fts5IndexReturn(p);
|
|
}
|
|
|
|
/*
|
|
** Return the total number of blocks this module has read from the %_data
|
|
** table since it was created.
|
|
*/
|
|
int sqlite3Fts5IndexReads(Fts5Index *p){
|
|
return p->nRead;
|
|
}
|
|
|
|
/*
|
|
** Set the 32-bit cookie value stored at the start of all structure
|
|
** records to the value passed as the second argument.
|
|
**
|
|
** Return SQLITE_OK if successful, or an SQLite error code if an error
|
|
** occurs.
|
|
*/
|
|
int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
|
|
int rc; /* Return code */
|
|
Fts5Config *pConfig = p->pConfig; /* Configuration object */
|
|
u8 aCookie[4]; /* Binary representation of iNew */
|
|
sqlite3_blob *pBlob = 0;
|
|
|
|
assert( p->rc==SQLITE_OK );
|
|
sqlite3Fts5Put32(aCookie, iNew);
|
|
|
|
rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
|
|
"block", FTS5_STRUCTURE_ROWID, 1, &pBlob
|
|
);
|
|
if( rc==SQLITE_OK ){
|
|
sqlite3_blob_write(pBlob, aCookie, 4, 0);
|
|
rc = sqlite3_blob_close(pBlob);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
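/*
** Read and release the index structure record. Decoding the structure
** record checks the configuration cookie stored with it, so this call has
** the side-effect of loading any updated configuration settings.
*/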
int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
|
|
Fts5Structure *pStruct;
|
|
pStruct = fts5StructureRead(p);
|
|
fts5StructureRelease(pStruct);
|
|
return fts5IndexReturn(p);
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
**************************************************************************
|
|
** Below this point is the implementation of the integrity-check
|
|
** functionality.
|
|
*/
|
|
|
|
/*
|
|
** Return a simple checksum value based on the arguments.
|
|
*/
|
|
u64 sqlite3Fts5IndexEntryCksum(
|
|
i64 iRowid,
|
|
int iCol,
|
|
int iPos,
|
|
int iIdx,
|
|
const char *pTerm,
|
|
int nTerm
|
|
){
|
|
int i;
|
|
u64 ret = iRowid;
|
|
ret += (ret<<3) + iCol;
|
|
ret += (ret<<3) + iPos;
|
|
if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
|
|
for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
|
|
return ret;
|
|
}
|
|
|
|
#ifdef SQLITE_DEBUG
|
|
/*
|
|
** This function is purely an internal test. It does not contribute to
|
|
** FTS functionality, or even the integrity-check, in any way.
|
|
**
|
|
** Instead, it tests that the same set of pgno/rowid combinations are
|
|
** visited regardless of whether the doclist-index identified by parameters
|
|
** iSegid/iLeaf is iterated in forwards or reverse order.
|
|
*/
|
|
static void fts5TestDlidxReverse(
|
|
Fts5Index *p,
|
|
int iSegid, /* Segment id to load from */
|
|
int iLeaf /* Load doclist-index for this leaf */
|
|
){
|
|
Fts5DlidxIter *pDlidx = 0;
|
|
u64 cksum1 = 13;
|
|
u64 cksum2 = 13;
|
|
|
|
for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
|
|
fts5DlidxIterEof(p, pDlidx)==0;
|
|
fts5DlidxIterNext(p, pDlidx)
|
|
){
|
|
i64 iRowid = fts5DlidxIterRowid(pDlidx);
|
|
int pgno = fts5DlidxIterPgno(pDlidx);
|
|
assert( pgno>iLeaf );
|
|
cksum1 += iRowid + ((i64)pgno<<32);
|
|
}
|
|
fts5DlidxIterFree(pDlidx);
|
|
pDlidx = 0;
|
|
|
|
for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
|
|
fts5DlidxIterEof(p, pDlidx)==0;
|
|
fts5DlidxIterPrev(p, pDlidx)
|
|
){
|
|
i64 iRowid = fts5DlidxIterRowid(pDlidx);
|
|
int pgno = fts5DlidxIterPgno(pDlidx);
|
|
assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
|
|
cksum2 += iRowid + ((i64)pgno<<32);
|
|
}
|
|
fts5DlidxIterFree(pDlidx);
|
|
pDlidx = 0;
|
|
|
|
if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
|
|
}
|
|
|
|
static int fts5QueryCksum(
|
|
Fts5Index *p, /* Fts5 index object */
|
|
int iIdx,
|
|
const char *z, /* Index key to query for */
|
|
int n, /* Size of index key in bytes */
|
|
int flags, /* Flags for Fts5IndexQuery */
|
|
u64 *pCksum /* IN/OUT: Checksum value */
|
|
){
|
|
int eDetail = p->pConfig->eDetail;
|
|
u64 cksum = *pCksum;
|
|
Fts5IndexIter *pIter = 0;
|
|
int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
|
|
|
|
while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter) ){
|
|
i64 rowid = pIter->iRowid;
|
|
|
|
if( eDetail==FTS5_DETAIL_NONE ){
|
|
cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
|
|
}else{
|
|
Fts5PoslistReader sReader;
|
|
for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
|
|
sReader.bEof==0;
|
|
sqlite3Fts5PoslistReaderNext(&sReader)
|
|
){
|
|
int iCol = FTS5_POS2COLUMN(sReader.iPos);
|
|
int iOff = FTS5_POS2OFFSET(sReader.iPos);
|
|
cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
|
|
}
|
|
}
|
|
if( rc==SQLITE_OK ){
|
|
rc = sqlite3Fts5IterNext(pIter);
|
|
}
|
|
}
|
|
sqlite3Fts5IterClose(pIter);
|
|
|
|
*pCksum = cksum;
|
|
return rc;
|
|
}
|
|
|
|
|
|
/*
|
|
** This function is also purely an internal test. It does not contribute to
|
|
** FTS functionality, or even the integrity-check, in any way.
|
|
*/
|
|
static void fts5TestTerm(
|
|
Fts5Index *p,
|
|
Fts5Buffer *pPrev, /* Previous term */
|
|
const char *z, int n, /* Possibly new term to test */
|
|
u64 expected,
|
|
u64 *pCksum
|
|
){
|
|
int rc = p->rc;
|
|
if( pPrev->n==0 ){
|
|
fts5BufferSet(&rc, pPrev, n, (const u8*)z);
|
|
}else
|
|
if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
|
|
u64 cksum3 = *pCksum;
|
|
const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */
|
|
int nTerm = pPrev->n-1; /* Size of zTerm in bytes */
|
|
int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
|
|
int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
|
|
u64 ck1 = 0;
|
|
u64 ck2 = 0;
|
|
|
|
/* Check that the results returned for ASC and DESC queries are
|
|
** the same. If not, call this corruption. */
|
|
rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
|
|
if( rc==SQLITE_OK ){
|
|
int f = flags|FTS5INDEX_QUERY_DESC;
|
|
rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
|
|
}
|
|
if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
|
|
|
|
/* If this is a prefix query, check that the results returned if the
|
|
** index is disabled are the same. In both ASC and DESC order.
|
|
**
|
|
** This check may only be performed if the hash table is empty. This
|
|
** is because the hash table only supports a single scan query at
|
|
** a time, and the multi-iter loop from which this function is called
|
|
** is already performing such a scan. */
|
|
if( p->nPendingData==0 ){
|
|
if( iIdx>0 && rc==SQLITE_OK ){
|
|
int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
|
|
ck2 = 0;
|
|
rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
|
|
if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
|
|
}
|
|
if( iIdx>0 && rc==SQLITE_OK ){
|
|
int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
|
|
ck2 = 0;
|
|
rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
|
|
if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
|
|
}
|
|
}
|
|
|
|
cksum3 ^= ck1;
|
|
fts5BufferSet(&rc, pPrev, n, (const u8*)z);
|
|
|
|
if( rc==SQLITE_OK && cksum3!=expected ){
|
|
rc = FTS5_CORRUPT;
|
|
}
|
|
*pCksum = cksum3;
|
|
}
|
|
p->rc = rc;
|
|
}
|
|
|
|
#else
|
|
# define fts5TestDlidxReverse(x,y,z)
|
|
# define fts5TestTerm(u,v,w,x,y,z)
|
|
#endif
|
|
|
|
/*
|
|
** Check that:
|
|
**
|
|
** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
|
|
** contain zero terms.
|
|
** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
|
|
** contain zero rowids.
|
|
*/
|
|
static void fts5IndexIntegrityCheckEmpty(
|
|
Fts5Index *p,
|
|
Fts5StructureSegment *pSeg, /* Segment to check internal consistency */
|
|
int iFirst,
|
|
int iNoRowid,
|
|
int iLast
|
|
){
|
|
int i;
|
|
|
|
/* Now check that the iter.nEmpty leaves following the current leaf
|
|
** (a) exist and (b) contain no terms. */
|
|
for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
|
|
Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
|
|
if( pLeaf ){
|
|
if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
|
|
if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
|
|
}
|
|
fts5DataRelease(pLeaf);
|
|
}
|
|
}
|
|
|
|
static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
|
|
int iTermOff = 0;
|
|
int ii;
|
|
|
|
Fts5Buffer buf1 = {0,0,0};
|
|
Fts5Buffer buf2 = {0,0,0};
|
|
|
|
ii = pLeaf->szLeaf;
|
|
while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
|
|
int res;
|
|
int iOff;
|
|
int nIncr;
|
|
|
|
ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
|
|
iTermOff += nIncr;
|
|
iOff = iTermOff;
|
|
|
|
if( iOff>=pLeaf->szLeaf ){
|
|
p->rc = FTS5_CORRUPT;
|
|
}else if( iTermOff==nIncr ){
|
|
int nByte;
|
|
iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
|
|
if( (iOff+nByte)>pLeaf->szLeaf ){
|
|
p->rc = FTS5_CORRUPT;
|
|
}else{
|
|
fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
|
|
}
|
|
}else{
|
|
int nKeep, nByte;
|
|
iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
|
|
iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
|
|
if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
|
|
p->rc = FTS5_CORRUPT;
|
|
}else{
|
|
buf1.n = nKeep;
|
|
fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
|
|
}
|
|
|
|
if( p->rc==SQLITE_OK ){
|
|
res = fts5BufferCompare(&buf1, &buf2);
|
|
if( res<=0 ) p->rc = FTS5_CORRUPT;
|
|
}
|
|
}
|
|
fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
|
|
}
|
|
|
|
fts5BufferFree(&buf1);
|
|
fts5BufferFree(&buf2);
|
|
}
|
|
|
|
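/*
** Run internal consistency checks on segment pSeg. Each entry in the
** %_idx table for the segment is checked against the corresponding leaf
** page: the leaf must contain a term no smaller than the split-key, any
** rowid pointer must precede the first term, and any doclist-index must
** agree with the rowids actually stored on the leaves.
*/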
static void fts5IndexIntegrityCheckSegment(
|
|
Fts5Index *p, /* FTS5 backend object */
|
|
Fts5StructureSegment *pSeg /* Segment to check internal consistency */
|
|
){
|
|
Fts5Config *pConfig = p->pConfig;
|
|
sqlite3_stmt *pStmt = 0;
|
|
int rc2;
|
|
int iIdxPrevLeaf = pSeg->pgnoFirst-1;
|
|
int iDlidxPrevLeaf = pSeg->pgnoLast;
|
|
|
|
if( pSeg->pgnoFirst==0 ) return;
|
|
|
|
fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
|
|
"SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d",
|
|
pConfig->zDb, pConfig->zName, pSeg->iSegid
|
|
));
|
|
|
|
/* Iterate through the b-tree hierarchy. */
|
|
while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
|
|
i64 iRow; /* Rowid for this leaf */
|
|
Fts5Data *pLeaf; /* Data for this leaf */
|
|
|
|
int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
|
|
const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1);
|
|
int iIdxLeaf = sqlite3_column_int(pStmt, 2);
|
|
int bIdxDlidx = sqlite3_column_int(pStmt, 3);
|
|
|
|
/* If the leaf in question has already been trimmed from the segment,
|
|
** ignore this b-tree entry. Otherwise, load it into memory. */
|
|
if( iIdxLeaf<pSeg->pgnoFirst ) continue;
|
|
iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
|
|
pLeaf = fts5DataRead(p, iRow);
|
|
if( pLeaf==0 ) break;
|
|
|
|
/* Check that the leaf contains at least one term, and that it is equal
|
|
** to or larger than the split-key in zIdxTerm. Also check that if there
|
|
** is also a rowid pointer within the leaf page header, it points to a
|
|
** location before the term. */
|
|
if( pLeaf->nn<=pLeaf->szLeaf ){
|
|
p->rc = FTS5_CORRUPT;
|
|
}else{
|
|
int iOff; /* Offset of first term on leaf */
|
|
int iRowidOff; /* Offset of first rowid on leaf */
|
|
int nTerm; /* Size of term on leaf in bytes */
|
|
int res; /* Comparison of term and split-key */
|
|
|
|
iOff = fts5LeafFirstTermOff(pLeaf);
|
|
iRowidOff = fts5LeafFirstRowidOff(pLeaf);
|
|
if( iRowidOff>=iOff ){
|
|
p->rc = FTS5_CORRUPT;
|
|
}else{
|
|
iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
|
|
res = memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
|
|
if( res==0 ) res = nTerm - nIdxTerm;
|
|
if( res<0 ) p->rc = FTS5_CORRUPT;
|
|
}
|
|
|
|
fts5IntegrityCheckPgidx(p, pLeaf);
|
|
}
|
|
fts5DataRelease(pLeaf);
|
|
if( p->rc ) break;
|
|
|
|
/* Now check that the iter.nEmpty leaves following the current leaf
|
|
** (a) exist and (b) contain no terms. */
|
|
fts5IndexIntegrityCheckEmpty(
|
|
p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
|
|
);
|
|
if( p->rc ) break;
|
|
|
|
/* If there is a doclist-index, check that it looks right. */
|
|
if( bIdxDlidx ){
|
|
Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */
|
|
int iPrevLeaf = iIdxLeaf;
|
|
int iSegid = pSeg->iSegid;
|
|
int iPg = 0;
|
|
i64 iKey;
|
|
|
|
for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
|
|
fts5DlidxIterEof(p, pDlidx)==0;
|
|
fts5DlidxIterNext(p, pDlidx)
|
|
){
|
|
|
|
/* Check any rowid-less pages that occur before the current leaf. */
|
|
for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
|
|
iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
|
|
pLeaf = fts5DataRead(p, iKey);
|
|
if( pLeaf ){
|
|
if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
|
|
fts5DataRelease(pLeaf);
|
|
}
|
|
}
|
|
iPrevLeaf = fts5DlidxIterPgno(pDlidx);
|
|
|
|
/* Check that the leaf page indicated by the iterator really does
|
|
** contain the rowid suggested by the same. */
|
|
iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
|
|
pLeaf = fts5DataRead(p, iKey);
|
|
if( pLeaf ){
|
|
i64 iRowid;
|
|
int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
|
|
ASSERT_SZLEAF_OK(pLeaf);
|
|
if( iRowidOff>=pLeaf->szLeaf ){
|
|
p->rc = FTS5_CORRUPT;
|
|
}else{
|
|
fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
|
|
if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
|
|
}
|
|
fts5DataRelease(pLeaf);
|
|
}
|
|
}
|
|
|
|
iDlidxPrevLeaf = iPg;
|
|
fts5DlidxIterFree(pDlidx);
|
|
fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
|
|
}else{
|
|
iDlidxPrevLeaf = pSeg->pgnoLast;
|
|
/* TODO: Check there is no doclist index */
|
|
}
|
|
|
|
iIdxPrevLeaf = iIdxLeaf;
|
|
}
|
|
|
|
rc2 = sqlite3_finalize(pStmt);
|
|
if( p->rc==SQLITE_OK ) p->rc = rc2;
|
|
|
|
/* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
|
|
#if 0
|
|
if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
|
|
p->rc = FTS5_CORRUPT;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
/*
|
|
** Run internal checks to ensure that the FTS index (a) is internally
|
|
** consistent and (b) contains entries for which the XOR of the checksums
|
|
** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
|
|
**
|
|
** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
|
|
** checksum does not match. Return SQLITE_OK if all checks pass without
|
|
** error, or some other SQLite error code if another error (e.g. OOM)
|
|
** occurs.
|
|
*/
|
|
int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
|
|
int eDetail = p->pConfig->eDetail;
|
|
u64 cksum2 = 0; /* Checksum based on contents of indexes */
|
|
Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */
|
|
Fts5Iter *pIter; /* Used to iterate through entire index */
|
|
Fts5Structure *pStruct; /* Index structure */
|
|
|
|
#ifdef SQLITE_DEBUG
|
|
/* Used by extra internal tests only run if NDEBUG is not defined */
|
|
u64 cksum3 = 0; /* Checksum based on contents of indexes */
|
|
Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
|
|
#endif
|
|
const int flags = FTS5INDEX_QUERY_NOOUTPUT;
|
|
|
|
/* Load the FTS index structure */
|
|
pStruct = fts5StructureRead(p);
|
|
|
|
/* Check that the internal nodes of each segment match the leaves */
|
|
if( pStruct ){
|
|
int iLvl, iSeg;
|
|
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
|
|
for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
|
|
Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
|
|
fts5IndexIntegrityCheckSegment(p, pSeg);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* The cksum argument passed to this function is a checksum calculated
|
|
** based on all expected entries in the FTS index (including prefix index
|
|
** entries). This block checks that a checksum calculated based on the
|
|
** actual contents of FTS index is identical.
|
|
**
|
|
** Two versions of the same checksum are calculated. The first (stack
|
|
** variable cksum2) based on entries extracted from the full-text index
|
|
** while doing a linear scan of each individual index in turn.
|
|
**
|
|
** As each term is visited by the linear scans, a separate query for the
|
|
** same term is performed. cksum3 is calculated based on the entries
|
|
** extracted by these queries.
|
|
*/
|
|
for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
|
|
fts5MultiIterEof(p, pIter)==0;
|
|
fts5MultiIterNext(p, pIter, 0, 0)
|
|
){
|
|
int n; /* Size of term in bytes */
|
|
i64 iPos = 0; /* Position read from poslist */
|
|
int iOff = 0; /* Offset within poslist */
|
|
i64 iRowid = fts5MultiIterRowid(pIter);
|
|
char *z = (char*)fts5MultiIterTerm(pIter, &n);
|
|
|
|
/* If this is a new term, query for it. Update cksum3 with the results. */
|
|
fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
|
|
|
|
if( eDetail==FTS5_DETAIL_NONE ){
|
|
if( 0==fts5MultiIterIsEmpty(p, pIter) ){
|
|
cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
|
|
}
|
|
}else{
|
|
poslist.n = 0;
|
|
fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
|
|
while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
|
|
int iCol = FTS5_POS2COLUMN(iPos);
|
|
int iTokOff = FTS5_POS2OFFSET(iPos);
|
|
cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
|
|
}
|
|
}
|
|
}
|
|
fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
|
|
|
|
fts5MultiIterFree(pIter);
|
|
if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
|
|
|
|
fts5StructureRelease(pStruct);
|
|
#ifdef SQLITE_DEBUG
|
|
fts5BufferFree(&term);
|
|
#endif
|
|
fts5BufferFree(&poslist);
|
|
return fts5IndexReturn(p);
|
|
}
|
|
|
|
/*************************************************************************
|
|
**************************************************************************
|
|
** Below this point is the implementation of the fts5_decode() scalar
|
|
** function only.
|
|
*/
|
|
|
|
/*
|
|
** Decode a segment-data rowid from the %_data table. This function is
|
|
** the opposite of macro FTS5_SEGMENT_ROWID().
|
|
*/
|
|
static void fts5DecodeRowid(
|
|
i64 iRowid, /* Rowid from %_data table */
|
|
int *piSegid, /* OUT: Segment id */
|
|
int *pbDlidx, /* OUT: Dlidx flag */
|
|
int *piHeight, /* OUT: Height */
|
|
int *piPgno /* OUT: Page number */
|
|
){
|
|
*piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
|
|
iRowid >>= FTS5_DATA_PAGE_B;
|
|
|
|
*piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
|
|
iRowid >>= FTS5_DATA_HEIGHT_B;
|
|
|
|
*pbDlidx = (int)(iRowid & 0x0001);
|
|
iRowid >>= FTS5_DATA_DLI_B;
|
|
|
|
*piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
|
|
}
|
|
|
|
static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
|
|
  int iSegid, iHeight, iPgno, bDlidx;     /* Rowid components */
|
|
fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);
|
|
|
|
if( iSegid==0 ){
|
|
if( iKey==FTS5_AVERAGES_ROWID ){
|
|
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
|
|
}else{
|
|
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
|
|
}
|
|
}
|
|
else{
|
|
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
|
|
bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno
|
|
);
|
|
}
|
|
}

static void fts5DebugStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  Fts5Structure *p
){
  int iLvl, iSeg;                 /* Iterate through levels, segments */

  for(iLvl=0; iLvl<p->nLevel; iLvl++){
    Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
        " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
    );
    for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
      Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
          pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
      );
    }
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
  }
}
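
/* For reference, the text appended by the loop above takes a form along
** the lines of (values illustrative only):
**
**   {lvl=0 nMerge=0 nSeg=2 {id=1 leaves=1..5} {id=2 leaves=1..3}}
**
** i.e. one "{lvl=...}" group per level, containing one "{id=...}" entry
** per segment on that level.
*/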

/*
** This is part of the fts5_decode() debugging aid.
**
** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
** function appends a human-readable representation of the same object
** to the buffer passed as the second argument.
*/
static void fts5DecodeStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  const u8 *pBlob, int nBlob
){
  int rc;                         /* Return code */
  Fts5Structure *p = 0;           /* Decoded structure object */

  rc = fts5StructureDecode(pBlob, nBlob, 0, &p);
  if( rc!=SQLITE_OK ){
    *pRc = rc;
    return;
  }

  fts5DebugStructure(pRc, pBuf, p);
  fts5StructureRelease(p);
}

/*
** This is part of the fts5_decode() debugging aid.
**
** Arguments pBlob/nBlob contain an "averages" record. This function
** appends a human-readable representation of the record to the buffer
** passed as the second argument.
*/
static void fts5DecodeAverages(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  const u8 *pBlob, int nBlob
){
  int i = 0;
  const char *zSpace = "";

  while( i<nBlob ){
    u64 iVal;
    i += sqlite3Fts5GetVarint(&pBlob[i], &iVal);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal);
    zSpace = " ";
  }
}

/*
** Buffer (a/n) is assumed to contain a list of serialized varints. Read
** each varint and append its string representation to buffer pBuf. Return
** once the input buffer has been exhausted.
**
** The return value is the number of bytes read from the input buffer.
*/
static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  int iOff = 0;
  while( iOff<n ){
    int iVal;
    iOff += fts5GetVarint32(&a[iOff], iVal);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
  }
  return iOff;
}
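
/* A worked sketch of the routine above (illustrative bytes only): given
** the two single-byte varints 0x02 0x05, fts5DecodePoslist() appends
** " 2 5" to pBuf and returns 2. */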

/*
** The start of buffer (a/n) contains the start of a doclist. The doclist
** may or may not finish within the buffer. This function appends a text
** representation of the part of the doclist that is present to buffer
** pBuf.
**
** The return value is the number of bytes read from the input buffer.
*/
static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  i64 iDocid = 0;
  int iOff = 0;

  if( n>0 ){
    iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
  }
  while( iOff<n ){
    int nPos;
    int bDel;
    iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
    iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
    if( iOff<n ){
      i64 iDelta;
      iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
      iDocid += iDelta;
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
    }
  }

  return iOff;
}
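
/* As read by the routine above, a doclist is laid out as: an absolute
** first docid varint, then for each docid a position-list size varint
** (which also carries a "delete" flag, rendered as "*" above), the
** position list itself, and a docid-delta varint introducing the next
** entry. */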

/*
** This function is part of the fts5_decode() debugging aid. It is
** only ever used with detail=none tables.
**
** Buffer (pData/nData) contains a doclist in the format used by detail=none
** tables. This function appends a human-readable version of that list to
** buffer pBuf.
**
** If *pRc is other than SQLITE_OK when this function is called, it is a
** no-op. If an OOM or other error occurs within this function, *pRc is
** set to an SQLite error code before returning. The final state of buffer
** pBuf is undefined in this case.
*/
static void fts5DecodeRowidList(
  int *pRc,                       /* IN/OUT: Error code */
  Fts5Buffer *pBuf,               /* Buffer to append text to */
  const u8 *pData, int nData      /* Data to decode list-of-rowids from */
){
  int i = 0;
  i64 iRowid = 0;

  while( i<nData ){
    const char *zApp = "";
    u64 iVal;
    i += sqlite3Fts5GetVarint(&pData[i], &iVal);
    iRowid += iVal;

    if( i<nData && pData[i]==0x00 ){
      i++;
      if( i<nData && pData[i]==0x00 ){
        i++;
        zApp = "+";
      }else{
        zApp = "*";
      }
    }

    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
  }
}
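
/* A worked sketch of the decode above (illustrative bytes only): the input
** 0x05 0x00 0x00 0x02 0x03 0x00 is rendered as " 5+ 7 10*" -- rowids are
** delta-decoded, a rowid followed by two 0x00 bytes is marked "+", and one
** followed by a single trailing 0x00 is marked "*". */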

/*
** The implementation of user-defined scalar function fts5_decode().
*/
static void fts5DecodeFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args (always 2) */
  sqlite3_value **apVal           /* Function arguments */
){
  i64 iRowid;                     /* Rowid for record being decoded */
  int iSegid, iHeight, iPgno, bDlidx;   /* Rowid components */
  const u8 *aBlob; int n;         /* Record to decode */
  u8 *a = 0;
  Fts5Buffer s;                   /* Build up text to return here */
  int rc = SQLITE_OK;             /* Return code */
  int nSpace = 0;
  int eDetailNone = (sqlite3_user_data(pCtx)!=0);

  assert( nArg==2 );
  UNUSED_PARAM(nArg);
  memset(&s, 0, sizeof(Fts5Buffer));
  iRowid = sqlite3_value_int64(apVal[0]);

  /* Make a copy of the second argument (a blob) in a[]. The copy is
  ** followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
  ** buffer overreads even if the record is corrupt. */
  n = sqlite3_value_bytes(apVal[1]);
  aBlob = sqlite3_value_blob(apVal[1]);
  nSpace = n + FTS5_DATA_ZERO_PADDING;
  a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  if( a==0 ) goto decode_out;
  memcpy(a, aBlob, n);

  fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);

  fts5DebugRowid(&rc, &s, iRowid);
  if( bDlidx ){
    Fts5Data dlidx;
    Fts5DlidxLvl lvl;

    dlidx.p = a;
    dlidx.nn = n;

    memset(&lvl, 0, sizeof(Fts5DlidxLvl));
    lvl.pData = &dlidx;
    lvl.iLeafPgno = iPgno;

    for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
      sqlite3Fts5BufferAppendPrintf(&rc, &s,
          " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
      );
    }
  }else if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      fts5DecodeAverages(&rc, &s, a, n);
    }else{
      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else if( eDetailNone ){
    Fts5Buffer term;              /* Current term read from page */
    int szLeaf;
    int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
    int iTermOff;
    int nKeep = 0;
    int iOff;

    memset(&term, 0, sizeof(Fts5Buffer));

    /* Decode any entries that occur before the first term. */
    if( szLeaf<n ){
      iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
    }else{
      iTermOff = szLeaf;
    }
    fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);

    iOff = iTermOff;
    while( iOff<szLeaf ){
      int nAppend;

      /* Read the term data for the next term */
      iOff += fts5GetVarint32(&a[iOff], nAppend);
      term.n = nKeep;
      fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
      sqlite3Fts5BufferAppendPrintf(
          &rc, &s, " term=%.*s", term.n, (const char*)term.p
      );
      iOff += nAppend;

      /* Figure out where the doclist for this term ends */
      if( iPgidxOff<n ){
        int nIncr;
        iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
        iTermOff += nIncr;
      }else{
        iTermOff = szLeaf;
      }

      fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
      iOff = iTermOff;
      if( iOff<szLeaf ){
        iOff += fts5GetVarint32(&a[iOff], nKeep);
      }
    }

    fts5BufferFree(&term);
  }else{
    Fts5Buffer term;              /* Current term read from page */
    int szLeaf;                   /* Offset of pgidx in a[] */
    int iPgidxOff;
    int iPgidxPrev = 0;           /* Previous value read from pgidx */
    int iTermOff = 0;
    int iRowidOff = 0;
    int iOff;
    int nDoclist;

    memset(&term, 0, sizeof(Fts5Buffer));

    if( n<4 ){
      sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
      goto decode_out;
    }else{
      iRowidOff = fts5GetU16(&a[0]);
      iPgidxOff = szLeaf = fts5GetU16(&a[2]);
      if( iPgidxOff<n ){
        fts5GetVarint32(&a[iPgidxOff], iTermOff);
      }
    }

    /* Decode the position list tail at the start of the page */
    if( iRowidOff!=0 ){
      iOff = iRowidOff;
    }else if( iTermOff!=0 ){
      iOff = iTermOff;
    }else{
      iOff = szLeaf;
    }
    fts5DecodePoslist(&rc, &s, &a[4], iOff-4);

    /* Decode any more doclist data that appears on the page before the
    ** first term. */
    nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
    fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);

    while( iPgidxOff<n ){
      int bFirst = (iPgidxOff==szLeaf);   /* True for first term on page */
      int nByte;                          /* Bytes of data */
      int iEnd;

      iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
      iPgidxPrev += nByte;
      iOff = iPgidxPrev;

      if( iPgidxOff<n ){
        fts5GetVarint32(&a[iPgidxOff], nByte);
        iEnd = iPgidxPrev + nByte;
      }else{
        iEnd = szLeaf;
      }

      if( bFirst==0 ){
        iOff += fts5GetVarint32(&a[iOff], nByte);
        term.n = nByte;
      }
      iOff += fts5GetVarint32(&a[iOff], nByte);
      fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
      iOff += nByte;

      sqlite3Fts5BufferAppendPrintf(
          &rc, &s, " term=%.*s", term.n, (const char*)term.p
      );
      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
    }

    fts5BufferFree(&term);
  }

 decode_out:
  sqlite3_free(a);
  if( rc==SQLITE_OK ){
    sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
  }else{
    sqlite3_result_error_code(pCtx, rc);
  }
  fts5BufferFree(&s);
}

/*
** The implementation of user-defined scalar function fts5_rowid().
*/
static void fts5RowidFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){
  const char *zArg;
  if( nArg==0 ){
    sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
  }else{
    zArg = (const char*)sqlite3_value_text(apVal[0]);
    if( 0==sqlite3_stricmp(zArg, "segment") ){
      i64 iRowid;
      int segid, pgno;
      if( nArg!=3 ){
        sqlite3_result_error(pCtx,
            "should be: fts5_rowid('segment', segid, pgno)", -1
        );
      }else{
        segid = sqlite3_value_int(apVal[1]);
        pgno = sqlite3_value_int(apVal[2]);
        iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
        sqlite3_result_int64(pCtx, iRowid);
      }
    }else{
      sqlite3_result_error(pCtx,
          "first arg to fts5_rowid() must be 'segment'", -1
      );
    }
  }
}

/*
** This is called as part of registering the FTS5 module with database
** connection db. It registers several user-defined scalar functions useful
** with FTS5.
**
** If successful, SQLITE_OK is returned. If an error occurs, some other
** SQLite error code is returned instead.
*/
int sqlite3Fts5IndexInit(sqlite3 *db){
  int rc = sqlite3_create_function(
      db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
  );

  if( rc==SQLITE_OK ){
    rc = sqlite3_create_function(
        db, "fts5_decode_none", 2,
        SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
    );
  }

  if( rc==SQLITE_OK ){
    rc = sqlite3_create_function(
        db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
    );
  }
  return rc;
}
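
/* Usage sketch for the debugging functions registered above (assumed
** shadow-table layout; for debugging only, not part of the public FTS5
** API). With an FTS5 table "ft", its raw index pages live in the shadow
** table "ft_data" and can be inspected with, for example:
**
**   SELECT id, fts5_decode(id, block) FROM ft_data;
**   SELECT fts5_decode(fts5_rowid('segment', 1, 1), block)
**     FROM ft_data WHERE id = fts5_rowid('segment', 1, 1);
**
** For detail=none tables, fts5_decode_none() interprets the same blobs
** using the detail=none doclist format.
*/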