
b709e29 Fix URL typo in release notes 01f203c Update release notes c49810a Update COPYRIGHT e327526 Add BSL version number to LICENSE.TXT 07e3a4e Remove superfluous COPURIGHT.md and LICENSE.md 54c3310 Replace Dynamic Data Routing Platform with Database Proxy 305d02f Remove *.cmake wildcard from .gitignore b0b5208 Cleanup of spaces aeca6d0 Extend maxscaled error messages 817d74c Document where the CDC users are stored 9a569db Update license ff8697a MXS-716: Fix table level privilege detection 2071a8c Only check replies of slaves that are in use f8dfa42 Fix possible hangs in CDC python scripts fa1d99e Removed "filestem option" from example 009b549 Removed "filestem option" from example 8d515c2 Add example Kafka producer script for Python 64e976b Fix sporadic SSL authentication failures 5a655dc MXS-814: Check service/monitor permissions on all servers 2a7f596 Add note about galeramon priority to Galera tutorials b90b5a6 Fixed SHOW SLAVE STATUS in binlog router e22fe39 Changed couln size for SHOW SLAVE STATUS ae97b18 Fix avrorouter build failure with older sqlite libraries 56ef8b7 Replace GPL license with BSL license in scripts and tests 552836f Initialize all fields when MySQL users are loaded from cache bf42947 Update all licensing related files b29db9d Remove optimize_wildcard parameter from use 5170844 Make readwritesplit diagnosting output more clear 262ffb1 Fix crash when a config parameter has no section 33ac9e6 Add note about LEAST_BEHIND_MASTER and server weights e13e860 Fix a memory leak when backend authentication fails 75d00c2 MXS-801: Set the default value of strip_db_esc to true bd5f2db MXS-800: Add a log message about the working directory 4b1dd8c Update MySQL Monitor documentation on detect_replication_lag 559bc99 Fix installation of license file b057587 Change LICENSE to LICENSE.TXT 223fa43 Remove null characters from log messages 36fd05b Fix fatal signal handler log message 053dc8a Fix typos in documentation 371dc87 Fix Galera text in Master-Slave 
tutorial 30b6265 Disable adding of new objects at runtime db92311 Update the documentation on configuration reloading 0923d40 Update Connector-C version c4738b5 Add define for avro-conversion.ini 196e6ac Update license from GPL to BSL. e612366 Correctly calculate the number of bytes read in dcb_read 93a2a03 Update rotate documentation in admin tutorial c5eb854 MXS-585: Fix authentication handling regression 6330070 Fix schemarouter memory leak aa5827e Fix CDC authentication memory leak a5af4ad Fix avro memory leaks 627d73f Fix Avro scripts 0ff7556 Add build instructions to avrorouter documentation 734a1c8 Fix doxygen mainpage e51ce09 Add licence text to avro scripts 4d27c14 Update Avro documentation and fix installation directories a58a330 Fix readconnroute error message about router_options 22b138c MXS-772: Fix postinstall script a9960b7 Fix function declaration in mysql_backend.c cbe1704 Add missing newline 09d76ee Fix avro documentation default values 1d3f8f3 Added refresh users on START SLAVE 880db34 Make router configuration errors fatal 3bad5ca Update documentation and add logging to avrorouter 10f3384 Disable SSLv3 ca8d902 Fix rwsplit error reporting when master is lost e816d65 Fix MaxScale Tutorial deca3e1 Update MaxScale man page f1735b7 Update release notes 9238714 qc: Change type of DEALLOCATE PREPARE 0b77c3b dbfwfilter: Do not require a trailing new line 1152ca9 Remove copyright message a038a85 Remove debug assertion on ERANGE error in galeramon 12ab235 Fix comparison error for connections limit. 
5de1a8f qc_sqlite: Correct string recognition b63d754 Fix links in documentation contents 05d457e CDC protocol link fix 50676ef Fix monitor code formatting 218ba09 Remove MaxScale-and-SSL.md 0d6845b Add images to Avro documentation and tutorial 8dd2c9b Update MaxScale-2.0.0-Release-Notes.md 6d4b593 Change avrorouter default transaction grouping 4c629de Add notes about monitor changes to upgrading and release notes 267d0dc Update Binlogrouter.md c624781 Update Replication-Proxy-Binlog-Router-Tutorial.md f3261bc CDC users 1368797 Format authenticator and module headers ab01749 Format filters 8b05d32 Format core and include files f3974e5 Add GPL LICENSE to qc_mysqlembedded bfec36a astyle rabbitmq_consumer/consumer.c 54b960a Check that the Avro directory is writable 3d4cd2e Fix cdc_users using the wrong path for service users cache 1e738dd Add CDC client program documentation f6809fd Remove superfluous rabbitmw_consumer/LICENSE 6b5e667 Update license text in files 9bfcb46 Change CDC protocol documentation formatting 607f25c REQUEST-DATA formatting 8175ab4 CDC protocol update d5ca272 CDC protocol update 6c91764 Only check wsrep_local_index if node is joined f12e2c2 Do not use SSL for monitors and services 6d2cd99 Fix TestAdminUsers f4ae50d Apply astyle to server/core/test/*.c 7cc2824 Update build instructions cf8e2b5 Update release notes 03c7a6c Remove wrong function prototypes 5a11eed Revert "Remove duplicate functions" 80ed488 Remove duplicate functions bb0de8d Add info on SSL and throttling to release notes for 2.0. 0934aee Update MaxAdmin reference guide 2a3fe9b Update source URL in release notes e575cf0 Merge branch 'MXS-177-develop' into develop cc8c88d Change header for BSL ecde266 Change header for BSL 890b208 Log a message when a script is executed 9c365df Added information on server side SSL to config guide. 
aa3e002 Remove obsolete heading 79dd73a Make dprintAllSessions use dprintSession 1fc0db9 Align output of "show services" 1b9d301 Make monitorShowAll use monitorShow 983615e Adjust output of 'show modules' 436badd qc_sqlite: The module is now beta a7cbbe5 Update Upgrade document 71ac13f Remove obsolete user/password from example eb20ff8 Fix and clean up Avrorouter diagnostics code 31d4052 Change MaxScale to MariaDB MaxScale e6e4858 Fix `source` parameter not working with `router_options` d8de99a Update module version numbers eb81add Merge remote-tracking branch 'origin/develop' into MXS-177-develop daba563 Merge remote-tracking branch 'origin/MXS-651-develop-merge' into develop 678f417 Changes in response to reviews. 410fb81 Changes in response to reviews. 60135e5 Add initial release notes about Avrorouter 7400ecc qc_sqlite: Remove uninitialized read 536962c Update version number 018f044 Fix debug assertion in client command processing 51f0804 Prevent 'show monitor' from crashing with failed monitor 559347e Fix "Too many connections" message; add comments. 01d3929 Add printf format checking to dcb_printf fbd49a6 dbfwfilter: Require complete parsing only when needed 1885863 Add information to release notes about readwritesplit changes 73b56a2 Update MaxScale section in release notes. 
0a2f56f MaxAdmin: Remove debug information from 'show users' 3cf3279 MaxAdmin: Report SSL information as well 29c2b66 Always use SSL if server configured with SSL 7d6b335 dprintAllServers should use dprintServer 02a5246 qc_sqlite: Correctly detect parsing context 469419b compare: Add strict mode 8c5b3d3 compare: Allow the comparison of a single statement 4691514 Add Upgrade to 2.0 document 38b3ecb Expand the checks done before a monitor is stopped 8e2cfb9 Add backend name to authentication error message 9600a07 Fix MaxInfo crash 91c58b0 Removed log message for duplicate entry while adding a user 40392fe Fixed log message priority 0ec35b8 maxadmin: Allow the last user to be removed 5a0ebed maxadmin: Change name of user file 87aa8f1 maxadmin: Always allow root to connect bf37751 Fix COM_QUIT packet detection 7c93ee4 Update avrorouter documentation and tutorial 95ce463 Fix wrong directory in avrorouter log message cfe54c7 Update ChangeLog d69562c Fix LOAD DATA LOCAL INFILE data size tracking 24e7cd6 MXS-584: added support for SET @@session.autocommit d6f6f76 Fixes, correct too many connections message efeb924 Update release notes for 2.0.0 8f71a87 qc_sqlite: Adjust error messages b967d60 Remove copy of enum enum_server_command 822b7e3 Update package license b58301a Update MaxScale License for overlooked files c09ee47 Update MaxScale License 49f46fa Tidy up. Comment out config items not yet supported. f5c3470 Updated and simplified the Building from Source document 98b98e2 Add note about master failure modes to documentation e036f2c Update Limitations document 62219a5 Merge remote-tracking branch 'origin/drain-writeq' into develop 5caf667 Invoke DCB_REASON_DRAINED more frequently. 
77b107b qc_sqlite: Add support for LOAD DATA INFILE 8e70f26 compare: Optionally print out the parse result ad750e6 Merge remote-tracking branch 'origin/MXS-651-develop-merge' into develop ef85779 Merge remote-tracking branch 'origin/develop' into MXS-651-develop-merge ea9fdda MXS-477: Add LONGBLOB support for readconnroute eae6d42 qc_sqlite: Remove superfluous columnname definition 8fe2b21 Add binlog source to avrorouter b25cc37 qc_sqlite: Add missing destructors 8a749e7 qc_sqlite: Reduce number of keywords 5f4bb8b compare: Output query immediately 2456e52 dbfwfilter: Reject queries that cannot be parsed 5f1fbbd qc_sqlite: Extend SET grammar b8d8418 dbfwfilter: Remove 'allow' from firewall filter rule 0bd2a44 MXS-741 When no events are read from binlog file, ... a07c491 Remove duplicated function (merge error, probably) b237008 Save conflict resolution, missed last time. a0c0b40 Merge remote-tracking branch 'origin/develop' into MXS-651-develop 385d47d Change SSL logic, fix large read problem. b93b5e0 Remove false debug assertion b953b1f Turn off SSL read ahead. e0d46a5 Fix error messages and remove commented code 49b4655 MXS-739: Fix invalid JSON in Maxinfo 0c30692 qc_sqlite: Handle GROUP_CONCAT arguments 54e48a1 qc_sqlite: Consider \ as an escape character in strings 713a5d6 qc_sqlite: Add test cases 20d1b51 qc_sqlite: Handle qualified names in CREATE VIEW 1019313 qc_sqlite: Make QUERY_TYPE_WRITE default for SHOW 059c14e qc_sqlite: Accept qualified function names in SELECT db34989 qc_sqlite: Accept qualified function names b93e2f1 qc_sqlite: Add limited support for GRAND and REVOKE 678672d qc_sqlite: Cleanup copying of database and table names 9b744b9 qc_sqlite: Update table and database names at the same time db75e61 qc: Support getting the qualified table names 1f867f4 qc: Add join.test 9c7e02a qc_sqlite: Accept "...from NATURAL straight_join..." 
93d1c31 qc_sqlite: Both " and ' can enclose a string literal 8055b21 qc_sqlite: Set more information based upon tokens 37e3663 qc_sqlite: Do not blindly add affected fields 50f1360 qc: Correctly collect affected fields 71c234e qc_sqlite: Recognize CREATE TABLE ... UNION 01803f1 qc_sqlite: Recognize {DEALLOCATE|DROP} PREPARE ... 6ecd4b3 qc_sqlite: Parse index hints 0bdab01 qc: Compare sets of tables b908c8f Fix double freeing of internal DCBs 8903556 qc_sqlite: Recognize LEFT, INSERT and REPLACE 266e6c0 qc: Log all problems by default (compare program) 7b54cac qc_sqlite: Fix logging bug 9566e9f qc_sqlite: Plug a leak b0a860d qc: Run compare a specified number of times 050d698 qc_sqlite: Simplified argument handling 97c56b8 qc: Allow arguments to be passed to the query classifier 09a46e0 qc_sqlite: Add argument log_unrecognized_statements fd98153 qc: Allow arguments to be provided to the query classifier 313aa7e Fix Problems SSL assertion; non SSL connect to SSL 1d721e6 Fix DEB packaging errors 96bdc39 Fix RPM packaging failures on CentOS 7 6ba900d qc_sqlite: Recognize more SHOW commands 2869d0b qc_sqlite: Exclude support for sqlite's PRAGMA 0be68a3 qc_sqlite: Enhance SELECT syntax 28f3e1a Merge branch 'develop' into MXS-729 e18bd41 qc: Expose the result of the parsing 5896085 Add BUILD_AVRO to the CMake cache daeb896 Remove changes to blr_master.c memory handling d523821 Add comments 4eb9a66 Empty admin users file is now handled 52b46c6 qc: Update create.test db09711 qc_sqlite: Ignore case when looking for test language keywords f042a1d qc_sqlite: Extend CREATE TABLE syntax 177d2de qc_sqlite: Extend CREATE TABLE syntax d3ca8e6 qc_sqlite: Add some support for HANDLER 86c6a96 qc_sqlite: Recognize RENAME TABLE 471594f qc_sqlite: Accept more table options at CREATE TABLE 3da6cde qc_sqlite: Remove unused keywords bd89662 Fix crash on corrupted passwd file b5d1764 MXS-733: Always print session states 043e2db Remove unused CMake variables 5604fe2 Restore missing line, 
fixes logic error. 66d15a5 Added log message warning for old users found 5be9fca Changes in response to review by Johan 899e0e2 Removed password parameter from admin_user_add and admin_remove_user a2d9302 Merge branch 'develop' into MXS-729 bcaf82f Code review update e61c716 Nagios plugin update with Maxadmin using UNIX socket only d7150a2 qc_sqlite: Extend column syntax 3b78df0 qc_sqlite: Accept VALUE in addition to VALUES 85a705b qc_sqlite: Accept CHARSET in addition to CHARACTER SET db9cec8 qc_sqlite: Accept qualified column names in CREATE TABLE a9cabb0 qc_sqlite: Extend SELECT syntax f5e9878 qc_sqlite: Add set type 675cb93 qc_sqlite: Allow BINARY to turn into an identifier b04a760 qc_sqlite: Accept DROP TABLES 1075d9c qc_sqlite: Allow qualified name with LIKE in CREATE 420ac56 qc_sqlite: Extend EXPLAIN grammar 727d626 Add missing error message to readwritesplit f4fd09e Change templates and testing configurations to use sockets 1ef2e06 Add configurable default admin user a723731 Remove wrong file 7c3b02b Maxadmin/maxscaled UNIX socket update eed78d4 qc_sqlite: Pick out more information from select when CREATEing 267f091 qc_sqlite: Recognise DROP TEMPORARY TABLE 54fc29f qc_sqlite: Accept $ as a valid character in identifiers afa2ec9 qc_sqlite: Allow keywords to be used in qualified name db0427d MXS-729 code review update a3b3000 Merge branch 'develop' into MXS-729 e73d66c qc_sqlite: Identify qualified identifiers 5bacade Trailing space fix 3bc9be3 MXS-729 socket=default support in maxscale.cnf 1a5c23c Code review update for MXS-729 d6665c7 qc_sqlite: Extend CREATE TABLE grammar 91725ce qc_sqlite: Dequote table and database names cd7a022 qc: Add create test 1aa4e6b qc: Update test files 762b0c2 qc_mysqlembedded: Do not return "*" as table name cd9968f qc_sqlite: Update delete.test f16703d qc_sqlite: Add support for CALL e3ca9b4 qc_mysqlembedded: Do not return an array of empty strings 5878a22 qc_sqlite: Support selects in DELETE 1cf0444 qc_sqlite: Fix bug in 
DELETE grammar 0bf39a1 qc_sqlite: Add support for CHECK TABLE 4a8feca qc_sqlite: Add helper for catenating SrcLists ab299b3 qc_sqlite: Extend DELETE syntax 5778856 qc_sqlite: Extract database name as well 99901f1 qc_sqlite: Extend DELETE syntax 63396f8 qc_sqlite: Match "autocommit" caseinsensitively e804dd3 qc_sqlite: Add support for LOCK/UNLOCK c23e442 qc_sqlite: Extend DELETE syntax 5460e31 qc: Add delete test ab392ad qc_sqlite: Free unused data 598c6f0 qc: Measure time of parsing 2fa3844 qc_sqlite: Put all changes being {%|#}ifdefs 1b43992 qc_sqlite: Modified update.test 1676ea4 qc_sqlite: LEFT|RIGHT are not required with JOIN 224ebd3 qc_sqlite: Extend UPDATE grammar dbecca9 qc_sqlite: Extend UPDATE grammar b6ca3b3 MaxAdmin security modification MXS-729 8fb47dd Remove copying of MariaDB embedded library files 22e1257 Normalize whitespace when canonicalizing queries 269cff2 MXS-697: Fix dbfwfilter logging for matched queries 6344f6f Ignore Apple .DS_Store files. d606977 Improve comments in response to code review. 619aa13 Add configuration check flag to MaxScale 27c860b Drain write queue improvements. 
33d4154 Read only one configuration file d104c19 Format more core files 83fdead Format part of core source code 311d5de Format gateway.c and config.c with Astyle 8cbb48e Don't build maxavro library if BUILD_AVRO is not defined 32bb77a Merge branch 'MXS-483' into develop db72c5d Format CDC/Avro related files 3c26e27 qc_sqlite: Use SrcList instead of Expr f96ad6a Merge branch 'develop' into MXS-729 0728734 Fix query canonical form tests e68262d Merge remote-tracking branch 'gpl-maxscale/master' into develop 65460dc Fix missing symbols from MySQLAuth 791c821 MaxAdmin listens on UNIX socket only and maxadmin can connect 89afed6 MXS-66: All configuration errors are fatal errors d613053 Add more details to galeramon documentation 22f4f57 qc: Add support for multi UPDATE 0dba25a Added default port to blr_make_registration 9d8248c qc_sqlite: Plug leaks and access errors 057551a qc_sqlite: Fix too small an allocation 1f73820 qc_sqlite: Free memory allocated during parsing 93fefb9 qc: Enable compare to run the same test repeatedly e52c578 qc_sqlite: Handle last_insert_id() 929e02a qc_sqlite: Extend UPDATE grammar de3b9f7 qc_sqlite: Defines provided when running cmake and make 4d5c3b2 qc_sqlite: Add support for multiple-table DELETE FROM 36a4296 qc_mysqlembedded: Handle SQLCOM_DELETE_MULTI 41f613a Fix DCB and SESSION removal from free object pools 00f2ddd Move some common code used in only one protocol into protocol. 
6fbd0b0 Format Go source with gofmt abfbbcb Fix build failures and internal test suite 31de74a Merge branch 'develop' into MXS-483 20d461d Remove uniqueness constrain on oneshot tasks 6c09288 Add missing error message to converter task 0c2c389 Merge branch 'develop' into MXS-483 fa0accc Set freed memory to NULL after authentication failure 63f24e4 Install cdc_schema.go 5123c21 Fix ALTER TABLE parsing 004acc2 Merge branch 'develop' into MXS-483 f69a671 Remove array foreach macro use a159cd9 qc_sqlite: Add support for SHOW DATABASES 31a2118 Make qc_mysqlembedded optional 27ef30e Changed the default query classifier 359010d Add -h flag as the alias for --host bebc086 Fix minor bugs c7ca253 qc_sqlite: Recognize START [TRANSACTION] 240f8bf qc_sqlite: Collect info from nested SELECTs 93ea393 qc_sqlite: Pass along the relative position of a token cc960af qc_sqlite: Fix incorrect assigment 22a6fef Fix `gtid` avro index table 4c38bef qc_sqlite: STATUS is not a registered word cace998 qc_sqlite: Include all fields of UPDATE 997b19c qc: Add update tests 7601b3f qc_sqlite: Parse "INSERT INTO t VALUES (), ();" correctly ca426f1 qc_sqlite: Handle CREATE TRIGGER f54f963 qc_sqlite: Allow INSERT without INTO e4a1b6d Remove foreign keys from Avro index e4501a2 Merge branch 'develop' into MXS-483 82b9585 Fix MMMon never assigning master status a45a709 qc_mysqlembedded: Find the leaf name 2f3ca8f qc_mysqlembedded: Recognize SQLCOM_REPLACE cc7ad83 qc_mysqlembedded: Pick up fields for INSERT SELECT as well 0e6b39e qc: Cleanup of select.test 9113f4f qc_sqlite: Pickup more fields from INSERT 4d58f98 Dummy query classifier dfe824f Document `query_classifier` option 4aa329b MXS-718: Collect fields of INSERT 53818f2 Modify packet number for SSL backend connection 346f973 qc_sqlite: Accept qualified column names 8a83616 Fix in-memory SQLite table structure 6f2c884 Further backend SSL development 4444e92 qc_sqlite: Extend INSERT grammar 2aebcab qc_sqlite: Add support for TRUNCATE 1a6742e 
qc_sqlite: Accept DEFAULT as value in INSERT 07dec05 qc_sqlite: Crude classification made based on seen keywords a90a579 Add missing function documentation 72bd0cf qc_sqlite: Change CREATE TABLE grammar 6e04bc8 qc: Add INSERT tests 3666bda qc_sqlite: Add SELECT test d27e173 Add server/mysql-test/t/select.test to query_classifier 562d004 qc_sqlite: Cleanup error logging. 819cacb Merge branch 'develop' into MXS-483 0d3a789 Add warnings and comments to Avro row event processing 2fab570 Added support for SET autocommit=1 1aa83cf Code review fix c999f0a Addition of SELECT USER() 8c723da Clean up avro_client.c and avro_file.c eb21ee8 Clean up avro.c 946a284 Added Avro schema to table metadata processing 72f90be qc_sqlite: Add support for CREATE {FUNCTION|PROCEDURE} ... 4a4ab49 qc: Update line number also when skipping a block ffddb2a qc_sqlite: Allow queries using INTERVAL b8b03bd qc_sqlite: Add support for SELECT * FROM tbl2 = tbl1; 77a261a qc_sqlite: Add support for GROUP BY ... WITH ROLLUP 0ead41e cdc_schema now generates lowercase JSON 66e327a Classifier has to be specified explicitly 9074b91 Updated Avrorouter documentation cf06c7a qc_sqlite: Some comments added. f579eff Added simple Go based Avro schema generator f448e90 MXS-419: Added ulimit calls to init scripts b4ad257 Added FindAvro.cmake 56cc9b9 Added the last transaction script to installations 2d52da2 Added temporary avro-alpha package name 6ada071 Fixed cdc_users script 61f0206 Renaming and minor fixes to CDC Python scripts 9d77c32 Moved GTID table tracking to an in-memory database 8ae7cb0 MXS-704: Fixed `which` usage in post-install scripts 195e118 Readwritesplit sessions aren't created if master is down 2be91da Added affected tables to avro diagnostics b185320 QUERY-LAST-TRANSACTION now returns proper table names 90860b5 Log stale master message only once 4859c60 Table name to GTID mapping f77bd23 First steps to backend SSL, not yet working. 
68b5bf0 qc_sqlite: Don't treat TRUE and FALSE as identifiers fca8e59 qc_sqlite: Collect database names as well 6b0e04d qc_sqlite: Add support for SHOW CREATE VIEW 77f4b34 qc_mysqlembedded: Report more, rather than less a73e033 qc_sqlite: Extend builtin functions 9d9650e qc_sqlite: SQL_BUFFER_RESULT must decay to an id 83fe99d qc_sqlite: Support INSERT IGNORE 9d1c9ca Added avrorouter limitations and tutorial 8dd094d qc_sqlite: Recognize builtin functions 2edc3d6 Moved write operations of the maxavro library to a different file 1364e54 Added more comments to the Avro RBR handling code f711588 Added warning about unsupported field types df0d250 Added SQLite3 based indexing to avrorouter 0c55706 Added GTID event flag check in AVRO processing bfe28dd qc_sqlite: Accept SET GLOBAL|SESSION ... a8d2068 qc_mysqlembedded: Exclude code that won't compile on 5.5.42 16ea0b3 qc_sqlite: Add support for DROP FUNCTION 1c0f1fc qc: Report stats after comparison 02345b2 qc_sqlite: Recognize builtin readonly functions c7a5e75 qc_sqlite: Recognize := 0aa849d qc: Make compare understand the delimiter command fb0a877 qc_mysqlembedded: Examine Item::SUBSELECT_ITEMs 045cf8d qc: Add missing mtl commands e5c6f45 qc_sqlite: Relax qc_get_type comparison ac3b2df qc_sqlite: Add support for SHOW STATUS 73a34fb qc_sqlite: Add initial support for FLUSH 4ffbe79 qc_sqlite: Extend CREATE TABLE syntax 009af81 qc_sqlite: Add support for SHOW WARNINGS 001de97 qc: Ignore mysqltest constructs 128307d Merge branch 'release-1.4.3' into gpl-master 5e8a06a SET NAMES XXX added 3ca12ba MXS-685: MMMon clears server state before setting it dc4d2b0 Further steps to connection limit, non-working. 
ef70257 MXS-636: Master failures are no longer fatal errors 99f4c64 Updated QUERY-LAST-TRANSACTION format d1ff157 Changed QUERY-LAST-TRANSACTION format to JSON 8b2f1ac Fixed formatting of the avrorouter 61543df Added QUERY-LAST-TRANSACTION command c10d10b qc_sqlite: Add support for SHOW CREATE TABLE 106a38f qc_sqlite: Add support for DROP INDEX 2a85421 qc_sqlite: Extend what can be stated about a table 794cd1c qc_sqlite: Add support for MATCH ... AGAINST dd7b747 qc_sqlite: Accept FULLTEXT and SPATIAL in CREATE TABLE a13d6ce qc_sqlite: Add support for PREPARE and EXECUTE 0c5d29f qc_sqlite: Add support for ANALYZE a6cd32b qc_sqlite: Extend SET syntax 5d47704 qc_sqlite: Pick out fields from UPDATE t SET i = ... 0e05735 qc: Understand --error in server test files 8535975 qc_sqlite: Extend CREATE VIEW syntax b35e638 qc: Ignore read type bit if write bit is on 818a814 qc_sqlite: Add support for SHOW VARIABLES 1aa877b qc_sqlite: Add initial support for DO e92913a qc_sqlite: Add support for CREATE VIEW d53a46d qc_sqlite: Recognize bit field literals b'1010' 1fb7977 Added GTID event timestamp into struct gtid_pos 8f95b10 Added new fields in AVRO diagnostics cb4db54 Added tests with large SQL packets to modutil tests e4dbd6b MXS-621: More fixes to log messages at startup 4f1e9ee qc: compare tester can now read server MySQL tests cd8154b qc_sqlite: Allow CHARACTER SET to be specified for column 6f8d053 Added MariaDB 10.1 check for new flags in GTID event 71c471b qc_mysqlembedded: Fix type bits setting 26b00a7 qc_sqlite: Extend ALTER grammar ea6057c qc_sqlite: Handle also pInto when dupping a struct select 2271559 qc_sqlite: Add support for SHOW TABLE STATUS 9caaf27 qc_sqlite: Add support for CREATE ... 
LIKE oldtable cd19932 Merge tag '1.4.2' into master 9e9e4d8 Merge branch 'develop' of https://github.com/mariadb-corporation/maxscale-bsl into develop 267cb60 qc_mysqlembedded: Look into parenthesized comma expressions 77c6ca9 qc_sqlite: Recognize token "<=>" 5ca9a9f qc_sqlite: Allow comma expressions in where clause b08e910 qc_sqlite: Add SELECT options d11e581 qc_sqlite: Some recursion cleanup d53d063 Add but don't invoke connection queue functionality. 6818104 Fix logic error in connections limiter 3c61605 qc_sqlite: Find more affected fields 9af8dfd Allow the classifiers to be specified on the command line 5d4a134 Activate call to protocol for max connections error message. 16638e7 Fix another mistake 234f9e6 Fix mistake 843a6fc Fix mistake. 2c6e9ad Fix errors in config.c; enable call to protocol on connection limit. fd27849 Introduce configuration items for Maximum and Queued Service connections 60d198d Implement very simple connection limit. 84d8f0f Merge remote-tracking branch 'origin/develop' into MXS-177 8a58e63 Merge remote-tracking branch 'origin/develop' into develop 08487cd Add assertion to enforce number of free DCBs not being negative. f73af2f Added MariaDB 10.1 check for new flags in GTID event 23898ec Fix wrong sprintf specifier, trailing white space. 
ea6cfa3 readwritesplit: Cleaned up routeQuery 3858df0 Cleaned up select_connect_backend_servers c38ee13 Added more buffer tests 48816df Added more modutils tests 537eac2 Added tests for modutil_get_complete_packets 22a6095 MXS-669: modutil_get_complete_packets no longer makes the buffer contiguous 51af97e qc_sqlite: Add support for CREATE INDEX e50c573 qc_sqlite: Dig out fields for IN f58c6df qc_sqlite: Dequote table name 319422b qc_sqlite: Accept ENUM as type for a column 5d6a45c qc_sqlite: Allow UNSIGNED to fallback to an id 16a5f20 qc_sqlite: Extend CREATE TABLE syntax d6268da qc_sqlite: Accept RIGHT and FULL OUTER joins 2207415 qc_sqlite: Allow STRAIGHT_JOIN in SELECT 6fee546 qc_sqlite: Pick upp more table names 9de5f84 Remove trailing white space. 758f84d Improve comments and messages in dcb.c and session.c re recycle memory. 1c2de21 Merge remote-tracking branch 'origin/develop' into dcb-optimise 6614944 DCB code tidying. Fix missing spinlock release; remove redundant variables ecd5e5c Remove extra code introduced by merge. 877127a Merge commit '3c0d3e5ab6ddde59da764ec904b517759074a31e' into develop 4275bbe Updated the Connector-C version to 2.2.3 c71042b Some tentative list management code; provide for counting service clients. ad0c8a6 qc_sqlite: Allow empty insert statement 72e75e5 qc_sqlite: Add support for SELECT ... 
INTO cc553fa qc_sqlite: MAXSCALE define can now be used everywhere 3305c6e qc_sqlite: Handle CASE in SELECT 702f62e qc_sqlite: Extend CREATE TABLE grammar 941c212 qc_sqlite: Add support for SHOW [INDEX|INDEXES|KEYS] 6a79136 qc_sqlite: Extend grammar for SHOW TABLES and SHOW COLUMNS f175d2d qc_sqlite: Add SHOW COLUMNS support 6e47951 qc_sqlite: Add support for SHOW TABLES bcfa0e7 qc_mysqlembedded: Return the actual name and not as-name 3e19f2e Fixed qlafilter build failure 810b24e MXS-675: Standardized qlafilter output be92173 qc_sqlite: Exclude alias names from affected fields 9479280 qc_sqlite: Add support for explain EXTENDED 13b0e10 qc_sqlite: Add support for DELETE a6ccfea qc_mysqlembedded: Look at all conditional items b428041 qc_sqlite: Extend SELECT options 83f829f query_classifier: Correctly calculate the length of a GWBUF 2ddb24c query_classifier: Ensure that -- comments are handled fa7a746 qc_sqlite: Allow STRAIGHT_JOIN in SELECTS 6f47819 FindLibUUID update 5ed897b Added FindLibUUID cmake file 16e02bb Added FindLibUUID cmake file aff63e0 MXS-680: qc_mysqlembedded does not look into functions 8a0eeb4 query_classifier: Improve output of compare 6f08185 Query classifier can now convert enums to strings 124e2b9 MXS-679: Correctly collect fields of WHERE 353c97c transaction_safety default is off 896e37b qc_sqlite: Invert stop logic and be more verbose 7a44d4d qc_sqlite: Extend what is accepted in CREATE TABLE 4dbf499 qc_sqlite: Accept FIRST in ALTER TABLE 3f655c0 qc_sqlite: Update table and affected fields for INSERT 8e1e275 qc_sqlite: Make AS optional in CREATE statement 5f2084b qc_sqlite: Add support for ENGINE when creating a table 242f183 qc_sqlite: CREATE paramters handled in the correct place 8ed2e25 qc_sqlite: Trace only when needed 63d4531 qc_sqlite: Update affected fields also from functions 118cdc3 qc_sqlite: Allow multiple index names in USE|IGNORE INDEX 912da76 qc_sqlite: Add initial support for ...IGNORE INDEX... 
0aa7de6 qc_sqlite: Log detailed message on error 3e3bf1a qc_sqlite: Extend create syntax. c4a4572 qc_sqlite: Exclude quoted values 1621f49 Removed MYSQL_EMBEDDED_LIBRARIES d3e324c UUID generation now comes from libuuid e8fe678 qc_sqlite: Enable confitional compilation a9522ba qc_sqlite: Handle X.Y selects 9bc9770 qc_sqlite: Use same stream when outputting padding 366257a qc_sqlite: Add support for UNSIGNED and ZEROFILL d4d90ff qc_sqlite: Add support for DROP VIEW d0519bd qc_sqlite: Extend DROP TABLE syntax c1e4894 qc_sqlite: Add flag to compare for stopping at first error 9fd6344 MXS-674: Maxinfo generates invalid JSON 3c0d3e5 Fix stupid errors. 9d32b2d Include read queue in buffer provided by dcb_read; changes to match. b690797 Fix double spinlock release in random_jkiss. 6a4328f Fix problems of memory not being freed in some error cases. 2112e56 Change DCB and Session handling to recycle memory; fix bug in random_jkiss. 3912f72 MXS-631, MXS-632: Cleaned up default value CMake files 383ccb8 Fixed build failure on MariaDB 5.5 a60bca5 Merge branch '1.2.1-binlog_router_trx' into develop 3c2a062 Fix to crashes in embedded library with MariaDB 10.0 d3fe938 MXS-662: Service protocol check no longer ignores bind address c3da49b qc_sqlite: Update affected fields from everywhere 7a0fab8 qc_sqlite: Allow verbosity of compare test to be controlled 81d6822 qc_sqlite: Cleanup handling of select columns 13e5c59 qc_sqlite: Introduce custom allocation functions 026f27d qc_sqlite: Add support for "USE database" 99079df qc_sqlite: Ignore duplicates when comparing affected fields ca45cd6 qc_sqlite: Add initial support for qc_get_database_names 75970b6 qc_sqlite: Add support for DROP TABLE. b97e45d qc_sqlite: Move get_affected_fields() to other helpers cb0fa96 qc_sqlite: Collect table names of INSERT 3a7c513 qc_mysqlembedded: Only look for created name if CREATE 308b0a4 qc_sqlite: Add support for gc_get_created_table_name. 
0dc4af2 qc_sqlite: Add qc_has_clause() handling to update e9f2d1d qc_sqlite: Update now also provides table names c3192e4 qc_sqlite: Add initial support for get_table_names c51eafd qc_sqlite: Add support for qc_has_clause f318fb2 qc_mysqlembedded: Work around embedded lib bug 4ba2e11 qc_sqlite: Add initial support for qc_get_affected_fields 080dea5 qc_sqlite: Support is_read_query 3f94df1 Fixed compare.cc build failure 868a712 Updated freeing of buffer chains in readwritesplit 9bf7fca Formatted readwritesplit source code de4da2b Add assertion to spinlock release to detect release of already released spinlock. d30955a qc_sqlite: Handle the default case of affected fields. 5d02b3f qc_sqlite: Set operation when creating table 94a334d Add test for comparing qc-output aa6f5d6 Allow a specific query classifier to be loaded explicitly c799d37 Test both qc_mysqlembedded and qc_sqlite f8d9aa1 qc_sqlite: Enable "set @user_var=@@system_var" f190bdc qc_sqlite: Recognize /*!-comments b694b55 Fixed binary Avro format streaming c95fa86 qc_sqlite: Report correctly the type of set autocommit 9cb236c qc_sqlite: Add test case 77b4e62 Ensure classify test checks all types 962039e Change return type of qc_get_type ae00df8 qc_sqlite: Add initial support for the SET statement. 88253c5 qc_sqlite: Rename functions fa48043 Rework of MySQL backend protocol clean up to fix fault. 3851064 qc_sqlite: Correct recognition of system variables (@@xyz). 9d86f7f qc_sqlite: Detect user and system variables. a683297 qc_sqlite: Recognize and accept system variables (@@xyz). 
a4f64dd qc_sqlite: Add initial support for CREATE [TEMPORARY] TABLE f834b4f MXS-661: Only COM_QUERY packets are parsed 30077c1 CMake policies set only for correct versions a166f34 Suppress warning about unknown CMake target 1412730 Added more variables to launchable monitor scripts 358c194 MXS-656: Galera nodes with index 0 can be master again 842aec5 qc_sqlite: Add support for BEGIN, COMMIT, ROLLBACK b9cad6d Add initial support for UPDATE. 95741cb Add initial support for insert. 3796158 Re-install sqlite whenever parse.y has changed 5bcd8cf Ensure that the query is the one passed cf05533 Add support for obtaining the type of a query 400d8b4 Always log the outcome 45cf632 Fixed resource leaks and minor bugs fa9e970 Printout the query when there is a mismatch. 263cb07 All classify to be used with any query classifier ea381b9 Further cleanup of classify.c 23f6f30 Merge pull request #107 from godmodelabs/typo-dpkg 8c2a64e Fixed classify build failure 0c3070b Fixed binlog to Avro conversion bugs b827ba9 MXS-653: Silence maxpasswd 30d981c MXS-654: Add test for checking maxpasswd 984039b Rearrange classify.c 837e46d Add log initialization 1cc7a6e Reformat query_classifier/test/classify.c 065a4e5 Merge branch 'develop' into develop-MXS-544-b-merge ca27f13 Fixed binlog build failure fb81be2 fixed typo dpgk <-> dpkg 1e88d5d Added python based CDC user creation script 040bbdd MXS-633: Monitor permission checks moved to modules cde7595 Master-Slave clusters are now robust by default 158c776 Cleaned up core test suite 94c6e66 Fixed bugs resulting from merge a491e11 Merge remote-tracking branch 'origin/MXS-544-b' into develop-MXS-544-b-merge 30f9f25 Cleaned up avro.c 6286f64 Merge branch 'release-1.4.1' into develop 00206ac MXS-194: Added support for more query types to dbfwfilter 267832b Fixed diagnostic output a64b694 Fixed bugs in avrorouter 8faaba1 Fixed a bug in GTID seeking a5fafb7 Fixed typos in avrorouter documentation 8080379 Added avrorouter documentation fa07d8a 
Fixed dbfwfilter rule parser build failure 744ce0d Constraints are ignored in DDL statement processing 50808c6 Cleaned up avrorouter 47f6032 Merge branch '1.2.1-binlog_router_trx_lint' into develop caa0956 Added missing dependencies to maxscale-core 92df61a Remove parallel make from travis coverity builds fa2b2b4 Added more error logging to Avro record reading 9a98e8b Support for GTID requests and data bursts c2a787b Small diagnostic fix c4cee7e Added format output requested by client 50483c7 Cleaning up of Avro code d485379 Added support for binary Avro protocol c22cdbb Converted Avro GTID from string to integer representation 5795ca9 Added coverity notification email to .travis.yml a06e44d Added coverity_scan to Travis 6b94384 Fixed memory leak in avro_schema.c a11096c Support for db.table request for Avrorouter 4e5cbbf Fixed bugs in Avro record reading a99e427 Fixed minor bugs in avrorouter 01db8ae Fixed errors with CREATE TABLE statements f5f3d7a Diagnostic routine update 209324f Added missing include for log_manager.h e62f764 Added sending of schemas and file rotation 8c8fcbb Added missing log_manager.h include b13942d Changed printf calls in maxavro library to MXS_ERROR 1168962 More lint inspired changes, mainly in blr_master.c and blr_slave.c ced8f2f Fixed directory checks in avrorouter a8ae6be Minor fix to string processing fbd2d95 Fixed typo in dbfwfilter's CMakeLists.txt 29c3cf4 Merge pull request #106 from mariadb-corporation/willfong-patch-1 854d4e9 Add password column name to example 2f956df Moved server state change logging to a common function 007121f Fixed truncated string values 782892b Fix lint errors and warnings for blr_file.c 4f99fc5 Added Avro testing script 2820980 Small fix to help clear lint problems in blr.c 3afeda4 Fixed errors and warnings located by lint ecfff82 Fix most lint problems in blr.c 223689c Added ALTER TABLE support 80bc935 Fix final lint problems with mysql_common protocol functions. 
e068310 Added preliminary alter table parsing 8c723f1 Lint monitor modules fdb5620 Fix lint issues in authenticators. 84f0e04 Added function documentation and renamed files 365d9f5 Tidy up, mainly for lint 2ff3005 Added update rows event processing and event types to avro records 2ae0371 Fixed failing regex and improved data streaming f19206a Renamed avrorouter header aa7174b Moved relpacement and storage of ddl statements to a separate function 0c10be8 Improved client notification and added Avro block size managemet 91405a7 Cleaned up instance creation dd97485 Removed useless vars af64e9e Added CDC authentication with a db file b73a118 Streamline and lint MySQL backend protocol. 65034ce Merge branch 'release-1.4.0' into develop 28f7e4e Added callback for AVRO client async data transmission 628c27a Added MAXAVRO_FILE struct to AVRO_CLIENT 32b3645 Fixed slavelag build failure 7b15542 Added default authentication method for CDC protocol 5f8e20f Renamed maxavro types and formatted files that use them 882cf84 Added more function documentation to maxavro library 9532f0b Fixed CDC protocol build failure 35a1d3a Added support for offsets in client requests 94577ac Fixed, formatted and refactored CDC protocol da9bcad Use the maxavro library to read data from Avro files 3ececee Added low level error checking to maxavro library 01b0b8b Tidy and lint mysql_client.c 943f0a7 Added handling of Avro boolean data types to maxavro library 4c781f7 Cleaned up maxavro library and avrorouter 6b2e85d Renamed functions more consistently and cleaned up code e07158a Moved query event handling to its own function df7d4c0 Added avro_ prefix to rbr.c fcbfceb Added seeking to a position in an Avro file 068243a CDC auth decoding 3584d54 Add checks to simplify downstream logic. 
9b2c323 Removed useless fprintf bd5cd52 Added missing authfunc setup e4aff59 Added record value processing 5cc8615 Added value length functions 7921ecc Merge branch 'MXS-615' into MXS-483 4b09cca Added Travis status to readme.md cca3a48 Simplify interface to max admin authentication. 4739838 Authenticator API update 233505f Maxavrocheck now accepts multiple files 3fdd137 Improved the Avro file handling a6ba913 Merge from MXS-615 417d742 Added maxavrocheck 014f9cf Remove obsolete second parameter from authenticate function in authenticators. ece7ece MaxAdmin authentication converted to a module. Fix quirk in SSL setup. 7c8b37e Moved contents of avro_schema.h into mxs_avro.h d6660cf Improvements to type handling 71ed0cf Protocol API to have entry point for obtaining default authenticator name. 9d35de2 Fixed transaction tracking 5be02a2 Avrorouter internal state is now stored in the Avro file directory 9293464 Added new info to avro diagnostics 06e0e93 Protocol modules can still handle the authentication outside authenticator modules 6d7108b Added JSON output when Requesting an avro file 6188211 Added new CDC protocol state c8af27f CDC authentication uses its own authenticator 6590f94 Factor out qc_get_qtype_str b7880f1 Fix qc_sqlite CMakeLists.txt bd4ff43 Fixed connector-c being updated and built after every make invokation 0d9e57b Fixed non-MariaDB connectors being used in builds 3d3b779 FIX BUG IN CLIENT DCB SHUTDOWN THAT CAN CAUSE CRASHES e45ba33 Fixed Connector-C .cmake files c130189 Fixed connector-c being updated and built after every make invokation 7f3cdf3 Fixed errors on binlog rotation 9d3c83a Remove qc_sqlite 15e8ba5 CDC protocol is now compliant with new protocol structure 4460869 Merge branch 'release-1.4.0' into MXS-483 ea40812 Cleaned up the binlog processing loop cb646ca Add minimal select recognition to qc_sqlite ac1a9c5 Fixed binlogrouter test 85dd227 Re-route sqlite's sqlite3Select. 
7a2e6f3 Update CMakeLists.txt for qc_sqlite 7a751c3 Added timestamps to records and fixed minor bugs f73bdde Avrorouter state storage to disk fcf0488 Fixed Connector-C .cmake files 48b8e4e Merge branch 'MXS-615' into MXS-615-binlog-merge 7c8e19f Add missing dependencies for qc_sqlite bb9b667 Improvements to type handling and binlog position tracking dc66b74 Client UUID added f12fce4 AVRO registration is now handled by avro router 575b809 Add skeleton sqlite-based query classifier. d09d5fc Build sqlite 146d1f9 Fixed BLOB type handling and refined error messages 6e9e521 Added client user to diagnostics 4538bb8 Merge pull request #104 from rasmushoj/develop 7e18d95 Avro router diagnostics routine update 01e3f75 reverted changes in CMakeLists.txt 52f7c78 reverted changes in postinst.in eaed577 Added sqlite 3110100 a58cdda Travis configuration for MaxScale. ... 38b452d MIGRATE FREE CLIENT DATA TO AUTH MODULE; BUG FIXES; TIDY UP 6e64506 Fixed minor bugs aff2411 Enabled CDC protocol f669100 Fixed NULL Avro value being assigned to a field which cannot be NULL 8f6b16a Added row event processing to avrorouter 2939fe0 Updated Avro schema management to use actual column names 9e3b0cb Removed use of RBR related functions in binlogrouter d674903 Formatted avro files fe028d1 DEVELOPMENT OF AUTHENTICATION AS MODULE - WILL NOT WORK YET 977aded Added authenticator modules to the build a2b384f MOVE MYSQL AUTH CODE INTO AUTHENTICATOR MODULES DIRECTORY a5d7484 PRELIMINARY CHANGES TO CREATE AUTHENTICATORS AS MODULES 66cf802 Merge remote-tracking branch 'origin/develop' into MXS-615 bca0a7d MINOR CHANGES TO SATISFY LINT 5a9e397 Added Avrorouter binlog file walking fbc737f Fixed binlogrouter test 3c7c9d7 Added avrorouter main event handling loop 07ad81b Moved common binlogrouter code to a separate file 8c605ed Fixed avrorouter build failures aa1ba05 Moved binlog definitions to a separate header and fixed build failures eee7c55 Added create table statement detection e52b27e Added 
AVRO_INSTANCE and AVRO_CLIENT 0830caa Change test for client DCB to use role being DCB_ROLE_CLIENT_HANDLER. ... 997bbca Change protocols to continue looping if an accept fails; ... 522e42d Make use of dcb_accept and dcb_listen in httpd and telnetd protocols. 4e692b0 Generalise dcb_listen to tailor log messages to different protocols. ... 52c431d Remove support for passing default port number when handling ... afe5abc Fix bug in creation of SSL listener structure; fix bugs in ... 0bd6c77 Merge remote-tracking branch 'origin/MXS-544' into MXS-544-a ... 7598597 Add dcb_listen function to make a given DCB into a listener, ... a275d89 Maxbinlogcheck avro version can detect proper end of file 9bb55a5 Moved row event and table map event handling to a separate file b7d9e09 Add/improve comments, fix mistake with premature return. c598770 First attempt at extracting general code into dcb_accept, ... f20f28f Testing with maxbinlogcheck b3c60b7 Added mysql_binlog files 0ff9971 Added MariaDB/MySQL binary data processing functions 124560c Merge branch '1.2.1-binlog_router_trx' into MXS-483 4deccff New router fro cdc client 2c11434 Fixed test compiler errors c1f7d24 Obliged to merge remote-tracking branch 'origin/develop' ... 1775599 Merge remote-tracking branch 'origin/MXS-544' into Test-dev-544-merge c5317da Small modifications in comments 11c0666 Code cleanup 64a5e9a Merge branch 'release-1.3.0' into MXS-483 2c11e89 First Implementation of CDC
7572 lines
252 KiB
C
7572 lines
252 KiB
C
/*
|
|
** 2004 May 22
|
|
**
|
|
** The author disclaims copyright to this source code. In place of
|
|
** a legal notice, here is a blessing:
|
|
**
|
|
** May you do good and not evil.
|
|
** May you find forgiveness for yourself and forgive others.
|
|
** May you share freely, never taking more than you give.
|
|
**
|
|
******************************************************************************
|
|
**
|
|
** This file contains the VFS implementation for unix-like operating systems
|
|
** including Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others.
|
|
**
|
|
** There are actually several different VFS implementations in this file.
|
|
** The differences are in the way that file locking is done. The default
|
|
** implementation uses Posix Advisory Locks. Alternative implementations
|
|
** use flock(), dot-files, various proprietary locking schemas, or simply
|
|
** skip locking altogether.
|
|
**
|
|
** This source file is organized into divisions where the logic for various
|
|
** subfunctions is contained within the appropriate division. PLEASE
|
|
** KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed
|
|
** in the correct division and should be clearly labeled.
|
|
**
|
|
** The layout of divisions is as follows:
|
|
**
|
|
** * General-purpose declarations and utility functions.
|
|
** * Unique file ID logic used by VxWorks.
|
|
** * Various locking primitive implementations (all except proxy locking):
|
|
** + for Posix Advisory Locks
|
|
** + for no-op locks
|
|
** + for dot-file locks
|
|
** + for flock() locking
|
|
** + for named semaphore locks (VxWorks only)
|
|
** + for AFP filesystem locks (MacOSX only)
|
|
** * sqlite3_file methods not associated with locking.
|
|
** * Definitions of sqlite3_io_methods objects for all locking
|
|
** methods plus "finder" functions for each locking method.
|
|
** * sqlite3_vfs method implementations.
|
|
** * Locking primitives for the proxy uber-locking-method. (MacOSX only)
|
|
** * Definitions of sqlite3_vfs objects for all locking methods
|
|
** plus implementations of sqlite3_os_init() and sqlite3_os_end().
|
|
*/
|
|
#include "sqliteInt.h"
|
|
#if SQLITE_OS_UNIX /* This file is used on unix only */
|
|
|
|
/*
|
|
** There are various methods for file locking used for concurrency
|
|
** control:
|
|
**
|
|
** 1. POSIX locking (the default),
|
|
** 2. No locking,
|
|
** 3. Dot-file locking,
|
|
** 4. flock() locking,
|
|
** 5. AFP locking (OSX only),
|
|
** 6. Named POSIX semaphores (VXWorks only),
|
|
** 7. proxy locking. (OSX only)
|
|
**
|
|
** Styles 4, 5, and 7 are only available if SQLITE_ENABLE_LOCKING_STYLE
|
|
** is defined to 1. The SQLITE_ENABLE_LOCKING_STYLE also enables automatic
|
|
** selection of the appropriate locking style based on the filesystem
|
|
** where the database is located.
|
|
*/
|
|
#if !defined(SQLITE_ENABLE_LOCKING_STYLE)
|
|
# if defined(__APPLE__)
|
|
# define SQLITE_ENABLE_LOCKING_STYLE 1
|
|
# else
|
|
# define SQLITE_ENABLE_LOCKING_STYLE 0
|
|
# endif
|
|
#endif
|
|
|
|
/*
|
|
** standard include files.
|
|
*/
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <fcntl.h>
|
|
#include <unistd.h>
|
|
#include <time.h>
|
|
#include <sys/time.h>
|
|
#include <errno.h>
|
|
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
|
|
# include <sys/mman.h>
|
|
#endif
|
|
|
|
#if SQLITE_ENABLE_LOCKING_STYLE
|
|
# include <sys/ioctl.h>
|
|
# include <sys/file.h>
|
|
# include <sys/param.h>
|
|
#endif /* SQLITE_ENABLE_LOCKING_STYLE */
|
|
|
|
#if defined(__APPLE__) && ((__MAC_OS_X_VERSION_MIN_REQUIRED > 1050) || \
|
|
(__IPHONE_OS_VERSION_MIN_REQUIRED > 2000))
|
|
# if (!defined(TARGET_OS_EMBEDDED) || (TARGET_OS_EMBEDDED==0)) \
|
|
&& (!defined(TARGET_IPHONE_SIMULATOR) || (TARGET_IPHONE_SIMULATOR==0))
|
|
# define HAVE_GETHOSTUUID 1
|
|
# else
|
|
# warning "gethostuuid() is disabled."
|
|
# endif
|
|
#endif
|
|
|
|
|
|
#if OS_VXWORKS
|
|
# include <sys/ioctl.h>
|
|
# include <semaphore.h>
|
|
# include <limits.h>
|
|
#endif /* OS_VXWORKS */
|
|
|
|
#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
|
|
# include <sys/mount.h>
|
|
#endif
|
|
|
|
#ifdef HAVE_UTIME
|
|
# include <utime.h>
|
|
#endif
|
|
|
|
/*
|
|
** Allowed values of unixFile.fsFlags
|
|
*/
|
|
#define SQLITE_FSFLAGS_IS_MSDOS 0x1
|
|
|
|
/*
|
|
** If we are to be thread-safe, include the pthreads header and define
|
|
** the SQLITE_UNIX_THREADS macro.
|
|
*/
|
|
#if SQLITE_THREADSAFE
|
|
# include <pthread.h>
|
|
# define SQLITE_UNIX_THREADS 1
|
|
#endif
|
|
|
|
/*
|
|
** Default permissions when creating a new file
|
|
*/
|
|
#ifndef SQLITE_DEFAULT_FILE_PERMISSIONS
|
|
# define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
|
|
#endif
|
|
|
|
/*
|
|
** Default permissions when creating auto proxy dir
|
|
*/
|
|
#ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
|
|
# define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755
|
|
#endif
|
|
|
|
/*
|
|
** Maximum supported path-length.
|
|
*/
|
|
#define MAX_PATHNAME 512
|
|
|
|
/*
|
|
** Maximum supported symbolic links
|
|
*/
|
|
#define SQLITE_MAX_SYMLINKS 100
|
|
|
|
/* Always cast the getpid() return type for compatibility with
|
|
** kernel modules in VxWorks. */
|
|
#define osGetpid(X)  ((pid_t)getpid())  /* argument X is ignored */
|
|
|
|
/*
|
|
** Only set the lastErrno if the error code is a real error and not
|
|
** a normal expected return code of SQLITE_BUSY or SQLITE_OK
|
|
*/
|
|
/* True when x is neither SQLITE_OK nor SQLITE_BUSY.  The argument is
** parenthesized so that expressions such as (a|b) are compared as a whole;
** note that x is still evaluated twice. */
#define IS_LOCK_ERROR(x)  (((x) != SQLITE_OK) && ((x) != SQLITE_BUSY))
|
|
|
|
/* Forward references */
|
|
typedef struct unixShm unixShm; /* Connection shared memory */
|
|
typedef struct unixShmNode unixShmNode; /* Shared memory instance */
|
|
typedef struct unixInodeInfo unixInodeInfo; /* An i-node */
|
|
typedef struct UnixUnusedFd UnixUnusedFd; /* An unused file descriptor */
|
|
|
|
/*
|
|
** Sometimes, after a file handle is closed by SQLite, the file descriptor
|
|
** cannot be closed immediately. In these cases, instances of the following
|
|
** structure are used to store the file descriptor while waiting for an
|
|
** opportunity to either close or reuse it.
|
|
*/
|
|
struct UnixUnusedFd {
|
|
int fd; /* File descriptor to close */
|
|
int flags; /* Flags this file descriptor was opened with */
|
|
UnixUnusedFd *pNext; /* Next unused file descriptor on same file */
|
|
};
|
|
|
|
/*
|
|
** The unixFile structure is subclass of sqlite3_file specific to the unix
|
|
** VFS implementations.
|
|
*/
|
|
typedef struct unixFile unixFile;
|
|
struct unixFile {
|
|
sqlite3_io_methods const *pMethod; /* Always the first entry */
|
|
sqlite3_vfs *pVfs; /* The VFS that created this unixFile */
|
|
unixInodeInfo *pInode; /* Info about locks on this inode */
|
|
int h; /* The file descriptor */
|
|
unsigned char eFileLock; /* The type of lock held on this fd */
|
|
unsigned short int ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */
|
|
int lastErrno; /* The unix errno from last I/O error */
|
|
void *lockingContext; /* Locking style specific state */
|
|
UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */
|
|
const char *zPath; /* Name of the file */
|
|
unixShm *pShm; /* Shared memory segment information */
|
|
int szChunk; /* Configured by FCNTL_CHUNK_SIZE */
|
|
#if SQLITE_MAX_MMAP_SIZE>0
|
|
int nFetchOut; /* Number of outstanding xFetch refs */
|
|
sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */
|
|
sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */
|
|
sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */
|
|
void *pMapRegion; /* Memory mapped region */
|
|
#endif
|
|
#ifdef __QNXNTO__
|
|
int sectorSize; /* Device sector size */
|
|
int deviceCharacteristics; /* Precomputed device characteristics */
|
|
#endif
|
|
#if SQLITE_ENABLE_LOCKING_STYLE
|
|
int openFlags; /* The flags specified at open() */
|
|
#endif
|
|
#if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__)
|
|
unsigned fsFlags; /* cached details from statfs() */
|
|
#endif
|
|
#if OS_VXWORKS
|
|
struct vxworksFileId *pId; /* Unique file ID */
|
|
#endif
|
|
#ifdef SQLITE_DEBUG
|
|
/* The next group of variables are used to track whether or not the
|
|
** transaction counter in bytes 24-27 of database files are updated
|
|
** whenever any part of the database changes. An assertion fault will
|
|
** occur if a file is updated without also updating the transaction
|
|
** counter. This test is made to avoid new problems similar to the
|
|
** one described by ticket #3584.
|
|
*/
|
|
unsigned char transCntrChng; /* True if the transaction counter changed */
|
|
unsigned char dbUpdate; /* True if any part of database file changed */
|
|
unsigned char inNormalWrite; /* True if in a normal write operation */
|
|
|
|
#endif
|
|
|
|
#ifdef SQLITE_TEST
|
|
/* In test mode, increase the size of this structure a bit so that
|
|
** it is larger than the struct CrashFile defined in test6.c.
|
|
*/
|
|
char aPadding[32];
|
|
#endif
|
|
};
|
|
|
|
/* This variable holds the process id (pid) from when the xRandomness()
|
|
** method was called. If xOpen() is called from a different process id,
|
|
** indicating that a fork() has occurred, the PRNG will be reset.
|
|
*/
|
|
static pid_t randomnessPid = 0;
|
|
|
|
/*
|
|
** Allowed values for the unixFile.ctrlFlags bitmask:
|
|
*/
|
|
#define UNIXFILE_EXCL 0x01 /* Connections from one process only */
|
|
#define UNIXFILE_RDONLY 0x02 /* Connection is read only */
|
|
#define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */
|
|
#ifndef SQLITE_DISABLE_DIRSYNC
|
|
# define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */
|
|
#else
|
|
# define UNIXFILE_DIRSYNC 0x00
|
|
#endif
|
|
#define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */
|
|
#define UNIXFILE_DELETE 0x20 /* Delete on close */
|
|
#define UNIXFILE_URI 0x40 /* Filename might have query parameters */
|
|
#define UNIXFILE_NOLOCK 0x80 /* Do no file locking */
|
|
|
|
/*
|
|
** Include code that is common to all os_*.c files
|
|
*/
|
|
#include "os_common.h"
|
|
|
|
/*
|
|
** Define various macros that are missing from some systems.
|
|
*/
|
|
#ifndef O_LARGEFILE
|
|
# define O_LARGEFILE 0
|
|
#endif
|
|
#ifdef SQLITE_DISABLE_LFS
|
|
# undef O_LARGEFILE
|
|
# define O_LARGEFILE 0
|
|
#endif
|
|
#ifndef O_NOFOLLOW
|
|
# define O_NOFOLLOW 0
|
|
#endif
|
|
#ifndef O_BINARY
|
|
# define O_BINARY 0
|
|
#endif
|
|
|
|
/*
|
|
** The threadid macro resolves to the thread-id or to 0. Used for
|
|
** testing and debugging only.
|
|
*/
|
|
#if SQLITE_THREADSAFE
|
|
#define threadid pthread_self()
|
|
#else
|
|
#define threadid 0
|
|
#endif
|
|
|
|
/*
|
|
** HAVE_MREMAP defaults to true on Linux and false everywhere else.
|
|
*/
|
|
#if !defined(HAVE_MREMAP)
|
|
# if defined(__linux__) && defined(_GNU_SOURCE)
|
|
# define HAVE_MREMAP 1
|
|
# else
|
|
# define HAVE_MREMAP 0
|
|
# endif
|
|
#endif
|
|
|
|
/*
|
|
** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek()
|
|
** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined.
|
|
*/
|
|
#ifdef __ANDROID__
|
|
# define lseek lseek64
|
|
#endif
|
|
|
|
/*
|
|
** Different Unix systems declare open() in different ways. Some use
|
|
** open(const char*,int,mode_t). Others use open(const char*,int,...).
|
|
** The difference is important when using a pointer to the function.
|
|
**
|
|
** The safest way to deal with the problem is to always use this wrapper
|
|
** which always has the same well-defined interface.
|
|
*/
|
|
static int posixOpen(const char *zFile, int flags, int mode){
  /* Forward straight to open(); this wrapper only exists to give the call
  ** a fixed three-argument signature. */
  int fd = open(zFile, flags, mode);
  return fd;
}
|
|
|
|
/* Forward reference */
|
|
static int openDirectory(const char*, int*);
|
|
static int unixGetpagesize(void);
|
|
|
|
/*
|
|
** Many system calls are accessed through pointer-to-functions so that
|
|
** they may be overridden at runtime to facilitate fault injection during
|
|
** testing and sandboxing. The following array holds the names and pointers
|
|
** to all overrideable system calls.
|
|
*/
|
|
static struct unix_syscall {
|
|
const char *zName; /* Name of the system call */
|
|
sqlite3_syscall_ptr pCurrent; /* Current value of the system call */
|
|
sqlite3_syscall_ptr pDefault; /* Default value */
|
|
} aSyscall[] = {
|
|
{ "open", (sqlite3_syscall_ptr)posixOpen, 0 },
|
|
#define osOpen ((int(*)(const char*,int,int))aSyscall[0].pCurrent)
|
|
|
|
{ "close", (sqlite3_syscall_ptr)close, 0 },
|
|
#define osClose ((int(*)(int))aSyscall[1].pCurrent)
|
|
|
|
{ "access", (sqlite3_syscall_ptr)access, 0 },
|
|
#define osAccess ((int(*)(const char*,int))aSyscall[2].pCurrent)
|
|
|
|
{ "getcwd", (sqlite3_syscall_ptr)getcwd, 0 },
|
|
#define osGetcwd ((char*(*)(char*,size_t))aSyscall[3].pCurrent)
|
|
|
|
{ "stat", (sqlite3_syscall_ptr)stat, 0 },
|
|
#define osStat ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent)
|
|
|
|
/*
|
|
** The DJGPP compiler environment looks mostly like Unix, but it
|
|
** lacks the fcntl() system call. So redefine fcntl() to be something
|
|
** that always succeeds. This means that locking does not occur under
|
|
** DJGPP. But it is DOS - what did you expect?
|
|
*/
|
|
#ifdef __DJGPP__
|
|
{ "fstat", 0, 0 },
|
|
#define osFstat(a,b,c) 0
|
|
#else
|
|
{ "fstat", (sqlite3_syscall_ptr)fstat, 0 },
|
|
#define osFstat ((int(*)(int,struct stat*))aSyscall[5].pCurrent)
|
|
#endif
|
|
|
|
{ "ftruncate", (sqlite3_syscall_ptr)ftruncate, 0 },
|
|
#define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent)
|
|
|
|
{ "fcntl", (sqlite3_syscall_ptr)fcntl, 0 },
|
|
#define osFcntl ((int(*)(int,int,...))aSyscall[7].pCurrent)
|
|
|
|
{ "read", (sqlite3_syscall_ptr)read, 0 },
|
|
#define osRead ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent)
|
|
|
|
#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE
|
|
{ "pread", (sqlite3_syscall_ptr)pread, 0 },
|
|
#else
|
|
{ "pread", (sqlite3_syscall_ptr)0, 0 },
|
|
#endif
|
|
#define osPread ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent)
|
|
|
|
#if defined(USE_PREAD64)
|
|
{ "pread64", (sqlite3_syscall_ptr)pread64, 0 },
|
|
#else
|
|
{ "pread64", (sqlite3_syscall_ptr)0, 0 },
|
|
#endif
|
|
#define osPread64 ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[10].pCurrent)
|
|
|
|
{ "write", (sqlite3_syscall_ptr)write, 0 },
|
|
#define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent)
|
|
|
|
#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE
|
|
{ "pwrite", (sqlite3_syscall_ptr)pwrite, 0 },
|
|
#else
|
|
{ "pwrite", (sqlite3_syscall_ptr)0, 0 },
|
|
#endif
|
|
#define osPwrite ((ssize_t(*)(int,const void*,size_t,off_t))\
|
|
aSyscall[12].pCurrent)
|
|
|
|
#if defined(USE_PREAD64)
|
|
{ "pwrite64", (sqlite3_syscall_ptr)pwrite64, 0 },
|
|
#else
|
|
{ "pwrite64", (sqlite3_syscall_ptr)0, 0 },
|
|
#endif
|
|
#define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off_t))\
|
|
aSyscall[13].pCurrent)
|
|
|
|
{ "fchmod", (sqlite3_syscall_ptr)fchmod, 0 },
|
|
#define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent)
|
|
|
|
#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
|
|
{ "fallocate", (sqlite3_syscall_ptr)posix_fallocate, 0 },
|
|
#else
|
|
{ "fallocate", (sqlite3_syscall_ptr)0, 0 },
|
|
#endif
|
|
#define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent)
|
|
|
|
{ "unlink", (sqlite3_syscall_ptr)unlink, 0 },
|
|
#define osUnlink ((int(*)(const char*))aSyscall[16].pCurrent)
|
|
|
|
{ "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 },
|
|
#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent)
|
|
|
|
{ "mkdir", (sqlite3_syscall_ptr)mkdir, 0 },
|
|
#define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent)
|
|
|
|
{ "rmdir", (sqlite3_syscall_ptr)rmdir, 0 },
|
|
#define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent)
|
|
|
|
#if defined(HAVE_FCHOWN)
|
|
{ "fchown", (sqlite3_syscall_ptr)fchown, 0 },
|
|
#else
|
|
{ "fchown", (sqlite3_syscall_ptr)0, 0 },
|
|
#endif
|
|
#define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent)
|
|
|
|
{ "geteuid", (sqlite3_syscall_ptr)geteuid, 0 },
|
|
#define osGeteuid ((uid_t(*)(void))aSyscall[21].pCurrent)
|
|
|
|
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
|
|
{ "mmap", (sqlite3_syscall_ptr)mmap, 0 },
|
|
#else
|
|
{ "mmap", (sqlite3_syscall_ptr)0, 0 },
|
|
#endif
|
|
#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[22].pCurrent)
|
|
|
|
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
|
|
{ "munmap", (sqlite3_syscall_ptr)munmap, 0 },
|
|
#else
|
|
{ "munmap", (sqlite3_syscall_ptr)0, 0 },
|
|
#endif
|
|
#define osMunmap ((void*(*)(void*,size_t))aSyscall[23].pCurrent)
|
|
|
|
#if HAVE_MREMAP && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)
|
|
{ "mremap", (sqlite3_syscall_ptr)mremap, 0 },
|
|
#else
|
|
{ "mremap", (sqlite3_syscall_ptr)0, 0 },
|
|
#endif
|
|
#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[24].pCurrent)
|
|
|
|
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
|
|
{ "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 },
|
|
#else
|
|
{ "getpagesize", (sqlite3_syscall_ptr)0, 0 },
|
|
#endif
|
|
#define osGetpagesize ((int(*)(void))aSyscall[25].pCurrent)
|
|
|
|
#if defined(HAVE_READLINK)
|
|
{ "readlink", (sqlite3_syscall_ptr)readlink, 0 },
|
|
#else
|
|
{ "readlink", (sqlite3_syscall_ptr)0, 0 },
|
|
#endif
|
|
#define osReadlink ((ssize_t(*)(const char*,char*,size_t))aSyscall[26].pCurrent)
|
|
|
|
#if defined(HAVE_LSTAT)
|
|
{ "lstat", (sqlite3_syscall_ptr)lstat, 0 },
|
|
#else
|
|
{ "lstat", (sqlite3_syscall_ptr)0, 0 },
|
|
#endif
|
|
#define osLstat ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent)
|
|
|
|
}; /* End of the overrideable system calls */
|
|
|
|
|
|
/*
|
|
** On some systems, calls to fchown() will trigger a message in a security
|
|
** log if they come from non-root processes. So avoid calling fchown() if
|
|
** we are not running as root.
|
|
*/
|
|
static int robustFchown(int fd, uid_t uid, gid_t gid){
#if defined(HAVE_FCHOWN)
  if( osGeteuid()!=0 ){
    /* Effective uid is not 0 (not root): skip fchown() and report success */
    return 0;
  }
  return osFchown(fd,uid,gid);
#else
  /* fchown() is not available in this build: nothing to do */
  return 0;
#endif
}
|
|
|
|
/*
|
|
** This is the xSetSystemCall() method of sqlite3_vfs for all of the
** "unix" VFSes. Return SQLITE_OK upon successfully updating the
|
|
** system call pointer, or SQLITE_NOTFOUND if there is no configurable
|
|
** system call named zName.
|
|
*/
|
|
static int unixSetSystemCall(
|
|
sqlite3_vfs *pNotUsed, /* The VFS pointer. Not used */
|
|
const char *zName, /* Name of system call to override */
|
|
sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */
|
|
){
|
|
unsigned int i;
|
|
int rc = SQLITE_NOTFOUND;
|
|
|
|
UNUSED_PARAMETER(pNotUsed);
|
|
if( zName==0 ){
|
|
/* If no zName is given, restore all system calls to their default
|
|
** settings and return NULL
|
|
*/
|
|
rc = SQLITE_OK;
|
|
for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
|
|
if( aSyscall[i].pDefault ){
|
|
aSyscall[i].pCurrent = aSyscall[i].pDefault;
|
|
}
|
|
}
|
|
}else{
|
|
/* If zName is specified, operate on only the one system call
|
|
** specified.
|
|
*/
|
|
for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
|
|
if( strcmp(zName, aSyscall[i].zName)==0 ){
|
|
if( aSyscall[i].pDefault==0 ){
|
|
aSyscall[i].pDefault = aSyscall[i].pCurrent;
|
|
}
|
|
rc = SQLITE_OK;
|
|
if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault;
|
|
aSyscall[i].pCurrent = pNewFunc;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Return the value of a system call. Return NULL if zName is not a
|
|
** recognized system call name. NULL is also returned if the system call
|
|
** is currently undefined.
|
|
*/
|
|
static sqlite3_syscall_ptr unixGetSystemCall(
|
|
sqlite3_vfs *pNotUsed,
|
|
const char *zName
|
|
){
|
|
unsigned int i;
|
|
|
|
UNUSED_PARAMETER(pNotUsed);
|
|
for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
|
|
if( strcmp(zName, aSyscall[i].zName)==0 ) return aSyscall[i].pCurrent;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
** Return the name of the first system call after zName. If zName==NULL
|
|
** then return the name of the first system call. Return NULL if zName
|
|
** is the last system call or if zName is not the name of a valid
|
|
** system call.
|
|
*/
|
|
static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){
  const int nCall = ArraySize(aSyscall);
  int iNext = 0;           /* Index at which the forward scan begins */

  UNUSED_PARAMETER(p);

  if( zName ){
    /* Find zName among all entries except the last.  If it is not found,
    ** iHit stops on the last index, so the scan below starts past the end
    ** of the table and 0 is returned. */
    int iHit = 0;
    while( iHit<nCall-1 && strcmp(zName, aSyscall[iHit].zName)!=0 ){
      iHit++;
    }
    iNext = iHit + 1;
  }

  /* Return the first following entry that currently has a function */
  while( iNext<nCall ){
    if( aSyscall[iNext].pCurrent!=0 ) return aSyscall[iNext].zName;
    iNext++;
  }
  return 0;
}
|
|
|
|
/*
** Lowest file descriptor number that robust_open() will hand back.
** Descriptors below this value are rejected so that a database file is
** never opened on a descriptor commonly used for standard input, output,
** or error.  The value may be overridden at compile-time; it defaults to 3.
*/
|
|
#ifndef SQLITE_MINIMUM_FILE_DESCRIPTOR
|
|
# define SQLITE_MINIMUM_FILE_DESCRIPTOR 3
|
|
#endif
|
|
|
|
/*
|
|
** Invoke open(). Do so multiple times, until it either succeeds or
|
|
** fails for some reason other than EINTR.
|
|
**
|
|
** If the file creation mode "m" is 0 then set it to the default for
|
|
** SQLite. The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally
|
|
** 0644) as modified by the system umask. If m is not 0, then
|
|
** make the file creation mode be exactly m ignoring the umask.
|
|
**
|
|
** The m parameter will be non-zero only when creating -wal, -journal,
|
|
** and -shm files. We want those files to have *exactly* the same
|
|
** permissions as their original database, unadulterated by the umask.
|
|
** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a
|
|
** transaction crashes and leaves behind hot journals, then any
|
|
** process that is able to write to the database will also be able to
|
|
** recover the hot journals.
|
|
*/
|
|
static int robust_open(const char *z, int f, mode_t m){
|
|
int fd;
|
|
mode_t m2 = m ? m : SQLITE_DEFAULT_FILE_PERMISSIONS;
|
|
while(1){
|
|
#if defined(O_CLOEXEC)
|
|
fd = osOpen(z,f|O_CLOEXEC,m2);
|
|
#else
|
|
fd = osOpen(z,f,m2);
|
|
#endif
|
|
if( fd<0 ){
|
|
if( errno==EINTR ) continue;
|
|
break;
|
|
}
|
|
if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break;
|
|
osClose(fd);
|
|
sqlite3_log(SQLITE_WARNING,
|
|
"attempt to open \"%s\" as file descriptor %d", z, fd);
|
|
fd = -1;
|
|
if( osOpen("/dev/null", f, m)<0 ) break;
|
|
}
|
|
if( fd>=0 ){
|
|
if( m!=0 ){
|
|
struct stat statbuf;
|
|
if( osFstat(fd, &statbuf)==0
|
|
&& statbuf.st_size==0
|
|
&& (statbuf.st_mode&0777)!=m
|
|
){
|
|
osFchmod(fd, m);
|
|
}
|
|
}
|
|
#if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0)
|
|
osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
|
|
#endif
|
|
}
|
|
return fd;
|
|
}
|
|
|
|
/*
|
|
** Helper functions to obtain and relinquish the global mutex. The
|
|
** global mutex is used to protect the unixInodeInfo and
|
|
** vxworksFileId objects used by this file, all of which may be
|
|
** shared by multiple threads.
|
|
**
|
|
** Function unixMutexHeld() is used to assert() that the global mutex
|
|
** is held when required. This function is only used as part of assert()
|
|
** statements. e.g.
|
|
**
|
|
** unixEnterMutex()
|
|
** assert( unixMutexHeld() );
|
|
** unixLeaveMutex()
|
|
*/
|
|
static void unixEnterMutex(void){
|
|
/* Enter the static mutex that sqlite3MutexAlloc() returns for
** SQLITE_MUTEX_STATIC_VFS1.  Paired with unixLeaveMutex(). */
sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1));
|
|
}
|
|
static void unixLeaveMutex(void){
|
|
/* Leave the same SQLITE_MUTEX_STATIC_VFS1 mutex that unixEnterMutex()
** entered. */
sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1));
|
|
}
|
|
#ifdef SQLITE_DEBUG
|
|
static int unixMutexHeld(void) {
|
|
/* Report the result of sqlite3_mutex_held() for the
** SQLITE_MUTEX_STATIC_VFS1 mutex.  Compiled only when SQLITE_DEBUG is
** defined and intended for use inside assert() statements. */
return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1));
|
|
}
|
|
#endif
|
|
|
|
|
|
#ifdef SQLITE_HAVE_OS_TRACE
|
|
/*
|
|
** Helper function for printing out trace information from debugging
|
|
** binaries. This returns the string representation of the supplied
|
|
** integer lock-type.
|
|
*/
|
|
static const char *azFileLock(int eFileLock){
|
|
switch( eFileLock ){
|
|
case NO_LOCK: return "NONE";
|
|
case SHARED_LOCK: return "SHARED";
|
|
case RESERVED_LOCK: return "RESERVED";
|
|
case PENDING_LOCK: return "PENDING";
|
|
case EXCLUSIVE_LOCK: return "EXCLUSIVE";
|
|
}
|
|
return "ERROR";
|
|
}
|
|
#endif
|
|
|
|
#ifdef SQLITE_LOCK_TRACE
|
|
/*
|
|
** Print out information about all locking operations.
|
|
**
|
|
** This routine is used for troubleshooting locks on multithreaded
|
|
** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE
|
|
** command-line option on the compiler. This code is normally
|
|
** turned off.
|
|
*/
|
|
static int lockTrace(int fd, int op, struct flock *p){
|
|
char *zOpName, *zType;
|
|
int s;
|
|
int savedErrno;
|
|
if( op==F_GETLK ){
|
|
zOpName = "GETLK";
|
|
}else if( op==F_SETLK ){
|
|
zOpName = "SETLK";
|
|
}else{
|
|
s = osFcntl(fd, op, p);
|
|
sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
|
|
return s;
|
|
}
|
|
if( p->l_type==F_RDLCK ){
|
|
zType = "RDLCK";
|
|
}else if( p->l_type==F_WRLCK ){
|
|
zType = "WRLCK";
|
|
}else if( p->l_type==F_UNLCK ){
|
|
zType = "UNLCK";
|
|
}else{
|
|
assert( 0 );
|
|
}
|
|
assert( p->l_whence==SEEK_SET );
|
|
s = osFcntl(fd, op, p);
|
|
savedErrno = errno;
|
|
sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
|
|
threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
|
|
(int)p->l_pid, s);
|
|
if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
|
|
struct flock l2;
|
|
l2 = *p;
|
|
osFcntl(fd, F_GETLK, &l2);
|
|
if( l2.l_type==F_RDLCK ){
|
|
zType = "RDLCK";
|
|
}else if( l2.l_type==F_WRLCK ){
|
|
zType = "WRLCK";
|
|
}else if( l2.l_type==F_UNLCK ){
|
|
zType = "UNLCK";
|
|
}else{
|
|
assert( 0 );
|
|
}
|
|
sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
|
|
zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
|
|
}
|
|
errno = savedErrno;
|
|
return s;
|
|
}
|
|
#undef osFcntl
|
|
#define osFcntl lockTrace
|
|
#endif /* SQLITE_LOCK_TRACE */
|
|
|
|
/*
|
|
** Retry ftruncate() calls that fail due to EINTR
|
|
**
|
|
** All calls to ftruncate() within this file should be made through
|
|
** this wrapper. On the Android platform, bypassing the logic below
|
|
** could lead to a corrupt database.
|
|
*/
|
|
static int robust_ftruncate(int h, sqlite3_int64 sz){
#ifdef __ANDROID__
  /* Android's ftruncate() only handles 32-bit offsets, even when
  ** _FILE_OFFSET_BITS=64 is defined, so a request to truncate to more
  ** than 2GiB is unsafe.  Such requests are silently treated as a
  ** successful no-op. */
  if( sz>(sqlite3_int64)0x7FFFFFFF ){
    return SQLITE_OK;
  }
#endif
  /* Keep calling ftruncate() until it succeeds or fails for a reason
  ** other than being interrupted by a signal. */
  for(;;){
    int rc = osFtruncate(h,sz);
    if( rc>=0 || errno!=EINTR ){
      return rc;
    }
  }
}
|
|
|
|
/*
|
|
** This routine translates a standard POSIX errno code into something
|
|
** useful to the clients of the sqlite3 functions. Specifically, it is
|
|
** intended to translate a variety of "try again" errors into SQLITE_BUSY
|
|
** and a variety of "please close the file descriptor NOW" errors into
|
|
** SQLITE_IOERR
|
|
**
|
|
** Errors during initialization of locks, or file system support for locks,
|
|
** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately.
|
|
*/
|
|
static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
|
|
assert( (sqliteIOErr == SQLITE_IOERR_LOCK) ||
|
|
(sqliteIOErr == SQLITE_IOERR_UNLOCK) ||
|
|
(sqliteIOErr == SQLITE_IOERR_RDLOCK) ||
|
|
(sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) );
|
|
switch (posixError) {
|
|
case EACCES:
|
|
case EAGAIN:
|
|
case ETIMEDOUT:
|
|
case EBUSY:
|
|
case EINTR:
|
|
case ENOLCK:
|
|
/* random NFS retry error, unless during file system support
|
|
* introspection, in which it actually means what it says */
|
|
return SQLITE_BUSY;
|
|
|
|
case EPERM:
|
|
return SQLITE_PERM;
|
|
|
|
default:
|
|
return sqliteIOErr;
|
|
}
|
|
}
|
|
|
|
|
|
/******************************************************************************
|
|
****************** Begin Unique File ID Utility Used By VxWorks ***************
|
|
**
|
|
** On most versions of unix, we can get a unique ID for a file by concatenating
|
|
** the device number and the inode number. But this does not work on VxWorks.
|
|
** On VxWorks, a unique file id must be based on the canonical filename.
|
|
**
|
|
** A pointer to an instance of the following structure can be used as a
|
|
** unique file ID in VxWorks. Each instance of this structure contains
|
|
** a copy of the canonical filename. There is also a reference count.
|
|
** The structure is reclaimed when the number of pointers to it drops to
|
|
** zero.
|
|
**
|
|
** There are never very many files open at one time and lookups are not
|
|
** a performance-critical path, so it is sufficient to put these
|
|
** structures on a linked list.
|
|
*/
|
|
struct vxworksFileId {
  /* Link to the next entry on the list of all file IDs */
  struct vxworksFileId *pNext;
  /* How many references to this entry are outstanding */
  int nRef;
  /* Number of bytes in zCanonicalName[], not counting the terminator */
  int nName;
  /* The canonical filename that identifies the file */
  char *zCanonicalName;
};
|
|
|
|
#if OS_VXWORKS
|
|
/*
** Head of the linked list holding every vxworksFileId object.
** vxworksFindFileId() pushes new entries onto the front of this list and
** vxworksReleaseFileId() unlinks them; both do so between
** unixEnterMutex() and unixLeaveMutex().
*/
|
|
static struct vxworksFileId *vxworksFileList = 0;
|
|
|
|
/*
|
|
** Simplify a filename into its canonical form
|
|
** by making the following changes:
|
|
**
|
|
** * removing any trailing and duplicate /
|
|
** * convert /./ into just /
|
|
** * convert /A/../ where A is any simple name into just /
|
|
**
|
|
** Changes are made in-place. Return the new name length.
|
|
**
|
|
** The original filename is in z[0..n-1]. Return the number of
|
|
** characters in the simplified name.
|
|
*/
|
|
static int vxworksSimplifyName(char *z, int n){
  int i, j;   /* i reads from the input, j writes the simplified output */

  /* Drop trailing slashes, but always keep at least one character so that
  ** a name made only of slashes can still simplify to "/". */
  while( n>1 && z[n-1]=='/' ){ n--; }

  for(i=j=0; i<n; i++){
    /* Only look ahead while the next character is still inside z[0..n-1].
    ** Without the i+1<n guard, a name consisting solely of slashes (for
    ** example "//") would see the trimmed-off slash at z[n], skip its one
    ** remaining character and collapse to the empty string. */
    if( z[i]=='/' && i+1<n ){
      /* "//" -> "/" : skip this slash, the next one will be kept */
      if( z[i+1]=='/' ) continue;
      /* "/./" -> "/" : skip "/." and continue at the following slash */
      if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
        i += 1;
        continue;
      }
      /* "/A/../" -> "/" : discard the previously emitted component A
      ** together with its leading slash, then skip over "/.." */
      if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
        while( j>0 && z[j-1]!='/' ){ j--; }
        if( j>0 ){ j--; }
        i += 2;
        continue;
      }
    }
    z[j++] = z[i];
  }
  z[j] = 0;
  return j;
}
|
|
|
|
/*
|
|
** Find a unique file ID for the given absolute pathname. Return
|
|
** a pointer to the vxworksFileId object. This pointer is the unique
|
|
** file ID.
|
|
**
|
|
** The nRef field of the vxworksFileId object is incremented before
|
|
** the object is returned. A new vxworksFileId object is created
|
|
** and added to the global list if necessary.
|
|
**
|
|
** If a memory allocation error occurs, return NULL.
|
|
*/
|
|
static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){
|
|
struct vxworksFileId *pNew; /* search key and new file ID */
|
|
struct vxworksFileId *pCandidate; /* For looping over existing file IDs */
|
|
int n; /* Length of zAbsoluteName string */
|
|
|
|
assert( zAbsoluteName[0]=='/' );
|
|
n = (int)strlen(zAbsoluteName);
|
|
pNew = sqlite3_malloc64( sizeof(*pNew) + (n+1) );
|
|
if( pNew==0 ) return 0;
|
|
pNew->zCanonicalName = (char*)&pNew[1];
|
|
memcpy(pNew->zCanonicalName, zAbsoluteName, n+1);
|
|
n = vxworksSimplifyName(pNew->zCanonicalName, n);
|
|
|
|
/* Search for an existing entry that matching the canonical name.
|
|
** If found, increment the reference count and return a pointer to
|
|
** the existing file ID.
|
|
*/
|
|
unixEnterMutex();
|
|
for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){
|
|
if( pCandidate->nName==n
|
|
&& memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0
|
|
){
|
|
sqlite3_free(pNew);
|
|
pCandidate->nRef++;
|
|
unixLeaveMutex();
|
|
return pCandidate;
|
|
}
|
|
}
|
|
|
|
/* No match was found. We will make a new file ID */
|
|
pNew->nRef = 1;
|
|
pNew->nName = n;
|
|
pNew->pNext = vxworksFileList;
|
|
vxworksFileList = pNew;
|
|
unixLeaveMutex();
|
|
return pNew;
|
|
}
|
|
|
|
/*
|
|
** Decrement the reference count on a vxworksFileId object. Free
|
|
** the object when the reference count reaches zero.
|
|
*/
|
|
static void vxworksReleaseFileId(struct vxworksFileId *pId){
|
|
unixEnterMutex();
|
|
assert( pId->nRef>0 );
|
|
pId->nRef--;
|
|
if( pId->nRef==0 ){
|
|
struct vxworksFileId **pp;
|
|
for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){}
|
|
assert( *pp==pId );
|
|
*pp = pId->pNext;
|
|
sqlite3_free(pId);
|
|
}
|
|
unixLeaveMutex();
|
|
}
|
|
#endif /* OS_VXWORKS */
|
|
/*************** End of Unique File ID Utility Used By VxWorks ****************
|
|
******************************************************************************/
|
|
|
|
|
|
/******************************************************************************
|
|
*************************** Posix Advisory Locking ****************************
|
|
**
|
|
** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996)
|
|
** section 6.5.2.2 lines 483 through 490 specify that when a process
|
|
** sets or clears a lock, that operation overrides any prior locks set
|
|
** by the same process. It does not explicitly say so, but this implies
|
|
** that it overrides locks set by the same process using a different
|
|
** file descriptor. Consider this test case:
|
|
**
|
|
** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
|
|
** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
|
|
**
|
|
** Suppose ./file1 and ./file2 are really the same file (because
|
|
** one is a hard or symbolic link to the other) then if you set
|
|
** an exclusive lock on fd1, then try to get an exclusive lock
|
|
** on fd2, it works. I would have expected the second lock to
|
|
** fail since there was already a lock on the file due to fd1.
|
|
** But not so. Since both locks came from the same process, the
|
|
** second overrides the first, even though they were on different
|
|
** file descriptors opened on different file names.
|
|
**
|
|
** This means that we cannot use POSIX locks to synchronize file access
|
|
** among competing threads of the same process. POSIX locks will work fine
|
|
** to synchronize access for threads in separate processes, but not
|
|
** threads within the same process.
|
|
**
|
|
** To work around the problem, SQLite has to manage file locks internally
|
|
** on its own. Whenever a new database is opened, we have to find the
|
|
** specific inode of the database file (the inode is determined by the
|
|
** st_dev and st_ino fields of the stat structure that fstat() fills in)
|
|
** and check for locks already existing on that inode. When locks are
|
|
** created or removed, we have to look at our own internal record of the
|
|
** locks to see if another thread has previously set a lock on that same
|
|
** inode.
|
|
**
|
|
** (Aside: The use of inode numbers as unique IDs does not work on VxWorks.
|
|
** For VxWorks, we have to use the alternative unique ID system based on
|
|
** canonical filename and implemented in the previous division.)
|
|
**
|
|
** The sqlite3_file structure for POSIX is no longer just an integer file
|
|
** descriptor. It is now a structure that holds the integer file
|
|
** descriptor and a pointer to a structure that describes the internal
|
|
** locks on the corresponding inode. There is one locking structure
|
|
** per inode, so if the same inode is opened twice, both unixFile structures
|
|
** point to the same locking structure. The locking structure keeps
|
|
** a reference count (so we will know when to delete it) and a "cnt"
|
|
** field that tells us its internal lock status. cnt==0 means the
|
|
** file is unlocked. cnt==-1 means the file has an exclusive lock.
|
|
** cnt>0 means there are cnt shared locks on the file.
|
|
**
|
|
** Any attempt to lock or unlock a file first checks the locking
|
|
** structure. The fcntl() system call is only invoked to set a
|
|
** POSIX lock if the internal lock structure transitions between
|
|
** a locked and an unlocked state.
|
|
**
|
|
** But wait: there are yet more problems with POSIX advisory locks.
|
|
**
|
|
** If you close a file descriptor that points to a file that has locks,
|
|
** all locks on that file that are owned by the current process are
|
|
** released. To work around this problem, each unixInodeInfo object
|
|
** maintains a count of the number of pending locks on that inode.
|
|
** When an attempt is made to close an unixFile, if there are
|
|
** other unixFile open on the same inode that are holding locks, the call
|
|
** to close() the file descriptor is deferred until all of the locks clear.
|
|
** The unixInodeInfo structure keeps a list of file descriptors that need to
|
|
** be closed and that list is walked (and cleared) when the last lock
|
|
** clears.
|
|
**
|
|
** Yet another problem: LinuxThreads do not play well with posix locks.
|
|
**
|
|
** Many older versions of linux use the LinuxThreads library which is
|
|
** not posix compliant. Under LinuxThreads, a lock created by thread
|
|
** A cannot be modified or overridden by a different thread B.
|
|
** Only thread A can modify the lock. Locking behavior is correct
|
|
** if the application uses the newer Native Posix Thread Library (NPTL)
|
|
** on linux - with NPTL a lock created by thread A can override locks
|
|
** in thread B. But there is no way to know at compile-time which
|
|
** threading library is being used. So there is no way to know at
|
|
** compile-time whether or not thread A can override locks on thread B.
|
|
** One has to do a run-time check to discover the behavior of the
|
|
** current process.
|
|
**
|
|
** SQLite used to support LinuxThreads. But support for LinuxThreads
|
|
** was dropped beginning with version 3.7.0. SQLite will still work with
|
|
** LinuxThreads provided that (1) there is no more than one connection
|
|
** per database file in the same process and (2) database connections
|
|
** do not move across threads.
|
|
*/
|
|
|
|
/*
|
|
** An instance of the following structure serves as the key used
|
|
** to locate a particular unixInodeInfo object.
|
|
*/
|
|
struct unixFileId {
  /* Device number of the file */
  dev_t dev;
#if OS_VXWORKS
  /* VxWorks: the vxworksFileId object serves as the unique file ID */
  struct vxworksFileId *pId;
#else
  /* Everywhere else: the inode number */
  ino_t ino;
#endif
};
|
|
|
|
/*
|
|
** An instance of the following structure is allocated for each open
|
|
** inode. Or, on LinuxThreads, there is one of these structures for
|
|
** each inode opened by each thread.
|
|
**
|
|
** A single inode can have multiple file descriptors, so each unixFile
|
|
** structure contains a pointer to an instance of this object and this
|
|
** object keeps a count of the number of unixFile pointing to it.
|
|
*/
|
|
struct unixInodeInfo {
|
|
struct unixFileId fileId; /* The lookup key */
|
|
int nShared; /* Number of SHARED locks held */
|
|
unsigned char eFileLock; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
|
|
unsigned char bProcessLock; /* An exclusive process lock is held */
|
|
int nRef; /* Number of pointers to this structure */
|
|
unixShmNode *pShmNode; /* Shared memory associated with this inode */
|
|
int nLock; /* Number of outstanding file locks */
|
|
UnixUnusedFd *pUnused; /* Unused file descriptors to close */
|
|
unixInodeInfo *pNext; /* List of all unixInodeInfo objects */
|
|
unixInodeInfo *pPrev; /* .... doubly linked */
|
|
#if SQLITE_ENABLE_LOCKING_STYLE
|
|
unsigned long long sharedByte; /* for AFP simulated shared lock */
|
|
#endif
|
|
#if OS_VXWORKS
|
|
sem_t *pSem; /* Named POSIX semaphore */
|
|
char aSemName[MAX_PATHNAME+2]; /* Name of that semaphore */
|
|
#endif
|
|
};
|
|
|
|
/*
** Head of the doubly linked list of all unixInodeInfo objects.
** findInodeInfo() inserts new entries at the front of this list and
** releaseInodeInfo() removes them.
*/
|
|
static unixInodeInfo *inodeList = 0;
|
|
|
|
/*
|
|
**
|
|
** This function - unixLogErrorAtLine(), is only ever called via the macro
|
|
** unixLogError().
|
|
**
|
|
** It is invoked after an error occurs in an OS function and errno has been
|
|
** set. It logs a message using sqlite3_log() containing the current value of
|
|
** errno and, if possible, the human-readable equivalent from strerror() or
|
|
** strerror_r().
|
|
**
|
|
** The first argument passed to the macro should be the error code that
|
|
** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN).
|
|
** The two subsequent arguments should be the name of the OS function that
|
|
** failed (e.g. "unlink", "open") and the associated file-system path,
|
|
** if any.
|
|
*/
|
|
#define unixLogError(a,b,c) unixLogErrorAtLine(a,b,c,__LINE__)
static int unixLogErrorAtLine(
  int errcode,                    /* SQLite error code */
  const char *zFunc,              /* Name of OS function that failed */
  const char *zPath,              /* File path associated with error */
  int iLine                       /* Source line number where error occurred */
){
  const int iErrno = errno;       /* Capture errno before anything can change it */
  char *zErr;                     /* Human-readable text for iErrno */

#if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R)
  /* Threadsafe build with strerror_r(): format the message into a local,
  ** zero-filled buffer. */
  char aErr[80];
  memset(aErr, 0, sizeof(aErr));
# if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU)
  /* Assume the GNU flavor, which returns a pointer to the message.  That
  ** pointer may be aErr[] or some static storage. */
  zErr = strerror_r(iErrno, aErr, sizeof(aErr)-1);
# else
  /* Assume the POSIX flavor, which writes the message into aErr[].  If
  ** this assumption is wrong the message is often just an empty string. */
  zErr = aErr;
  strerror_r(iErrno, aErr, sizeof(aErr)-1);
# endif
#elif SQLITE_THREADSAFE
  /* Threadsafe build without strerror_r(): no message text available */
  zErr = "";
#else
  /* Non-threadsafe build: plain strerror() is good enough */
  zErr = strerror(iErrno);
#endif

  /* A missing path is logged as an empty string */
  if( zPath==0 ){
    zPath = "";
  }
  sqlite3_log(errcode,
      "os_unix.c:%d: (%d) %s(%s) - %s",
      iLine, iErrno, zFunc, zPath, zErr
  );

  /* Hand the error code straight back to the caller */
  return errcode;
}
|
|
|
|
/*
|
|
** Close a file descriptor.
|
|
**
|
|
** We assume that close() almost always works, since it is only in a
|
|
** very sick application or on a very sick platform that it might fail.
|
|
** If it does fail, simply leak the file descriptor, but do log the
|
|
** error.
|
|
**
|
|
** Note that it is not safe to retry close() after EINTR since the
|
|
** file descriptor might have already been reused by another thread.
|
|
** So we don't even try to recover from an EINTR. Just log the error
|
|
** and move on.
|
|
*/
|
|
static void robust_close(unixFile *pFile, int h, int lineno){
  /* Nothing to do when close() reports success */
  if( osClose(h)==0 ) return;

  /* close() failed.  Do not retry (the descriptor number may already be
  ** in use again); just log the error, using the path of pFile if a file
  ** object was supplied. */
  if( pFile ){
    unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close", pFile->zPath, lineno);
  }else{
    unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close", 0, lineno);
  }
}
|
|
|
|
/*
|
|
** Set the pFile->lastErrno. Do this in a subroutine as that provides
|
|
** a convenient place to set a breakpoint.
|
|
*/
|
|
static void storeLastErrno(unixFile *pFile, int error){
|
|
/* Record the error number on the file object; callers in this file pass
** the current value of errno. */
pFile->lastErrno = error;
|
|
}
|
|
|
|
/*
|
|
** Close all file descriptors accumulated in the unixInodeInfo->pUnused list.
|
|
*/
|
|
static void closePendingFds(unixFile *pFile){
  unixInodeInfo *pInode = pFile->pInode;
  UnixUnusedFd *pCur = pInode->pUnused;

  /* Walk the list of deferred descriptors, closing and freeing each one.
  ** The successor is fetched before the current entry is freed. */
  while( pCur ){
    UnixUnusedFd *pFollow = pCur->pNext;
    robust_close(pFile, pCur->fd, __LINE__);
    sqlite3_free(pCur);
    pCur = pFollow;
  }

  /* The list is now empty */
  pInode->pUnused = 0;
}
|
|
|
|
/*
|
|
** Release a unixInodeInfo structure previously allocated by findInodeInfo().
|
|
**
|
|
** The mutex entered using the unixEnterMutex() function must be held
|
|
** when this function is called.
|
|
*/
|
|
static void releaseInodeInfo(unixFile *pFile){
  unixInodeInfo *pInode = pFile->pInode;
  assert( unixMutexHeld() );

  if( !ALWAYS(pInode) ) return;

  /* Drop one reference.  Nothing more to do while others remain. */
  pInode->nRef--;
  if( pInode->nRef!=0 ) return;

  /* That was the last reference: close any deferred file descriptors,
  ** unlink the object from the doubly linked inode list and free it. */
  assert( pInode->pShmNode==0 );
  closePendingFds(pFile);
  if( pInode->pPrev==0 ){
    /* No predecessor, so this object must be the list head */
    assert( inodeList==pInode );
    inodeList = pInode->pNext;
  }else{
    assert( pInode->pPrev->pNext==pInode );
    pInode->pPrev->pNext = pInode->pNext;
  }
  if( pInode->pNext!=0 ){
    assert( pInode->pNext->pPrev==pInode );
    pInode->pNext->pPrev = pInode->pPrev;
  }
  sqlite3_free(pInode);
}
|
|
|
|
/*
|
|
** Given a file descriptor, locate the unixInodeInfo object that
|
|
** describes that file descriptor. Create a new one if necessary. The
|
|
** return value might be uninitialized if an error occurs.
|
|
**
|
|
** The mutex entered using the unixEnterMutex() function must be held
|
|
** when this function is called.
|
|
**
|
|
** Return an appropriate error code.
|
|
*/
|
|
static int findInodeInfo(
|
|
unixFile *pFile, /* Unix file with file desc used in the key */
|
|
unixInodeInfo **ppInode /* Return the unixInodeInfo object here */
|
|
){
|
|
int rc; /* System call return code */
|
|
int fd; /* The file descriptor for pFile */
|
|
struct unixFileId fileId; /* Lookup key for the unixInodeInfo */
|
|
struct stat statbuf; /* Low-level file information */
|
|
unixInodeInfo *pInode = 0; /* Candidate unixInodeInfo object */
|
|
|
|
assert( unixMutexHeld() );
|
|
|
|
/* Get low-level information about the file that we can used to
|
|
** create a unique name for the file.
|
|
*/
|
|
fd = pFile->h;
|
|
rc = osFstat(fd, &statbuf);
|
|
if( rc!=0 ){
|
|
storeLastErrno(pFile, errno);
|
|
#if defined(EOVERFLOW) && defined(SQLITE_DISABLE_LFS)
|
|
if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS;
|
|
#endif
|
|
return SQLITE_IOERR;
|
|
}
|
|
|
|
#ifdef __APPLE__
|
|
/* On OS X on an msdos filesystem, the inode number is reported
|
|
** incorrectly for zero-size files. See ticket #3260. To work
|
|
** around this problem (we consider it a bug in OS X, not SQLite)
|
|
** we always increase the file size to 1 by writing a single byte
|
|
** prior to accessing the inode number. The one byte written is
|
|
** an ASCII 'S' character which also happens to be the first byte
|
|
** in the header of every SQLite database. In this way, if there
|
|
** is a race condition such that another thread has already populated
|
|
** the first page of the database, no damage is done.
|
|
*/
|
|
if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){
|
|
do{ rc = osWrite(fd, "S", 1); }while( rc<0 && errno==EINTR );
|
|
if( rc!=1 ){
|
|
storeLastErrno(pFile, errno);
|
|
return SQLITE_IOERR;
|
|
}
|
|
rc = osFstat(fd, &statbuf);
|
|
if( rc!=0 ){
|
|
storeLastErrno(pFile, errno);
|
|
return SQLITE_IOERR;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
memset(&fileId, 0, sizeof(fileId));
|
|
fileId.dev = statbuf.st_dev;
|
|
#if OS_VXWORKS
|
|
fileId.pId = pFile->pId;
|
|
#else
|
|
fileId.ino = statbuf.st_ino;
|
|
#endif
|
|
pInode = inodeList;
|
|
while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){
|
|
pInode = pInode->pNext;
|
|
}
|
|
if( pInode==0 ){
|
|
pInode = sqlite3_malloc64( sizeof(*pInode) );
|
|
if( pInode==0 ){
|
|
return SQLITE_NOMEM;
|
|
}
|
|
memset(pInode, 0, sizeof(*pInode));
|
|
memcpy(&pInode->fileId, &fileId, sizeof(fileId));
|
|
pInode->nRef = 1;
|
|
pInode->pNext = inodeList;
|
|
pInode->pPrev = 0;
|
|
if( inodeList ) inodeList->pPrev = pInode;
|
|
inodeList = pInode;
|
|
}else{
|
|
pInode->nRef++;
|
|
}
|
|
*ppInode = pInode;
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Return TRUE if pFile has been renamed or unlinked since it was first opened.
|
|
*/
|
|
static int fileHasMoved(unixFile *pFile){
#if OS_VXWORKS
  /* VxWorks: moved if the file's ID object differs from the one recorded
  ** in its inode information. */
  return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId;
#else
  struct stat buf;

  /* Without inode information there is nothing to compare against */
  if( pFile->pInode==0 ) return 0;

  /* The original path can no longer be stat()ed */
  if( osStat(pFile->zPath, &buf)!=0 ) return 1;

  /* The path now refers to a different inode number than the one recorded
  ** for this file */
  return buf.st_ino!=pFile->pInode->fileId.ino;
#endif
}
|
|
|
|
|
|
/*
|
|
** Check a unixFile that is a database. Verify the following:
|
|
**
|
|
** (1) There is exactly one hard link on the file
|
|
** (2) The file is not a symbolic link
|
|
** (3) The file has not been renamed or unlinked
|
|
**
|
|
** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right.
|
|
*/
|
|
static void verifyDbFile(unixFile *pFile){
  struct stat sInfo;

  /* At most one warning is logged per call; the checks run in this order
  ** and the first one that trips ends the function. */
  if( osFstat(pFile->h, &sInfo)!=0 ){
    sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath);
  }else if( sInfo.st_nlink==0 && (pFile->ctrlFlags & UNIXFILE_DELETE)==0 ){
    /* No links remain, and the file is not flagged UNIXFILE_DELETE */
    sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath);
  }else if( sInfo.st_nlink>1 ){
    sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath);
  }else if( fileHasMoved(pFile) ){
    sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath);
  }
}
|
|
|
|
|
|
/*
|
|
** This routine checks if there is a RESERVED lock held on the specified
|
|
** file by this or any other process. If such a lock is held, set *pResOut
|
|
** to a non-zero value otherwise *pResOut is set to zero. The return value
|
|
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
|
|
*/
|
|
static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
  unixFile *pFile = (unixFile*)id;
  int rc = SQLITE_OK;      /* Return code */
  int bHeld = 0;           /* True if a RESERVED (or higher) lock is seen */

  SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );

  assert( pFile );
  assert( pFile->eFileLock<=SHARED_LOCK );
  unixEnterMutex(); /* Because pFile->pInode is shared across threads */

  if( pFile->pInode->eFileLock>SHARED_LOCK ){
    /* Some thread in this process holds such a lock */
    bHeld = 1;
  }
#ifndef __DJGPP__
  else if( !pFile->pInode->bProcessLock ){
    /* Otherwise ask the OS whether a write-lock on the reserved byte
    ** would conflict with a lock held by another process. */
    struct flock probe;
    probe.l_type = F_WRLCK;
    probe.l_whence = SEEK_SET;
    probe.l_start = RESERVED_BYTE;
    probe.l_len = 1;
    if( osFcntl(pFile->h, F_GETLK, &probe) ){
      rc = SQLITE_IOERR_CHECKRESERVEDLOCK;
      storeLastErrno(pFile, errno);
    }else if( probe.l_type!=F_UNLCK ){
      bHeld = 1;
    }
  }
#endif

  unixLeaveMutex();
  OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, bHeld));

  *pResOut = bHeld;
  return rc;
}
|
|
|
|
/*
|
|
** Attempt to set a system-lock on the file pFile. The lock is
|
|
** described by pLock.
|
|
**
|
|
** If the pFile was opened read/write from unix-excl, then the only lock
|
|
** ever obtained is an exclusive lock, and it is obtained exactly once
|
|
** the first time any lock is attempted. All subsequent system locking
|
|
** operations become no-ops. Locking operations still happen internally,
|
|
** in order to coordinate access between separate database connections
|
|
** within this process, but all of that is handled in memory and the
|
|
** operating system does not participate.
|
|
**
|
|
** This function is a pass-through to fcntl(F_SETLK) if pFile is using
|
|
** any VFS other than "unix-excl" or if pFile is opened on "unix-excl"
|
|
** and is read-only.
|
|
**
|
|
** Zero is returned if the call completes successfully, or -1 if a call
|
|
** to fcntl() fails. In this case, errno is set appropriately (by fcntl()).
|
|
*/
|
|
static int unixFileLock(unixFile *pFile, struct flock *pLock){
  unixInodeInfo *pInode = pFile->pInode;
  struct flock sExcl;     /* Whole-range write-lock used in exclusive mode */
  int rc;                 /* Result of fcntl() */

  assert( unixMutexHeld() );
  assert( pInode!=0 );

  /* Unless the file has UNIXFILE_EXCL set and UNIXFILE_RDONLY clear, this
  ** routine is a plain pass-through to fcntl(F_SETLK). */
  if( (pFile->ctrlFlags & (UNIXFILE_EXCL|UNIXFILE_RDONLY))!=UNIXFILE_EXCL ){
    return osFcntl(pFile->h, F_SETLK, pLock);
  }

  /* Exclusive mode: once the process-wide lock has been taken, all
  ** further system locking requests are no-ops. */
  if( pInode->bProcessLock!=0 ){
    return 0;
  }

  /* First lock attempt in exclusive mode: write-lock the entire shared
  ** byte range, once, and remember that this has been done. */
  assert( pInode->nLock==0 );
  sExcl.l_type = F_WRLCK;
  sExcl.l_whence = SEEK_SET;
  sExcl.l_start = SHARED_FIRST;
  sExcl.l_len = SHARED_SIZE;
  rc = osFcntl(pFile->h, F_SETLK, &sExcl);
  if( rc<0 ) return rc;
  pInode->bProcessLock = 1;
  pInode->nLock++;
  return rc;
}
|
|
|
|
/*
|
|
** Lock the file with the lock specified by parameter eFileLock - one
|
|
** of the following:
|
|
**
|
|
** (1) SHARED_LOCK
|
|
** (2) RESERVED_LOCK
|
|
** (3) PENDING_LOCK
|
|
** (4) EXCLUSIVE_LOCK
|
|
**
|
|
** Sometimes when requesting one lock state, additional lock states
|
|
** are inserted in between. The locking might fail on one of the later
|
|
** transitions leaving the lock state different from what it started but
|
|
** still short of its goal. The following chart shows the allowed
|
|
** transitions and the inserted intermediate states:
|
|
**
|
|
** UNLOCKED -> SHARED
|
|
** SHARED -> RESERVED
|
|
** SHARED -> (PENDING) -> EXCLUSIVE
|
|
** RESERVED -> (PENDING) -> EXCLUSIVE
|
|
** PENDING -> EXCLUSIVE
|
|
**
|
|
** This routine will only increase a lock. Use the sqlite3OsUnlock()
|
|
** routine to lower a locking level.
|
|
*/
|
|
static int unixLock(sqlite3_file *id, int eFileLock){
|
|
/* The following describes the implementation of the various locks and
|
|
** lock transitions in terms of the POSIX advisory shared and exclusive
|
|
** lock primitives (called read-locks and write-locks below, to avoid
|
|
** confusion with SQLite lock names). The algorithms are complicated
|
|
** slightly in order to be compatible with windows systems simultaneously
|
|
** accessing the same database file, in case that is ever required.
|
|
**
|
|
** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
|
|
** byte', each single bytes at well known offsets, and the 'shared byte
|
|
** range', a range of 510 bytes at a well known offset.
|
|
**
|
|
** To obtain a SHARED lock, a read-lock is obtained on the 'pending
|
|
** byte'. If this is successful, a random byte from the 'shared byte
|
|
** range' is read-locked and the lock on the 'pending byte' released.
|
|
**
|
|
** A process may only obtain a RESERVED lock after it has a SHARED lock.
|
|
** A RESERVED lock is implemented by grabbing a write-lock on the
|
|
** 'reserved byte'.
|
|
**
|
|
** A process may only obtain a PENDING lock after it has obtained a
|
|
** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
|
|
** on the 'pending byte'. This ensures that no new SHARED locks can be
|
|
** obtained, but existing SHARED locks are allowed to persist. A process
|
|
** does not have to obtain a RESERVED lock on the way to a PENDING lock.
|
|
** This property is used by the algorithm for rolling back a journal file
|
|
** after a crash.
|
|
**
|
|
** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
|
|
** implemented by obtaining a write-lock on the entire 'shared byte
|
|
** range'. Since all other locks require a read-lock on one of the bytes
|
|
** within this range, this ensures that no other locks are held on the
|
|
** database.
|
|
**
|
|
** The reason a single byte cannot be used instead of the 'shared byte
|
|
** range' is that some versions of windows do not support read-locks. By
|
|
** locking a random byte from a range, concurrent SHARED locks may exist
|
|
** even if the locking primitive used is always a write-lock.
|
|
*/
|
|
int rc = SQLITE_OK;
|
|
unixFile *pFile = (unixFile*)id;
|
|
unixInodeInfo *pInode;
|
|
struct flock lock;
|
|
int tErrno = 0;
|
|
|
|
assert( pFile );
|
|
OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h,
|
|
azFileLock(eFileLock), azFileLock(pFile->eFileLock),
|
|
azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared,
|
|
osGetpid(0)));
|
|
|
|
/* If there is already a lock of this type or more restrictive on the
|
|
** unixFile, do nothing. Don't use the end_lock: exit path, as
|
|
** unixEnterMutex() hasn't been called yet.
|
|
*/
|
|
if( pFile->eFileLock>=eFileLock ){
|
|
OSTRACE(("LOCK %d %s ok (already held) (unix)\n", pFile->h,
|
|
azFileLock(eFileLock)));
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/* Make sure the locking sequence is correct.
|
|
** (1) We never move from unlocked to anything higher than shared lock.
|
|
** (2) SQLite never explicitly requests a pendig lock.
|
|
** (3) A shared lock is always held when a reserve lock is requested.
|
|
*/
|
|
assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
|
|
assert( eFileLock!=PENDING_LOCK );
|
|
assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );
|
|
|
|
/* This mutex is needed because pFile->pInode is shared across threads
|
|
*/
|
|
unixEnterMutex();
|
|
pInode = pFile->pInode;
|
|
|
|
/* If some thread using this PID has a lock via a different unixFile*
|
|
** handle that precludes the requested lock, return BUSY.
|
|
*/
|
|
if( (pFile->eFileLock!=pInode->eFileLock &&
|
|
(pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK))
|
|
){
|
|
rc = SQLITE_BUSY;
|
|
goto end_lock;
|
|
}
|
|
|
|
/* If a SHARED lock is requested, and some thread using this PID already
|
|
** has a SHARED or RESERVED lock, then increment reference counts and
|
|
** return SQLITE_OK.
|
|
*/
|
|
if( eFileLock==SHARED_LOCK &&
|
|
(pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){
|
|
assert( eFileLock==SHARED_LOCK );
|
|
assert( pFile->eFileLock==0 );
|
|
assert( pInode->nShared>0 );
|
|
pFile->eFileLock = SHARED_LOCK;
|
|
pInode->nShared++;
|
|
pInode->nLock++;
|
|
goto end_lock;
|
|
}
|
|
|
|
|
|
/* A PENDING lock is needed before acquiring a SHARED lock and before
|
|
** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
|
|
** be released.
|
|
*/
|
|
lock.l_len = 1L;
|
|
lock.l_whence = SEEK_SET;
|
|
if( eFileLock==SHARED_LOCK
|
|
|| (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)
|
|
){
|
|
lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK);
|
|
lock.l_start = PENDING_BYTE;
|
|
if( unixFileLock(pFile, &lock) ){
|
|
tErrno = errno;
|
|
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
|
|
if( rc!=SQLITE_BUSY ){
|
|
storeLastErrno(pFile, tErrno);
|
|
}
|
|
goto end_lock;
|
|
}
|
|
}
|
|
|
|
|
|
/* If control gets to this point, then actually go ahead and make
|
|
** operating system calls for the specified lock.
|
|
*/
|
|
if( eFileLock==SHARED_LOCK ){
|
|
assert( pInode->nShared==0 );
|
|
assert( pInode->eFileLock==0 );
|
|
assert( rc==SQLITE_OK );
|
|
|
|
/* Now get the read-lock */
|
|
lock.l_start = SHARED_FIRST;
|
|
lock.l_len = SHARED_SIZE;
|
|
if( unixFileLock(pFile, &lock) ){
|
|
tErrno = errno;
|
|
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
|
|
}
|
|
|
|
/* Drop the temporary PENDING lock */
|
|
lock.l_start = PENDING_BYTE;
|
|
lock.l_len = 1L;
|
|
lock.l_type = F_UNLCK;
|
|
if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){
|
|
/* This could happen with a network mount */
|
|
tErrno = errno;
|
|
rc = SQLITE_IOERR_UNLOCK;
|
|
}
|
|
|
|
if( rc ){
|
|
if( rc!=SQLITE_BUSY ){
|
|
storeLastErrno(pFile, tErrno);
|
|
}
|
|
goto end_lock;
|
|
}else{
|
|
pFile->eFileLock = SHARED_LOCK;
|
|
pInode->nLock++;
|
|
pInode->nShared = 1;
|
|
}
|
|
}else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){
|
|
/* We are trying for an exclusive lock but another thread in this
|
|
** same process is still holding a shared lock. */
|
|
rc = SQLITE_BUSY;
|
|
}else{
|
|
/* The request was for a RESERVED or EXCLUSIVE lock. It is
|
|
** assumed that there is a SHARED or greater lock on the file
|
|
** already.
|
|
*/
|
|
assert( 0!=pFile->eFileLock );
|
|
lock.l_type = F_WRLCK;
|
|
|
|
assert( eFileLock==RESERVED_LOCK || eFileLock==EXCLUSIVE_LOCK );
|
|
if( eFileLock==RESERVED_LOCK ){
|
|
lock.l_start = RESERVED_BYTE;
|
|
lock.l_len = 1L;
|
|
}else{
|
|
lock.l_start = SHARED_FIRST;
|
|
lock.l_len = SHARED_SIZE;
|
|
}
|
|
|
|
if( unixFileLock(pFile, &lock) ){
|
|
tErrno = errno;
|
|
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
|
|
if( rc!=SQLITE_BUSY ){
|
|
storeLastErrno(pFile, tErrno);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
#ifdef SQLITE_DEBUG
|
|
/* Set up the transaction-counter change checking flags when
|
|
** transitioning from a SHARED to a RESERVED lock. The change
|
|
** from SHARED to RESERVED marks the beginning of a normal
|
|
** write operation (not a hot journal rollback).
|
|
*/
|
|
if( rc==SQLITE_OK
|
|
&& pFile->eFileLock<=SHARED_LOCK
|
|
&& eFileLock==RESERVED_LOCK
|
|
){
|
|
pFile->transCntrChng = 0;
|
|
pFile->dbUpdate = 0;
|
|
pFile->inNormalWrite = 1;
|
|
}
|
|
#endif
|
|
|
|
|
|
if( rc==SQLITE_OK ){
|
|
pFile->eFileLock = eFileLock;
|
|
pInode->eFileLock = eFileLock;
|
|
}else if( eFileLock==EXCLUSIVE_LOCK ){
|
|
pFile->eFileLock = PENDING_LOCK;
|
|
pInode->eFileLock = PENDING_LOCK;
|
|
}
|
|
|
|
end_lock:
|
|
unixLeaveMutex();
|
|
OSTRACE(("LOCK %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock),
|
|
rc==SQLITE_OK ? "ok" : "failed"));
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Add the file descriptor used by file handle pFile to the corresponding
|
|
** pUnused list.
|
|
*/
|
|
static void setPendingFd(unixFile *pFile){
|
|
unixInodeInfo *pInode = pFile->pInode;
|
|
UnixUnusedFd *p = pFile->pUnused;
|
|
p->pNext = pInode->pUnused;
|
|
pInode->pUnused = p;
|
|
pFile->h = -1;
|
|
pFile->pUnused = 0;
|
|
}
|
|
|
|
/*
|
|
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
|
|
** must be either NO_LOCK or SHARED_LOCK.
|
|
**
|
|
** If the locking level of the file descriptor is already at or below
|
|
** the requested locking level, this routine is a no-op.
|
|
**
|
|
** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED
|
|
** the byte range is divided into 2 parts and the first part is unlocked then
|
|
** set to a read lock, then the other part is simply unlocked. This works
|
|
** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to
|
|
** remove the write lock on a region when a read lock is set.
|
|
*/
|
|
static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
|
|
unixFile *pFile = (unixFile*)id;
|
|
unixInodeInfo *pInode;
|
|
struct flock lock;
|
|
int rc = SQLITE_OK;
|
|
|
|
assert( pFile );
|
|
OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock,
|
|
pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
|
|
osGetpid(0)));
|
|
|
|
assert( eFileLock<=SHARED_LOCK );
|
|
if( pFile->eFileLock<=eFileLock ){
|
|
return SQLITE_OK;
|
|
}
|
|
unixEnterMutex();
|
|
pInode = pFile->pInode;
|
|
assert( pInode->nShared!=0 );
|
|
if( pFile->eFileLock>SHARED_LOCK ){
|
|
assert( pInode->eFileLock==pFile->eFileLock );
|
|
|
|
#ifdef SQLITE_DEBUG
|
|
/* When reducing a lock such that other processes can start
|
|
** reading the database file again, make sure that the
|
|
** transaction counter was updated if any part of the database
|
|
** file changed. If the transaction counter is not updated,
|
|
** other connections to the same file might not realize that
|
|
** the file has changed and hence might not know to flush their
|
|
** cache. The use of a stale cache can lead to database corruption.
|
|
*/
|
|
pFile->inNormalWrite = 0;
|
|
#endif
|
|
|
|
/* downgrading to a shared lock on NFS involves clearing the write lock
|
|
** before establishing the readlock - to avoid a race condition we downgrade
|
|
** the lock in 2 blocks, so that part of the range will be covered by a
|
|
** write lock until the rest is covered by a read lock:
|
|
** 1: [WWWWW]
|
|
** 2: [....W]
|
|
** 3: [RRRRW]
|
|
** 4: [RRRR.]
|
|
*/
|
|
if( eFileLock==SHARED_LOCK ){
|
|
#if !defined(__APPLE__) || !SQLITE_ENABLE_LOCKING_STYLE
|
|
(void)handleNFSUnlock;
|
|
assert( handleNFSUnlock==0 );
|
|
#endif
|
|
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
|
|
if( handleNFSUnlock ){
|
|
int tErrno; /* Error code from system call errors */
|
|
off_t divSize = SHARED_SIZE - 1;
|
|
|
|
lock.l_type = F_UNLCK;
|
|
lock.l_whence = SEEK_SET;
|
|
lock.l_start = SHARED_FIRST;
|
|
lock.l_len = divSize;
|
|
if( unixFileLock(pFile, &lock)==(-1) ){
|
|
tErrno = errno;
|
|
rc = SQLITE_IOERR_UNLOCK;
|
|
storeLastErrno(pFile, tErrno);
|
|
goto end_unlock;
|
|
}
|
|
lock.l_type = F_RDLCK;
|
|
lock.l_whence = SEEK_SET;
|
|
lock.l_start = SHARED_FIRST;
|
|
lock.l_len = divSize;
|
|
if( unixFileLock(pFile, &lock)==(-1) ){
|
|
tErrno = errno;
|
|
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK);
|
|
if( IS_LOCK_ERROR(rc) ){
|
|
storeLastErrno(pFile, tErrno);
|
|
}
|
|
goto end_unlock;
|
|
}
|
|
lock.l_type = F_UNLCK;
|
|
lock.l_whence = SEEK_SET;
|
|
lock.l_start = SHARED_FIRST+divSize;
|
|
lock.l_len = SHARED_SIZE-divSize;
|
|
if( unixFileLock(pFile, &lock)==(-1) ){
|
|
tErrno = errno;
|
|
rc = SQLITE_IOERR_UNLOCK;
|
|
storeLastErrno(pFile, tErrno);
|
|
goto end_unlock;
|
|
}
|
|
}else
|
|
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
|
|
{
|
|
lock.l_type = F_RDLCK;
|
|
lock.l_whence = SEEK_SET;
|
|
lock.l_start = SHARED_FIRST;
|
|
lock.l_len = SHARED_SIZE;
|
|
if( unixFileLock(pFile, &lock) ){
|
|
/* In theory, the call to unixFileLock() cannot fail because another
|
|
** process is holding an incompatible lock. If it does, this
|
|
** indicates that the other process is not following the locking
|
|
** protocol. If this happens, return SQLITE_IOERR_RDLOCK. Returning
|
|
** SQLITE_BUSY would confuse the upper layer (in practice it causes
|
|
** an assert to fail). */
|
|
rc = SQLITE_IOERR_RDLOCK;
|
|
storeLastErrno(pFile, errno);
|
|
goto end_unlock;
|
|
}
|
|
}
|
|
}
|
|
lock.l_type = F_UNLCK;
|
|
lock.l_whence = SEEK_SET;
|
|
lock.l_start = PENDING_BYTE;
|
|
lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
|
|
if( unixFileLock(pFile, &lock)==0 ){
|
|
pInode->eFileLock = SHARED_LOCK;
|
|
}else{
|
|
rc = SQLITE_IOERR_UNLOCK;
|
|
storeLastErrno(pFile, errno);
|
|
goto end_unlock;
|
|
}
|
|
}
|
|
if( eFileLock==NO_LOCK ){
|
|
/* Decrement the shared lock counter. Release the lock using an
|
|
** OS call only when all threads in this same process have released
|
|
** the lock.
|
|
*/
|
|
pInode->nShared--;
|
|
if( pInode->nShared==0 ){
|
|
lock.l_type = F_UNLCK;
|
|
lock.l_whence = SEEK_SET;
|
|
lock.l_start = lock.l_len = 0L;
|
|
if( unixFileLock(pFile, &lock)==0 ){
|
|
pInode->eFileLock = NO_LOCK;
|
|
}else{
|
|
rc = SQLITE_IOERR_UNLOCK;
|
|
storeLastErrno(pFile, errno);
|
|
pInode->eFileLock = NO_LOCK;
|
|
pFile->eFileLock = NO_LOCK;
|
|
}
|
|
}
|
|
|
|
/* Decrement the count of locks against this same file. When the
|
|
** count reaches zero, close any other file descriptors whose close
|
|
** was deferred because of outstanding locks.
|
|
*/
|
|
pInode->nLock--;
|
|
assert( pInode->nLock>=0 );
|
|
if( pInode->nLock==0 ){
|
|
closePendingFds(pFile);
|
|
}
|
|
}
|
|
|
|
end_unlock:
|
|
unixLeaveMutex();
|
|
if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
|
|
** must be either NO_LOCK or SHARED_LOCK.
|
|
**
|
|
** If the locking level of the file descriptor is already at or below
|
|
** the requested locking level, this routine is a no-op.
|
|
*/
|
|
static int unixUnlock(sqlite3_file *id, int eFileLock){
|
|
#if SQLITE_MAX_MMAP_SIZE>0
|
|
assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 );
|
|
#endif
|
|
return posixUnlock(id, eFileLock, 0);
|
|
}
|
|
|
|
#if SQLITE_MAX_MMAP_SIZE>0
/* Forward declarations of the memory-mapping helpers; closeUnixFile()
** below calls unixUnmapfile(). */
static void unixUnmapfile(unixFile *pFd);
static int unixMapfile(unixFile *pFd, i64 nByte);
#endif
|
|
|
|
/*
|
|
** This function performs the parts of the "close file" operation
|
|
** common to all locking schemes. It closes the directory and file
|
|
** handles, if they are valid, and sets all fields of the unixFile
|
|
** structure to 0.
|
|
**
|
|
** It is *not* necessary to hold the mutex when this routine is called,
|
|
** even on VxWorks. A mutex will be acquired on VxWorks by the
|
|
** vxworksReleaseFileId() routine.
|
|
*/
|
|
static int closeUnixFile(sqlite3_file *id){
|
|
unixFile *pFile = (unixFile*)id;
|
|
#if SQLITE_MAX_MMAP_SIZE>0
|
|
unixUnmapfile(pFile);
|
|
#endif
|
|
if( pFile->h>=0 ){
|
|
robust_close(pFile, pFile->h, __LINE__);
|
|
pFile->h = -1;
|
|
}
|
|
#if OS_VXWORKS
|
|
if( pFile->pId ){
|
|
if( pFile->ctrlFlags & UNIXFILE_DELETE ){
|
|
osUnlink(pFile->pId->zCanonicalName);
|
|
}
|
|
vxworksReleaseFileId(pFile->pId);
|
|
pFile->pId = 0;
|
|
}
|
|
#endif
|
|
#ifdef SQLITE_UNLINK_AFTER_CLOSE
|
|
if( pFile->ctrlFlags & UNIXFILE_DELETE ){
|
|
osUnlink(pFile->zPath);
|
|
sqlite3_free(*(char**)&pFile->zPath);
|
|
pFile->zPath = 0;
|
|
}
|
|
#endif
|
|
OSTRACE(("CLOSE %-3d\n", pFile->h));
|
|
OpenCounter(-1);
|
|
sqlite3_free(pFile->pUnused);
|
|
memset(pFile, 0, sizeof(unixFile));
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Close a file.
|
|
*/
|
|
static int unixClose(sqlite3_file *id){
|
|
int rc = SQLITE_OK;
|
|
unixFile *pFile = (unixFile *)id;
|
|
verifyDbFile(pFile);
|
|
unixUnlock(id, NO_LOCK);
|
|
unixEnterMutex();
|
|
|
|
/* unixFile.pInode is always valid here. Otherwise, a different close
|
|
** routine (e.g. nolockClose()) would be called instead.
|
|
*/
|
|
assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 );
|
|
if( ALWAYS(pFile->pInode) && pFile->pInode->nLock ){
|
|
/* If there are outstanding locks, do not actually close the file just
|
|
** yet because that would clear those locks. Instead, add the file
|
|
** descriptor to pInode->pUnused list. It will be automatically closed
|
|
** when the last lock is cleared.
|
|
*/
|
|
setPendingFd(pFile);
|
|
}
|
|
releaseInodeInfo(pFile);
|
|
rc = closeUnixFile(id);
|
|
unixLeaveMutex();
|
|
return rc;
|
|
}
|
|
|
|
/************** End of the posix advisory lock implementation *****************
|
|
******************************************************************************/
|
|
|
|
/******************************************************************************
|
|
****************************** No-op Locking **********************************
|
|
**
|
|
** Of the various locking implementations available, this is by far the
|
|
** simplest: locking is ignored. No attempt is made to lock the database
|
|
** file for reading or writing.
|
|
**
|
|
** This locking mode is appropriate for use on read-only databases
** (for example, databases that are burned onto a CD-ROM). It can
|
|
** also be used if the application employs some external mechanism to
|
|
** prevent simultaneous access of the same database by two or more
|
|
** database connections. But there is a serious risk of database
|
|
** corruption if this locking mode is used in situations where multiple
|
|
** database connections are accessing the same database file at the same
|
|
** time and one or more of those connections are writing.
|
|
*/
|
|
|
|
/* No-op locking never reports a RESERVED lock: *pResOut is always set
** to zero and SQLITE_OK is returned. */
static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){
  *pResOut = 0;
  UNUSED_PARAMETER(NotUsed);
  return SQLITE_OK;
}
|
|
/* No-op locking: every lock request succeeds without doing anything. */
static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){
  UNUSED_PARAMETER(NotUsed);
  UNUSED_PARAMETER(NotUsed2);
  return SQLITE_OK;
}
|
|
/* No-op locking: every unlock request succeeds without doing anything. */
static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){
  UNUSED_PARAMETER(NotUsed);
  UNUSED_PARAMETER(NotUsed2);
  return SQLITE_OK;
}
|
|
|
|
/*
|
|
** Close the file.
|
|
*/
|
|
static int nolockClose(sqlite3_file *id) {
|
|
return closeUnixFile(id);
|
|
}
|
|
|
|
/******************* End of the no-op lock implementation *********************
|
|
******************************************************************************/
|
|
|
|
/******************************************************************************
|
|
************************* Begin dot-file Locking ******************************
|
|
**
|
|
** The dotfile locking implementation uses the existence of separate lock
|
|
** files (really a directory) to control access to the database. This works
|
|
** on just about every filesystem imaginable. But there are serious downsides:
|
|
**
|
|
** (1) There is zero concurrency. A single reader blocks all other
|
|
** connections from reading or writing the database.
|
|
**
|
|
** (2) An application crash or power loss can leave stale lock files
|
|
** sitting around that need to be cleared manually.
|
|
**
|
|
** Nevertheless, a dotlock is an appropriate locking mode for use if no
|
|
** other locking strategy is available.
|
|
**
|
|
** Dotfile locking works by creating a subdirectory in the same directory as
|
|
** the database and with the same name but with a ".lock" extension added.
|
|
** The existence of a lock directory implies an EXCLUSIVE lock. All other
|
|
** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE.
|
|
*/
|
|
|
|
/*
|
|
** The file suffix added to the data base filename in order to create the
|
|
** lock directory.
|
|
*/
|
|
#define DOTLOCK_SUFFIX ".lock"
|
|
|
|
/*
|
|
** This routine checks if there is a RESERVED lock held on the specified
|
|
** file by this or any other process. If such a lock is held, set *pResOut
|
|
** to a non-zero value otherwise *pResOut is set to zero. The return value
|
|
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
|
|
**
|
|
** In dotfile locking, either a lock exists or it does not. So in this
|
|
** variation of CheckReservedLock(), *pResOut is set to true if any lock
|
|
** is held on the file and false if the file is unlocked.
|
|
*/
|
|
static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
|
|
int rc = SQLITE_OK;
|
|
int reserved = 0;
|
|
unixFile *pFile = (unixFile*)id;
|
|
|
|
SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
|
|
|
|
assert( pFile );
|
|
reserved = osAccess((const char*)pFile->lockingContext, 0)==0;
|
|
OSTRACE(("TEST WR-LOCK %d %d %d (dotlock)\n", pFile->h, rc, reserved));
|
|
*pResOut = reserved;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Lock the file with the lock specified by parameter eFileLock - one
|
|
** of the following:
|
|
**
|
|
** (1) SHARED_LOCK
|
|
** (2) RESERVED_LOCK
|
|
** (3) PENDING_LOCK
|
|
** (4) EXCLUSIVE_LOCK
|
|
**
|
|
** Sometimes when requesting one lock state, additional lock states
|
|
** are inserted in between. The locking might fail on one of the later
|
|
** transitions leaving the lock state different from what it started but
|
|
** still short of its goal. The following chart shows the allowed
|
|
** transitions and the inserted intermediate states:
|
|
**
|
|
** UNLOCKED -> SHARED
|
|
** SHARED -> RESERVED
|
|
** SHARED -> (PENDING) -> EXCLUSIVE
|
|
** RESERVED -> (PENDING) -> EXCLUSIVE
|
|
** PENDING -> EXCLUSIVE
|
|
**
|
|
** This routine will only increase a lock. Use the sqlite3OsUnlock()
|
|
** routine to lower a locking level.
|
|
**
|
|
** With dotfile locking, we really only support state (4): EXCLUSIVE.
|
|
** But we track the other locking levels internally.
|
|
*/
|
|
static int dotlockLock(sqlite3_file *id, int eFileLock) {
|
|
unixFile *pFile = (unixFile*)id;
|
|
char *zLockFile = (char *)pFile->lockingContext;
|
|
int rc = SQLITE_OK;
|
|
|
|
|
|
/* If we have any lock, then the lock file already exists. All we have
|
|
** to do is adjust our internal record of the lock level.
|
|
*/
|
|
if( pFile->eFileLock > NO_LOCK ){
|
|
pFile->eFileLock = eFileLock;
|
|
/* Always update the timestamp on the old file */
|
|
#ifdef HAVE_UTIME
|
|
utime(zLockFile, NULL);
|
|
#else
|
|
utimes(zLockFile, NULL);
|
|
#endif
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/* grab an exclusive lock */
|
|
rc = osMkdir(zLockFile, 0777);
|
|
if( rc<0 ){
|
|
/* failed to open/create the lock directory */
|
|
int tErrno = errno;
|
|
if( EEXIST == tErrno ){
|
|
rc = SQLITE_BUSY;
|
|
} else {
|
|
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
|
|
if( rc!=SQLITE_BUSY ){
|
|
storeLastErrno(pFile, tErrno);
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/* got it, set the type and return ok */
|
|
pFile->eFileLock = eFileLock;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
|
|
** must be either NO_LOCK or SHARED_LOCK.
|
|
**
|
|
** If the locking level of the file descriptor is already at or below
|
|
** the requested locking level, this routine is a no-op.
|
|
**
|
|
** When the locking level reaches NO_LOCK, delete the lock file.
|
|
*/
|
|
static int dotlockUnlock(sqlite3_file *id, int eFileLock) {
|
|
unixFile *pFile = (unixFile*)id;
|
|
char *zLockFile = (char *)pFile->lockingContext;
|
|
int rc;
|
|
|
|
assert( pFile );
|
|
OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock,
|
|
pFile->eFileLock, osGetpid(0)));
|
|
assert( eFileLock<=SHARED_LOCK );
|
|
|
|
/* no-op if possible */
|
|
if( pFile->eFileLock==eFileLock ){
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/* To downgrade to shared, simply update our internal notion of the
|
|
** lock state. No need to mess with the file on disk.
|
|
*/
|
|
if( eFileLock==SHARED_LOCK ){
|
|
pFile->eFileLock = SHARED_LOCK;
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/* To fully unlock the database, delete the lock file */
|
|
assert( eFileLock==NO_LOCK );
|
|
rc = osRmdir(zLockFile);
|
|
if( rc<0 ){
|
|
int tErrno = errno;
|
|
if( tErrno==ENOENT ){
|
|
rc = SQLITE_OK;
|
|
}else{
|
|
rc = SQLITE_IOERR_UNLOCK;
|
|
storeLastErrno(pFile, tErrno);
|
|
}
|
|
return rc;
|
|
}
|
|
pFile->eFileLock = NO_LOCK;
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Close a file. Make sure the lock has been released before closing.
|
|
*/
|
|
static int dotlockClose(sqlite3_file *id) {
|
|
unixFile *pFile = (unixFile*)id;
|
|
assert( id!=0 );
|
|
dotlockUnlock(id, NO_LOCK);
|
|
sqlite3_free(pFile->lockingContext);
|
|
return closeUnixFile(id);
|
|
}
|
|
/****************** End of the dot-file lock implementation *******************
|
|
******************************************************************************/
|
|
|
|
/******************************************************************************
|
|
************************** Begin flock Locking ********************************
|
|
**
|
|
** Use the flock() system call to do file locking.
|
|
**
|
|
** flock() locking is like dot-file locking in that the various
|
|
** fine-grain locking levels supported by SQLite are collapsed into
|
|
** a single exclusive lock. In other words, SHARED, RESERVED, and
|
|
** PENDING locks are the same thing as an EXCLUSIVE lock. SQLite
|
|
** still works when you do this, but concurrency is reduced since
|
|
** only a single process can be reading the database at a time.
|
|
**
|
|
** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off
|
|
*/
|
|
#if SQLITE_ENABLE_LOCKING_STYLE
|
|
|
|
/*
** flock() wrapper that transparently retries calls interrupted by a
** signal (EINTR).  Where EINTR is not defined this is plain flock().
*/
#ifdef EINTR
static int robust_flock(int fd, int op){
  for(;;){
    int rc = flock(fd, op);
    /* Done unless the call failed specifically because of EINTR */
    if( rc>=0 || errno!=EINTR ) return rc;
  }
}
#else
# define robust_flock(a,b) flock(a,b)
#endif
|
|
|
|
|
|
/*
|
|
** This routine checks if there is a RESERVED lock held on the specified
|
|
** file by this or any other process. If such a lock is held, set *pResOut
|
|
** to a non-zero value otherwise *pResOut is set to zero. The return value
|
|
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
|
|
*/
|
|
static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){
|
|
int rc = SQLITE_OK;
|
|
int reserved = 0;
|
|
unixFile *pFile = (unixFile*)id;
|
|
|
|
SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
|
|
|
|
assert( pFile );
|
|
|
|
/* Check if a thread in this process holds such a lock */
|
|
if( pFile->eFileLock>SHARED_LOCK ){
|
|
reserved = 1;
|
|
}
|
|
|
|
/* Otherwise see if some other process holds it. */
|
|
if( !reserved ){
|
|
/* attempt to get the lock */
|
|
int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB);
|
|
if( !lrc ){
|
|
/* got the lock, unlock it */
|
|
lrc = robust_flock(pFile->h, LOCK_UN);
|
|
if ( lrc ) {
|
|
int tErrno = errno;
|
|
/* unlock failed with an error */
|
|
lrc = SQLITE_IOERR_UNLOCK;
|
|
storeLastErrno(pFile, tErrno);
|
|
rc = lrc;
|
|
}
|
|
} else {
|
|
int tErrno = errno;
|
|
reserved = 1;
|
|
/* someone else might have it reserved */
|
|
lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
|
|
if( IS_LOCK_ERROR(lrc) ){
|
|
storeLastErrno(pFile, tErrno);
|
|
rc = lrc;
|
|
}
|
|
}
|
|
}
|
|
OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved));
|
|
|
|
#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
|
|
if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){
|
|
rc = SQLITE_OK;
|
|
reserved=1;
|
|
}
|
|
#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */
|
|
*pResOut = reserved;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Lock the file with the lock specified by parameter eFileLock - one
|
|
** of the following:
|
|
**
|
|
** (1) SHARED_LOCK
|
|
** (2) RESERVED_LOCK
|
|
** (3) PENDING_LOCK
|
|
** (4) EXCLUSIVE_LOCK
|
|
**
|
|
** Sometimes when requesting one lock state, additional lock states
|
|
** are inserted in between. The locking might fail on one of the later
|
|
** transitions leaving the lock state different from what it started but
|
|
** still short of its goal. The following chart shows the allowed
|
|
** transitions and the inserted intermediate states:
|
|
**
|
|
** UNLOCKED -> SHARED
|
|
** SHARED -> RESERVED
|
|
** SHARED -> (PENDING) -> EXCLUSIVE
|
|
** RESERVED -> (PENDING) -> EXCLUSIVE
|
|
** PENDING -> EXCLUSIVE
|
|
**
|
|
** flock() only really support EXCLUSIVE locks. We track intermediate
|
|
** lock states in the sqlite3_file structure, but all locks SHARED or
|
|
** above are really EXCLUSIVE locks and exclude all other processes from
|
|
** access the file.
|
|
**
|
|
** This routine will only increase a lock. Use the sqlite3OsUnlock()
|
|
** routine to lower a locking level.
|
|
*/
|
|
static int flockLock(sqlite3_file *id, int eFileLock) {
|
|
int rc = SQLITE_OK;
|
|
unixFile *pFile = (unixFile*)id;
|
|
|
|
assert( pFile );
|
|
|
|
/* if we already have a lock, it is exclusive.
|
|
** Just adjust level and punt on outta here. */
|
|
if (pFile->eFileLock > NO_LOCK) {
|
|
pFile->eFileLock = eFileLock;
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/* grab an exclusive lock */
|
|
|
|
if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) {
|
|
int tErrno = errno;
|
|
/* didn't get, must be busy */
|
|
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
|
|
if( IS_LOCK_ERROR(rc) ){
|
|
storeLastErrno(pFile, tErrno);
|
|
}
|
|
} else {
|
|
/* got it, set the type and return ok */
|
|
pFile->eFileLock = eFileLock;
|
|
}
|
|
OSTRACE(("LOCK %d %s %s (flock)\n", pFile->h, azFileLock(eFileLock),
|
|
rc==SQLITE_OK ? "ok" : "failed"));
|
|
#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
|
|
if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){
|
|
rc = SQLITE_BUSY;
|
|
}
|
|
#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */
|
|
return rc;
|
|
}
|
|
|
|
|
|
/*
|
|
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
|
|
** must be either NO_LOCK or SHARED_LOCK.
|
|
**
|
|
** If the locking level of the file descriptor is already at or below
|
|
** the requested locking level, this routine is a no-op.
|
|
*/
|
|
static int flockUnlock(sqlite3_file *id, int eFileLock) {
|
|
unixFile *pFile = (unixFile*)id;
|
|
|
|
assert( pFile );
|
|
OSTRACE(("UNLOCK %d %d was %d pid=%d (flock)\n", pFile->h, eFileLock,
|
|
pFile->eFileLock, osGetpid(0)));
|
|
assert( eFileLock<=SHARED_LOCK );
|
|
|
|
/* no-op if possible */
|
|
if( pFile->eFileLock==eFileLock ){
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/* shared can just be set because we always have an exclusive */
|
|
if (eFileLock==SHARED_LOCK) {
|
|
pFile->eFileLock = eFileLock;
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/* no, really, unlock. */
|
|
if( robust_flock(pFile->h, LOCK_UN) ){
|
|
#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
|
|
return SQLITE_OK;
|
|
#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */
|
|
return SQLITE_IOERR_UNLOCK;
|
|
}else{
|
|
pFile->eFileLock = NO_LOCK;
|
|
return SQLITE_OK;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Close a file.
|
|
*/
|
|
static int flockClose(sqlite3_file *id) {
|
|
assert( id!=0 );
|
|
flockUnlock(id, NO_LOCK);
|
|
return closeUnixFile(id);
|
|
}
|
|
|
|
#endif /* SQLITE_ENABLE_LOCKING_STYLE */
|
|
|
|
/******************* End of the flock lock implementation *********************
|
|
******************************************************************************/
|
|
|
|
/******************************************************************************
|
|
************************ Begin Named Semaphore Locking ************************
|
|
**
|
|
** Named semaphore locking is only supported on VxWorks.
|
|
**
|
|
** Semaphore locking is like dot-lock and flock in that it really only
|
|
** supports EXCLUSIVE locking. Only a single process can read or write
|
|
** the database file at a time. This reduces potential concurrency, but
|
|
** makes the lock implementation much easier.
|
|
*/
|
|
#if OS_VXWORKS
|
|
|
|
/*
|
|
** This routine checks if there is a RESERVED lock held on the specified
|
|
** file by this or any other process. If such a lock is held, set *pResOut
|
|
** to a non-zero value otherwise *pResOut is set to zero. The return value
|
|
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
|
|
*/
|
|
static int semXCheckReservedLock(sqlite3_file *id, int *pResOut){
  unixFile *pFile = (unixFile*)id;
  int rc = SQLITE_OK;             /* Return code */
  int isReserved;                 /* Value written to *pResOut */

  SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );

  assert( pFile );

  /* A lock above SHARED held through this handle counts as reserved. */
  isReserved = (pFile->eFileLock>SHARED_LOCK) ? 1 : 0;

  if( isReserved==0 ){
    /* Probe the semaphore: if it can be taken, nobody else holds it. */
    sem_t *pSem = pFile->pInode->pSem;

    if( sem_trywait(pSem)!=-1 ){
      /* The semaphore was free; give it straight back. */
      sem_post(pSem);
    }else{
      int savedErrno = errno;
      if( savedErrno==EAGAIN ){
        /* The semaphore is taken.  That only means "someone else" when
        ** this handle itself holds no lock at all. */
        isReserved = (pFile->eFileLock < SHARED_LOCK);
      }else{
        rc = sqliteErrorFromPosixError(savedErrno,
                                       SQLITE_IOERR_CHECKRESERVEDLOCK);
        storeLastErrno(pFile, savedErrno);
      }
    }
  }
  OSTRACE(("TEST WR-LOCK %d %d %d (sem)\n", pFile->h, rc, isReserved));

  *pResOut = isReserved;
  return rc;
}
|
|
|
|
/*
|
|
** Lock the file with the lock specified by parameter eFileLock - one
|
|
** of the following:
|
|
**
|
|
** (1) SHARED_LOCK
|
|
** (2) RESERVED_LOCK
|
|
** (3) PENDING_LOCK
|
|
** (4) EXCLUSIVE_LOCK
|
|
**
|
|
** Sometimes when requesting one lock state, additional lock states
|
|
** are inserted in between. The locking might fail on one of the later
|
|
** transitions leaving the lock state different from what it started but
|
|
** still short of its goal. The following chart shows the allowed
|
|
** transitions and the inserted intermediate states:
|
|
**
|
|
** UNLOCKED -> SHARED
|
|
** SHARED -> RESERVED
|
|
** SHARED -> (PENDING) -> EXCLUSIVE
|
|
** RESERVED -> (PENDING) -> EXCLUSIVE
|
|
** PENDING -> EXCLUSIVE
|
|
**
|
|
** Semaphore locks only really support EXCLUSIVE locks. We track intermediate
|
|
** lock states in the sqlite3_file structure, but all locks SHARED or
|
|
** above are really EXCLUSIVE locks and exclude all other processes from
|
|
** accessing the file.
|
|
**
|
|
** This routine will only increase a lock. Use the sqlite3OsUnlock()
|
|
** routine to lower a locking level.
|
|
*/
|
|
static int semXLock(sqlite3_file *id, int eFileLock){
  unixFile *pFile = (unixFile*)id;
  sem_t *pSem = pFile->pInode->pSem;

  /* Only a handle that holds no lock yet has to take the semaphore.  A
  ** handle that already holds any lock just has its recorded level
  ** changed below.  If the semaphore is already taken, report BUSY
  ** without touching the recorded level. */
  if( pFile->eFileLock<=NO_LOCK && sem_trywait(pSem)==-1 ){
    return SQLITE_BUSY;
  }

  /* Record the requested level. */
  pFile->eFileLock = eFileLock;
  return SQLITE_OK;
}
|
|
|
|
/*
|
|
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
|
|
** must be either NO_LOCK or SHARED_LOCK.
|
|
**
|
|
** If the locking level of the file descriptor is already at or below
|
|
** the requested locking level, this routine is a no-op.
|
|
*/
|
|
static int semXUnlock(sqlite3_file *id, int eFileLock){
  unixFile *pFile = (unixFile*)id;
  sem_t *pSem = pFile->pInode->pSem;

  assert( pFile );
  assert( pSem );
  OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock,
           pFile->eFileLock, osGetpid(0)));
  assert( eFileLock<=SHARED_LOCK );

  if( pFile->eFileLock!=eFileLock ){
    if( eFileLock==SHARED_LOCK ){
      /* Dropping to SHARED only changes the recorded level; the
      ** semaphore stays held. */
      pFile->eFileLock = SHARED_LOCK;
    }else if( sem_post(pSem)==-1 ){
      /* Releasing the semaphore failed: map errno to an SQLite code. */
      int savedErrno = errno;
      int rc = sqliteErrorFromPosixError(savedErrno, SQLITE_IOERR_UNLOCK);
      if( IS_LOCK_ERROR(rc) ){
        storeLastErrno(pFile, savedErrno);
      }
      return rc;
    }else{
      /* The semaphore has been released. */
      pFile->eFileLock = NO_LOCK;
    }
  }
  /* Reached when the level already matched or was lowered successfully. */
  return SQLITE_OK;
}
|
|
|
|
/*
|
|
** Close a file.
|
|
*/
|
|
static int semXClose(sqlite3_file *id){
  unixFile *pFile = (unixFile*)id;

  /* A NULL handle is accepted and treated as already closed. */
  if( id==0 ){
    return SQLITE_OK;
  }

  /* Drop any lock held through this handle. */
  semXUnlock(id, NO_LOCK);
  assert( pFile );

  /* Release the inode info while holding the unix mutex. */
  unixEnterMutex();
  releaseInodeInfo(pFile);
  unixLeaveMutex();

  /* The result of the close is not reported to the caller. */
  closeUnixFile(id);
  return SQLITE_OK;
}
|
|
|
|
#endif /* OS_VXWORKS */
|
|
/*
|
|
** Named semaphore locking is only available on VxWorks.
|
|
**
|
|
*************** End of the named semaphore lock implementation ****************
|
|
******************************************************************************/
|
|
|
|
|
|
/******************************************************************************
|
|
*************************** Begin AFP Locking *********************************
|
|
**
|
|
** AFP is the Apple Filing Protocol. AFP is a network filesystem found
|
|
** on Apple Macintosh computers - both OS9 and OSX.
|
|
**
|
|
** Third-party implementations of AFP are available. But this code here
|
|
** only works on OSX.
|
|
*/
|
|
|
|
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
|
|
/*
|
|
** The afpLockingContext structure contains all afp lock specific state
|
|
*/
|
|
/* Per-file AFP locking state, reached through unixFile.lockingContext. */
typedef struct afpLockingContext afpLockingContext;
|
|
struct afpLockingContext {
|
|
int reserved; /* Set to 1 by afpLock() once the RESERVED_BYTE lock is
              ** taken; cleared by afpUnlock() when it is released */
|
|
const char *dbPath; /* Name of the open file */
|
|
};
|
|
|
|
/*
** Parameter block handed to fsctl() together with the
** afpfsByteRangeLock2FSCTL selector (see afpSetLock()) to set or clear
** one byte-range lock.
*/
struct ByteRangeLockPB2
|
|
{
|
|
unsigned long long offset; /* offset to first byte to lock */
|
|
unsigned long long length; /* number of bytes to lock */
|
|
unsigned long long retRangeStart; /* number of 1st byte locked if successful */
|
|
unsigned char unLockFlag; /* 1 = unlock, 0 = lock */
|
|
unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */
|
|
int fd; /* file desc to assoc this lock with */
|
|
};
|
|
|
|
/* fsctl() request code used by afpSetLock(); its argument is a
** struct ByteRangeLockPB2. */
#define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2)
|
|
|
|
/*
|
|
** This is a utility for setting or clearing a bit-range lock on an
|
|
** AFP filesystem.
|
|
**
|
|
** Return SQLITE_OK on success, SQLITE_BUSY on failure.
|
|
*/
|
|
static int afpSetLock(
|
|
const char *path, /* Name of the file to be locked or unlocked */
|
|
unixFile *pFile, /* Open file descriptor on path */
|
|
unsigned long long offset, /* First byte to be locked */
|
|
unsigned long long length, /* Number of bytes to lock */
|
|
int setLockFlag /* True to set lock. False to clear lock */
|
|
){
|
|
struct ByteRangeLockPB2 pb;
|
|
int err;
|
|
|
|
pb.unLockFlag = setLockFlag ? 0 : 1;
|
|
pb.startEndFlag = 0;
|
|
pb.offset = offset;
|
|
pb.length = length;
|
|
pb.fd = pFile->h;
|
|
|
|
OSTRACE(("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n",
|
|
(setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""),
|
|
offset, length));
|
|
err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);
|
|
if ( err==-1 ) {
|
|
int rc;
|
|
int tErrno = errno;
|
|
OSTRACE(("AFPSETLOCK failed to fsctl() '%s' %d %s\n",
|
|
path, tErrno, strerror(tErrno)));
|
|
#ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS
|
|
rc = SQLITE_BUSY;
|
|
#else
|
|
rc = sqliteErrorFromPosixError(tErrno,
|
|
setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK);
|
|
#endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */
|
|
if( IS_LOCK_ERROR(rc) ){
|
|
storeLastErrno(pFile, tErrno);
|
|
}
|
|
return rc;
|
|
} else {
|
|
return SQLITE_OK;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** This routine checks if there is a RESERVED lock held on the specified
|
|
** file by this or any other process. If such a lock is held, set *pResOut
|
|
** to a non-zero value otherwise *pResOut is set to zero. The return value
|
|
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
|
|
*/
|
|
static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
  unixFile *pFile = (unixFile*)id;
  afpLockingContext *pCtx;        /* AFP state attached to pFile */
  int rc = SQLITE_OK;             /* Return code */
  int isHeld = 0;                 /* Value written to *pResOut */

  SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );

  assert( pFile );
  pCtx = (afpLockingContext *) pFile->lockingContext;

  /* Fast path: this handle already took the RESERVED byte itself. */
  if( pCtx->reserved ){
    *pResOut = 1;
    return SQLITE_OK;
  }

  unixEnterMutex(); /* Because pFile->pInode is shared across threads */

  if( pFile->pInode->eFileLock>SHARED_LOCK ){
    /* Some handle in this process holds more than a SHARED lock. */
    isHeld = 1;
  }else{
    /* Probe for another process by trying to take the RESERVED byte. */
    int probeRc = afpSetLock(pCtx->dbPath, pFile, RESERVED_BYTE, 1,1);
    if( probeRc!=SQLITE_OK ){
      /* Could not take it, so it is treated as held by someone else. */
      isHeld = 1;
    }else{
      /* Took it; release it again to restore the original state. */
      probeRc = afpSetLock(pCtx->dbPath, pFile, RESERVED_BYTE, 1, 0);
    }
    if( IS_LOCK_ERROR(probeRc) ){
      rc = probeRc;
    }
  }

  unixLeaveMutex();
  OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, isHeld));

  *pResOut = isHeld;
  return rc;
}
|
|
|
|
/*
|
|
** Lock the file with the lock specified by parameter eFileLock - one
|
|
** of the following:
|
|
**
|
|
** (1) SHARED_LOCK
|
|
** (2) RESERVED_LOCK
|
|
** (3) PENDING_LOCK
|
|
** (4) EXCLUSIVE_LOCK
|
|
**
|
|
** Sometimes when requesting one lock state, additional lock states
|
|
** are inserted in between. The locking might fail on one of the later
|
|
** transitions leaving the lock state different from what it started but
|
|
** still short of its goal. The following chart shows the allowed
|
|
** transitions and the inserted intermediate states:
|
|
**
|
|
** UNLOCKED -> SHARED
|
|
** SHARED -> RESERVED
|
|
** SHARED -> (PENDING) -> EXCLUSIVE
|
|
** RESERVED -> (PENDING) -> EXCLUSIVE
|
|
** PENDING -> EXCLUSIVE
|
|
**
|
|
** This routine will only increase a lock. Use the sqlite3OsUnlock()
|
|
** routine to lower a locking level.
|
|
*/
|
|
static int afpLock(sqlite3_file *id, int eFileLock){
  int rc = SQLITE_OK;
  unixFile *pFile = (unixFile*)id;
  unixInodeInfo *pInode = pFile->pInode;
  afpLockingContext *pCtx = (afpLockingContext *) pFile->lockingContext;

  assert( pFile );
  OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (afp)\n", pFile->h,
           azFileLock(eFileLock), azFileLock(pFile->eFileLock),
           azFileLock(pInode->eFileLock), pInode->nShared , osGetpid(0)));

  /* Nothing to do when this handle already holds the requested lock or
  ** a stronger one.  Return directly instead of via afp_end_lock, since
  ** the mutex has not been entered yet. */
  if( pFile->eFileLock>=eFileLock ){
    OSTRACE(("LOCK %d %s ok (already held) (afp)\n", pFile->h,
             azFileLock(eFileLock)));
    return SQLITE_OK;
  }

  /* Sanity-check the requested transition:
  **  (1) From unlocked, only a shared lock may be requested.
  **  (2) A pending lock is never requested explicitly.
  **  (3) A reserved lock is only requested while holding a shared lock.
  */
  assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
  assert( eFileLock!=PENDING_LOCK );
  assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );

  /* pFile->pInode is shared across threads, hence the mutex. */
  unixEnterMutex();
  pInode = pFile->pInode;

  /* Another handle in this process holds a lock that rules out the
  ** request: report BUSY. */
  if( pFile->eFileLock!=pInode->eFileLock ){
    if( pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK ){
      rc = SQLITE_BUSY;
      goto afp_end_lock;
    }
  }

  /* SHARED requested while the process already holds SHARED or RESERVED
  ** on this inode: just bump the reference counts. */
  if( eFileLock==SHARED_LOCK ){
    if( pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK ){
      assert( eFileLock==SHARED_LOCK );
      assert( pFile->eFileLock==0 );
      assert( pInode->nShared>0 );
      pFile->eFileLock = SHARED_LOCK;
      pInode->nShared++;
      pInode->nLock++;
      goto afp_end_lock;
    }
  }

  /* Take the PENDING byte before going for SHARED, and before going for
  ** EXCLUSIVE from below PENDING.  For SHARED it is dropped again below. */
  if( eFileLock==SHARED_LOCK
   || (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)
  ){
    int pendingRc = afpSetLock(pCtx->dbPath, pFile, PENDING_BYTE, 1, 1);
    if( pendingRc ){
      rc = pendingRc;
      goto afp_end_lock;
    }
  }

  /* From here on the requested lock is actually applied to the file. */
  if( eFileLock==SHARED_LOCK ){
    int sharedRc;                 /* Result of locking the shared byte */
    int unpendRc;                 /* Result of dropping the PENDING byte */
    int sharedErrno = 0;          /* lastErrno saved from the shared attempt */
    long lk, mask;

    assert( pInode->nShared==0 );
    assert( pInode->eFileLock==0 );

    mask = (sizeof(long)==8) ? LARGEST_INT64 : 0x7fffffff;
    /* Pick one byte of the shared range; the quality of the randomness
    ** is not important. */
    lk = random();
    pInode->sharedByte = (lk & mask)%(SHARED_SIZE - 1);
    sharedRc = afpSetLock(pCtx->dbPath, pFile,
                          SHARED_FIRST+pInode->sharedByte, 1, 1);
    if( IS_LOCK_ERROR(sharedRc) ){
      /* Save the errno now; the unlock below may overwrite it. */
      sharedErrno = pFile->lastErrno;
    }

    /* Drop the temporary PENDING lock */
    unpendRc = afpSetLock(pCtx->dbPath, pFile, PENDING_BYTE, 1, 0);

    if( IS_LOCK_ERROR(sharedRc) ){
      storeLastErrno(pFile, sharedErrno);
      rc = sharedRc;
      goto afp_end_lock;
    }else if( IS_LOCK_ERROR(unpendRc) ){
      rc = unpendRc;
      goto afp_end_lock;
    }else if( sharedRc!=SQLITE_OK ){
      rc = sharedRc;
    }else{
      pFile->eFileLock = SHARED_LOCK;
      pInode->nLock++;
      pInode->nShared = 1;
    }
  }else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){
    /* Another handle in this process still holds a shared lock, so an
    ** exclusive lock cannot be granted. */
    rc = SQLITE_BUSY;
  }else{
    /* RESERVED or EXCLUSIVE was requested; a SHARED or stronger lock is
    ** assumed to be held already. */
    int lockRc = 0;
    assert( 0!=pFile->eFileLock );

    if( eFileLock>=RESERVED_LOCK && pFile->eFileLock<RESERVED_LOCK ){
      /* Acquire a RESERVED lock */
      lockRc = afpSetLock(pCtx->dbPath, pFile, RESERVED_BYTE, 1,1);
      if( !lockRc ){
        pCtx->reserved = 1;
      }
    }

    if( !lockRc && eFileLock==EXCLUSIVE_LOCK ){
      /* Acquire an EXCLUSIVE lock.  The single shared byte has to be
      ** released before the whole shared range can be locked; it must be
      ** re-taken if the range lock then fails. */
      lockRc = afpSetLock(pCtx->dbPath, pFile,
                          SHARED_FIRST + pInode->sharedByte, 1, 0);
      if( !lockRc ){
        /* Now attempt to lock the entire shared range. */
        lockRc = afpSetLock(pCtx->dbPath, pFile, SHARED_FIRST,
                            SHARED_SIZE, 1);
        if( lockRc ){
          int restoreRc = afpSetLock(pCtx->dbPath, pFile,
                                     SHARED_FIRST + pInode->sharedByte, 1, 1);
          if( restoreRc ){
            /* The shared lock could not be re-established; this is
            ** reported as a critical I/O error. */
            rc = ((lockRc & SQLITE_IOERR) == SQLITE_IOERR) ? restoreRc :
                 SQLITE_IOERR_LOCK;
            goto afp_end_lock;
          }
        }
      }
    }

    /* Any failure recorded above becomes the result. */
    if( lockRc ){
      rc = lockRc;
    }
  }

  if( rc==SQLITE_OK ){
    pFile->eFileLock = eFileLock;
    pInode->eFileLock = eFileLock;
  }else if( eFileLock==EXCLUSIVE_LOCK ){
    /* A failed EXCLUSIVE attempt leaves both records at PENDING. */
    pFile->eFileLock = PENDING_LOCK;
    pInode->eFileLock = PENDING_LOCK;
  }

afp_end_lock:
  unixLeaveMutex();
  OSTRACE(("LOCK %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock),
           rc==SQLITE_OK ? "ok" : "failed"));
  return rc;
}
|
|
|
|
/*
|
|
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
|
|
** must be either NO_LOCK or SHARED_LOCK.
|
|
**
|
|
** If the locking level of the file descriptor is already at or below
|
|
** the requested locking level, this routine is a no-op.
|
|
*/
|
|
static int afpUnlock(sqlite3_file *id, int eFileLock){
  int rc = SQLITE_OK;
  unixFile *pFile = (unixFile*)id;
  unixInodeInfo *pInode;
  afpLockingContext *pCtx = (afpLockingContext *) pFile->lockingContext;
  int sharedByteGone = 0;   /* True if the shared byte needs no OS unlock */
#ifdef SQLITE_TEST
  int h = pFile->h;
#endif

  assert( pFile );
  OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (afp)\n", pFile->h, eFileLock,
           pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
           osGetpid(0)));

  assert( eFileLock<=SHARED_LOCK );

  /* Already at or below the requested level: nothing to do. */
  if( pFile->eFileLock<=eFileLock ){
    return SQLITE_OK;
  }

  unixEnterMutex();
  pInode = pFile->pInode;
  assert( pInode->nShared!=0 );

  if( pFile->eFileLock>SHARED_LOCK ){
    /* True when a shared lock must remain on the file afterwards, either
    ** for this handle or for another handle in this process. */
    int keepShared = (eFileLock==SHARED_LOCK || pInode->nShared>1);

    assert( pInode->eFileLock==pFile->eFileLock );
    SimulateIOErrorBenign(1);
    SimulateIOError( h=(-1) )
    SimulateIOErrorBenign(0);

#ifdef SQLITE_DEBUG
    /* When a lock is lowered so that other processes may read the file
    ** again, the transaction counter must have been updated if any part
    ** of the database file changed.  Otherwise other connections might
    ** not notice the change and keep using a stale cache, which can lead
    ** to database corruption. */
    assert( pFile->inNormalWrite==0
         || pFile->dbUpdate==0
         || pFile->transCntrChng==1 );
    pFile->inNormalWrite = 0;
#endif

    if( pFile->eFileLock==EXCLUSIVE_LOCK ){
      /* Release the whole shared range ... */
      rc = afpSetLock(pCtx->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0);
      if( rc==SQLITE_OK && keepShared ){
        /* ... and re-take the single shared byte, only if still needed. */
        int sharedLockByte = SHARED_FIRST+pInode->sharedByte;
        rc = afpSetLock(pCtx->dbPath, pFile, sharedLockByte, 1, 1);
      }else{
        sharedByteGone = 1;
      }
    }
    if( rc==SQLITE_OK && pFile->eFileLock>=PENDING_LOCK ){
      rc = afpSetLock(pCtx->dbPath, pFile, PENDING_BYTE, 1, 0);
    }
    if( rc==SQLITE_OK && pFile->eFileLock>=RESERVED_LOCK && pCtx->reserved ){
      rc = afpSetLock(pCtx->dbPath, pFile, RESERVED_BYTE, 1, 0);
      if( !rc ){
        pCtx->reserved = 0;
      }
    }
    if( rc==SQLITE_OK && keepShared ){
      pInode->eFileLock = SHARED_LOCK;
    }
  }

  if( rc==SQLITE_OK && eFileLock==NO_LOCK ){
    /* Decrement the shared-lock counter.  The OS-level lock is released
    ** only once every handle in this process has let go of it. */
    unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte;

    pInode->nShared--;
    if( pInode->nShared==0 ){
      SimulateIOErrorBenign(1);
      SimulateIOError( h=(-1) )
      SimulateIOErrorBenign(0);
      if( !sharedByteGone ){
        rc = afpSetLock(pCtx->dbPath, pFile, sharedLockByte, 1, 0);
      }
      if( !rc ){
        pInode->eFileLock = NO_LOCK;
        pFile->eFileLock = NO_LOCK;
      }
    }
    if( rc==SQLITE_OK ){
      pInode->nLock--;
      assert( pInode->nLock>=0 );
      if( pInode->nLock==0 ){
        /* With the last lock gone, descriptors parked for a deferred
        ** close can now be closed. */
        closePendingFds(pFile);
      }
    }
  }

  unixLeaveMutex();
  if( rc==SQLITE_OK ){
    pFile->eFileLock = eFileLock;
  }
  return rc;
}
|
|
|
|
/*
|
|
** Close a file & cleanup AFP specific locking context
|
|
*/
|
|
static int afpClose(sqlite3_file *id){
  unixFile *pFile = (unixFile*)id;
  int rc = SQLITE_OK;

  assert( id!=0 );

  /* Drop any lock held through this handle before tearing it down. */
  afpUnlock(id, NO_LOCK);

  unixEnterMutex();
  if( pFile->pInode!=0 && pFile->pInode->nLock!=0 ){
    /* Locks are still outstanding on this inode.  Closing the descriptor
    ** now would clear them, so hand it to pInode->aPending instead; it
    ** is closed automatically once the last lock is cleared. */
    setPendingFd(pFile);
  }
  releaseInodeInfo(pFile);
  sqlite3_free(pFile->lockingContext);
  rc = closeUnixFile(id);
  unixLeaveMutex();

  return rc;
}
|
|
|
|
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
|
|
/*
|
|
** The code above is the AFP lock implementation. The code is specific
|
|
** to MacOSX and does not work on other unix platforms. No alternative
|
|
** is available. If you don't compile for a mac, then the "unix-afp"
|
|
** VFS is not available.
|
|
**
|
|
********************* End of the AFP lock implementation **********************
|
|
******************************************************************************/
|
|
|
|
/******************************************************************************
|
|
*************************** Begin NFS Locking ********************************/
|
|
|
|
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
|
|
/*
|
|
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
|
|
** must be either NO_LOCK or SHARED_LOCK.
|
|
**
|
|
** If the locking level of the file descriptor is already at or below
|
|
** the requested locking level, this routine is a no-op.
|
|
*/
|
|
static int nfsUnlock(sqlite3_file *id, int eFileLock){
  /* Delegate to the posix unlock routine with its third argument set. */
  int rc = posixUnlock(id, eFileLock, 1);
  return rc;
}
|
|
|
|
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
|
|
/*
|
|
** The code above is the NFS lock implementation. The code is specific
|
|
** to MacOSX and does not work on other unix platforms. No alternative
|
|
** is available.
|
|
**
|
|
********************* End of the NFS lock implementation **********************
|
|
******************************************************************************/
|
|
|
|
/******************************************************************************
|
|
**************** Non-locking sqlite3_file methods *****************************
|
|
**
|
|
** The next division contains implementations for all methods of the
|
|
** sqlite3_file object other than the locking methods. The locking
|
|
** methods were defined in divisions above (one locking method per
|
|
** division). Those methods that are common to all locking modes
|
|
** are gathered together into this division.
|
|
*/
|
|
|
|
/*
|
|
** Seek to the offset passed as the second argument, then read cnt
|
|
** bytes into pBuf. Return the number of bytes actually read.
|
|
**
|
|
** NB: If you define USE_PREAD or USE_PREAD64, then it might also
|
|
** be necessary to define _XOPEN_SOURCE to be 500. This varies from
|
|
** one system to another. Since SQLite does not define USE_PREAD
|
|
** in any form by default, we will not attempt to define _XOPEN_SOURCE.
|
|
** See tickets #2741 and #2681.
|
|
**
|
|
** To avoid stomping the errno value on a failed read the lastErrno value
|
|
** is set before returning.
|
|
*/
|
|
static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
  int got;                 /* Result of the most recent read attempt */
  int prior = 0;           /* Bytes delivered by earlier partial reads */
  TIMER_START;
  assert( cnt==(cnt&0x1ffff) );
  assert( id->h>2 );

  for(;;){
#if defined(USE_PREAD)
    got = osPread(id->h, pBuf, cnt, offset);
    SimulateIOError( got = -1 );
#elif defined(USE_PREAD64)
    got = osPread64(id->h, pBuf, cnt, offset);
    SimulateIOError( got = -1 );
#else
    i64 newOffset = lseek(id->h, offset, SEEK_SET);
    SimulateIOError( newOffset = -1 );
    if( newOffset<0 ){
      /* The seek failed: record errno and give up immediately. */
      storeLastErrno((unixFile*)id, errno);
      return -1;
    }
    got = osRead(id->h, pBuf, cnt);
#endif

    /* Everything that was still outstanding has arrived. */
    if( got==cnt ) break;

    if( got<0 ){
      /* An interrupted call is simply retried. */
      if( errno==EINTR ) continue;
      /* A real error discards what was read so far, so the function
      ** returns a negative value. */
      prior = 0;
      storeLastErrno((unixFile*)id, errno);
      break;
    }

    /* Zero bytes read: stop and report what has been collected. */
    if( got==0 ) break;

    /* Partial read: advance past the bytes received and try again. */
    cnt -= got;
    offset += got;
    prior += got;
    pBuf = (void*)&((char*)pBuf)[got];
  }

  TIMER_END;
  OSTRACE(("READ %-3d %5d %7lld %llu\n",
            id->h, got+prior, offset-prior, TIMER_ELAPSED));
  return got+prior;
}
|
|
|
|
/*
|
|
** Read data from a file into a buffer. Return SQLITE_OK if all
|
|
** bytes were read successfully and SQLITE_IOERR if anything goes
|
|
** wrong.
|
|
*/
|
|
static int unixRead(
|
|
sqlite3_file *id,
|
|
void *pBuf,
|
|
int amt,
|
|
sqlite3_int64 offset
|
|
){
|
|
unixFile *pFile = (unixFile *)id;
|
|
int got;
|
|
assert( id );
|
|
assert( offset>=0 );
|
|
assert( amt>0 );
|
|
|
|
/* If this is a database file (not a journal, master-journal or temp
|
|
** file), the bytes in the locking range should never be read or written. */
|
|
#if 0
|
|
assert( pFile->pUnused==0
|
|
|| offset>=PENDING_BYTE+512
|
|
|| offset+amt<=PENDING_BYTE
|
|
);
|
|
#endif
|
|
|
|
#if SQLITE_MAX_MMAP_SIZE>0
|
|
/* Deal with as much of this read request as possible by transfering
|
|
** data from the memory mapping using memcpy(). */
|
|
if( offset<pFile->mmapSize ){
|
|
if( offset+amt <= pFile->mmapSize ){
|
|
memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt);
|
|
return SQLITE_OK;
|
|
}else{
|
|
int nCopy = pFile->mmapSize - offset;
|
|
memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy);
|
|
pBuf = &((u8 *)pBuf)[nCopy];
|
|
amt -= nCopy;
|
|
offset += nCopy;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
got = seekAndRead(pFile, offset, pBuf, amt);
|
|
if( got==amt ){
|
|
return SQLITE_OK;
|
|
}else if( got<0 ){
|
|
/* lastErrno set by seekAndRead */
|
|
return SQLITE_IOERR_READ;
|
|
}else{
|
|
storeLastErrno(pFile, 0); /* not a system error */
|
|
/* Unread parts of the buffer must be zero-filled */
|
|
memset(&((char*)pBuf)[got], 0, amt-got);
|
|
return SQLITE_IOERR_SHORT_READ;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Attempt to seek the file-descriptor passed as the first argument to
|
|
** absolute offset iOff, then attempt to write nBuf bytes of data from
|
|
** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise,
|
|
** return the actual number of bytes written (which may be less than
|
|
** nBuf).
|
|
*/
|
|
static int seekAndWriteFd(
|
|
int fd, /* File descriptor to write to */
|
|
i64 iOff, /* File offset to begin writing at */
|
|
const void *pBuf, /* Copy data from this buffer to the file */
|
|
int nBuf, /* Size of buffer pBuf in bytes */
|
|
int *piErrno /* OUT: Error number if error occurs */
|
|
){
|
|
int rc = 0; /* Value returned by system call */
|
|
|
|
assert( nBuf==(nBuf&0x1ffff) );
|
|
assert( fd>2 );
|
|
assert( piErrno!=0 );
|
|
nBuf &= 0x1ffff;
|
|
TIMER_START;
|
|
|
|
#if defined(USE_PREAD)
|
|
do{ rc = (int)osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR );
|
|
#elif defined(USE_PREAD64)
|
|
do{ rc = (int)osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR);
|
|
#else
|
|
do{
|
|
i64 iSeek = lseek(fd, iOff, SEEK_SET);
|
|
SimulateIOError( iSeek = -1 );
|
|
if( iSeek<0 ){
|
|
rc = -1;
|
|
break;
|
|
}
|
|
rc = osWrite(fd, pBuf, nBuf);
|
|
}while( rc<0 && errno==EINTR );
|
|
#endif
|
|
|
|
TIMER_END;
|
|
OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED));
|
|
|
|
if( rc<0 ) *piErrno = errno;
|
|
return rc;
|
|
}
|
|
|
|
|
|
/*
|
|
** Seek to the offset passed as the second argument, then write cnt bytes
** from pBuf. Return the number of bytes actually written.
|
|
**
|
|
** To avoid stomping the errno value on a failed write the lastErrno value
|
|
** is set before returning.
|
|
*/
|
|
static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
  /* Write through the file's own descriptor; on failure the errno is
  ** stored in id->lastErrno. */
  int nWritten = seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno);
  return nWritten;
}
|
|
|
|
|
|
/*
|
|
** Write data from a buffer into a file. Return SQLITE_OK on success
|
|
** or some other error code on failure.
|
|
*/
|
|
static int unixWrite(
|
|
sqlite3_file *id,
|
|
const void *pBuf,
|
|
int amt,
|
|
sqlite3_int64 offset
|
|
){
|
|
unixFile *pFile = (unixFile*)id;
|
|
int wrote = 0;
|
|
assert( id );
|
|
assert( amt>0 );
|
|
|
|
/* If this is a database file (not a journal, master-journal or temp
|
|
** file), the bytes in the locking range should never be read or written. */
|
|
#if 0
|
|
assert( pFile->pUnused==0
|
|
|| offset>=PENDING_BYTE+512
|
|
|| offset+amt<=PENDING_BYTE
|
|
);
|
|
#endif
|
|
|
|
#ifdef SQLITE_DEBUG
|
|
/* If we are doing a normal write to a database file (as opposed to
|
|
** doing a hot-journal rollback or a write to some file other than a
|
|
** normal database file) then record the fact that the database
|
|
** has changed. If the transaction counter is modified, record that
|
|
** fact too.
|
|
*/
|
|
if( pFile->inNormalWrite ){
|
|
pFile->dbUpdate = 1; /* The database has been modified */
|
|
if( offset<=24 && offset+amt>=27 ){
|
|
int rc;
|
|
char oldCntr[4];
|
|
SimulateIOErrorBenign(1);
|
|
rc = seekAndRead(pFile, 24, oldCntr, 4);
|
|
SimulateIOErrorBenign(0);
|
|
if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){
|
|
pFile->transCntrChng = 1; /* The transaction counter has changed */
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#if defined(SQLITE_MMAP_READWRITE) && SQLITE_MAX_MMAP_SIZE>0
|
|
/* Deal with as much of this write request as possible by transfering
|
|
** data from the memory mapping using memcpy(). */
|
|
if( offset<pFile->mmapSize ){
|
|
if( offset+amt <= pFile->mmapSize ){
|
|
memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt);
|
|
return SQLITE_OK;
|
|
}else{
|
|
int nCopy = pFile->mmapSize - offset;
|
|
memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy);
|
|
pBuf = &((u8 *)pBuf)[nCopy];
|
|
amt -= nCopy;
|
|
offset += nCopy;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
while( (wrote = seekAndWrite(pFile, offset, pBuf, amt))<amt && wrote>0 ){
|
|
amt -= wrote;
|
|
offset += wrote;
|
|
pBuf = &((char*)pBuf)[wrote];
|
|
}
|
|
SimulateIOError(( wrote=(-1), amt=1 ));
|
|
SimulateDiskfullError(( wrote=0, amt=1 ));
|
|
|
|
if( amt>wrote ){
|
|
if( wrote<0 && pFile->lastErrno!=ENOSPC ){
|
|
/* lastErrno set by seekAndWrite */
|
|
return SQLITE_IOERR_WRITE;
|
|
}else{
|
|
storeLastErrno(pFile, 0); /* not a system error */
|
|
return SQLITE_FULL;
|
|
}
|
|
}
|
|
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
#ifdef SQLITE_TEST
|
|
/*
|
|
** Count the number of fullsyncs and normal syncs. This is used to test
|
|
** that syncs and fullsyncs are occurring at the right times.
|
|
*/
|
|
int sqlite3_sync_count = 0; /* Incremented on every full_fsync() call */
|
|
int sqlite3_fullsync_count = 0; /* Incremented when full_fsync() is called
                                ** with a non-zero fullSync argument */
|
|
#endif
|
|
|
|
/*
|
|
** We do not trust systems to provide a working fdatasync(). Some do.
|
|
** Others do not. To be safe, we will stick with the (slightly slower)
|
|
** fsync(). If you know that your system does support fdatasync() correctly,
|
|
** then simply compile with -Dfdatasync=fdatasync or -DHAVE_FDATASYNC
|
|
*/
|
|
/* Map fdatasync onto fsync unless the build overrides fdatasync itself
** or sets HAVE_FDATASYNC. */
#if !defined(fdatasync) && !HAVE_FDATASYNC
|
|
# define fdatasync fsync
|
|
#endif /* !defined(fdatasync) && !HAVE_FDATASYNC */
|
|
|
|
/*
|
|
** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not
|
|
** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently
|
|
** only available on Mac OS X. But that could change.
|
|
*/
|
|
/* HAVE_FULLFSYNC is always defined (as 0 or 1) so it can be tested
** with #if / #elif. */
#ifdef F_FULLFSYNC
|
|
# define HAVE_FULLFSYNC 1
|
|
#else
|
|
# define HAVE_FULLFSYNC 0
|
|
#endif /* F_FULLFSYNC */
|
|
|
|
|
|
/*
|
|
** The fsync() system call does not work as advertised on many
|
|
** unix systems. The following procedure is an attempt to make
|
|
** it work better.
|
|
**
|
|
** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful
|
|
** for testing when we want to run through the test suite quickly.
|
|
** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
|
|
** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
|
|
** or power failure will likely corrupt the database file.
|
|
**
|
|
** SQLite sets the dataOnly flag if the size of the file is unchanged.
|
|
** The idea behind dataOnly is that it should only write the file content
|
|
** to disk, not the inode. We only set dataOnly if the file size is
|
|
** unchanged since the file size is part of the inode. However,
|
|
** Ted Ts'o tells us that fdatasync() will also write the inode if the
|
|
** file size has changed. The only real difference between fdatasync()
|
|
** and fsync(), Ted tells us, is that fdatasync() will not flush the
|
|
** inode if the mtime or owner or other inode attributes have changed.
|
|
** We only care about the file size, not the other file attributes, so
|
|
** as far as SQLite is concerned, an fdatasync() is always adequate.
|
|
** So, we always use fdatasync() if it is available, regardless of
|
|
** the value of the dataOnly flag.
|
|
*/
|
|
static int full_fsync(int fd, int fullSync, int dataOnly){
|
|
int rc;
|
|
|
|
/* The following "ifdef/elif/else/" block has the same structure as
|
|
** the one below. It is replicated here solely to avoid cluttering
|
|
** up the real code with the UNUSED_PARAMETER() macros.
|
|
*/
|
|
#ifdef SQLITE_NO_SYNC
|
|
UNUSED_PARAMETER(fd);
|
|
UNUSED_PARAMETER(fullSync);
|
|
UNUSED_PARAMETER(dataOnly);
|
|
#elif HAVE_FULLFSYNC
|
|
UNUSED_PARAMETER(dataOnly);
|
|
#else
|
|
UNUSED_PARAMETER(fullSync);
|
|
UNUSED_PARAMETER(dataOnly);
|
|
#endif
|
|
|
|
/* Record the number of times that we do a normal fsync() and
|
|
** FULLSYNC. This is used during testing to verify that this procedure
|
|
** gets called with the correct arguments.
|
|
*/
|
|
#ifdef SQLITE_TEST
|
|
if( fullSync ) sqlite3_fullsync_count++;
|
|
sqlite3_sync_count++;
|
|
#endif
|
|
|
|
/* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a
|
|
** no-op. But go ahead and call fstat() to validate the file
|
|
** descriptor as we need a method to provoke a failure during
|
|
** coverate testing.
|
|
*/
|
|
#ifdef SQLITE_NO_SYNC
|
|
{
|
|
struct stat buf;
|
|
rc = osFstat(fd, &buf);
|
|
}
|
|
#elif HAVE_FULLFSYNC
|
|
if( fullSync ){
|
|
rc = osFcntl(fd, F_FULLFSYNC, 0);
|
|
}else{
|
|
rc = 1;
|
|
}
|
|
/* If the FULLFSYNC failed, fall back to attempting an fsync().
|
|
** It shouldn't be possible for fullfsync to fail on the local
|
|
** file system (on OSX), so failure indicates that FULLFSYNC
|
|
** isn't supported for this file system. So, attempt an fsync
|
|
** and (for now) ignore the overhead of a superfluous fcntl call.
|
|
** It'd be better to detect fullfsync support once and avoid
|
|
** the fcntl call every time sync is called.
|
|
*/
|
|
if( rc ) rc = fsync(fd);
|
|
|
|
#elif defined(__APPLE__)
|
|
/* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly
|
|
** so currently we default to the macro that redefines fdatasync to fsync
|
|
*/
|
|
rc = fsync(fd);
|
|
#else
|
|
rc = fdatasync(fd);
|
|
#if OS_VXWORKS
|
|
if( rc==-1 && errno==ENOTSUP ){
|
|
rc = fsync(fd);
|
|
}
|
|
#endif /* OS_VXWORKS */
|
|
#endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */
|
|
|
|
if( OS_VXWORKS && rc!= -1 ){
|
|
rc = 0;
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Open a file descriptor to the directory containing file zFilename.
|
|
** If successful, *pFd is set to the opened file descriptor and
|
|
** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
|
|
** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
|
|
** value.
|
|
**
|
|
** The directory file descriptor is used for only one thing - to
|
|
** fsync() a directory to make sure file creation and deletion events
|
|
** are flushed to disk. Such fsyncs are not needed on newer
|
|
** journaling filesystems, but are required on older filesystems.
|
|
**
|
|
** This routine can be overridden using the xSetSysCall interface.
|
|
** The ability to override this routine was added in support of the
|
|
** chromium sandbox. Opening a directory is a security risk (we are
|
|
** told) so making it overrideable allows the chromium sandbox to
|
|
** replace this routine with a harmless no-op. To make this routine
|
|
** a no-op, replace it with a stub that returns SQLITE_OK but leaves
|
|
** *pFd set to a negative number.
|
|
**
|
|
** If SQLITE_OK is returned, the caller is responsible for closing
|
|
** the file descriptor *pFd using close().
|
|
*/
|
|
static int openDirectory(const char *zFilename, int *pFd){
|
|
int ii;
|
|
int fd = -1;
|
|
char zDirname[MAX_PATHNAME+1];
|
|
|
|
sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
|
|
for(ii=(int)strlen(zDirname); ii>0 && zDirname[ii]!='/'; ii--);
|
|
if( ii>0 ){
|
|
zDirname[ii] = '\0';
|
|
}else{
|
|
if( zDirname[0]!='/' ) zDirname[0] = '.';
|
|
zDirname[1] = 0;
|
|
}
|
|
fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0);
|
|
if( fd>=0 ){
|
|
OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname));
|
|
}
|
|
*pFd = fd;
|
|
if( fd>=0 ) return SQLITE_OK;
|
|
return unixLogError(SQLITE_CANTOPEN_BKPT, "openDirectory", zDirname);
|
|
}
|
|
|
|
/*
|
|
** Make sure all writes to a particular file are committed to disk.
|
|
**
|
|
** If dataOnly==0 then both the file itself and its metadata (file
|
|
** size, access time, etc) are synced. If dataOnly!=0 then only the
|
|
** file data is synced.
|
|
**
|
|
** Under Unix, also make sure that the directory entry for the file
|
|
** has been created by fsync-ing the directory that contains the file.
|
|
** If we do not do this and we encounter a power failure, the directory
|
|
** entry for the journal might not exist after we reboot. The next
|
|
** SQLite to access the file will not know that the journal exists (because
|
|
** the directory entry for the journal was never created) and the transaction
|
|
** will not roll back - possibly leading to database corruption.
|
|
*/
|
|
static int unixSync(sqlite3_file *id, int flags){
|
|
int rc;
|
|
unixFile *pFile = (unixFile*)id;
|
|
|
|
int isDataOnly = (flags&SQLITE_SYNC_DATAONLY);
|
|
int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL;
|
|
|
|
/* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */
|
|
assert((flags&0x0F)==SQLITE_SYNC_NORMAL
|
|
|| (flags&0x0F)==SQLITE_SYNC_FULL
|
|
);
|
|
|
|
/* Unix cannot, but some systems may return SQLITE_FULL from here. This
|
|
** line is to test that doing so does not cause any problems.
|
|
*/
|
|
SimulateDiskfullError( return SQLITE_FULL );
|
|
|
|
assert( pFile );
|
|
OSTRACE(("SYNC %-3d\n", pFile->h));
|
|
rc = full_fsync(pFile->h, isFullsync, isDataOnly);
|
|
SimulateIOError( rc=1 );
|
|
if( rc ){
|
|
storeLastErrno(pFile, errno);
|
|
return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath);
|
|
}
|
|
|
|
/* Also fsync the directory containing the file if the DIRSYNC flag
|
|
** is set. This is a one-time occurrence. Many systems (examples: AIX)
|
|
** are unable to fsync a directory, so ignore errors on the fsync.
|
|
*/
|
|
if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){
|
|
int dirfd;
|
|
OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath,
|
|
HAVE_FULLFSYNC, isFullsync));
|
|
rc = osOpenDirectory(pFile->zPath, &dirfd);
|
|
if( rc==SQLITE_OK ){
|
|
full_fsync(dirfd, 0, 0);
|
|
robust_close(pFile, dirfd, __LINE__);
|
|
}else{
|
|
assert( rc==SQLITE_CANTOPEN );
|
|
rc = SQLITE_OK;
|
|
}
|
|
pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC;
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Truncate an open file to a specified size
|
|
*/
|
|
static int unixTruncate(sqlite3_file *id, i64 nByte){
|
|
unixFile *pFile = (unixFile *)id;
|
|
int rc;
|
|
assert( pFile );
|
|
SimulateIOError( return SQLITE_IOERR_TRUNCATE );
|
|
|
|
/* If the user has configured a chunk-size for this file, truncate the
|
|
** file so that it consists of an integer number of chunks (i.e. the
|
|
** actual file size after the operation may be larger than the requested
|
|
** size).
|
|
*/
|
|
if( pFile->szChunk>0 ){
|
|
nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk;
|
|
}
|
|
|
|
rc = robust_ftruncate(pFile->h, nByte);
|
|
if( rc ){
|
|
storeLastErrno(pFile, errno);
|
|
return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
|
|
}else{
|
|
#ifdef SQLITE_DEBUG
|
|
/* If we are doing a normal write to a database file (as opposed to
|
|
** doing a hot-journal rollback or a write to some file other than a
|
|
** normal database file) and we truncate the file to zero length,
|
|
** that effectively updates the change counter. This might happen
|
|
** when restoring a database using the backup API from a zero-length
|
|
** source.
|
|
*/
|
|
if( pFile->inNormalWrite && nByte==0 ){
|
|
pFile->transCntrChng = 1;
|
|
}
|
|
#endif
|
|
|
|
#if SQLITE_MAX_MMAP_SIZE>0
|
|
/* If the file was just truncated to a size smaller than the currently
|
|
** mapped region, reduce the effective mapping size as well. SQLite will
|
|
** use read() and write() to access data beyond this point from now on.
|
|
*/
|
|
if( nByte<pFile->mmapSize ){
|
|
pFile->mmapSize = nByte;
|
|
}
|
|
#endif
|
|
|
|
return SQLITE_OK;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Determine the current size of a file in bytes
|
|
*/
|
|
static int unixFileSize(sqlite3_file *id, i64 *pSize){
|
|
int rc;
|
|
struct stat buf;
|
|
assert( id );
|
|
rc = osFstat(((unixFile*)id)->h, &buf);
|
|
SimulateIOError( rc=1 );
|
|
if( rc!=0 ){
|
|
storeLastErrno((unixFile*)id, errno);
|
|
return SQLITE_IOERR_FSTAT;
|
|
}
|
|
*pSize = buf.st_size;
|
|
|
|
/* When opening a zero-size database, the findInodeInfo() procedure
|
|
** writes a single byte into that file in order to work around a bug
|
|
** in the OS-X msdos filesystem. In order to avoid problems with upper
|
|
** layers, we need to report this file size as zero even though it is
|
|
** really 1. Ticket #3260.
|
|
*/
|
|
if( *pSize==1 ) *pSize = 0;
|
|
|
|
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
|
|
/*
|
|
** Handler for proxy-locking file-control verbs. Defined below in the
|
|
** proxying locking division.
|
|
*/
|
|
static int proxyFileControl(sqlite3_file*,int,void*);
|
|
#endif
|
|
|
|
/*
|
|
** This function is called to handle the SQLITE_FCNTL_SIZE_HINT
|
|
** file-control operation. Enlarge the database to nBytes in size
|
|
** (rounded up to the next chunk-size). If the database is already
|
|
** nBytes or larger, this routine is a no-op.
|
|
*/
|
|
static int fcntlSizeHint(unixFile *pFile, i64 nByte){
|
|
if( pFile->szChunk>0 ){
|
|
i64 nSize; /* Required file size */
|
|
struct stat buf; /* Used to hold return values of fstat() */
|
|
|
|
if( osFstat(pFile->h, &buf) ){
|
|
return SQLITE_IOERR_FSTAT;
|
|
}
|
|
|
|
nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk;
|
|
if( nSize>(i64)buf.st_size ){
|
|
|
|
#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
|
|
/* The code below is handling the return value of osFallocate()
|
|
** correctly. posix_fallocate() is defined to "returns zero on success,
|
|
** or an error number on failure". See the manpage for details. */
|
|
int err;
|
|
do{
|
|
err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size);
|
|
}while( err==EINTR );
|
|
if( err ) return SQLITE_IOERR_WRITE;
|
|
#else
|
|
/* If the OS does not have posix_fallocate(), fake it. Write a
|
|
** single byte to the last byte in each block that falls entirely
|
|
** within the extended region. Then, if required, a single byte
|
|
** at offset (nSize-1), to set the size of the file correctly.
|
|
** This is a similar technique to that used by glibc on systems
|
|
** that do not have a real fallocate() call.
|
|
*/
|
|
int nBlk = buf.st_blksize; /* File-system block size */
|
|
int nWrite = 0; /* Number of bytes written by seekAndWrite */
|
|
i64 iWrite; /* Next offset to write to */
|
|
|
|
iWrite = (buf.st_size/nBlk)*nBlk + nBlk - 1;
|
|
assert( iWrite>=buf.st_size );
|
|
assert( ((iWrite+1)%nBlk)==0 );
|
|
for(/*no-op*/; iWrite<nSize+nBlk-1; iWrite+=nBlk ){
|
|
if( iWrite>=nSize ) iWrite = nSize - 1;
|
|
nWrite = seekAndWrite(pFile, iWrite, "", 1);
|
|
if( nWrite!=1 ) return SQLITE_IOERR_WRITE;
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
#if SQLITE_MAX_MMAP_SIZE>0
|
|
if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){
|
|
int rc;
|
|
if( pFile->szChunk<=0 ){
|
|
if( robust_ftruncate(pFile->h, nByte) ){
|
|
storeLastErrno(pFile, errno);
|
|
return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
|
|
}
|
|
}
|
|
|
|
rc = unixMapfile(pFile, nByte);
|
|
return rc;
|
|
}
|
|
#endif
|
|
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** If *pArg is initially negative then this is a query. Set *pArg to
|
|
** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set.
|
|
**
|
|
** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags.
|
|
*/
|
|
static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){
|
|
if( *pArg<0 ){
|
|
*pArg = (pFile->ctrlFlags & mask)!=0;
|
|
}else if( (*pArg)==0 ){
|
|
pFile->ctrlFlags &= ~mask;
|
|
}else{
|
|
pFile->ctrlFlags |= mask;
|
|
}
|
|
}
|
|
|
|
/* Forward declaration */
|
|
static int unixGetTempname(int nBuf, char *zBuf);
|
|
|
|
/*
|
|
** Information and control of an open file handle.
|
|
*/
|
|
static int unixFileControl(sqlite3_file *id, int op, void *pArg){
|
|
unixFile *pFile = (unixFile*)id;
|
|
switch( op ){
|
|
case SQLITE_FCNTL_LOCKSTATE: {
|
|
*(int*)pArg = pFile->eFileLock;
|
|
return SQLITE_OK;
|
|
}
|
|
case SQLITE_FCNTL_LAST_ERRNO: {
|
|
*(int*)pArg = pFile->lastErrno;
|
|
return SQLITE_OK;
|
|
}
|
|
case SQLITE_FCNTL_CHUNK_SIZE: {
|
|
pFile->szChunk = *(int *)pArg;
|
|
return SQLITE_OK;
|
|
}
|
|
case SQLITE_FCNTL_SIZE_HINT: {
|
|
int rc;
|
|
SimulateIOErrorBenign(1);
|
|
rc = fcntlSizeHint(pFile, *(i64 *)pArg);
|
|
SimulateIOErrorBenign(0);
|
|
return rc;
|
|
}
|
|
case SQLITE_FCNTL_PERSIST_WAL: {
|
|
unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg);
|
|
return SQLITE_OK;
|
|
}
|
|
case SQLITE_FCNTL_POWERSAFE_OVERWRITE: {
|
|
unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg);
|
|
return SQLITE_OK;
|
|
}
|
|
case SQLITE_FCNTL_VFSNAME: {
|
|
*(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName);
|
|
return SQLITE_OK;
|
|
}
|
|
case SQLITE_FCNTL_TEMPFILENAME: {
|
|
char *zTFile = sqlite3_malloc64( pFile->pVfs->mxPathname );
|
|
if( zTFile ){
|
|
unixGetTempname(pFile->pVfs->mxPathname, zTFile);
|
|
*(char**)pArg = zTFile;
|
|
}
|
|
return SQLITE_OK;
|
|
}
|
|
case SQLITE_FCNTL_HAS_MOVED: {
|
|
*(int*)pArg = fileHasMoved(pFile);
|
|
return SQLITE_OK;
|
|
}
|
|
#if SQLITE_MAX_MMAP_SIZE>0
|
|
case SQLITE_FCNTL_MMAP_SIZE: {
|
|
i64 newLimit = *(i64*)pArg;
|
|
int rc = SQLITE_OK;
|
|
if( newLimit>sqlite3GlobalConfig.mxMmap ){
|
|
newLimit = sqlite3GlobalConfig.mxMmap;
|
|
}
|
|
*(i64*)pArg = pFile->mmapSizeMax;
|
|
if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){
|
|
pFile->mmapSizeMax = newLimit;
|
|
if( pFile->mmapSize>0 ){
|
|
unixUnmapfile(pFile);
|
|
rc = unixMapfile(pFile, -1);
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
#endif
|
|
#ifdef SQLITE_DEBUG
|
|
/* The pager calls this method to signal that it has done
|
|
** a rollback and that the database is therefore unchanged and
|
|
** it hence it is OK for the transaction change counter to be
|
|
** unchanged.
|
|
*/
|
|
case SQLITE_FCNTL_DB_UNCHANGED: {
|
|
((unixFile*)id)->dbUpdate = 0;
|
|
return SQLITE_OK;
|
|
}
|
|
#endif
|
|
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
|
|
case SQLITE_FCNTL_SET_LOCKPROXYFILE:
|
|
case SQLITE_FCNTL_GET_LOCKPROXYFILE: {
|
|
return proxyFileControl(id,op,pArg);
|
|
}
|
|
#endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */
|
|
}
|
|
return SQLITE_NOTFOUND;
|
|
}
|
|
|
|
/*
|
|
** Return the sector size in bytes of the underlying block device for
|
|
** the specified file. This is almost always 512 bytes, but may be
|
|
** larger for some devices.
|
|
**
|
|
** SQLite code assumes this function cannot fail. It also assumes that
|
|
** if two files are created in the same file-system directory (i.e.
|
|
** a database and its journal file) that the sector size will be the
|
|
** same for both.
|
|
*/
|
|
#ifndef __QNXNTO__
|
|
static int unixSectorSize(sqlite3_file *NotUsed){
|
|
UNUSED_PARAMETER(NotUsed);
|
|
return SQLITE_DEFAULT_SECTOR_SIZE;
|
|
}
|
|
#endif
|
|
|
|
/*
** The following version of unixSectorSize() is optimized for QNX.
**
** On the first call for a file (pFile->sectorSize==0) the file system is
** queried with fstatvfs() and both pFile->sectorSize and
** pFile->deviceCharacteristics are filled in according to the file
** system type name.  Later calls return the cached sector size.
**
** If fstatvfs() fails, the default sector size is returned and the
** device characteristics are left at zero.  A sector size that is not a
** multiple of 512 is rejected: the defaults are restored.
*/
#ifdef __QNXNTO__
#include <sys/dcmd_blk.h>
#include <sys/statvfs.h>
static int unixSectorSize(sqlite3_file *id){
  unixFile *pFile = (unixFile*)id;

  if( pFile->sectorSize == 0 ){
    struct statvfs fsInfo;
    const char *zFsType;       /* File system type name from fstatvfs() */

    /* Set defaults for non-supported filesystems */
    pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
    pFile->deviceCharacteristics = 0;
    if( fstatvfs(pFile->h, &fsInfo) == -1 ) {
      return pFile->sectorSize;
    }
    zFsType = fsInfo.f_basetype;

    if( strcmp(zFsType, "tmp")==0 ){
      /* Writes are treated as atomic up to 4K, appends as safe (the file
      ** does not grow until the write succeeds) and writes as ordered. */
      pFile->sectorSize = fsInfo.f_bsize;
      pFile->deviceCharacteristics =
        SQLITE_IOCAP_ATOMIC4K |
        SQLITE_IOCAP_SAFE_APPEND |
        SQLITE_IOCAP_SEQUENTIAL |
        0;
    }else if( strstr(zFsType, "etfs")!=0 ){
      /* etfs cluster size writes are atomic: a single atomic-write bit
      ** scaled from the sector size, plus safe append and ordered writes. */
      pFile->sectorSize = fsInfo.f_bsize;
      pFile->deviceCharacteristics =
        (pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) |
        SQLITE_IOCAP_SAFE_APPEND |
        SQLITE_IOCAP_SEQUENTIAL |
        0;
    }else if( strcmp(zFsType, "qnx6")==0 ){
      /* All filesystem writes are atomic; safe append; ordered writes. */
      pFile->sectorSize = fsInfo.f_bsize;
      pFile->deviceCharacteristics =
        SQLITE_IOCAP_ATOMIC |
        SQLITE_IOCAP_SAFE_APPEND |
        SQLITE_IOCAP_SEQUENTIAL |
        0;
    }else if( strcmp(zFsType, "qnx4")==0 || strstr(zFsType, "dos")!=0 ){
      /* Full bitset of atomics from the max sector size and smaller,
      ** plus ordered writes.  No safe append here. */
      pFile->sectorSize = fsInfo.f_bsize;
      pFile->deviceCharacteristics =
        (((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2) |
        SQLITE_IOCAP_SEQUENTIAL |
        0;
    }else{
      /* Unrecognized type: keep the default sector size; 512-byte blocks
      ** are atomic and appends are safe. */
      pFile->deviceCharacteristics =
        SQLITE_IOCAP_ATOMIC512 |
        SQLITE_IOCAP_SAFE_APPEND |
        0;
    }
  }

  /* Last chance verification.  If the sector size isn't a multiple of 512
  ** then it isn't valid. */
  if( pFile->sectorSize % 512 != 0 ){
    pFile->deviceCharacteristics = 0;
    pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
  }
  return pFile->sectorSize;
}
#endif /* __QNXNTO__ */
|
|
|
|
/*
|
|
** Return the device characteristics for the file.
|
|
**
|
|
** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default.
|
|
** However, that choice is controversial since technically the underlying
|
|
** file system does not always provide powersafe overwrites. (In other
|
|
** words, after a power-loss event, parts of the file that were never
|
|
** written might end up being altered.) However, non-PSOW behavior is very,
|
|
** very rare. And asserting PSOW makes a large reduction in the amount
|
|
** of required I/O for journaling, since a lot of padding is eliminated.
|
|
** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control
|
|
** available to turn it off and URI query parameter available to turn it off.
|
|
*/
|
|
static int unixDeviceCharacteristics(sqlite3_file *id){
|
|
unixFile *p = (unixFile*)id;
|
|
int rc = 0;
|
|
#ifdef __QNXNTO__
|
|
if( p->sectorSize==0 ) unixSectorSize(id);
|
|
rc = p->deviceCharacteristics;
|
|
#endif
|
|
if( p->ctrlFlags & UNIXFILE_PSOW ){
|
|
rc |= SQLITE_IOCAP_POWERSAFE_OVERWRITE;
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
|
|
|
|
/*
** Return the system page size.
**
** VxWorks builds report a fixed 1024.  Builds with _BSD_SOURCE defined
** ask getpagesize(); all others ask sysconf(_SC_PAGESIZE).
**
** This function should not be called directly by other code in this file.
** Instead, it should be called via macro osGetpagesize().
*/
static int unixGetpagesize(void){
  int szPage;
#if OS_VXWORKS
  szPage = 1024;
#elif defined(_BSD_SOURCE)
  szPage = getpagesize();
#else
  szPage = (int)sysconf(_SC_PAGESIZE);
#endif
  return szPage;
}
|
|
|
|
#endif /* !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 */
|
|
|
|
#ifndef SQLITE_OMIT_WAL
|
|
|
|
/*
|
|
** Object used to represent an shared memory buffer.
|
|
**
|
|
** When multiple threads all reference the same wal-index, each thread
|
|
** has its own unixShm object, but they all point to a single instance
|
|
** of this unixShmNode object. In other words, each wal-index is opened
|
|
** only once per process.
|
|
**
|
|
** Each unixShmNode object is connected to a single unixInodeInfo object.
|
|
** We could coalesce this object into unixInodeInfo, but that would mean
|
|
** every open file that does not use shared memory (in other words, most
|
|
** open files) would have to carry around this extra information. So
|
|
** the unixInodeInfo object contains a pointer to this unixShmNode object
|
|
** and the unixShmNode object is created only when needed.
|
|
**
|
|
** unixMutexHeld() must be true when creating or destroying
|
|
** this object or while reading or writing the following fields:
|
|
**
|
|
** nRef
|
|
**
|
|
** The following fields are read-only after the object is created:
|
|
**
|
|
** fid
|
|
** zFilename
|
|
**
|
|
** Either unixShmNode.mutex must be held or unixShmNode.nRef==0 and
|
|
** unixMutexHeld() is true when reading or writing any other field
|
|
** in this structure.
|
|
*/
|
|
struct unixShmNode {
|
|
unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */
|
|
sqlite3_mutex *mutex; /* Mutex to access this object */
|
|
char *zFilename; /* Name of the mmapped file */
|
|
int h; /* Open file descriptor */
|
|
int szRegion; /* Size of shared-memory regions */
|
|
u16 nRegion; /* Size of array apRegion */
|
|
u8 isReadonly; /* True if read-only */
|
|
char **apRegion; /* Array of mapped shared-memory regions */
|
|
int nRef; /* Number of unixShm objects pointing to this */
|
|
unixShm *pFirst; /* All unixShm objects pointing to this */
|
|
#ifdef SQLITE_DEBUG
|
|
u8 exclMask; /* Mask of exclusive locks held */
|
|
u8 sharedMask; /* Mask of shared locks held */
|
|
u8 nextShmId; /* Next available unixShm.id value */
|
|
#endif
|
|
};
|
|
|
|
/*
|
|
** Structure used internally by this VFS to record the state of an
|
|
** open shared memory connection.
|
|
**
|
|
** The following fields are initialized when this object is created and
|
|
** are read-only thereafter:
|
|
**
|
|
** unixShm.pFile
|
|
** unixShm.id
|
|
**
|
|
** All other fields are read/write. The unixShm.pFile->mutex must be held
|
|
** while accessing any read/write fields.
|
|
*/
|
|
struct unixShm {
|
|
unixShmNode *pShmNode; /* The underlying unixShmNode object */
|
|
unixShm *pNext; /* Next unixShm with the same unixShmNode */
|
|
u8 hasMutex; /* True if holding the unixShmNode mutex */
|
|
u8 id; /* Id of this connection within its unixShmNode */
|
|
u16 sharedMask; /* Mask of shared locks held */
|
|
u16 exclMask; /* Mask of exclusive locks held */
|
|
};
|
|
|
|
/*
|
|
** Constants used for locking
|
|
*/
|
|
#define UNIX_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */
|
|
#define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */
|
|
|
|
/*
|
|
** Apply posix advisory locks for all bytes from ofst through ofst+n-1.
|
|
**
|
|
** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking
|
|
** otherwise.
|
|
*/
|
|
static int unixShmSystemLock(
|
|
unixFile *pFile, /* Open connection to the WAL file */
|
|
int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */
|
|
int ofst, /* First byte of the locking range */
|
|
int n /* Number of bytes to lock */
|
|
){
|
|
unixShmNode *pShmNode; /* Apply locks to this open shared-memory segment */
|
|
struct flock f; /* The posix advisory locking structure */
|
|
int rc = SQLITE_OK; /* Result code form fcntl() */
|
|
|
|
/* Access to the unixShmNode object is serialized by the caller */
|
|
pShmNode = pFile->pInode->pShmNode;
|
|
assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 );
|
|
|
|
/* Shared locks never span more than one byte */
|
|
assert( n==1 || lockType!=F_RDLCK );
|
|
|
|
/* Locks are within range */
|
|
assert( n>=1 && n<=SQLITE_SHM_NLOCK );
|
|
|
|
if( pShmNode->h>=0 ){
|
|
/* Initialize the locking parameters */
|
|
memset(&f, 0, sizeof(f));
|
|
f.l_type = lockType;
|
|
f.l_whence = SEEK_SET;
|
|
f.l_start = ofst;
|
|
f.l_len = n;
|
|
|
|
rc = osFcntl(pShmNode->h, F_SETLK, &f);
|
|
rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY;
|
|
}
|
|
|
|
/* Update the global lock state and do debug tracing */
|
|
#ifdef SQLITE_DEBUG
|
|
{ u16 mask;
|
|
OSTRACE(("SHM-LOCK "));
|
|
mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<<ofst);
|
|
if( rc==SQLITE_OK ){
|
|
if( lockType==F_UNLCK ){
|
|
OSTRACE(("unlock %d ok", ofst));
|
|
pShmNode->exclMask &= ~mask;
|
|
pShmNode->sharedMask &= ~mask;
|
|
}else if( lockType==F_RDLCK ){
|
|
OSTRACE(("read-lock %d ok", ofst));
|
|
pShmNode->exclMask &= ~mask;
|
|
pShmNode->sharedMask |= mask;
|
|
}else{
|
|
assert( lockType==F_WRLCK );
|
|
OSTRACE(("write-lock %d ok", ofst));
|
|
pShmNode->exclMask |= mask;
|
|
pShmNode->sharedMask &= ~mask;
|
|
}
|
|
}else{
|
|
if( lockType==F_UNLCK ){
|
|
OSTRACE(("unlock %d failed", ofst));
|
|
}else if( lockType==F_RDLCK ){
|
|
OSTRACE(("read-lock failed"));
|
|
}else{
|
|
assert( lockType==F_WRLCK );
|
|
OSTRACE(("write-lock %d failed", ofst));
|
|
}
|
|
}
|
|
OSTRACE((" - afterwards %03x,%03x\n",
|
|
pShmNode->sharedMask, pShmNode->exclMask));
|
|
}
|
|
#endif
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
** Return the minimum number of 32KB shm regions that should be mapped at
** a time, assuming that each mapping must be an integer multiple of the
** current system page-size.
**
** Usually, this is 1.  The exception seems to be systems that are
** configured to use 64KB pages - in this case each mapping must cover at
** least two shm regions.
*/
static int unixShmRegionPerMap(void){
  const int szShmRegion = 32*1024;   /* SHM region size */
  int szPage = osGetpagesize();      /* System page size */

  assert( (szPage & (szPage-1))==0 );  /* Page size must be a power of 2 */
  return (szPage<szShmRegion) ? 1 : szPage/szShmRegion;
}
|
|
|
|
/*
|
|
** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0.
|
|
**
|
|
** This is not a VFS shared-memory method; it is a utility function called
|
|
** by VFS shared-memory methods.
|
|
*/
|
|
static void unixShmPurge(unixFile *pFd){
|
|
unixShmNode *p = pFd->pInode->pShmNode;
|
|
assert( unixMutexHeld() );
|
|
if( p && ALWAYS(p->nRef==0) ){
|
|
int nShmPerMap = unixShmRegionPerMap();
|
|
int i;
|
|
assert( p->pInode==pFd->pInode );
|
|
sqlite3_mutex_free(p->mutex);
|
|
for(i=0; i<p->nRegion; i+=nShmPerMap){
|
|
if( p->h>=0 ){
|
|
osMunmap(p->apRegion[i], p->szRegion);
|
|
}else{
|
|
sqlite3_free(p->apRegion[i]);
|
|
}
|
|
}
|
|
sqlite3_free(p->apRegion);
|
|
if( p->h>=0 ){
|
|
robust_close(pFd, p->h, __LINE__);
|
|
p->h = -1;
|
|
}
|
|
p->pInode->pShmNode = 0;
|
|
sqlite3_free(p);
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Open a shared-memory area associated with open database file pDbFd.
|
|
** This particular implementation uses mmapped files.
|
|
**
|
|
** The file used to implement shared-memory is in the same directory
|
|
** as the open database file and has the same name as the open database
|
|
** file with the "-shm" suffix added. For example, if the database file
|
|
** is "/home/user1/config.db" then the file that is created and mmapped
|
|
** for shared memory will be called "/home/user1/config.db-shm".
|
|
**
|
|
** Another approach is to use files in /dev/shm or /dev/tmp or
|
|
** some other tmpfs mount. But if a file in a different directory
|
|
** from the database file is used, then differing access permissions
|
|
** or a chroot() might cause two different processes on the same
|
|
** database to end up using different files for shared memory -
|
|
** meaning that their memory would not really be shared - resulting
|
|
** in database corruption. Nevertheless, this tmpfs file usage
|
|
** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm"
|
|
** or the equivalent. The use of the SQLITE_SHM_DIRECTORY compile-time
|
|
** option results in an incompatible build of SQLite; if builds of SQLite
|
|
** with differing SQLITE_SHM_DIRECTORY settings attempt to use the
|
|
** same database file at the same time, database corruption will likely
|
|
** result. The SQLITE_SHM_DIRECTORY compile-time option is considered
|
|
** "unsupported" and may go away in a future SQLite release.
|
|
**
|
|
** When opening a new shared-memory file, if no other instances of that
|
|
** file are currently open, in this process or in other processes, then
|
|
** the file must be truncated to zero length or have its header cleared.
|
|
**
|
|
** If the original database file (pDbFd) is using the "unix-excl" VFS
|
|
** that means that an exclusive lock is held on the database file and
|
|
** that no other processes are able to read or write the database. In
|
|
** that case, we do not really need shared memory. No shared memory
|
|
** file is created. The shared memory will be simulated with heap memory.
|
|
*/
|
|
static int unixOpenSharedMemory(unixFile *pDbFd){
|
|
struct unixShm *p = 0; /* The connection to be opened */
|
|
struct unixShmNode *pShmNode; /* The underlying mmapped file */
|
|
int rc; /* Result code */
|
|
unixInodeInfo *pInode; /* The inode of fd */
|
|
char *zShmFilename; /* Name of the file used for SHM */
|
|
int nShmFilename; /* Size of the SHM filename in bytes */
|
|
|
|
/* Allocate space for the new unixShm object. */
|
|
p = sqlite3_malloc64( sizeof(*p) );
|
|
if( p==0 ) return SQLITE_NOMEM;
|
|
memset(p, 0, sizeof(*p));
|
|
assert( pDbFd->pShm==0 );
|
|
|
|
/* Check to see if a unixShmNode object already exists. Reuse an existing
|
|
** one if present. Create a new one if necessary.
|
|
*/
|
|
unixEnterMutex();
|
|
pInode = pDbFd->pInode;
|
|
pShmNode = pInode->pShmNode;
|
|
if( pShmNode==0 ){
|
|
struct stat sStat; /* fstat() info for database file */
|
|
#ifndef SQLITE_SHM_DIRECTORY
|
|
const char *zBasePath = pDbFd->zPath;
|
|
#endif
|
|
|
|
/* Call fstat() to figure out the permissions on the database file. If
|
|
** a new *-shm file is created, an attempt will be made to create it
|
|
** with the same permissions.
|
|
*/
|
|
if( osFstat(pDbFd->h, &sStat) ){
|
|
rc = SQLITE_IOERR_FSTAT;
|
|
goto shm_open_err;
|
|
}
|
|
|
|
#ifdef SQLITE_SHM_DIRECTORY
|
|
nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31;
|
|
#else
|
|
nShmFilename = 6 + (int)strlen(zBasePath);
|
|
#endif
|
|
pShmNode = sqlite3_malloc64( sizeof(*pShmNode) + nShmFilename );
|
|
if( pShmNode==0 ){
|
|
rc = SQLITE_NOMEM;
|
|
goto shm_open_err;
|
|
}
|
|
memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename);
|
|
zShmFilename = pShmNode->zFilename = (char*)&pShmNode[1];
|
|
#ifdef SQLITE_SHM_DIRECTORY
|
|
sqlite3_snprintf(nShmFilename, zShmFilename,
|
|
SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x",
|
|
(u32)sStat.st_ino, (u32)sStat.st_dev);
|
|
#else
|
|
sqlite3_snprintf(nShmFilename, zShmFilename, "%s-shm", zBasePath);
|
|
sqlite3FileSuffix3(pDbFd->zPath, zShmFilename);
|
|
#endif
|
|
pShmNode->h = -1;
|
|
pDbFd->pInode->pShmNode = pShmNode;
|
|
pShmNode->pInode = pDbFd->pInode;
|
|
pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
|
|
if( pShmNode->mutex==0 ){
|
|
rc = SQLITE_NOMEM;
|
|
goto shm_open_err;
|
|
}
|
|
|
|
if( pInode->bProcessLock==0 ){
|
|
int openFlags = O_RDWR | O_CREAT;
|
|
if( sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){
|
|
openFlags = O_RDONLY;
|
|
pShmNode->isReadonly = 1;
|
|
}
|
|
pShmNode->h = robust_open(zShmFilename, openFlags, (sStat.st_mode&0777));
|
|
if( pShmNode->h<0 ){
|
|
rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename);
|
|
goto shm_open_err;
|
|
}
|
|
|
|
/* If this process is running as root, make sure that the SHM file
|
|
** is owned by the same user that owns the original database. Otherwise,
|
|
** the original owner will not be able to connect.
|
|
*/
|
|
robustFchown(pShmNode->h, sStat.st_uid, sStat.st_gid);
|
|
|
|
/* Check to see if another process is holding the dead-man switch.
|
|
** If not, truncate the file to zero length.
|
|
*/
|
|
rc = SQLITE_OK;
|
|
if( unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){
|
|
if( robust_ftruncate(pShmNode->h, 0) ){
|
|
rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename);
|
|
}
|
|
}
|
|
if( rc==SQLITE_OK ){
|
|
rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1);
|
|
}
|
|
if( rc ) goto shm_open_err;
|
|
}
|
|
}
|
|
|
|
/* Make the new connection a child of the unixShmNode */
|
|
p->pShmNode = pShmNode;
|
|
#ifdef SQLITE_DEBUG
|
|
p->id = pShmNode->nextShmId++;
|
|
#endif
|
|
pShmNode->nRef++;
|
|
pDbFd->pShm = p;
|
|
unixLeaveMutex();
|
|
|
|
/* The reference count on pShmNode has already been incremented under
|
|
** the cover of the unixEnterMutex() mutex and the pointer from the
|
|
** new (struct unixShm) object to the pShmNode has been set. All that is
|
|
** left to do is to link the new object into the linked list starting
|
|
** at pShmNode->pFirst. This must be done while holding the pShmNode->mutex
|
|
** mutex.
|
|
*/
|
|
sqlite3_mutex_enter(pShmNode->mutex);
|
|
p->pNext = pShmNode->pFirst;
|
|
pShmNode->pFirst = p;
|
|
sqlite3_mutex_leave(pShmNode->mutex);
|
|
return SQLITE_OK;
|
|
|
|
/* Jump here on any error */
|
|
shm_open_err:
|
|
unixShmPurge(pDbFd); /* This call frees pShmNode if required */
|
|
sqlite3_free(p);
|
|
unixLeaveMutex();
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** This function is called to obtain a pointer to region iRegion of the
|
|
** shared-memory associated with the database file fd. Shared-memory regions
|
|
** are numbered starting from zero. Each shared-memory region is szRegion
|
|
** bytes in size.
|
|
**
|
|
** If an error occurs, an error code is returned and *pp is set to NULL.
|
|
**
|
|
** Otherwise, if the bExtend parameter is 0 and the requested shared-memory
|
|
** region has not been allocated (by any client, including one running in a
|
|
** separate process), then *pp is set to NULL and SQLITE_OK returned. If
|
|
** bExtend is non-zero and the requested shared-memory region has not yet
|
|
** been allocated, it is allocated by this function.
|
|
**
|
|
** If the shared-memory region has already been allocated or is allocated by
|
|
** this call as described above, then it is mapped into this processes
|
|
** address space (if it is not already), *pp is set to point to the mapped
|
|
** memory and SQLITE_OK returned.
|
|
*/
|
|
static int unixShmMap(
|
|
sqlite3_file *fd, /* Handle open on database file */
|
|
int iRegion, /* Region to retrieve */
|
|
int szRegion, /* Size of regions */
|
|
int bExtend, /* True to extend file if necessary */
|
|
void volatile **pp /* OUT: Mapped memory */
|
|
){
|
|
unixFile *pDbFd = (unixFile*)fd;
|
|
unixShm *p;
|
|
unixShmNode *pShmNode;
|
|
int rc = SQLITE_OK;
|
|
int nShmPerMap = unixShmRegionPerMap();
|
|
int nReqRegion;
|
|
|
|
/* If the shared-memory file has not yet been opened, open it now. */
|
|
if( pDbFd->pShm==0 ){
|
|
rc = unixOpenSharedMemory(pDbFd);
|
|
if( rc!=SQLITE_OK ) return rc;
|
|
}
|
|
|
|
p = pDbFd->pShm;
|
|
pShmNode = p->pShmNode;
|
|
sqlite3_mutex_enter(pShmNode->mutex);
|
|
assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
|
|
assert( pShmNode->pInode==pDbFd->pInode );
|
|
assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
|
|
assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );
|
|
|
|
/* Minimum number of regions required to be mapped. */
|
|
nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap;
|
|
|
|
if( pShmNode->nRegion<nReqRegion ){
|
|
char **apNew; /* New apRegion[] array */
|
|
int nByte = nReqRegion*szRegion; /* Minimum required file size */
|
|
struct stat sStat; /* Used by fstat() */
|
|
|
|
pShmNode->szRegion = szRegion;
|
|
|
|
if( pShmNode->h>=0 ){
|
|
/* The requested region is not mapped into this processes address space.
|
|
** Check to see if it has been allocated (i.e. if the wal-index file is
|
|
** large enough to contain the requested region).
|
|
*/
|
|
if( osFstat(pShmNode->h, &sStat) ){
|
|
rc = SQLITE_IOERR_SHMSIZE;
|
|
goto shmpage_out;
|
|
}
|
|
|
|
if( sStat.st_size<nByte ){
|
|
/* The requested memory region does not exist. If bExtend is set to
|
|
** false, exit early. *pp will be set to NULL and SQLITE_OK returned.
|
|
*/
|
|
if( !bExtend ){
|
|
goto shmpage_out;
|
|
}
|
|
|
|
/* Alternatively, if bExtend is true, extend the file. Do this by
|
|
** writing a single byte to the end of each (OS) page being
|
|
** allocated or extended. Technically, we need only write to the
|
|
** last page in order to extend the file. But writing to all new
|
|
** pages forces the OS to allocate them immediately, which reduces
|
|
** the chances of SIGBUS while accessing the mapped region later on.
|
|
*/
|
|
else{
|
|
static const int pgsz = 4096;
|
|
int iPg;
|
|
|
|
/* Write to the last byte of each newly allocated or extended page */
|
|
assert( (nByte % pgsz)==0 );
|
|
for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){
|
|
int x = 0;
|
|
if( seekAndWriteFd(pShmNode->h, iPg*pgsz + pgsz-1, "", 1, &x)!=1 ){
|
|
const char *zFile = pShmNode->zFilename;
|
|
rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile);
|
|
goto shmpage_out;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Map the requested memory region into this processes address space. */
|
|
apNew = (char **)sqlite3_realloc(
|
|
pShmNode->apRegion, nReqRegion*sizeof(char *)
|
|
);
|
|
if( !apNew ){
|
|
rc = SQLITE_IOERR_NOMEM;
|
|
goto shmpage_out;
|
|
}
|
|
pShmNode->apRegion = apNew;
|
|
while( pShmNode->nRegion<nReqRegion ){
|
|
int nMap = szRegion*nShmPerMap;
|
|
int i;
|
|
void *pMem;
|
|
if( pShmNode->h>=0 ){
|
|
pMem = osMmap(0, nMap,
|
|
pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE,
|
|
MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion
|
|
);
|
|
if( pMem==MAP_FAILED ){
|
|
rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename);
|
|
goto shmpage_out;
|
|
}
|
|
}else{
|
|
pMem = sqlite3_malloc64(szRegion);
|
|
if( pMem==0 ){
|
|
rc = SQLITE_NOMEM;
|
|
goto shmpage_out;
|
|
}
|
|
memset(pMem, 0, szRegion);
|
|
}
|
|
|
|
for(i=0; i<nShmPerMap; i++){
|
|
pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i];
|
|
}
|
|
pShmNode->nRegion += nShmPerMap;
|
|
}
|
|
}
|
|
|
|
shmpage_out:
|
|
if( pShmNode->nRegion>iRegion ){
|
|
*pp = pShmNode->apRegion[iRegion];
|
|
}else{
|
|
*pp = 0;
|
|
}
|
|
if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY;
|
|
sqlite3_mutex_leave(pShmNode->mutex);
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Change the lock state for a shared-memory segment.
|
|
**
|
|
** Note that the relationship between SHAREd and EXCLUSIVE locks is a little
|
|
** different here than in posix. In xShmLock(), one can go from unlocked
|
|
** to shared and back or from unlocked to exclusive and back. But one may
|
|
** not go from shared to exclusive or from exclusive to shared.
|
|
*/
|
|
static int unixShmLock(
|
|
sqlite3_file *fd, /* Database file holding the shared memory */
|
|
int ofst, /* First lock to acquire or release */
|
|
int n, /* Number of locks to acquire or release */
|
|
int flags /* What to do with the lock */
|
|
){
|
|
unixFile *pDbFd = (unixFile*)fd; /* Connection holding shared memory */
|
|
unixShm *p = pDbFd->pShm; /* The shared memory being locked */
|
|
unixShm *pX; /* For looping over all siblings */
|
|
unixShmNode *pShmNode = p->pShmNode; /* The underlying file iNode */
|
|
int rc = SQLITE_OK; /* Result code */
|
|
u16 mask; /* Mask of locks to take or release */
|
|
|
|
assert( pShmNode==pDbFd->pInode->pShmNode );
|
|
assert( pShmNode->pInode==pDbFd->pInode );
|
|
assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK );
|
|
assert( n>=1 );
|
|
assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)
|
|
|| flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE)
|
|
|| flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED)
|
|
|| flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) );
|
|
assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 );
|
|
assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
|
|
assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );
|
|
|
|
mask = (1<<(ofst+n)) - (1<<ofst);
|
|
assert( n>1 || mask==(1<<ofst) );
|
|
sqlite3_mutex_enter(pShmNode->mutex);
|
|
if( flags & SQLITE_SHM_UNLOCK ){
|
|
u16 allMask = 0; /* Mask of locks held by siblings */
|
|
|
|
/* See if any siblings hold this same lock */
|
|
for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
|
|
if( pX==p ) continue;
|
|
assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 );
|
|
allMask |= pX->sharedMask;
|
|
}
|
|
|
|
/* Unlock the system-level locks */
|
|
if( (mask & allMask)==0 ){
|
|
rc = unixShmSystemLock(pDbFd, F_UNLCK, ofst+UNIX_SHM_BASE, n);
|
|
}else{
|
|
rc = SQLITE_OK;
|
|
}
|
|
|
|
/* Undo the local locks */
|
|
if( rc==SQLITE_OK ){
|
|
p->exclMask &= ~mask;
|
|
p->sharedMask &= ~mask;
|
|
}
|
|
}else if( flags & SQLITE_SHM_SHARED ){
|
|
u16 allShared = 0; /* Union of locks held by connections other than "p" */
|
|
|
|
/* Find out which shared locks are already held by sibling connections.
|
|
** If any sibling already holds an exclusive lock, go ahead and return
|
|
** SQLITE_BUSY.
|
|
*/
|
|
for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
|
|
if( (pX->exclMask & mask)!=0 ){
|
|
rc = SQLITE_BUSY;
|
|
break;
|
|
}
|
|
allShared |= pX->sharedMask;
|
|
}
|
|
|
|
/* Get shared locks at the system level, if necessary */
|
|
if( rc==SQLITE_OK ){
|
|
if( (allShared & mask)==0 ){
|
|
rc = unixShmSystemLock(pDbFd, F_RDLCK, ofst+UNIX_SHM_BASE, n);
|
|
}else{
|
|
rc = SQLITE_OK;
|
|
}
|
|
}
|
|
|
|
/* Get the local shared locks */
|
|
if( rc==SQLITE_OK ){
|
|
p->sharedMask |= mask;
|
|
}
|
|
}else{
|
|
/* Make sure no sibling connections hold locks that will block this
|
|
** lock. If any do, return SQLITE_BUSY right away.
|
|
*/
|
|
for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
|
|
if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){
|
|
rc = SQLITE_BUSY;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Get the exclusive locks at the system level. Then if successful
|
|
** also mark the local connection as being locked.
|
|
*/
|
|
if( rc==SQLITE_OK ){
|
|
rc = unixShmSystemLock(pDbFd, F_WRLCK, ofst+UNIX_SHM_BASE, n);
|
|
if( rc==SQLITE_OK ){
|
|
assert( (p->sharedMask & mask)==0 );
|
|
p->exclMask |= mask;
|
|
}
|
|
}
|
|
}
|
|
sqlite3_mutex_leave(pShmNode->mutex);
|
|
OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n",
|
|
p->id, osGetpid(0), p->sharedMask, p->exclMask));
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Implement a memory barrier or memory fence on shared memory.
|
|
**
|
|
** All loads and stores begun before the barrier must complete before
|
|
** any load or store begun after the barrier.
|
|
*/
|
|
static void unixShmBarrier(
|
|
sqlite3_file *fd /* Database file holding the shared memory */
|
|
){
|
|
UNUSED_PARAMETER(fd);
|
|
sqlite3MemoryBarrier(); /* compiler-defined memory barrier */
|
|
unixEnterMutex(); /* Also mutex, for redundancy */
|
|
unixLeaveMutex();
|
|
}
|
|
|
|
/*
|
|
** Close a connection to shared-memory. Delete the underlying
|
|
** storage if deleteFlag is true.
|
|
**
|
|
** If there is no shared memory associated with the connection then this
|
|
** routine is a harmless no-op.
|
|
*/
|
|
static int unixShmUnmap(
|
|
sqlite3_file *fd, /* The underlying database file */
|
|
int deleteFlag /* Delete shared-memory if true */
|
|
){
|
|
unixShm *p; /* The connection to be closed */
|
|
unixShmNode *pShmNode; /* The underlying shared-memory file */
|
|
unixShm **pp; /* For looping over sibling connections */
|
|
unixFile *pDbFd; /* The underlying database file */
|
|
|
|
pDbFd = (unixFile*)fd;
|
|
p = pDbFd->pShm;
|
|
if( p==0 ) return SQLITE_OK;
|
|
pShmNode = p->pShmNode;
|
|
|
|
assert( pShmNode==pDbFd->pInode->pShmNode );
|
|
assert( pShmNode->pInode==pDbFd->pInode );
|
|
|
|
/* Remove connection p from the set of connections associated
|
|
** with pShmNode */
|
|
sqlite3_mutex_enter(pShmNode->mutex);
|
|
for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){}
|
|
*pp = p->pNext;
|
|
|
|
/* Free the connection p */
|
|
sqlite3_free(p);
|
|
pDbFd->pShm = 0;
|
|
sqlite3_mutex_leave(pShmNode->mutex);
|
|
|
|
/* If pShmNode->nRef has reached 0, then close the underlying
|
|
** shared-memory file, too */
|
|
unixEnterMutex();
|
|
assert( pShmNode->nRef>0 );
|
|
pShmNode->nRef--;
|
|
if( pShmNode->nRef==0 ){
|
|
if( deleteFlag && pShmNode->h>=0 ){
|
|
osUnlink(pShmNode->zFilename);
|
|
}
|
|
unixShmPurge(pDbFd);
|
|
}
|
|
unixLeaveMutex();
|
|
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
|
|
#else
|
|
/* Fallback when the shared-memory methods above are compiled out: each
** method name expands to 0. */
#define unixShmMap     0
#define unixShmLock    0
#define unixShmBarrier 0
#define unixShmUnmap   0
|
|
#endif /* #ifndef SQLITE_OMIT_WAL */
|
|
|
|
#if SQLITE_MAX_MMAP_SIZE>0
|
|
/*
|
|
** If it is currently memory mapped, unmap file pFd.
|
|
*/
|
|
static void unixUnmapfile(unixFile *pFd){
|
|
assert( pFd->nFetchOut==0 );
|
|
if( pFd->pMapRegion ){
|
|
osMunmap(pFd->pMapRegion, pFd->mmapSizeActual);
|
|
pFd->pMapRegion = 0;
|
|
pFd->mmapSize = 0;
|
|
pFd->mmapSizeActual = 0;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Attempt to set the size of the memory mapping maintained by file
|
|
** descriptor pFd to nNew bytes. Any existing mapping is discarded.
|
|
**
|
|
** If successful, this function sets the following variables:
|
|
**
|
|
** unixFile.pMapRegion
|
|
** unixFile.mmapSize
|
|
** unixFile.mmapSizeActual
|
|
**
|
|
** If unsuccessful, an error message is logged via sqlite3_log() and
|
|
** the three variables above are zeroed. In this case SQLite should
|
|
** continue accessing the database using the xRead() and xWrite()
|
|
** methods.
|
|
*/
|
|
static void unixRemapfile(
|
|
unixFile *pFd, /* File descriptor object */
|
|
i64 nNew /* Required mapping size */
|
|
){
|
|
const char *zErr = "mmap";
|
|
int h = pFd->h; /* File descriptor open on db file */
|
|
u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */
|
|
i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */
|
|
u8 *pNew = 0; /* Location of new mapping */
|
|
int flags = PROT_READ; /* Flags to pass to mmap() */
|
|
|
|
assert( pFd->nFetchOut==0 );
|
|
assert( nNew>pFd->mmapSize );
|
|
assert( nNew<=pFd->mmapSizeMax );
|
|
assert( nNew>0 );
|
|
assert( pFd->mmapSizeActual>=pFd->mmapSize );
|
|
assert( MAP_FAILED!=0 );
|
|
|
|
#ifdef SQLITE_MMAP_READWRITE
|
|
if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
|
|
#endif
|
|
|
|
if( pOrig ){
|
|
#if HAVE_MREMAP
|
|
i64 nReuse = pFd->mmapSize;
|
|
#else
|
|
const int szSyspage = osGetpagesize();
|
|
i64 nReuse = (pFd->mmapSize & ~(szSyspage-1));
|
|
#endif
|
|
u8 *pReq = &pOrig[nReuse];
|
|
|
|
/* Unmap any pages of the existing mapping that cannot be reused. */
|
|
if( nReuse!=nOrig ){
|
|
osMunmap(pReq, nOrig-nReuse);
|
|
}
|
|
|
|
#if HAVE_MREMAP
|
|
pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE);
|
|
zErr = "mremap";
|
|
#else
|
|
pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse);
|
|
if( pNew!=MAP_FAILED ){
|
|
if( pNew!=pReq ){
|
|
osMunmap(pNew, nNew - nReuse);
|
|
pNew = 0;
|
|
}else{
|
|
pNew = pOrig;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/* The attempt to extend the existing mapping failed. Free it. */
|
|
if( pNew==MAP_FAILED || pNew==0 ){
|
|
osMunmap(pOrig, nReuse);
|
|
}
|
|
}
|
|
|
|
/* If pNew is still NULL, try to create an entirely new mapping. */
|
|
if( pNew==0 ){
|
|
pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0);
|
|
}
|
|
|
|
if( pNew==MAP_FAILED ){
|
|
pNew = 0;
|
|
nNew = 0;
|
|
unixLogError(SQLITE_OK, zErr, pFd->zPath);
|
|
|
|
/* If the mmap() above failed, assume that all subsequent mmap() calls
|
|
** will probably fail too. Fall back to using xRead/xWrite exclusively
|
|
** in this case. */
|
|
pFd->mmapSizeMax = 0;
|
|
}
|
|
pFd->pMapRegion = (void *)pNew;
|
|
pFd->mmapSize = pFd->mmapSizeActual = nNew;
|
|
}
|
|
|
|
/*
|
|
** Memory map or remap the file opened by file-descriptor pFd (if the file
|
|
** is already mapped, the existing mapping is replaced by the new). Or, if
|
|
** there already exists a mapping for this file, and there are still
|
|
** outstanding xFetch() references to it, this function is a no-op.
|
|
**
|
|
** If parameter nByte is non-negative, then it is the requested size of
|
|
** the mapping to create. Otherwise, if nByte is less than zero, then the
|
|
** requested size is the size of the file on disk. The actual size of the
|
|
** created mapping is either the requested size or the value configured
|
|
** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller.
|
|
**
|
|
** SQLITE_OK is returned if no error occurs (even if the mapping is not
|
|
** recreated as a result of outstanding references) or an SQLite error
|
|
** code otherwise.
|
|
*/
|
|
static int unixMapfile(unixFile *pFd, i64 nMap){
|
|
assert( nMap>=0 || pFd->nFetchOut==0 );
|
|
assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) );
|
|
if( pFd->nFetchOut>0 ) return SQLITE_OK;
|
|
|
|
if( nMap<0 ){
|
|
struct stat statbuf; /* Low-level file information */
|
|
if( osFstat(pFd->h, &statbuf) ){
|
|
return SQLITE_IOERR_FSTAT;
|
|
}
|
|
nMap = statbuf.st_size;
|
|
}
|
|
if( nMap>pFd->mmapSizeMax ){
|
|
nMap = pFd->mmapSizeMax;
|
|
}
|
|
|
|
assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) );
|
|
if( nMap!=pFd->mmapSize ){
|
|
unixRemapfile(pFd, nMap);
|
|
}
|
|
|
|
return SQLITE_OK;
|
|
}
|
|
#endif /* SQLITE_MAX_MMAP_SIZE>0 */
|
|
|
|
/*
|
|
** If possible, return a pointer to a mapping of file fd starting at offset
|
|
** iOff. The mapping must be valid for at least nAmt bytes.
|
|
**
|
|
** If such a pointer can be obtained, store it in *pp and return SQLITE_OK.
|
|
** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK.
|
|
** Finally, if an error does occur, return an SQLite error code. The final
|
|
** value of *pp is undefined in this case.
|
|
**
|
|
** If this function does return a pointer, the caller must eventually
|
|
** release the reference by calling unixUnfetch().
|
|
*/
|
|
static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){
|
|
#if SQLITE_MAX_MMAP_SIZE>0
|
|
unixFile *pFd = (unixFile *)fd; /* The underlying database file */
|
|
#endif
|
|
*pp = 0;
|
|
|
|
#if SQLITE_MAX_MMAP_SIZE>0
|
|
if( pFd->mmapSizeMax>0 ){
|
|
if( pFd->pMapRegion==0 ){
|
|
int rc = unixMapfile(pFd, -1);
|
|
if( rc!=SQLITE_OK ) return rc;
|
|
}
|
|
if( pFd->mmapSize >= iOff+nAmt ){
|
|
*pp = &((u8 *)pFd->pMapRegion)[iOff];
|
|
pFd->nFetchOut++;
|
|
}
|
|
}
|
|
#endif
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** If the third argument is non-NULL, then this function releases a
|
|
** reference obtained by an earlier call to unixFetch(). The second
|
|
** argument passed to this function must be the same as the corresponding
|
|
** argument that was passed to the unixFetch() invocation.
|
|
**
|
|
** Or, if the third argument is NULL, then this function is being called
|
|
** to inform the VFS layer that, according to POSIX, any existing mapping
|
|
** may now be invalid and should be unmapped.
|
|
*/
|
|
static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){
|
|
#if SQLITE_MAX_MMAP_SIZE>0
|
|
unixFile *pFd = (unixFile *)fd; /* The underlying database file */
|
|
UNUSED_PARAMETER(iOff);
|
|
|
|
/* If p==0 (unmap the entire file) then there must be no outstanding
|
|
** xFetch references. Or, if p!=0 (meaning it is an xFetch reference),
|
|
** then there must be at least one outstanding. */
|
|
assert( (p==0)==(pFd->nFetchOut==0) );
|
|
|
|
/* If p!=0, it must match the iOff value. */
|
|
assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] );
|
|
|
|
if( p ){
|
|
pFd->nFetchOut--;
|
|
}else{
|
|
unixUnmapfile(pFd);
|
|
}
|
|
|
|
assert( pFd->nFetchOut>=0 );
|
|
#else
|
|
UNUSED_PARAMETER(fd);
|
|
UNUSED_PARAMETER(p);
|
|
UNUSED_PARAMETER(iOff);
|
|
#endif
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Here ends the implementation of all sqlite3_file methods.
|
|
**
|
|
********************** End sqlite3_file Methods *******************************
|
|
******************************************************************************/
|
|
|
|
/*
|
|
** This division contains definitions of sqlite3_io_methods objects that
|
|
** implement various file locking strategies. It also contains definitions
|
|
** of "finder" functions. A finder-function is used to locate the appropriate
|
|
** sqlite3_io_methods object for a particular database file. The pAppData
|
|
** field of the sqlite3_vfs VFS objects are initialized to be pointers to
|
|
** the correct finder-function for that VFS.
|
|
**
|
|
** Most finder functions return a pointer to a fixed sqlite3_io_methods
|
|
** object. The only interesting finder-function is autolockIoFinder, which
|
|
** looks at the filesystem type and tries to guess the best locking
|
|
** strategy from that.
|
|
**
|
|
** For finder-function F, two objects are created:
|
|
**
|
|
** (1) The real finder-function named "FImpl()".
|
|
**
|
|
** (2) A constant pointer to this function named just "F".
|
|
**
|
|
**
|
|
** A pointer to the F pointer is used as the pAppData value for VFS
|
|
** objects. We have to do this instead of letting pAppData point
|
|
** directly at the finder-function since C90 rules prevent a void*
|
|
** from being cast into a function pointer.
|
|
**
|
|
**
|
|
** Each instance of this macro generates two objects:
|
|
**
|
|
** * A constant sqlite3_io_methods object called METHOD that has locking
|
|
** methods CLOSE, LOCK, UNLOCK, CKRESLOCK.
|
|
**
|
|
** * An I/O method finder function called FINDER that returns a pointer
|
|
** to the METHOD object in the previous bullet.
|
|
*/
|
|
/*
** Macro arguments: FINDER and METHOD name the generated finder and
** sqlite3_io_methods object. VERSION is stored as iVersion. CLOSE, LOCK,
** UNLOCK, CKLOCK and SHMMAP fill the xClose, xLock, xUnlock,
** xCheckReservedLock and xShmMap slots. Every other slot is the same for
** all instances. The generated FINDER##Impl() ignores both of its
** arguments and always returns &METHOD.
*/
#define IOMETHODS(FINDER,METHOD,VERSION,CLOSE,LOCK,UNLOCK,CKLOCK,SHMMAP)     \
static const sqlite3_io_methods METHOD = {                                   \
  VERSION,                      /* iVersion */                               \
  CLOSE,                        /* xClose */                                 \
  unixRead,                     /* xRead */                                  \
  unixWrite,                    /* xWrite */                                 \
  unixTruncate,                 /* xTruncate */                              \
  unixSync,                     /* xSync */                                  \
  unixFileSize,                 /* xFileSize */                              \
  LOCK,                         /* xLock */                                  \
  UNLOCK,                       /* xUnlock */                                \
  CKLOCK,                       /* xCheckReservedLock */                     \
  unixFileControl,              /* xFileControl */                           \
  unixSectorSize,               /* xSectorSize */                            \
  unixDeviceCharacteristics,    /* xDeviceCharacteristics */                 \
  SHMMAP,                       /* xShmMap */                                \
  unixShmLock,                  /* xShmLock */                               \
  unixShmBarrier,               /* xShmBarrier */                            \
  unixShmUnmap,                 /* xShmUnmap */                              \
  unixFetch,                    /* xFetch */                                 \
  unixUnfetch,                  /* xUnfetch */                               \
};                                                                           \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){   \
  UNUSED_PARAMETER(z); UNUSED_PARAMETER(p);                                  \
  return &METHOD;                                                            \
}                                                                            \
static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p)    \
    = FINDER##Impl;
|
|
|
|
/*
|
|
** Here are all of the sqlite3_io_methods objects for each of the
|
|
** locking strategies. Functions that return pointers to these methods
|
|
** are also created.
|
|
*/
|
|
IOMETHODS(
|
|
posixIoFinder, /* Finder function name */
|
|
posixIoMethods, /* sqlite3_io_methods object name */
|
|
3, /* shared memory and mmap are enabled */
|
|
unixClose, /* xClose method */
|
|
unixLock, /* xLock method */
|
|
unixUnlock, /* xUnlock method */
|
|
unixCheckReservedLock, /* xCheckReservedLock method */
|
|
unixShmMap /* xShmMap method */
|
|
)
|
|
IOMETHODS(
|
|
nolockIoFinder, /* Finder function name */
|
|
nolockIoMethods, /* sqlite3_io_methods object name */
|
|
3, /* shared memory is disabled */
|
|
nolockClose, /* xClose method */
|
|
nolockLock, /* xLock method */
|
|
nolockUnlock, /* xUnlock method */
|
|
nolockCheckReservedLock, /* xCheckReservedLock method */
|
|
0 /* xShmMap method */
|
|
)
|
|
IOMETHODS(
|
|
dotlockIoFinder, /* Finder function name */
|
|
dotlockIoMethods, /* sqlite3_io_methods object name */
|
|
1, /* shared memory is disabled */
|
|
dotlockClose, /* xClose method */
|
|
dotlockLock, /* xLock method */
|
|
dotlockUnlock, /* xUnlock method */
|
|
dotlockCheckReservedLock, /* xCheckReservedLock method */
|
|
0 /* xShmMap method */
|
|
)
|
|
|
|
#if SQLITE_ENABLE_LOCKING_STYLE
/* flock()-style locking; only built with SQLITE_ENABLE_LOCKING_STYLE. */
IOMETHODS(
  flockIoFinder,            /* FINDER: finder function name */
  flockIoMethods,           /* METHOD: sqlite3_io_methods object name */
  1,                        /* VERSION: shared memory is disabled */
  flockClose,               /* xClose method */
  flockLock,                /* xLock method */
  flockUnlock,              /* xUnlock method */
  flockCheckReservedLock,   /* xCheckReservedLock method */
  0                         /* xShmMap method */
)
#endif
|
|
|
|
#if OS_VXWORKS
/* Named-semaphore locking; only built for VxWorks. */
IOMETHODS(
  semIoFinder,              /* FINDER: finder function name */
  semIoMethods,             /* METHOD: sqlite3_io_methods object name */
  1,                        /* VERSION: shared memory is disabled */
  semXClose,                /* xClose method */
  semXLock,                 /* xLock method */
  semXUnlock,               /* xUnlock method */
  semXCheckReservedLock,    /* xCheckReservedLock method */
  0                         /* xShmMap method */
)
#endif
|
|
|
|
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
/* AFP locking; Apple builds with SQLITE_ENABLE_LOCKING_STYLE only. */
IOMETHODS(
  afpIoFinder,              /* FINDER: finder function name */
  afpIoMethods,             /* METHOD: sqlite3_io_methods object name */
  1,                        /* VERSION: shared memory is disabled */
  afpClose,                 /* xClose method */
  afpLock,                  /* xLock method */
  afpUnlock,                /* xUnlock method */
  afpCheckReservedLock,     /* xCheckReservedLock method */
  0                         /* xShmMap method */
)
#endif
|
|
|
|
/*
** Proxy locking is a "super-method": it opens secondary file descriptors
** for the conch and lock files and uses the proxy, dot-file, AFP, and
** flock() locking methods on those secondary files. For that reason the
** division that implements proxy locking is located much further down in
** the file. The sqlite3_io_methods object and finder function for proxy
** locking still have to be defined here, so the I/O methods are forward
** declared first.
*/
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
static int proxyClose(sqlite3_file*);
static int proxyLock(sqlite3_file*, int);
static int proxyUnlock(sqlite3_file*, int);
static int proxyCheckReservedLock(sqlite3_file*, int*);
IOMETHODS(
  proxyIoFinder,            /* FINDER: finder function name */
  proxyIoMethods,           /* METHOD: sqlite3_io_methods object name */
  1,                        /* VERSION: shared memory is disabled */
  proxyClose,               /* xClose method */
  proxyLock,                /* xLock method */
  proxyUnlock,              /* xUnlock method */
  proxyCheckReservedLock,   /* xCheckReservedLock method */
  0                         /* xShmMap method */
)
#endif
|
|
|
|
/* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set.
** This variant differs from posixIoMethods in its xUnlock method and in
** having shared memory disabled. */
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
IOMETHODS(
  nfsIoFinder,              /* FINDER: finder function name */
  nfsIoMethods,             /* METHOD: sqlite3_io_methods object name */
  1,                        /* VERSION: shared memory is disabled */
  unixClose,                /* xClose method */
  unixLock,                 /* xLock method */
  nfsUnlock,                /* xUnlock method */
  unixCheckReservedLock,    /* xCheckReservedLock method */
  0                         /* xShmMap method */
)
#endif
|
|
|
|
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
|
|
/*
|
|
** This "finder" function attempts to determine the best locking strategy
|
|
** for the database file "filePath". It then returns the sqlite3_io_methods
|
|
** object that implements that strategy.
|
|
**
|
|
** This is for MacOSX only.
|
|
*/
|
|
static const sqlite3_io_methods *autolockIoFinderImpl(
|
|
const char *filePath, /* name of the database file */
|
|
unixFile *pNew /* open file object for the database file */
|
|
){
|
|
static const struct Mapping {
|
|
const char *zFilesystem; /* Filesystem type name */
|
|
const sqlite3_io_methods *pMethods; /* Appropriate locking method */
|
|
} aMap[] = {
|
|
{ "hfs", &posixIoMethods },
|
|
{ "ufs", &posixIoMethods },
|
|
{ "afpfs", &afpIoMethods },
|
|
{ "smbfs", &afpIoMethods },
|
|
{ "webdav", &nolockIoMethods },
|
|
{ 0, 0 }
|
|
};
|
|
int i;
|
|
struct statfs fsInfo;
|
|
struct flock lockInfo;
|
|
|
|
if( !filePath ){
|
|
/* If filePath==NULL that means we are dealing with a transient file
|
|
** that does not need to be locked. */
|
|
return &nolockIoMethods;
|
|
}
|
|
if( statfs(filePath, &fsInfo) != -1 ){
|
|
if( fsInfo.f_flags & MNT_RDONLY ){
|
|
return &nolockIoMethods;
|
|
}
|
|
for(i=0; aMap[i].zFilesystem; i++){
|
|
if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){
|
|
return aMap[i].pMethods;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Default case. Handles, amongst others, "nfs".
|
|
** Test byte-range lock using fcntl(). If the call succeeds,
|
|
** assume that the file-system supports POSIX style locks.
|
|
*/
|
|
lockInfo.l_len = 1;
|
|
lockInfo.l_start = 0;
|
|
lockInfo.l_whence = SEEK_SET;
|
|
lockInfo.l_type = F_RDLCK;
|
|
if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) {
|
|
if( strcmp(fsInfo.f_fstypename, "nfs")==0 ){
|
|
return &nfsIoMethods;
|
|
} else {
|
|
return &posixIoMethods;
|
|
}
|
|
}else{
|
|
return &dotlockIoMethods;
|
|
}
|
|
}
|
|
static const sqlite3_io_methods
|
|
*(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl;
|
|
|
|
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
|
|
|
|
#if OS_VXWORKS
|
|
/*
|
|
** This "finder" function for VxWorks checks to see if posix advisory
|
|
** locking works. If it does, then that is what is used. If it does not
|
|
** work, then fallback to named semaphore locking.
|
|
*/
|
|
static const sqlite3_io_methods *vxworksIoFinderImpl(
|
|
const char *filePath, /* name of the database file */
|
|
unixFile *pNew /* the open file object */
|
|
){
|
|
struct flock lockInfo;
|
|
|
|
if( !filePath ){
|
|
/* If filePath==NULL that means we are dealing with a transient file
|
|
** that does not need to be locked. */
|
|
return &nolockIoMethods;
|
|
}
|
|
|
|
/* Test if fcntl() is supported and use POSIX style locks.
|
|
** Otherwise fall back to the named semaphore method.
|
|
*/
|
|
lockInfo.l_len = 1;
|
|
lockInfo.l_start = 0;
|
|
lockInfo.l_whence = SEEK_SET;
|
|
lockInfo.l_type = F_RDLCK;
|
|
if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) {
|
|
return &posixIoMethods;
|
|
}else{
|
|
return &semIoMethods;
|
|
}
|
|
}
|
|
static const sqlite3_io_methods
|
|
*(*const vxworksIoFinder)(const char*,unixFile*) = vxworksIoFinderImpl;
|
|
|
|
#endif /* OS_VXWORKS */
|
|
|
|
/*
|
|
** An abstract type for a pointer to an IO method finder function:
|
|
*/
|
|
/* fillInUnixFile() treats pVfs->pAppData as a pointer to a variable of this
** type and calls through it with (zFilename, pNew) to pick the locking
** methods for a newly opened file. */
typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*);
|
|
|
|
|
|
/****************************************************************************
|
|
**************************** sqlite3_vfs methods ****************************
|
|
**
|
|
** This division contains the implementation of methods on the
|
|
** sqlite3_vfs object.
|
|
*/
|
|
|
|
/*
|
|
** Initialize the contents of the unixFile structure pointed to by pId.
|
|
*/
|
|
static int fillInUnixFile(
|
|
sqlite3_vfs *pVfs, /* Pointer to vfs object */
|
|
int h, /* Open file descriptor of file being opened */
|
|
sqlite3_file *pId, /* Write to the unixFile structure here */
|
|
const char *zFilename, /* Name of the file being opened */
|
|
int ctrlFlags /* Zero or more UNIXFILE_* values */
|
|
){
|
|
const sqlite3_io_methods *pLockingStyle;
|
|
unixFile *pNew = (unixFile *)pId;
|
|
int rc = SQLITE_OK;
|
|
|
|
assert( pNew->pInode==NULL );
|
|
|
|
/* Usually the path zFilename should not be a relative pathname. The
|
|
** exception is when opening the proxy "conch" file in builds that
|
|
** include the special Apple locking styles.
|
|
*/
|
|
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
|
|
assert( zFilename==0 || zFilename[0]=='/'
|
|
|| pVfs->pAppData==(void*)&autolockIoFinder );
|
|
#else
|
|
assert( zFilename==0 || zFilename[0]=='/' );
|
|
#endif
|
|
|
|
/* No locking occurs in temporary files */
|
|
assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 );
|
|
|
|
OSTRACE(("OPEN %-3d %s\n", h, zFilename));
|
|
pNew->h = h;
|
|
pNew->pVfs = pVfs;
|
|
pNew->zPath = zFilename;
|
|
pNew->ctrlFlags = (u8)ctrlFlags;
|
|
#if SQLITE_MAX_MMAP_SIZE>0
|
|
pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap;
|
|
#endif
|
|
if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0),
|
|
"psow", SQLITE_POWERSAFE_OVERWRITE) ){
|
|
pNew->ctrlFlags |= UNIXFILE_PSOW;
|
|
}
|
|
if( strcmp(pVfs->zName,"unix-excl")==0 ){
|
|
pNew->ctrlFlags |= UNIXFILE_EXCL;
|
|
}
|
|
|
|
#if OS_VXWORKS
|
|
pNew->pId = vxworksFindFileId(zFilename);
|
|
if( pNew->pId==0 ){
|
|
ctrlFlags |= UNIXFILE_NOLOCK;
|
|
rc = SQLITE_NOMEM;
|
|
}
|
|
#endif
|
|
|
|
if( ctrlFlags & UNIXFILE_NOLOCK ){
|
|
pLockingStyle = &nolockIoMethods;
|
|
}else{
|
|
pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew);
|
|
#if SQLITE_ENABLE_LOCKING_STYLE
|
|
/* Cache zFilename in the locking context (AFP and dotlock override) for
|
|
** proxyLock activation is possible (remote proxy is based on db name)
|
|
** zFilename remains valid until file is closed, to support */
|
|
pNew->lockingContext = (void*)zFilename;
|
|
#endif
|
|
}
|
|
|
|
if( pLockingStyle == &posixIoMethods
|
|
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
|
|
|| pLockingStyle == &nfsIoMethods
|
|
#endif
|
|
){
|
|
unixEnterMutex();
|
|
rc = findInodeInfo(pNew, &pNew->pInode);
|
|
if( rc!=SQLITE_OK ){
|
|
/* If an error occurred in findInodeInfo(), close the file descriptor
|
|
** immediately, before releasing the mutex. findInodeInfo() may fail
|
|
** in two scenarios:
|
|
**
|
|
** (a) A call to fstat() failed.
|
|
** (b) A malloc failed.
|
|
**
|
|
** Scenario (b) may only occur if the process is holding no other
|
|
** file descriptors open on the same file. If there were other file
|
|
** descriptors on this file, then no malloc would be required by
|
|
** findInodeInfo(). If this is the case, it is quite safe to close
|
|
** handle h - as it is guaranteed that no posix locks will be released
|
|
** by doing so.
|
|
**
|
|
** If scenario (a) caused the error then things are not so safe. The
|
|
** implicit assumption here is that if fstat() fails, things are in
|
|
** such bad shape that dropping a lock or two doesn't matter much.
|
|
*/
|
|
robust_close(pNew, h, __LINE__);
|
|
h = -1;
|
|
}
|
|
unixLeaveMutex();
|
|
}
|
|
|
|
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
|
|
else if( pLockingStyle == &afpIoMethods ){
|
|
/* AFP locking uses the file path so it needs to be included in
|
|
** the afpLockingContext.
|
|
*/
|
|
afpLockingContext *pCtx;
|
|
pNew->lockingContext = pCtx = sqlite3_malloc64( sizeof(*pCtx) );
|
|
if( pCtx==0 ){
|
|
rc = SQLITE_NOMEM;
|
|
}else{
|
|
/* NB: zFilename exists and remains valid until the file is closed
|
|
** according to requirement F11141. So we do not need to make a
|
|
** copy of the filename. */
|
|
pCtx->dbPath = zFilename;
|
|
pCtx->reserved = 0;
|
|
srandomdev();
|
|
unixEnterMutex();
|
|
rc = findInodeInfo(pNew, &pNew->pInode);
|
|
if( rc!=SQLITE_OK ){
|
|
sqlite3_free(pNew->lockingContext);
|
|
robust_close(pNew, h, __LINE__);
|
|
h = -1;
|
|
}
|
|
unixLeaveMutex();
|
|
}
|
|
}
|
|
#endif
|
|
|
|
else if( pLockingStyle == &dotlockIoMethods ){
|
|
/* Dotfile locking uses the file path so it needs to be included in
|
|
** the dotlockLockingContext
|
|
*/
|
|
char *zLockFile;
|
|
int nFilename;
|
|
assert( zFilename!=0 );
|
|
nFilename = (int)strlen(zFilename) + 6;
|
|
zLockFile = (char *)sqlite3_malloc64(nFilename);
|
|
if( zLockFile==0 ){
|
|
rc = SQLITE_NOMEM;
|
|
}else{
|
|
sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename);
|
|
}
|
|
pNew->lockingContext = zLockFile;
|
|
}
|
|
|
|
#if OS_VXWORKS
|
|
else if( pLockingStyle == &semIoMethods ){
|
|
/* Named semaphore locking uses the file path so it needs to be
|
|
** included in the semLockingContext
|
|
*/
|
|
unixEnterMutex();
|
|
rc = findInodeInfo(pNew, &pNew->pInode);
|
|
if( (rc==SQLITE_OK) && (pNew->pInode->pSem==NULL) ){
|
|
char *zSemName = pNew->pInode->aSemName;
|
|
int n;
|
|
sqlite3_snprintf(MAX_PATHNAME, zSemName, "/%s.sem",
|
|
pNew->pId->zCanonicalName);
|
|
for( n=1; zSemName[n]; n++ )
|
|
if( zSemName[n]=='/' ) zSemName[n] = '_';
|
|
pNew->pInode->pSem = sem_open(zSemName, O_CREAT, 0666, 1);
|
|
if( pNew->pInode->pSem == SEM_FAILED ){
|
|
rc = SQLITE_NOMEM;
|
|
pNew->pInode->aSemName[0] = '\0';
|
|
}
|
|
}
|
|
unixLeaveMutex();
|
|
}
|
|
#endif
|
|
|
|
storeLastErrno(pNew, 0);
|
|
#if OS_VXWORKS
|
|
if( rc!=SQLITE_OK ){
|
|
if( h>=0 ) robust_close(pNew, h, __LINE__);
|
|
h = -1;
|
|
osUnlink(zFilename);
|
|
pNew->ctrlFlags |= UNIXFILE_DELETE;
|
|
}
|
|
#endif
|
|
if( rc!=SQLITE_OK ){
|
|
if( h>=0 ) robust_close(pNew, h, __LINE__);
|
|
}else{
|
|
pNew->pMethod = pLockingStyle;
|
|
OpenCounter(+1);
|
|
verifyDbFile(pNew);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Return the name of a directory in which to put temporary files.
|
|
** If no suitable temporary file directory can be found, return NULL.
|
|
*/
|
|
static const char *unixTempFileDir(void){
|
|
static const char *azDirs[] = {
|
|
0,
|
|
0,
|
|
"/var/tmp",
|
|
"/usr/tmp",
|
|
"/tmp",
|
|
"."
|
|
};
|
|
unsigned int i;
|
|
struct stat buf;
|
|
const char *zDir = sqlite3_temp_directory;
|
|
|
|
if( !azDirs[0] ) azDirs[0] = getenv("SQLITE_TMPDIR");
|
|
if( !azDirs[1] ) azDirs[1] = getenv("TMPDIR");
|
|
for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); zDir=azDirs[i++]){
|
|
if( zDir==0 ) continue;
|
|
if( osStat(zDir, &buf) ) continue;
|
|
if( !S_ISDIR(buf.st_mode) ) continue;
|
|
if( osAccess(zDir, 07) ) continue;
|
|
break;
|
|
}
|
|
return zDir;
|
|
}
|
|
|
|
/*
|
|
** Create a temporary file name in zBuf. zBuf must be allocated
|
|
** by the calling process and must be big enough to hold at least
|
|
** pVfs->mxPathname bytes.
|
|
*/
|
|
static int unixGetTempname(int nBuf, char *zBuf){
|
|
const char *zDir;
|
|
int iLimit = 0;
|
|
|
|
/* It's odd to simulate an io-error here, but really this is just
|
|
** using the io-error infrastructure to test that SQLite handles this
|
|
** function failing.
|
|
*/
|
|
SimulateIOError( return SQLITE_IOERR );
|
|
|
|
zDir = unixTempFileDir();
|
|
do{
|
|
u64 r;
|
|
sqlite3_randomness(sizeof(r), &r);
|
|
assert( nBuf>2 );
|
|
zBuf[nBuf-2] = 0;
|
|
sqlite3_snprintf(nBuf, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX"%llx%c",
|
|
zDir, r, 0);
|
|
if( zBuf[nBuf-2]!=0 || (iLimit++)>10 ) return SQLITE_ERROR;
|
|
}while( osAccess(zBuf,0)==0 );
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
|
|
/*
|
|
** Routine to transform a unixFile into a proxy-locking unixFile.
|
|
** Implementation in the proxy-lock division, but used by unixOpen()
|
|
** if SQLITE_PREFER_PROXY_LOCKING is defined.
|
|
*/
|
|
/* Arguments: the unixFile to convert, and a proxy path specification
** (unixOpen() passes ":auto:").  The result is compared against SQLITE_OK
** by unixOpen(). */
static int proxyTransformUnixFile(unixFile*, const char*);
|
|
#endif
|
|
|
|
/*
|
|
** Search for an unused file descriptor that was opened on the database
|
|
** file (not a journal or master-journal file) identified by pathname
|
|
** zPath with SQLITE_OPEN_XXX flags matching those passed as the second
|
|
** argument to this function.
|
|
**
|
|
** Such a file descriptor may exist if a database connection was closed
|
|
** but the associated file descriptor could not be closed because some
|
|
** other file descriptor open on the same file is holding a file-lock.
|
|
** Refer to comments in the unixClose() function and the lengthy comment
|
|
** describing "Posix Advisory Locking" at the start of this file for
|
|
** further details. Also, ticket #4018.
|
|
**
|
|
** If a suitable file descriptor is found, then it is returned. If no
|
|
** such file descriptor is located, -1 is returned.
|
|
*/
|
|
static UnixUnusedFd *findReusableFd(const char *zPath, int flags){
|
|
UnixUnusedFd *pUnused = 0;
|
|
|
|
/* Do not search for an unused file descriptor on vxworks. Not because
|
|
** vxworks would not benefit from the change (it might, we're not sure),
|
|
** but because no way to test it is currently available. It is better
|
|
** not to risk breaking vxworks support for the sake of such an obscure
|
|
** feature. */
|
|
#if !OS_VXWORKS
|
|
struct stat sStat; /* Results of stat() call */
|
|
|
|
/* A stat() call may fail for various reasons. If this happens, it is
|
|
** almost certain that an open() call on the same path will also fail.
|
|
** For this reason, if an error occurs in the stat() call here, it is
|
|
** ignored and -1 is returned. The caller will try to open a new file
|
|
** descriptor on the same path, fail, and return an error to SQLite.
|
|
**
|
|
** Even if a subsequent open() call does succeed, the consequences of
|
|
** not searching for a reusable file descriptor are not dire. */
|
|
if( 0==osStat(zPath, &sStat) ){
|
|
unixInodeInfo *pInode;
|
|
|
|
unixEnterMutex();
|
|
pInode = inodeList;
|
|
while( pInode && (pInode->fileId.dev!=sStat.st_dev
|
|
|| pInode->fileId.ino!=sStat.st_ino) ){
|
|
pInode = pInode->pNext;
|
|
}
|
|
if( pInode ){
|
|
UnixUnusedFd **pp;
|
|
for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext));
|
|
pUnused = *pp;
|
|
if( pUnused ){
|
|
*pp = pUnused->pNext;
|
|
}
|
|
}
|
|
unixLeaveMutex();
|
|
}
|
|
#endif /* if !OS_VXWORKS */
|
|
return pUnused;
|
|
}
|
|
|
|
/*
|
|
** This function is called by unixOpen() to determine the unix permissions
|
|
** to create new files with. If no error occurs, then SQLITE_OK is returned
|
|
** and a value suitable for passing as the third argument to open(2) is
|
|
** written to *pMode. If an IO error occurs, an SQLite error code is
|
|
** returned and the value of *pMode is not modified.
|
|
**
|
|
** In most cases, this routine sets *pMode to 0, which will become
|
|
** an indication to robust_open() to create the file using
|
|
** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask.
|
|
** But if the file being opened is a WAL or regular journal file, then
|
|
** this function queries the file-system for the permissions on the
|
|
** corresponding database file and sets *pMode to this value. Whenever
|
|
** possible, WAL and journal files are created using the same permissions
|
|
** as the associated database file.
|
|
**
|
|
** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the
|
|
** original filename is unavailable. But 8_3_NAMES is only used for
|
|
** FAT filesystems and permissions do not matter there, so just use
|
|
** the default permissions.
|
|
*/
|
|
static int findCreateFileMode(
|
|
const char *zPath, /* Path of file (possibly) being created */
|
|
int flags, /* Flags passed as 4th argument to xOpen() */
|
|
mode_t *pMode, /* OUT: Permissions to open file with */
|
|
uid_t *pUid, /* OUT: uid to set on the file */
|
|
gid_t *pGid /* OUT: gid to set on the file */
|
|
){
|
|
int rc = SQLITE_OK; /* Return Code */
|
|
*pMode = 0;
|
|
*pUid = 0;
|
|
*pGid = 0;
|
|
if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){
|
|
char zDb[MAX_PATHNAME+1]; /* Database file path */
|
|
int nDb; /* Number of valid bytes in zDb */
|
|
struct stat sStat; /* Output of stat() on database file */
|
|
|
|
/* zPath is a path to a WAL or journal file. The following block derives
|
|
** the path to the associated database file from zPath. This block handles
|
|
** the following naming conventions:
|
|
**
|
|
** "<path to db>-journal"
|
|
** "<path to db>-wal"
|
|
** "<path to db>-journalNN"
|
|
** "<path to db>-walNN"
|
|
**
|
|
** where NN is a decimal number. The NN naming schemes are
|
|
** used by the test_multiplex.c module.
|
|
*/
|
|
nDb = sqlite3Strlen30(zPath) - 1;
|
|
while( zPath[nDb]!='-' ){
|
|
#ifndef SQLITE_ENABLE_8_3_NAMES
|
|
/* In the normal case (8+3 filenames disabled) the journal filename
|
|
** is guaranteed to contain a '-' character. */
|
|
assert( nDb>0 );
|
|
assert( sqlite3Isalnum(zPath[nDb]) );
|
|
#else
|
|
/* If 8+3 names are possible, then the journal file might not contain
|
|
** a '-' character. So check for that case and return early. */
|
|
if( nDb==0 || zPath[nDb]=='.' ) return SQLITE_OK;
|
|
#endif
|
|
nDb--;
|
|
}
|
|
memcpy(zDb, zPath, nDb);
|
|
zDb[nDb] = '\0';
|
|
|
|
if( 0==osStat(zDb, &sStat) ){
|
|
*pMode = sStat.st_mode & 0777;
|
|
*pUid = sStat.st_uid;
|
|
*pGid = sStat.st_gid;
|
|
}else{
|
|
rc = SQLITE_IOERR_FSTAT;
|
|
}
|
|
}else if( flags & SQLITE_OPEN_DELETEONCLOSE ){
|
|
*pMode = 0600;
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Open the file zPath.
|
|
**
|
|
** Previously, the SQLite OS layer used three functions in place of this
|
|
** one:
|
|
**
|
|
** sqlite3OsOpenReadWrite();
|
|
** sqlite3OsOpenReadOnly();
|
|
** sqlite3OsOpenExclusive();
|
|
**
|
|
** These calls correspond to the following combinations of flags:
|
|
**
|
|
** ReadWrite() -> (READWRITE | CREATE)
|
|
** ReadOnly() -> (READONLY)
|
|
** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE)
|
|
**
|
|
** The old OpenExclusive() accepted a boolean argument - "delFlag". If
|
|
** true, the file was configured to be automatically deleted when the
|
|
** file handle closed. To achieve the same effect using this new
|
|
** interface, add the DELETEONCLOSE flag to those specified above for
|
|
** OpenExclusive().
|
|
*/
|
|
static int unixOpen(
|
|
sqlite3_vfs *pVfs, /* The VFS for which this is the xOpen method */
|
|
const char *zPath, /* Pathname of file to be opened */
|
|
sqlite3_file *pFile, /* The file descriptor to be filled in */
|
|
int flags, /* Input flags to control the opening */
|
|
int *pOutFlags /* Output flags returned to SQLite core */
|
|
){
|
|
unixFile *p = (unixFile *)pFile;
|
|
int fd = -1; /* File descriptor returned by open() */
|
|
int openFlags = 0; /* Flags to pass to open() */
|
|
int eType = flags&0xFFFFFF00; /* Type of file to open */
|
|
int noLock; /* True to omit locking primitives */
|
|
int rc = SQLITE_OK; /* Function Return Code */
|
|
int ctrlFlags = 0; /* UNIXFILE_* flags */
|
|
|
|
int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE);
|
|
int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE);
|
|
int isCreate = (flags & SQLITE_OPEN_CREATE);
|
|
int isReadonly = (flags & SQLITE_OPEN_READONLY);
|
|
int isReadWrite = (flags & SQLITE_OPEN_READWRITE);
|
|
#if SQLITE_ENABLE_LOCKING_STYLE
|
|
int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY);
|
|
#endif
|
|
#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
|
|
struct statfs fsInfo;
|
|
#endif
|
|
|
|
/* If creating a master or main-file journal, this function will open
|
|
** a file-descriptor on the directory too. The first time unixSync()
|
|
** is called the directory file descriptor will be fsync()ed and close()d.
|
|
*/
|
|
int syncDir = (isCreate && (
|
|
eType==SQLITE_OPEN_MASTER_JOURNAL
|
|
|| eType==SQLITE_OPEN_MAIN_JOURNAL
|
|
|| eType==SQLITE_OPEN_WAL
|
|
));
|
|
|
|
/* If argument zPath is a NULL pointer, this function is required to open
|
|
** a temporary file. Use this buffer to store the file name in.
|
|
*/
|
|
char zTmpname[MAX_PATHNAME+2];
|
|
const char *zName = zPath;
|
|
|
|
/* Check the following statements are true:
|
|
**
|
|
** (a) Exactly one of the READWRITE and READONLY flags must be set, and
|
|
** (b) if CREATE is set, then READWRITE must also be set, and
|
|
** (c) if EXCLUSIVE is set, then CREATE must also be set.
|
|
** (d) if DELETEONCLOSE is set, then CREATE must also be set.
|
|
*/
|
|
assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly));
|
|
assert(isCreate==0 || isReadWrite);
|
|
assert(isExclusive==0 || isCreate);
|
|
assert(isDelete==0 || isCreate);
|
|
|
|
/* The main DB, main journal, WAL file and master journal are never
|
|
** automatically deleted. Nor are they ever temporary files. */
|
|
assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB );
|
|
assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL );
|
|
assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MASTER_JOURNAL );
|
|
assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL );
|
|
|
|
/* Assert that the upper layer has set one of the "file-type" flags. */
|
|
assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB
|
|
|| eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL
|
|
|| eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL
|
|
|| eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL
|
|
);
|
|
|
|
/* Detect a pid change and reset the PRNG. There is a race condition
|
|
** here such that two or more threads all trying to open databases at
|
|
** the same instant might all reset the PRNG. But multiple resets
|
|
** are harmless.
|
|
*/
|
|
if( randomnessPid!=osGetpid(0) ){
|
|
randomnessPid = osGetpid(0);
|
|
sqlite3_randomness(0,0);
|
|
}
|
|
|
|
memset(p, 0, sizeof(unixFile));
|
|
|
|
if( eType==SQLITE_OPEN_MAIN_DB ){
|
|
UnixUnusedFd *pUnused;
|
|
pUnused = findReusableFd(zName, flags);
|
|
if( pUnused ){
|
|
fd = pUnused->fd;
|
|
}else{
|
|
pUnused = sqlite3_malloc64(sizeof(*pUnused));
|
|
if( !pUnused ){
|
|
return SQLITE_NOMEM;
|
|
}
|
|
}
|
|
p->pUnused = pUnused;
|
|
|
|
/* Database filenames are double-zero terminated if they are not
|
|
** URIs with parameters. Hence, they can always be passed into
|
|
** sqlite3_uri_parameter(). */
|
|
assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 );
|
|
|
|
}else if( !zName ){
|
|
/* If zName is NULL, the upper layer is requesting a temp file. */
|
|
assert(isDelete && !syncDir);
|
|
rc = unixGetTempname(pVfs->mxPathname, zTmpname);
|
|
if( rc!=SQLITE_OK ){
|
|
return rc;
|
|
}
|
|
zName = zTmpname;
|
|
|
|
/* Generated temporary filenames are always double-zero terminated
|
|
** for use by sqlite3_uri_parameter(). */
|
|
assert( zName[strlen(zName)+1]==0 );
|
|
}
|
|
|
|
/* Determine the value of the flags parameter passed to POSIX function
|
|
** open(). These must be calculated even if open() is not called, as
|
|
** they may be stored as part of the file handle and used by the
|
|
** 'conch file' locking functions later on. */
|
|
if( isReadonly ) openFlags |= O_RDONLY;
|
|
if( isReadWrite ) openFlags |= O_RDWR;
|
|
if( isCreate ) openFlags |= O_CREAT;
|
|
if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW);
|
|
openFlags |= (O_LARGEFILE|O_BINARY);
|
|
|
|
if( fd<0 ){
|
|
mode_t openMode; /* Permissions to create file with */
|
|
uid_t uid; /* Userid for the file */
|
|
gid_t gid; /* Groupid for the file */
|
|
rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid);
|
|
if( rc!=SQLITE_OK ){
|
|
assert( !p->pUnused );
|
|
assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL );
|
|
return rc;
|
|
}
|
|
fd = robust_open(zName, openFlags, openMode);
|
|
OSTRACE(("OPENX %-3d %s 0%o\n", fd, zName, openFlags));
|
|
assert( !isExclusive || (openFlags & O_CREAT)!=0 );
|
|
if( fd<0 && errno!=EISDIR && isReadWrite ){
|
|
/* Failed to open the file for read/write access. Try read-only. */
|
|
flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
|
|
openFlags &= ~(O_RDWR|O_CREAT);
|
|
flags |= SQLITE_OPEN_READONLY;
|
|
openFlags |= O_RDONLY;
|
|
isReadonly = 1;
|
|
fd = robust_open(zName, openFlags, openMode);
|
|
}
|
|
if( fd<0 ){
|
|
rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName);
|
|
goto open_finished;
|
|
}
|
|
|
|
/* If this process is running as root and if creating a new rollback
|
|
** journal or WAL file, set the ownership of the journal or WAL to be
|
|
** the same as the original database.
|
|
*/
|
|
if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){
|
|
robustFchown(fd, uid, gid);
|
|
}
|
|
}
|
|
assert( fd>=0 );
|
|
if( pOutFlags ){
|
|
*pOutFlags = flags;
|
|
}
|
|
|
|
if( p->pUnused ){
|
|
p->pUnused->fd = fd;
|
|
p->pUnused->flags = flags;
|
|
}
|
|
|
|
if( isDelete ){
|
|
#if OS_VXWORKS
|
|
zPath = zName;
|
|
#elif defined(SQLITE_UNLINK_AFTER_CLOSE)
|
|
zPath = sqlite3_mprintf("%s", zName);
|
|
if( zPath==0 ){
|
|
robust_close(p, fd, __LINE__);
|
|
return SQLITE_NOMEM;
|
|
}
|
|
#else
|
|
osUnlink(zName);
|
|
#endif
|
|
}
|
|
#if SQLITE_ENABLE_LOCKING_STYLE
|
|
else{
|
|
p->openFlags = openFlags;
|
|
}
|
|
#endif
|
|
|
|
noLock = eType!=SQLITE_OPEN_MAIN_DB;
|
|
|
|
|
|
#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
|
|
if( fstatfs(fd, &fsInfo) == -1 ){
|
|
storeLastErrno(p, errno);
|
|
robust_close(p, fd, __LINE__);
|
|
return SQLITE_IOERR_ACCESS;
|
|
}
|
|
if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) {
|
|
((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS;
|
|
}
|
|
if (0 == strncmp("exfat", fsInfo.f_fstypename, 5)) {
|
|
((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS;
|
|
}
|
|
#endif
|
|
|
|
/* Set up appropriate ctrlFlags */
|
|
if( isDelete ) ctrlFlags |= UNIXFILE_DELETE;
|
|
if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY;
|
|
if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK;
|
|
if( syncDir ) ctrlFlags |= UNIXFILE_DIRSYNC;
|
|
if( flags & SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI;
|
|
|
|
#if SQLITE_ENABLE_LOCKING_STYLE
|
|
#if SQLITE_PREFER_PROXY_LOCKING
|
|
isAutoProxy = 1;
|
|
#endif
|
|
if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){
|
|
char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING");
|
|
int useProxy = 0;
|
|
|
|
/* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means
|
|
** never use proxy, NULL means use proxy for non-local files only. */
|
|
if( envforce!=NULL ){
|
|
useProxy = atoi(envforce)>0;
|
|
}else{
|
|
useProxy = !(fsInfo.f_flags&MNT_LOCAL);
|
|
}
|
|
if( useProxy ){
|
|
rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags);
|
|
if( rc==SQLITE_OK ){
|
|
rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:");
|
|
if( rc!=SQLITE_OK ){
|
|
/* Use unixClose to clean up the resources added in fillInUnixFile
|
|
** and clear all the structure's references. Specifically,
|
|
** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op
|
|
*/
|
|
unixClose(pFile);
|
|
return rc;
|
|
}
|
|
}
|
|
goto open_finished;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags);
|
|
|
|
open_finished:
|
|
if( rc!=SQLITE_OK ){
|
|
sqlite3_free(p->pUnused);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
|
|
/*
|
|
** Delete the file at zPath. If the dirSync argument is true, fsync()
|
|
** the directory after deleting the file.
|
|
*/
|
|
static int unixDelete(
|
|
sqlite3_vfs *NotUsed, /* VFS containing this as the xDelete method */
|
|
const char *zPath, /* Name of file to be deleted */
|
|
int dirSync /* If true, fsync() directory after deleting file */
|
|
){
|
|
int rc = SQLITE_OK;
|
|
UNUSED_PARAMETER(NotUsed);
|
|
SimulateIOError(return SQLITE_IOERR_DELETE);
|
|
if( osUnlink(zPath)==(-1) ){
|
|
if( errno==ENOENT
|
|
#if OS_VXWORKS
|
|
|| osAccess(zPath,0)!=0
|
|
#endif
|
|
){
|
|
rc = SQLITE_IOERR_DELETE_NOENT;
|
|
}else{
|
|
rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath);
|
|
}
|
|
return rc;
|
|
}
|
|
#ifndef SQLITE_DISABLE_DIRSYNC
|
|
if( (dirSync & 1)!=0 ){
|
|
int fd;
|
|
rc = osOpenDirectory(zPath, &fd);
|
|
if( rc==SQLITE_OK ){
|
|
if( full_fsync(fd,0,0) ){
|
|
rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath);
|
|
}
|
|
robust_close(0, fd, __LINE__);
|
|
}else{
|
|
assert( rc==SQLITE_CANTOPEN );
|
|
rc = SQLITE_OK;
|
|
}
|
|
}
|
|
#endif
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Test the existence of or access permissions of file zPath. The
|
|
** test performed depends on the value of flags:
|
|
**
|
|
** SQLITE_ACCESS_EXISTS: Return 1 if the file exists
|
|
** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable.
|
|
** SQLITE_ACCESS_READONLY: Return 1 if the file is readable.
|
|
**
|
|
** Otherwise return 0.
|
|
*/
|
|
static int unixAccess(
|
|
sqlite3_vfs *NotUsed, /* The VFS containing this xAccess method */
|
|
const char *zPath, /* Path of the file to examine */
|
|
int flags, /* What do we want to learn about the zPath file? */
|
|
int *pResOut /* Write result boolean here */
|
|
){
|
|
UNUSED_PARAMETER(NotUsed);
|
|
SimulateIOError( return SQLITE_IOERR_ACCESS; );
|
|
assert( pResOut!=0 );
|
|
|
|
/* The spec says there are three possible values for flags. But only
|
|
** two of them are actually used */
|
|
assert( flags==SQLITE_ACCESS_EXISTS || flags==SQLITE_ACCESS_READWRITE );
|
|
|
|
if( flags==SQLITE_ACCESS_EXISTS ){
|
|
struct stat buf;
|
|
*pResOut = (0==osStat(zPath, &buf) && buf.st_size>0);
|
|
}else{
|
|
*pResOut = osAccess(zPath, W_OK|R_OK)==0;
|
|
}
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
**
|
|
*/
|
|
static int mkFullPathname(
|
|
const char *zPath, /* Input path */
|
|
char *zOut, /* Output buffer */
|
|
int nOut /* Allocated size of buffer zOut */
|
|
){
|
|
int nPath = sqlite3Strlen30(zPath);
|
|
int iOff = 0;
|
|
if( zPath[0]!='/' ){
|
|
if( osGetcwd(zOut, nOut-2)==0 ){
|
|
return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", zPath);
|
|
}
|
|
iOff = sqlite3Strlen30(zOut);
|
|
zOut[iOff++] = '/';
|
|
}
|
|
if( (iOff+nPath+1)>nOut ){
|
|
/* SQLite assumes that xFullPathname() nul-terminates the output buffer
|
|
** even if it returns an error. */
|
|
zOut[iOff] = '\0';
|
|
return SQLITE_CANTOPEN_BKPT;
|
|
}
|
|
sqlite3_snprintf(nOut-iOff, &zOut[iOff], "%s", zPath);
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Turn a relative pathname into a full pathname. The relative path
|
|
** is stored as a nul-terminated string in the buffer pointed to by
|
|
** zPath.
|
|
**
|
|
** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes
|
|
** (in this case, MAX_PATHNAME bytes). The full-path is written to
|
|
** this buffer before returning.
|
|
*/
|
|
static int unixFullPathname(
|
|
sqlite3_vfs *pVfs, /* Pointer to vfs object */
|
|
const char *zPath, /* Possibly relative input path */
|
|
int nOut, /* Size of output buffer in bytes */
|
|
char *zOut /* Output buffer */
|
|
){
|
|
#if !defined(HAVE_READLINK) || !defined(HAVE_LSTAT)
|
|
return mkFullPathname(zPath, zOut, nOut);
|
|
#else
|
|
int rc = SQLITE_OK;
|
|
int nByte;
|
|
int nLink = 1; /* Number of symbolic links followed so far */
|
|
const char *zIn = zPath; /* Input path for each iteration of loop */
|
|
char *zDel = 0;
|
|
|
|
assert( pVfs->mxPathname==MAX_PATHNAME );
|
|
UNUSED_PARAMETER(pVfs);
|
|
|
|
/* It's odd to simulate an io-error here, but really this is just
|
|
** using the io-error infrastructure to test that SQLite handles this
|
|
** function failing. This function could fail if, for example, the
|
|
** current working directory has been unlinked.
|
|
*/
|
|
SimulateIOError( return SQLITE_ERROR );
|
|
|
|
do {
|
|
|
|
/* Call stat() on path zIn. Set bLink to true if the path is a symbolic
|
|
** link, or false otherwise. */
|
|
int bLink = 0;
|
|
struct stat buf;
|
|
if( osLstat(zIn, &buf)!=0 ){
|
|
if( errno!=ENOENT ){
|
|
rc = unixLogError(SQLITE_CANTOPEN_BKPT, "lstat", zIn);
|
|
}
|
|
}else{
|
|
bLink = S_ISLNK(buf.st_mode);
|
|
}
|
|
|
|
if( bLink ){
|
|
if( zDel==0 ){
|
|
zDel = sqlite3_malloc(nOut);
|
|
if( zDel==0 ) rc = SQLITE_NOMEM;
|
|
}else if( ++nLink>SQLITE_MAX_SYMLINKS ){
|
|
rc = SQLITE_CANTOPEN_BKPT;
|
|
}
|
|
|
|
if( rc==SQLITE_OK ){
|
|
nByte = osReadlink(zIn, zDel, nOut-1);
|
|
if( nByte<0 ){
|
|
rc = unixLogError(SQLITE_CANTOPEN_BKPT, "readlink", zIn);
|
|
}else{
|
|
if( zDel[0]!='/' ){
|
|
int n;
|
|
for(n = sqlite3Strlen30(zIn); n>0 && zIn[n-1]!='/'; n--);
|
|
if( nByte+n+1>nOut ){
|
|
rc = SQLITE_CANTOPEN_BKPT;
|
|
}else{
|
|
memmove(&zDel[n], zDel, nByte+1);
|
|
memcpy(zDel, zIn, n);
|
|
nByte += n;
|
|
}
|
|
}
|
|
zDel[nByte] = '\0';
|
|
}
|
|
}
|
|
|
|
zIn = zDel;
|
|
}
|
|
|
|
assert( rc!=SQLITE_OK || zIn!=zOut || zIn[0]=='/' );
|
|
if( rc==SQLITE_OK && zIn!=zOut ){
|
|
rc = mkFullPathname(zIn, zOut, nOut);
|
|
}
|
|
if( bLink==0 ) break;
|
|
zIn = zOut;
|
|
}while( rc==SQLITE_OK );
|
|
|
|
sqlite3_free(zDel);
|
|
return rc;
|
|
#endif /* HAVE_READLINK && HAVE_LSTAT */
|
|
}
|
|
|
|
|
|
#ifndef SQLITE_OMIT_LOAD_EXTENSION
|
|
/*
|
|
** Interfaces for opening a shared library, finding entry points
|
|
** within the shared library, and closing the shared library.
|
|
*/
|
|
#include <dlfcn.h>
|
|
/* Open the shared library zFilename with dlopen(), using immediate
** binding and global symbol visibility.  Returns the dlopen() result. */
static void *unixDlOpen(sqlite3_vfs *NotUsed, const char *zFilename){
  void *pHandle;
  UNUSED_PARAMETER(NotUsed);
  pHandle = dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL);
  return pHandle;
}
|
|
|
|
/*
|
|
** SQLite calls this function immediately after a call to unixDlSym() or
|
|
** unixDlOpen() fails (returns a null pointer). If a more detailed error
|
|
** message is available, it is written to zBufOut. If no error message
|
|
** is available, zBufOut is left unmodified and SQLite uses a default
|
|
** error message.
|
|
*/
|
|
static void unixDlError(sqlite3_vfs *NotUsed, int nBuf, char *zBufOut){
|
|
const char *zErr;
|
|
UNUSED_PARAMETER(NotUsed);
|
|
unixEnterMutex();
|
|
zErr = dlerror();
|
|
if( zErr ){
|
|
sqlite3_snprintf(nBuf, zBufOut, "%s", zErr);
|
|
}
|
|
unixLeaveMutex();
|
|
}
|
|
/* Look up symbol zSym in the shared library handle p and return it as a
** pointer to a function. */
static void (*unixDlSym(sqlite3_vfs *NotUsed, void *p, const char*zSym))(void){
  /*
  ** GCC with -pedantic-errors says that C90 does not allow a void* to be
  ** cast into a pointer to a function.  And yet the library dlsym() routine
  ** returns a void* which is really a pointer to a function.  So how do we
  ** use dlsym() with -pedantic-errors?
  **
  ** The answer used here: xLookup is a pointer to a function that takes
  ** (void*, const char*) and returns a pointer to a function.  It is
  ** pointed at dlsym() by means of a cast, and the lookup is then made
  ** through xLookup so that the result already has function-pointer type.
  **
  ** This work-around is unlikely to work correctly on any system where
  ** you really cannot cast a function pointer into void*.  But then, on the
  ** other hand, dlsym() will not work on such a system either, so we have
  ** not really lost anything.
  */
  void (*(*xLookup)(void*,const char*))(void);
  UNUSED_PARAMETER(NotUsed);
  xLookup = (void(*(*)(void*,const char*))(void))dlsym;
  return xLookup(p, zSym);
}
|
|
/* Close a shared library handle previously returned by unixDlOpen().
** The result of dlclose() is ignored. */
static void unixDlClose(sqlite3_vfs *NotUsed, void *pHandle){
  UNUSED_PARAMETER(NotUsed);
  (void)dlclose(pHandle);
}
|
|
#else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */
|
|
#define unixDlOpen 0
|
|
#define unixDlError 0
|
|
#define unixDlSym 0
|
|
#define unixDlClose 0
|
|
#endif
|
|
|
|
/*
|
|
** Write nBuf bytes of random data to the supplied buffer zBuf.
|
|
*/
|
|
static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){
|
|
UNUSED_PARAMETER(NotUsed);
|
|
assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int)));
|
|
|
|
/* We have to initialize zBuf to prevent valgrind from reporting
|
|
** errors. The reports issued by valgrind are incorrect - we would
|
|
** prefer that the randomness be increased by making use of the
|
|
** uninitialized space in zBuf - but valgrind errors tend to worry
|
|
** some users. Rather than argue, it seems easier just to initialize
|
|
** the whole array and silence valgrind, even if that means less randomness
|
|
** in the random seed.
|
|
**
|
|
** When testing, initializing zBuf[] to zero is all we do. That means
|
|
** that we always use the same random number sequence. This makes the
|
|
** tests repeatable.
|
|
*/
|
|
memset(zBuf, 0, nBuf);
|
|
randomnessPid = osGetpid(0);
|
|
#if !defined(SQLITE_TEST) && !defined(SQLITE_OMIT_RANDOMNESS)
|
|
{
|
|
int fd, got;
|
|
fd = robust_open("/dev/urandom", O_RDONLY, 0);
|
|
if( fd<0 ){
|
|
time_t t;
|
|
time(&t);
|
|
memcpy(zBuf, &t, sizeof(t));
|
|
memcpy(&zBuf[sizeof(t)], &randomnessPid, sizeof(randomnessPid));
|
|
assert( sizeof(t)+sizeof(randomnessPid)<=(size_t)nBuf );
|
|
nBuf = sizeof(t) + sizeof(randomnessPid);
|
|
}else{
|
|
do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR );
|
|
robust_close(0, fd, __LINE__);
|
|
}
|
|
}
|
|
#endif
|
|
return nBuf;
|
|
}
|
|
|
|
|
|
/*
|
|
** Sleep for a little while. Return the amount of time slept.
|
|
** The argument is the number of microseconds we want to sleep.
|
|
** The return value is the number of microseconds of sleep actually
|
|
** requested from the underlying operating system, a number which
|
|
** might be greater than or equal to the argument, but not less
|
|
** than the argument.
|
|
*/
|
|
static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){
|
|
#if OS_VXWORKS
|
|
struct timespec sp;
|
|
|
|
sp.tv_sec = microseconds / 1000000;
|
|
sp.tv_nsec = (microseconds % 1000000) * 1000;
|
|
nanosleep(&sp, NULL);
|
|
UNUSED_PARAMETER(NotUsed);
|
|
return microseconds;
|
|
#elif defined(HAVE_USLEEP) && HAVE_USLEEP
|
|
usleep(microseconds);
|
|
UNUSED_PARAMETER(NotUsed);
|
|
return microseconds;
|
|
#else
|
|
int seconds = (microseconds+999999)/1000000;
|
|
sleep(seconds);
|
|
UNUSED_PARAMETER(NotUsed);
|
|
return seconds*1000000;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
** The following variable, if set to a non-zero value, is interpreted as
|
|
** the number of seconds since 1970 and is used to set the result of
|
|
** sqlite3OsCurrentTime() during testing.
|
|
*/
|
|
#ifdef SQLITE_TEST
|
|
/* Test-only clock override.  While this is non-zero,
** unixCurrentTimeInt64() reports this value (converted to Julian-day
** milliseconds) instead of the time read from the operating system. */
int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */
|
|
#endif
|
|
|
|
/*
|
|
** Find the current time (in Universal Coordinated Time). Write into *piNow
|
|
** the current time and date as a Julian Day number times 86_400_000. In
|
|
** other words, write into *piNow the number of milliseconds since the Julian
|
|
** epoch of noon in Greenwich on November 24, 4714 B.C according to the
|
|
** proleptic Gregorian calendar.
|
|
**
|
|
** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date
|
|
** cannot be found.
|
|
*/
|
|
static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){
|
|
static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000;
|
|
int rc = SQLITE_OK;
|
|
#if defined(NO_GETTOD)
|
|
time_t t;
|
|
time(&t);
|
|
*piNow = ((sqlite3_int64)t)*1000 + unixEpoch;
|
|
#elif OS_VXWORKS
|
|
struct timespec sNow;
|
|
clock_gettime(CLOCK_REALTIME, &sNow);
|
|
*piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000;
|
|
#else
|
|
struct timeval sNow;
|
|
(void)gettimeofday(&sNow, 0); /* Cannot fail given valid arguments */
|
|
*piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000;
|
|
#endif
|
|
|
|
#ifdef SQLITE_TEST
|
|
if( sqlite3_current_time ){
|
|
*piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch;
|
|
}
|
|
#endif
|
|
UNUSED_PARAMETER(NotUsed);
|
|
return rc;
|
|
}
|
|
|
|
#ifndef SQLITE_OMIT_DEPRECATED
|
|
/*
|
|
** Find the current time (in Universal Coordinated Time). Write the
|
|
** current time and date as a Julian Day number into *prNow and
|
|
** return 0. Return 1 if the time and date cannot be found.
|
|
*/
|
|
static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){
|
|
sqlite3_int64 i = 0;
|
|
int rc;
|
|
UNUSED_PARAMETER(NotUsed);
|
|
rc = unixCurrentTimeInt64(0, &i);
|
|
*prNow = i/86400000.0;
|
|
return rc;
|
|
}
|
|
#else
|
|
# define unixCurrentTime 0
|
|
#endif
|
|
|
|
#ifndef SQLITE_OMIT_DEPRECATED
|
|
/*
|
|
** We added the xGetLastError() method with the intention of providing
|
|
** better low-level error messages when operating-system problems come up
|
|
** during SQLite operation. But so far, none of that has been implemented
|
|
** in the core. So this routine is never called. For now, it is merely
|
|
** a place-holder.
|
|
*/
|
|
static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){
|
|
UNUSED_PARAMETER(NotUsed);
|
|
UNUSED_PARAMETER(NotUsed2);
|
|
UNUSED_PARAMETER(NotUsed3);
|
|
return 0;
|
|
}
|
|
#else
|
|
# define unixGetLastError 0
|
|
#endif
|
|
|
|
|
|
/*
|
|
************************ End of sqlite3_vfs methods ***************************
|
|
******************************************************************************/
|
|
|
|
/******************************************************************************
|
|
************************** Begin Proxy Locking ********************************
|
|
**
|
|
** Proxy locking is a "uber-locking-method" in this sense: It uses the
|
|
** other locking methods on secondary lock files. Proxy locking is a
|
|
** meta-layer over top of the primitive locking implemented above. For
|
|
** this reason, the division that implements proxy locking is deferred
|
|
** until late in the file (here) after all of the other I/O methods have
|
|
** been defined - so that the primitive locking methods are available
|
|
** as services to help with the implementation of proxy locking.
|
|
**
|
|
****
|
|
**
|
|
** The default locking schemes in SQLite use byte-range locks on the
|
|
** database file to coordinate safe, concurrent access by multiple readers
|
|
** and writers [http://sqlite.org/lockingv3.html]. The five file locking
|
|
** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented
|
|
** as POSIX read & write locks over fixed set of locations (via fsctl),
|
|
** on AFP and SMB only exclusive byte-range locks are available via fsctl
|
|
** with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states.
|
|
** To simulate a F_RDLCK on the shared range, on AFP a randomly selected
|
|
** address in the shared range is taken for a SHARED lock, the entire
|
|
** shared range is taken for an EXCLUSIVE lock):
|
|
**
|
|
** PENDING_BYTE 0x40000000
|
|
** RESERVED_BYTE 0x40000001
|
|
** SHARED_RANGE 0x40000002 -> 0x40000200
|
|
**
|
|
** This works well on the local file system, but shows a nearly 100x
|
|
** slowdown in read performance on AFP because the AFP client disables
|
|
** the read cache when byte-range locks are present. Enabling the read
|
|
** cache exposes a cache coherency problem that is present on all OS X
|
|
** supported network file systems. NFS and AFP both observe the
|
|
** close-to-open semantics for ensuring cache coherency
|
|
** [http://nfs.sourceforge.net/#faq_a8], which does not effectively
|
|
** address the requirements for concurrent database access by multiple
|
|
** readers and writers
|
|
** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html].
|
|
**
|
|
** To address the performance and cache coherency issues, proxy file locking
|
|
** changes the way database access is controlled by limiting access to a
|
|
** single host at a time and moving file locks off of the database file
|
|
** and onto a proxy file on the local file system.
|
|
**
|
|
**
|
|
** Using proxy locks
|
|
** -----------------
|
|
**
|
|
** C APIs
|
|
**
|
|
** sqlite3_file_control(db, dbname, SQLITE_FCNTL_SET_LOCKPROXYFILE,
|
|
** <proxy_path> | ":auto:");
|
|
** sqlite3_file_control(db, dbname, SQLITE_FCNTL_GET_LOCKPROXYFILE,
|
|
** &<proxy_path>);
|
|
**
|
|
**
|
|
** SQL pragmas
|
|
**
|
|
** PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto:
|
|
** PRAGMA [database.]lock_proxy_file
|
|
**
|
|
** Specifying ":auto:" means that if there is a conch file with a matching
|
|
** host ID in it, the proxy path in the conch file will be used, otherwise
|
|
** a proxy path based on the user's temp dir
|
|
** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the
|
|
** actual proxy file name is generated from the name and path of the
|
|
** database file. For example:
|
|
**
|
|
** For database path "/Users/me/foo.db"
|
|
** The lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:"
|
|
**
|
|
** Once a lock proxy is configured for a database connection, it can not
|
|
** be removed, however it may be switched to a different proxy path via
|
|
** the above APIs (assuming the conch file is not being held by another
|
|
** connection or process).
|
|
**
|
|
**
|
|
** How proxy locking works
|
|
** -----------------------
|
|
**
|
|
** Proxy file locking relies primarily on two new supporting files:
|
|
**
|
|
** * conch file to limit access to the database file to a single host
|
|
** at a time
|
|
**
|
|
** * proxy file to act as a proxy for the advisory locks normally
|
|
** taken on the database
|
|
**
|
|
** The conch file - to use a proxy file, sqlite must first "hold the conch"
|
|
** by taking an sqlite-style shared lock on the conch file, reading the
|
|
** contents and comparing the host's unique host ID (see below) and lock
|
|
** proxy path against the values stored in the conch. The conch file is
|
|
** stored in the same directory as the database file and the file name
|
|
** is patterned after the database file name as ".<databasename>-conch".
|
|
** If the conch file does not exist, or its contents do not match the
|
|
** host ID and/or proxy path, then the lock is escalated to an exclusive
|
|
** lock and the conch file contents is updated with the host ID and proxy
|
|
** path and the lock is downgraded to a shared lock again. If the conch
|
|
** is held by another process (with a shared lock), the exclusive lock
|
|
** will fail and SQLITE_BUSY is returned.
|
|
**
|
|
** The proxy file - a single-byte file used for all advisory file locks
|
|
** normally taken on the database file. This allows for safe sharing
|
|
** of the database file for multiple readers and writers on the same
|
|
** host (the conch ensures that they all use the same local lock file).
|
|
**
|
|
** Requesting the lock proxy does not immediately take the conch, it is
|
|
** only taken when the first request to lock database file is made.
|
|
** This matches the semantics of the traditional locking behavior, where
|
|
** opening a connection to a database file does not take a lock on it.
|
|
** The shared lock and an open file descriptor are maintained until
|
|
** the connection to the database is closed.
|
|
**
|
|
** The proxy file and the lock file are never deleted so they only need
|
|
** to be created the first time they are used.
|
|
**
|
|
** Configuration options
|
|
** ---------------------
|
|
**
|
|
** SQLITE_PREFER_PROXY_LOCKING
|
|
**
|
|
** Database files accessed on non-local file systems are
|
|
** automatically configured for proxy locking, lock files are
|
|
** named automatically using the same logic as
|
|
** PRAGMA lock_proxy_file=":auto:"
|
|
**
|
|
** SQLITE_PROXY_DEBUG
|
|
**
|
|
** Enables the logging of error messages during host id file
|
|
** retrieval and creation
|
|
**
|
|
** LOCKPROXYDIR
|
|
**
|
|
** Overrides the default directory used for lock proxy files that
|
|
** are named automatically via the ":auto:" setting
|
|
**
|
|
** SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
|
|
**
|
|
** Permissions to use when creating a directory for storing the
|
|
** lock proxy files, only used when LOCKPROXYDIR is not set.
|
|
**
|
|
**
|
|
** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING,
|
|
** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will
|
|
** force proxy locking to be used for every database file opened, and 0
|
|
** will force automatic proxy locking to be disabled for all database
|
|
** files (explicitly calling the SQLITE_FCNTL_SET_LOCKPROXYFILE pragma or
|
|
** sqlite3_file_control API is not affected by SQLITE_FORCE_PROXY_LOCKING).
|
|
*/
|
|
|
|
/*
|
|
** Proxy locking is only available on MacOSX
|
|
*/
|
|
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
|
|
|
|
/*
|
|
** The proxyLockingContext has the path and file structures for the remote
|
|
** and local proxy files in it
|
|
*/
|
|
typedef struct proxyLockingContext proxyLockingContext;
|
|
struct proxyLockingContext {
|
|
unixFile *conchFile; /* Open conch file */
|
|
char *conchFilePath; /* Name of the conch file */
|
|
unixFile *lockProxy; /* Open proxy lock file */
|
|
char *lockProxyPath; /* Name of the proxy lock file */
|
|
char *dbPath; /* Name of the open file */
|
|
int conchHeld; /* 1 if the conch is held, -1 if lockless */
|
|
int nFails; /* Number of conch taking failures */
|
|
void *oldLockingContext; /* Original lockingcontext to restore on close */
|
|
sqlite3_io_methods const *pOldMethod; /* Original I/O methods for close */
|
|
};
|
|
|
|
/*
|
|
** The proxy lock file path for the database at dbPath is written into lPath,
|
|
** which must point to valid, writable memory large enough for a maxLen length
|
|
** file path.
|
|
*/
|
|
static int proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen){
|
|
int len;
|
|
int dbLen;
|
|
int i;
|
|
|
|
#ifdef LOCKPROXYDIR
|
|
len = strlcpy(lPath, LOCKPROXYDIR, maxLen);
|
|
#else
|
|
# ifdef _CS_DARWIN_USER_TEMP_DIR
|
|
{
|
|
if( !confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen) ){
|
|
OSTRACE(("GETLOCKPATH failed %s errno=%d pid=%d\n",
|
|
lPath, errno, osGetpid(0)));
|
|
return SQLITE_IOERR_LOCK;
|
|
}
|
|
len = strlcat(lPath, "sqliteplocks", maxLen);
|
|
}
|
|
# else
|
|
len = strlcpy(lPath, "/tmp/", maxLen);
|
|
# endif
|
|
#endif
|
|
|
|
if( lPath[len-1]!='/' ){
|
|
len = strlcat(lPath, "/", maxLen);
|
|
}
|
|
|
|
/* transform the db path to a unique cache name */
|
|
dbLen = (int)strlen(dbPath);
|
|
for( i=0; i<dbLen && (i+len+7)<(int)maxLen; i++){
|
|
char c = dbPath[i];
|
|
lPath[i+len] = (c=='/')?'_':c;
|
|
}
|
|
lPath[i+len]='\0';
|
|
strlcat(lPath, ":auto:", maxLen);
|
|
OSTRACE(("GETLOCKPATH proxy lock path=%s pid=%d\n", lPath, osGetpid(0)));
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Creates the lock file and any missing directories in lockPath
|
|
*/
|
|
static int proxyCreateLockPath(const char *lockPath){
|
|
int i, len;
|
|
char buf[MAXPATHLEN];
|
|
int start = 0;
|
|
|
|
assert(lockPath!=NULL);
|
|
/* try to create all the intermediate directories */
|
|
len = (int)strlen(lockPath);
|
|
buf[0] = lockPath[0];
|
|
for( i=1; i<len; i++ ){
|
|
if( lockPath[i] == '/' && (i - start > 0) ){
|
|
/* only mkdir if leaf dir != "." or "/" or ".." */
|
|
if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/')
|
|
|| (i-start==2 && buf[start] != '.' && buf[start+1] != '.') ){
|
|
buf[i]='\0';
|
|
if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){
|
|
int err=errno;
|
|
if( err!=EEXIST ) {
|
|
OSTRACE(("CREATELOCKPATH FAILED creating %s, "
|
|
"'%s' proxy lock path=%s pid=%d\n",
|
|
buf, strerror(err), lockPath, osGetpid(0)));
|
|
return err;
|
|
}
|
|
}
|
|
}
|
|
start=i+1;
|
|
}
|
|
buf[i] = lockPath[i];
|
|
}
|
|
OSTRACE(("CREATELOCKPATH proxy lock path=%s pid=%d\n",lockPath,osGetpid(0)));
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
** Create a new VFS file descriptor (stored in memory obtained from
|
|
** sqlite3_malloc) and open the file named "path" in the file descriptor.
|
|
**
|
|
** The caller is responsible not only for closing the file descriptor
|
|
** but also for freeing the memory associated with the file descriptor.
|
|
*/
|
|
static int proxyCreateUnixFile(
|
|
const char *path, /* path for the new unixFile */
|
|
unixFile **ppFile, /* unixFile created and returned by ref */
|
|
int islockfile /* if non zero missing dirs will be created */
|
|
) {
|
|
int fd = -1;
|
|
unixFile *pNew;
|
|
int rc = SQLITE_OK;
|
|
int openFlags = O_RDWR | O_CREAT;
|
|
sqlite3_vfs dummyVfs;
|
|
int terrno = 0;
|
|
UnixUnusedFd *pUnused = NULL;
|
|
|
|
/* 1. first try to open/create the file
|
|
** 2. if that fails, and this is a lock file (not-conch), try creating
|
|
** the parent directories and then try again.
|
|
** 3. if that fails, try to open the file read-only
|
|
** otherwise return BUSY (if lock file) or CANTOPEN for the conch file
|
|
*/
|
|
pUnused = findReusableFd(path, openFlags);
|
|
if( pUnused ){
|
|
fd = pUnused->fd;
|
|
}else{
|
|
pUnused = sqlite3_malloc64(sizeof(*pUnused));
|
|
if( !pUnused ){
|
|
return SQLITE_NOMEM;
|
|
}
|
|
}
|
|
if( fd<0 ){
|
|
fd = robust_open(path, openFlags, 0);
|
|
terrno = errno;
|
|
if( fd<0 && errno==ENOENT && islockfile ){
|
|
if( proxyCreateLockPath(path) == SQLITE_OK ){
|
|
fd = robust_open(path, openFlags, 0);
|
|
}
|
|
}
|
|
}
|
|
if( fd<0 ){
|
|
openFlags = O_RDONLY;
|
|
fd = robust_open(path, openFlags, 0);
|
|
terrno = errno;
|
|
}
|
|
if( fd<0 ){
|
|
if( islockfile ){
|
|
return SQLITE_BUSY;
|
|
}
|
|
switch (terrno) {
|
|
case EACCES:
|
|
return SQLITE_PERM;
|
|
case EIO:
|
|
return SQLITE_IOERR_LOCK; /* even though it is the conch */
|
|
default:
|
|
return SQLITE_CANTOPEN_BKPT;
|
|
}
|
|
}
|
|
|
|
pNew = (unixFile *)sqlite3_malloc64(sizeof(*pNew));
|
|
if( pNew==NULL ){
|
|
rc = SQLITE_NOMEM;
|
|
goto end_create_proxy;
|
|
}
|
|
memset(pNew, 0, sizeof(unixFile));
|
|
pNew->openFlags = openFlags;
|
|
memset(&dummyVfs, 0, sizeof(dummyVfs));
|
|
dummyVfs.pAppData = (void*)&autolockIoFinder;
|
|
dummyVfs.zName = "dummy";
|
|
pUnused->fd = fd;
|
|
pUnused->flags = openFlags;
|
|
pNew->pUnused = pUnused;
|
|
|
|
rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0);
|
|
if( rc==SQLITE_OK ){
|
|
*ppFile = pNew;
|
|
return SQLITE_OK;
|
|
}
|
|
end_create_proxy:
|
|
robust_close(pNew, fd, __LINE__);
|
|
sqlite3_free(pNew);
|
|
sqlite3_free(pUnused);
|
|
return rc;
|
|
}
|
|
|
|
#ifdef SQLITE_TEST
|
|
/* Test-only knob used to simulate multiple hosts on one machine: when
** this is non-zero, proxyGetHostID() adds its low 8 bits to the first
** byte of the host ID it reports. */
|
|
int sqlite3_hostid_num = 0;
|
|
#endif
|
|
|
|
#define PROXY_HOSTIDLEN 16 /* conch file host id length */
|
|
|
|
#ifdef HAVE_GETHOSTUUID
|
|
/* Not always defined in the headers as it ought to be */
|
|
extern int gethostuuid(uuid_t id, const struct timespec *wait);
|
|
#endif
|
|
|
|
/* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN
|
|
** bytes of writable memory.
|
|
*/
|
|
static int proxyGetHostID(unsigned char *pHostID, int *pError){
|
|
assert(PROXY_HOSTIDLEN == sizeof(uuid_t));
|
|
memset(pHostID, 0, PROXY_HOSTIDLEN);
|
|
#ifdef HAVE_GETHOSTUUID
|
|
{
|
|
struct timespec timeout = {1, 0}; /* 1 sec timeout */
|
|
if( gethostuuid(pHostID, &timeout) ){
|
|
int err = errno;
|
|
if( pError ){
|
|
*pError = err;
|
|
}
|
|
return SQLITE_IOERR;
|
|
}
|
|
}
|
|
#else
|
|
UNUSED_PARAMETER(pError);
|
|
#endif
|
|
#ifdef SQLITE_TEST
|
|
/* simulate multiple hosts by creating unique hostid file paths */
|
|
if( sqlite3_hostid_num != 0){
|
|
pHostID[0] = (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF));
|
|
}
|
|
#endif
|
|
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/* The conch file contains the header, host id and lock file path
|
|
*/
|
|
#define PROXY_CONCHVERSION 2 /* 1-byte header, 16-byte host id, path */
|
|
#define PROXY_HEADERLEN 1 /* conch file header length */
|
|
#define PROXY_PATHINDEX (PROXY_HEADERLEN+PROXY_HOSTIDLEN)
|
|
#define PROXY_MAXCONCHLEN (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN)
|
|
|
|
/*
|
|
** Takes an open conch file, copies the contents to a new path and then moves
|
|
** it back. The newly created file's file descriptor is assigned to the
|
|
** conch file structure and finally the original conch file descriptor is
|
|
** closed. Returns zero if successful.
|
|
*/
|
|
static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){
|
|
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
|
|
unixFile *conchFile = pCtx->conchFile;
|
|
char tPath[MAXPATHLEN];
|
|
char buf[PROXY_MAXCONCHLEN];
|
|
char *cPath = pCtx->conchFilePath;
|
|
size_t readLen = 0;
|
|
size_t pathLen = 0;
|
|
char errmsg[64] = "";
|
|
int fd = -1;
|
|
int rc = -1;
|
|
UNUSED_PARAMETER(myHostID);
|
|
|
|
/* create a new path by replace the trailing '-conch' with '-break' */
|
|
pathLen = strlcpy(tPath, cPath, MAXPATHLEN);
|
|
if( pathLen>MAXPATHLEN || pathLen<6 ||
|
|
(strlcpy(&tPath[pathLen-5], "break", 6) != 5) ){
|
|
sqlite3_snprintf(sizeof(errmsg),errmsg,"path error (len %d)",(int)pathLen);
|
|
goto end_breaklock;
|
|
}
|
|
/* read the conch content */
|
|
readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0);
|
|
if( readLen<PROXY_PATHINDEX ){
|
|
sqlite3_snprintf(sizeof(errmsg),errmsg,"read error (len %d)",(int)readLen);
|
|
goto end_breaklock;
|
|
}
|
|
/* write it out to the temporary break file */
|
|
fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL), 0);
|
|
if( fd<0 ){
|
|
sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno);
|
|
goto end_breaklock;
|
|
}
|
|
if( osPwrite(fd, buf, readLen, 0) != (ssize_t)readLen ){
|
|
sqlite3_snprintf(sizeof(errmsg), errmsg, "write failed (%d)", errno);
|
|
goto end_breaklock;
|
|
}
|
|
if( rename(tPath, cPath) ){
|
|
sqlite3_snprintf(sizeof(errmsg), errmsg, "rename failed (%d)", errno);
|
|
goto end_breaklock;
|
|
}
|
|
rc = 0;
|
|
fprintf(stderr, "broke stale lock on %s\n", cPath);
|
|
robust_close(pFile, conchFile->h, __LINE__);
|
|
conchFile->h = fd;
|
|
conchFile->openFlags = O_RDWR | O_CREAT;
|
|
|
|
end_breaklock:
|
|
if( rc ){
|
|
if( fd>=0 ){
|
|
osUnlink(tPath);
|
|
robust_close(pFile, fd, __LINE__);
|
|
}
|
|
fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/* Take the requested lock on the conch file and break a stale lock if the
|
|
** host id matches.
|
|
*/
|
|
static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){
|
|
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
|
|
unixFile *conchFile = pCtx->conchFile;
|
|
int rc = SQLITE_OK;
|
|
int nTries = 0;
|
|
struct timespec conchModTime;
|
|
|
|
memset(&conchModTime, 0, sizeof(conchModTime));
|
|
do {
|
|
rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);
|
|
nTries ++;
|
|
if( rc==SQLITE_BUSY ){
|
|
/* If the lock failed (busy):
|
|
* 1st try: get the mod time of the conch, wait 0.5s and try again.
|
|
* 2nd try: fail if the mod time changed or host id is different, wait
|
|
* 10 sec and try again
|
|
* 3rd try: break the lock unless the mod time has changed.
|
|
*/
|
|
struct stat buf;
|
|
if( osFstat(conchFile->h, &buf) ){
|
|
storeLastErrno(pFile, errno);
|
|
return SQLITE_IOERR_LOCK;
|
|
}
|
|
|
|
if( nTries==1 ){
|
|
conchModTime = buf.st_mtimespec;
|
|
usleep(500000); /* wait 0.5 sec and try the lock again*/
|
|
continue;
|
|
}
|
|
|
|
assert( nTries>1 );
|
|
if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec ||
|
|
conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){
|
|
return SQLITE_BUSY;
|
|
}
|
|
|
|
if( nTries==2 ){
|
|
char tBuf[PROXY_MAXCONCHLEN];
|
|
int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0);
|
|
if( len<0 ){
|
|
storeLastErrno(pFile, errno);
|
|
return SQLITE_IOERR_LOCK;
|
|
}
|
|
if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){
|
|
/* don't break the lock if the host id doesn't match */
|
|
if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){
|
|
return SQLITE_BUSY;
|
|
}
|
|
}else{
|
|
/* don't break the lock on short read or a version mismatch */
|
|
return SQLITE_BUSY;
|
|
}
|
|
usleep(10000000); /* wait 10 sec and try the lock again */
|
|
continue;
|
|
}
|
|
|
|
assert( nTries==3 );
|
|
if( 0==proxyBreakConchLock(pFile, myHostID) ){
|
|
rc = SQLITE_OK;
|
|
if( lockType==EXCLUSIVE_LOCK ){
|
|
rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK);
|
|
}
|
|
if( !rc ){
|
|
rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);
|
|
}
|
|
}
|
|
}
|
|
} while( rc==SQLITE_BUSY && nTries<3 );
|
|
|
|
return rc;
|
|
}
|
|
|
|
/* Takes the conch by taking a shared lock and read the contents conch, if
|
|
** lockPath is non-NULL, the host ID and lock file path must match. A NULL
|
|
** lockPath means that the lockPath in the conch file will be used if the
|
|
** host IDs match, or a new lock path will be generated automatically
|
|
** and written to the conch file.
|
|
*/
|
|
static int proxyTakeConch(unixFile *pFile){
|
|
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
|
|
|
|
if( pCtx->conchHeld!=0 ){
|
|
return SQLITE_OK;
|
|
}else{
|
|
unixFile *conchFile = pCtx->conchFile;
|
|
uuid_t myHostID;
|
|
int pError = 0;
|
|
char readBuf[PROXY_MAXCONCHLEN];
|
|
char lockPath[MAXPATHLEN];
|
|
char *tempLockPath = NULL;
|
|
int rc = SQLITE_OK;
|
|
int createConch = 0;
|
|
int hostIdMatch = 0;
|
|
int readLen = 0;
|
|
int tryOldLockPath = 0;
|
|
int forceNewLockPath = 0;
|
|
|
|
OSTRACE(("TAKECONCH %d for %s pid=%d\n", conchFile->h,
|
|
(pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"),
|
|
osGetpid(0)));
|
|
|
|
rc = proxyGetHostID(myHostID, &pError);
|
|
if( (rc&0xff)==SQLITE_IOERR ){
|
|
storeLastErrno(pFile, pError);
|
|
goto end_takeconch;
|
|
}
|
|
rc = proxyConchLock(pFile, myHostID, SHARED_LOCK);
|
|
if( rc!=SQLITE_OK ){
|
|
goto end_takeconch;
|
|
}
|
|
/* read the existing conch file */
|
|
readLen = seekAndRead((unixFile*)conchFile, 0, readBuf, PROXY_MAXCONCHLEN);
|
|
if( readLen<0 ){
|
|
/* I/O error: lastErrno set by seekAndRead */
|
|
storeLastErrno(pFile, conchFile->lastErrno);
|
|
rc = SQLITE_IOERR_READ;
|
|
goto end_takeconch;
|
|
}else if( readLen<=(PROXY_HEADERLEN+PROXY_HOSTIDLEN) ||
|
|
readBuf[0]!=(char)PROXY_CONCHVERSION ){
|
|
/* a short read or version format mismatch means we need to create a new
|
|
** conch file.
|
|
*/
|
|
createConch = 1;
|
|
}
|
|
/* if the host id matches and the lock path already exists in the conch
|
|
** we'll try to use the path there, if we can't open that path, we'll
|
|
** retry with a new auto-generated path
|
|
*/
|
|
do { /* in case we need to try again for an :auto: named lock file */
|
|
|
|
if( !createConch && !forceNewLockPath ){
|
|
hostIdMatch = !memcmp(&readBuf[PROXY_HEADERLEN], myHostID,
|
|
PROXY_HOSTIDLEN);
|
|
/* if the conch has data compare the contents */
|
|
if( !pCtx->lockProxyPath ){
|
|
/* for auto-named local lock file, just check the host ID and we'll
|
|
** use the local lock file path that's already in there
|
|
*/
|
|
if( hostIdMatch ){
|
|
size_t pathLen = (readLen - PROXY_PATHINDEX);
|
|
|
|
if( pathLen>=MAXPATHLEN ){
|
|
pathLen=MAXPATHLEN-1;
|
|
}
|
|
memcpy(lockPath, &readBuf[PROXY_PATHINDEX], pathLen);
|
|
lockPath[pathLen] = 0;
|
|
tempLockPath = lockPath;
|
|
tryOldLockPath = 1;
|
|
/* create a copy of the lock path if the conch is taken */
|
|
goto end_takeconch;
|
|
}
|
|
}else if( hostIdMatch
|
|
&& !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX],
|
|
readLen-PROXY_PATHINDEX)
|
|
){
|
|
/* conch host and lock path match */
|
|
goto end_takeconch;
|
|
}
|
|
}
|
|
|
|
/* if the conch isn't writable and doesn't match, we can't take it */
|
|
if( (conchFile->openFlags&O_RDWR) == 0 ){
|
|
rc = SQLITE_BUSY;
|
|
goto end_takeconch;
|
|
}
|
|
|
|
/* either the conch didn't match or we need to create a new one */
|
|
if( !pCtx->lockProxyPath ){
|
|
proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN);
|
|
tempLockPath = lockPath;
|
|
/* create a copy of the lock path _only_ if the conch is taken */
|
|
}
|
|
|
|
/* update conch with host and path (this will fail if other process
|
|
** has a shared lock already), if the host id matches, use the big
|
|
** stick.
|
|
*/
|
|
futimes(conchFile->h, NULL);
|
|
if( hostIdMatch && !createConch ){
|
|
if( conchFile->pInode && conchFile->pInode->nShared>1 ){
|
|
/* We are trying for an exclusive lock but another thread in this
|
|
** same process is still holding a shared lock. */
|
|
rc = SQLITE_BUSY;
|
|
} else {
|
|
rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK);
|
|
}
|
|
}else{
|
|
rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK);
|
|
}
|
|
if( rc==SQLITE_OK ){
|
|
char writeBuffer[PROXY_MAXCONCHLEN];
|
|
int writeSize = 0;
|
|
|
|
writeBuffer[0] = (char)PROXY_CONCHVERSION;
|
|
memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN);
|
|
if( pCtx->lockProxyPath!=NULL ){
|
|
strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath,
|
|
MAXPATHLEN);
|
|
}else{
|
|
strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN);
|
|
}
|
|
writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]);
|
|
robust_ftruncate(conchFile->h, writeSize);
|
|
rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0);
|
|
full_fsync(conchFile->h,0,0);
|
|
/* If we created a new conch file (not just updated the contents of a
|
|
** valid conch file), try to match the permissions of the database
|
|
*/
|
|
if( rc==SQLITE_OK && createConch ){
|
|
struct stat buf;
|
|
int err = osFstat(pFile->h, &buf);
|
|
if( err==0 ){
|
|
mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP |
|
|
S_IROTH|S_IWOTH);
|
|
/* try to match the database file R/W permissions, ignore failure */
|
|
#ifndef SQLITE_PROXY_DEBUG
|
|
osFchmod(conchFile->h, cmode);
|
|
#else
|
|
do{
|
|
rc = osFchmod(conchFile->h, cmode);
|
|
}while( rc==(-1) && errno==EINTR );
|
|
if( rc!=0 ){
|
|
int code = errno;
|
|
fprintf(stderr, "fchmod %o FAILED with %d %s\n",
|
|
cmode, code, strerror(code));
|
|
} else {
|
|
fprintf(stderr, "fchmod %o SUCCEDED\n",cmode);
|
|
}
|
|
}else{
|
|
int code = errno;
|
|
fprintf(stderr, "STAT FAILED[%d] with %d %s\n",
|
|
err, code, strerror(code));
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK);
|
|
|
|
end_takeconch:
|
|
OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h));
|
|
if( rc==SQLITE_OK && pFile->openFlags ){
|
|
int fd;
|
|
if( pFile->h>=0 ){
|
|
robust_close(pFile, pFile->h, __LINE__);
|
|
}
|
|
pFile->h = -1;
|
|
fd = robust_open(pCtx->dbPath, pFile->openFlags, 0);
|
|
OSTRACE(("TRANSPROXY: OPEN %d\n", fd));
|
|
if( fd>=0 ){
|
|
pFile->h = fd;
|
|
}else{
|
|
rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called
|
|
during locking */
|
|
}
|
|
}
|
|
if( rc==SQLITE_OK && !pCtx->lockProxy ){
|
|
char *path = tempLockPath ? tempLockPath : pCtx->lockProxyPath;
|
|
rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1);
|
|
if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){
|
|
/* we couldn't create the proxy lock file with the old lock file path
|
|
** so try again via auto-naming
|
|
*/
|
|
forceNewLockPath = 1;
|
|
tryOldLockPath = 0;
|
|
continue; /* go back to the do {} while start point, try again */
|
|
}
|
|
}
|
|
if( rc==SQLITE_OK ){
|
|
/* Need to make a copy of path if we extracted the value
|
|
** from the conch file or the path was allocated on the stack
|
|
*/
|
|
if( tempLockPath ){
|
|
pCtx->lockProxyPath = sqlite3DbStrDup(0, tempLockPath);
|
|
if( !pCtx->lockProxyPath ){
|
|
rc = SQLITE_NOMEM;
|
|
}
|
|
}
|
|
}
|
|
if( rc==SQLITE_OK ){
|
|
pCtx->conchHeld = 1;
|
|
|
|
if( pCtx->lockProxy->pMethod == &afpIoMethods ){
|
|
afpLockingContext *afpCtx;
|
|
afpCtx = (afpLockingContext *)pCtx->lockProxy->lockingContext;
|
|
afpCtx->dbPath = pCtx->lockProxyPath;
|
|
}
|
|
} else {
|
|
conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);
|
|
}
|
|
OSTRACE(("TAKECONCH %d %s\n", conchFile->h,
|
|
rc==SQLITE_OK?"ok":"failed"));
|
|
return rc;
|
|
} while (1); /* in case we need to retry the :auto: lock file -
|
|
** we should never get here except via the 'continue' call. */
|
|
}
|
|
}
|
|
|
|
/*
|
|
** If pFile holds a lock on a conch file, then release that lock.
|
|
*/
|
|
static int proxyReleaseConch(unixFile *pFile){
|
|
int rc = SQLITE_OK; /* Subroutine return code */
|
|
proxyLockingContext *pCtx; /* The locking context for the proxy lock */
|
|
unixFile *conchFile; /* Name of the conch file */
|
|
|
|
pCtx = (proxyLockingContext *)pFile->lockingContext;
|
|
conchFile = pCtx->conchFile;
|
|
OSTRACE(("RELEASECONCH %d for %s pid=%d\n", conchFile->h,
|
|
(pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"),
|
|
osGetpid(0)));
|
|
if( pCtx->conchHeld>0 ){
|
|
rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);
|
|
}
|
|
pCtx->conchHeld = 0;
|
|
OSTRACE(("RELEASECONCH %d %s\n", conchFile->h,
|
|
(rc==SQLITE_OK ? "ok" : "failed")));
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Given the name of a database file, compute the name of its conch file.
|
|
** Store the conch filename in memory obtained from sqlite3_malloc64().
|
|
** Make *pConchPath point to the new name. Return SQLITE_OK on success
|
|
** or SQLITE_NOMEM if unable to obtain memory.
|
|
**
|
|
** The caller is responsible for ensuring that the allocated memory
|
|
** space is eventually freed.
|
|
**
|
|
** *pConchPath is set to NULL if a memory allocation error occurs.
|
|
*/
|
|
static int proxyCreateConchPathname(char *dbPath, char **pConchPath){
|
|
int i; /* Loop counter */
|
|
int len = (int)strlen(dbPath); /* Length of database filename - dbPath */
|
|
char *conchPath; /* buffer in which to construct conch name */
|
|
|
|
/* Allocate space for the conch filename and initialize the name to
|
|
** the name of the original database file. */
|
|
*pConchPath = conchPath = (char *)sqlite3_malloc64(len + 8);
|
|
if( conchPath==0 ){
|
|
return SQLITE_NOMEM;
|
|
}
|
|
memcpy(conchPath, dbPath, len+1);
|
|
|
|
/* now insert a "." before the last / character */
|
|
for( i=(len-1); i>=0; i-- ){
|
|
if( conchPath[i]=='/' ){
|
|
i++;
|
|
break;
|
|
}
|
|
}
|
|
conchPath[i]='.';
|
|
while ( i<len ){
|
|
conchPath[i+1]=dbPath[i];
|
|
i++;
|
|
}
|
|
|
|
/* append the "-conch" suffix to the file */
|
|
memcpy(&conchPath[i+1], "-conch", 7);
|
|
assert( (int)strlen(conchPath) == len+7 );
|
|
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
|
|
/* Takes a fully configured proxy locking-style unix file and switches
|
|
** the local lock file path
|
|
*/
|
|
static int switchLockProxyPath(unixFile *pFile, const char *path) {
|
|
proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;
|
|
char *oldPath = pCtx->lockProxyPath;
|
|
int rc = SQLITE_OK;
|
|
|
|
if( pFile->eFileLock!=NO_LOCK ){
|
|
return SQLITE_BUSY;
|
|
}
|
|
|
|
/* nothing to do if the path is NULL, :auto: or matches the existing path */
|
|
if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ||
|
|
(oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){
|
|
return SQLITE_OK;
|
|
}else{
|
|
unixFile *lockProxy = pCtx->lockProxy;
|
|
pCtx->lockProxy=NULL;
|
|
pCtx->conchHeld = 0;
|
|
if( lockProxy!=NULL ){
|
|
rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy);
|
|
if( rc ) return rc;
|
|
sqlite3_free(lockProxy);
|
|
}
|
|
sqlite3_free(oldPath);
|
|
pCtx->lockProxyPath = sqlite3DbStrDup(0, path);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** pFile is a file that has been opened by a prior xOpen call. dbPath
|
|
** is a string buffer at least MAXPATHLEN+1 characters in size.
|
|
**
|
|
** This routine find the filename associated with pFile and writes it
|
|
** int dbPath.
|
|
*/
|
|
static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){
|
|
#if defined(__APPLE__)
|
|
if( pFile->pMethod == &afpIoMethods ){
|
|
/* afp style keeps a reference to the db path in the filePath field
|
|
** of the struct */
|
|
assert( (int)strlen((char*)pFile->lockingContext)<=MAXPATHLEN );
|
|
strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath,
|
|
MAXPATHLEN);
|
|
} else
|
|
#endif
|
|
if( pFile->pMethod == &dotlockIoMethods ){
|
|
/* dot lock style uses the locking context to store the dot lock
|
|
** file path */
|
|
int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX);
|
|
memcpy(dbPath, (char *)pFile->lockingContext, len + 1);
|
|
}else{
|
|
/* all other styles use the locking context to store the db file path */
|
|
assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN );
|
|
strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN);
|
|
}
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Takes an already filled in unix file and alters it so all file locking
|
|
** will be performed on the local proxy lock file. The following fields
|
|
** are preserved in the locking context so that they can be restored and
|
|
** the unix structure properly cleaned up at close time:
|
|
** ->lockingContext
|
|
** ->pMethod
|
|
*/
|
|
static int proxyTransformUnixFile(unixFile *pFile, const char *path) {
|
|
proxyLockingContext *pCtx;
|
|
char dbPath[MAXPATHLEN+1]; /* Name of the database file */
|
|
char *lockPath=NULL;
|
|
int rc = SQLITE_OK;
|
|
|
|
if( pFile->eFileLock!=NO_LOCK ){
|
|
return SQLITE_BUSY;
|
|
}
|
|
proxyGetDbPathForUnixFile(pFile, dbPath);
|
|
if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){
|
|
lockPath=NULL;
|
|
}else{
|
|
lockPath=(char *)path;
|
|
}
|
|
|
|
OSTRACE(("TRANSPROXY %d for %s pid=%d\n", pFile->h,
|
|
(lockPath ? lockPath : ":auto:"), osGetpid(0)));
|
|
|
|
pCtx = sqlite3_malloc64( sizeof(*pCtx) );
|
|
if( pCtx==0 ){
|
|
return SQLITE_NOMEM;
|
|
}
|
|
memset(pCtx, 0, sizeof(*pCtx));
|
|
|
|
rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath);
|
|
if( rc==SQLITE_OK ){
|
|
rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, 0);
|
|
if( rc==SQLITE_CANTOPEN && ((pFile->openFlags&O_RDWR) == 0) ){
|
|
/* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and
|
|
** (c) the file system is read-only, then enable no-locking access.
|
|
** Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts
|
|
** that openFlags will have only one of O_RDONLY or O_RDWR.
|
|
*/
|
|
struct statfs fsInfo;
|
|
struct stat conchInfo;
|
|
int goLockless = 0;
|
|
|
|
if( osStat(pCtx->conchFilePath, &conchInfo) == -1 ) {
|
|
int err = errno;
|
|
if( (err==ENOENT) && (statfs(dbPath, &fsInfo) != -1) ){
|
|
goLockless = (fsInfo.f_flags&MNT_RDONLY) == MNT_RDONLY;
|
|
}
|
|
}
|
|
if( goLockless ){
|
|
pCtx->conchHeld = -1; /* read only FS/ lockless */
|
|
rc = SQLITE_OK;
|
|
}
|
|
}
|
|
}
|
|
if( rc==SQLITE_OK && lockPath ){
|
|
pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath);
|
|
}
|
|
|
|
if( rc==SQLITE_OK ){
|
|
pCtx->dbPath = sqlite3DbStrDup(0, dbPath);
|
|
if( pCtx->dbPath==NULL ){
|
|
rc = SQLITE_NOMEM;
|
|
}
|
|
}
|
|
if( rc==SQLITE_OK ){
|
|
/* all memory is allocated, proxys are created and assigned,
|
|
** switch the locking context and pMethod then return.
|
|
*/
|
|
pCtx->oldLockingContext = pFile->lockingContext;
|
|
pFile->lockingContext = pCtx;
|
|
pCtx->pOldMethod = pFile->pMethod;
|
|
pFile->pMethod = &proxyIoMethods;
|
|
}else{
|
|
if( pCtx->conchFile ){
|
|
pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile);
|
|
sqlite3_free(pCtx->conchFile);
|
|
}
|
|
sqlite3DbFree(0, pCtx->lockProxyPath);
|
|
sqlite3_free(pCtx->conchFilePath);
|
|
sqlite3_free(pCtx);
|
|
}
|
|
OSTRACE(("TRANSPROXY %d %s\n", pFile->h,
|
|
(rc==SQLITE_OK ? "ok" : "failed")));
|
|
return rc;
|
|
}
|
|
|
|
|
|
/*
|
|
** This routine handles sqlite3_file_control() calls that are specific
|
|
** to proxy locking.
|
|
*/
|
|
static int proxyFileControl(sqlite3_file *id, int op, void *pArg){
|
|
switch( op ){
|
|
case SQLITE_FCNTL_GET_LOCKPROXYFILE: {
|
|
unixFile *pFile = (unixFile*)id;
|
|
if( pFile->pMethod == &proxyIoMethods ){
|
|
proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;
|
|
proxyTakeConch(pFile);
|
|
if( pCtx->lockProxyPath ){
|
|
*(const char **)pArg = pCtx->lockProxyPath;
|
|
}else{
|
|
*(const char **)pArg = ":auto: (not held)";
|
|
}
|
|
} else {
|
|
*(const char **)pArg = NULL;
|
|
}
|
|
return SQLITE_OK;
|
|
}
|
|
case SQLITE_FCNTL_SET_LOCKPROXYFILE: {
|
|
unixFile *pFile = (unixFile*)id;
|
|
int rc = SQLITE_OK;
|
|
int isProxyStyle = (pFile->pMethod == &proxyIoMethods);
|
|
if( pArg==NULL || (const char *)pArg==0 ){
|
|
if( isProxyStyle ){
|
|
/* turn off proxy locking - not supported. If support is added for
|
|
** switching proxy locking mode off then it will need to fail if
|
|
** the journal mode is WAL mode.
|
|
*/
|
|
rc = SQLITE_ERROR /*SQLITE_PROTOCOL? SQLITE_MISUSE?*/;
|
|
}else{
|
|
/* turn off proxy locking - already off - NOOP */
|
|
rc = SQLITE_OK;
|
|
}
|
|
}else{
|
|
const char *proxyPath = (const char *)pArg;
|
|
if( isProxyStyle ){
|
|
proxyLockingContext *pCtx =
|
|
(proxyLockingContext*)pFile->lockingContext;
|
|
if( !strcmp(pArg, ":auto:")
|
|
|| (pCtx->lockProxyPath &&
|
|
!strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN))
|
|
){
|
|
rc = SQLITE_OK;
|
|
}else{
|
|
rc = switchLockProxyPath(pFile, proxyPath);
|
|
}
|
|
}else{
|
|
/* turn on proxy file locking */
|
|
rc = proxyTransformUnixFile(pFile, proxyPath);
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
default: {
|
|
assert( 0 ); /* The call assures that only valid opcodes are sent */
|
|
}
|
|
}
|
|
/*NOTREACHED*/
|
|
return SQLITE_ERROR;
|
|
}
|
|
|
|
/*
|
|
** Within this division (the proxy locking implementation) the procedures
|
|
** above this point are all utilities. The lock-related methods of the
|
|
** proxy-locking sqlite3_io_method object follow.
|
|
*/
|
|
|
|
|
|
/*
|
|
** This routine checks if there is a RESERVED lock held on the specified
|
|
** file by this or any other process. If such a lock is held, set *pResOut
|
|
** to a non-zero value otherwise *pResOut is set to zero. The return value
|
|
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
|
|
*/
|
|
static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) {
|
|
unixFile *pFile = (unixFile*)id;
|
|
int rc = proxyTakeConch(pFile);
|
|
if( rc==SQLITE_OK ){
|
|
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
|
|
if( pCtx->conchHeld>0 ){
|
|
unixFile *proxy = pCtx->lockProxy;
|
|
return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut);
|
|
}else{ /* conchHeld < 0 is lockless */
|
|
pResOut=0;
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Lock the file with the lock specified by parameter eFileLock - one
|
|
** of the following:
|
|
**
|
|
** (1) SHARED_LOCK
|
|
** (2) RESERVED_LOCK
|
|
** (3) PENDING_LOCK
|
|
** (4) EXCLUSIVE_LOCK
|
|
**
|
|
** Sometimes when requesting one lock state, additional lock states
|
|
** are inserted in between. The locking might fail on one of the later
|
|
** transitions leaving the lock state different from what it started but
|
|
** still short of its goal. The following chart shows the allowed
|
|
** transitions and the inserted intermediate states:
|
|
**
|
|
** UNLOCKED -> SHARED
|
|
** SHARED -> RESERVED
|
|
** SHARED -> (PENDING) -> EXCLUSIVE
|
|
** RESERVED -> (PENDING) -> EXCLUSIVE
|
|
** PENDING -> EXCLUSIVE
|
|
**
|
|
** This routine will only increase a lock. Use the sqlite3OsUnlock()
|
|
** routine to lower a locking level.
|
|
*/
|
|
static int proxyLock(sqlite3_file *id, int eFileLock) {
|
|
unixFile *pFile = (unixFile*)id;
|
|
int rc = proxyTakeConch(pFile);
|
|
if( rc==SQLITE_OK ){
|
|
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
|
|
if( pCtx->conchHeld>0 ){
|
|
unixFile *proxy = pCtx->lockProxy;
|
|
rc = proxy->pMethod->xLock((sqlite3_file*)proxy, eFileLock);
|
|
pFile->eFileLock = proxy->eFileLock;
|
|
}else{
|
|
/* conchHeld < 0 is lockless */
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
|
|
/*
|
|
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
|
|
** must be either NO_LOCK or SHARED_LOCK.
|
|
**
|
|
** If the locking level of the file descriptor is already at or below
|
|
** the requested locking level, this routine is a no-op.
|
|
*/
|
|
static int proxyUnlock(sqlite3_file *id, int eFileLock) {
|
|
unixFile *pFile = (unixFile*)id;
|
|
int rc = proxyTakeConch(pFile);
|
|
if( rc==SQLITE_OK ){
|
|
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
|
|
if( pCtx->conchHeld>0 ){
|
|
unixFile *proxy = pCtx->lockProxy;
|
|
rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, eFileLock);
|
|
pFile->eFileLock = proxy->eFileLock;
|
|
}else{
|
|
/* conchHeld < 0 is lockless */
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Close a file that uses proxy locks.
|
|
*/
|
|
static int proxyClose(sqlite3_file *id) {
|
|
if( ALWAYS(id) ){
|
|
unixFile *pFile = (unixFile*)id;
|
|
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
|
|
unixFile *lockProxy = pCtx->lockProxy;
|
|
unixFile *conchFile = pCtx->conchFile;
|
|
int rc = SQLITE_OK;
|
|
|
|
if( lockProxy ){
|
|
rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK);
|
|
if( rc ) return rc;
|
|
rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy);
|
|
if( rc ) return rc;
|
|
sqlite3_free(lockProxy);
|
|
pCtx->lockProxy = 0;
|
|
}
|
|
if( conchFile ){
|
|
if( pCtx->conchHeld ){
|
|
rc = proxyReleaseConch(pFile);
|
|
if( rc ) return rc;
|
|
}
|
|
rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile);
|
|
if( rc ) return rc;
|
|
sqlite3_free(conchFile);
|
|
}
|
|
sqlite3DbFree(0, pCtx->lockProxyPath);
|
|
sqlite3_free(pCtx->conchFilePath);
|
|
sqlite3DbFree(0, pCtx->dbPath);
|
|
/* restore the original locking context and pMethod then close it */
|
|
pFile->lockingContext = pCtx->oldLockingContext;
|
|
pFile->pMethod = pCtx->pOldMethod;
|
|
sqlite3_free(pCtx);
|
|
return pFile->pMethod->xClose(id);
|
|
}
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
|
|
|
|
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
|
|
/*
|
|
** The proxy locking style is intended for use with AFP filesystems.
|
|
** And since AFP is only supported on MacOSX, the proxy locking is also
|
|
** restricted to MacOSX.
|
|
**
|
|
**
|
|
******************* End of the proxy lock implementation **********************
|
|
******************************************************************************/
|
|
|
|
/*
|
|
** Initialize the operating system interface.
|
|
**
|
|
** This routine registers all VFS implementations for unix-like operating
|
|
** systems. This routine, and the sqlite3_os_end() routine that follows,
|
|
** should be the only routines in this file that are visible from other
|
|
** files.
|
|
**
|
|
** This routine is called once during SQLite initialization and by a
|
|
** single thread. The memory allocation and mutex subsystems have not
|
|
** necessarily been initialized when this routine is called, and so they
|
|
** should not be used.
|
|
*/
|
|
int sqlite3_os_init(void){
|
|
/*
|
|
** The following macro defines an initializer for an sqlite3_vfs object.
|
|
** The name of the VFS is NAME. The pAppData is a pointer to a pointer
|
|
** to the "finder" function. (pAppData is a pointer to a pointer because
|
|
** silly C90 rules prohibit a void* from being cast to a function pointer
|
|
** and so we have to go through the intermediate pointer to avoid problems
|
|
** when compiling with -pedantic-errors on GCC.)
|
|
**
|
|
** The FINDER parameter to this macro is the name of the pointer to the
|
|
** finder-function. The finder-function returns a pointer to the
|
|
** sqlite_io_methods object that implements the desired locking
|
|
** behaviors. See the division above that contains the IOMETHODS
|
|
** macro for addition information on finder-functions.
|
|
**
|
|
** Most finders simply return a pointer to a fixed sqlite3_io_methods
|
|
** object. But the "autolockIoFinder" available on MacOSX does a little
|
|
** more than that; it looks at the filesystem type that hosts the
|
|
** database file and tries to choose an locking method appropriate for
|
|
** that filesystem time.
|
|
*/
|
|
#define UNIXVFS(VFSNAME, FINDER) { \
|
|
3, /* iVersion */ \
|
|
sizeof(unixFile), /* szOsFile */ \
|
|
MAX_PATHNAME, /* mxPathname */ \
|
|
0, /* pNext */ \
|
|
VFSNAME, /* zName */ \
|
|
(void*)&FINDER, /* pAppData */ \
|
|
unixOpen, /* xOpen */ \
|
|
unixDelete, /* xDelete */ \
|
|
unixAccess, /* xAccess */ \
|
|
unixFullPathname, /* xFullPathname */ \
|
|
unixDlOpen, /* xDlOpen */ \
|
|
unixDlError, /* xDlError */ \
|
|
unixDlSym, /* xDlSym */ \
|
|
unixDlClose, /* xDlClose */ \
|
|
unixRandomness, /* xRandomness */ \
|
|
unixSleep, /* xSleep */ \
|
|
unixCurrentTime, /* xCurrentTime */ \
|
|
unixGetLastError, /* xGetLastError */ \
|
|
unixCurrentTimeInt64, /* xCurrentTimeInt64 */ \
|
|
unixSetSystemCall, /* xSetSystemCall */ \
|
|
unixGetSystemCall, /* xGetSystemCall */ \
|
|
unixNextSystemCall, /* xNextSystemCall */ \
|
|
}
|
|
|
|
/*
|
|
** All default VFSes for unix are contained in the following array.
|
|
**
|
|
** Note that the sqlite3_vfs.pNext field of the VFS object is modified
|
|
** by the SQLite core when the VFS is registered. So the following
|
|
** array cannot be const.
|
|
*/
|
|
static sqlite3_vfs aVfs[] = {
|
|
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
|
|
UNIXVFS("unix", autolockIoFinder ),
|
|
#elif OS_VXWORKS
|
|
UNIXVFS("unix", vxworksIoFinder ),
|
|
#else
|
|
UNIXVFS("unix", posixIoFinder ),
|
|
#endif
|
|
UNIXVFS("unix-none", nolockIoFinder ),
|
|
UNIXVFS("unix-dotfile", dotlockIoFinder ),
|
|
UNIXVFS("unix-excl", posixIoFinder ),
|
|
#if OS_VXWORKS
|
|
UNIXVFS("unix-namedsem", semIoFinder ),
|
|
#endif
|
|
#if SQLITE_ENABLE_LOCKING_STYLE || OS_VXWORKS
|
|
UNIXVFS("unix-posix", posixIoFinder ),
|
|
#endif
|
|
#if SQLITE_ENABLE_LOCKING_STYLE
|
|
UNIXVFS("unix-flock", flockIoFinder ),
|
|
#endif
|
|
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
|
|
UNIXVFS("unix-afp", afpIoFinder ),
|
|
UNIXVFS("unix-nfs", nfsIoFinder ),
|
|
UNIXVFS("unix-proxy", proxyIoFinder ),
|
|
#endif
|
|
};
|
|
unsigned int i; /* Loop counter */
|
|
|
|
/* Double-check that the aSyscall[] array has been constructed
|
|
** correctly. See ticket [bb3a86e890c8e96ab] */
|
|
assert( ArraySize(aSyscall)==28 );
|
|
|
|
/* Register all VFSes defined in the aVfs[] array */
|
|
for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
|
|
sqlite3_vfs_register(&aVfs[i], i==0);
|
|
}
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/*
|
|
** Shutdown the operating system interface.
|
|
**
|
|
** Some operating systems might need to do some cleanup in this routine,
|
|
** to release dynamically allocated objects. But not on unix.
|
|
** This routine is a no-op for unix.
|
|
*/
|
|
int sqlite3_os_end(void){
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
#endif /* SQLITE_OS_UNIX */
|