Files
MaxScale/query_classifier/qc_sqlite/sqlite-src-3110100/src/os_unix.c
Johan Wikman d115a54038 All changes 1.4.3...2.0.0
b709e29 Fix URL typo in release notes
01f203c Update release notes
c49810a Update COPYRIGHT
e327526 Add BSL version number to LICENSE.TXT
07e3a4e Remove superfluous COPURIGHT.md and LICENSE.md
54c3310 Replace Dynamic Data Routing Platform with Database Proxy
305d02f Remove *.cmake wildcard from .gitignore
b0b5208 Cleanup of spaces
aeca6d0 Extend maxscaled error messages
817d74c Document where the CDC users are stored
9a569db Update license
ff8697a MXS-716: Fix table level privilege detection
2071a8c Only check replies of slaves that are in use
f8dfa42 Fix possible hangs in CDC python scripts
fa1d99e Removed "filestem option" from example
009b549 Removed "filestem option" from example
8d515c2 Add example Kafka producer script for Python
64e976b Fix sporadic SSL authentication failures
5a655dc MXS-814: Check service/monitor permissions on all servers
2a7f596 Add note about galeramon priority to Galera tutorials
b90b5a6 Fixed SHOW SLAVE STATUS in binlog router
e22fe39 Changed couln size for SHOW SLAVE STATUS
ae97b18 Fix avrorouter build failure with older sqlite libraries
56ef8b7 Replace GPL license with BSL license in scripts and tests
552836f Initialize all fields when MySQL users are loaded from cache
bf42947 Update all licensing related files
b29db9d Remove optimize_wildcard parameter from use
5170844 Make readwritesplit diagnosting output more clear
262ffb1 Fix crash when a config parameter has no section
33ac9e6 Add note about LEAST_BEHIND_MASTER and server weights
e13e860 Fix a memory leak when backend authentication fails
75d00c2 MXS-801: Set the default value of strip_db_esc to true
bd5f2db MXS-800: Add a log message about the working directory
4b1dd8c Update MySQL Monitor documentation on detect_replication_lag
559bc99 Fix installation of license file
b057587 Change LICENSE to LICENSE.TXT
223fa43 Remove null characters from log messages
36fd05b Fix fatal signal handler log message
053dc8a Fix typos in documentation
371dc87 Fix Galera text in Master-Slave tutorial
30b6265 Disable adding of new objects at runtime
db92311 Update the documentation on configuration reloading
0923d40 Update Connector-C version
c4738b5 Add define for avro-conversion.ini
196e6ac Update license from GPL to BSL.
e612366 Correctly calculate the number of bytes read in dcb_read
93a2a03 Update rotate documentation in admin tutorial
c5eb854 MXS-585: Fix authentication handling regression
6330070 Fix schemarouter memory leak
aa5827e Fix CDC authentication memory leak
a5af4ad Fix avro memory leaks
627d73f Fix Avro scripts
0ff7556 Add build instructions to avrorouter documentation
734a1c8 Fix doxygen mainpage
e51ce09 Add licence text to avro scripts
4d27c14 Update Avro documentation and fix installation directories
a58a330 Fix readconnroute error message about router_options
22b138c MXS-772: Fix postinstall script
a9960b7 Fix function declaration in mysql_backend.c
cbe1704 Add missing newline
09d76ee Fix avro documentation default values
1d3f8f3 Added refresh users on START SLAVE
880db34 Make router configuration errors fatal
3bad5ca Update documentation and add logging to avrorouter
10f3384 Disable SSLv3
ca8d902 Fix rwsplit error reporting when master is lost
e816d65 Fix MaxScale Tutorial
deca3e1 Update MaxScale man page
f1735b7 Update release notes
9238714 qc: Change type of DEALLOCATE PREPARE
0b77c3b dbfwfilter: Do not require a trailing new line
1152ca9 Remove copyright message
a038a85 Remove debug assertion on ERANGE error in galeramon
12ab235 Fix comparison error for connections limit.
5de1a8f qc_sqlite: Correct string recognition
b63d754 Fix links in documentation contents
05d457e CDC protocol link fix
50676ef Fix monitor code formatting
218ba09 Remove MaxScale-and-SSL.md
0d6845b Add images to Avro documentation and tutorial
8dd2c9b Update MaxScale-2.0.0-Release-Notes.md
6d4b593 Change avrorouter default transaction grouping
4c629de Add notes about monitor changes to upgrading and release notes
267d0dc Update Binlogrouter.md
c624781 Update Replication-Proxy-Binlog-Router-Tutorial.md
f3261bc CDC users
1368797 Format authenticator and module headers
ab01749 Format filters
8b05d32 Format core and include files
f3974e5 Add GPL LICENSE to qc_mysqlembedded
bfec36a astyle rabbitmq_consumer/consumer.c
54b960a Check that the Avro directory is writable
3d4cd2e Fix cdc_users using the wrong path for service users cache
1e738dd Add CDC client program documentation
f6809fd Remove superfluous rabbitmw_consumer/LICENSE
6b5e667 Update license text in files
9bfcb46 Change CDC protocol documentation formatting
607f25c  REQUEST-DATA formatting
8175ab4 CDC protocol update
d5ca272 CDC protocol update
6c91764 Only check wsrep_local_index if node is joined
f12e2c2 Do not use SSL for monitors and services
6d2cd99 Fix TestAdminUsers
f4ae50d Apply astyle to server/core/test/*.c
7cc2824 Update build instructions
cf8e2b5 Update release notes
03c7a6c Remove wrong function prototypes
5a11eed Revert "Remove duplicate functions"
80ed488 Remove duplicate functions
bb0de8d Add info on SSL and throttling to release notes for 2.0.
0934aee Update MaxAdmin reference guide
2a3fe9b Update source URL in release notes
e575cf0 Merge branch 'MXS-177-develop' into develop
cc8c88d Change header for BSL
ecde266 Change header for BSL
890b208 Log a message when a script is executed
9c365df Added information on server side SSL to config guide.
aa3e002 Remove obsolete heading
79dd73a Make dprintAllSessions use dprintSession
1fc0db9 Align output of "show services"
1b9d301 Make monitorShowAll use monitorShow
983615e Adjust output of 'show modules'
436badd qc_sqlite: The module is now beta
a7cbbe5 Update Upgrade document
71ac13f Remove obsolete user/password from example
eb20ff8 Fix and clean up Avrorouter diagnostics code
31d4052 Change MaxScale to MariaDB MaxScale
e6e4858 Fix `source` parameter not working with `router_options`
d8de99a Update module version numbers
eb81add Merge remote-tracking branch 'origin/develop' into MXS-177-develop
daba563 Merge remote-tracking branch 'origin/MXS-651-develop-merge' into develop
678f417 Changes in response to reviews.
410fb81 Changes in response to reviews.
60135e5 Add initial release notes about Avrorouter
7400ecc qc_sqlite: Remove uninitialized read
536962c Update version number
018f044 Fix debug assertion in client command processing
51f0804 Prevent 'show monitor' from crashing with failed monitor
559347e Fix "Too many connections" message; add comments.
01d3929 Add printf format checking to dcb_printf
fbd49a6 dbfwfilter: Require complete parsing only when needed
1885863 Add information to release notes about readwritesplit changes
73b56a2 Update MaxScale section in release notes.
0a2f56f MaxAdmin: Remove debug information from 'show users'
3cf3279 MaxAdmin: Report SSL information as well
29c2b66 Always use SSL if server configured with SSL
7d6b335 dprintAllServers should use dprintServer
02a5246 qc_sqlite: Correctly detect parsing context
469419b compare: Add strict mode
8c5b3d3 compare: Allow the comparison of a single statement
4691514 Add Updgrade to 2.0 document
38b3ecb Expand the checks done before a monitor is stopped
8e2cfb9 Add backend name to authentication error message
9600a07 Fix MaxInfo crash
91c58b0 Removed log message for duplicate entry while adding an user
40392fe Fixed log message priority
0ec35b8 maxadmin: Allow the last user to be removed
5a0ebed maxadmin: Change name of user file
87aa8f1 maxadmin: Always allow root to connect
bf37751 Fix COM_QUIT packet detection
7c93ee4 Update avrorouter documentation and tutorial
95ce463 Fix wrong directory in avrorouter log message
cfe54c7 Update ChangeLog
d69562c Fix LOAD DATA LOCAL INFILE data size tracking
24e7cd6 MXS-584: added support for SET @@session.autocommit
d6f6f76 Fixes, correct too many connections message
efeb924 Update release notes for 2.0.0
8f71a87 qc_sqlite: Adjust error messages
b967d60 Remove copy of enum enum_server_command
822b7e3 Update package license
b58301a Update MaxScale License for overlooked files
c09ee47 Update MaxScale License
49f46fa Tidy up. Comment out config items not yet supported.
f5c3470 Updated and simplified the Building from Source document
98b98e2 Add note about master failure modes to documentation
e036f2c Update Limitations document
62219a5 Merge remote-tracking branch 'origin/drain-writeq' into develop
5caf667 Invoke DCB_REASON_DRAINED more frequently.
77b107b qc_sqlite: Add support for LOAD DATA INFILE
8e70f26 compare: Optionally print out the parse result
ad750e6 Merge remote-tracking branch 'origin/MXS-651-develop-merge' into develop
ef85779 Merge remote-tracking branch 'origin/develop' into MXS-651-develop-merge
ea9fdda MXS-477: Add LONGBLOB support for readconnroute
eae6d42 qc_sqlite: Remove superfluous columnname definition
8fe2b21 Add binlog source to avrorouter
b25cc37 qc_sqlite: Add missing destructors
8a749e7 qc_sqlite: Reduce number of keywords
5f4bb8b compare: Output query immediately
2456e52 dbfwfilter: Reject queries that cannot be parsed
5f1fbbd qc_sqlite: Extend SET grammar
b8d8418 dbfwfilter: Remove 'allow' from firewall filter rule
0bd2a44 MXS-741 When no events are read from binlog file, ...
a07c491 Remove duplicated function (merge error, probably)
b237008 Save conflict resolution, missed last time.
a0c0b40 Merge remote-tracking branch 'origin/develop' into MXS-651-develop
385d47d Change SSL logic, fix large read problem.
b93b5e0 Remove false debug assertion
b953b1f Turn off SSL read ahead.
e0d46a5 Fix error messages and remove commented code
49b4655 MXS-739: Fix invalid JSON in Maxinfo
0c30692 qc_sqlite: Handle GROUP_CONCAT arguments
54e48a1 qc_sqlite: Consider \ as an escape character in strings
713a5d6 qc_sqlite: Add test cases
20d1b51 qc_sqlite: Handle qualified names in CREATE VIEW
1019313 qc_sqlite: Make QUERY_TYPE_WRITE default for SHOW
059c14e qc_sqlite: Accept qualified function names in SELECT
db34989 qc_sqlite: Accept qualified function names
b93e2f1 qc_sqlite: Add limited support for GRAND and REVOKE
678672d qc_sqlite: Cleanup copying of database and table names
9b744b9 qc_sqlite: Update table and database names at the same time
db75e61 qc: Support getting the qualified table names
1f867f4 qc: Add join.test
9c7e02a qc_sqlite: Accept "...from NATURAL straight_join..."
93d1c31 qc_sqlite: Both " and ' can enclose a string literal
8055b21 qc_sqlite: Set more information based upon tokens
37e3663 qc_sqlite: Do not blindly add affected fields
50f1360 qc: Correctly collect affected fields
71c234e qc_sqlite: Recognize CREATE TABLE ... UNION
01803f1 qc_sqlite: Recognize {DEALLOCATE|DROP} PREPARE ...
6ecd4b3 qc_sqlite: Parse index hints
0bdab01 qc: Compare sets of tables
b908c8f Fix double freeing of internal DCBs
8903556 qc_sqlite: Recognize LEFT, INSERT and REPLACE
266e6c0 qc: Log all problems by default (compare program)
7b54cac qc_sqlite: Fix logging bug
9566e9f qc_sqlite: Plug a leak
b0a860d qc: Run compare a specified number of times
050d698 qc_sqlite: Simplified argument handling
97c56b8 qc: Allow arguments to be passed to the query classifier
09a46e0 qc_sqlite: Add argument log_unrecognized_statements
fd98153 qc: Allow arguments to be provided to the query classifier
313aa7e Fix Problems SSL assertion; non SSL connect to SSL
1d721e6 Fix DEB packaging errors
96bdc39 Fix RPM packaging failures on CentOS 7
6ba900d qc_sqlite: Recognize more SHOW commands
2869d0b qc_sqlite: Exclude support for sqlite's PRAGMA
0be68a3 qc_sqlite: Enhance SELECT syntax
28f3e1a Merge branch 'develop' into MXS-729
e18bd41 qc: Expose the result of the parsing
5896085 Add BUILD_AVRO to the CMake cache
daeb896 Remove changes to blr_master.c memory handling
d523821 Add comments
4eb9a66 Empty admin users file is now handled
52b46c6 qc: Update create.test
db09711 qc_sqlite: Ignore case when looking for test language keywords
f042a1d qc_sqlite: Extend CREATE TABLE syntax
177d2de qc_sqlite: Extend CREATE TABLE syntax
d3ca8e6 qc_sqlite: Add some support for HANDLER
86c6a96 qc_sqlite: Recognize RENAME TABLE
471594f qc_sqlite: Accept more table options at CREATE TABLE
3da6cde qc_sqlite: Remove unused keywords
bd89662 Fix crash on corrupted passwd file
b5d1764 MXS-733: Always print session states
043e2db Remove unused CMake variables
5604fe2 Restore missing line, fixes logic error.
66d15a5 Added log message warning for old users found
5be9fca Changes in response to review by Johan
899e0e2 Removed password parameter from admin_user_add and admin_remove_user
a2d9302 Merge branch 'develop' into MXS-729
bcaf82f Code review update
e61c716 Nagios plugin update with Maxadmin using UNIX socket only
d7150a2 qc_sqlite: Extend column syntax
3b78df0 qc_sqlite: Accept VALUE in addition to VALUES
85a705b qc_sqlite: Accept CHARSET in addition to CHARACTER SET
db9cec8 qc_sqlite: Accept qualified column  names in CREATE TABLE
a9cabb0 qc_sqlite: Extend SELECT syntax
f5e9878 qc_sqlite: Add set type
675cb93 qc_sqlite: Allow BINARY to turn into an identifier
b04a760 qc_sqlite: Accept DROP TABLES
1075d9c qc_sqlite: Allow qualified name with LIKE in CREATE
420ac56 qc_sqlite: Extend EXPLAIN grammar
727d626 Add missing error message to readwritesplit
f4fd09e Change templates and testing configurations to use sockets
1ef2e06 Add configurable default admin user
a723731 Remove wrong file
7c3b02b Maxadmin/maxscaled UNIX socket update
eed78d4 qc_sqlite: Pick out more information from select when CREATEing
267f091 qc_sqlite: Recognise DROP TEMPORARY TABLE
54fc29f qc_sqlite: Accept $ as a valid character in identifiers
afa2ec9 qc_sqlite: Allow keywords to be used in qualified name
db0427d MXS-729 code review update
a3b3000 Merge branch 'develop' into MXS-729
e73d66c qc_sqlite: Identify qualified identifiers
5bacade Trailing space fix
3bc9be3 MXS-729 socket=default support in maxscale.cnf
1a5c23c Code review update for MXS-729
d6665c7 qc_sqlite: Extend CREATE TABLE grammar
91725ce qc_sqlite: Dequote table and database names
cd7a022 qc: Add create test
1aa4e6b qc: Update test files
762b0c2 qc_mysqlembedded: Do not return "*" as table name
cd9968f qc_sqlite: Update delete.test
f16703d qc_sqlite: Add support for CALL
e3ca9b4 qc_mysqlembedded: Do not return an array of empty strings
5878a22 qc_sqlite: Support selects in DELETE
1cf0444 qc_sqlite: Fix bug in DELETE grammar
0bf39a1 qc_sqlite: Add support for CHECK TABLE
4a8feca qc_sqlite: Add helper for catenating SrcLists
ab299b3 qc_sqlite: Extend DELETE syntax
5778856 qc_sqlite: Extract database name as well
99901f1 qc_sqlite: Extend DELETE syntax
63396f8 qc_sqlite: Match "autocommit" caseinsensitively
e804dd3 qc_sqlite: Add support for LOCK/UNLOCK
c23e442 qc_sqlite: Extend DELETE syntax
5460e31 qc: Add delete test
ab392ad qc_sqlite: Free unused data
598c6f0 qc: Measure time of parsing
2fa3844 qc_sqlite: Put all changes being {%|#}ifdefs
1b43992 qc_sqlite: Modified update.test
1676ea4 qc_sqlite: LEFT|RIGHT are not required with JOIN
224ebd3 qc_sqlite: Extend UPDATE grammar
dbecca9 qc_sqlite: Extend UPDATE grammar
b6ca3b3 MaxAdmin security modification MXS-729
8fb47dd Remove copying of MariaDB embedded library files
22e1257 Normalize whitespace when canonicalizing queries
269cff2 MXS-697: Fix dbfwfilter logging for matched queries
6344f6f Ignore Apple .DS_Store files.
d606977 Improve comments in response to code review.
619aa13 Add configuration check flag to MaxScale
27c860b Drain write queue improvements.
33d4154 Read only one configuration file
d104c19 Format more core files
83fdead Format part of core source code
311d5de Format gateway.c and config.c with Astyle
8cbb48e Don't build maxavro library if BUILD_AVRO is not defined
32bb77a Merge branch 'MXS-483' into develop
db72c5d Format CDC/Avro related files
3c26e27 qc_sqlite: Use SrcList instead of Expr
f96ad6a Merge branch 'develop' into MXS-729
0728734 Fix query canonical form tests
e68262d Merge remote-tracking branch 'gpl-maxscale/master' into develop
65460dc Fix missing symbols from MySQLAuth
791c821 MaxAdmin listens on UNIX socket only and maxadmin can connect
89afed6 MXS-66: All configuration errors are fatal errors
d613053 Add more details to galeramon documentation
22f4f57 qc: Add support for multi UPDATE
0dba25a Added default port to blr_make_registration
9d8248c qc_sqlit: Plug leaks and access errors
057551a qc_sqlite: Fix to small an allocation
1f73820 qc_sqlite: Free memory allocated during parsing
93fefb9 qc: Enable compare to run the same test repeatedly
e52c578 qc_sqlite: Handle last_insert_id()
929e02a qc_sqlite: Extend UPDATE grammar
de3b9f7 qc_sqlite: Defines provided when running cmake and make
4d5c3b2 qc_sqlite: Add support for multiple-table DELETE FROM
36a4296 qc_mysqlembedded: Handle SQLCOM_DELETE_MULTI
41f613a Fix DCB and SESSION removal from free object pools
00f2ddd Move some common code used in only one protocol into protocol.
6fbd0b0 Format Go source with gofmt
abfbbcb Fix build failures and internal test suite
31de74a Merge branch 'develop' into MXS-483
20d461d Remove uniqueness constrain on oneshot tasks
6c09288 Add missing error message to converter task
0c2c389 Merge branch 'develop' into MXS-483
fa0accc Set freed memory to NULL after authentication failure
63f24e4 Install cdc_schema.go
5123c21 Fix ALTER TABLE parsing
004acc2 Merge branch 'develop' into MXS-483
f69a671 Remove array foreach macro use
a159cd9 qc_sqlite: Add support for SHOW DATABASES
31a2118 Make qc_mysqlembedded optional
27ef30e Changed the default query classifier
359010d Add -h flag as the alias for --host
bebc086 Fix minor bugs
c7ca253 qc_sqlite: Recognize START [TRANSACTION]
240f8bf qc_sqlite: Collect info from nested SELECTs
93ea393 qc_sqlite: Pass along the relative position of a token
cc960af qc_sqlite: Fix incorrect assigment
22a6fef Fix `gtid` avro index table
4c38bef qc_sqlite: STATUS is not a registered word
cace998 qc_sqlite: Include all fields of UPDATE
997b19c qc: Add update tests
7601b3f qc_sqlite: Parse "INSERT INTO t VALUES (), ();" correctly
ca426f1 qc_sqlite: Handle CREATE TRIGGER
f54f963 qc_sqlite: Allow INSERT without INTO
e4a1b6d Remove foreign keys from Avro index
e4501a2 Merge branch 'develop' into MXS-483
82b9585 Fix MMMon never assigning master status
a45a709 qc_mysqlembedded: Find the leaf name
2f3ca8f qc_mysqlembedded: Recognize SQLCOM_REPLACE
cc7ad83 qc_mysqlembedded: Pick up fields for INSERT SELECT as well
0e6b39e qc: Cleanup of select.test
9113f4f qc_sqlite: Pickup more fields from INSERT
4d58f98 Dummy query classifier
dfe824f Document `query_classifier` option
4aa329b MXS-718: Collect fields of INSERT
53818f2 Modify packet number for SSL backend connection
346f973 qc_sqlite: Accept qualified column names
8a83616 Fix in-memory SQLite table structure
6f2c884 Further backend SSL development
4444e92 qc_sqlite: Extend INSERT grammar
2aebcab qc_sqlite: Add support for TRUNCATE
1a6742e qc_sqlite: Accept DEFAULT as value in INSERT
07dec05 qc_sqlite: Crude classification made based on seen keywords
a90a579 Add missing function documentation
72bd0cf qc_sqlite: Change CREATE TABLE grammar
6e04bc8 qc: Add INSERT tests
3666bda qc_sqlite: Add SELECT test
d27e173 Add server/mysql-test/t/select.test to query_classifier
562d004 qc_sqlite: Cleanup error logging.
819cacb Merge branch 'develop' into MXS-483
0d3a789 Add warnings and comments to Avro row event processing
2fab570 Added support for SET autocommit=1
1aa83cf Code review fix
c999f0a Addition of SELECT USER()
8c723da Clean up avro_client.c and avro_file.c
eb21ee8 Clean up avro.c
946a284 Added Avro schema to table metadata processing
72f90be qc_sqlite: Add support for CREATE {FUNCTION|PROCEDURE} ...
4a4ab49 qc: Update line number also when skipping a block
ffddb2a qc_sqlite: Allow queries using INTERVAL
b8b03bd qc_sqlite: Add support for SELECT * FROM tbl2 = tbl1;
77a261a qc_sqlite: Add support for GROUP BY ... WITH ROLLUP
0ead41e cdc_schema now generates lowercase JSON
66e327a Classifier has to be specified explicitly
9074b91 Updated Avrorouter documentation
cf06c7a qc_sqlite: Some comments added.
f579eff Added simple Go based Avro schema generator
f448e90 MXS-419: Added ulimit calls to init scripts
b4ad257 Added FindAvro.cmake
56cc9b9 Added the last transaction script to installations
2d52da2 Added temporary avro-alpha package name
6ada071 Fixed cdc_users script
61f0206 Renaming and minor fixes to CDC Python scripts
9d77c32 Moved GTID table tracking to an in-memory database
8ae7cb0 MXS-704: Fixed `which` usage in post-install scripts
195e118 Readwritesplit sessions aren't created if master is down
2be91da Added affected tables to avro diagnostics
b185320 QUERY-LAST-TRANSACTION now returns proper table names
90860b5 Log stale master message only once
4859c60 Table name to GTID mapping
f77bd23 First steps to backend SSL, not yet working.
68b5bf0 qc_sqlite: Don't treat TRUE and FALSE as identifiers
fca8e59 qc_sqlite: Collect database names as well
6b0e04d qc_sqlite: Add support for SHOW CREATE VIEW
77f4b34 qc_mysqlembedded: Report more, rather than less
a73e033 qc_sqlite: Extend builtin functions
9d9650e qc_sqlite: SQL_BUFFER_RESULT must decay to an id
83fe99d qc_sqlite: Support INSERT IGNORE
9d1c9ca Added avrorouter limitations and tutorial
8dd094d qc_sqlite: Recognize builtin functions
2edc3d6 Moved write operations of the maxavro library to a different file
1364e54 Added more comments to the Avro RBR handling code
f711588 Added warnign about unsupported field types
df0d250 Added SQLite3 based indexing to avrorouter
0c55706 Added GTID event flag check in AVRO processing
bfe28dd qc_sqlite: Accept SET GLOBAL|SESSION ...
a8d2068 qc_mysqlembedded: Exlude code that won't compile on 5.5.42
16ea0b3 qc_sqlite: Add support for DROP FUNCTION
1c0f1fc qc: Report stats after comparison
02345b2 qc_sqlite: Recognize builtin readonly functions
c7a5e75 qc_sqlite: Recognize :=
0aa849d qc: Make compare undestand the delimiter command
fb0a877 qc_mysqlembedded: Examine Item::SUBSELECT_ITEMs
045cf8d qc: Add missing mtl commands
e5c6f45 qc_sqlite: Relax qc_get_type comparison
ac3b2df qc_sqlite: Add support for SHOW STATUS
73a34fb qc_sqlite: Add initial support for FLUSH
4ffbe79 qc_sqlite: Extend CREATE TABLE syntax
009af81 qc_sqlite: Add support for SHOW WARNINGS
001de97 qc: Ignore mysqltest constructs
128307d Merge branch 'release-1.4.3' into gpl-master
5e8a06a SET NAMES XXX added
3ca12ba MXS-685: MMMon clears server state before setting it
dc4d2b0 Further steps to connection limit, non-working.
ef70257 MXS-636: Master failures are no longer fatal errors
99f4c64 Updated QUERY-LAST-TRANSACTION format
d1ff157 Changed QUERY-LAST-TRANSACTION format to JSON
8b2f1ac Fixed formatting of the avrorouter
61543df Added QUERY-LAST-TRANSACTION command
c10d10b qc_sqlite: Add support for SHOW CREATE TABLE
106a38f qc_sqlite: Add support for DROP INDEX
2a85421 qc_sqlite: Extend what can be stated about a table
794cd1c qc_sqlite: Add support for MATCH ... AGAINST
dd7b747 qc_sqlite: Accept FULLTEXT and SPATIAL in CREATE TABLE
a13d6ce qc_sqlite: Add support for PREPARE and EXECUTE
0c5d29f qc_sqlite: Add support for ANALYZE
a6cd32b qc_sqlite: Extend SET syntax
5d47704 qc_sqlite: Pick out fields from UPDATE t SET i = ...
0e05735 qc: Understand --error in server test files
8535975 qc_sqlite: Extend CREATE VIEW syntax
b35e638 qc: Igore read type bit if write bit is on
818a814 qc_sqlite: Add support for SHOW VARIABLES
1aa877b qc_sqlite: Add initial support for DO
e92913a qc_sqlite: Add support for CREATE VIEW
d53a46d qc_sqlite: Recognize bit field literals b'1010'
1fb7977 Added GTID event timestmap into struct gtid_pos
8f95b10 Added new fields in AVRO diagnostics
cb4db54 Added tests with large SQL packets to modutil tests
e4dbd6b MXS-621: More fixes to log messages at startup
4f1e9ee qc: compare tester can now read server MySQL tests
cd8154b qc_sqlite: Allow CHARACTER SET to be specified for column
6f8d053 Added MariaDB 10.1 check for new flags in GTID event
71c471b qc_mysqlembedded: Fix type bits setting
26b00a7 qc_sqlite: Extend ALTER grammar
ea6057c qc_sqlite: Handle also pInto when dupping a struct select
2271559 qc_sqlite: Add support for SHOW TABLE STATUS
9caaf27 qc_sqlite: Add support for CREATE ... LIKE oldtable
cd19932 Merge tag '1.4.2' into master
9e9e4d8 Merge branch 'develop' of https://github.com/mariadb-corporation/maxscale-bsl into develop
267cb60 qc_mysqlembedded: Look into parenthesized comma expressions
77c6ca9 qc_sqlite: Recognize token "<=>"
5ca9a9f qc_sqlite: Allow comma expressions in where clause
b08e910 qc_sqlite: Add SELECT options
d11e581 qc_sqlite: Some recursion cleanup
d53d063 Add but don't invoke connection queue functionality.
6818104 Fix logic error in connections limiter
3c61605 qc_sqlite: Find more affected fields
9af8dfd Allow the classifiers to be specified on the command line
5d4a134 Activate call to protocol for max connections error message.
16638e7 Fix another mistake
234f9e6 Fix mistake
843a6fc Fix mistake.
2c6e9ad Fix errors in config.c; enable call to protocol on connection limit.
fd27849 Introduce configuration items for Maximum and Queued Service connections
60d198d Implement very simple connection limit.
84d8f0f Merge remote-tracking branch 'origin/develop' into MXS-177
8a58e63 Merge remote-tracking branch 'origin/develop' into develop
08487cd Add assertion to enforce number of free DCBs not being negative.
f73af2f Added MariaDB 10.1 check for new flags in GTID event
23898ec Fix wrong sprintf specifier, trailing white space.
ea6cfa3 readwritesplit: Cleaned up routeQuery
3858df0 Cleaned up select_connect_backend_servers
c38ee13 Added more buffer tests
48816df Added more modutils tests
537eac2 Added tests for modutil_get_complete_packets
22a6095 MXS-669: modutil_get_complete_packets no longer makes the buffer contiguous
51af97e qc_sqlite: Add support for CREATE INDEX
e50c573 qc_sqlite: Dig out fields for IN
f58c6df qc_sqlite: Dequote table name
319422b qc_sqlite: Accept ENUM as type for a column
5d6a45c qc_sqlite: Allow UNSIGNED to fallback to an id
16a5f20 qc_sqlite: Extend CREATE TABLE syntax
d6268da qc_sqlite: Accept RIGHT and FULL OUTER joins
2207415 qc_sqlite: Allow STRAIGHT_JOIN in SELECT
6fee546 qc_sqlite: Pick upp more table names
9de5f84 Remove trailing white space.
758f84d Improve comments and messages in dcb.c and session.c re recycle memory.
1c2de21 Merge remote-tracking branch 'origin/develop' into dcb-optimise
6614944 DCB code tidying. Fix missing spinlock release; remove redundant variables
ecd5e5c Remove extra code introduced by merge.
877127a Merge commit '3c0d3e5ab6ddde59da764ec904b517759074a31e' into develop
4275bbe Updated the Connector-C version to 2.2.3
c71042b Some tentative list management code; provide for counting service clients.
ad0c8a6 qc_sqlite: Allow empty insert statement
72e75e5 qc_sqlite: Add support for SELECT ... INTO
cc553fa qc_sqlite: MAXSCALE define can now be used everywhere
3305c6e qc_sqlite: Handle CASE in SELECT
702f62e qc_sqlite: Extend CREATE TABLE grammar
941c212 qc_sqlite: Add support for SHOW [INDEX|INDEXES|KEYS]
6a79136 qc_sqlite: Extend grammar for SHOW TABLES and SHOW COLUMNS
f175d2d qc_sqlite: Add SHOW COLUMNS support
6e47951 qc_sqlite: Add support for SHOW TABLES
bcfa0e7 qc_mysqlembedded: Return the actual name and not as-name
3e19f2e Fixed qlafilter build failure
810b24e MXS-675: Standardized qlafilter output
be92173 qc_sqlite: Exclude alias names from affected fields
9479280 qc_sqlite: Add support for explain EXTENDED
13b0e10 qc_sqlite: Add support for DELETE
a6ccfea qc_mysqlembedded: Look at all conditional items
b428041 qc_sqlite: Extend SELECT options
83f829f query_classifier: Correctly calculate the length of a GWBUF
2ddb24c query_classifier: Ensure that -- comments are handled
fa7a746 qc_sqlite: Allow STRAIGHT_JOIN in SELECTS
6f47819 FindLibUUID update
5ed897b Added FindLibUUID cmake file
16e02bb Added FindLibUUID cmake file
aff63e0 MXS-680: qc_mysqlembedded does not look into functions
8a0eeb4 query_classifier: Improve output of compare
6f08185 Query classifier can now convert enums to strings
124e2b9 MXS-679: Correctly collect fields of WHERE
353c97c transaction_safety default is off
896e37b qc_sqlite: Invert stop logic and be more verbose
7a44d4d qc_sqlite: Extend what is accepted in CREATE TABLE
4dbf499 qc_sqlite: Accept FIRST in ALTER TABLE
3f655c0 qc_sqlite: Update table and affected fields for INSERT
8e1e275 qc_sqlite: Make AS optional in CREATE statement
5f2084b qc_sqlite: Add support for ENGINE when creating a table
242f183 qc_sqlite: CREATE paramters handled in the correct place
8ed2e25 qc_sqlite: Trace only when needed
63d4531 qc_sqlite: Update affected fields also from functions
118cdc3 qc_sqlite: Allow multiple index names in USE|IGNORE INDEX
912da76 qc_sqlite: Add initial support for ...IGNORE INDEX...
0aa7de6 qc_sqlite: Log detailed message on error
3e3bf1a qc_sqlite: Extend create syntax.
c4a4572 qc_sqlite: Exclude quoted values
1621f49 Removed MYSQL_EMBEDDED_LIBRARIES
d3e324c UUID generation now comes from libuuid
e8fe678 qc_sqlite: Enable confitional compilation
a9522ba qc_sqlite: Handle X.Y selects
9bc9770 qc_sqlite: Use same stream when outputting padding
366257a qc_sqlite: Add support for UNSIGNED and ZEROFILL
d4d90ff qc_sqlite: Add support for DROP VIEW
d0519bd qc_sqlite: Extend DROP TABLE syntax
c1e4894 qc_sqlite: Add flag to compare for stopping at first error
9fd6344 MXS-674: Maxinfo generates invalid JSON
3c0d3e5 Fix stupid errors.
9d32b2d Include read queue in buffer provided by dcb_read; changes to match.
b690797 Fix double spinlock release in random_jkiss.
6a4328f Fix problems of memory not being freed in some error cases.
2112e56 Change DCB and Session handling to recycle memory; fix bug in random_jkiss.
3912f72 MXS-631, MXS-632: Cleaned up default value CMake files
383ccb8 Fixed build failure on MariaDB 5.5
a60bca5 Merge branch '1.2.1-binlog_router_trx' into develop
3c2a062 Fix to crashes in embedded library with MariaDB 10.0
d3fe938 MXS-662: Service protocol check no longer ignores bind address
c3da49b qc_sqlite: Update affected fields from everywhere
7a0fab8 qc_sqlite: Allow verbosity of compare test to be controlled
81d6822 qc_sqlite: Cleanup handling of select columns
13e5c59 qc_sqlite: Introduce custom allocation functions
026f27d qc_sqlite: Add support for "USE database"
99079df qc_sqlite: Ignore duplicates when comparing affected fields
ca45cd6 qc_sqlite: Add initial support for qc_get_database_names
75970b6 qc_sqlite: Add support for DROP TABLE.
b97e45d qc_sqlite: Move get_affected_fields() to other helpers
cb0fa96 qc_sqlite: Collect table names of INSERT
3a7c513 qc_mysqlembedded: Only look for created name if CREATE
308b0a4 qc_sqlite: Add support for gc_get_created_table_name.
0dc4af2 qc_sqlite: Add qc_has_clause() handling to update
e9f2d1d qc_sqlite: Update now also provides table names
c3192e4 qc_sqlite: Add initial support for get_table_names
c51eafd qc_sqlite: Add support for qc_has_clause
f318fb2 qc_mysqlembedded: Work around embedded lib bug
4ba2e11 qc_sqlite: Add initial support for qc_get_affected_fields
080dea5 qc_sqlite: Support is_read_query
3f94df1 Fixed compare.cc build failure
868a712 Updated freeing of buffer chains in readwritesplit
9bf7fca Formatted readwritesplit source code
de4da2b Add assertion to spinlock release to detect release of already released spinlock.
d30955a qc_sqlite: Handle the default case of affected fields.
5d02b3f qc_sqlite: Set operation when creating table
94a334d Add test for comparing qc-output
aa6f5d6 Allow a specific query classifier to be loaded explicitly
c799d37 Test both qc_mysqlembedded and qc_sqlite
f8d9aa1 qc_sqlite: Enable "set @user_var=@@system_var"
f190bdc qc_sqlite: Recognize /*!-comments
b694b55 Fixed binary Avro format streaming
c95fa86 qc_sqlite: Report correctly the type of set autocommit
9cb236c qc_sqlite: Add test case
77b4e62 Ensure classify test checks all types
962039e Change return type of qc_get_type
ae00df8 qc_sqlite: Add initial support for the SET statement.
88253c5 qc_sqlite: Rename functions
fa48043 Rework of MySQL backend protocol clean up to fix fault.
3851064 qc_sqlite: Correct recognition of system variables (@@xyz).
9d86f7f qc_sqlite: Detect user and system variables.
a683297 qc_sqlite: Recognize and accept system variables (@@xyz).
a4f64dd qc_sqlite: Add initial support for CREATE [TEMPORARY] TABLE
f834b4f MXS-661: Only COM_QUERY packets are parsed
30077c1 CMake policies set only for correct versions
a166f34 Suppress warning about unknown CMake target
1412730 Added more variables to launchable monitor scripts
358c194 MXS-656: Galera nodes with index 0 can be master again
842aec5 qc_sqlite: Add support for BEGIN, COMMIT, ROLLBACK
b9cad6d Add initial support for UPDATE.
95741cb Add initial support for insert.
3796158 Re-install sqlite whenever parse.y has changed
5bcd8cf Ensure that the query is the one passed
cf05533 Add support for obtaining the type of a query
400d8b4 Always log the outcome
45cf632 Fixed resource leaks and minor bugs
fa9e970 Printout the query when there is a mismatch.
263cb07 All classify to be used with any query classifier
ea381b9 Further cleanup of classify.c
23f6f30 Merge pull request #107 from godmodelabs/typo-dpkg
8c2a64e Fixed classify build failure
0c3070b Fixed binlog to Avro conversion bugs
b827ba9 MXS-653: Silence maxpasswd
30d981c MXS-654: Add test for checking maxpasswd
984039b Rearrange classify.c
837e46d Add log initialization
1cc7a6e Reformat query_classifier/test/classify.c
065a4e5 Merge branch 'develop' into develop-MXS-544-b-merge
ca27f13 Fixed binlog build failure
fb81be2 fixed typo dpgk <-> dpkg
1e88d5d Added python based CDC user creation script
040bbdd MXS-633: Monitor permission checks moved to modules
cde7595 Master-Slave clusters are now robust by default
158c776 Cleaned up core test suite
94c6e66 Fixed bugs resulting from merge
a491e11 Merge remote-tracking branch 'origin/MXS-544-b' into develop-MXS-544-b-merge
30f9f25 Cleaned up avro.c
6286f64 Merge branch 'release-1.4.1' into develop
00206ac MXS-194: Added support for more query types to dbfwfilter
267832b Fixed diagnostic output
a64b694 Fixed bugs in avrorouter
8faaba1 Fixed a bug in GTID seeking
a5fafb7 Fixed typos in avrorouter documentation
8080379 Added avrorouter documentation
fa07d8a Fixed dbfwfilter rule parser build failure
744ce0d Constraints are ignored in DDL statement processing
50808c6 Cleaned up avrorouter
47f6032 Merge branch '1.2.1-binlog_router_trx_lint' into develop
caa0956 Added missing dependencies to maxscale-core
92df61a Remove parallel make from travis coverity builds
fa2b2b4 Added more error logging to Avro record reading
9a98e8b Support for GTID requests and data bursts
c2a787b Small diagnostic fix
c4cee7e Added format output requested by client
50483c7 Cleaning up of Avro code
d485379 Added support for binary Avro protocol
c22cdbb Converted Avro GTID from string to integer representation
5795ca9 Added coverity notification email to .travis.yml
a06e44d Added coverity_scan to Travis
6b94384 Fixed memory leak in avro_schema.c
a11096c Support for db.table request for Avrorouter
4e5cbbf Fixed bugs in Avro record reading
a99e427 Fixed minor bugs in avrorouter
01db8ae Fixed errors with CREATE TABLE statements
f5f3d7a Diagnostic routine update
209324f Added missing include for log_manager.h
e62f764 Added sending of schemas and file rotation
8c8fcbb Added missing log_manager.h include
b13942d Changed printf calls in maxavro library to MXS_ERROR
1168962 More lint inspired changes, mainly in blr_master.c and blr_slave.c
ced8f2f Fixed directory checks in avrorouter
a8ae6be Minor fix to string processing
fbd2d95 Fixed typo in dbfwfilter's CMakeLists.txt
29c3cf4 Merge pull request #106 from mariadb-corporation/willfong-patch-1
854d4e9 Add password column name to example
2f956df Moved server state change logging to a common function
007121f Fixed truncated string values
782892b Fix lint errors and warnings for blr_file.c
4f99fc5 Added Avro testing script
2820980 Small fix to help clear lint problems in blr.c
3afeda4 Fixed errors and warnings located by lint
ecfff82 Fix most lint problems in blr.c
223689c Added ALTER TABLE support
80bc935 Fix final lint problems with mysql_common protocol functions.
e068310 Added preliminary alter table parsing
8c723f1 Lint monitor modules
fdb5620 Fix lint issues in authenticators.
84f0e04 Added function documentation and renamed files
365d9f5 Tidy up, mainly for lint
2ff3005 Added update rows event processing and event types to avro records
2ae0371 Fixed failing regex and improved data streaming
f19206a Renamed avrorouter header
aa7174b Moved relpacement and storage of ddl statements to a separate function
0c10be8 Improved client notification and added Avro block size managemet
91405a7 Cleaned up instance creation
dd97485 Removed useless vars
af64e9e Added CDC authentication with a db file
b73a118 Streamline and lint MySQL backend protocol.
65034ce Merge branch 'release-1.4.0' into develop
28f7e4e Added callback for AVRO client async data transmission
628c27a Added MAXAVRO_FILE struct to AVRO_CLIENT
32b3645 Fixed slavelag build failure
7b15542 Added default authentication method for CDC protocol
5f8e20f Renamed maxavro types and formatted files that use them
882cf84 Added more function documentation to maxavro library
9532f0b Fixed CDC protocol build failure
35a1d3a Added support for offsets in client requests
94577ac Fixed, formatted and refactored CDC protocol
da9bcad Use the maxavro library to read data from Avro files
3ececee Added low level error checking to maxavro library
01b0b8b Tidy and lint mysql_client.c
943f0a7 Added handling of Avro boolean data types to maxavro library
4c781f7 Cleaned up maxavro library and avrorouter
6b2e85d Renamed functions more consistently and cleaned up code
e07158a Moved query event handling to its own function
df7d4c0 Added avro_ prefix to rbr.c
fcbfceb Added seeking to a position in an Avro file
068243a CDC auth decoding
3584d54 Add checks to simplify downstream logic.
9b2c323 Removed useless fprintf
bd5cd52 Added missing authfunc setup
e4aff59 Added record value processing
5cc8615 Added value length functions
7921ecc Merge branch 'MXS-615' into MXS-483
4b09cca Added Travis status to readme.md
cca3a48 Simplify interface to max admin authentication.
4739838 Authenticator API update
233505f Maxavrocheck now accepts multiple files
3fdd137 Improved the Avro file handling
a6ba913 Merge from MXS-615
417d742 Added maxavrocheck
014f9cf Remove obsolete second parameter from authenticate function in authenticators.
ece7ece MaxAdmin authentication converted to a module. Fix quirk in SSL setup.
7c8b37e Moved contents of avro_schema.h into mxs_avro.h
d6660cf Improvements to type handling
71ed0cf Protocol API to have entry point for obtaining default authenticator name.
9d35de2 Fixed transaction tracking
5be02a2 Avrorouter internal state is now stored in the Avro file directory
9293464 Added new info to avro diagnostics
06e0e93 Protocol modules can still handle the authentication outside authenticator modules
6d7108b Added JSON output when Requesting an avro file
6188211 Added new CDC protocol state
c8af27f CDC authentication uses its own authenticator
6590f94 Factor out qc_get_qtype_str
b7880f1 Fix qc_sqlite CMakeLists.txt
bd4ff43 Fixed connector-c being updated and built after every make invokation
0d9e57b Fixed non-MariaDB connectors being used in builds
3d3b779 FIX BUG IN CLIENT DCB SHUTDOWN THAT CAN CAUSE CRASHES
e45ba33 Fixed Connector-C .cmake files
c130189 Fixed connector-c being updated and built after every make invokation
7f3cdf3 Fixed errors on binlog rotation
9d3c83a Remove qc_sqlite
15e8ba5 CDC protocol is now compliant with new protocol structure
4460869 Merge branch 'release-1.4.0' into MXS-483
ea40812 Cleaned up the binlog processing loop
cb646ca Add minimal select recognition to qc_sqlite
ac1a9c5 Fixed binlogrouter test
85dd227 Re-route sqlite's sqlite3Select.
7a2e6f3 Update CMakeLists.txt for qc_sqlite
7a751c3 Added timestamps to records and fixed minor bugs
f73bdde Avrorouter state storage to disk
fcf0488 Fixed Connector-C .cmake files
48b8e4e Merge branch 'MXS-615' into MXS-615-binlog-merge
7c8e19f Add missing dependencies for qc_sqlite
bb9b667 Improvements to type handling and binlog position tracking
dc66b74 Client UUID added
f12fce4 AVRO registration is now handled by avro router
575b809 Add skeleton sqlite-based query classifier.
d09d5fc Build sqlite
146d1f9 Fixed BLOB type handling and refined error messages
6e9e521 Added client user to diagnostics
4538bb8 Merge pull request #104 from rasmushoj/develop
7e18d95 Avro router diagnostics routine update
01e3f75 reverted changes in CMakeLists.txt
52f7c78 reverted changes in postinst.in
eaed577 Added sqlite 3110100
a58cdda Travis configuration for MaxScale. ...
38b452d MIGRATE FREE CLIENT DATA TO AUTH MODULE; BUG FIXES; TIDY UP
6e64506 Fixed minor bugs
aff2411 Enabled CDC protocol
f669100 Fixed NULL Avro value being assigned to a field which cannot be NULL
8f6b16a Added row event processing to avrorouter
2939fe0 Updated Avro schema management to use actual column names
9e3b0cb Removed use of RBR related functions in binlogrouter
d674903 Formatted avro files
fe028d1 DEVELOPMENT OF AUTHENTICATION AS MODULE - WILL NOT WORK YET
977aded Added authenticator modules to the build
a2b384f MOVE MYSQL AUTH CODE INTO AUTHENTICATOR MODULES DIRECTORY
a5d7484 PRELIMINARY CHANGES TO CREATE AUTHENTICATORS AS MODULES
66cf802 Merge remote-tracking branch 'origin/develop' into MXS-615
bca0a7d MINOR CHANGES TO SATISFY LINT
5a9e397 Added Avrorouter binlog file walking
fbc737f Fixed binlogrouter test
3c7c9d7 Added avrorouter main event handling loop
07ad81b Moved common binlogrouter code to a separate file
8c605ed Fixed avrorouter build failures
aa1ba05 Moved binlog definitions to a separate header and fixed build failures
eee7c55 Added create table statement detection
e52b27e Added AVRO_INSTANCE and AVRO_CLIENT
0830caa Change test for client DCB to use role being DCB_ROLE_CLIENT_HANDLER. ...
997bbca Change protocols to continue looping if an accept fails; ...
522e42d Make use of dcb_accept and dcb_listen in httpd and telnetd protocols.
4e692b0 Generalise dcb_listen to tailor log messages to different protocols. ...
52c431d Remove support for passing default port number when handling ...
afe5abc Fix bug in creation of SSL listener structure; fix bugs in ...
0bd6c77 Merge remote-tracking branch 'origin/MXS-544' into MXS-544-a ...
7598597 Add dcb_listen function to make a given DCB into a listener, ...
a275d89 Maxbinlogcheck avro version can detect proper end of file
9bb55a5 Moved row event and table map event handling to a separate file
b7d9e09 Add/improve comments, fix mistake with premature return.
c598770 First attempt at extracting general code into dcb_accept, ...
f20f28f Testing with maxbinlogcheck
b3c60b7 Added mysql_binlog files
0ff9971 Added MariaDB/MySQL binary data processing functions
124560c Merge branch '1.2.1-binlog_router_trx' into MXS-483
4deccff New router fro cdc client
2c11434 Fixed test compiler errors
c1f7d24 Obliged to merge remote-tracking branch 'origin/develop' ...
1775599 Merge remote-tracking branch 'origin/MXS-544' into Test-dev-544-merge
c5317da Small modifications in comments
11c0666 Code cleanup
64a5e9a Merge branch 'release-1.3.0' into MXS-483
2c11e89 First Implementation of CDC
2016-08-11 12:44:07 +03:00

7572 lines
252 KiB
C

/*
** 2004 May 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file contains the VFS implementation for unix-like operating systems
** include Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others.
**
** There are actually several different VFS implementations in this file.
** The differences are in the way that file locking is done. The default
** implementation uses Posix Advisory Locks. Alternative implementations
** use flock(), dot-files, various proprietary locking schemas, or simply
** skip locking all together.
**
** This source file is organized into divisions where the logic for various
** subfunctions is contained within the appropriate division. PLEASE
** KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed
** in the correct division and should be clearly labeled.
**
** The layout of divisions is as follows:
**
** * General-purpose declarations and utility functions.
** * Unique file ID logic used by VxWorks.
** * Various locking primitive implementations (all except proxy locking):
** + for Posix Advisory Locks
** + for no-op locks
** + for dot-file locks
** + for flock() locking
** + for named semaphore locks (VxWorks only)
** + for AFP filesystem locks (MacOSX only)
** * sqlite3_file methods not associated with locking.
** * Definitions of sqlite3_io_methods objects for all locking
** methods plus "finder" functions for each locking method.
** * sqlite3_vfs method implementations.
** * Locking primitives for the proxy uber-locking-method. (MacOSX only)
** * Definitions of sqlite3_vfs objects for all locking methods
** plus implementations of sqlite3_os_init() and sqlite3_os_end().
*/
#include "sqliteInt.h"
#if SQLITE_OS_UNIX /* This file is used on unix only */
/*
** There are various methods for file locking used for concurrency
** control:
**
** 1. POSIX locking (the default),
** 2. No locking,
** 3. Dot-file locking,
** 4. flock() locking,
** 5. AFP locking (OSX only),
** 6. Named POSIX semaphores (VXWorks only),
** 7. proxy locking. (OSX only)
**
** Styles 4, 5, and 7 are only available of SQLITE_ENABLE_LOCKING_STYLE
** is defined to 1. The SQLITE_ENABLE_LOCKING_STYLE also enables automatic
** selection of the appropriate locking style based on the filesystem
** where the database is located.
*/
#if !defined(SQLITE_ENABLE_LOCKING_STYLE)
# if defined(__APPLE__)
# define SQLITE_ENABLE_LOCKING_STYLE 1
# else
# define SQLITE_ENABLE_LOCKING_STYLE 0
# endif
#endif
/*
** standard include files.
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#include <errno.h>
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
# include <sys/mman.h>
#endif
#if SQLITE_ENABLE_LOCKING_STYLE
# include <sys/ioctl.h>
# include <sys/file.h>
# include <sys/param.h>
#endif /* SQLITE_ENABLE_LOCKING_STYLE */
#if defined(__APPLE__) && ((__MAC_OS_X_VERSION_MIN_REQUIRED > 1050) || \
(__IPHONE_OS_VERSION_MIN_REQUIRED > 2000))
# if (!defined(TARGET_OS_EMBEDDED) || (TARGET_OS_EMBEDDED==0)) \
&& (!defined(TARGET_IPHONE_SIMULATOR) || (TARGET_IPHONE_SIMULATOR==0))
# define HAVE_GETHOSTUUID 1
# else
# warning "gethostuuid() is disabled."
# endif
#endif
#if OS_VXWORKS
# include <sys/ioctl.h>
# include <semaphore.h>
# include <limits.h>
#endif /* OS_VXWORKS */
#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
# include <sys/mount.h>
#endif
#ifdef HAVE_UTIME
# include <utime.h>
#endif
/*
** Allowed values of unixFile.fsFlags
*/
#define SQLITE_FSFLAGS_IS_MSDOS 0x1
/*
** If we are to be thread-safe, include the pthreads header and define
** the SQLITE_UNIX_THREADS macro.
*/
#if SQLITE_THREADSAFE
# include <pthread.h>
# define SQLITE_UNIX_THREADS 1
#endif
/*
** Default permissions when creating a new file
*/
#ifndef SQLITE_DEFAULT_FILE_PERMISSIONS
# define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
#endif
/*
** Default permissions when creating auto proxy dir
*/
#ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
# define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755
#endif
/*
** Maximum supported path-length.
*/
#define MAX_PATHNAME 512
/*
** Maximum supported symbolic links
*/
#define SQLITE_MAX_SYMLINKS 100
/* Always cast the getpid() return type for compatibility with
** kernel modules in VxWorks. */
#define osGetpid(X) (pid_t)getpid()
/*
** Only set the lastErrno if the error code is a real error and not
** a normal expected return code of SQLITE_BUSY or SQLITE_OK
*/
#define IS_LOCK_ERROR(x) ((x != SQLITE_OK) && (x != SQLITE_BUSY))
/* Forward references */
typedef struct unixShm unixShm; /* Connection shared memory */
typedef struct unixShmNode unixShmNode; /* Shared memory instance */
typedef struct unixInodeInfo unixInodeInfo; /* An i-node */
typedef struct UnixUnusedFd UnixUnusedFd; /* An unused file descriptor */
/*
** Sometimes, after a file handle is closed by SQLite, the file descriptor
** cannot be closed immediately. In these cases, instances of the following
** structure are used to store the file descriptor while waiting for an
** opportunity to either close or reuse it.
*/
struct UnixUnusedFd {
int fd; /* File descriptor to close */
int flags; /* Flags this file descriptor was opened with */
UnixUnusedFd *pNext; /* Next unused file descriptor on same file */
};
/*
** The unixFile structure is subclass of sqlite3_file specific to the unix
** VFS implementations.
*/
typedef struct unixFile unixFile;
struct unixFile {
sqlite3_io_methods const *pMethod; /* Always the first entry */
sqlite3_vfs *pVfs; /* The VFS that created this unixFile */
unixInodeInfo *pInode; /* Info about locks on this inode */
int h; /* The file descriptor */
unsigned char eFileLock; /* The type of lock held on this fd */
unsigned short int ctrlFlags; /* Behavioral bits. UNIXFILE_* flags */
int lastErrno; /* The unix errno from last I/O error */
void *lockingContext; /* Locking style specific state */
UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */
const char *zPath; /* Name of the file */
unixShm *pShm; /* Shared memory segment information */
int szChunk; /* Configured by FCNTL_CHUNK_SIZE */
#if SQLITE_MAX_MMAP_SIZE>0
int nFetchOut; /* Number of outstanding xFetch refs */
sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */
sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */
sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */
void *pMapRegion; /* Memory mapped region */
#endif
#ifdef __QNXNTO__
int sectorSize; /* Device sector size */
int deviceCharacteristics; /* Precomputed device characteristics */
#endif
#if SQLITE_ENABLE_LOCKING_STYLE
int openFlags; /* The flags specified at open() */
#endif
#if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__)
unsigned fsFlags; /* cached details from statfs() */
#endif
#if OS_VXWORKS
struct vxworksFileId *pId; /* Unique file ID */
#endif
#ifdef SQLITE_DEBUG
/* The next group of variables are used to track whether or not the
** transaction counter in bytes 24-27 of database files are updated
** whenever any part of the database changes. An assertion fault will
** occur if a file is updated without also updating the transaction
** counter. This test is made to avoid new problems similar to the
** one described by ticket #3584.
*/
unsigned char transCntrChng; /* True if the transaction counter changed */
unsigned char dbUpdate; /* True if any part of database file changed */
unsigned char inNormalWrite; /* True if in a normal write operation */
#endif
#ifdef SQLITE_TEST
/* In test mode, increase the size of this structure a bit so that
** it is larger than the struct CrashFile defined in test6.c.
*/
char aPadding[32];
#endif
};
/* This variable holds the process id (pid) from when the xRandomness()
** method was called. If xOpen() is called from a different process id,
** indicating that a fork() has occurred, the PRNG will be reset.
*/
static pid_t randomnessPid = 0;
/*
** Allowed values for the unixFile.ctrlFlags bitmask:
*/
#define UNIXFILE_EXCL 0x01 /* Connections from one process only */
#define UNIXFILE_RDONLY 0x02 /* Connection is read only */
#define UNIXFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */
#ifndef SQLITE_DISABLE_DIRSYNC
# define UNIXFILE_DIRSYNC 0x08 /* Directory sync needed */
#else
# define UNIXFILE_DIRSYNC 0x00
#endif
#define UNIXFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */
#define UNIXFILE_DELETE 0x20 /* Delete on close */
#define UNIXFILE_URI 0x40 /* Filename might have query parameters */
#define UNIXFILE_NOLOCK 0x80 /* Do no file locking */
/*
** Include code that is common to all os_*.c files
*/
#include "os_common.h"
/*
** Define various macros that are missing from some systems.
*/
#ifndef O_LARGEFILE
# define O_LARGEFILE 0
#endif
#ifdef SQLITE_DISABLE_LFS
# undef O_LARGEFILE
# define O_LARGEFILE 0
#endif
#ifndef O_NOFOLLOW
# define O_NOFOLLOW 0
#endif
#ifndef O_BINARY
# define O_BINARY 0
#endif
/*
** The threadid macro resolves to the thread-id or to 0. Used for
** testing and debugging only.
*/
#if SQLITE_THREADSAFE
#define threadid pthread_self()
#else
#define threadid 0
#endif
/*
** HAVE_MREMAP defaults to true on Linux and false everywhere else.
*/
#if !defined(HAVE_MREMAP)
# if defined(__linux__) && defined(_GNU_SOURCE)
# define HAVE_MREMAP 1
# else
# define HAVE_MREMAP 0
# endif
#endif
/*
** Explicitly call the 64-bit version of lseek() on Android. Otherwise, lseek()
** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined.
*/
#ifdef __ANDROID__
# define lseek lseek64
#endif
/*
** Different Unix systems declare open() in different ways. Same use
** open(const char*,int,mode_t). Others use open(const char*,int,...).
** The difference is important when using a pointer to the function.
**
** The safest way to deal with the problem is to always use this wrapper
** which always has the same well-defined interface.
*/
static int posixOpen(const char *zFile, int flags, int mode){
return open(zFile, flags, mode);
}
/* Forward reference */
static int openDirectory(const char*, int*);
static int unixGetpagesize(void);
/*
** Many system calls are accessed through pointer-to-functions so that
** they may be overridden at runtime to facilitate fault injection during
** testing and sandboxing. The following array holds the names and pointers
** to all overrideable system calls.
*/
static struct unix_syscall {
const char *zName; /* Name of the system call */
sqlite3_syscall_ptr pCurrent; /* Current value of the system call */
sqlite3_syscall_ptr pDefault; /* Default value */
} aSyscall[] = {
{ "open", (sqlite3_syscall_ptr)posixOpen, 0 },
#define osOpen ((int(*)(const char*,int,int))aSyscall[0].pCurrent)
{ "close", (sqlite3_syscall_ptr)close, 0 },
#define osClose ((int(*)(int))aSyscall[1].pCurrent)
{ "access", (sqlite3_syscall_ptr)access, 0 },
#define osAccess ((int(*)(const char*,int))aSyscall[2].pCurrent)
{ "getcwd", (sqlite3_syscall_ptr)getcwd, 0 },
#define osGetcwd ((char*(*)(char*,size_t))aSyscall[3].pCurrent)
{ "stat", (sqlite3_syscall_ptr)stat, 0 },
#define osStat ((int(*)(const char*,struct stat*))aSyscall[4].pCurrent)
/*
** The DJGPP compiler environment looks mostly like Unix, but it
** lacks the fcntl() system call. So redefine fcntl() to be something
** that always succeeds. This means that locking does not occur under
** DJGPP. But it is DOS - what did you expect?
*/
#ifdef __DJGPP__
{ "fstat", 0, 0 },
#define osFstat(a,b,c) 0
#else
{ "fstat", (sqlite3_syscall_ptr)fstat, 0 },
#define osFstat ((int(*)(int,struct stat*))aSyscall[5].pCurrent)
#endif
{ "ftruncate", (sqlite3_syscall_ptr)ftruncate, 0 },
#define osFtruncate ((int(*)(int,off_t))aSyscall[6].pCurrent)
{ "fcntl", (sqlite3_syscall_ptr)fcntl, 0 },
#define osFcntl ((int(*)(int,int,...))aSyscall[7].pCurrent)
{ "read", (sqlite3_syscall_ptr)read, 0 },
#define osRead ((ssize_t(*)(int,void*,size_t))aSyscall[8].pCurrent)
#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE
{ "pread", (sqlite3_syscall_ptr)pread, 0 },
#else
{ "pread", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osPread ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[9].pCurrent)
#if defined(USE_PREAD64)
{ "pread64", (sqlite3_syscall_ptr)pread64, 0 },
#else
{ "pread64", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osPread64 ((ssize_t(*)(int,void*,size_t,off_t))aSyscall[10].pCurrent)
{ "write", (sqlite3_syscall_ptr)write, 0 },
#define osWrite ((ssize_t(*)(int,const void*,size_t))aSyscall[11].pCurrent)
#if defined(USE_PREAD) || SQLITE_ENABLE_LOCKING_STYLE
{ "pwrite", (sqlite3_syscall_ptr)pwrite, 0 },
#else
{ "pwrite", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osPwrite ((ssize_t(*)(int,const void*,size_t,off_t))\
aSyscall[12].pCurrent)
#if defined(USE_PREAD64)
{ "pwrite64", (sqlite3_syscall_ptr)pwrite64, 0 },
#else
{ "pwrite64", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osPwrite64 ((ssize_t(*)(int,const void*,size_t,off_t))\
aSyscall[13].pCurrent)
{ "fchmod", (sqlite3_syscall_ptr)fchmod, 0 },
#define osFchmod ((int(*)(int,mode_t))aSyscall[14].pCurrent)
#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
{ "fallocate", (sqlite3_syscall_ptr)posix_fallocate, 0 },
#else
{ "fallocate", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent)
{ "unlink", (sqlite3_syscall_ptr)unlink, 0 },
#define osUnlink ((int(*)(const char*))aSyscall[16].pCurrent)
{ "openDirectory", (sqlite3_syscall_ptr)openDirectory, 0 },
#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent)
{ "mkdir", (sqlite3_syscall_ptr)mkdir, 0 },
#define osMkdir ((int(*)(const char*,mode_t))aSyscall[18].pCurrent)
{ "rmdir", (sqlite3_syscall_ptr)rmdir, 0 },
#define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent)
#if defined(HAVE_FCHOWN)
{ "fchown", (sqlite3_syscall_ptr)fchown, 0 },
#else
{ "fchown", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent)
{ "geteuid", (sqlite3_syscall_ptr)geteuid, 0 },
#define osGeteuid ((uid_t(*)(void))aSyscall[21].pCurrent)
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
{ "mmap", (sqlite3_syscall_ptr)mmap, 0 },
#else
{ "mmap", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[22].pCurrent)
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
{ "munmap", (sqlite3_syscall_ptr)munmap, 0 },
#else
{ "munmap", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osMunmap ((void*(*)(void*,size_t))aSyscall[23].pCurrent)
#if HAVE_MREMAP && (!defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0)
{ "mremap", (sqlite3_syscall_ptr)mremap, 0 },
#else
{ "mremap", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[24].pCurrent)
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
{ "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 },
#else
{ "getpagesize", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osGetpagesize ((int(*)(void))aSyscall[25].pCurrent)
#if defined(HAVE_READLINK)
{ "readlink", (sqlite3_syscall_ptr)readlink, 0 },
#else
{ "readlink", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osReadlink ((ssize_t(*)(const char*,char*,size_t))aSyscall[26].pCurrent)
#if defined(HAVE_LSTAT)
{ "lstat", (sqlite3_syscall_ptr)lstat, 0 },
#else
{ "lstat", (sqlite3_syscall_ptr)0, 0 },
#endif
#define osLstat ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent)
}; /* End of the overrideable system calls */
/*
** On some systems, calls to fchown() will trigger a message in a security
** log if they come from non-root processes. So avoid calling fchown() if
** we are not running as root.
*/
static int robustFchown(int fd, uid_t uid, gid_t gid){
#if defined(HAVE_FCHOWN)
return osGeteuid() ? 0 : osFchown(fd,uid,gid);
#else
return 0;
#endif
}
/*
** This is the xSetSystemCall() method of sqlite3_vfs for all of the
** "unix" VFSes. Return SQLITE_OK opon successfully updating the
** system call pointer, or SQLITE_NOTFOUND if there is no configurable
** system call named zName.
*/
static int unixSetSystemCall(
sqlite3_vfs *pNotUsed, /* The VFS pointer. Not used */
const char *zName, /* Name of system call to override */
sqlite3_syscall_ptr pNewFunc /* Pointer to new system call value */
){
unsigned int i;
int rc = SQLITE_NOTFOUND;
UNUSED_PARAMETER(pNotUsed);
if( zName==0 ){
/* If no zName is given, restore all system calls to their default
** settings and return NULL
*/
rc = SQLITE_OK;
for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
if( aSyscall[i].pDefault ){
aSyscall[i].pCurrent = aSyscall[i].pDefault;
}
}
}else{
/* If zName is specified, operate on only the one system call
** specified.
*/
for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
if( strcmp(zName, aSyscall[i].zName)==0 ){
if( aSyscall[i].pDefault==0 ){
aSyscall[i].pDefault = aSyscall[i].pCurrent;
}
rc = SQLITE_OK;
if( pNewFunc==0 ) pNewFunc = aSyscall[i].pDefault;
aSyscall[i].pCurrent = pNewFunc;
break;
}
}
}
return rc;
}
/*
** Return the value of a system call. Return NULL if zName is not a
** recognized system call name. NULL is also returned if the system call
** is currently undefined.
*/
static sqlite3_syscall_ptr unixGetSystemCall(
sqlite3_vfs *pNotUsed,
const char *zName
){
unsigned int i;
UNUSED_PARAMETER(pNotUsed);
for(i=0; i<sizeof(aSyscall)/sizeof(aSyscall[0]); i++){
if( strcmp(zName, aSyscall[i].zName)==0 ) return aSyscall[i].pCurrent;
}
return 0;
}
/*
** Return the name of the first system call after zName. If zName==NULL
** then return the name of the first system call. Return NULL if zName
** is the last system call or if zName is not the name of a valid
** system call.
*/
static const char *unixNextSystemCall(sqlite3_vfs *p, const char *zName){
int i = -1;
UNUSED_PARAMETER(p);
if( zName ){
for(i=0; i<ArraySize(aSyscall)-1; i++){
if( strcmp(zName, aSyscall[i].zName)==0 ) break;
}
}
for(i++; i<ArraySize(aSyscall); i++){
if( aSyscall[i].pCurrent!=0 ) return aSyscall[i].zName;
}
return 0;
}
/*
** Do not accept any file descriptor less than this value, in order to avoid
** opening database file using file descriptors that are commonly used for
** standard input, output, and error.
*/
#ifndef SQLITE_MINIMUM_FILE_DESCRIPTOR
# define SQLITE_MINIMUM_FILE_DESCRIPTOR 3
#endif
/*
** Invoke open(). Do so multiple times, until it either succeeds or
** fails for some reason other than EINTR.
**
** If the file creation mode "m" is 0 then set it to the default for
** SQLite. The default is SQLITE_DEFAULT_FILE_PERMISSIONS (normally
** 0644) as modified by the system umask. If m is not 0, then
** make the file creation mode be exactly m ignoring the umask.
**
** The m parameter will be non-zero only when creating -wal, -journal,
** and -shm files. We want those files to have *exactly* the same
** permissions as their original database, unadulterated by the umask.
** In that way, if a database file is -rw-rw-rw or -rw-rw-r-, and a
** transaction crashes and leaves behind hot journals, then any
** process that is able to write to the database will also be able to
** recover the hot journals.
*/
static int robust_open(const char *z, int f, mode_t m){
int fd;
mode_t m2 = m ? m : SQLITE_DEFAULT_FILE_PERMISSIONS;
while(1){
#if defined(O_CLOEXEC)
fd = osOpen(z,f|O_CLOEXEC,m2);
#else
fd = osOpen(z,f,m2);
#endif
if( fd<0 ){
if( errno==EINTR ) continue;
break;
}
if( fd>=SQLITE_MINIMUM_FILE_DESCRIPTOR ) break;
osClose(fd);
sqlite3_log(SQLITE_WARNING,
"attempt to open \"%s\" as file descriptor %d", z, fd);
fd = -1;
if( osOpen("/dev/null", f, m)<0 ) break;
}
if( fd>=0 ){
if( m!=0 ){
struct stat statbuf;
if( osFstat(fd, &statbuf)==0
&& statbuf.st_size==0
&& (statbuf.st_mode&0777)!=m
){
osFchmod(fd, m);
}
}
#if defined(FD_CLOEXEC) && (!defined(O_CLOEXEC) || O_CLOEXEC==0)
osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
#endif
}
return fd;
}
/*
** Helper functions to obtain and relinquish the global mutex. The
** global mutex is used to protect the unixInodeInfo and
** vxworksFileId objects used by this file, all of which may be
** shared by multiple threads.
**
** Function unixMutexHeld() is used to assert() that the global mutex
** is held when required. This function is only used as part of assert()
** statements. e.g.
**
** unixEnterMutex()
** assert( unixMutexHeld() );
** unixEnterLeave()
*/
static void unixEnterMutex(void){
sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1));
}
static void unixLeaveMutex(void){
sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1));
}
#ifdef SQLITE_DEBUG
static int unixMutexHeld(void) {
return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1));
}
#endif
#ifdef SQLITE_HAVE_OS_TRACE
/*
** Helper function for printing out trace information from debugging
** binaries. This returns the string representation of the supplied
** integer lock-type.
*/
static const char *azFileLock(int eFileLock){
switch( eFileLock ){
case NO_LOCK: return "NONE";
case SHARED_LOCK: return "SHARED";
case RESERVED_LOCK: return "RESERVED";
case PENDING_LOCK: return "PENDING";
case EXCLUSIVE_LOCK: return "EXCLUSIVE";
}
return "ERROR";
}
#endif
#ifdef SQLITE_LOCK_TRACE
/*
** Print out information about all locking operations.
**
** This routine is used for troubleshooting locks on multithreaded
** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE
** command-line option on the compiler. This code is normally
** turned off.
*/
static int lockTrace(int fd, int op, struct flock *p){
char *zOpName, *zType;
int s;
int savedErrno;
if( op==F_GETLK ){
zOpName = "GETLK";
}else if( op==F_SETLK ){
zOpName = "SETLK";
}else{
s = osFcntl(fd, op, p);
sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
return s;
}
if( p->l_type==F_RDLCK ){
zType = "RDLCK";
}else if( p->l_type==F_WRLCK ){
zType = "WRLCK";
}else if( p->l_type==F_UNLCK ){
zType = "UNLCK";
}else{
assert( 0 );
}
assert( p->l_whence==SEEK_SET );
s = osFcntl(fd, op, p);
savedErrno = errno;
sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
(int)p->l_pid, s);
if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
struct flock l2;
l2 = *p;
osFcntl(fd, F_GETLK, &l2);
if( l2.l_type==F_RDLCK ){
zType = "RDLCK";
}else if( l2.l_type==F_WRLCK ){
zType = "WRLCK";
}else if( l2.l_type==F_UNLCK ){
zType = "UNLCK";
}else{
assert( 0 );
}
sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
}
errno = savedErrno;
return s;
}
#undef osFcntl
#define osFcntl lockTrace
#endif /* SQLITE_LOCK_TRACE */
/*
** Retry ftruncate() calls that fail due to EINTR
**
** All calls to ftruncate() within this file should be made through
** this wrapper. On the Android platform, bypassing the logic below
** could lead to a corrupt database.
*/
static int robust_ftruncate(int h, sqlite3_int64 sz){
int rc;
#ifdef __ANDROID__
/* On Android, ftruncate() always uses 32-bit offsets, even if
** _FILE_OFFSET_BITS=64 is defined. This means it is unsafe to attempt to
** truncate a file to any size larger than 2GiB. Silently ignore any
** such attempts. */
if( sz>(sqlite3_int64)0x7FFFFFFF ){
rc = SQLITE_OK;
}else
#endif
do{ rc = osFtruncate(h,sz); }while( rc<0 && errno==EINTR );
return rc;
}
/*
** This routine translates a standard POSIX errno code into something
** useful to the clients of the sqlite3 functions. Specifically, it is
** intended to translate a variety of "try again" errors into SQLITE_BUSY
** and a variety of "please close the file descriptor NOW" errors into
** SQLITE_IOERR
**
** Errors during initialization of locks, or file system support for locks,
** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately.
*/
static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
assert( (sqliteIOErr == SQLITE_IOERR_LOCK) ||
(sqliteIOErr == SQLITE_IOERR_UNLOCK) ||
(sqliteIOErr == SQLITE_IOERR_RDLOCK) ||
(sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) );
switch (posixError) {
case EACCES:
case EAGAIN:
case ETIMEDOUT:
case EBUSY:
case EINTR:
case ENOLCK:
/* random NFS retry error, unless during file system support
* introspection, in which it actually means what it says */
return SQLITE_BUSY;
case EPERM:
return SQLITE_PERM;
default:
return sqliteIOErr;
}
}
/******************************************************************************
****************** Begin Unique File ID Utility Used By VxWorks ***************
**
** On most versions of unix, we can get a unique ID for a file by concatenating
** the device number and the inode number. But this does not work on VxWorks.
** On VxWorks, a unique file id must be based on the canonical filename.
**
** A pointer to an instance of the following structure can be used as a
** unique file ID in VxWorks. Each instance of this structure contains
** a copy of the canonical filename. There is also a reference count.
** The structure is reclaimed when the number of pointers to it drops to
** zero.
**
** There are never very many files open at one time and lookups are not
** a performance-critical path, so it is sufficient to put these
** structures on a linked list.
*/
struct vxworksFileId {
struct vxworksFileId *pNext; /* Next in a list of them all */
int nRef; /* Number of references to this one */
int nName; /* Length of the zCanonicalName[] string */
char *zCanonicalName; /* Canonical filename */
};
#if OS_VXWORKS
/*
** All unique filenames are held on a linked list headed by this
** variable:
*/
static struct vxworksFileId *vxworksFileList = 0;
/*
** Simplify a filename into its canonical form
** by making the following changes:
**
** * removing any trailing and duplicate /
** * convert /./ into just /
** * convert /A/../ where A is any simple name into just /
**
** Changes are made in-place. Return the new name length.
**
** The original filename is in z[0..n-1]. Return the number of
** characters in the simplified name.
*/
static int vxworksSimplifyName(char *z, int n){
int i, j;
while( n>1 && z[n-1]=='/' ){ n--; }
for(i=j=0; i<n; i++){
if( z[i]=='/' ){
if( z[i+1]=='/' ) continue;
if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
i += 1;
continue;
}
if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
while( j>0 && z[j-1]!='/' ){ j--; }
if( j>0 ){ j--; }
i += 2;
continue;
}
}
z[j++] = z[i];
}
z[j] = 0;
return j;
}
/*
** Find a unique file ID for the given absolute pathname. Return
** a pointer to the vxworksFileId object. This pointer is the unique
** file ID.
**
** The nRef field of the vxworksFileId object is incremented before
** the object is returned. A new vxworksFileId object is created
** and added to the global list if necessary.
**
** If a memory allocation error occurs, return NULL.
*/
static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){
struct vxworksFileId *pNew; /* search key and new file ID */
struct vxworksFileId *pCandidate; /* For looping over existing file IDs */
int n; /* Length of zAbsoluteName string */
assert( zAbsoluteName[0]=='/' );
n = (int)strlen(zAbsoluteName);
pNew = sqlite3_malloc64( sizeof(*pNew) + (n+1) );
if( pNew==0 ) return 0;
pNew->zCanonicalName = (char*)&pNew[1];
memcpy(pNew->zCanonicalName, zAbsoluteName, n+1);
n = vxworksSimplifyName(pNew->zCanonicalName, n);
/* Search for an existing entry that matching the canonical name.
** If found, increment the reference count and return a pointer to
** the existing file ID.
*/
unixEnterMutex();
for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){
if( pCandidate->nName==n
&& memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0
){
sqlite3_free(pNew);
pCandidate->nRef++;
unixLeaveMutex();
return pCandidate;
}
}
/* No match was found. We will make a new file ID */
pNew->nRef = 1;
pNew->nName = n;
pNew->pNext = vxworksFileList;
vxworksFileList = pNew;
unixLeaveMutex();
return pNew;
}
/*
** Decrement the reference count on a vxworksFileId object. Free
** the object when the reference count reaches zero.
*/
static void vxworksReleaseFileId(struct vxworksFileId *pId){
unixEnterMutex();
assert( pId->nRef>0 );
pId->nRef--;
if( pId->nRef==0 ){
struct vxworksFileId **pp;
for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){}
assert( *pp==pId );
*pp = pId->pNext;
sqlite3_free(pId);
}
unixLeaveMutex();
}
#endif /* OS_VXWORKS */
/*************** End of Unique File ID Utility Used By VxWorks ****************
******************************************************************************/
/******************************************************************************
*************************** Posix Advisory Locking ****************************
**
** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996)
** section 6.5.2.2 lines 483 through 490 specify that when a process
** sets or clears a lock, that operation overrides any prior locks set
** by the same process. It does not explicitly say so, but this implies
** that it overrides locks set by the same process using a different
** file descriptor. Consider this test case:
**
** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
**
** Suppose ./file1 and ./file2 are really the same file (because
** one is a hard or symbolic link to the other) then if you set
** an exclusive lock on fd1, then try to get an exclusive lock
** on fd2, it works. I would have expected the second lock to
** fail since there was already a lock on the file due to fd1.
** But not so. Since both locks came from the same process, the
** second overrides the first, even though they were on different
** file descriptors opened on different file names.
**
** This means that we cannot use POSIX locks to synchronize file access
** among competing threads of the same process. POSIX locks will work fine
** to synchronize access for threads in separate processes, but not
** threads within the same process.
**
** To work around the problem, SQLite has to manage file locks internally
** on its own. Whenever a new database is opened, we have to find the
** specific inode of the database file (the inode is determined by the
** st_dev and st_ino fields of the stat structure that fstat() fills in)
** and check for locks already existing on that inode. When locks are
** created or removed, we have to look at our own internal record of the
** locks to see if another thread has previously set a lock on that same
** inode.
**
** (Aside: The use of inode numbers as unique IDs does not work on VxWorks.
** For VxWorks, we have to use the alternative unique ID system based on
** canonical filename and implemented in the previous division.)
**
** The sqlite3_file structure for POSIX is no longer just an integer file
** descriptor. It is now a structure that holds the integer file
** descriptor and a pointer to a structure that describes the internal
** locks on the corresponding inode. There is one locking structure
** per inode, so if the same inode is opened twice, both unixFile structures
** point to the same locking structure. The locking structure keeps
** a reference count (so we will know when to delete it) and a "cnt"
** field that tells us its internal lock status. cnt==0 means the
** file is unlocked. cnt==-1 means the file has an exclusive lock.
** cnt>0 means there are cnt shared locks on the file.
**
** Any attempt to lock or unlock a file first checks the locking
** structure. The fcntl() system call is only invoked to set a
** POSIX lock if the internal lock structure transitions between
** a locked and an unlocked state.
**
** But wait: there are yet more problems with POSIX advisory locks.
**
** If you close a file descriptor that points to a file that has locks,
** all locks on that file that are owned by the current process are
** released. To work around this problem, each unixInodeInfo object
** maintains a count of the number of pending locks on tha inode.
** When an attempt is made to close an unixFile, if there are
** other unixFile open on the same inode that are holding locks, the call
** to close() the file descriptor is deferred until all of the locks clear.
** The unixInodeInfo structure keeps a list of file descriptors that need to
** be closed and that list is walked (and cleared) when the last lock
** clears.
**
** Yet another problem: LinuxThreads do not play well with posix locks.
**
** Many older versions of linux use the LinuxThreads library which is
** not posix compliant. Under LinuxThreads, a lock created by thread
** A cannot be modified or overridden by a different thread B.
** Only thread A can modify the lock. Locking behavior is correct
** if the appliation uses the newer Native Posix Thread Library (NPTL)
** on linux - with NPTL a lock created by thread A can override locks
** in thread B. But there is no way to know at compile-time which
** threading library is being used. So there is no way to know at
** compile-time whether or not thread A can override locks on thread B.
** One has to do a run-time check to discover the behavior of the
** current process.
**
** SQLite used to support LinuxThreads. But support for LinuxThreads
** was dropped beginning with version 3.7.0. SQLite will still work with
** LinuxThreads provided that (1) there is no more than one connection
** per database file in the same process and (2) database connections
** do not move across threads.
*/
/*
** An instance of the following structure serves as the key used
** to locate a particular unixInodeInfo object.
*/
struct unixFileId {
dev_t dev; /* Device number */
#if OS_VXWORKS
struct vxworksFileId *pId; /* Unique file ID for vxworks. */
#else
ino_t ino; /* Inode number */
#endif
};
/*
** An instance of the following structure is allocated for each open
** inode. Or, on LinuxThreads, there is one of these structures for
** each inode opened by each thread.
**
** A single inode can have multiple file descriptors, so each unixFile
** structure contains a pointer to an instance of this object and this
** object keeps a count of the number of unixFile pointing to it.
*/
struct unixInodeInfo {
struct unixFileId fileId; /* The lookup key */
int nShared; /* Number of SHARED locks held */
unsigned char eFileLock; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
unsigned char bProcessLock; /* An exclusive process lock is held */
int nRef; /* Number of pointers to this structure */
unixShmNode *pShmNode; /* Shared memory associated with this inode */
int nLock; /* Number of outstanding file locks */
UnixUnusedFd *pUnused; /* Unused file descriptors to close */
unixInodeInfo *pNext; /* List of all unixInodeInfo objects */
unixInodeInfo *pPrev; /* .... doubly linked */
#if SQLITE_ENABLE_LOCKING_STYLE
unsigned long long sharedByte; /* for AFP simulated shared lock */
#endif
#if OS_VXWORKS
sem_t *pSem; /* Named POSIX semaphore */
char aSemName[MAX_PATHNAME+2]; /* Name of that semaphore */
#endif
};
/*
** A lists of all unixInodeInfo objects.
*/
static unixInodeInfo *inodeList = 0;
/*
**
** This function - unixLogErrorAtLine(), is only ever called via the macro
** unixLogError().
**
** It is invoked after an error occurs in an OS function and errno has been
** set. It logs a message using sqlite3_log() containing the current value of
** errno and, if possible, the human-readable equivalent from strerror() or
** strerror_r().
**
** The first argument passed to the macro should be the error code that
** will be returned to SQLite (e.g. SQLITE_IOERR_DELETE, SQLITE_CANTOPEN).
** The two subsequent arguments should be the name of the OS function that
** failed (e.g. "unlink", "open") and the associated file-system path,
** if any.
*/
#define unixLogError(a,b,c) unixLogErrorAtLine(a,b,c,__LINE__)
static int unixLogErrorAtLine(
int errcode, /* SQLite error code */
const char *zFunc, /* Name of OS function that failed */
const char *zPath, /* File path associated with error */
int iLine /* Source line number where error occurred */
){
char *zErr; /* Message from strerror() or equivalent */
int iErrno = errno; /* Saved syscall error number */
/* If this is not a threadsafe build (SQLITE_THREADSAFE==0), then use
** the strerror() function to obtain the human-readable error message
** equivalent to errno. Otherwise, use strerror_r().
*/
#if SQLITE_THREADSAFE && defined(HAVE_STRERROR_R)
char aErr[80];
memset(aErr, 0, sizeof(aErr));
zErr = aErr;
/* If STRERROR_R_CHAR_P (set by autoconf scripts) or __USE_GNU is defined,
** assume that the system provides the GNU version of strerror_r() that
** returns a pointer to a buffer containing the error message. That pointer
** may point to aErr[], or it may point to some static storage somewhere.
** Otherwise, assume that the system provides the POSIX version of
** strerror_r(), which always writes an error message into aErr[].
**
** If the code incorrectly assumes that it is the POSIX version that is
** available, the error message will often be an empty string. Not a
** huge problem. Incorrectly concluding that the GNU version is available
** could lead to a segfault though.
*/
#if defined(STRERROR_R_CHAR_P) || defined(__USE_GNU)
zErr =
# endif
strerror_r(iErrno, aErr, sizeof(aErr)-1);
#elif SQLITE_THREADSAFE
/* This is a threadsafe build, but strerror_r() is not available. */
zErr = "";
#else
/* Non-threadsafe build, use strerror(). */
zErr = strerror(iErrno);
#endif
if( zPath==0 ) zPath = "";
sqlite3_log(errcode,
"os_unix.c:%d: (%d) %s(%s) - %s",
iLine, iErrno, zFunc, zPath, zErr
);
return errcode;
}
/*
** Close a file descriptor.
**
** We assume that close() almost always works, since it is only in a
** very sick application or on a very sick platform that it might fail.
** If it does fail, simply leak the file descriptor, but do log the
** error.
**
** Note that it is not safe to retry close() after EINTR since the
** file descriptor might have already been reused by another thread.
** So we don't even try to recover from an EINTR. Just log the error
** and move on.
*/
static void robust_close(unixFile *pFile, int h, int lineno){
if( osClose(h) ){
unixLogErrorAtLine(SQLITE_IOERR_CLOSE, "close",
pFile ? pFile->zPath : 0, lineno);
}
}
/*
** Set the pFile->lastErrno. Do this in a subroutine as that provides
** a convenient place to set a breakpoint.
*/
static void storeLastErrno(unixFile *pFile, int error){
pFile->lastErrno = error;
}
/*
** Close all file descriptors accumuated in the unixInodeInfo->pUnused list.
*/
static void closePendingFds(unixFile *pFile){
unixInodeInfo *pInode = pFile->pInode;
UnixUnusedFd *p;
UnixUnusedFd *pNext;
for(p=pInode->pUnused; p; p=pNext){
pNext = p->pNext;
robust_close(pFile, p->fd, __LINE__);
sqlite3_free(p);
}
pInode->pUnused = 0;
}
/*
** Release a unixInodeInfo structure previously allocated by findInodeInfo().
**
** The mutex entered using the unixEnterMutex() function must be held
** when this function is called.
*/
static void releaseInodeInfo(unixFile *pFile){
unixInodeInfo *pInode = pFile->pInode;
assert( unixMutexHeld() );
if( ALWAYS(pInode) ){
pInode->nRef--;
if( pInode->nRef==0 ){
assert( pInode->pShmNode==0 );
closePendingFds(pFile);
if( pInode->pPrev ){
assert( pInode->pPrev->pNext==pInode );
pInode->pPrev->pNext = pInode->pNext;
}else{
assert( inodeList==pInode );
inodeList = pInode->pNext;
}
if( pInode->pNext ){
assert( pInode->pNext->pPrev==pInode );
pInode->pNext->pPrev = pInode->pPrev;
}
sqlite3_free(pInode);
}
}
}
/*
** Given a file descriptor, locate the unixInodeInfo object that
** describes that file descriptor. Create a new one if necessary. The
** return value might be uninitialized if an error occurs.
**
** The mutex entered using the unixEnterMutex() function must be held
** when this function is called.
**
** Return an appropriate error code.
*/
static int findInodeInfo(
unixFile *pFile, /* Unix file with file desc used in the key */
unixInodeInfo **ppInode /* Return the unixInodeInfo object here */
){
int rc; /* System call return code */
int fd; /* The file descriptor for pFile */
struct unixFileId fileId; /* Lookup key for the unixInodeInfo */
struct stat statbuf; /* Low-level file information */
unixInodeInfo *pInode = 0; /* Candidate unixInodeInfo object */
assert( unixMutexHeld() );
/* Get low-level information about the file that we can used to
** create a unique name for the file.
*/
fd = pFile->h;
rc = osFstat(fd, &statbuf);
if( rc!=0 ){
storeLastErrno(pFile, errno);
#if defined(EOVERFLOW) && defined(SQLITE_DISABLE_LFS)
if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS;
#endif
return SQLITE_IOERR;
}
#ifdef __APPLE__
/* On OS X on an msdos filesystem, the inode number is reported
** incorrectly for zero-size files. See ticket #3260. To work
** around this problem (we consider it a bug in OS X, not SQLite)
** we always increase the file size to 1 by writing a single byte
** prior to accessing the inode number. The one byte written is
** an ASCII 'S' character which also happens to be the first byte
** in the header of every SQLite database. In this way, if there
** is a race condition such that another thread has already populated
** the first page of the database, no damage is done.
*/
if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){
do{ rc = osWrite(fd, "S", 1); }while( rc<0 && errno==EINTR );
if( rc!=1 ){
storeLastErrno(pFile, errno);
return SQLITE_IOERR;
}
rc = osFstat(fd, &statbuf);
if( rc!=0 ){
storeLastErrno(pFile, errno);
return SQLITE_IOERR;
}
}
#endif
memset(&fileId, 0, sizeof(fileId));
fileId.dev = statbuf.st_dev;
#if OS_VXWORKS
fileId.pId = pFile->pId;
#else
fileId.ino = statbuf.st_ino;
#endif
pInode = inodeList;
while( pInode && memcmp(&fileId, &pInode->fileId, sizeof(fileId)) ){
pInode = pInode->pNext;
}
if( pInode==0 ){
pInode = sqlite3_malloc64( sizeof(*pInode) );
if( pInode==0 ){
return SQLITE_NOMEM;
}
memset(pInode, 0, sizeof(*pInode));
memcpy(&pInode->fileId, &fileId, sizeof(fileId));
pInode->nRef = 1;
pInode->pNext = inodeList;
pInode->pPrev = 0;
if( inodeList ) inodeList->pPrev = pInode;
inodeList = pInode;
}else{
pInode->nRef++;
}
*ppInode = pInode;
return SQLITE_OK;
}
/*
** Return TRUE if pFile has been renamed or unlinked since it was first opened.
*/
static int fileHasMoved(unixFile *pFile){
#if OS_VXWORKS
return pFile->pInode!=0 && pFile->pId!=pFile->pInode->fileId.pId;
#else
struct stat buf;
return pFile->pInode!=0 &&
(osStat(pFile->zPath, &buf)!=0 || buf.st_ino!=pFile->pInode->fileId.ino);
#endif
}
/*
** Check a unixFile that is a database. Verify the following:
**
** (1) There is exactly one hard link on the file
** (2) The file is not a symbolic link
** (3) The file has not been renamed or unlinked
**
** Issue sqlite3_log(SQLITE_WARNING,...) messages if anything is not right.
*/
static void verifyDbFile(unixFile *pFile){
struct stat buf;
int rc;
rc = osFstat(pFile->h, &buf);
if( rc!=0 ){
sqlite3_log(SQLITE_WARNING, "cannot fstat db file %s", pFile->zPath);
return;
}
if( buf.st_nlink==0 && (pFile->ctrlFlags & UNIXFILE_DELETE)==0 ){
sqlite3_log(SQLITE_WARNING, "file unlinked while open: %s", pFile->zPath);
return;
}
if( buf.st_nlink>1 ){
sqlite3_log(SQLITE_WARNING, "multiple links to file: %s", pFile->zPath);
return;
}
if( fileHasMoved(pFile) ){
sqlite3_log(SQLITE_WARNING, "file renamed while open: %s", pFile->zPath);
return;
}
}
/*
** This routine checks if there is a RESERVED lock held on the specified
** file by this or any other process. If such a lock is held, set *pResOut
** to a non-zero value otherwise *pResOut is set to zero. The return value
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
*/
static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
int rc = SQLITE_OK;
int reserved = 0;
unixFile *pFile = (unixFile*)id;
SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
assert( pFile );
assert( pFile->eFileLock<=SHARED_LOCK );
unixEnterMutex(); /* Because pFile->pInode is shared across threads */
/* Check if a thread in this process holds such a lock */
if( pFile->pInode->eFileLock>SHARED_LOCK ){
reserved = 1;
}
/* Otherwise see if some other process holds it.
*/
#ifndef __DJGPP__
if( !reserved && !pFile->pInode->bProcessLock ){
struct flock lock;
lock.l_whence = SEEK_SET;
lock.l_start = RESERVED_BYTE;
lock.l_len = 1;
lock.l_type = F_WRLCK;
if( osFcntl(pFile->h, F_GETLK, &lock) ){
rc = SQLITE_IOERR_CHECKRESERVEDLOCK;
storeLastErrno(pFile, errno);
} else if( lock.l_type!=F_UNLCK ){
reserved = 1;
}
}
#endif
unixLeaveMutex();
OSTRACE(("TEST WR-LOCK %d %d %d (unix)\n", pFile->h, rc, reserved));
*pResOut = reserved;
return rc;
}
/*
** Attempt to set a system-lock on the file pFile. The lock is
** described by pLock.
**
** If the pFile was opened read/write from unix-excl, then the only lock
** ever obtained is an exclusive lock, and it is obtained exactly once
** the first time any lock is attempted. All subsequent system locking
** operations become no-ops. Locking operations still happen internally,
** in order to coordinate access between separate database connections
** within this process, but all of that is handled in memory and the
** operating system does not participate.
**
** This function is a pass-through to fcntl(F_SETLK) if pFile is using
** any VFS other than "unix-excl" or if pFile is opened on "unix-excl"
** and is read-only.
**
** Zero is returned if the call completes successfully, or -1 if a call
** to fcntl() fails. In this case, errno is set appropriately (by fcntl()).
*/
static int unixFileLock(unixFile *pFile, struct flock *pLock){
int rc;
unixInodeInfo *pInode = pFile->pInode;
assert( unixMutexHeld() );
assert( pInode!=0 );
if( (pFile->ctrlFlags & (UNIXFILE_EXCL|UNIXFILE_RDONLY))==UNIXFILE_EXCL ){
if( pInode->bProcessLock==0 ){
struct flock lock;
assert( pInode->nLock==0 );
lock.l_whence = SEEK_SET;
lock.l_start = SHARED_FIRST;
lock.l_len = SHARED_SIZE;
lock.l_type = F_WRLCK;
rc = osFcntl(pFile->h, F_SETLK, &lock);
if( rc<0 ) return rc;
pInode->bProcessLock = 1;
pInode->nLock++;
}else{
rc = 0;
}
}else{
rc = osFcntl(pFile->h, F_SETLK, pLock);
}
return rc;
}
/*
** Lock the file with the lock specified by parameter eFileLock - one
** of the following:
**
** (1) SHARED_LOCK
** (2) RESERVED_LOCK
** (3) PENDING_LOCK
** (4) EXCLUSIVE_LOCK
**
** Sometimes when requesting one lock state, additional lock states
** are inserted in between. The locking might fail on one of the later
** transitions leaving the lock state different from what it started but
** still short of its goal. The following chart shows the allowed
** transitions and the inserted intermediate states:
**
** UNLOCKED -> SHARED
** SHARED -> RESERVED
** SHARED -> (PENDING) -> EXCLUSIVE
** RESERVED -> (PENDING) -> EXCLUSIVE
** PENDING -> EXCLUSIVE
**
** This routine will only increase a lock. Use the sqlite3OsUnlock()
** routine to lower a locking level.
*/
static int unixLock(sqlite3_file *id, int eFileLock){
/* The following describes the implementation of the various locks and
** lock transitions in terms of the POSIX advisory shared and exclusive
** lock primitives (called read-locks and write-locks below, to avoid
** confusion with SQLite lock names). The algorithms are complicated
** slightly in order to be compatible with windows systems simultaneously
** accessing the same database file, in case that is ever required.
**
** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
** byte', each single bytes at well known offsets, and the 'shared byte
** range', a range of 510 bytes at a well known offset.
**
** To obtain a SHARED lock, a read-lock is obtained on the 'pending
** byte'. If this is successful, a random byte from the 'shared byte
** range' is read-locked and the lock on the 'pending byte' released.
**
** A process may only obtain a RESERVED lock after it has a SHARED lock.
** A RESERVED lock is implemented by grabbing a write-lock on the
** 'reserved byte'.
**
** A process may only obtain a PENDING lock after it has obtained a
** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
** on the 'pending byte'. This ensures that no new SHARED locks can be
** obtained, but existing SHARED locks are allowed to persist. A process
** does not have to obtain a RESERVED lock on the way to a PENDING lock.
** This property is used by the algorithm for rolling back a journal file
** after a crash.
**
** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
** implemented by obtaining a write-lock on the entire 'shared byte
** range'. Since all other locks require a read-lock on one of the bytes
** within this range, this ensures that no other locks are held on the
** database.
**
** The reason a single byte cannot be used instead of the 'shared byte
** range' is that some versions of windows do not support read-locks. By
** locking a random byte from a range, concurrent SHARED locks may exist
** even if the locking primitive used is always a write-lock.
*/
int rc = SQLITE_OK;
unixFile *pFile = (unixFile*)id;
unixInodeInfo *pInode;
struct flock lock;
int tErrno = 0;
assert( pFile );
OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h,
azFileLock(eFileLock), azFileLock(pFile->eFileLock),
azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared,
osGetpid(0)));
/* If there is already a lock of this type or more restrictive on the
** unixFile, do nothing. Don't use the end_lock: exit path, as
** unixEnterMutex() hasn't been called yet.
*/
if( pFile->eFileLock>=eFileLock ){
OSTRACE(("LOCK %d %s ok (already held) (unix)\n", pFile->h,
azFileLock(eFileLock)));
return SQLITE_OK;
}
/* Make sure the locking sequence is correct.
** (1) We never move from unlocked to anything higher than shared lock.
** (2) SQLite never explicitly requests a pendig lock.
** (3) A shared lock is always held when a reserve lock is requested.
*/
assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
assert( eFileLock!=PENDING_LOCK );
assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );
/* This mutex is needed because pFile->pInode is shared across threads
*/
unixEnterMutex();
pInode = pFile->pInode;
/* If some thread using this PID has a lock via a different unixFile*
** handle that precludes the requested lock, return BUSY.
*/
if( (pFile->eFileLock!=pInode->eFileLock &&
(pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK))
){
rc = SQLITE_BUSY;
goto end_lock;
}
/* If a SHARED lock is requested, and some thread using this PID already
** has a SHARED or RESERVED lock, then increment reference counts and
** return SQLITE_OK.
*/
if( eFileLock==SHARED_LOCK &&
(pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){
assert( eFileLock==SHARED_LOCK );
assert( pFile->eFileLock==0 );
assert( pInode->nShared>0 );
pFile->eFileLock = SHARED_LOCK;
pInode->nShared++;
pInode->nLock++;
goto end_lock;
}
/* A PENDING lock is needed before acquiring a SHARED lock and before
** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
** be released.
*/
lock.l_len = 1L;
lock.l_whence = SEEK_SET;
if( eFileLock==SHARED_LOCK
|| (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)
){
lock.l_type = (eFileLock==SHARED_LOCK?F_RDLCK:F_WRLCK);
lock.l_start = PENDING_BYTE;
if( unixFileLock(pFile, &lock) ){
tErrno = errno;
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
if( rc!=SQLITE_BUSY ){
storeLastErrno(pFile, tErrno);
}
goto end_lock;
}
}
/* If control gets to this point, then actually go ahead and make
** operating system calls for the specified lock.
*/
if( eFileLock==SHARED_LOCK ){
assert( pInode->nShared==0 );
assert( pInode->eFileLock==0 );
assert( rc==SQLITE_OK );
/* Now get the read-lock */
lock.l_start = SHARED_FIRST;
lock.l_len = SHARED_SIZE;
if( unixFileLock(pFile, &lock) ){
tErrno = errno;
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
}
/* Drop the temporary PENDING lock */
lock.l_start = PENDING_BYTE;
lock.l_len = 1L;
lock.l_type = F_UNLCK;
if( unixFileLock(pFile, &lock) && rc==SQLITE_OK ){
/* This could happen with a network mount */
tErrno = errno;
rc = SQLITE_IOERR_UNLOCK;
}
if( rc ){
if( rc!=SQLITE_BUSY ){
storeLastErrno(pFile, tErrno);
}
goto end_lock;
}else{
pFile->eFileLock = SHARED_LOCK;
pInode->nLock++;
pInode->nShared = 1;
}
}else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){
/* We are trying for an exclusive lock but another thread in this
** same process is still holding a shared lock. */
rc = SQLITE_BUSY;
}else{
/* The request was for a RESERVED or EXCLUSIVE lock. It is
** assumed that there is a SHARED or greater lock on the file
** already.
*/
assert( 0!=pFile->eFileLock );
lock.l_type = F_WRLCK;
assert( eFileLock==RESERVED_LOCK || eFileLock==EXCLUSIVE_LOCK );
if( eFileLock==RESERVED_LOCK ){
lock.l_start = RESERVED_BYTE;
lock.l_len = 1L;
}else{
lock.l_start = SHARED_FIRST;
lock.l_len = SHARED_SIZE;
}
if( unixFileLock(pFile, &lock) ){
tErrno = errno;
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
if( rc!=SQLITE_BUSY ){
storeLastErrno(pFile, tErrno);
}
}
}
#ifdef SQLITE_DEBUG
/* Set up the transaction-counter change checking flags when
** transitioning from a SHARED to a RESERVED lock. The change
** from SHARED to RESERVED marks the beginning of a normal
** write operation (not a hot journal rollback).
*/
if( rc==SQLITE_OK
&& pFile->eFileLock<=SHARED_LOCK
&& eFileLock==RESERVED_LOCK
){
pFile->transCntrChng = 0;
pFile->dbUpdate = 0;
pFile->inNormalWrite = 1;
}
#endif
if( rc==SQLITE_OK ){
pFile->eFileLock = eFileLock;
pInode->eFileLock = eFileLock;
}else if( eFileLock==EXCLUSIVE_LOCK ){
pFile->eFileLock = PENDING_LOCK;
pInode->eFileLock = PENDING_LOCK;
}
end_lock:
unixLeaveMutex();
OSTRACE(("LOCK %d %s %s (unix)\n", pFile->h, azFileLock(eFileLock),
rc==SQLITE_OK ? "ok" : "failed"));
return rc;
}
/*
** Add the file descriptor used by file handle pFile to the corresponding
** pUnused list.
*/
static void setPendingFd(unixFile *pFile){
unixInodeInfo *pInode = pFile->pInode;
UnixUnusedFd *p = pFile->pUnused;
p->pNext = pInode->pUnused;
pInode->pUnused = p;
pFile->h = -1;
pFile->pUnused = 0;
}
/*
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
**
** If handleNFSUnlock is true, then on downgrading an EXCLUSIVE_LOCK to SHARED
** the byte range is divided into 2 parts and the first part is unlocked then
** set to a read lock, then the other part is simply unlocked. This works
** around a bug in BSD NFS lockd (also seen on MacOSX 10.3+) that fails to
** remove the write lock on a region when a read lock is set.
*/
static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
unixFile *pFile = (unixFile*)id;
unixInodeInfo *pInode;
struct flock lock;
int rc = SQLITE_OK;
assert( pFile );
OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock,
pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
osGetpid(0)));
assert( eFileLock<=SHARED_LOCK );
if( pFile->eFileLock<=eFileLock ){
return SQLITE_OK;
}
unixEnterMutex();
pInode = pFile->pInode;
assert( pInode->nShared!=0 );
if( pFile->eFileLock>SHARED_LOCK ){
assert( pInode->eFileLock==pFile->eFileLock );
#ifdef SQLITE_DEBUG
/* When reducing a lock such that other processes can start
** reading the database file again, make sure that the
** transaction counter was updated if any part of the database
** file changed. If the transaction counter is not updated,
** other connections to the same file might not realize that
** the file has changed and hence might not know to flush their
** cache. The use of a stale cache can lead to database corruption.
*/
pFile->inNormalWrite = 0;
#endif
/* downgrading to a shared lock on NFS involves clearing the write lock
** before establishing the readlock - to avoid a race condition we downgrade
** the lock in 2 blocks, so that part of the range will be covered by a
** write lock until the rest is covered by a read lock:
** 1: [WWWWW]
** 2: [....W]
** 3: [RRRRW]
** 4: [RRRR.]
*/
if( eFileLock==SHARED_LOCK ){
#if !defined(__APPLE__) || !SQLITE_ENABLE_LOCKING_STYLE
(void)handleNFSUnlock;
assert( handleNFSUnlock==0 );
#endif
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
if( handleNFSUnlock ){
int tErrno; /* Error code from system call errors */
off_t divSize = SHARED_SIZE - 1;
lock.l_type = F_UNLCK;
lock.l_whence = SEEK_SET;
lock.l_start = SHARED_FIRST;
lock.l_len = divSize;
if( unixFileLock(pFile, &lock)==(-1) ){
tErrno = errno;
rc = SQLITE_IOERR_UNLOCK;
storeLastErrno(pFile, tErrno);
goto end_unlock;
}
lock.l_type = F_RDLCK;
lock.l_whence = SEEK_SET;
lock.l_start = SHARED_FIRST;
lock.l_len = divSize;
if( unixFileLock(pFile, &lock)==(-1) ){
tErrno = errno;
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK);
if( IS_LOCK_ERROR(rc) ){
storeLastErrno(pFile, tErrno);
}
goto end_unlock;
}
lock.l_type = F_UNLCK;
lock.l_whence = SEEK_SET;
lock.l_start = SHARED_FIRST+divSize;
lock.l_len = SHARED_SIZE-divSize;
if( unixFileLock(pFile, &lock)==(-1) ){
tErrno = errno;
rc = SQLITE_IOERR_UNLOCK;
storeLastErrno(pFile, tErrno);
goto end_unlock;
}
}else
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
{
lock.l_type = F_RDLCK;
lock.l_whence = SEEK_SET;
lock.l_start = SHARED_FIRST;
lock.l_len = SHARED_SIZE;
if( unixFileLock(pFile, &lock) ){
/* In theory, the call to unixFileLock() cannot fail because another
** process is holding an incompatible lock. If it does, this
** indicates that the other process is not following the locking
** protocol. If this happens, return SQLITE_IOERR_RDLOCK. Returning
** SQLITE_BUSY would confuse the upper layer (in practice it causes
** an assert to fail). */
rc = SQLITE_IOERR_RDLOCK;
storeLastErrno(pFile, errno);
goto end_unlock;
}
}
}
lock.l_type = F_UNLCK;
lock.l_whence = SEEK_SET;
lock.l_start = PENDING_BYTE;
lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
if( unixFileLock(pFile, &lock)==0 ){
pInode->eFileLock = SHARED_LOCK;
}else{
rc = SQLITE_IOERR_UNLOCK;
storeLastErrno(pFile, errno);
goto end_unlock;
}
}
if( eFileLock==NO_LOCK ){
/* Decrement the shared lock counter. Release the lock using an
** OS call only when all threads in this same process have released
** the lock.
*/
pInode->nShared--;
if( pInode->nShared==0 ){
lock.l_type = F_UNLCK;
lock.l_whence = SEEK_SET;
lock.l_start = lock.l_len = 0L;
if( unixFileLock(pFile, &lock)==0 ){
pInode->eFileLock = NO_LOCK;
}else{
rc = SQLITE_IOERR_UNLOCK;
storeLastErrno(pFile, errno);
pInode->eFileLock = NO_LOCK;
pFile->eFileLock = NO_LOCK;
}
}
/* Decrement the count of locks against this same file. When the
** count reaches zero, close any other file descriptors whose close
** was deferred because of outstanding locks.
*/
pInode->nLock--;
assert( pInode->nLock>=0 );
if( pInode->nLock==0 ){
closePendingFds(pFile);
}
}
end_unlock:
unixLeaveMutex();
if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock;
return rc;
}
/*
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
*/
static int unixUnlock(sqlite3_file *id, int eFileLock){
#if SQLITE_MAX_MMAP_SIZE>0
assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 );
#endif
return posixUnlock(id, eFileLock, 0);
}
#if SQLITE_MAX_MMAP_SIZE>0
static int unixMapfile(unixFile *pFd, i64 nByte);
static void unixUnmapfile(unixFile *pFd);
#endif
/*
** This function performs the parts of the "close file" operation
** common to all locking schemes. It closes the directory and file
** handles, if they are valid, and sets all fields of the unixFile
** structure to 0.
**
** It is *not* necessary to hold the mutex when this routine is called,
** even on VxWorks. A mutex will be acquired on VxWorks by the
** vxworksReleaseFileId() routine.
*/
static int closeUnixFile(sqlite3_file *id){
unixFile *pFile = (unixFile*)id;
#if SQLITE_MAX_MMAP_SIZE>0
unixUnmapfile(pFile);
#endif
if( pFile->h>=0 ){
robust_close(pFile, pFile->h, __LINE__);
pFile->h = -1;
}
#if OS_VXWORKS
if( pFile->pId ){
if( pFile->ctrlFlags & UNIXFILE_DELETE ){
osUnlink(pFile->pId->zCanonicalName);
}
vxworksReleaseFileId(pFile->pId);
pFile->pId = 0;
}
#endif
#ifdef SQLITE_UNLINK_AFTER_CLOSE
if( pFile->ctrlFlags & UNIXFILE_DELETE ){
osUnlink(pFile->zPath);
sqlite3_free(*(char**)&pFile->zPath);
pFile->zPath = 0;
}
#endif
OSTRACE(("CLOSE %-3d\n", pFile->h));
OpenCounter(-1);
sqlite3_free(pFile->pUnused);
memset(pFile, 0, sizeof(unixFile));
return SQLITE_OK;
}
/*
** Close a file.
*/
static int unixClose(sqlite3_file *id){
int rc = SQLITE_OK;
unixFile *pFile = (unixFile *)id;
verifyDbFile(pFile);
unixUnlock(id, NO_LOCK);
unixEnterMutex();
/* unixFile.pInode is always valid here. Otherwise, a different close
** routine (e.g. nolockClose()) would be called instead.
*/
assert( pFile->pInode->nLock>0 || pFile->pInode->bProcessLock==0 );
if( ALWAYS(pFile->pInode) && pFile->pInode->nLock ){
/* If there are outstanding locks, do not actually close the file just
** yet because that would clear those locks. Instead, add the file
** descriptor to pInode->pUnused list. It will be automatically closed
** when the last lock is cleared.
*/
setPendingFd(pFile);
}
releaseInodeInfo(pFile);
rc = closeUnixFile(id);
unixLeaveMutex();
return rc;
}
/************** End of the posix advisory lock implementation *****************
******************************************************************************/
/******************************************************************************
****************************** No-op Locking **********************************
**
** Of the various locking implementations available, this is by far the
** simplest: locking is ignored. No attempt is made to lock the database
** file for reading or writing.
**
** This locking mode is appropriate for use on read-only databases
** (ex: databases that are burned into CD-ROM, for example.) It can
** also be used if the application employs some external mechanism to
** prevent simultaneous access of the same database by two or more
** database connections. But there is a serious risk of database
** corruption if this locking mode is used in situations where multiple
** database connections are accessing the same database file at the same
** time and one or more of those connections are writing.
*/
static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){
UNUSED_PARAMETER(NotUsed);
*pResOut = 0;
return SQLITE_OK;
}
static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){
UNUSED_PARAMETER2(NotUsed, NotUsed2);
return SQLITE_OK;
}
static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){
UNUSED_PARAMETER2(NotUsed, NotUsed2);
return SQLITE_OK;
}
/*
** Close the file.
*/
static int nolockClose(sqlite3_file *id) {
return closeUnixFile(id);
}
/******************* End of the no-op lock implementation *********************
******************************************************************************/
/******************************************************************************
************************* Begin dot-file Locking ******************************
**
** The dotfile locking implementation uses the existence of separate lock
** files (really a directory) to control access to the database. This works
** on just about every filesystem imaginable. But there are serious downsides:
**
** (1) There is zero concurrency. A single reader blocks all other
** connections from reading or writing the database.
**
** (2) An application crash or power loss can leave stale lock files
** sitting around that need to be cleared manually.
**
** Nevertheless, a dotlock is an appropriate locking mode for use if no
** other locking strategy is available.
**
** Dotfile locking works by creating a subdirectory in the same directory as
** the database and with the same name but with a ".lock" extension added.
** The existence of a lock directory implies an EXCLUSIVE lock. All other
** lock types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE.
*/
/*
** The file suffix added to the data base filename in order to create the
** lock directory.
*/
#define DOTLOCK_SUFFIX ".lock"
/*
** This routine checks if there is a RESERVED lock held on the specified
** file by this or any other process. If such a lock is held, set *pResOut
** to a non-zero value otherwise *pResOut is set to zero. The return value
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
**
** In dotfile locking, either a lock exists or it does not. So in this
** variation of CheckReservedLock(), *pResOut is set to true if any lock
** is held on the file and false if the file is unlocked.
*/
static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
int rc = SQLITE_OK;
int reserved = 0;
unixFile *pFile = (unixFile*)id;
SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
assert( pFile );
reserved = osAccess((const char*)pFile->lockingContext, 0)==0;
OSTRACE(("TEST WR-LOCK %d %d %d (dotlock)\n", pFile->h, rc, reserved));
*pResOut = reserved;
return rc;
}
/*
** Lock the file with the lock specified by parameter eFileLock - one
** of the following:
**
** (1) SHARED_LOCK
** (2) RESERVED_LOCK
** (3) PENDING_LOCK
** (4) EXCLUSIVE_LOCK
**
** Sometimes when requesting one lock state, additional lock states
** are inserted in between. The locking might fail on one of the later
** transitions leaving the lock state different from what it started but
** still short of its goal. The following chart shows the allowed
** transitions and the inserted intermediate states:
**
** UNLOCKED -> SHARED
** SHARED -> RESERVED
** SHARED -> (PENDING) -> EXCLUSIVE
** RESERVED -> (PENDING) -> EXCLUSIVE
** PENDING -> EXCLUSIVE
**
** This routine will only increase a lock. Use the sqlite3OsUnlock()
** routine to lower a locking level.
**
** With dotfile locking, we really only support state (4): EXCLUSIVE.
** But we track the other locking levels internally.
*/
static int dotlockLock(sqlite3_file *id, int eFileLock) {
unixFile *pFile = (unixFile*)id;
char *zLockFile = (char *)pFile->lockingContext;
int rc = SQLITE_OK;
/* If we have any lock, then the lock file already exists. All we have
** to do is adjust our internal record of the lock level.
*/
if( pFile->eFileLock > NO_LOCK ){
pFile->eFileLock = eFileLock;
/* Always update the timestamp on the old file */
#ifdef HAVE_UTIME
utime(zLockFile, NULL);
#else
utimes(zLockFile, NULL);
#endif
return SQLITE_OK;
}
/* grab an exclusive lock */
rc = osMkdir(zLockFile, 0777);
if( rc<0 ){
/* failed to open/create the lock directory */
int tErrno = errno;
if( EEXIST == tErrno ){
rc = SQLITE_BUSY;
} else {
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
if( rc!=SQLITE_BUSY ){
storeLastErrno(pFile, tErrno);
}
}
return rc;
}
/* got it, set the type and return ok */
pFile->eFileLock = eFileLock;
return rc;
}
/*
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
**
** When the locking level reaches NO_LOCK, delete the lock file.
*/
static int dotlockUnlock(sqlite3_file *id, int eFileLock) {
unixFile *pFile = (unixFile*)id;
char *zLockFile = (char *)pFile->lockingContext;
int rc;
assert( pFile );
OSTRACE(("UNLOCK %d %d was %d pid=%d (dotlock)\n", pFile->h, eFileLock,
pFile->eFileLock, osGetpid(0)));
assert( eFileLock<=SHARED_LOCK );
/* no-op if possible */
if( pFile->eFileLock==eFileLock ){
return SQLITE_OK;
}
/* To downgrade to shared, simply update our internal notion of the
** lock state. No need to mess with the file on disk.
*/
if( eFileLock==SHARED_LOCK ){
pFile->eFileLock = SHARED_LOCK;
return SQLITE_OK;
}
/* To fully unlock the database, delete the lock file */
assert( eFileLock==NO_LOCK );
rc = osRmdir(zLockFile);
if( rc<0 ){
int tErrno = errno;
if( tErrno==ENOENT ){
rc = SQLITE_OK;
}else{
rc = SQLITE_IOERR_UNLOCK;
storeLastErrno(pFile, tErrno);
}
return rc;
}
pFile->eFileLock = NO_LOCK;
return SQLITE_OK;
}
/*
** Close a file. Make sure the lock has been released before closing.
*/
static int dotlockClose(sqlite3_file *id) {
unixFile *pFile = (unixFile*)id;
assert( id!=0 );
dotlockUnlock(id, NO_LOCK);
sqlite3_free(pFile->lockingContext);
return closeUnixFile(id);
}
/****************** End of the dot-file lock implementation *******************
******************************************************************************/
/******************************************************************************
************************** Begin flock Locking ********************************
**
** Use the flock() system call to do file locking.
**
** flock() locking is like dot-file locking in that the various
** fine-grain locking levels supported by SQLite are collapsed into
** a single exclusive lock. In other words, SHARED, RESERVED, and
** PENDING locks are the same thing as an EXCLUSIVE lock. SQLite
** still works when you do this, but concurrency is reduced since
** only a single process can be reading the database at a time.
**
** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off
*/
#if SQLITE_ENABLE_LOCKING_STYLE
/*
** Retry flock() calls that fail with EINTR
*/
#ifdef EINTR
static int robust_flock(int fd, int op){
int rc;
do{ rc = flock(fd,op); }while( rc<0 && errno==EINTR );
return rc;
}
#else
# define robust_flock(a,b) flock(a,b)
#endif
/*
** This routine checks if there is a RESERVED lock held on the specified
** file by this or any other process. If such a lock is held, set *pResOut
** to a non-zero value otherwise *pResOut is set to zero. The return value
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
*/
static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){
int rc = SQLITE_OK;
int reserved = 0;
unixFile *pFile = (unixFile*)id;
SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
assert( pFile );
/* Check if a thread in this process holds such a lock */
if( pFile->eFileLock>SHARED_LOCK ){
reserved = 1;
}
/* Otherwise see if some other process holds it. */
if( !reserved ){
/* attempt to get the lock */
int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB);
if( !lrc ){
/* got the lock, unlock it */
lrc = robust_flock(pFile->h, LOCK_UN);
if ( lrc ) {
int tErrno = errno;
/* unlock failed with an error */
lrc = SQLITE_IOERR_UNLOCK;
storeLastErrno(pFile, tErrno);
rc = lrc;
}
} else {
int tErrno = errno;
reserved = 1;
/* someone else might have it reserved */
lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
if( IS_LOCK_ERROR(lrc) ){
storeLastErrno(pFile, tErrno);
rc = lrc;
}
}
}
OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved));
#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){
rc = SQLITE_OK;
reserved=1;
}
#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */
*pResOut = reserved;
return rc;
}
/*
** Lock the file with the lock specified by parameter eFileLock - one
** of the following:
**
** (1) SHARED_LOCK
** (2) RESERVED_LOCK
** (3) PENDING_LOCK
** (4) EXCLUSIVE_LOCK
**
** Sometimes when requesting one lock state, additional lock states
** are inserted in between. The locking might fail on one of the later
** transitions leaving the lock state different from what it started but
** still short of its goal. The following chart shows the allowed
** transitions and the inserted intermediate states:
**
** UNLOCKED -> SHARED
** SHARED -> RESERVED
** SHARED -> (PENDING) -> EXCLUSIVE
** RESERVED -> (PENDING) -> EXCLUSIVE
** PENDING -> EXCLUSIVE
**
** flock() only really support EXCLUSIVE locks. We track intermediate
** lock states in the sqlite3_file structure, but all locks SHARED or
** above are really EXCLUSIVE locks and exclude all other processes from
** access the file.
**
** This routine will only increase a lock. Use the sqlite3OsUnlock()
** routine to lower a locking level.
*/
static int flockLock(sqlite3_file *id, int eFileLock) {
int rc = SQLITE_OK;
unixFile *pFile = (unixFile*)id;
assert( pFile );
/* if we already have a lock, it is exclusive.
** Just adjust level and punt on outta here. */
if (pFile->eFileLock > NO_LOCK) {
pFile->eFileLock = eFileLock;
return SQLITE_OK;
}
/* grab an exclusive lock */
if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) {
int tErrno = errno;
/* didn't get, must be busy */
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
if( IS_LOCK_ERROR(rc) ){
storeLastErrno(pFile, tErrno);
}
} else {
/* got it, set the type and return ok */
pFile->eFileLock = eFileLock;
}
OSTRACE(("LOCK %d %s %s (flock)\n", pFile->h, azFileLock(eFileLock),
rc==SQLITE_OK ? "ok" : "failed"));
#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){
rc = SQLITE_BUSY;
}
#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */
return rc;
}
/*
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
*/
static int flockUnlock(sqlite3_file *id, int eFileLock) {
unixFile *pFile = (unixFile*)id;
assert( pFile );
OSTRACE(("UNLOCK %d %d was %d pid=%d (flock)\n", pFile->h, eFileLock,
pFile->eFileLock, osGetpid(0)));
assert( eFileLock<=SHARED_LOCK );
/* no-op if possible */
if( pFile->eFileLock==eFileLock ){
return SQLITE_OK;
}
/* shared can just be set because we always have an exclusive */
if (eFileLock==SHARED_LOCK) {
pFile->eFileLock = eFileLock;
return SQLITE_OK;
}
/* no, really, unlock. */
if( robust_flock(pFile->h, LOCK_UN) ){
#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
return SQLITE_OK;
#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */
return SQLITE_IOERR_UNLOCK;
}else{
pFile->eFileLock = NO_LOCK;
return SQLITE_OK;
}
}
/*
** Close a file.
*/
static int flockClose(sqlite3_file *id) {
assert( id!=0 );
flockUnlock(id, NO_LOCK);
return closeUnixFile(id);
}
#endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORK */
/******************* End of the flock lock implementation *********************
******************************************************************************/
/******************************************************************************
************************ Begin Named Semaphore Locking ************************
**
** Named semaphore locking is only supported on VxWorks.
**
** Semaphore locking is like dot-lock and flock in that it really only
** supports EXCLUSIVE locking. Only a single process can read or write
** the database file at a time. This reduces potential concurrency, but
** makes the lock implementation much easier.
*/
#if OS_VXWORKS
/*
** This routine checks if there is a RESERVED lock held on the specified
** file by this or any other process. If such a lock is held, set *pResOut
** to a non-zero value otherwise *pResOut is set to zero. The return value
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
*/
static int semXCheckReservedLock(sqlite3_file *id, int *pResOut) {
int rc = SQLITE_OK;
int reserved = 0;
unixFile *pFile = (unixFile*)id;
SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
assert( pFile );
/* Check if a thread in this process holds such a lock */
if( pFile->eFileLock>SHARED_LOCK ){
reserved = 1;
}
/* Otherwise see if some other process holds it. */
if( !reserved ){
sem_t *pSem = pFile->pInode->pSem;
if( sem_trywait(pSem)==-1 ){
int tErrno = errno;
if( EAGAIN != tErrno ){
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
storeLastErrno(pFile, tErrno);
} else {
/* someone else has the lock when we are in NO_LOCK */
reserved = (pFile->eFileLock < SHARED_LOCK);
}
}else{
/* we could have it if we want it */
sem_post(pSem);
}
}
OSTRACE(("TEST WR-LOCK %d %d %d (sem)\n", pFile->h, rc, reserved));
*pResOut = reserved;
return rc;
}
/*
** Lock the file with the lock specified by parameter eFileLock - one
** of the following:
**
** (1) SHARED_LOCK
** (2) RESERVED_LOCK
** (3) PENDING_LOCK
** (4) EXCLUSIVE_LOCK
**
** Sometimes when requesting one lock state, additional lock states
** are inserted in between. The locking might fail on one of the later
** transitions leaving the lock state different from what it started but
** still short of its goal. The following chart shows the allowed
** transitions and the inserted intermediate states:
**
** UNLOCKED -> SHARED
** SHARED -> RESERVED
** SHARED -> (PENDING) -> EXCLUSIVE
** RESERVED -> (PENDING) -> EXCLUSIVE
** PENDING -> EXCLUSIVE
**
** Semaphore locks only really support EXCLUSIVE locks. We track intermediate
** lock states in the sqlite3_file structure, but all locks SHARED or
** above are really EXCLUSIVE locks and exclude all other processes from
** access the file.
**
** This routine will only increase a lock. Use the sqlite3OsUnlock()
** routine to lower a locking level.
*/
static int semXLock(sqlite3_file *id, int eFileLock) {
unixFile *pFile = (unixFile*)id;
sem_t *pSem = pFile->pInode->pSem;
int rc = SQLITE_OK;
/* if we already have a lock, it is exclusive.
** Just adjust level and punt on outta here. */
if (pFile->eFileLock > NO_LOCK) {
pFile->eFileLock = eFileLock;
rc = SQLITE_OK;
goto sem_end_lock;
}
/* lock semaphore now but bail out when already locked. */
if( sem_trywait(pSem)==-1 ){
rc = SQLITE_BUSY;
goto sem_end_lock;
}
/* got it, set the type and return ok */
pFile->eFileLock = eFileLock;
sem_end_lock:
return rc;
}
/*
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
*/
static int semXUnlock(sqlite3_file *id, int eFileLock) {
unixFile *pFile = (unixFile*)id;
sem_t *pSem = pFile->pInode->pSem;
assert( pFile );
assert( pSem );
OSTRACE(("UNLOCK %d %d was %d pid=%d (sem)\n", pFile->h, eFileLock,
pFile->eFileLock, osGetpid(0)));
assert( eFileLock<=SHARED_LOCK );
/* no-op if possible */
if( pFile->eFileLock==eFileLock ){
return SQLITE_OK;
}
/* shared can just be set because we always have an exclusive */
if (eFileLock==SHARED_LOCK) {
pFile->eFileLock = eFileLock;
return SQLITE_OK;
}
/* no, really unlock. */
if ( sem_post(pSem)==-1 ) {
int rc, tErrno = errno;
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
if( IS_LOCK_ERROR(rc) ){
storeLastErrno(pFile, tErrno);
}
return rc;
}
pFile->eFileLock = NO_LOCK;
return SQLITE_OK;
}
/*
** Close a file.
*/
static int semXClose(sqlite3_file *id) {
if( id ){
unixFile *pFile = (unixFile*)id;
semXUnlock(id, NO_LOCK);
assert( pFile );
unixEnterMutex();
releaseInodeInfo(pFile);
unixLeaveMutex();
closeUnixFile(id);
}
return SQLITE_OK;
}
#endif /* OS_VXWORKS */
/*
** Named semaphore locking is only available on VxWorks.
**
*************** End of the named semaphore lock implementation ****************
******************************************************************************/
/******************************************************************************
*************************** Begin AFP Locking *********************************
**
** AFP is the Apple Filing Protocol. AFP is a network filesystem found
** on Apple Macintosh computers - both OS9 and OSX.
**
** Third-party implementations of AFP are available. But this code here
** only works on OSX.
*/
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
/*
** The afpLockingContext structure contains all afp lock specific state
*/
typedef struct afpLockingContext afpLockingContext;
struct afpLockingContext {
int reserved;
const char *dbPath; /* Name of the open file */
};
struct ByteRangeLockPB2
{
unsigned long long offset; /* offset to first byte to lock */
unsigned long long length; /* nbr of bytes to lock */
unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */
unsigned char unLockFlag; /* 1 = unlock, 0 = lock */
unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */
int fd; /* file desc to assoc this lock with */
};
#define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2)
/*
** This is a utility for setting or clearing a bit-range lock on an
** AFP filesystem.
**
** Return SQLITE_OK on success, SQLITE_BUSY on failure.
*/
static int afpSetLock(
const char *path, /* Name of the file to be locked or unlocked */
unixFile *pFile, /* Open file descriptor on path */
unsigned long long offset, /* First byte to be locked */
unsigned long long length, /* Number of bytes to lock */
int setLockFlag /* True to set lock. False to clear lock */
){
struct ByteRangeLockPB2 pb;
int err;
pb.unLockFlag = setLockFlag ? 0 : 1;
pb.startEndFlag = 0;
pb.offset = offset;
pb.length = length;
pb.fd = pFile->h;
OSTRACE(("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n",
(setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""),
offset, length));
err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);
if ( err==-1 ) {
int rc;
int tErrno = errno;
OSTRACE(("AFPSETLOCK failed to fsctl() '%s' %d %s\n",
path, tErrno, strerror(tErrno)));
#ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS
rc = SQLITE_BUSY;
#else
rc = sqliteErrorFromPosixError(tErrno,
setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK);
#endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */
if( IS_LOCK_ERROR(rc) ){
storeLastErrno(pFile, tErrno);
}
return rc;
} else {
return SQLITE_OK;
}
}
/*
** This routine checks if there is a RESERVED lock held on the specified
** file by this or any other process. If such a lock is held, set *pResOut
** to a non-zero value otherwise *pResOut is set to zero. The return value
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
*/
static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
int rc = SQLITE_OK;
int reserved = 0;
unixFile *pFile = (unixFile*)id;
afpLockingContext *context;
SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
assert( pFile );
context = (afpLockingContext *) pFile->lockingContext;
if( context->reserved ){
*pResOut = 1;
return SQLITE_OK;
}
unixEnterMutex(); /* Because pFile->pInode is shared across threads */
/* Check if a thread in this process holds such a lock */
if( pFile->pInode->eFileLock>SHARED_LOCK ){
reserved = 1;
}
/* Otherwise see if some other process holds it.
*/
if( !reserved ){
/* lock the RESERVED byte */
int lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);
if( SQLITE_OK==lrc ){
/* if we succeeded in taking the reserved lock, unlock it to restore
** the original state */
lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0);
} else {
/* if we failed to get the lock then someone else must have it */
reserved = 1;
}
if( IS_LOCK_ERROR(lrc) ){
rc=lrc;
}
}
unixLeaveMutex();
OSTRACE(("TEST WR-LOCK %d %d %d (afp)\n", pFile->h, rc, reserved));
*pResOut = reserved;
return rc;
}
/*
** Lock the file with the lock specified by parameter eFileLock - one
** of the following:
**
** (1) SHARED_LOCK
** (2) RESERVED_LOCK
** (3) PENDING_LOCK
** (4) EXCLUSIVE_LOCK
**
** Sometimes when requesting one lock state, additional lock states
** are inserted in between. The locking might fail on one of the later
** transitions leaving the lock state different from what it started but
** still short of its goal. The following chart shows the allowed
** transitions and the inserted intermediate states:
**
** UNLOCKED -> SHARED
** SHARED -> RESERVED
** SHARED -> (PENDING) -> EXCLUSIVE
** RESERVED -> (PENDING) -> EXCLUSIVE
** PENDING -> EXCLUSIVE
**
** This routine will only increase a lock. Use the sqlite3OsUnlock()
** routine to lower a locking level.
*/
static int afpLock(sqlite3_file *id, int eFileLock){
int rc = SQLITE_OK;
unixFile *pFile = (unixFile*)id;
unixInodeInfo *pInode = pFile->pInode;
afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
assert( pFile );
OSTRACE(("LOCK %d %s was %s(%s,%d) pid=%d (afp)\n", pFile->h,
azFileLock(eFileLock), azFileLock(pFile->eFileLock),
azFileLock(pInode->eFileLock), pInode->nShared , osGetpid(0)));
/* If there is already a lock of this type or more restrictive on the
** unixFile, do nothing. Don't use the afp_end_lock: exit path, as
** unixEnterMutex() hasn't been called yet.
*/
if( pFile->eFileLock>=eFileLock ){
OSTRACE(("LOCK %d %s ok (already held) (afp)\n", pFile->h,
azFileLock(eFileLock)));
return SQLITE_OK;
}
/* Make sure the locking sequence is correct
** (1) We never move from unlocked to anything higher than shared lock.
** (2) SQLite never explicitly requests a pendig lock.
** (3) A shared lock is always held when a reserve lock is requested.
*/
assert( pFile->eFileLock!=NO_LOCK || eFileLock==SHARED_LOCK );
assert( eFileLock!=PENDING_LOCK );
assert( eFileLock!=RESERVED_LOCK || pFile->eFileLock==SHARED_LOCK );
/* This mutex is needed because pFile->pInode is shared across threads
*/
unixEnterMutex();
pInode = pFile->pInode;
/* If some thread using this PID has a lock via a different unixFile*
** handle that precludes the requested lock, return BUSY.
*/
if( (pFile->eFileLock!=pInode->eFileLock &&
(pInode->eFileLock>=PENDING_LOCK || eFileLock>SHARED_LOCK))
){
rc = SQLITE_BUSY;
goto afp_end_lock;
}
/* If a SHARED lock is requested, and some thread using this PID already
** has a SHARED or RESERVED lock, then increment reference counts and
** return SQLITE_OK.
*/
if( eFileLock==SHARED_LOCK &&
(pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){
assert( eFileLock==SHARED_LOCK );
assert( pFile->eFileLock==0 );
assert( pInode->nShared>0 );
pFile->eFileLock = SHARED_LOCK;
pInode->nShared++;
pInode->nLock++;
goto afp_end_lock;
}
/* A PENDING lock is needed before acquiring a SHARED lock and before
** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
** be released.
*/
if( eFileLock==SHARED_LOCK
|| (eFileLock==EXCLUSIVE_LOCK && pFile->eFileLock<PENDING_LOCK)
){
int failed;
failed = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1);
if (failed) {
rc = failed;
goto afp_end_lock;
}
}
/* If control gets to this point, then actually go ahead and make
** operating system calls for the specified lock.
*/
if( eFileLock==SHARED_LOCK ){
int lrc1, lrc2, lrc1Errno = 0;
long lk, mask;
assert( pInode->nShared==0 );
assert( pInode->eFileLock==0 );
mask = (sizeof(long)==8) ? LARGEST_INT64 : 0x7fffffff;
/* Now get the read-lock SHARED_LOCK */
/* note that the quality of the randomness doesn't matter that much */
lk = random();
pInode->sharedByte = (lk & mask)%(SHARED_SIZE - 1);
lrc1 = afpSetLock(context->dbPath, pFile,
SHARED_FIRST+pInode->sharedByte, 1, 1);
if( IS_LOCK_ERROR(lrc1) ){
lrc1Errno = pFile->lastErrno;
}
/* Drop the temporary PENDING lock */
lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0);
if( IS_LOCK_ERROR(lrc1) ) {
storeLastErrno(pFile, lrc1Errno);
rc = lrc1;
goto afp_end_lock;
} else if( IS_LOCK_ERROR(lrc2) ){
rc = lrc2;
goto afp_end_lock;
} else if( lrc1 != SQLITE_OK ) {
rc = lrc1;
} else {
pFile->eFileLock = SHARED_LOCK;
pInode->nLock++;
pInode->nShared = 1;
}
}else if( eFileLock==EXCLUSIVE_LOCK && pInode->nShared>1 ){
/* We are trying for an exclusive lock but another thread in this
** same process is still holding a shared lock. */
rc = SQLITE_BUSY;
}else{
/* The request was for a RESERVED or EXCLUSIVE lock. It is
** assumed that there is a SHARED or greater lock on the file
** already.
*/
int failed = 0;
assert( 0!=pFile->eFileLock );
if (eFileLock >= RESERVED_LOCK && pFile->eFileLock < RESERVED_LOCK) {
/* Acquire a RESERVED lock */
failed = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);
if( !failed ){
context->reserved = 1;
}
}
if (!failed && eFileLock == EXCLUSIVE_LOCK) {
/* Acquire an EXCLUSIVE lock */
/* Remove the shared lock before trying the range. we'll need to
** reestablish the shared lock if we can't get the afpUnlock
*/
if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST +
pInode->sharedByte, 1, 0)) ){
int failed2 = SQLITE_OK;
/* now attemmpt to get the exclusive lock range */
failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST,
SHARED_SIZE, 1);
if( failed && (failed2 = afpSetLock(context->dbPath, pFile,
SHARED_FIRST + pInode->sharedByte, 1, 1)) ){
/* Can't reestablish the shared lock. Sqlite can't deal, this is
** a critical I/O error
*/
rc = ((failed & SQLITE_IOERR) == SQLITE_IOERR) ? failed2 :
SQLITE_IOERR_LOCK;
goto afp_end_lock;
}
}else{
rc = failed;
}
}
if( failed ){
rc = failed;
}
}
if( rc==SQLITE_OK ){
pFile->eFileLock = eFileLock;
pInode->eFileLock = eFileLock;
}else if( eFileLock==EXCLUSIVE_LOCK ){
pFile->eFileLock = PENDING_LOCK;
pInode->eFileLock = PENDING_LOCK;
}
afp_end_lock:
unixLeaveMutex();
OSTRACE(("LOCK %d %s %s (afp)\n", pFile->h, azFileLock(eFileLock),
rc==SQLITE_OK ? "ok" : "failed"));
return rc;
}
/*
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
*/
static int afpUnlock(sqlite3_file *id, int eFileLock) {
int rc = SQLITE_OK;
unixFile *pFile = (unixFile*)id;
unixInodeInfo *pInode;
afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
int skipShared = 0;
#ifdef SQLITE_TEST
int h = pFile->h;
#endif
assert( pFile );
OSTRACE(("UNLOCK %d %d was %d(%d,%d) pid=%d (afp)\n", pFile->h, eFileLock,
pFile->eFileLock, pFile->pInode->eFileLock, pFile->pInode->nShared,
osGetpid(0)));
assert( eFileLock<=SHARED_LOCK );
if( pFile->eFileLock<=eFileLock ){
return SQLITE_OK;
}
unixEnterMutex();
pInode = pFile->pInode;
assert( pInode->nShared!=0 );
if( pFile->eFileLock>SHARED_LOCK ){
assert( pInode->eFileLock==pFile->eFileLock );
SimulateIOErrorBenign(1);
SimulateIOError( h=(-1) )
SimulateIOErrorBenign(0);
#ifdef SQLITE_DEBUG
/* When reducing a lock such that other processes can start
** reading the database file again, make sure that the
** transaction counter was updated if any part of the database
** file changed. If the transaction counter is not updated,
** other connections to the same file might not realize that
** the file has changed and hence might not know to flush their
** cache. The use of a stale cache can lead to database corruption.
*/
assert( pFile->inNormalWrite==0
|| pFile->dbUpdate==0
|| pFile->transCntrChng==1 );
pFile->inNormalWrite = 0;
#endif
if( pFile->eFileLock==EXCLUSIVE_LOCK ){
rc = afpSetLock(context->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0);
if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1) ){
/* only re-establish the shared lock if necessary */
int sharedLockByte = SHARED_FIRST+pInode->sharedByte;
rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 1);
} else {
skipShared = 1;
}
}
if( rc==SQLITE_OK && pFile->eFileLock>=PENDING_LOCK ){
rc = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0);
}
if( rc==SQLITE_OK && pFile->eFileLock>=RESERVED_LOCK && context->reserved ){
rc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0);
if( !rc ){
context->reserved = 0;
}
}
if( rc==SQLITE_OK && (eFileLock==SHARED_LOCK || pInode->nShared>1)){
pInode->eFileLock = SHARED_LOCK;
}
}
if( rc==SQLITE_OK && eFileLock==NO_LOCK ){
/* Decrement the shared lock counter. Release the lock using an
** OS call only when all threads in this same process have released
** the lock.
*/
unsigned long long sharedLockByte = SHARED_FIRST+pInode->sharedByte;
pInode->nShared--;
if( pInode->nShared==0 ){
SimulateIOErrorBenign(1);
SimulateIOError( h=(-1) )
SimulateIOErrorBenign(0);
if( !skipShared ){
rc = afpSetLock(context->dbPath, pFile, sharedLockByte, 1, 0);
}
if( !rc ){
pInode->eFileLock = NO_LOCK;
pFile->eFileLock = NO_LOCK;
}
}
if( rc==SQLITE_OK ){
pInode->nLock--;
assert( pInode->nLock>=0 );
if( pInode->nLock==0 ){
closePendingFds(pFile);
}
}
}
unixLeaveMutex();
if( rc==SQLITE_OK ) pFile->eFileLock = eFileLock;
return rc;
}
/*
** Close a file & cleanup AFP specific locking context
*/
static int afpClose(sqlite3_file *id) {
int rc = SQLITE_OK;
unixFile *pFile = (unixFile*)id;
assert( id!=0 );
afpUnlock(id, NO_LOCK);
unixEnterMutex();
if( pFile->pInode && pFile->pInode->nLock ){
/* If there are outstanding locks, do not actually close the file just
** yet because that would clear those locks. Instead, add the file
** descriptor to pInode->aPending. It will be automatically closed when
** the last lock is cleared.
*/
setPendingFd(pFile);
}
releaseInodeInfo(pFile);
sqlite3_free(pFile->lockingContext);
rc = closeUnixFile(id);
unixLeaveMutex();
return rc;
}
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
/*
** The code above is the AFP lock implementation. The code is specific
** to MacOSX and does not work on other unix platforms. No alternative
** is available. If you don't compile for a mac, then the "unix-afp"
** VFS is not available.
**
********************* End of the AFP lock implementation **********************
******************************************************************************/
/******************************************************************************
*************************** Begin NFS Locking ********************************/
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
/*
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
*/
static int nfsUnlock(sqlite3_file *id, int eFileLock){
return posixUnlock(id, eFileLock, 1);
}
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
/*
** The code above is the NFS lock implementation. The code is specific
** to MacOSX and does not work on other unix platforms. No alternative
** is available.
**
********************* End of the NFS lock implementation **********************
******************************************************************************/
/******************************************************************************
**************** Non-locking sqlite3_file methods *****************************
**
** The next division contains implementations for all methods of the
** sqlite3_file object other than the locking methods. The locking
** methods were defined in divisions above (one locking method per
** division). Those methods that are common to all locking modes
** are gather together into this division.
*/
/*
** Seek to the offset passed as the second argument, then read cnt
** bytes into pBuf. Return the number of bytes actually read.
**
** NB: If you define USE_PREAD or USE_PREAD64, then it might also
** be necessary to define _XOPEN_SOURCE to be 500. This varies from
** one system to another. Since SQLite does not define USE_PREAD
** in any form by default, we will not attempt to define _XOPEN_SOURCE.
** See tickets #2741 and #2681.
**
** To avoid stomping the errno value on a failed read the lastErrno value
** is set before returning.
*/
static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
int got;
int prior = 0;
#if (!defined(USE_PREAD) && !defined(USE_PREAD64))
i64 newOffset;
#endif
TIMER_START;
assert( cnt==(cnt&0x1ffff) );
assert( id->h>2 );
do{
#if defined(USE_PREAD)
got = osPread(id->h, pBuf, cnt, offset);
SimulateIOError( got = -1 );
#elif defined(USE_PREAD64)
got = osPread64(id->h, pBuf, cnt, offset);
SimulateIOError( got = -1 );
#else
newOffset = lseek(id->h, offset, SEEK_SET);
SimulateIOError( newOffset = -1 );
if( newOffset<0 ){
storeLastErrno((unixFile*)id, errno);
return -1;
}
got = osRead(id->h, pBuf, cnt);
#endif
if( got==cnt ) break;
if( got<0 ){
if( errno==EINTR ){ got = 1; continue; }
prior = 0;
storeLastErrno((unixFile*)id, errno);
break;
}else if( got>0 ){
cnt -= got;
offset += got;
prior += got;
pBuf = (void*)(got + (char*)pBuf);
}
}while( got>0 );
TIMER_END;
OSTRACE(("READ %-3d %5d %7lld %llu\n",
id->h, got+prior, offset-prior, TIMER_ELAPSED));
return got+prior;
}
/*
** Read data from a file into a buffer. Return SQLITE_OK if all
** bytes were read successfully and SQLITE_IOERR if anything goes
** wrong.
*/
static int unixRead(
sqlite3_file *id,
void *pBuf,
int amt,
sqlite3_int64 offset
){
unixFile *pFile = (unixFile *)id;
int got;
assert( id );
assert( offset>=0 );
assert( amt>0 );
/* If this is a database file (not a journal, master-journal or temp
** file), the bytes in the locking range should never be read or written. */
#if 0
assert( pFile->pUnused==0
|| offset>=PENDING_BYTE+512
|| offset+amt<=PENDING_BYTE
);
#endif
#if SQLITE_MAX_MMAP_SIZE>0
/* Deal with as much of this read request as possible by transfering
** data from the memory mapping using memcpy(). */
if( offset<pFile->mmapSize ){
if( offset+amt <= pFile->mmapSize ){
memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt);
return SQLITE_OK;
}else{
int nCopy = pFile->mmapSize - offset;
memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy);
pBuf = &((u8 *)pBuf)[nCopy];
amt -= nCopy;
offset += nCopy;
}
}
#endif
got = seekAndRead(pFile, offset, pBuf, amt);
if( got==amt ){
return SQLITE_OK;
}else if( got<0 ){
/* lastErrno set by seekAndRead */
return SQLITE_IOERR_READ;
}else{
storeLastErrno(pFile, 0); /* not a system error */
/* Unread parts of the buffer must be zero-filled */
memset(&((char*)pBuf)[got], 0, amt-got);
return SQLITE_IOERR_SHORT_READ;
}
}
/*
** Attempt to seek the file-descriptor passed as the first argument to
** absolute offset iOff, then attempt to write nBuf bytes of data from
** pBuf to it. If an error occurs, return -1 and set *piErrno. Otherwise,
** return the actual number of bytes written (which may be less than
** nBuf).
*/
static int seekAndWriteFd(
int fd, /* File descriptor to write to */
i64 iOff, /* File offset to begin writing at */
const void *pBuf, /* Copy data from this buffer to the file */
int nBuf, /* Size of buffer pBuf in bytes */
int *piErrno /* OUT: Error number if error occurs */
){
int rc = 0; /* Value returned by system call */
assert( nBuf==(nBuf&0x1ffff) );
assert( fd>2 );
assert( piErrno!=0 );
nBuf &= 0x1ffff;
TIMER_START;
#if defined(USE_PREAD)
do{ rc = (int)osPwrite(fd, pBuf, nBuf, iOff); }while( rc<0 && errno==EINTR );
#elif defined(USE_PREAD64)
do{ rc = (int)osPwrite64(fd, pBuf, nBuf, iOff);}while( rc<0 && errno==EINTR);
#else
do{
i64 iSeek = lseek(fd, iOff, SEEK_SET);
SimulateIOError( iSeek = -1 );
if( iSeek<0 ){
rc = -1;
break;
}
rc = osWrite(fd, pBuf, nBuf);
}while( rc<0 && errno==EINTR );
#endif
TIMER_END;
OSTRACE(("WRITE %-3d %5d %7lld %llu\n", fd, rc, iOff, TIMER_ELAPSED));
if( rc<0 ) *piErrno = errno;
return rc;
}
/*
** Seek to the offset in id->offset then read cnt bytes into pBuf.
** Return the number of bytes actually read. Update the offset.
**
** To avoid stomping the errno value on a failed write the lastErrno value
** is set before returning.
*/
static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
return seekAndWriteFd(id->h, offset, pBuf, cnt, &id->lastErrno);
}
/*
** Write data from a buffer into a file. Return SQLITE_OK on success
** or some other error code on failure.
*/
static int unixWrite(
sqlite3_file *id,
const void *pBuf,
int amt,
sqlite3_int64 offset
){
unixFile *pFile = (unixFile*)id;
int wrote = 0;
assert( id );
assert( amt>0 );
/* If this is a database file (not a journal, master-journal or temp
** file), the bytes in the locking range should never be read or written. */
#if 0
assert( pFile->pUnused==0
|| offset>=PENDING_BYTE+512
|| offset+amt<=PENDING_BYTE
);
#endif
#ifdef SQLITE_DEBUG
/* If we are doing a normal write to a database file (as opposed to
** doing a hot-journal rollback or a write to some file other than a
** normal database file) then record the fact that the database
** has changed. If the transaction counter is modified, record that
** fact too.
*/
if( pFile->inNormalWrite ){
pFile->dbUpdate = 1; /* The database has been modified */
if( offset<=24 && offset+amt>=27 ){
int rc;
char oldCntr[4];
SimulateIOErrorBenign(1);
rc = seekAndRead(pFile, 24, oldCntr, 4);
SimulateIOErrorBenign(0);
if( rc!=4 || memcmp(oldCntr, &((char*)pBuf)[24-offset], 4)!=0 ){
pFile->transCntrChng = 1; /* The transaction counter has changed */
}
}
}
#endif
#if defined(SQLITE_MMAP_READWRITE) && SQLITE_MAX_MMAP_SIZE>0
/* Deal with as much of this write request as possible by transfering
** data from the memory mapping using memcpy(). */
if( offset<pFile->mmapSize ){
if( offset+amt <= pFile->mmapSize ){
memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt);
return SQLITE_OK;
}else{
int nCopy = pFile->mmapSize - offset;
memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy);
pBuf = &((u8 *)pBuf)[nCopy];
amt -= nCopy;
offset += nCopy;
}
}
#endif
while( (wrote = seekAndWrite(pFile, offset, pBuf, amt))<amt && wrote>0 ){
amt -= wrote;
offset += wrote;
pBuf = &((char*)pBuf)[wrote];
}
SimulateIOError(( wrote=(-1), amt=1 ));
SimulateDiskfullError(( wrote=0, amt=1 ));
if( amt>wrote ){
if( wrote<0 && pFile->lastErrno!=ENOSPC ){
/* lastErrno set by seekAndWrite */
return SQLITE_IOERR_WRITE;
}else{
storeLastErrno(pFile, 0); /* not a system error */
return SQLITE_FULL;
}
}
return SQLITE_OK;
}
#ifdef SQLITE_TEST
/*
** Count the number of fullsyncs and normal syncs. This is used to test
** that syncs and fullsyncs are occurring at the right times.
*/
int sqlite3_sync_count = 0;
int sqlite3_fullsync_count = 0;
#endif
/*
** We do not trust systems to provide a working fdatasync(). Some do.
** Others do no. To be safe, we will stick with the (slightly slower)
** fsync(). If you know that your system does support fdatasync() correctly,
** then simply compile with -Dfdatasync=fdatasync or -DHAVE_FDATASYNC
*/
#if !defined(fdatasync) && !HAVE_FDATASYNC
# define fdatasync fsync
#endif
/*
** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not
** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently
** only available on Mac OS X. But that could change.
*/
#ifdef F_FULLFSYNC
# define HAVE_FULLFSYNC 1
#else
# define HAVE_FULLFSYNC 0
#endif
/*
** The fsync() system call does not work as advertised on many
** unix systems. The following procedure is an attempt to make
** it work better.
**
** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful
** for testing when we want to run through the test suite quickly.
** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
** or power failure will likely corrupt the database file.
**
** SQLite sets the dataOnly flag if the size of the file is unchanged.
** The idea behind dataOnly is that it should only write the file content
** to disk, not the inode. We only set dataOnly if the file size is
** unchanged since the file size is part of the inode. However,
** Ted Ts'o tells us that fdatasync() will also write the inode if the
** file size has changed. The only real difference between fdatasync()
** and fsync(), Ted tells us, is that fdatasync() will not flush the
** inode if the mtime or owner or other inode attributes have changed.
** We only care about the file size, not the other file attributes, so
** as far as SQLite is concerned, an fdatasync() is always adequate.
** So, we always use fdatasync() if it is available, regardless of
** the value of the dataOnly flag.
*/
static int full_fsync(int fd, int fullSync, int dataOnly){
int rc;
/* The following "ifdef/elif/else/" block has the same structure as
** the one below. It is replicated here solely to avoid cluttering
** up the real code with the UNUSED_PARAMETER() macros.
*/
#ifdef SQLITE_NO_SYNC
UNUSED_PARAMETER(fd);
UNUSED_PARAMETER(fullSync);
UNUSED_PARAMETER(dataOnly);
#elif HAVE_FULLFSYNC
UNUSED_PARAMETER(dataOnly);
#else
UNUSED_PARAMETER(fullSync);
UNUSED_PARAMETER(dataOnly);
#endif
/* Record the number of times that we do a normal fsync() and
** FULLSYNC. This is used during testing to verify that this procedure
** gets called with the correct arguments.
*/
#ifdef SQLITE_TEST
if( fullSync ) sqlite3_fullsync_count++;
sqlite3_sync_count++;
#endif
/* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a
** no-op. But go ahead and call fstat() to validate the file
** descriptor as we need a method to provoke a failure during
** coverate testing.
*/
#ifdef SQLITE_NO_SYNC
{
struct stat buf;
rc = osFstat(fd, &buf);
}
#elif HAVE_FULLFSYNC
if( fullSync ){
rc = osFcntl(fd, F_FULLFSYNC, 0);
}else{
rc = 1;
}
/* If the FULLFSYNC failed, fall back to attempting an fsync().
** It shouldn't be possible for fullfsync to fail on the local
** file system (on OSX), so failure indicates that FULLFSYNC
** isn't supported for this file system. So, attempt an fsync
** and (for now) ignore the overhead of a superfluous fcntl call.
** It'd be better to detect fullfsync support once and avoid
** the fcntl call every time sync is called.
*/
if( rc ) rc = fsync(fd);
#elif defined(__APPLE__)
/* fdatasync() on HFS+ doesn't yet flush the file size if it changed correctly
** so currently we default to the macro that redefines fdatasync to fsync
*/
rc = fsync(fd);
#else
rc = fdatasync(fd);
#if OS_VXWORKS
if( rc==-1 && errno==ENOTSUP ){
rc = fsync(fd);
}
#endif /* OS_VXWORKS */
#endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */
if( OS_VXWORKS && rc!= -1 ){
rc = 0;
}
return rc;
}
/*
** Open a file descriptor to the directory containing file zFilename.
** If successful, *pFd is set to the opened file descriptor and
** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
** value.
**
** The directory file descriptor is used for only one thing - to
** fsync() a directory to make sure file creation and deletion events
** are flushed to disk. Such fsyncs are not needed on newer
** journaling filesystems, but are required on older filesystems.
**
** This routine can be overridden using the xSetSysCall interface.
** The ability to override this routine was added in support of the
** chromium sandbox. Opening a directory is a security risk (we are
** told) so making it overrideable allows the chromium sandbox to
** replace this routine with a harmless no-op. To make this routine
** a no-op, replace it with a stub that returns SQLITE_OK but leaves
** *pFd set to a negative number.
**
** If SQLITE_OK is returned, the caller is responsible for closing
** the file descriptor *pFd using close().
*/
static int openDirectory(const char *zFilename, int *pFd){
int ii;
int fd = -1;
char zDirname[MAX_PATHNAME+1];
sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
for(ii=(int)strlen(zDirname); ii>0 && zDirname[ii]!='/'; ii--);
if( ii>0 ){
zDirname[ii] = '\0';
}else{
if( zDirname[0]!='/' ) zDirname[0] = '.';
zDirname[1] = 0;
}
fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0);
if( fd>=0 ){
OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname));
}
*pFd = fd;
if( fd>=0 ) return SQLITE_OK;
return unixLogError(SQLITE_CANTOPEN_BKPT, "openDirectory", zDirname);
}
/*
** Make sure all writes to a particular file are committed to disk.
**
** If dataOnly==0 then both the file itself and its metadata (file
** size, access time, etc) are synced. If dataOnly!=0 then only the
** file data is synced.
**
** Under Unix, also make sure that the directory entry for the file
** has been created by fsync-ing the directory that contains the file.
** If we do not do this and we encounter a power failure, the directory
** entry for the journal might not exist after we reboot. The next
** SQLite to access the file will not know that the journal exists (because
** the directory entry for the journal was never created) and the transaction
** will not roll back - possibly leading to database corruption.
*/
static int unixSync(sqlite3_file *id, int flags){
int rc;
unixFile *pFile = (unixFile*)id;
int isDataOnly = (flags&SQLITE_SYNC_DATAONLY);
int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL;
/* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */
assert((flags&0x0F)==SQLITE_SYNC_NORMAL
|| (flags&0x0F)==SQLITE_SYNC_FULL
);
/* Unix cannot, but some systems may return SQLITE_FULL from here. This
** line is to test that doing so does not cause any problems.
*/
SimulateDiskfullError( return SQLITE_FULL );
assert( pFile );
OSTRACE(("SYNC %-3d\n", pFile->h));
rc = full_fsync(pFile->h, isFullsync, isDataOnly);
SimulateIOError( rc=1 );
if( rc ){
storeLastErrno(pFile, errno);
return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath);
}
/* Also fsync the directory containing the file if the DIRSYNC flag
** is set. This is a one-time occurrence. Many systems (examples: AIX)
** are unable to fsync a directory, so ignore errors on the fsync.
*/
if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){
int dirfd;
OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath,
HAVE_FULLFSYNC, isFullsync));
rc = osOpenDirectory(pFile->zPath, &dirfd);
if( rc==SQLITE_OK ){
full_fsync(dirfd, 0, 0);
robust_close(pFile, dirfd, __LINE__);
}else{
assert( rc==SQLITE_CANTOPEN );
rc = SQLITE_OK;
}
pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC;
}
return rc;
}
/*
** Truncate an open file to a specified size
*/
static int unixTruncate(sqlite3_file *id, i64 nByte){
unixFile *pFile = (unixFile *)id;
int rc;
assert( pFile );
SimulateIOError( return SQLITE_IOERR_TRUNCATE );
/* If the user has configured a chunk-size for this file, truncate the
** file so that it consists of an integer number of chunks (i.e. the
** actual file size after the operation may be larger than the requested
** size).
*/
if( pFile->szChunk>0 ){
nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk;
}
rc = robust_ftruncate(pFile->h, nByte);
if( rc ){
storeLastErrno(pFile, errno);
return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
}else{
#ifdef SQLITE_DEBUG
/* If we are doing a normal write to a database file (as opposed to
** doing a hot-journal rollback or a write to some file other than a
** normal database file) and we truncate the file to zero length,
** that effectively updates the change counter. This might happen
** when restoring a database using the backup API from a zero-length
** source.
*/
if( pFile->inNormalWrite && nByte==0 ){
pFile->transCntrChng = 1;
}
#endif
#if SQLITE_MAX_MMAP_SIZE>0
/* If the file was just truncated to a size smaller than the currently
** mapped region, reduce the effective mapping size as well. SQLite will
** use read() and write() to access data beyond this point from now on.
*/
if( nByte<pFile->mmapSize ){
pFile->mmapSize = nByte;
}
#endif
return SQLITE_OK;
}
}
/*
** Determine the current size of a file in bytes
*/
static int unixFileSize(sqlite3_file *id, i64 *pSize){
int rc;
struct stat buf;
assert( id );
rc = osFstat(((unixFile*)id)->h, &buf);
SimulateIOError( rc=1 );
if( rc!=0 ){
storeLastErrno((unixFile*)id, errno);
return SQLITE_IOERR_FSTAT;
}
*pSize = buf.st_size;
/* When opening a zero-size database, the findInodeInfo() procedure
** writes a single byte into that file in order to work around a bug
** in the OS-X msdos filesystem. In order to avoid problems with upper
** layers, we need to report this file size as zero even though it is
** really 1. Ticket #3260.
*/
if( *pSize==1 ) *pSize = 0;
return SQLITE_OK;
}
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
/*
** Handler for proxy-locking file-control verbs. Defined below in the
** proxying locking division.
*/
static int proxyFileControl(sqlite3_file*,int,void*);
#endif
/*
** This function is called to handle the SQLITE_FCNTL_SIZE_HINT
** file-control operation. Enlarge the database to nBytes in size
** (rounded up to the next chunk-size). If the database is already
** nBytes or larger, this routine is a no-op.
*/
static int fcntlSizeHint(unixFile *pFile, i64 nByte){
if( pFile->szChunk>0 ){
i64 nSize; /* Required file size */
struct stat buf; /* Used to hold return values of fstat() */
if( osFstat(pFile->h, &buf) ){
return SQLITE_IOERR_FSTAT;
}
nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk;
if( nSize>(i64)buf.st_size ){
#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
/* The code below is handling the return value of osFallocate()
** correctly. posix_fallocate() is defined to "returns zero on success,
** or an error number on failure". See the manpage for details. */
int err;
do{
err = osFallocate(pFile->h, buf.st_size, nSize-buf.st_size);
}while( err==EINTR );
if( err ) return SQLITE_IOERR_WRITE;
#else
/* If the OS does not have posix_fallocate(), fake it. Write a
** single byte to the last byte in each block that falls entirely
** within the extended region. Then, if required, a single byte
** at offset (nSize-1), to set the size of the file correctly.
** This is a similar technique to that used by glibc on systems
** that do not have a real fallocate() call.
*/
int nBlk = buf.st_blksize; /* File-system block size */
int nWrite = 0; /* Number of bytes written by seekAndWrite */
i64 iWrite; /* Next offset to write to */
iWrite = (buf.st_size/nBlk)*nBlk + nBlk - 1;
assert( iWrite>=buf.st_size );
assert( ((iWrite+1)%nBlk)==0 );
for(/*no-op*/; iWrite<nSize+nBlk-1; iWrite+=nBlk ){
if( iWrite>=nSize ) iWrite = nSize - 1;
nWrite = seekAndWrite(pFile, iWrite, "", 1);
if( nWrite!=1 ) return SQLITE_IOERR_WRITE;
}
#endif
}
}
#if SQLITE_MAX_MMAP_SIZE>0
if( pFile->mmapSizeMax>0 && nByte>pFile->mmapSize ){
int rc;
if( pFile->szChunk<=0 ){
if( robust_ftruncate(pFile->h, nByte) ){
storeLastErrno(pFile, errno);
return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
}
}
rc = unixMapfile(pFile, nByte);
return rc;
}
#endif
return SQLITE_OK;
}
/*
** If *pArg is initially negative then this is a query. Set *pArg to
** 1 or 0 depending on whether or not bit mask of pFile->ctrlFlags is set.
**
** If *pArg is 0 or 1, then clear or set the mask bit of pFile->ctrlFlags.
*/
static void unixModeBit(unixFile *pFile, unsigned char mask, int *pArg){
if( *pArg<0 ){
*pArg = (pFile->ctrlFlags & mask)!=0;
}else if( (*pArg)==0 ){
pFile->ctrlFlags &= ~mask;
}else{
pFile->ctrlFlags |= mask;
}
}
/* Forward declaration */
static int unixGetTempname(int nBuf, char *zBuf);
/*
** Information and control of an open file handle.
*/
static int unixFileControl(sqlite3_file *id, int op, void *pArg){
unixFile *pFile = (unixFile*)id;
switch( op ){
case SQLITE_FCNTL_LOCKSTATE: {
*(int*)pArg = pFile->eFileLock;
return SQLITE_OK;
}
case SQLITE_FCNTL_LAST_ERRNO: {
*(int*)pArg = pFile->lastErrno;
return SQLITE_OK;
}
case SQLITE_FCNTL_CHUNK_SIZE: {
pFile->szChunk = *(int *)pArg;
return SQLITE_OK;
}
case SQLITE_FCNTL_SIZE_HINT: {
int rc;
SimulateIOErrorBenign(1);
rc = fcntlSizeHint(pFile, *(i64 *)pArg);
SimulateIOErrorBenign(0);
return rc;
}
case SQLITE_FCNTL_PERSIST_WAL: {
unixModeBit(pFile, UNIXFILE_PERSIST_WAL, (int*)pArg);
return SQLITE_OK;
}
case SQLITE_FCNTL_POWERSAFE_OVERWRITE: {
unixModeBit(pFile, UNIXFILE_PSOW, (int*)pArg);
return SQLITE_OK;
}
case SQLITE_FCNTL_VFSNAME: {
*(char**)pArg = sqlite3_mprintf("%s", pFile->pVfs->zName);
return SQLITE_OK;
}
case SQLITE_FCNTL_TEMPFILENAME: {
char *zTFile = sqlite3_malloc64( pFile->pVfs->mxPathname );
if( zTFile ){
unixGetTempname(pFile->pVfs->mxPathname, zTFile);
*(char**)pArg = zTFile;
}
return SQLITE_OK;
}
case SQLITE_FCNTL_HAS_MOVED: {
*(int*)pArg = fileHasMoved(pFile);
return SQLITE_OK;
}
#if SQLITE_MAX_MMAP_SIZE>0
case SQLITE_FCNTL_MMAP_SIZE: {
i64 newLimit = *(i64*)pArg;
int rc = SQLITE_OK;
if( newLimit>sqlite3GlobalConfig.mxMmap ){
newLimit = sqlite3GlobalConfig.mxMmap;
}
*(i64*)pArg = pFile->mmapSizeMax;
if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){
pFile->mmapSizeMax = newLimit;
if( pFile->mmapSize>0 ){
unixUnmapfile(pFile);
rc = unixMapfile(pFile, -1);
}
}
return rc;
}
#endif
#ifdef SQLITE_DEBUG
/* The pager calls this method to signal that it has done
** a rollback and that the database is therefore unchanged and
** it hence it is OK for the transaction change counter to be
** unchanged.
*/
case SQLITE_FCNTL_DB_UNCHANGED: {
((unixFile*)id)->dbUpdate = 0;
return SQLITE_OK;
}
#endif
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
case SQLITE_FCNTL_SET_LOCKPROXYFILE:
case SQLITE_FCNTL_GET_LOCKPROXYFILE: {
return proxyFileControl(id,op,pArg);
}
#endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */
}
return SQLITE_NOTFOUND;
}
/*
** Return the sector size in bytes of the underlying block device for
** the specified file. This is almost always 512 bytes, but may be
** larger for some devices.
**
** SQLite code assumes this function cannot fail. It also assumes that
** if two files are created in the same file-system directory (i.e.
** a database and its journal file) that the sector size will be the
** same for both.
*/
#ifndef __QNXNTO__
static int unixSectorSize(sqlite3_file *NotUsed){
UNUSED_PARAMETER(NotUsed);
return SQLITE_DEFAULT_SECTOR_SIZE;
}
#endif
/*
** The following version of unixSectorSize() is optimized for QNX.
*/
#ifdef __QNXNTO__
#include <sys/dcmd_blk.h>
#include <sys/statvfs.h>
static int unixSectorSize(sqlite3_file *id){
unixFile *pFile = (unixFile*)id;
if( pFile->sectorSize == 0 ){
struct statvfs fsInfo;
/* Set defaults for non-supported filesystems */
pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
pFile->deviceCharacteristics = 0;
if( fstatvfs(pFile->h, &fsInfo) == -1 ) {
return pFile->sectorSize;
}
if( !strcmp(fsInfo.f_basetype, "tmp") ) {
pFile->sectorSize = fsInfo.f_bsize;
pFile->deviceCharacteristics =
SQLITE_IOCAP_ATOMIC4K | /* All ram filesystem writes are atomic */
SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until
** the write succeeds */
SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind
** so it is ordered */
0;
}else if( strstr(fsInfo.f_basetype, "etfs") ){
pFile->sectorSize = fsInfo.f_bsize;
pFile->deviceCharacteristics =
/* etfs cluster size writes are atomic */
(pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) |
SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until
** the write succeeds */
SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind
** so it is ordered */
0;
}else if( !strcmp(fsInfo.f_basetype, "qnx6") ){
pFile->sectorSize = fsInfo.f_bsize;
pFile->deviceCharacteristics =
SQLITE_IOCAP_ATOMIC | /* All filesystem writes are atomic */
SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until
** the write succeeds */
SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind
** so it is ordered */
0;
}else if( !strcmp(fsInfo.f_basetype, "qnx4") ){
pFile->sectorSize = fsInfo.f_bsize;
pFile->deviceCharacteristics =
/* full bitset of atomics from max sector size and smaller */
((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 |
SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind
** so it is ordered */
0;
}else if( strstr(fsInfo.f_basetype, "dos") ){
pFile->sectorSize = fsInfo.f_bsize;
pFile->deviceCharacteristics =
/* full bitset of atomics from max sector size and smaller */
((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 |
SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind
** so it is ordered */
0;
}else{
pFile->deviceCharacteristics =
SQLITE_IOCAP_ATOMIC512 | /* blocks are atomic */
SQLITE_IOCAP_SAFE_APPEND | /* growing the file does not occur until
** the write succeeds */
0;
}
}
/* Last chance verification. If the sector size isn't a multiple of 512
** then it isn't valid.*/
if( pFile->sectorSize % 512 != 0 ){
pFile->deviceCharacteristics = 0;
pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE;
}
return pFile->sectorSize;
}
#endif /* __QNXNTO__ */
/*
** Return the device characteristics for the file.
**
** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default.
** However, that choice is controversial since technically the underlying
** file system does not always provide powersafe overwrites. (In other
** words, after a power-loss event, parts of the file that were never
** written might end up being altered.) However, non-PSOW behavior is very,
** very rare. And asserting PSOW makes a large reduction in the amount
** of required I/O for journaling, since a lot of padding is eliminated.
** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control
** available to turn it off and URI query parameter available to turn it off.
*/
static int unixDeviceCharacteristics(sqlite3_file *id){
unixFile *p = (unixFile*)id;
int rc = 0;
#ifdef __QNXNTO__
if( p->sectorSize==0 ) unixSectorSize(id);
rc = p->deviceCharacteristics;
#endif
if( p->ctrlFlags & UNIXFILE_PSOW ){
rc |= SQLITE_IOCAP_POWERSAFE_OVERWRITE;
}
return rc;
}
#if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0
/*
** Return the system page size.
**
** This function should not be called directly by other code in this file.
** Instead, it should be called via macro osGetpagesize().
*/
static int unixGetpagesize(void){
#if OS_VXWORKS
return 1024;
#elif defined(_BSD_SOURCE)
return getpagesize();
#else
return (int)sysconf(_SC_PAGESIZE);
#endif
}
#endif /* !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 */
#ifndef SQLITE_OMIT_WAL
/*
** Object used to represent an shared memory buffer.
**
** When multiple threads all reference the same wal-index, each thread
** has its own unixShm object, but they all point to a single instance
** of this unixShmNode object. In other words, each wal-index is opened
** only once per process.
**
** Each unixShmNode object is connected to a single unixInodeInfo object.
** We could coalesce this object into unixInodeInfo, but that would mean
** every open file that does not use shared memory (in other words, most
** open files) would have to carry around this extra information. So
** the unixInodeInfo object contains a pointer to this unixShmNode object
** and the unixShmNode object is created only when needed.
**
** unixMutexHeld() must be true when creating or destroying
** this object or while reading or writing the following fields:
**
** nRef
**
** The following fields are read-only after the object is created:
**
** fid
** zFilename
**
** Either unixShmNode.mutex must be held or unixShmNode.nRef==0 and
** unixMutexHeld() is true when reading or writing any other field
** in this structure.
*/
struct unixShmNode {
unixInodeInfo *pInode; /* unixInodeInfo that owns this SHM node */
sqlite3_mutex *mutex; /* Mutex to access this object */
char *zFilename; /* Name of the mmapped file */
int h; /* Open file descriptor */
int szRegion; /* Size of shared-memory regions */
u16 nRegion; /* Size of array apRegion */
u8 isReadonly; /* True if read-only */
char **apRegion; /* Array of mapped shared-memory regions */
int nRef; /* Number of unixShm objects pointing to this */
unixShm *pFirst; /* All unixShm objects pointing to this */
#ifdef SQLITE_DEBUG
u8 exclMask; /* Mask of exclusive locks held */
u8 sharedMask; /* Mask of shared locks held */
u8 nextShmId; /* Next available unixShm.id value */
#endif
};
/*
** Structure used internally by this VFS to record the state of an
** open shared memory connection.
**
** The following fields are initialized when this object is created and
** are read-only thereafter:
**
** unixShm.pFile
** unixShm.id
**
** All other fields are read/write. The unixShm.pFile->mutex must be held
** while accessing any read/write fields.
*/
struct unixShm {
unixShmNode *pShmNode; /* The underlying unixShmNode object */
unixShm *pNext; /* Next unixShm with the same unixShmNode */
u8 hasMutex; /* True if holding the unixShmNode mutex */
u8 id; /* Id of this connection within its unixShmNode */
u16 sharedMask; /* Mask of shared locks held */
u16 exclMask; /* Mask of exclusive locks held */
};
/*
** Constants used for locking
*/
#define UNIX_SHM_BASE ((22+SQLITE_SHM_NLOCK)*4) /* first lock byte */
#define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */
/*
** Apply posix advisory locks for all bytes from ofst through ofst+n-1.
**
** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking
** otherwise.
*/
static int unixShmSystemLock(
unixFile *pFile, /* Open connection to the WAL file */
int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */
int ofst, /* First byte of the locking range */
int n /* Number of bytes to lock */
){
unixShmNode *pShmNode; /* Apply locks to this open shared-memory segment */
struct flock f; /* The posix advisory locking structure */
int rc = SQLITE_OK; /* Result code form fcntl() */
/* Access to the unixShmNode object is serialized by the caller */
pShmNode = pFile->pInode->pShmNode;
assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 );
/* Shared locks never span more than one byte */
assert( n==1 || lockType!=F_RDLCK );
/* Locks are within range */
assert( n>=1 && n<=SQLITE_SHM_NLOCK );
if( pShmNode->h>=0 ){
/* Initialize the locking parameters */
memset(&f, 0, sizeof(f));
f.l_type = lockType;
f.l_whence = SEEK_SET;
f.l_start = ofst;
f.l_len = n;
rc = osFcntl(pShmNode->h, F_SETLK, &f);
rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY;
}
/* Update the global lock state and do debug tracing */
#ifdef SQLITE_DEBUG
{ u16 mask;
OSTRACE(("SHM-LOCK "));
mask = ofst>31 ? 0xffff : (1<<(ofst+n)) - (1<<ofst);
if( rc==SQLITE_OK ){
if( lockType==F_UNLCK ){
OSTRACE(("unlock %d ok", ofst));
pShmNode->exclMask &= ~mask;
pShmNode->sharedMask &= ~mask;
}else if( lockType==F_RDLCK ){
OSTRACE(("read-lock %d ok", ofst));
pShmNode->exclMask &= ~mask;
pShmNode->sharedMask |= mask;
}else{
assert( lockType==F_WRLCK );
OSTRACE(("write-lock %d ok", ofst));
pShmNode->exclMask |= mask;
pShmNode->sharedMask &= ~mask;
}
}else{
if( lockType==F_UNLCK ){
OSTRACE(("unlock %d failed", ofst));
}else if( lockType==F_RDLCK ){
OSTRACE(("read-lock failed"));
}else{
assert( lockType==F_WRLCK );
OSTRACE(("write-lock %d failed", ofst));
}
}
OSTRACE((" - afterwards %03x,%03x\n",
pShmNode->sharedMask, pShmNode->exclMask));
}
#endif
return rc;
}
/*
** Return the minimum number of 32KB shm regions that should be mapped at
** a time, assuming that each mapping must be an integer multiple of the
** current system page-size.
**
** Usually, this is 1. The exception seems to be systems that are configured
** to use 64KB pages - in this case each mapping must cover at least two
** shm regions.
*/
static int unixShmRegionPerMap(void){
int shmsz = 32*1024; /* SHM region size */
int pgsz = osGetpagesize(); /* System page size */
assert( ((pgsz-1)&pgsz)==0 ); /* Page size must be a power of 2 */
if( pgsz<shmsz ) return 1;
return pgsz/shmsz;
}
/*
** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0.
**
** This is not a VFS shared-memory method; it is a utility function called
** by VFS shared-memory methods.
*/
static void unixShmPurge(unixFile *pFd){
unixShmNode *p = pFd->pInode->pShmNode;
assert( unixMutexHeld() );
if( p && ALWAYS(p->nRef==0) ){
int nShmPerMap = unixShmRegionPerMap();
int i;
assert( p->pInode==pFd->pInode );
sqlite3_mutex_free(p->mutex);
for(i=0; i<p->nRegion; i+=nShmPerMap){
if( p->h>=0 ){
osMunmap(p->apRegion[i], p->szRegion);
}else{
sqlite3_free(p->apRegion[i]);
}
}
sqlite3_free(p->apRegion);
if( p->h>=0 ){
robust_close(pFd, p->h, __LINE__);
p->h = -1;
}
p->pInode->pShmNode = 0;
sqlite3_free(p);
}
}
/*
** Open a shared-memory area associated with open database file pDbFd.
** This particular implementation uses mmapped files.
**
** The file used to implement shared-memory is in the same directory
** as the open database file and has the same name as the open database
** file with the "-shm" suffix added. For example, if the database file
** is "/home/user1/config.db" then the file that is created and mmapped
** for shared memory will be called "/home/user1/config.db-shm".
**
** Another approach to is to use files in /dev/shm or /dev/tmp or an
** some other tmpfs mount. But if a file in a different directory
** from the database file is used, then differing access permissions
** or a chroot() might cause two different processes on the same
** database to end up using different files for shared memory -
** meaning that their memory would not really be shared - resulting
** in database corruption. Nevertheless, this tmpfs file usage
** can be enabled at compile-time using -DSQLITE_SHM_DIRECTORY="/dev/shm"
** or the equivalent. The use of the SQLITE_SHM_DIRECTORY compile-time
** option results in an incompatible build of SQLite; builds of SQLite
** that with differing SQLITE_SHM_DIRECTORY settings attempt to use the
** same database file at the same time, database corruption will likely
** result. The SQLITE_SHM_DIRECTORY compile-time option is considered
** "unsupported" and may go away in a future SQLite release.
**
** When opening a new shared-memory file, if no other instances of that
** file are currently open, in this process or in other processes, then
** the file must be truncated to zero length or have its header cleared.
**
** If the original database file (pDbFd) is using the "unix-excl" VFS
** that means that an exclusive lock is held on the database file and
** that no other processes are able to read or write the database. In
** that case, we do not really need shared memory. No shared memory
** file is created. The shared memory will be simulated with heap memory.
*/
static int unixOpenSharedMemory(unixFile *pDbFd){
struct unixShm *p = 0; /* The connection to be opened */
struct unixShmNode *pShmNode; /* The underlying mmapped file */
int rc; /* Result code */
unixInodeInfo *pInode; /* The inode of fd */
char *zShmFilename; /* Name of the file used for SHM */
int nShmFilename; /* Size of the SHM filename in bytes */
/* Allocate space for the new unixShm object. */
p = sqlite3_malloc64( sizeof(*p) );
if( p==0 ) return SQLITE_NOMEM;
memset(p, 0, sizeof(*p));
assert( pDbFd->pShm==0 );
/* Check to see if a unixShmNode object already exists. Reuse an existing
** one if present. Create a new one if necessary.
*/
unixEnterMutex();
pInode = pDbFd->pInode;
pShmNode = pInode->pShmNode;
if( pShmNode==0 ){
struct stat sStat; /* fstat() info for database file */
#ifndef SQLITE_SHM_DIRECTORY
const char *zBasePath = pDbFd->zPath;
#endif
/* Call fstat() to figure out the permissions on the database file. If
** a new *-shm file is created, an attempt will be made to create it
** with the same permissions.
*/
if( osFstat(pDbFd->h, &sStat) ){
rc = SQLITE_IOERR_FSTAT;
goto shm_open_err;
}
#ifdef SQLITE_SHM_DIRECTORY
nShmFilename = sizeof(SQLITE_SHM_DIRECTORY) + 31;
#else
nShmFilename = 6 + (int)strlen(zBasePath);
#endif
pShmNode = sqlite3_malloc64( sizeof(*pShmNode) + nShmFilename );
if( pShmNode==0 ){
rc = SQLITE_NOMEM;
goto shm_open_err;
}
memset(pShmNode, 0, sizeof(*pShmNode)+nShmFilename);
zShmFilename = pShmNode->zFilename = (char*)&pShmNode[1];
#ifdef SQLITE_SHM_DIRECTORY
sqlite3_snprintf(nShmFilename, zShmFilename,
SQLITE_SHM_DIRECTORY "/sqlite-shm-%x-%x",
(u32)sStat.st_ino, (u32)sStat.st_dev);
#else
sqlite3_snprintf(nShmFilename, zShmFilename, "%s-shm", zBasePath);
sqlite3FileSuffix3(pDbFd->zPath, zShmFilename);
#endif
pShmNode->h = -1;
pDbFd->pInode->pShmNode = pShmNode;
pShmNode->pInode = pDbFd->pInode;
pShmNode->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
if( pShmNode->mutex==0 ){
rc = SQLITE_NOMEM;
goto shm_open_err;
}
if( pInode->bProcessLock==0 ){
int openFlags = O_RDWR | O_CREAT;
if( sqlite3_uri_boolean(pDbFd->zPath, "readonly_shm", 0) ){
openFlags = O_RDONLY;
pShmNode->isReadonly = 1;
}
pShmNode->h = robust_open(zShmFilename, openFlags, (sStat.st_mode&0777));
if( pShmNode->h<0 ){
rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename);
goto shm_open_err;
}
/* If this process is running as root, make sure that the SHM file
** is owned by the same user that owns the original database. Otherwise,
** the original owner will not be able to connect.
*/
robustFchown(pShmNode->h, sStat.st_uid, sStat.st_gid);
/* Check to see if another process is holding the dead-man switch.
** If not, truncate the file to zero length.
*/
rc = SQLITE_OK;
if( unixShmSystemLock(pDbFd, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){
if( robust_ftruncate(pShmNode->h, 0) ){
rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename);
}
}
if( rc==SQLITE_OK ){
rc = unixShmSystemLock(pDbFd, F_RDLCK, UNIX_SHM_DMS, 1);
}
if( rc ) goto shm_open_err;
}
}
/* Make the new connection a child of the unixShmNode */
p->pShmNode = pShmNode;
#ifdef SQLITE_DEBUG
p->id = pShmNode->nextShmId++;
#endif
pShmNode->nRef++;
pDbFd->pShm = p;
unixLeaveMutex();
/* The reference count on pShmNode has already been incremented under
** the cover of the unixEnterMutex() mutex and the pointer from the
** new (struct unixShm) object to the pShmNode has been set. All that is
** left to do is to link the new object into the linked list starting
** at pShmNode->pFirst. This must be done while holding the pShmNode->mutex
** mutex.
*/
sqlite3_mutex_enter(pShmNode->mutex);
p->pNext = pShmNode->pFirst;
pShmNode->pFirst = p;
sqlite3_mutex_leave(pShmNode->mutex);
return SQLITE_OK;
/* Jump here on any error */
shm_open_err:
unixShmPurge(pDbFd); /* This call frees pShmNode if required */
sqlite3_free(p);
unixLeaveMutex();
return rc;
}
/*
** This function is called to obtain a pointer to region iRegion of the
** shared-memory associated with the database file fd. Shared-memory regions
** are numbered starting from zero. Each shared-memory region is szRegion
** bytes in size.
**
** If an error occurs, an error code is returned and *pp is set to NULL.
**
** Otherwise, if the bExtend parameter is 0 and the requested shared-memory
** region has not been allocated (by any client, including one running in a
** separate process), then *pp is set to NULL and SQLITE_OK returned. If
** bExtend is non-zero and the requested shared-memory region has not yet
** been allocated, it is allocated by this function.
**
** If the shared-memory region has already been allocated or is allocated by
** this call as described above, then it is mapped into this processes
** address space (if it is not already), *pp is set to point to the mapped
** memory and SQLITE_OK returned.
*/
static int unixShmMap(
sqlite3_file *fd, /* Handle open on database file */
int iRegion, /* Region to retrieve */
int szRegion, /* Size of regions */
int bExtend, /* True to extend file if necessary */
void volatile **pp /* OUT: Mapped memory */
){
unixFile *pDbFd = (unixFile*)fd;
unixShm *p;
unixShmNode *pShmNode;
int rc = SQLITE_OK;
int nShmPerMap = unixShmRegionPerMap();
int nReqRegion;
/* If the shared-memory file has not yet been opened, open it now. */
if( pDbFd->pShm==0 ){
rc = unixOpenSharedMemory(pDbFd);
if( rc!=SQLITE_OK ) return rc;
}
p = pDbFd->pShm;
pShmNode = p->pShmNode;
sqlite3_mutex_enter(pShmNode->mutex);
assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
assert( pShmNode->pInode==pDbFd->pInode );
assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );
/* Minimum number of regions required to be mapped. */
nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap;
if( pShmNode->nRegion<nReqRegion ){
char **apNew; /* New apRegion[] array */
int nByte = nReqRegion*szRegion; /* Minimum required file size */
struct stat sStat; /* Used by fstat() */
pShmNode->szRegion = szRegion;
if( pShmNode->h>=0 ){
/* The requested region is not mapped into this processes address space.
** Check to see if it has been allocated (i.e. if the wal-index file is
** large enough to contain the requested region).
*/
if( osFstat(pShmNode->h, &sStat) ){
rc = SQLITE_IOERR_SHMSIZE;
goto shmpage_out;
}
if( sStat.st_size<nByte ){
/* The requested memory region does not exist. If bExtend is set to
** false, exit early. *pp will be set to NULL and SQLITE_OK returned.
*/
if( !bExtend ){
goto shmpage_out;
}
/* Alternatively, if bExtend is true, extend the file. Do this by
** writing a single byte to the end of each (OS) page being
** allocated or extended. Technically, we need only write to the
** last page in order to extend the file. But writing to all new
** pages forces the OS to allocate them immediately, which reduces
** the chances of SIGBUS while accessing the mapped region later on.
*/
else{
static const int pgsz = 4096;
int iPg;
/* Write to the last byte of each newly allocated or extended page */
assert( (nByte % pgsz)==0 );
for(iPg=(sStat.st_size/pgsz); iPg<(nByte/pgsz); iPg++){
int x = 0;
if( seekAndWriteFd(pShmNode->h, iPg*pgsz + pgsz-1, "", 1, &x)!=1 ){
const char *zFile = pShmNode->zFilename;
rc = unixLogError(SQLITE_IOERR_SHMSIZE, "write", zFile);
goto shmpage_out;
}
}
}
}
}
/* Map the requested memory region into this processes address space. */
apNew = (char **)sqlite3_realloc(
pShmNode->apRegion, nReqRegion*sizeof(char *)
);
if( !apNew ){
rc = SQLITE_IOERR_NOMEM;
goto shmpage_out;
}
pShmNode->apRegion = apNew;
while( pShmNode->nRegion<nReqRegion ){
int nMap = szRegion*nShmPerMap;
int i;
void *pMem;
if( pShmNode->h>=0 ){
pMem = osMmap(0, nMap,
pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE,
MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion
);
if( pMem==MAP_FAILED ){
rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename);
goto shmpage_out;
}
}else{
pMem = sqlite3_malloc64(szRegion);
if( pMem==0 ){
rc = SQLITE_NOMEM;
goto shmpage_out;
}
memset(pMem, 0, szRegion);
}
for(i=0; i<nShmPerMap; i++){
pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i];
}
pShmNode->nRegion += nShmPerMap;
}
}
shmpage_out:
if( pShmNode->nRegion>iRegion ){
*pp = pShmNode->apRegion[iRegion];
}else{
*pp = 0;
}
if( pShmNode->isReadonly && rc==SQLITE_OK ) rc = SQLITE_READONLY;
sqlite3_mutex_leave(pShmNode->mutex);
return rc;
}
/*
** Change the lock state for a shared-memory segment.
**
** Note that the relationship between SHAREd and EXCLUSIVE locks is a little
** different here than in posix. In xShmLock(), one can go from unlocked
** to shared and back or from unlocked to exclusive and back. But one may
** not go from shared to exclusive or from exclusive to shared.
*/
static int unixShmLock(
sqlite3_file *fd, /* Database file holding the shared memory */
int ofst, /* First lock to acquire or release */
int n, /* Number of locks to acquire or release */
int flags /* What to do with the lock */
){
unixFile *pDbFd = (unixFile*)fd; /* Connection holding shared memory */
unixShm *p = pDbFd->pShm; /* The shared memory being locked */
unixShm *pX; /* For looping over all siblings */
unixShmNode *pShmNode = p->pShmNode; /* The underlying file iNode */
int rc = SQLITE_OK; /* Result code */
u16 mask; /* Mask of locks to take or release */
assert( pShmNode==pDbFd->pInode->pShmNode );
assert( pShmNode->pInode==pDbFd->pInode );
assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK );
assert( n>=1 );
assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)
|| flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE)
|| flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED)
|| flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) );
assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 );
assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );
mask = (1<<(ofst+n)) - (1<<ofst);
assert( n>1 || mask==(1<<ofst) );
sqlite3_mutex_enter(pShmNode->mutex);
if( flags & SQLITE_SHM_UNLOCK ){
u16 allMask = 0; /* Mask of locks held by siblings */
/* See if any siblings hold this same lock */
for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
if( pX==p ) continue;
assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 );
allMask |= pX->sharedMask;
}
/* Unlock the system-level locks */
if( (mask & allMask)==0 ){
rc = unixShmSystemLock(pDbFd, F_UNLCK, ofst+UNIX_SHM_BASE, n);
}else{
rc = SQLITE_OK;
}
/* Undo the local locks */
if( rc==SQLITE_OK ){
p->exclMask &= ~mask;
p->sharedMask &= ~mask;
}
}else if( flags & SQLITE_SHM_SHARED ){
u16 allShared = 0; /* Union of locks held by connections other than "p" */
/* Find out which shared locks are already held by sibling connections.
** If any sibling already holds an exclusive lock, go ahead and return
** SQLITE_BUSY.
*/
for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
if( (pX->exclMask & mask)!=0 ){
rc = SQLITE_BUSY;
break;
}
allShared |= pX->sharedMask;
}
/* Get shared locks at the system level, if necessary */
if( rc==SQLITE_OK ){
if( (allShared & mask)==0 ){
rc = unixShmSystemLock(pDbFd, F_RDLCK, ofst+UNIX_SHM_BASE, n);
}else{
rc = SQLITE_OK;
}
}
/* Get the local shared locks */
if( rc==SQLITE_OK ){
p->sharedMask |= mask;
}
}else{
/* Make sure no sibling connections hold locks that will block this
** lock. If any do, return SQLITE_BUSY right away.
*/
for(pX=pShmNode->pFirst; pX; pX=pX->pNext){
if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){
rc = SQLITE_BUSY;
break;
}
}
/* Get the exclusive locks at the system level. Then if successful
** also mark the local connection as being locked.
*/
if( rc==SQLITE_OK ){
rc = unixShmSystemLock(pDbFd, F_WRLCK, ofst+UNIX_SHM_BASE, n);
if( rc==SQLITE_OK ){
assert( (p->sharedMask & mask)==0 );
p->exclMask |= mask;
}
}
}
sqlite3_mutex_leave(pShmNode->mutex);
OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n",
p->id, osGetpid(0), p->sharedMask, p->exclMask));
return rc;
}
/*
** Implement a memory barrier or memory fence on shared memory.
**
** All loads and stores begun before the barrier must complete before
** any load or store begun after the barrier.
*/
static void unixShmBarrier(
sqlite3_file *fd /* Database file holding the shared memory */
){
UNUSED_PARAMETER(fd);
sqlite3MemoryBarrier(); /* compiler-defined memory barrier */
unixEnterMutex(); /* Also mutex, for redundancy */
unixLeaveMutex();
}
/*
** Close a connection to shared-memory. Delete the underlying
** storage if deleteFlag is true.
**
** If there is no shared memory associated with the connection then this
** routine is a harmless no-op.
*/
static int unixShmUnmap(
sqlite3_file *fd, /* The underlying database file */
int deleteFlag /* Delete shared-memory if true */
){
unixShm *p; /* The connection to be closed */
unixShmNode *pShmNode; /* The underlying shared-memory file */
unixShm **pp; /* For looping over sibling connections */
unixFile *pDbFd; /* The underlying database file */
pDbFd = (unixFile*)fd;
p = pDbFd->pShm;
if( p==0 ) return SQLITE_OK;
pShmNode = p->pShmNode;
assert( pShmNode==pDbFd->pInode->pShmNode );
assert( pShmNode->pInode==pDbFd->pInode );
/* Remove connection p from the set of connections associated
** with pShmNode */
sqlite3_mutex_enter(pShmNode->mutex);
for(pp=&pShmNode->pFirst; (*pp)!=p; pp = &(*pp)->pNext){}
*pp = p->pNext;
/* Free the connection p */
sqlite3_free(p);
pDbFd->pShm = 0;
sqlite3_mutex_leave(pShmNode->mutex);
/* If pShmNode->nRef has reached 0, then close the underlying
** shared-memory file, too */
unixEnterMutex();
assert( pShmNode->nRef>0 );
pShmNode->nRef--;
if( pShmNode->nRef==0 ){
if( deleteFlag && pShmNode->h>=0 ){
osUnlink(pShmNode->zFilename);
}
unixShmPurge(pDbFd);
}
unixLeaveMutex();
return SQLITE_OK;
}
#else
# define unixShmMap 0
# define unixShmLock 0
# define unixShmBarrier 0
# define unixShmUnmap 0
#endif /* #ifndef SQLITE_OMIT_WAL */
#if SQLITE_MAX_MMAP_SIZE>0
/*
** If it is currently memory mapped, unmap file pFd.
*/
static void unixUnmapfile(unixFile *pFd){
assert( pFd->nFetchOut==0 );
if( pFd->pMapRegion ){
osMunmap(pFd->pMapRegion, pFd->mmapSizeActual);
pFd->pMapRegion = 0;
pFd->mmapSize = 0;
pFd->mmapSizeActual = 0;
}
}
/*
** Attempt to set the size of the memory mapping maintained by file
** descriptor pFd to nNew bytes. Any existing mapping is discarded.
**
** If successful, this function sets the following variables:
**
** unixFile.pMapRegion
** unixFile.mmapSize
** unixFile.mmapSizeActual
**
** If unsuccessful, an error message is logged via sqlite3_log() and
** the three variables above are zeroed. In this case SQLite should
** continue accessing the database using the xRead() and xWrite()
** methods.
*/
static void unixRemapfile(
unixFile *pFd, /* File descriptor object */
i64 nNew /* Required mapping size */
){
const char *zErr = "mmap";
int h = pFd->h; /* File descriptor open on db file */
u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */
i64 nOrig = pFd->mmapSizeActual; /* Size of pOrig region in bytes */
u8 *pNew = 0; /* Location of new mapping */
int flags = PROT_READ; /* Flags to pass to mmap() */
assert( pFd->nFetchOut==0 );
assert( nNew>pFd->mmapSize );
assert( nNew<=pFd->mmapSizeMax );
assert( nNew>0 );
assert( pFd->mmapSizeActual>=pFd->mmapSize );
assert( MAP_FAILED!=0 );
#ifdef SQLITE_MMAP_READWRITE
if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
#endif
if( pOrig ){
#if HAVE_MREMAP
i64 nReuse = pFd->mmapSize;
#else
const int szSyspage = osGetpagesize();
i64 nReuse = (pFd->mmapSize & ~(szSyspage-1));
#endif
u8 *pReq = &pOrig[nReuse];
/* Unmap any pages of the existing mapping that cannot be reused. */
if( nReuse!=nOrig ){
osMunmap(pReq, nOrig-nReuse);
}
#if HAVE_MREMAP
pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE);
zErr = "mremap";
#else
pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse);
if( pNew!=MAP_FAILED ){
if( pNew!=pReq ){
osMunmap(pNew, nNew - nReuse);
pNew = 0;
}else{
pNew = pOrig;
}
}
#endif
/* The attempt to extend the existing mapping failed. Free it. */
if( pNew==MAP_FAILED || pNew==0 ){
osMunmap(pOrig, nReuse);
}
}
/* If pNew is still NULL, try to create an entirely new mapping. */
if( pNew==0 ){
pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0);
}
if( pNew==MAP_FAILED ){
pNew = 0;
nNew = 0;
unixLogError(SQLITE_OK, zErr, pFd->zPath);
/* If the mmap() above failed, assume that all subsequent mmap() calls
** will probably fail too. Fall back to using xRead/xWrite exclusively
** in this case. */
pFd->mmapSizeMax = 0;
}
pFd->pMapRegion = (void *)pNew;
pFd->mmapSize = pFd->mmapSizeActual = nNew;
}
/*
** Memory map or remap the file opened by file-descriptor pFd (if the file
** is already mapped, the existing mapping is replaced by the new). Or, if
** there already exists a mapping for this file, and there are still
** outstanding xFetch() references to it, this function is a no-op.
**
** If parameter nByte is non-negative, then it is the requested size of
** the mapping to create. Otherwise, if nByte is less than zero, then the
** requested size is the size of the file on disk. The actual size of the
** created mapping is either the requested size or the value configured
** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller.
**
** SQLITE_OK is returned if no error occurs (even if the mapping is not
** recreated as a result of outstanding references) or an SQLite error
** code otherwise.
*/
static int unixMapfile(unixFile *pFd, i64 nMap){
assert( nMap>=0 || pFd->nFetchOut==0 );
assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) );
if( pFd->nFetchOut>0 ) return SQLITE_OK;
if( nMap<0 ){
struct stat statbuf; /* Low-level file information */
if( osFstat(pFd->h, &statbuf) ){
return SQLITE_IOERR_FSTAT;
}
nMap = statbuf.st_size;
}
if( nMap>pFd->mmapSizeMax ){
nMap = pFd->mmapSizeMax;
}
assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) );
if( nMap!=pFd->mmapSize ){
unixRemapfile(pFd, nMap);
}
return SQLITE_OK;
}
#endif /* SQLITE_MAX_MMAP_SIZE>0 */
/*
** If possible, return a pointer to a mapping of file fd starting at offset
** iOff. The mapping must be valid for at least nAmt bytes.
**
** If such a pointer can be obtained, store it in *pp and return SQLITE_OK.
** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK.
** Finally, if an error does occur, return an SQLite error code. The final
** value of *pp is undefined in this case.
**
** If this function does return a pointer, the caller must eventually
** release the reference by calling unixUnfetch().
*/
static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){
#if SQLITE_MAX_MMAP_SIZE>0
unixFile *pFd = (unixFile *)fd; /* The underlying database file */
#endif
*pp = 0;
#if SQLITE_MAX_MMAP_SIZE>0
if( pFd->mmapSizeMax>0 ){
if( pFd->pMapRegion==0 ){
int rc = unixMapfile(pFd, -1);
if( rc!=SQLITE_OK ) return rc;
}
if( pFd->mmapSize >= iOff+nAmt ){
*pp = &((u8 *)pFd->pMapRegion)[iOff];
pFd->nFetchOut++;
}
}
#endif
return SQLITE_OK;
}
/*
** If the third argument is non-NULL, then this function releases a
** reference obtained by an earlier call to unixFetch(). The second
** argument passed to this function must be the same as the corresponding
** argument that was passed to the unixFetch() invocation.
**
** Or, if the third argument is NULL, then this function is being called
** to inform the VFS layer that, according to POSIX, any existing mapping
** may now be invalid and should be unmapped.
*/
static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){
#if SQLITE_MAX_MMAP_SIZE>0
unixFile *pFd = (unixFile *)fd; /* The underlying database file */
UNUSED_PARAMETER(iOff);
/* If p==0 (unmap the entire file) then there must be no outstanding
** xFetch references. Or, if p!=0 (meaning it is an xFetch reference),
** then there must be at least one outstanding. */
assert( (p==0)==(pFd->nFetchOut==0) );
/* If p!=0, it must match the iOff value. */
assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] );
if( p ){
pFd->nFetchOut--;
}else{
unixUnmapfile(pFd);
}
assert( pFd->nFetchOut>=0 );
#else
UNUSED_PARAMETER(fd);
UNUSED_PARAMETER(p);
UNUSED_PARAMETER(iOff);
#endif
return SQLITE_OK;
}
/*
** Here ends the implementation of all sqlite3_file methods.
**
********************** End sqlite3_file Methods *******************************
******************************************************************************/
/*
** This division contains definitions of sqlite3_io_methods objects that
** implement various file locking strategies. It also contains definitions
** of "finder" functions. A finder-function is used to locate the appropriate
** sqlite3_io_methods object for a particular database file. The pAppData
** field of the sqlite3_vfs VFS objects are initialized to be pointers to
** the correct finder-function for that VFS.
**
** Most finder functions return a pointer to a fixed sqlite3_io_methods
** object. The only interesting finder-function is autolockIoFinder, which
** looks at the filesystem type and tries to guess the best locking
** strategy from that.
**
** For finder-function F, two objects are created:
**
** (1) The real finder-function named "FImpt()".
**
** (2) A constant pointer to this function named just "F".
**
**
** A pointer to the F pointer is used as the pAppData value for VFS
** objects. We have to do this instead of letting pAppData point
** directly at the finder-function since C90 rules prevent a void*
** from be cast into a function pointer.
**
**
** Each instance of this macro generates two objects:
**
** * A constant sqlite3_io_methods object call METHOD that has locking
** methods CLOSE, LOCK, UNLOCK, CKRESLOCK.
**
** * An I/O method finder function called FINDER that returns a pointer
** to the METHOD object in the previous bullet.
*/
#define IOMETHODS(FINDER,METHOD,VERSION,CLOSE,LOCK,UNLOCK,CKLOCK,SHMMAP) \
static const sqlite3_io_methods METHOD = { \
VERSION, /* iVersion */ \
CLOSE, /* xClose */ \
unixRead, /* xRead */ \
unixWrite, /* xWrite */ \
unixTruncate, /* xTruncate */ \
unixSync, /* xSync */ \
unixFileSize, /* xFileSize */ \
LOCK, /* xLock */ \
UNLOCK, /* xUnlock */ \
CKLOCK, /* xCheckReservedLock */ \
unixFileControl, /* xFileControl */ \
unixSectorSize, /* xSectorSize */ \
unixDeviceCharacteristics, /* xDeviceCapabilities */ \
SHMMAP, /* xShmMap */ \
unixShmLock, /* xShmLock */ \
unixShmBarrier, /* xShmBarrier */ \
unixShmUnmap, /* xShmUnmap */ \
unixFetch, /* xFetch */ \
unixUnfetch, /* xUnfetch */ \
}; \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \
UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \
return &METHOD; \
} \
static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p) \
= FINDER##Impl;
/*
** Here are all of the sqlite3_io_methods objects for each of the
** locking strategies. Functions that return pointers to these methods
** are also created.
*/
IOMETHODS(
posixIoFinder, /* Finder function name */
posixIoMethods, /* sqlite3_io_methods object name */
3, /* shared memory and mmap are enabled */
unixClose, /* xClose method */
unixLock, /* xLock method */
unixUnlock, /* xUnlock method */
unixCheckReservedLock, /* xCheckReservedLock method */
unixShmMap /* xShmMap method */
)
IOMETHODS(
nolockIoFinder, /* Finder function name */
nolockIoMethods, /* sqlite3_io_methods object name */
3, /* shared memory is disabled */
nolockClose, /* xClose method */
nolockLock, /* xLock method */
nolockUnlock, /* xUnlock method */
nolockCheckReservedLock, /* xCheckReservedLock method */
0 /* xShmMap method */
)
IOMETHODS(
dotlockIoFinder, /* Finder function name */
dotlockIoMethods, /* sqlite3_io_methods object name */
1, /* shared memory is disabled */
dotlockClose, /* xClose method */
dotlockLock, /* xLock method */
dotlockUnlock, /* xUnlock method */
dotlockCheckReservedLock, /* xCheckReservedLock method */
0 /* xShmMap method */
)
#if SQLITE_ENABLE_LOCKING_STYLE
IOMETHODS(
flockIoFinder, /* Finder function name */
flockIoMethods, /* sqlite3_io_methods object name */
1, /* shared memory is disabled */
flockClose, /* xClose method */
flockLock, /* xLock method */
flockUnlock, /* xUnlock method */
flockCheckReservedLock, /* xCheckReservedLock method */
0 /* xShmMap method */
)
#endif
#if OS_VXWORKS
IOMETHODS(
semIoFinder, /* Finder function name */
semIoMethods, /* sqlite3_io_methods object name */
1, /* shared memory is disabled */
semXClose, /* xClose method */
semXLock, /* xLock method */
semXUnlock, /* xUnlock method */
semXCheckReservedLock, /* xCheckReservedLock method */
0 /* xShmMap method */
)
#endif
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
IOMETHODS(
afpIoFinder, /* Finder function name */
afpIoMethods, /* sqlite3_io_methods object name */
1, /* shared memory is disabled */
afpClose, /* xClose method */
afpLock, /* xLock method */
afpUnlock, /* xUnlock method */
afpCheckReservedLock, /* xCheckReservedLock method */
0 /* xShmMap method */
)
#endif
/*
** The proxy locking method is a "super-method" in the sense that it
** opens secondary file descriptors for the conch and lock files and
** it uses proxy, dot-file, AFP, and flock() locking methods on those
** secondary files. For this reason, the division that implements
** proxy locking is located much further down in the file. But we need
** to go ahead and define the sqlite3_io_methods and finder function
** for proxy locking here. So we forward declare the I/O methods.
*/
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
static int proxyClose(sqlite3_file*);
static int proxyLock(sqlite3_file*, int);
static int proxyUnlock(sqlite3_file*, int);
static int proxyCheckReservedLock(sqlite3_file*, int*);
IOMETHODS(
proxyIoFinder, /* Finder function name */
proxyIoMethods, /* sqlite3_io_methods object name */
1, /* shared memory is disabled */
proxyClose, /* xClose method */
proxyLock, /* xLock method */
proxyUnlock, /* xUnlock method */
proxyCheckReservedLock, /* xCheckReservedLock method */
0 /* xShmMap method */
)
#endif
/* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set */
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
IOMETHODS(
nfsIoFinder, /* Finder function name */
nfsIoMethods, /* sqlite3_io_methods object name */
1, /* shared memory is disabled */
unixClose, /* xClose method */
unixLock, /* xLock method */
nfsUnlock, /* xUnlock method */
unixCheckReservedLock, /* xCheckReservedLock method */
0 /* xShmMap method */
)
#endif
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
/*
** This "finder" function attempts to determine the best locking strategy
** for the database file "filePath". It then returns the sqlite3_io_methods
** object that implements that strategy.
**
** This is for MacOSX only.
*/
static const sqlite3_io_methods *autolockIoFinderImpl(
const char *filePath, /* name of the database file */
unixFile *pNew /* open file object for the database file */
){
static const struct Mapping {
const char *zFilesystem; /* Filesystem type name */
const sqlite3_io_methods *pMethods; /* Appropriate locking method */
} aMap[] = {
{ "hfs", &posixIoMethods },
{ "ufs", &posixIoMethods },
{ "afpfs", &afpIoMethods },
{ "smbfs", &afpIoMethods },
{ "webdav", &nolockIoMethods },
{ 0, 0 }
};
int i;
struct statfs fsInfo;
struct flock lockInfo;
if( !filePath ){
/* If filePath==NULL that means we are dealing with a transient file
** that does not need to be locked. */
return &nolockIoMethods;
}
if( statfs(filePath, &fsInfo) != -1 ){
if( fsInfo.f_flags & MNT_RDONLY ){
return &nolockIoMethods;
}
for(i=0; aMap[i].zFilesystem; i++){
if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){
return aMap[i].pMethods;
}
}
}
/* Default case. Handles, amongst others, "nfs".
** Test byte-range lock using fcntl(). If the call succeeds,
** assume that the file-system supports POSIX style locks.
*/
lockInfo.l_len = 1;
lockInfo.l_start = 0;
lockInfo.l_whence = SEEK_SET;
lockInfo.l_type = F_RDLCK;
if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) {
if( strcmp(fsInfo.f_fstypename, "nfs")==0 ){
return &nfsIoMethods;
} else {
return &posixIoMethods;
}
}else{
return &dotlockIoMethods;
}
}
static const sqlite3_io_methods
*(*const autolockIoFinder)(const char*,unixFile*) = autolockIoFinderImpl;
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
#if OS_VXWORKS
/*
** This "finder" function for VxWorks checks to see if posix advisory
** locking works. If it does, then that is what is used. If it does not
** work, then fallback to named semaphore locking.
*/
static const sqlite3_io_methods *vxworksIoFinderImpl(
const char *filePath, /* name of the database file */
unixFile *pNew /* the open file object */
){
struct flock lockInfo;
if( !filePath ){
/* If filePath==NULL that means we are dealing with a transient file
** that does not need to be locked. */
return &nolockIoMethods;
}
/* Test if fcntl() is supported and use POSIX style locks.
** Otherwise fall back to the named semaphore method.
*/
lockInfo.l_len = 1;
lockInfo.l_start = 0;
lockInfo.l_whence = SEEK_SET;
lockInfo.l_type = F_RDLCK;
if( osFcntl(pNew->h, F_GETLK, &lockInfo)!=-1 ) {
return &posixIoMethods;
}else{
return &semIoMethods;
}
}
static const sqlite3_io_methods
*(*const vxworksIoFinder)(const char*,unixFile*) = vxworksIoFinderImpl;
#endif /* OS_VXWORKS */
/*
** An abstract type for a pointer to an IO method finder function:
*/
typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*);
/****************************************************************************
**************************** sqlite3_vfs methods ****************************
**
** This division contains the implementation of methods on the
** sqlite3_vfs object.
*/
/*
** Initialize the contents of the unixFile structure pointed to by pId.
*/
static int fillInUnixFile(
sqlite3_vfs *pVfs, /* Pointer to vfs object */
int h, /* Open file descriptor of file being opened */
sqlite3_file *pId, /* Write to the unixFile structure here */
const char *zFilename, /* Name of the file being opened */
int ctrlFlags /* Zero or more UNIXFILE_* values */
){
const sqlite3_io_methods *pLockingStyle;
unixFile *pNew = (unixFile *)pId;
int rc = SQLITE_OK;
assert( pNew->pInode==NULL );
/* Usually the path zFilename should not be a relative pathname. The
** exception is when opening the proxy "conch" file in builds that
** include the special Apple locking styles.
*/
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
assert( zFilename==0 || zFilename[0]=='/'
|| pVfs->pAppData==(void*)&autolockIoFinder );
#else
assert( zFilename==0 || zFilename[0]=='/' );
#endif
/* No locking occurs in temporary files */
assert( zFilename!=0 || (ctrlFlags & UNIXFILE_NOLOCK)!=0 );
OSTRACE(("OPEN %-3d %s\n", h, zFilename));
pNew->h = h;
pNew->pVfs = pVfs;
pNew->zPath = zFilename;
pNew->ctrlFlags = (u8)ctrlFlags;
#if SQLITE_MAX_MMAP_SIZE>0
pNew->mmapSizeMax = sqlite3GlobalConfig.szMmap;
#endif
if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0),
"psow", SQLITE_POWERSAFE_OVERWRITE) ){
pNew->ctrlFlags |= UNIXFILE_PSOW;
}
if( strcmp(pVfs->zName,"unix-excl")==0 ){
pNew->ctrlFlags |= UNIXFILE_EXCL;
}
#if OS_VXWORKS
pNew->pId = vxworksFindFileId(zFilename);
if( pNew->pId==0 ){
ctrlFlags |= UNIXFILE_NOLOCK;
rc = SQLITE_NOMEM;
}
#endif
if( ctrlFlags & UNIXFILE_NOLOCK ){
pLockingStyle = &nolockIoMethods;
}else{
pLockingStyle = (**(finder_type*)pVfs->pAppData)(zFilename, pNew);
#if SQLITE_ENABLE_LOCKING_STYLE
/* Cache zFilename in the locking context (AFP and dotlock override) for
** proxyLock activation is possible (remote proxy is based on db name)
** zFilename remains valid until file is closed, to support */
pNew->lockingContext = (void*)zFilename;
#endif
}
if( pLockingStyle == &posixIoMethods
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
|| pLockingStyle == &nfsIoMethods
#endif
){
unixEnterMutex();
rc = findInodeInfo(pNew, &pNew->pInode);
if( rc!=SQLITE_OK ){
/* If an error occurred in findInodeInfo(), close the file descriptor
** immediately, before releasing the mutex. findInodeInfo() may fail
** in two scenarios:
**
** (a) A call to fstat() failed.
** (b) A malloc failed.
**
** Scenario (b) may only occur if the process is holding no other
** file descriptors open on the same file. If there were other file
** descriptors on this file, then no malloc would be required by
** findInodeInfo(). If this is the case, it is quite safe to close
** handle h - as it is guaranteed that no posix locks will be released
** by doing so.
**
** If scenario (a) caused the error then things are not so safe. The
** implicit assumption here is that if fstat() fails, things are in
** such bad shape that dropping a lock or two doesn't matter much.
*/
robust_close(pNew, h, __LINE__);
h = -1;
}
unixLeaveMutex();
}
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
else if( pLockingStyle == &afpIoMethods ){
/* AFP locking uses the file path so it needs to be included in
** the afpLockingContext.
*/
afpLockingContext *pCtx;
pNew->lockingContext = pCtx = sqlite3_malloc64( sizeof(*pCtx) );
if( pCtx==0 ){
rc = SQLITE_NOMEM;
}else{
/* NB: zFilename exists and remains valid until the file is closed
** according to requirement F11141. So we do not need to make a
** copy of the filename. */
pCtx->dbPath = zFilename;
pCtx->reserved = 0;
srandomdev();
unixEnterMutex();
rc = findInodeInfo(pNew, &pNew->pInode);
if( rc!=SQLITE_OK ){
sqlite3_free(pNew->lockingContext);
robust_close(pNew, h, __LINE__);
h = -1;
}
unixLeaveMutex();
}
}
#endif
else if( pLockingStyle == &dotlockIoMethods ){
/* Dotfile locking uses the file path so it needs to be included in
** the dotlockLockingContext
*/
char *zLockFile;
int nFilename;
assert( zFilename!=0 );
nFilename = (int)strlen(zFilename) + 6;
zLockFile = (char *)sqlite3_malloc64(nFilename);
if( zLockFile==0 ){
rc = SQLITE_NOMEM;
}else{
sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename);
}
pNew->lockingContext = zLockFile;
}
#if OS_VXWORKS
else if( pLockingStyle == &semIoMethods ){
/* Named semaphore locking uses the file path so it needs to be
** included in the semLockingContext
*/
unixEnterMutex();
rc = findInodeInfo(pNew, &pNew->pInode);
if( (rc==SQLITE_OK) && (pNew->pInode->pSem==NULL) ){
char *zSemName = pNew->pInode->aSemName;
int n;
sqlite3_snprintf(MAX_PATHNAME, zSemName, "/%s.sem",
pNew->pId->zCanonicalName);
for( n=1; zSemName[n]; n++ )
if( zSemName[n]=='/' ) zSemName[n] = '_';
pNew->pInode->pSem = sem_open(zSemName, O_CREAT, 0666, 1);
if( pNew->pInode->pSem == SEM_FAILED ){
rc = SQLITE_NOMEM;
pNew->pInode->aSemName[0] = '\0';
}
}
unixLeaveMutex();
}
#endif
storeLastErrno(pNew, 0);
#if OS_VXWORKS
if( rc!=SQLITE_OK ){
if( h>=0 ) robust_close(pNew, h, __LINE__);
h = -1;
osUnlink(zFilename);
pNew->ctrlFlags |= UNIXFILE_DELETE;
}
#endif
if( rc!=SQLITE_OK ){
if( h>=0 ) robust_close(pNew, h, __LINE__);
}else{
pNew->pMethod = pLockingStyle;
OpenCounter(+1);
verifyDbFile(pNew);
}
return rc;
}
/*
** Return the name of a directory in which to put temporary files.
** If no suitable temporary file directory can be found, return NULL.
*/
static const char *unixTempFileDir(void){
static const char *azDirs[] = {
0,
0,
"/var/tmp",
"/usr/tmp",
"/tmp",
"."
};
unsigned int i;
struct stat buf;
const char *zDir = sqlite3_temp_directory;
if( !azDirs[0] ) azDirs[0] = getenv("SQLITE_TMPDIR");
if( !azDirs[1] ) azDirs[1] = getenv("TMPDIR");
for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); zDir=azDirs[i++]){
if( zDir==0 ) continue;
if( osStat(zDir, &buf) ) continue;
if( !S_ISDIR(buf.st_mode) ) continue;
if( osAccess(zDir, 07) ) continue;
break;
}
return zDir;
}
/*
** Create a temporary file name in zBuf. zBuf must be allocated
** by the calling process and must be big enough to hold at least
** pVfs->mxPathname bytes.
*/
static int unixGetTempname(int nBuf, char *zBuf){
const char *zDir;
int iLimit = 0;
/* It's odd to simulate an io-error here, but really this is just
** using the io-error infrastructure to test that SQLite handles this
** function failing.
*/
SimulateIOError( return SQLITE_IOERR );
zDir = unixTempFileDir();
do{
u64 r;
sqlite3_randomness(sizeof(r), &r);
assert( nBuf>2 );
zBuf[nBuf-2] = 0;
sqlite3_snprintf(nBuf, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX"%llx%c",
zDir, r, 0);
if( zBuf[nBuf-2]!=0 || (iLimit++)>10 ) return SQLITE_ERROR;
}while( osAccess(zBuf,0)==0 );
return SQLITE_OK;
}
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
/*
** Routine to transform a unixFile into a proxy-locking unixFile.
** Implementation in the proxy-lock division, but used by unixOpen()
** if SQLITE_PREFER_PROXY_LOCKING is defined.
*/
static int proxyTransformUnixFile(unixFile*, const char*);
#endif
/*
** Search for an unused file descriptor that was opened on the database
** file (not a journal or master-journal file) identified by pathname
** zPath with SQLITE_OPEN_XXX flags matching those passed as the second
** argument to this function.
**
** Such a file descriptor may exist if a database connection was closed
** but the associated file descriptor could not be closed because some
** other file descriptor open on the same file is holding a file-lock.
** Refer to comments in the unixClose() function and the lengthy comment
** describing "Posix Advisory Locking" at the start of this file for
** further details. Also, ticket #4018.
**
** If a suitable file descriptor is found, then it is returned. If no
** such file descriptor is located, -1 is returned.
*/
static UnixUnusedFd *findReusableFd(const char *zPath, int flags){
UnixUnusedFd *pUnused = 0;
/* Do not search for an unused file descriptor on vxworks. Not because
** vxworks would not benefit from the change (it might, we're not sure),
** but because no way to test it is currently available. It is better
** not to risk breaking vxworks support for the sake of such an obscure
** feature. */
#if !OS_VXWORKS
struct stat sStat; /* Results of stat() call */
/* A stat() call may fail for various reasons. If this happens, it is
** almost certain that an open() call on the same path will also fail.
** For this reason, if an error occurs in the stat() call here, it is
** ignored and -1 is returned. The caller will try to open a new file
** descriptor on the same path, fail, and return an error to SQLite.
**
** Even if a subsequent open() call does succeed, the consequences of
** not searching for a reusable file descriptor are not dire. */
if( 0==osStat(zPath, &sStat) ){
unixInodeInfo *pInode;
unixEnterMutex();
pInode = inodeList;
while( pInode && (pInode->fileId.dev!=sStat.st_dev
|| pInode->fileId.ino!=sStat.st_ino) ){
pInode = pInode->pNext;
}
if( pInode ){
UnixUnusedFd **pp;
for(pp=&pInode->pUnused; *pp && (*pp)->flags!=flags; pp=&((*pp)->pNext));
pUnused = *pp;
if( pUnused ){
*pp = pUnused->pNext;
}
}
unixLeaveMutex();
}
#endif /* if !OS_VXWORKS */
return pUnused;
}
/*
** This function is called by unixOpen() to determine the unix permissions
** to create new files with. If no error occurs, then SQLITE_OK is returned
** and a value suitable for passing as the third argument to open(2) is
** written to *pMode. If an IO error occurs, an SQLite error code is
** returned and the value of *pMode is not modified.
**
** In most cases, this routine sets *pMode to 0, which will become
** an indication to robust_open() to create the file using
** SQLITE_DEFAULT_FILE_PERMISSIONS adjusted by the umask.
** But if the file being opened is a WAL or regular journal file, then
** this function queries the file-system for the permissions on the
** corresponding database file and sets *pMode to this value. Whenever
** possible, WAL and journal files are created using the same permissions
** as the associated database file.
**
** If the SQLITE_ENABLE_8_3_NAMES option is enabled, then the
** original filename is unavailable. But 8_3_NAMES is only used for
** FAT filesystems and permissions do not matter there, so just use
** the default permissions.
*/
static int findCreateFileMode(
const char *zPath, /* Path of file (possibly) being created */
int flags, /* Flags passed as 4th argument to xOpen() */
mode_t *pMode, /* OUT: Permissions to open file with */
uid_t *pUid, /* OUT: uid to set on the file */
gid_t *pGid /* OUT: gid to set on the file */
){
int rc = SQLITE_OK; /* Return Code */
*pMode = 0;
*pUid = 0;
*pGid = 0;
if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){
char zDb[MAX_PATHNAME+1]; /* Database file path */
int nDb; /* Number of valid bytes in zDb */
struct stat sStat; /* Output of stat() on database file */
/* zPath is a path to a WAL or journal file. The following block derives
** the path to the associated database file from zPath. This block handles
** the following naming conventions:
**
** "<path to db>-journal"
** "<path to db>-wal"
** "<path to db>-journalNN"
** "<path to db>-walNN"
**
** where NN is a decimal number. The NN naming schemes are
** used by the test_multiplex.c module.
*/
nDb = sqlite3Strlen30(zPath) - 1;
while( zPath[nDb]!='-' ){
#ifndef SQLITE_ENABLE_8_3_NAMES
/* In the normal case (8+3 filenames disabled) the journal filename
** is guaranteed to contain a '-' character. */
assert( nDb>0 );
assert( sqlite3Isalnum(zPath[nDb]) );
#else
/* If 8+3 names are possible, then the journal file might not contain
** a '-' character. So check for that case and return early. */
if( nDb==0 || zPath[nDb]=='.' ) return SQLITE_OK;
#endif
nDb--;
}
memcpy(zDb, zPath, nDb);
zDb[nDb] = '\0';
if( 0==osStat(zDb, &sStat) ){
*pMode = sStat.st_mode & 0777;
*pUid = sStat.st_uid;
*pGid = sStat.st_gid;
}else{
rc = SQLITE_IOERR_FSTAT;
}
}else if( flags & SQLITE_OPEN_DELETEONCLOSE ){
*pMode = 0600;
}
return rc;
}
/*
** Open the file zPath.
**
** Previously, the SQLite OS layer used three functions in place of this
** one:
**
** sqlite3OsOpenReadWrite();
** sqlite3OsOpenReadOnly();
** sqlite3OsOpenExclusive();
**
** These calls correspond to the following combinations of flags:
**
** ReadWrite() -> (READWRITE | CREATE)
** ReadOnly() -> (READONLY)
** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE)
**
** The old OpenExclusive() accepted a boolean argument - "delFlag". If
** true, the file was configured to be automatically deleted when the
** file handle closed. To achieve the same effect using this new
** interface, add the DELETEONCLOSE flag to those specified above for
** OpenExclusive().
*/
static int unixOpen(
sqlite3_vfs *pVfs, /* The VFS for which this is the xOpen method */
const char *zPath, /* Pathname of file to be opened */
sqlite3_file *pFile, /* The file descriptor to be filled in */
int flags, /* Input flags to control the opening */
int *pOutFlags /* Output flags returned to SQLite core */
){
unixFile *p = (unixFile *)pFile;
int fd = -1; /* File descriptor returned by open() */
int openFlags = 0; /* Flags to pass to open() */
int eType = flags&0xFFFFFF00; /* Type of file to open */
int noLock; /* True to omit locking primitives */
int rc = SQLITE_OK; /* Function Return Code */
int ctrlFlags = 0; /* UNIXFILE_* flags */
int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE);
int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE);
int isCreate = (flags & SQLITE_OPEN_CREATE);
int isReadonly = (flags & SQLITE_OPEN_READONLY);
int isReadWrite = (flags & SQLITE_OPEN_READWRITE);
#if SQLITE_ENABLE_LOCKING_STYLE
int isAutoProxy = (flags & SQLITE_OPEN_AUTOPROXY);
#endif
#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
struct statfs fsInfo;
#endif
/* If creating a master or main-file journal, this function will open
** a file-descriptor on the directory too. The first time unixSync()
** is called the directory file descriptor will be fsync()ed and close()d.
*/
int syncDir = (isCreate && (
eType==SQLITE_OPEN_MASTER_JOURNAL
|| eType==SQLITE_OPEN_MAIN_JOURNAL
|| eType==SQLITE_OPEN_WAL
));
/* If argument zPath is a NULL pointer, this function is required to open
** a temporary file. Use this buffer to store the file name in.
*/
char zTmpname[MAX_PATHNAME+2];
const char *zName = zPath;
/* Check the following statements are true:
**
** (a) Exactly one of the READWRITE and READONLY flags must be set, and
** (b) if CREATE is set, then READWRITE must also be set, and
** (c) if EXCLUSIVE is set, then CREATE must also be set.
** (d) if DELETEONCLOSE is set, then CREATE must also be set.
*/
assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly));
assert(isCreate==0 || isReadWrite);
assert(isExclusive==0 || isCreate);
assert(isDelete==0 || isCreate);
/* The main DB, main journal, WAL file and master journal are never
** automatically deleted. Nor are they ever temporary files. */
assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_DB );
assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MAIN_JOURNAL );
assert( (!isDelete && zName) || eType!=SQLITE_OPEN_MASTER_JOURNAL );
assert( (!isDelete && zName) || eType!=SQLITE_OPEN_WAL );
/* Assert that the upper layer has set one of the "file-type" flags. */
assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB
|| eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL
|| eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL
|| eType==SQLITE_OPEN_TRANSIENT_DB || eType==SQLITE_OPEN_WAL
);
/* Detect a pid change and reset the PRNG. There is a race condition
** here such that two or more threads all trying to open databases at
** the same instant might all reset the PRNG. But multiple resets
** are harmless.
*/
if( randomnessPid!=osGetpid(0) ){
randomnessPid = osGetpid(0);
sqlite3_randomness(0,0);
}
memset(p, 0, sizeof(unixFile));
if( eType==SQLITE_OPEN_MAIN_DB ){
UnixUnusedFd *pUnused;
pUnused = findReusableFd(zName, flags);
if( pUnused ){
fd = pUnused->fd;
}else{
pUnused = sqlite3_malloc64(sizeof(*pUnused));
if( !pUnused ){
return SQLITE_NOMEM;
}
}
p->pUnused = pUnused;
/* Database filenames are double-zero terminated if they are not
** URIs with parameters. Hence, they can always be passed into
** sqlite3_uri_parameter(). */
assert( (flags & SQLITE_OPEN_URI) || zName[strlen(zName)+1]==0 );
}else if( !zName ){
/* If zName is NULL, the upper layer is requesting a temp file. */
assert(isDelete && !syncDir);
rc = unixGetTempname(pVfs->mxPathname, zTmpname);
if( rc!=SQLITE_OK ){
return rc;
}
zName = zTmpname;
/* Generated temporary filenames are always double-zero terminated
** for use by sqlite3_uri_parameter(). */
assert( zName[strlen(zName)+1]==0 );
}
/* Determine the value of the flags parameter passed to POSIX function
** open(). These must be calculated even if open() is not called, as
** they may be stored as part of the file handle and used by the
** 'conch file' locking functions later on. */
if( isReadonly ) openFlags |= O_RDONLY;
if( isReadWrite ) openFlags |= O_RDWR;
if( isCreate ) openFlags |= O_CREAT;
if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW);
openFlags |= (O_LARGEFILE|O_BINARY);
if( fd<0 ){
mode_t openMode; /* Permissions to create file with */
uid_t uid; /* Userid for the file */
gid_t gid; /* Groupid for the file */
rc = findCreateFileMode(zName, flags, &openMode, &uid, &gid);
if( rc!=SQLITE_OK ){
assert( !p->pUnused );
assert( eType==SQLITE_OPEN_WAL || eType==SQLITE_OPEN_MAIN_JOURNAL );
return rc;
}
fd = robust_open(zName, openFlags, openMode);
OSTRACE(("OPENX %-3d %s 0%o\n", fd, zName, openFlags));
assert( !isExclusive || (openFlags & O_CREAT)!=0 );
if( fd<0 && errno!=EISDIR && isReadWrite ){
/* Failed to open the file for read/write access. Try read-only. */
flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
openFlags &= ~(O_RDWR|O_CREAT);
flags |= SQLITE_OPEN_READONLY;
openFlags |= O_RDONLY;
isReadonly = 1;
fd = robust_open(zName, openFlags, openMode);
}
if( fd<0 ){
rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zName);
goto open_finished;
}
/* If this process is running as root and if creating a new rollback
** journal or WAL file, set the ownership of the journal or WAL to be
** the same as the original database.
*/
if( flags & (SQLITE_OPEN_WAL|SQLITE_OPEN_MAIN_JOURNAL) ){
robustFchown(fd, uid, gid);
}
}
assert( fd>=0 );
if( pOutFlags ){
*pOutFlags = flags;
}
if( p->pUnused ){
p->pUnused->fd = fd;
p->pUnused->flags = flags;
}
if( isDelete ){
#if OS_VXWORKS
zPath = zName;
#elif defined(SQLITE_UNLINK_AFTER_CLOSE)
zPath = sqlite3_mprintf("%s", zName);
if( zPath==0 ){
robust_close(p, fd, __LINE__);
return SQLITE_NOMEM;
}
#else
osUnlink(zName);
#endif
}
#if SQLITE_ENABLE_LOCKING_STYLE
else{
p->openFlags = openFlags;
}
#endif
noLock = eType!=SQLITE_OPEN_MAIN_DB;
#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
if( fstatfs(fd, &fsInfo) == -1 ){
storeLastErrno(p, errno);
robust_close(p, fd, __LINE__);
return SQLITE_IOERR_ACCESS;
}
if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) {
((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS;
}
if (0 == strncmp("exfat", fsInfo.f_fstypename, 5)) {
((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS;
}
#endif
/* Set up appropriate ctrlFlags */
if( isDelete ) ctrlFlags |= UNIXFILE_DELETE;
if( isReadonly ) ctrlFlags |= UNIXFILE_RDONLY;
if( noLock ) ctrlFlags |= UNIXFILE_NOLOCK;
if( syncDir ) ctrlFlags |= UNIXFILE_DIRSYNC;
if( flags & SQLITE_OPEN_URI ) ctrlFlags |= UNIXFILE_URI;
#if SQLITE_ENABLE_LOCKING_STYLE
#if SQLITE_PREFER_PROXY_LOCKING
isAutoProxy = 1;
#endif
if( isAutoProxy && (zPath!=NULL) && (!noLock) && pVfs->xOpen ){
char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING");
int useProxy = 0;
/* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy, 0 means
** never use proxy, NULL means use proxy for non-local files only. */
if( envforce!=NULL ){
useProxy = atoi(envforce)>0;
}else{
useProxy = !(fsInfo.f_flags&MNT_LOCAL);
}
if( useProxy ){
rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags);
if( rc==SQLITE_OK ){
rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:");
if( rc!=SQLITE_OK ){
/* Use unixClose to clean up the resources added in fillInUnixFile
** and clear all the structure's references. Specifically,
** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op
*/
unixClose(pFile);
return rc;
}
}
goto open_finished;
}
}
#endif
rc = fillInUnixFile(pVfs, fd, pFile, zPath, ctrlFlags);
open_finished:
if( rc!=SQLITE_OK ){
sqlite3_free(p->pUnused);
}
return rc;
}
/*
** Delete the file at zPath. If the dirSync argument is true, fsync()
** the directory after deleting the file.
*/
static int unixDelete(
sqlite3_vfs *NotUsed, /* VFS containing this as the xDelete method */
const char *zPath, /* Name of file to be deleted */
int dirSync /* If true, fsync() directory after deleting file */
){
int rc = SQLITE_OK;
UNUSED_PARAMETER(NotUsed);
SimulateIOError(return SQLITE_IOERR_DELETE);
if( osUnlink(zPath)==(-1) ){
if( errno==ENOENT
#if OS_VXWORKS
|| osAccess(zPath,0)!=0
#endif
){
rc = SQLITE_IOERR_DELETE_NOENT;
}else{
rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath);
}
return rc;
}
#ifndef SQLITE_DISABLE_DIRSYNC
if( (dirSync & 1)!=0 ){
int fd;
rc = osOpenDirectory(zPath, &fd);
if( rc==SQLITE_OK ){
if( full_fsync(fd,0,0) ){
rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath);
}
robust_close(0, fd, __LINE__);
}else{
assert( rc==SQLITE_CANTOPEN );
rc = SQLITE_OK;
}
}
#endif
return rc;
}
/*
** Test the existence of or access permissions of file zPath. The
** test performed depends on the value of flags:
**
** SQLITE_ACCESS_EXISTS: Return 1 if the file exists
** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable.
** SQLITE_ACCESS_READONLY: Return 1 if the file is readable.
**
** Otherwise return 0.
*/
static int unixAccess(
sqlite3_vfs *NotUsed, /* The VFS containing this xAccess method */
const char *zPath, /* Path of the file to examine */
int flags, /* What do we want to learn about the zPath file? */
int *pResOut /* Write result boolean here */
){
UNUSED_PARAMETER(NotUsed);
SimulateIOError( return SQLITE_IOERR_ACCESS; );
assert( pResOut!=0 );
/* The spec says there are three possible values for flags. But only
** two of them are actually used */
assert( flags==SQLITE_ACCESS_EXISTS || flags==SQLITE_ACCESS_READWRITE );
if( flags==SQLITE_ACCESS_EXISTS ){
struct stat buf;
*pResOut = (0==osStat(zPath, &buf) && buf.st_size>0);
}else{
*pResOut = osAccess(zPath, W_OK|R_OK)==0;
}
return SQLITE_OK;
}
/*
**
*/
static int mkFullPathname(
const char *zPath, /* Input path */
char *zOut, /* Output buffer */
int nOut /* Allocated size of buffer zOut */
){
int nPath = sqlite3Strlen30(zPath);
int iOff = 0;
if( zPath[0]!='/' ){
if( osGetcwd(zOut, nOut-2)==0 ){
return unixLogError(SQLITE_CANTOPEN_BKPT, "getcwd", zPath);
}
iOff = sqlite3Strlen30(zOut);
zOut[iOff++] = '/';
}
if( (iOff+nPath+1)>nOut ){
/* SQLite assumes that xFullPathname() nul-terminates the output buffer
** even if it returns an error. */
zOut[iOff] = '\0';
return SQLITE_CANTOPEN_BKPT;
}
sqlite3_snprintf(nOut-iOff, &zOut[iOff], "%s", zPath);
return SQLITE_OK;
}
/*
** Turn a relative pathname into a full pathname. The relative path
** is stored as a nul-terminated string in the buffer pointed to by
** zPath.
**
** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes
** (in this case, MAX_PATHNAME bytes). The full-path is written to
** this buffer before returning.
*/
static int unixFullPathname(
sqlite3_vfs *pVfs, /* Pointer to vfs object */
const char *zPath, /* Possibly relative input path */
int nOut, /* Size of output buffer in bytes */
char *zOut /* Output buffer */
){
#if !defined(HAVE_READLINK) || !defined(HAVE_LSTAT)
return mkFullPathname(zPath, zOut, nOut);
#else
int rc = SQLITE_OK;
int nByte;
int nLink = 1; /* Number of symbolic links followed so far */
const char *zIn = zPath; /* Input path for each iteration of loop */
char *zDel = 0;
assert( pVfs->mxPathname==MAX_PATHNAME );
UNUSED_PARAMETER(pVfs);
/* It's odd to simulate an io-error here, but really this is just
** using the io-error infrastructure to test that SQLite handles this
** function failing. This function could fail if, for example, the
** current working directory has been unlinked.
*/
SimulateIOError( return SQLITE_ERROR );
do {
/* Call stat() on path zIn. Set bLink to true if the path is a symbolic
** link, or false otherwise. */
int bLink = 0;
struct stat buf;
if( osLstat(zIn, &buf)!=0 ){
if( errno!=ENOENT ){
rc = unixLogError(SQLITE_CANTOPEN_BKPT, "lstat", zIn);
}
}else{
bLink = S_ISLNK(buf.st_mode);
}
if( bLink ){
if( zDel==0 ){
zDel = sqlite3_malloc(nOut);
if( zDel==0 ) rc = SQLITE_NOMEM;
}else if( ++nLink>SQLITE_MAX_SYMLINKS ){
rc = SQLITE_CANTOPEN_BKPT;
}
if( rc==SQLITE_OK ){
nByte = osReadlink(zIn, zDel, nOut-1);
if( nByte<0 ){
rc = unixLogError(SQLITE_CANTOPEN_BKPT, "readlink", zIn);
}else{
if( zDel[0]!='/' ){
int n;
for(n = sqlite3Strlen30(zIn); n>0 && zIn[n-1]!='/'; n--);
if( nByte+n+1>nOut ){
rc = SQLITE_CANTOPEN_BKPT;
}else{
memmove(&zDel[n], zDel, nByte+1);
memcpy(zDel, zIn, n);
nByte += n;
}
}
zDel[nByte] = '\0';
}
}
zIn = zDel;
}
assert( rc!=SQLITE_OK || zIn!=zOut || zIn[0]=='/' );
if( rc==SQLITE_OK && zIn!=zOut ){
rc = mkFullPathname(zIn, zOut, nOut);
}
if( bLink==0 ) break;
zIn = zOut;
}while( rc==SQLITE_OK );
sqlite3_free(zDel);
return rc;
#endif /* HAVE_READLINK && HAVE_LSTAT */
}
#ifndef SQLITE_OMIT_LOAD_EXTENSION
/*
** Interfaces for opening a shared library, finding entry points
** within the shared library, and closing the shared library.
*/
#include <dlfcn.h>
static void *unixDlOpen(sqlite3_vfs *NotUsed, const char *zFilename){
UNUSED_PARAMETER(NotUsed);
return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL);
}
/*
** SQLite calls this function immediately after a call to unixDlSym() or
** unixDlOpen() fails (returns a null pointer). If a more detailed error
** message is available, it is written to zBufOut. If no error message
** is available, zBufOut is left unmodified and SQLite uses a default
** error message.
*/
static void unixDlError(sqlite3_vfs *NotUsed, int nBuf, char *zBufOut){
const char *zErr;
UNUSED_PARAMETER(NotUsed);
unixEnterMutex();
zErr = dlerror();
if( zErr ){
sqlite3_snprintf(nBuf, zBufOut, "%s", zErr);
}
unixLeaveMutex();
}
static void (*unixDlSym(sqlite3_vfs *NotUsed, void *p, const char*zSym))(void){
/*
** GCC with -pedantic-errors says that C90 does not allow a void* to be
** cast into a pointer to a function. And yet the library dlsym() routine
** returns a void* which is really a pointer to a function. So how do we
** use dlsym() with -pedantic-errors?
**
** Variable x below is defined to be a pointer to a function taking
** parameters void* and const char* and returning a pointer to a function.
** We initialize x by assigning it a pointer to the dlsym() function.
** (That assignment requires a cast.) Then we call the function that
** x points to.
**
** This work-around is unlikely to work correctly on any system where
** you really cannot cast a function pointer into void*. But then, on the
** other hand, dlsym() will not work on such a system either, so we have
** not really lost anything.
*/
void (*(*x)(void*,const char*))(void);
UNUSED_PARAMETER(NotUsed);
x = (void(*(*)(void*,const char*))(void))dlsym;
return (*x)(p, zSym);
}
static void unixDlClose(sqlite3_vfs *NotUsed, void *pHandle){
UNUSED_PARAMETER(NotUsed);
dlclose(pHandle);
}
#else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */
#define unixDlOpen 0
#define unixDlError 0
#define unixDlSym 0
#define unixDlClose 0
#endif
/*
** Write nBuf bytes of random data to the supplied buffer zBuf.
*/
static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){
UNUSED_PARAMETER(NotUsed);
assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int)));
/* We have to initialize zBuf to prevent valgrind from reporting
** errors. The reports issued by valgrind are incorrect - we would
** prefer that the randomness be increased by making use of the
** uninitialized space in zBuf - but valgrind errors tend to worry
** some users. Rather than argue, it seems easier just to initialize
** the whole array and silence valgrind, even if that means less randomness
** in the random seed.
**
** When testing, initializing zBuf[] to zero is all we do. That means
** that we always use the same random number sequence. This makes the
** tests repeatable.
*/
memset(zBuf, 0, nBuf);
randomnessPid = osGetpid(0);
#if !defined(SQLITE_TEST) && !defined(SQLITE_OMIT_RANDOMNESS)
{
int fd, got;
fd = robust_open("/dev/urandom", O_RDONLY, 0);
if( fd<0 ){
time_t t;
time(&t);
memcpy(zBuf, &t, sizeof(t));
memcpy(&zBuf[sizeof(t)], &randomnessPid, sizeof(randomnessPid));
assert( sizeof(t)+sizeof(randomnessPid)<=(size_t)nBuf );
nBuf = sizeof(t) + sizeof(randomnessPid);
}else{
do{ got = osRead(fd, zBuf, nBuf); }while( got<0 && errno==EINTR );
robust_close(0, fd, __LINE__);
}
}
#endif
return nBuf;
}
/*
** Sleep for a little while. Return the amount of time slept.
** The argument is the number of microseconds we want to sleep.
** The return value is the number of microseconds of sleep actually
** requested from the underlying operating system, a number which
** might be greater than or equal to the argument, but not less
** than the argument.
*/
static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){
#if OS_VXWORKS
struct timespec sp;
sp.tv_sec = microseconds / 1000000;
sp.tv_nsec = (microseconds % 1000000) * 1000;
nanosleep(&sp, NULL);
UNUSED_PARAMETER(NotUsed);
return microseconds;
#elif defined(HAVE_USLEEP) && HAVE_USLEEP
usleep(microseconds);
UNUSED_PARAMETER(NotUsed);
return microseconds;
#else
int seconds = (microseconds+999999)/1000000;
sleep(seconds);
UNUSED_PARAMETER(NotUsed);
return seconds*1000000;
#endif
}
/*
** The following variable, if set to a non-zero value, is interpreted as
** the number of seconds since 1970 and is used to set the result of
** sqlite3OsCurrentTime() during testing.
*/
#ifdef SQLITE_TEST
int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */
#endif
/*
** Find the current time (in Universal Coordinated Time). Write into *piNow
** the current time and date as a Julian Day number times 86_400_000. In
** other words, write into *piNow the number of milliseconds since the Julian
** epoch of noon in Greenwich on November 24, 4714 B.C according to the
** proleptic Gregorian calendar.
**
** On success, return SQLITE_OK. Return SQLITE_ERROR if the time and date
** cannot be found.
*/
static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){
static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000;
int rc = SQLITE_OK;
#if defined(NO_GETTOD)
time_t t;
time(&t);
*piNow = ((sqlite3_int64)t)*1000 + unixEpoch;
#elif OS_VXWORKS
struct timespec sNow;
clock_gettime(CLOCK_REALTIME, &sNow);
*piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000;
#else
struct timeval sNow;
(void)gettimeofday(&sNow, 0); /* Cannot fail given valid arguments */
*piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000;
#endif
#ifdef SQLITE_TEST
if( sqlite3_current_time ){
*piNow = 1000*(sqlite3_int64)sqlite3_current_time + unixEpoch;
}
#endif
UNUSED_PARAMETER(NotUsed);
return rc;
}
#ifndef SQLITE_OMIT_DEPRECATED
/*
** Find the current time (in Universal Coordinated Time). Write the
** current time and date as a Julian Day number into *prNow and
** return 0. Return 1 if the time and date cannot be found.
*/
static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){
sqlite3_int64 i = 0;
int rc;
UNUSED_PARAMETER(NotUsed);
rc = unixCurrentTimeInt64(0, &i);
*prNow = i/86400000.0;
return rc;
}
#else
# define unixCurrentTime 0
#endif
#ifndef SQLITE_OMIT_DEPRECATED
/*
** We added the xGetLastError() method with the intention of providing
** better low-level error messages when operating-system problems come up
** during SQLite operation. But so far, none of that has been implemented
** in the core. So this routine is never called. For now, it is merely
** a place-holder.
*/
static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){
UNUSED_PARAMETER(NotUsed);
UNUSED_PARAMETER(NotUsed2);
UNUSED_PARAMETER(NotUsed3);
return 0;
}
#else
# define unixGetLastError 0
#endif
/*
************************ End of sqlite3_vfs methods ***************************
******************************************************************************/
/******************************************************************************
************************** Begin Proxy Locking ********************************
**
** Proxy locking is a "uber-locking-method" in this sense: It uses the
** other locking methods on secondary lock files. Proxy locking is a
** meta-layer over top of the primitive locking implemented above. For
** this reason, the division that implements of proxy locking is deferred
** until late in the file (here) after all of the other I/O methods have
** been defined - so that the primitive locking methods are available
** as services to help with the implementation of proxy locking.
**
****
**
** The default locking schemes in SQLite use byte-range locks on the
** database file to coordinate safe, concurrent access by multiple readers
** and writers [http://sqlite.org/lockingv3.html]. The five file locking
** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented
** as POSIX read & write locks over fixed set of locations (via fsctl),
** on AFP and SMB only exclusive byte-range locks are available via fsctl
** with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states.
** To simulate a F_RDLCK on the shared range, on AFP a randomly selected
** address in the shared range is taken for a SHARED lock, the entire
** shared range is taken for an EXCLUSIVE lock):
**
** PENDING_BYTE 0x40000000
** RESERVED_BYTE 0x40000001
** SHARED_RANGE 0x40000002 -> 0x40000200
**
** This works well on the local file system, but shows a nearly 100x
** slowdown in read performance on AFP because the AFP client disables
** the read cache when byte-range locks are present. Enabling the read
** cache exposes a cache coherency problem that is present on all OS X
** supported network file systems. NFS and AFP both observe the
** close-to-open semantics for ensuring cache coherency
** [http://nfs.sourceforge.net/#faq_a8], which does not effectively
** address the requirements for concurrent database access by multiple
** readers and writers
** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html].
**
** To address the performance and cache coherency issues, proxy file locking
** changes the way database access is controlled by limiting access to a
** single host at a time and moving file locks off of the database file
** and onto a proxy file on the local file system.
**
**
** Using proxy locks
** -----------------
**
** C APIs
**
** sqlite3_file_control(db, dbname, SQLITE_FCNTL_SET_LOCKPROXYFILE,
** <proxy_path> | ":auto:");
** sqlite3_file_control(db, dbname, SQLITE_FCNTL_GET_LOCKPROXYFILE,
** &<proxy_path>);
**
**
** SQL pragmas
**
** PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto:
** PRAGMA [database.]lock_proxy_file
**
** Specifying ":auto:" means that if there is a conch file with a matching
** host ID in it, the proxy path in the conch file will be used, otherwise
** a proxy path based on the user's temp dir
** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the
** actual proxy file name is generated from the name and path of the
** database file. For example:
**
** For database path "/Users/me/foo.db"
** The lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:")
**
** Once a lock proxy is configured for a database connection, it can not
** be removed, however it may be switched to a different proxy path via
** the above APIs (assuming the conch file is not being held by another
** connection or process).
**
**
** How proxy locking works
** -----------------------
**
** Proxy file locking relies primarily on two new supporting files:
**
** * conch file to limit access to the database file to a single host
** at a time
**
** * proxy file to act as a proxy for the advisory locks normally
** taken on the database
**
** The conch file - to use a proxy file, sqlite must first "hold the conch"
** by taking an sqlite-style shared lock on the conch file, reading the
** contents and comparing the host's unique host ID (see below) and lock
** proxy path against the values stored in the conch. The conch file is
** stored in the same directory as the database file and the file name
** is patterned after the database file name as ".<databasename>-conch".
** If the conch file does not exist, or its contents do not match the
** host ID and/or proxy path, then the lock is escalated to an exclusive
** lock and the conch file contents is updated with the host ID and proxy
** path and the lock is downgraded to a shared lock again. If the conch
** is held by another process (with a shared lock), the exclusive lock
** will fail and SQLITE_BUSY is returned.
**
** The proxy file - a single-byte file used for all advisory file locks
** normally taken on the database file. This allows for safe sharing
** of the database file for multiple readers and writers on the same
** host (the conch ensures that they all use the same local lock file).
**
** Requesting the lock proxy does not immediately take the conch, it is
** only taken when the first request to lock database file is made.
** This matches the semantics of the traditional locking behavior, where
** opening a connection to a database file does not take a lock on it.
** The shared lock and an open file descriptor are maintained until
** the connection to the database is closed.
**
** The proxy file and the lock file are never deleted so they only need
** to be created the first time they are used.
**
** Configuration options
** ---------------------
**
** SQLITE_PREFER_PROXY_LOCKING
**
** Database files accessed on non-local file systems are
** automatically configured for proxy locking, lock files are
** named automatically using the same logic as
** PRAGMA lock_proxy_file=":auto:"
**
** SQLITE_PROXY_DEBUG
**
** Enables the logging of error messages during host id file
** retrieval and creation
**
** LOCKPROXYDIR
**
** Overrides the default directory used for lock proxy files that
** are named automatically via the ":auto:" setting
**
** SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
**
** Permissions to use when creating a directory for storing the
** lock proxy files, only used when LOCKPROXYDIR is not set.
**
**
** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING,
** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will
** force proxy locking to be used for every database file opened, and 0
** will force automatic proxy locking to be disabled for all database
** files (explicitly calling the SQLITE_FCNTL_SET_LOCKPROXYFILE pragma or
** sqlite_file_control API is not affected by SQLITE_FORCE_PROXY_LOCKING).
*/
/*
** Proxy locking is only available on MacOSX
*/
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
/*
** The proxyLockingContext has the path and file structures for the remote
** and local proxy files in it
*/
typedef struct proxyLockingContext proxyLockingContext;
struct proxyLockingContext {
unixFile *conchFile; /* Open conch file */
char *conchFilePath; /* Name of the conch file */
unixFile *lockProxy; /* Open proxy lock file */
char *lockProxyPath; /* Name of the proxy lock file */
char *dbPath; /* Name of the open file */
int conchHeld; /* 1 if the conch is held, -1 if lockless */
int nFails; /* Number of conch taking failures */
void *oldLockingContext; /* Original lockingcontext to restore on close */
sqlite3_io_methods const *pOldMethod; /* Original I/O methods for close */
};
/*
** The proxy lock file path for the database at dbPath is written into lPath,
** which must point to valid, writable memory large enough for a maxLen length
** file path.
*/
static int proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen){
int len;
int dbLen;
int i;
#ifdef LOCKPROXYDIR
len = strlcpy(lPath, LOCKPROXYDIR, maxLen);
#else
# ifdef _CS_DARWIN_USER_TEMP_DIR
{
if( !confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen) ){
OSTRACE(("GETLOCKPATH failed %s errno=%d pid=%d\n",
lPath, errno, osGetpid(0)));
return SQLITE_IOERR_LOCK;
}
len = strlcat(lPath, "sqliteplocks", maxLen);
}
# else
len = strlcpy(lPath, "/tmp/", maxLen);
# endif
#endif
if( lPath[len-1]!='/' ){
len = strlcat(lPath, "/", maxLen);
}
/* transform the db path to a unique cache name */
dbLen = (int)strlen(dbPath);
for( i=0; i<dbLen && (i+len+7)<(int)maxLen; i++){
char c = dbPath[i];
lPath[i+len] = (c=='/')?'_':c;
}
lPath[i+len]='\0';
strlcat(lPath, ":auto:", maxLen);
OSTRACE(("GETLOCKPATH proxy lock path=%s pid=%d\n", lPath, osGetpid(0)));
return SQLITE_OK;
}
/*
** Creates the lock file and any missing directories in lockPath
*/
static int proxyCreateLockPath(const char *lockPath){
int i, len;
char buf[MAXPATHLEN];
int start = 0;
assert(lockPath!=NULL);
/* try to create all the intermediate directories */
len = (int)strlen(lockPath);
buf[0] = lockPath[0];
for( i=1; i<len; i++ ){
if( lockPath[i] == '/' && (i - start > 0) ){
/* only mkdir if leaf dir != "." or "/" or ".." */
if( i-start>2 || (i-start==1 && buf[start] != '.' && buf[start] != '/')
|| (i-start==2 && buf[start] != '.' && buf[start+1] != '.') ){
buf[i]='\0';
if( osMkdir(buf, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){
int err=errno;
if( err!=EEXIST ) {
OSTRACE(("CREATELOCKPATH FAILED creating %s, "
"'%s' proxy lock path=%s pid=%d\n",
buf, strerror(err), lockPath, osGetpid(0)));
return err;
}
}
}
start=i+1;
}
buf[i] = lockPath[i];
}
OSTRACE(("CREATELOCKPATH proxy lock path=%s pid=%d\n",lockPath,osGetpid(0)));
return 0;
}
/*
** Create a new VFS file descriptor (stored in memory obtained from
** sqlite3_malloc) and open the file named "path" in the file descriptor.
**
** The caller is responsible not only for closing the file descriptor
** but also for freeing the memory associated with the file descriptor.
*/
static int proxyCreateUnixFile(
const char *path, /* path for the new unixFile */
unixFile **ppFile, /* unixFile created and returned by ref */
int islockfile /* if non zero missing dirs will be created */
) {
int fd = -1;
unixFile *pNew;
int rc = SQLITE_OK;
int openFlags = O_RDWR | O_CREAT;
sqlite3_vfs dummyVfs;
int terrno = 0;
UnixUnusedFd *pUnused = NULL;
/* 1. first try to open/create the file
** 2. if that fails, and this is a lock file (not-conch), try creating
** the parent directories and then try again.
** 3. if that fails, try to open the file read-only
** otherwise return BUSY (if lock file) or CANTOPEN for the conch file
*/
pUnused = findReusableFd(path, openFlags);
if( pUnused ){
fd = pUnused->fd;
}else{
pUnused = sqlite3_malloc64(sizeof(*pUnused));
if( !pUnused ){
return SQLITE_NOMEM;
}
}
if( fd<0 ){
fd = robust_open(path, openFlags, 0);
terrno = errno;
if( fd<0 && errno==ENOENT && islockfile ){
if( proxyCreateLockPath(path) == SQLITE_OK ){
fd = robust_open(path, openFlags, 0);
}
}
}
if( fd<0 ){
openFlags = O_RDONLY;
fd = robust_open(path, openFlags, 0);
terrno = errno;
}
if( fd<0 ){
if( islockfile ){
return SQLITE_BUSY;
}
switch (terrno) {
case EACCES:
return SQLITE_PERM;
case EIO:
return SQLITE_IOERR_LOCK; /* even though it is the conch */
default:
return SQLITE_CANTOPEN_BKPT;
}
}
pNew = (unixFile *)sqlite3_malloc64(sizeof(*pNew));
if( pNew==NULL ){
rc = SQLITE_NOMEM;
goto end_create_proxy;
}
memset(pNew, 0, sizeof(unixFile));
pNew->openFlags = openFlags;
memset(&dummyVfs, 0, sizeof(dummyVfs));
dummyVfs.pAppData = (void*)&autolockIoFinder;
dummyVfs.zName = "dummy";
pUnused->fd = fd;
pUnused->flags = openFlags;
pNew->pUnused = pUnused;
rc = fillInUnixFile(&dummyVfs, fd, (sqlite3_file*)pNew, path, 0);
if( rc==SQLITE_OK ){
*ppFile = pNew;
return SQLITE_OK;
}
end_create_proxy:
robust_close(pNew, fd, __LINE__);
sqlite3_free(pNew);
sqlite3_free(pUnused);
return rc;
}
#ifdef SQLITE_TEST
/* simulate multiple hosts by creating unique hostid file paths */
int sqlite3_hostid_num = 0;
#endif
#define PROXY_HOSTIDLEN 16 /* conch file host id length */
#ifdef HAVE_GETHOSTUUID
/* Not always defined in the headers as it ought to be */
extern int gethostuuid(uuid_t id, const struct timespec *wait);
#endif
/* get the host ID via gethostuuid(), pHostID must point to PROXY_HOSTIDLEN
** bytes of writable memory.
*/
static int proxyGetHostID(unsigned char *pHostID, int *pError){
assert(PROXY_HOSTIDLEN == sizeof(uuid_t));
memset(pHostID, 0, PROXY_HOSTIDLEN);
#ifdef HAVE_GETHOSTUUID
{
struct timespec timeout = {1, 0}; /* 1 sec timeout */
if( gethostuuid(pHostID, &timeout) ){
int err = errno;
if( pError ){
*pError = err;
}
return SQLITE_IOERR;
}
}
#else
UNUSED_PARAMETER(pError);
#endif
#ifdef SQLITE_TEST
/* simulate multiple hosts by creating unique hostid file paths */
if( sqlite3_hostid_num != 0){
pHostID[0] = (char)(pHostID[0] + (char)(sqlite3_hostid_num & 0xFF));
}
#endif
return SQLITE_OK;
}
/* The conch file contains the header, host id and lock file path
*/
#define PROXY_CONCHVERSION 2 /* 1-byte header, 16-byte host id, path */
#define PROXY_HEADERLEN 1 /* conch file header length */
#define PROXY_PATHINDEX (PROXY_HEADERLEN+PROXY_HOSTIDLEN)
#define PROXY_MAXCONCHLEN (PROXY_HEADERLEN+PROXY_HOSTIDLEN+MAXPATHLEN)
/*
** Takes an open conch file, copies the contents to a new path and then moves
** it back. The newly created file's file descriptor is assigned to the
** conch file structure and finally the original conch file descriptor is
** closed. Returns zero if successful.
*/
static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
unixFile *conchFile = pCtx->conchFile;
char tPath[MAXPATHLEN];
char buf[PROXY_MAXCONCHLEN];
char *cPath = pCtx->conchFilePath;
size_t readLen = 0;
size_t pathLen = 0;
char errmsg[64] = "";
int fd = -1;
int rc = -1;
UNUSED_PARAMETER(myHostID);
/* create a new path by replace the trailing '-conch' with '-break' */
pathLen = strlcpy(tPath, cPath, MAXPATHLEN);
if( pathLen>MAXPATHLEN || pathLen<6 ||
(strlcpy(&tPath[pathLen-5], "break", 6) != 5) ){
sqlite3_snprintf(sizeof(errmsg),errmsg,"path error (len %d)",(int)pathLen);
goto end_breaklock;
}
/* read the conch content */
readLen = osPread(conchFile->h, buf, PROXY_MAXCONCHLEN, 0);
if( readLen<PROXY_PATHINDEX ){
sqlite3_snprintf(sizeof(errmsg),errmsg,"read error (len %d)",(int)readLen);
goto end_breaklock;
}
/* write it out to the temporary break file */
fd = robust_open(tPath, (O_RDWR|O_CREAT|O_EXCL), 0);
if( fd<0 ){
sqlite3_snprintf(sizeof(errmsg), errmsg, "create failed (%d)", errno);
goto end_breaklock;
}
if( osPwrite(fd, buf, readLen, 0) != (ssize_t)readLen ){
sqlite3_snprintf(sizeof(errmsg), errmsg, "write failed (%d)", errno);
goto end_breaklock;
}
if( rename(tPath, cPath) ){
sqlite3_snprintf(sizeof(errmsg), errmsg, "rename failed (%d)", errno);
goto end_breaklock;
}
rc = 0;
fprintf(stderr, "broke stale lock on %s\n", cPath);
robust_close(pFile, conchFile->h, __LINE__);
conchFile->h = fd;
conchFile->openFlags = O_RDWR | O_CREAT;
end_breaklock:
if( rc ){
if( fd>=0 ){
osUnlink(tPath);
robust_close(pFile, fd, __LINE__);
}
fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg);
}
return rc;
}
/* Take the requested lock on the conch file and break a stale lock if the
** host id matches.
*/
static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
unixFile *conchFile = pCtx->conchFile;
int rc = SQLITE_OK;
int nTries = 0;
struct timespec conchModTime;
memset(&conchModTime, 0, sizeof(conchModTime));
do {
rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);
nTries ++;
if( rc==SQLITE_BUSY ){
/* If the lock failed (busy):
* 1st try: get the mod time of the conch, wait 0.5s and try again.
* 2nd try: fail if the mod time changed or host id is different, wait
* 10 sec and try again
* 3rd try: break the lock unless the mod time has changed.
*/
struct stat buf;
if( osFstat(conchFile->h, &buf) ){
storeLastErrno(pFile, errno);
return SQLITE_IOERR_LOCK;
}
if( nTries==1 ){
conchModTime = buf.st_mtimespec;
usleep(500000); /* wait 0.5 sec and try the lock again*/
continue;
}
assert( nTries>1 );
if( conchModTime.tv_sec != buf.st_mtimespec.tv_sec ||
conchModTime.tv_nsec != buf.st_mtimespec.tv_nsec ){
return SQLITE_BUSY;
}
if( nTries==2 ){
char tBuf[PROXY_MAXCONCHLEN];
int len = osPread(conchFile->h, tBuf, PROXY_MAXCONCHLEN, 0);
if( len<0 ){
storeLastErrno(pFile, errno);
return SQLITE_IOERR_LOCK;
}
if( len>PROXY_PATHINDEX && tBuf[0]==(char)PROXY_CONCHVERSION){
/* don't break the lock if the host id doesn't match */
if( 0!=memcmp(&tBuf[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN) ){
return SQLITE_BUSY;
}
}else{
/* don't break the lock on short read or a version mismatch */
return SQLITE_BUSY;
}
usleep(10000000); /* wait 10 sec and try the lock again */
continue;
}
assert( nTries==3 );
if( 0==proxyBreakConchLock(pFile, myHostID) ){
rc = SQLITE_OK;
if( lockType==EXCLUSIVE_LOCK ){
rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK);
}
if( !rc ){
rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);
}
}
}
} while( rc==SQLITE_BUSY && nTries<3 );
return rc;
}
/* Takes the conch by taking a shared lock and read the contents conch, if
** lockPath is non-NULL, the host ID and lock file path must match. A NULL
** lockPath means that the lockPath in the conch file will be used if the
** host IDs match, or a new lock path will be generated automatically
** and written to the conch file.
*/
static int proxyTakeConch(unixFile *pFile){
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
if( pCtx->conchHeld!=0 ){
return SQLITE_OK;
}else{
unixFile *conchFile = pCtx->conchFile;
uuid_t myHostID;
int pError = 0;
char readBuf[PROXY_MAXCONCHLEN];
char lockPath[MAXPATHLEN];
char *tempLockPath = NULL;
int rc = SQLITE_OK;
int createConch = 0;
int hostIdMatch = 0;
int readLen = 0;
int tryOldLockPath = 0;
int forceNewLockPath = 0;
OSTRACE(("TAKECONCH %d for %s pid=%d\n", conchFile->h,
(pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"),
osGetpid(0)));
rc = proxyGetHostID(myHostID, &pError);
if( (rc&0xff)==SQLITE_IOERR ){
storeLastErrno(pFile, pError);
goto end_takeconch;
}
rc = proxyConchLock(pFile, myHostID, SHARED_LOCK);
if( rc!=SQLITE_OK ){
goto end_takeconch;
}
/* read the existing conch file */
readLen = seekAndRead((unixFile*)conchFile, 0, readBuf, PROXY_MAXCONCHLEN);
if( readLen<0 ){
/* I/O error: lastErrno set by seekAndRead */
storeLastErrno(pFile, conchFile->lastErrno);
rc = SQLITE_IOERR_READ;
goto end_takeconch;
}else if( readLen<=(PROXY_HEADERLEN+PROXY_HOSTIDLEN) ||
readBuf[0]!=(char)PROXY_CONCHVERSION ){
/* a short read or version format mismatch means we need to create a new
** conch file.
*/
createConch = 1;
}
/* if the host id matches and the lock path already exists in the conch
** we'll try to use the path there, if we can't open that path, we'll
** retry with a new auto-generated path
*/
do { /* in case we need to try again for an :auto: named lock file */
if( !createConch && !forceNewLockPath ){
hostIdMatch = !memcmp(&readBuf[PROXY_HEADERLEN], myHostID,
PROXY_HOSTIDLEN);
/* if the conch has data compare the contents */
if( !pCtx->lockProxyPath ){
/* for auto-named local lock file, just check the host ID and we'll
** use the local lock file path that's already in there
*/
if( hostIdMatch ){
size_t pathLen = (readLen - PROXY_PATHINDEX);
if( pathLen>=MAXPATHLEN ){
pathLen=MAXPATHLEN-1;
}
memcpy(lockPath, &readBuf[PROXY_PATHINDEX], pathLen);
lockPath[pathLen] = 0;
tempLockPath = lockPath;
tryOldLockPath = 1;
/* create a copy of the lock path if the conch is taken */
goto end_takeconch;
}
}else if( hostIdMatch
&& !strncmp(pCtx->lockProxyPath, &readBuf[PROXY_PATHINDEX],
readLen-PROXY_PATHINDEX)
){
/* conch host and lock path match */
goto end_takeconch;
}
}
/* if the conch isn't writable and doesn't match, we can't take it */
if( (conchFile->openFlags&O_RDWR) == 0 ){
rc = SQLITE_BUSY;
goto end_takeconch;
}
/* either the conch didn't match or we need to create a new one */
if( !pCtx->lockProxyPath ){
proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN);
tempLockPath = lockPath;
/* create a copy of the lock path _only_ if the conch is taken */
}
/* update conch with host and path (this will fail if other process
** has a shared lock already), if the host id matches, use the big
** stick.
*/
futimes(conchFile->h, NULL);
if( hostIdMatch && !createConch ){
if( conchFile->pInode && conchFile->pInode->nShared>1 ){
/* We are trying for an exclusive lock but another thread in this
** same process is still holding a shared lock. */
rc = SQLITE_BUSY;
} else {
rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK);
}
}else{
rc = proxyConchLock(pFile, myHostID, EXCLUSIVE_LOCK);
}
if( rc==SQLITE_OK ){
char writeBuffer[PROXY_MAXCONCHLEN];
int writeSize = 0;
writeBuffer[0] = (char)PROXY_CONCHVERSION;
memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN);
if( pCtx->lockProxyPath!=NULL ){
strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath,
MAXPATHLEN);
}else{
strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN);
}
writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]);
robust_ftruncate(conchFile->h, writeSize);
rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0);
full_fsync(conchFile->h,0,0);
/* If we created a new conch file (not just updated the contents of a
** valid conch file), try to match the permissions of the database
*/
if( rc==SQLITE_OK && createConch ){
struct stat buf;
int err = osFstat(pFile->h, &buf);
if( err==0 ){
mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP |
S_IROTH|S_IWOTH);
/* try to match the database file R/W permissions, ignore failure */
#ifndef SQLITE_PROXY_DEBUG
osFchmod(conchFile->h, cmode);
#else
do{
rc = osFchmod(conchFile->h, cmode);
}while( rc==(-1) && errno==EINTR );
if( rc!=0 ){
int code = errno;
fprintf(stderr, "fchmod %o FAILED with %d %s\n",
cmode, code, strerror(code));
} else {
fprintf(stderr, "fchmod %o SUCCEDED\n",cmode);
}
}else{
int code = errno;
fprintf(stderr, "STAT FAILED[%d] with %d %s\n",
err, code, strerror(code));
#endif
}
}
}
conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK);
end_takeconch:
OSTRACE(("TRANSPROXY: CLOSE %d\n", pFile->h));
if( rc==SQLITE_OK && pFile->openFlags ){
int fd;
if( pFile->h>=0 ){
robust_close(pFile, pFile->h, __LINE__);
}
pFile->h = -1;
fd = robust_open(pCtx->dbPath, pFile->openFlags, 0);
OSTRACE(("TRANSPROXY: OPEN %d\n", fd));
if( fd>=0 ){
pFile->h = fd;
}else{
rc=SQLITE_CANTOPEN_BKPT; /* SQLITE_BUSY? proxyTakeConch called
during locking */
}
}
if( rc==SQLITE_OK && !pCtx->lockProxy ){
char *path = tempLockPath ? tempLockPath : pCtx->lockProxyPath;
rc = proxyCreateUnixFile(path, &pCtx->lockProxy, 1);
if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && tryOldLockPath ){
/* we couldn't create the proxy lock file with the old lock file path
** so try again via auto-naming
*/
forceNewLockPath = 1;
tryOldLockPath = 0;
continue; /* go back to the do {} while start point, try again */
}
}
if( rc==SQLITE_OK ){
/* Need to make a copy of path if we extracted the value
** from the conch file or the path was allocated on the stack
*/
if( tempLockPath ){
pCtx->lockProxyPath = sqlite3DbStrDup(0, tempLockPath);
if( !pCtx->lockProxyPath ){
rc = SQLITE_NOMEM;
}
}
}
if( rc==SQLITE_OK ){
pCtx->conchHeld = 1;
if( pCtx->lockProxy->pMethod == &afpIoMethods ){
afpLockingContext *afpCtx;
afpCtx = (afpLockingContext *)pCtx->lockProxy->lockingContext;
afpCtx->dbPath = pCtx->lockProxyPath;
}
} else {
conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);
}
OSTRACE(("TAKECONCH %d %s\n", conchFile->h,
rc==SQLITE_OK?"ok":"failed"));
return rc;
} while (1); /* in case we need to retry the :auto: lock file -
** we should never get here except via the 'continue' call. */
}
}
/*
** If pFile holds a lock on a conch file, then release that lock.
*/
static int proxyReleaseConch(unixFile *pFile){
int rc = SQLITE_OK; /* Subroutine return code */
proxyLockingContext *pCtx; /* The locking context for the proxy lock */
unixFile *conchFile; /* Name of the conch file */
pCtx = (proxyLockingContext *)pFile->lockingContext;
conchFile = pCtx->conchFile;
OSTRACE(("RELEASECONCH %d for %s pid=%d\n", conchFile->h,
(pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"),
osGetpid(0)));
if( pCtx->conchHeld>0 ){
rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);
}
pCtx->conchHeld = 0;
OSTRACE(("RELEASECONCH %d %s\n", conchFile->h,
(rc==SQLITE_OK ? "ok" : "failed")));
return rc;
}
/*
** Given the name of a database file, compute the name of its conch file.
** Store the conch filename in memory obtained from sqlite3_malloc64().
** Make *pConchPath point to the new name. Return SQLITE_OK on success
** or SQLITE_NOMEM if unable to obtain memory.
**
** The caller is responsible for ensuring that the allocated memory
** space is eventually freed.
**
** *pConchPath is set to NULL if a memory allocation error occurs.
*/
static int proxyCreateConchPathname(char *dbPath, char **pConchPath){
int i; /* Loop counter */
int len = (int)strlen(dbPath); /* Length of database filename - dbPath */
char *conchPath; /* buffer in which to construct conch name */
/* Allocate space for the conch filename and initialize the name to
** the name of the original database file. */
*pConchPath = conchPath = (char *)sqlite3_malloc64(len + 8);
if( conchPath==0 ){
return SQLITE_NOMEM;
}
memcpy(conchPath, dbPath, len+1);
/* now insert a "." before the last / character */
for( i=(len-1); i>=0; i-- ){
if( conchPath[i]=='/' ){
i++;
break;
}
}
conchPath[i]='.';
while ( i<len ){
conchPath[i+1]=dbPath[i];
i++;
}
/* append the "-conch" suffix to the file */
memcpy(&conchPath[i+1], "-conch", 7);
assert( (int)strlen(conchPath) == len+7 );
return SQLITE_OK;
}
/* Takes a fully configured proxy locking-style unix file and switches
** the local lock file path
*/
static int switchLockProxyPath(unixFile *pFile, const char *path) {
proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;
char *oldPath = pCtx->lockProxyPath;
int rc = SQLITE_OK;
if( pFile->eFileLock!=NO_LOCK ){
return SQLITE_BUSY;
}
/* nothing to do if the path is NULL, :auto: or matches the existing path */
if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ||
(oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){
return SQLITE_OK;
}else{
unixFile *lockProxy = pCtx->lockProxy;
pCtx->lockProxy=NULL;
pCtx->conchHeld = 0;
if( lockProxy!=NULL ){
rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy);
if( rc ) return rc;
sqlite3_free(lockProxy);
}
sqlite3_free(oldPath);
pCtx->lockProxyPath = sqlite3DbStrDup(0, path);
}
return rc;
}
/*
** pFile is a file that has been opened by a prior xOpen call. dbPath
** is a string buffer at least MAXPATHLEN+1 characters in size.
**
** This routine find the filename associated with pFile and writes it
** int dbPath.
*/
static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){
#if defined(__APPLE__)
if( pFile->pMethod == &afpIoMethods ){
/* afp style keeps a reference to the db path in the filePath field
** of the struct */
assert( (int)strlen((char*)pFile->lockingContext)<=MAXPATHLEN );
strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath,
MAXPATHLEN);
} else
#endif
if( pFile->pMethod == &dotlockIoMethods ){
/* dot lock style uses the locking context to store the dot lock
** file path */
int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX);
memcpy(dbPath, (char *)pFile->lockingContext, len + 1);
}else{
/* all other styles use the locking context to store the db file path */
assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN );
strlcpy(dbPath, (char *)pFile->lockingContext, MAXPATHLEN);
}
return SQLITE_OK;
}
/*
** Takes an already filled in unix file and alters it so all file locking
** will be performed on the local proxy lock file. The following fields
** are preserved in the locking context so that they can be restored and
** the unix structure properly cleaned up at close time:
** ->lockingContext
** ->pMethod
*/
static int proxyTransformUnixFile(unixFile *pFile, const char *path) {
proxyLockingContext *pCtx;
char dbPath[MAXPATHLEN+1]; /* Name of the database file */
char *lockPath=NULL;
int rc = SQLITE_OK;
if( pFile->eFileLock!=NO_LOCK ){
return SQLITE_BUSY;
}
proxyGetDbPathForUnixFile(pFile, dbPath);
if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){
lockPath=NULL;
}else{
lockPath=(char *)path;
}
OSTRACE(("TRANSPROXY %d for %s pid=%d\n", pFile->h,
(lockPath ? lockPath : ":auto:"), osGetpid(0)));
pCtx = sqlite3_malloc64( sizeof(*pCtx) );
if( pCtx==0 ){
return SQLITE_NOMEM;
}
memset(pCtx, 0, sizeof(*pCtx));
rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath);
if( rc==SQLITE_OK ){
rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile, 0);
if( rc==SQLITE_CANTOPEN && ((pFile->openFlags&O_RDWR) == 0) ){
/* if (a) the open flags are not O_RDWR, (b) the conch isn't there, and
** (c) the file system is read-only, then enable no-locking access.
** Ugh, since O_RDONLY==0x0000 we test for !O_RDWR since unixOpen asserts
** that openFlags will have only one of O_RDONLY or O_RDWR.
*/
struct statfs fsInfo;
struct stat conchInfo;
int goLockless = 0;
if( osStat(pCtx->conchFilePath, &conchInfo) == -1 ) {
int err = errno;
if( (err==ENOENT) && (statfs(dbPath, &fsInfo) != -1) ){
goLockless = (fsInfo.f_flags&MNT_RDONLY) == MNT_RDONLY;
}
}
if( goLockless ){
pCtx->conchHeld = -1; /* read only FS/ lockless */
rc = SQLITE_OK;
}
}
}
if( rc==SQLITE_OK && lockPath ){
pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath);
}
if( rc==SQLITE_OK ){
pCtx->dbPath = sqlite3DbStrDup(0, dbPath);
if( pCtx->dbPath==NULL ){
rc = SQLITE_NOMEM;
}
}
if( rc==SQLITE_OK ){
/* all memory is allocated, proxys are created and assigned,
** switch the locking context and pMethod then return.
*/
pCtx->oldLockingContext = pFile->lockingContext;
pFile->lockingContext = pCtx;
pCtx->pOldMethod = pFile->pMethod;
pFile->pMethod = &proxyIoMethods;
}else{
if( pCtx->conchFile ){
pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile);
sqlite3_free(pCtx->conchFile);
}
sqlite3DbFree(0, pCtx->lockProxyPath);
sqlite3_free(pCtx->conchFilePath);
sqlite3_free(pCtx);
}
OSTRACE(("TRANSPROXY %d %s\n", pFile->h,
(rc==SQLITE_OK ? "ok" : "failed")));
return rc;
}
/*
** This routine handles sqlite3_file_control() calls that are specific
** to proxy locking.
*/
static int proxyFileControl(sqlite3_file *id, int op, void *pArg){
switch( op ){
case SQLITE_FCNTL_GET_LOCKPROXYFILE: {
unixFile *pFile = (unixFile*)id;
if( pFile->pMethod == &proxyIoMethods ){
proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;
proxyTakeConch(pFile);
if( pCtx->lockProxyPath ){
*(const char **)pArg = pCtx->lockProxyPath;
}else{
*(const char **)pArg = ":auto: (not held)";
}
} else {
*(const char **)pArg = NULL;
}
return SQLITE_OK;
}
case SQLITE_FCNTL_SET_LOCKPROXYFILE: {
unixFile *pFile = (unixFile*)id;
int rc = SQLITE_OK;
int isProxyStyle = (pFile->pMethod == &proxyIoMethods);
if( pArg==NULL || (const char *)pArg==0 ){
if( isProxyStyle ){
/* turn off proxy locking - not supported. If support is added for
** switching proxy locking mode off then it will need to fail if
** the journal mode is WAL mode.
*/
rc = SQLITE_ERROR /*SQLITE_PROTOCOL? SQLITE_MISUSE?*/;
}else{
/* turn off proxy locking - already off - NOOP */
rc = SQLITE_OK;
}
}else{
const char *proxyPath = (const char *)pArg;
if( isProxyStyle ){
proxyLockingContext *pCtx =
(proxyLockingContext*)pFile->lockingContext;
if( !strcmp(pArg, ":auto:")
|| (pCtx->lockProxyPath &&
!strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN))
){
rc = SQLITE_OK;
}else{
rc = switchLockProxyPath(pFile, proxyPath);
}
}else{
/* turn on proxy file locking */
rc = proxyTransformUnixFile(pFile, proxyPath);
}
}
return rc;
}
default: {
assert( 0 ); /* The call assures that only valid opcodes are sent */
}
}
/*NOTREACHED*/
return SQLITE_ERROR;
}
/*
** Within this division (the proxying locking implementation) the procedures
** above this point are all utilities. The lock-related methods of the
** proxy-locking sqlite3_io_method object follow.
*/
/*
** This routine checks if there is a RESERVED lock held on the specified
** file by this or any other process. If such a lock is held, set *pResOut
** to a non-zero value otherwise *pResOut is set to zero. The return value
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
*/
static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) {
unixFile *pFile = (unixFile*)id;
int rc = proxyTakeConch(pFile);
if( rc==SQLITE_OK ){
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
if( pCtx->conchHeld>0 ){
unixFile *proxy = pCtx->lockProxy;
return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut);
}else{ /* conchHeld < 0 is lockless */
pResOut=0;
}
}
return rc;
}
/*
** Lock the file with the lock specified by parameter eFileLock - one
** of the following:
**
** (1) SHARED_LOCK
** (2) RESERVED_LOCK
** (3) PENDING_LOCK
** (4) EXCLUSIVE_LOCK
**
** Sometimes when requesting one lock state, additional lock states
** are inserted in between. The locking might fail on one of the later
** transitions leaving the lock state different from what it started but
** still short of its goal. The following chart shows the allowed
** transitions and the inserted intermediate states:
**
** UNLOCKED -> SHARED
** SHARED -> RESERVED
** SHARED -> (PENDING) -> EXCLUSIVE
** RESERVED -> (PENDING) -> EXCLUSIVE
** PENDING -> EXCLUSIVE
**
** This routine will only increase a lock. Use the sqlite3OsUnlock()
** routine to lower a locking level.
*/
static int proxyLock(sqlite3_file *id, int eFileLock) {
unixFile *pFile = (unixFile*)id;
int rc = proxyTakeConch(pFile);
if( rc==SQLITE_OK ){
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
if( pCtx->conchHeld>0 ){
unixFile *proxy = pCtx->lockProxy;
rc = proxy->pMethod->xLock((sqlite3_file*)proxy, eFileLock);
pFile->eFileLock = proxy->eFileLock;
}else{
/* conchHeld < 0 is lockless */
}
}
return rc;
}
/*
** Lower the locking level on file descriptor pFile to eFileLock. eFileLock
** must be either NO_LOCK or SHARED_LOCK.
**
** If the locking level of the file descriptor is already at or below
** the requested locking level, this routine is a no-op.
*/
static int proxyUnlock(sqlite3_file *id, int eFileLock) {
unixFile *pFile = (unixFile*)id;
int rc = proxyTakeConch(pFile);
if( rc==SQLITE_OK ){
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
if( pCtx->conchHeld>0 ){
unixFile *proxy = pCtx->lockProxy;
rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, eFileLock);
pFile->eFileLock = proxy->eFileLock;
}else{
/* conchHeld < 0 is lockless */
}
}
return rc;
}
/*
** Close a file that uses proxy locks.
*/
static int proxyClose(sqlite3_file *id) {
if( ALWAYS(id) ){
unixFile *pFile = (unixFile*)id;
proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
unixFile *lockProxy = pCtx->lockProxy;
unixFile *conchFile = pCtx->conchFile;
int rc = SQLITE_OK;
if( lockProxy ){
rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK);
if( rc ) return rc;
rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy);
if( rc ) return rc;
sqlite3_free(lockProxy);
pCtx->lockProxy = 0;
}
if( conchFile ){
if( pCtx->conchHeld ){
rc = proxyReleaseConch(pFile);
if( rc ) return rc;
}
rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile);
if( rc ) return rc;
sqlite3_free(conchFile);
}
sqlite3DbFree(0, pCtx->lockProxyPath);
sqlite3_free(pCtx->conchFilePath);
sqlite3DbFree(0, pCtx->dbPath);
/* restore the original locking context and pMethod then close it */
pFile->lockingContext = pCtx->oldLockingContext;
pFile->pMethod = pCtx->pOldMethod;
sqlite3_free(pCtx);
return pFile->pMethod->xClose(id);
}
return SQLITE_OK;
}
#endif /* defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE */
/*
** The proxy locking style is intended for use with AFP filesystems.
** And since AFP is only supported on MacOSX, the proxy locking is also
** restricted to MacOSX.
**
**
******************* End of the proxy lock implementation **********************
******************************************************************************/
/*
** Initialize the operating system interface.
**
** This routine registers all VFS implementations for unix-like operating
** systems. This routine, and the sqlite3_os_end() routine that follows,
** should be the only routines in this file that are visible from other
** files.
**
** This routine is called once during SQLite initialization and by a
** single thread. The memory allocation and mutex subsystems have not
** necessarily been initialized when this routine is called, and so they
** should not be used.
*/
int sqlite3_os_init(void){
/*
** The following macro defines an initializer for an sqlite3_vfs object.
** The name of the VFS is NAME. The pAppData is a pointer to a pointer
** to the "finder" function. (pAppData is a pointer to a pointer because
** silly C90 rules prohibit a void* from being cast to a function pointer
** and so we have to go through the intermediate pointer to avoid problems
** when compiling with -pedantic-errors on GCC.)
**
** The FINDER parameter to this macro is the name of the pointer to the
** finder-function. The finder-function returns a pointer to the
** sqlite_io_methods object that implements the desired locking
** behaviors. See the division above that contains the IOMETHODS
** macro for addition information on finder-functions.
**
** Most finders simply return a pointer to a fixed sqlite3_io_methods
** object. But the "autolockIoFinder" available on MacOSX does a little
** more than that; it looks at the filesystem type that hosts the
** database file and tries to choose an locking method appropriate for
** that filesystem time.
*/
#define UNIXVFS(VFSNAME, FINDER) { \
3, /* iVersion */ \
sizeof(unixFile), /* szOsFile */ \
MAX_PATHNAME, /* mxPathname */ \
0, /* pNext */ \
VFSNAME, /* zName */ \
(void*)&FINDER, /* pAppData */ \
unixOpen, /* xOpen */ \
unixDelete, /* xDelete */ \
unixAccess, /* xAccess */ \
unixFullPathname, /* xFullPathname */ \
unixDlOpen, /* xDlOpen */ \
unixDlError, /* xDlError */ \
unixDlSym, /* xDlSym */ \
unixDlClose, /* xDlClose */ \
unixRandomness, /* xRandomness */ \
unixSleep, /* xSleep */ \
unixCurrentTime, /* xCurrentTime */ \
unixGetLastError, /* xGetLastError */ \
unixCurrentTimeInt64, /* xCurrentTimeInt64 */ \
unixSetSystemCall, /* xSetSystemCall */ \
unixGetSystemCall, /* xGetSystemCall */ \
unixNextSystemCall, /* xNextSystemCall */ \
}
/*
** All default VFSes for unix are contained in the following array.
**
** Note that the sqlite3_vfs.pNext field of the VFS object is modified
** by the SQLite core when the VFS is registered. So the following
** array cannot be const.
*/
static sqlite3_vfs aVfs[] = {
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
UNIXVFS("unix", autolockIoFinder ),
#elif OS_VXWORKS
UNIXVFS("unix", vxworksIoFinder ),
#else
UNIXVFS("unix", posixIoFinder ),
#endif
UNIXVFS("unix-none", nolockIoFinder ),
UNIXVFS("unix-dotfile", dotlockIoFinder ),
UNIXVFS("unix-excl", posixIoFinder ),
#if OS_VXWORKS
UNIXVFS("unix-namedsem", semIoFinder ),
#endif
#if SQLITE_ENABLE_LOCKING_STYLE || OS_VXWORKS
UNIXVFS("unix-posix", posixIoFinder ),
#endif
#if SQLITE_ENABLE_LOCKING_STYLE
UNIXVFS("unix-flock", flockIoFinder ),
#endif
#if SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__)
UNIXVFS("unix-afp", afpIoFinder ),
UNIXVFS("unix-nfs", nfsIoFinder ),
UNIXVFS("unix-proxy", proxyIoFinder ),
#endif
};
unsigned int i; /* Loop counter */
/* Double-check that the aSyscall[] array has been constructed
** correctly. See ticket [bb3a86e890c8e96ab] */
assert( ArraySize(aSyscall)==28 );
/* Register all VFSes defined in the aVfs[] array */
for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
sqlite3_vfs_register(&aVfs[i], i==0);
}
return SQLITE_OK;
}
/*
** Shutdown the operating system interface.
**
** Some operating systems might need to do some cleanup in this routine,
** to release dynamically allocated objects. But not on unix.
** This routine is a no-op for unix.
*/
int sqlite3_os_end(void){
return SQLITE_OK;
}
#endif /* SQLITE_OS_UNIX */