556 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			556 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| # 2002 May 24
 | |
| #
 | |
| # The author disclaims copyright to this source code.  In place of
 | |
| # a legal notice, here is a blessing:
 | |
| #
 | |
| #    May you do good and not evil.
 | |
| #    May you find forgiveness for yourself and forgive others.
 | |
| #    May you share freely, never taking more than you give.
 | |
| #
 | |
| #***********************************************************************
 | |
| # This file implements regression tests for SQLite library.  The focus of
 | |
| # this file is testing the SQLite routines used for converting between the
 | |
| # various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
 | |
| # UTF-16be).
 | |
| #
 | |
| # $Id: enc2.test,v 1.29 2007/10/09 08:29:32 danielk1977 Exp $
 | |
| 
 | |
| set testdir [file dirname $argv0]
 | |
| source $testdir/tester.tcl
 | |
| 
 | |
| # If UTF16 support is disabled, ignore the tests in this file
 | |
| #
 | |
| ifcapable {!utf16} {
 | |
|   finish_test
 | |
|   return
 | |
| }
 | |
| 
 | |
| # The rough organisation of tests in this file is:
 | |
| #
 | |
| # enc2.1.*: Simple tests with a UTF-8 db.
 | |
| # enc2.2.*: Simple tests with a UTF-16LE db.
 | |
| # enc2.3.*: Simple tests with a UTF-16BE db.
 | |
| # enc2.4.*: Test that attached databases must have the same text encoding
 | |
| #           as the main database.
 | |
| # enc2.5.*: Test the behavior of the library when a collation sequence is
 | |
| #           not available for the most desirable text encoding.
 | |
| # enc2.6.*: Similar test for user functions.
 | |
| # enc2.7.*: Test that the VerifyCookie opcode protects against assuming the
 | |
| #           wrong text encoding for the database.
 | |
| # enc2.8.*: Test sqlite3_complete16()
 | |
| #
 | |
| 
 | |
| db close
 | |
| 
 | |
| # Return the UTF-8 representation of the supplied UTF-16 string $str. 
 | |
| proc utf8 {str} {
 | |
|   # If $str ends in two 0x00 0x00 bytes, knock these off before
 | |
|   # converting to UTF-8 using TCL.
 | |
|   binary scan $str \c* vals
 | |
|   if {[lindex $vals end]==0 && [lindex $vals end-1]==0} {
 | |
|     set str [binary format \c* [lrange $vals 0 end-2]]
 | |
|   }
 | |
| 
 | |
|   set r [encoding convertfrom unicode $str]
 | |
|   return $r
 | |
| }
 | |
| 
 | |
| #
 | |
| # This proc contains all the tests in this file. It is run
 | |
| # three times. Each time the file 'test.db' contains a database
 | |
| # with the following contents:
 | |
| set dbcontents {
 | |
|   CREATE TABLE t1(a PRIMARY KEY, b, c);
 | |
|   INSERT INTO t1 VALUES('one', 'I', 1);
 | |
| }
 | |
| # This proc tests that we can open and manipulate the test.db 
 | |
| # database, and that it is possible to retreive values in
 | |
| # various text encodings.
 | |
| #
 | |
| proc run_test_script {t enc} {
 | |
| 
 | |
| # Open the database and pull out a (the) row.
 | |
| do_test $t.1 {
 | |
|   sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 | |
|   execsql {SELECT * FROM t1}
 | |
| } {one I 1}
 | |
| 
 | |
| # Insert some data
 | |
| do_test $t.2 {
 | |
|   execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
 | |
|   execsql {SELECT * FROM t1}
 | |
| } {one I 1 two II 2}
 | |
| 
 | |
| # Insert some data 
 | |
| do_test $t.3 {
 | |
|   execsql {
 | |
|     INSERT INTO t1 VALUES('three','III',3);
 | |
|     INSERT INTO t1 VALUES('four','IV',4);
 | |
|     INSERT INTO t1 VALUES('five','V',5);
 | |
|   }
 | |
|   execsql {SELECT * FROM t1}
 | |
| } {one I 1 two II 2 three III 3 four IV 4 five V 5}
 | |
| 
 | |
| # Use the index
 | |
| do_test $t.4 {
 | |
|   execsql {
 | |
|     SELECT * FROM t1 WHERE a = 'one';
 | |
|   }
 | |
| } {one I 1}
 | |
| do_test $t.5 {
 | |
|   execsql {
 | |
|     SELECT * FROM t1 WHERE a = 'four';
 | |
|   }
 | |
| } {four IV 4}
 | |
| ifcapable subquery {
 | |
|   do_test $t.6 {
 | |
|     execsql {
 | |
|       SELECT * FROM t1 WHERE a IN ('one', 'two');
 | |
|     }
 | |
|   } {one I 1 two II 2}
 | |
| }
 | |
| 
 | |
| # Now check that we can retrieve data in both UTF-16 and UTF-8
 | |
| do_test $t.7 {
 | |
|   set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL]
 | |
|   sqlite3_step $STMT
 | |
|   sqlite3_column_text $STMT 0
 | |
| } {four}
 | |
| 
 | |
| do_test $t.8 {
 | |
|   sqlite3_step $STMT
 | |
|   utf8 [sqlite3_column_text16 $STMT 0]
 | |
| } {five}
 | |
| 
 | |
| do_test $t.9 {
 | |
|   sqlite3_finalize $STMT
 | |
| } SQLITE_OK
 | |
| 
 | |
| ifcapable vacuum {
 | |
|   execsql VACUUM
 | |
| }
 | |
| 
 | |
| do_test $t.10 {
 | |
|   db eval {PRAGMA encoding}
 | |
| } $enc
 | |
| 
 | |
| }
 | |
| 
 | |
| # The three unicode encodings understood by SQLite.
 | |
| set encodings [list UTF-8 UTF-16le UTF-16be]
 | |
| 
 | |
| set sqlite_os_trace 0
 | |
| set i 1
 | |
| foreach enc $encodings {
 | |
|   forcedelete test.db
 | |
|   sqlite3 db test.db
 | |
|   db eval "PRAGMA encoding = \"$enc\""
 | |
|   execsql $dbcontents
 | |
|   do_test enc2-$i.0.1 {
 | |
|     db eval {PRAGMA encoding}
 | |
|   } $enc
 | |
|   do_test enc2-$i.0.2 {
 | |
|     db eval {PRAGMA encoding=UTF8}
 | |
|     db eval {PRAGMA encoding}
 | |
|   } $enc
 | |
|   do_test enc2-$i.0.3 {
 | |
|     db eval {PRAGMA encoding=UTF16le}
 | |
|     db eval {PRAGMA encoding}
 | |
|   } $enc
 | |
|   do_test enc2-$i.0.4 {
 | |
|     db eval {PRAGMA encoding=UTF16be}
 | |
|     db eval {PRAGMA encoding}
 | |
|   } $enc
 | |
| 
 | |
|   db close
 | |
|   run_test_script enc2-$i $enc
 | |
|   db close
 | |
|   incr i
 | |
| }
 | |
| 
 | |
| # Test that it is an error to try to attach a database with a different
 | |
| # encoding to the main database.
 | |
| ifcapable attach {
 | |
|   do_test enc2-4.1 {
 | |
|     forcedelete test.db
 | |
|     sqlite3 db test.db
 | |
|     db eval "PRAGMA encoding = 'UTF-8'"
 | |
|     db eval "CREATE TABLE abc(a, b, c);"
 | |
|   } {}
 | |
|   do_test enc2-4.2 {
 | |
|     forcedelete test2.db
 | |
|     sqlite3 db2 test2.db
 | |
|     db2 eval "PRAGMA encoding = 'UTF-16'"
 | |
|     db2 eval "CREATE TABLE abc(a, b, c);"
 | |
|   } {}
 | |
|   do_test enc2-4.3 {
 | |
|     catchsql {
 | |
|       ATTACH 'test2.db' as aux;
 | |
|     }
 | |
|   } {1 {attached databases must use the same text encoding as main database}}
 | |
|   db2 close
 | |
|   db close
 | |
| }
 | |
| 
 | |
| # The following tests - enc2-5.* - test that SQLite selects the correct
 | |
| # collation sequence when more than one is available.
 | |
| 
 | |
| set ::values [list one two three four five]
 | |
| set ::test_collate_enc INVALID
 | |
| proc test_collate {enc lhs rhs} {
 | |
|   set ::test_collate_enc $enc
 | |
|   set l [lsearch -exact $::values $lhs]
 | |
|   set r [lsearch -exact $::values $rhs]
 | |
|   set res [expr $l - $r]
 | |
|   # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res"
 | |
|   return $res
 | |
| }
 | |
| 
 | |
| forcedelete test.db
 | |
| sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 | |
| do_test enc2-5.0 {
 | |
|   execsql {
 | |
|     CREATE TABLE t5(a);
 | |
|     INSERT INTO t5 VALUES('one');
 | |
|     INSERT INTO t5 VALUES('two');
 | |
|     INSERT INTO t5 VALUES('five');
 | |
|     INSERT INTO t5 VALUES('three');
 | |
|     INSERT INTO t5 VALUES('four');
 | |
|   }
 | |
| } {}
 | |
| do_test enc2-5.1 {
 | |
|   add_test_collate $DB 1 1 1
 | |
|   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}]
 | |
|   lappend res $::test_collate_enc
 | |
| } {one two three four five UTF-8}
 | |
| do_test enc2-5.2 {
 | |
|   add_test_collate $DB 0 1 0
 | |
|   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 | |
|   lappend res $::test_collate_enc
 | |
| } {one two three four five UTF-16LE}
 | |
| do_test enc2-5.3 {
 | |
|   add_test_collate $DB 0 0 1
 | |
|   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 | |
|   lappend res $::test_collate_enc
 | |
| } {one two three four five UTF-16BE}
 | |
| 
 | |
| db close
 | |
| forcedelete test.db
 | |
| sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 | |
| execsql {pragma encoding = 'UTF-16LE'}
 | |
| do_test enc2-5.4 {
 | |
|   execsql {
 | |
|     CREATE TABLE t5(a);
 | |
|     INSERT INTO t5 VALUES('one');
 | |
|     INSERT INTO t5 VALUES('two');
 | |
|     INSERT INTO t5 VALUES('five');
 | |
|     INSERT INTO t5 VALUES('three');
 | |
|     INSERT INTO t5 VALUES('four');
 | |
|   }
 | |
| } {}
 | |
| do_test enc2-5.5 {
 | |
|   add_test_collate $DB 1 1 1
 | |
|   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 | |
|   lappend res $::test_collate_enc
 | |
| } {one two three four five UTF-16LE}
 | |
| do_test enc2-5.6 {
 | |
|   add_test_collate $DB 1 0 1
 | |
|   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 | |
|   lappend res $::test_collate_enc
 | |
| } {one two three four five UTF-16BE}
 | |
| do_test enc2-5.7 {
 | |
|   add_test_collate $DB 1 0 0
 | |
|   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 | |
|   lappend res $::test_collate_enc
 | |
| } {one two three four five UTF-8}
 | |
| 
 | |
| db close
 | |
| forcedelete test.db
 | |
| sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 | |
| execsql {pragma encoding = 'UTF-16BE'}
 | |
| do_test enc2-5.8 {
 | |
|   execsql {
 | |
|     CREATE TABLE t5(a);
 | |
|     INSERT INTO t5 VALUES('one');
 | |
|     INSERT INTO t5 VALUES('two');
 | |
|     INSERT INTO t5 VALUES('five');
 | |
|     INSERT INTO t5 VALUES('three');
 | |
|     INSERT INTO t5 VALUES('four');
 | |
|   }
 | |
| } {}
 | |
| do_test enc2-5.9 {
 | |
|   add_test_collate $DB 1 1 1
 | |
|   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 | |
|   lappend res $::test_collate_enc
 | |
| } {one two three four five UTF-16BE}
 | |
| do_test enc2-5.10 {
 | |
|   add_test_collate $DB 1 1 0
 | |
|   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 | |
|   lappend res $::test_collate_enc
 | |
| } {one two three four five UTF-16LE}
 | |
| do_test enc2-5.11 {
 | |
|   add_test_collate $DB 1 0 0
 | |
|   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
 | |
|   lappend res $::test_collate_enc
 | |
| } {one two three four five UTF-8}
 | |
| 
 | |
| # Also test that a UTF-16 collation factory works.
 | |
| do_test enc2-5-12 {
 | |
|   add_test_collate $DB 0 0 0
 | |
|   catchsql {
 | |
|     SELECT * FROM t5 ORDER BY 1 COLLATE test_collate
 | |
|   }
 | |
| } {1 {no such collation sequence: test_collate}}
 | |
| do_test enc2-5.13 {
 | |
|   add_test_collate_needed $DB 
 | |
|   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }]
 | |
|   lappend res $::test_collate_enc
 | |
| } {one two three four five UTF-16BE}
 | |
| do_test enc2-5.14 {
 | |
|   set ::sqlite_last_needed_collation
 | |
| } test_collate
 | |
| 
 | |
| db close
 | |
| forcedelete test.db
 | |
| 
 | |
| do_test enc2-5.15 {
 | |
|   sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db]
 | |
|   add_test_collate_needed $::DB
 | |
|   set ::sqlite_last_needed_collation
 | |
| } {}
 | |
| do_test enc2-5.16 {
 | |
|   execsql {CREATE TABLE t1(a varchar collate test_collate);}
 | |
| } {}
 | |
| do_test enc2-5.17 {
 | |
|   set ::sqlite_last_needed_collation
 | |
| } {test_collate}
 | |
| 
 | |
| # The following tests - enc2-6.* - test that SQLite selects the correct
 | |
| # user function when more than one is available.
 | |
| 
 | |
| proc test_function {enc arg} {
 | |
|   return "$enc $arg"
 | |
| }
 | |
| 
 | |
| db close
 | |
| forcedelete test.db
 | |
| sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 | |
| execsql {pragma encoding = 'UTF-8'}
 | |
| do_test enc2-6.0 {
 | |
|   execsql {
 | |
|     CREATE TABLE t5(a);
 | |
|     INSERT INTO t5 VALUES('one');
 | |
|   }
 | |
| } {}
 | |
| do_test enc2-6.1 {
 | |
|   add_test_function $DB 1 1 1
 | |
|   execsql {
 | |
|     SELECT test_function('sqlite')
 | |
|   }
 | |
| } {{UTF-8 sqlite}}
 | |
| db close
 | |
| sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 | |
| do_test enc2-6.2 {
 | |
|   add_test_function $DB 0 1 0
 | |
|   execsql {
 | |
|     SELECT test_function('sqlite')
 | |
|   }
 | |
| } {{UTF-16LE sqlite}}
 | |
| db close
 | |
| sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 | |
| do_test enc2-6.3 {
 | |
|   add_test_function $DB 0 0 1
 | |
|   execsql {
 | |
|     SELECT test_function('sqlite')
 | |
|   }
 | |
| } {{UTF-16BE sqlite}}
 | |
| 
 | |
| db close
 | |
| forcedelete test.db
 | |
| sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 | |
| execsql {pragma encoding = 'UTF-16LE'}
 | |
| do_test enc2-6.3 {
 | |
|   execsql {
 | |
|     CREATE TABLE t5(a);
 | |
|     INSERT INTO t5 VALUES('sqlite');
 | |
|   }
 | |
| } {}
 | |
| do_test enc2-6.4 {
 | |
|   add_test_function $DB 1 1 1
 | |
|   execsql {
 | |
|     SELECT test_function('sqlite')
 | |
|   }
 | |
| } {{UTF-16LE sqlite}}
 | |
| db close
 | |
| sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 | |
| do_test enc2-6.5 {
 | |
|   add_test_function $DB 0 1 0
 | |
|   execsql {
 | |
|     SELECT test_function('sqlite')
 | |
|   }
 | |
| } {{UTF-16LE sqlite}}
 | |
| db close
 | |
| sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 | |
| do_test enc2-6.6 {
 | |
|   add_test_function $DB 0 0 1
 | |
|   execsql {
 | |
|     SELECT test_function('sqlite')
 | |
|   }
 | |
| } {{UTF-16BE sqlite}}
 | |
| 
 | |
| db close
 | |
| forcedelete test.db
 | |
| sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 | |
| execsql {pragma encoding = 'UTF-16BE'}
 | |
| do_test enc2-6.7 {
 | |
|   execsql {
 | |
|     CREATE TABLE t5(a);
 | |
|     INSERT INTO t5 VALUES('sqlite');
 | |
|   }
 | |
| } {}
 | |
| do_test enc2-6.8 {
 | |
|   add_test_function $DB 1 1 1
 | |
|   execsql {
 | |
|     SELECT test_function('sqlite')
 | |
|   }
 | |
| } {{UTF-16BE sqlite}}
 | |
| db close
 | |
| sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 | |
| do_test enc2-6.9 {
 | |
|   add_test_function $DB 0 1 0
 | |
|   execsql {
 | |
|     SELECT test_function('sqlite')
 | |
|   }
 | |
| } {{UTF-16LE sqlite}}
 | |
| db close
 | |
| sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
 | |
| do_test enc2-6.10 {
 | |
|   add_test_function $DB 0 0 1
 | |
|   execsql {
 | |
|     SELECT test_function('sqlite')
 | |
|   }
 | |
| } {{UTF-16BE sqlite}}
 | |
| 
 | |
| 
 | |
| db close
 | |
| forcedelete test.db
 | |
| 
 | |
| # The following tests - enc2-7.* - function as follows:
 | |
| #
 | |
| # 1: Open an empty database file assuming UTF-16 encoding.
 | |
| # 2: Open the same database with a different handle assuming UTF-8. Create
 | |
| #    a table using this handle.
 | |
| # 3: Read the sqlite_master table from the first handle. 
 | |
| # 4: Ensure the first handle recognises the database encoding is UTF-8.
 | |
| #
 | |
| do_test enc2-7.1 {
 | |
|   sqlite3 db test.db
 | |
|   execsql {
 | |
|     PRAGMA encoding = 'UTF-16';
 | |
|     SELECT * FROM sqlite_master;
 | |
|   }
 | |
| } {}
 | |
| do_test enc2-7.2 {
 | |
|   set enc [execsql {
 | |
|     PRAGMA encoding;
 | |
|   }]
 | |
|   string range $enc 0 end-2 ;# Chop off the "le" or "be"
 | |
| } {UTF-16}
 | |
| do_test enc2-7.3 {
 | |
|   sqlite3 db2 test.db
 | |
|   execsql {
 | |
|     PRAGMA encoding = 'UTF-8';
 | |
|     CREATE TABLE abc(a, b, c);
 | |
|   } db2
 | |
| } {}
 | |
| do_test enc2-7.4 {
 | |
|   execsql {
 | |
|     SELECT * FROM sqlite_master;
 | |
|   }
 | |
| } "table abc abc [expr $AUTOVACUUM?3:2] {CREATE TABLE abc(a, b, c)}"
 | |
| do_test enc2-7.5 {
 | |
|   execsql {
 | |
|     PRAGMA encoding;
 | |
|   }
 | |
| } {UTF-8}
 | |
| 
 | |
| db close
 | |
| db2 close
 | |
| 
 | |
| proc utf16 {utf8} {
 | |
|   set utf16 [encoding convertto unicode $utf8]
 | |
|   append utf16 "\x00\x00"
 | |
|   return $utf16
 | |
| }
 | |
| ifcapable {complete} {
 | |
|   do_test enc2-8.1 {
 | |
|     sqlite3_complete16 [utf16 "SELECT * FROM t1;"]
 | |
|   } {1}
 | |
|   do_test enc2-8.2 {
 | |
|     sqlite3_complete16 [utf16 "SELECT * FROM"]
 | |
|   } {0}
 | |
| }
 | |
| 
 | |
| # Test that the encoding of an empty database may still be set after the
 | |
| # (empty) schema has been initialized.
 | |
| forcedelete test.db
 | |
| do_test enc2-9.1 {
 | |
|   sqlite3 db test.db
 | |
|   execsql {
 | |
|     PRAGMA encoding = 'UTF-8';
 | |
|     PRAGMA encoding;
 | |
|   }
 | |
| } {UTF-8}
 | |
| do_test enc2-9.2 {
 | |
|   sqlite3 db test.db
 | |
|   execsql {
 | |
|     PRAGMA encoding = 'UTF-16le';
 | |
|     PRAGMA encoding;
 | |
|   }
 | |
| } {UTF-16le}
 | |
| do_test enc2-9.3 {
 | |
|   sqlite3 db test.db
 | |
|   execsql {
 | |
|     SELECT * FROM sqlite_master;
 | |
|     PRAGMA encoding = 'UTF-8';
 | |
|     PRAGMA encoding;
 | |
|   }
 | |
| } {UTF-8}
 | |
| do_test enc2-9.4 {
 | |
|   sqlite3 db test.db
 | |
|   execsql {
 | |
|     PRAGMA encoding = 'UTF-16le';
 | |
|     CREATE TABLE abc(a, b, c);
 | |
|     PRAGMA encoding;
 | |
|   }
 | |
| } {UTF-16le}
 | |
| do_test enc2-9.5 {
 | |
|   sqlite3 db test.db
 | |
|   execsql {
 | |
|     PRAGMA encoding = 'UTF-8';
 | |
|     PRAGMA encoding;
 | |
|   }
 | |
| } {UTF-16le}
 | |
| 
 | |
| # Ticket #1987.
 | |
| # Disallow encoding changes once the encoding has been set.
 | |
| #
 | |
| do_test enc2-10.1 {
 | |
|   db close
 | |
|   forcedelete test.db test.db-journal
 | |
|   sqlite3 db test.db
 | |
|   db eval {
 | |
|     PRAGMA encoding=UTF16;
 | |
|     CREATE TABLE t1(a);
 | |
|     PRAGMA encoding=UTF8;
 | |
|     CREATE TABLE t2(b);
 | |
|   }
 | |
|   db close
 | |
|   sqlite3 db test.db
 | |
|   db eval {
 | |
|     SELECT name FROM sqlite_master
 | |
|   }
 | |
| } {t1 t2}
 | |
| 
 | |
| finish_test
 | 
