MXS-2732 Remove old sqlite-src-3110100 directory
File diff suppressed because it is too large
@@ -1,123 +0,0 @@
#!/usr/make
#
# Makefile for SQLITE
#
# This is a template makefile for SQLite.  Most people prefer to
# use the autoconf generated "configure" script to generate the
# makefile automatically.  But that does not work for everybody
# and in every situation.  If you are having problems with the
# "configure" script, you might want to try this makefile as an
# alternative.  Create a copy of this file, edit the parameters
# below and type "make".
#

#### The toplevel directory of the source tree.  This is the directory
#    that contains this "Makefile.in" and the "configure.in" script.
#
TOP = ../sqlite

#### C Compiler and options for use in building executables that
#    will run on the platform that is doing the build.
#
BCC = gcc -g -O2
#BCC = /opt/ancic/bin/c89 -0

#### If the target operating system supports the "usleep()" system
#    call, then define the HAVE_USLEEP macro for all C modules.
#
#USLEEP =
USLEEP = -DHAVE_USLEEP=1

#### If you want the SQLite library to be safe for use within a
#    multi-threaded program, then define the following macro
#    appropriately:
#
#THREADSAFE = -DTHREADSAFE=1
THREADSAFE = -DTHREADSAFE=0

#### Specify any extra linker options needed to make the library
#    thread safe
#
#THREADLIB = -lpthread
THREADLIB =

#### Specify any extra libraries needed to access required functions.
#
#TLIBS = -lrt    # fdatasync on Solaris 8
TLIBS =

#### Leave SQLITE_DEBUG undefined for maximum speed.  Use SQLITE_DEBUG=1
#    to check for memory leaks.  Use SQLITE_DEBUG=2 to print a log of all
#    malloc()s and free()s in order to track down memory leaks.
#
#    SQLite uses some expensive assert() statements in the inner loop.
#    You can make the library go almost twice as fast if you compile
#    with -DNDEBUG=1
#
#OPTS = -DSQLITE_DEBUG=2
#OPTS = -DSQLITE_DEBUG=1
#OPTS =
OPTS = -DNDEBUG=1
OPTS += -DHAVE_FDATASYNC=1

#### The suffix to add to executable files.  ".exe" for windows.
#    Nothing for unix.
#
#EXE = .exe
EXE =

#### C Compile and options for use in building executables that
#    will run on the target platform.  This is usually the same
#    as BCC, unless you are cross-compiling.
#
TCC = gcc -O6
#TCC = gcc -g -O0 -Wall
#TCC = gcc -g -O0 -Wall -fprofile-arcs -ftest-coverage
#TCC = /opt/mingw/bin/i386-mingw32-gcc -O6
#TCC = /opt/ansic/bin/c89 -O +z -Wl,-a,archive

#### Tools used to build a static library.
#
AR = ar cr
#AR = /opt/mingw/bin/i386-mingw32-ar cr
RANLIB = ranlib
#RANLIB = /opt/mingw/bin/i386-mingw32-ranlib

MKSHLIB = gcc -shared
SO = so
SHPREFIX = lib
# SO = dll
# SHPREFIX =

#### Extra compiler options needed for programs that use the TCL library.
#
#TCL_FLAGS =
#TCL_FLAGS = -DSTATIC_BUILD=1
TCL_FLAGS = -I/home/drh/tcltk/8.5linux
#TCL_FLAGS = -I/home/drh/tcltk/8.5win -DSTATIC_BUILD=1
#TCL_FLAGS = -I/home/drh/tcltk/8.3hpux

#### Linker options needed to link against the TCL library.
#
#LIBTCL = -ltcl -lm -ldl
LIBTCL = /home/drh/tcltk/8.5linux/libtcl8.5g.a -lm -ldl
#LIBTCL = /home/drh/tcltk/8.5win/libtcl85s.a -lmsvcrt
#LIBTCL = /home/drh/tcltk/8.3hpux/libtcl8.3.a -ldld -lm -lc

#### Additional objects for SQLite library when TCL support is enabled.
#TCLOBJ =
TCLOBJ = tclsqlite.o

#### Compiler options needed for programs that use the readline() library.
#
READLINE_FLAGS =
#READLINE_FLAGS = -DHAVE_READLINE=1 -I/usr/include/readline

#### Linker options needed by programs using readline() must link against.
#
LIBREADLINE =
#LIBREADLINE = -static -lreadline -ltermcap

# You should not have to change anything below this line
###############################################################################
include $(TOP)/main.mk
File diff suppressed because it is too large
@@ -1,230 +0,0 @@
<h1 align="center">SQLite Source Repository</h1>

This repository contains the complete source code for the SQLite database
engine.  Some test scripts are also included.  However, many other test scripts
and most of the documentation are managed separately.

If you are reading this on a Git mirror someplace, you are doing it wrong.
The [official repository](https://www.sqlite.org/src/) is better.  Go there
now.

## Compiling

First create a directory in which to place
the build products.  It is recommended, but not required, that the
build directory be separate from the source directory.  Cd into the
build directory and then from the build directory run the configure
script found at the root of the source tree.  Then run "make".

For example:

    tar xzf sqlite.tar.gz    ;#  Unpack the source tree into "sqlite"
    mkdir bld                ;#  Build will occur in a sibling directory
    cd bld                   ;#  Change to the build directory
    ../sqlite/configure      ;#  Run the configure script
    make                     ;#  Run the makefile.
    make sqlite3.c           ;#  Build the "amalgamation" source file
    make test                ;#  Run some tests (requires Tcl)

See the makefile for additional targets.

The configure script uses autoconf 2.61 and libtool.  If the configure
script does not work out for you, there is a generic makefile named
"Makefile.linux-gcc" in the top directory of the source tree that you
can copy and edit to suit your needs.  Comments on the generic makefile
show what changes are needed.

## Using MSVC

On Windows, all applicable build products can be compiled with MSVC.
First open the command prompt window associated with the desired compiler
version (e.g. "Developer Command Prompt for VS2013").  Next, use NMAKE
with the provided "Makefile.msc" to build one of the supported targets.

For example:

    mkdir bld
    cd bld
    nmake /f Makefile.msc TOP=..\sqlite
    nmake /f Makefile.msc sqlite3.c TOP=..\sqlite
    nmake /f Makefile.msc sqlite3.dll TOP=..\sqlite
    nmake /f Makefile.msc sqlite3.exe TOP=..\sqlite
    nmake /f Makefile.msc test TOP=..\sqlite

There are several build options that can be set via the NMAKE command
line.  For example, to build for WinRT, simply add the "FOR_WINRT=1" argument
to the "sqlite3.dll" command line above.  When debugging into the SQLite
code, adding the "DEBUG=1" argument to one of the above command lines is
recommended.

SQLite does not require [Tcl](http://www.tcl.tk/) to run, but a Tcl installation
is required by the makefiles (including those for MSVC).  SQLite contains
a lot of generated code and Tcl is used to do much of that code generation.
The makefiles also require AWK.

## Source Code Tour

Most of the core source files are in the **src/** subdirectory.  But
src/ also contains files used to build the "testfixture" test harness;
those files all begin with "test".  And src/ contains the "shell.c" file
which is the main program for the "sqlite3.exe" command-line shell and
the "tclsqlite.c" file which implements the bindings to SQLite from the
Tcl programming language.  (Historical note:  SQLite began as a Tcl
extension and only later escaped to the wild as an independent library.)

Test scripts and programs are found in the **test/** subdirectory.
There are other test suites for SQLite (see
[How SQLite Is Tested](http://www.sqlite.org/testing.html))
but those other test suites are
in separate source repositories.

The **ext/** subdirectory contains code for extensions.  The
Full-text search engine is in **ext/fts3**.  The R-Tree engine is in
**ext/rtree**.  The **ext/misc** subdirectory contains a number of
smaller, single-file extensions, such as a REGEXP operator.

The **tool/** subdirectory contains various scripts and programs used
for building generated source code files or for testing or for generating
accessory programs such as "sqlite3_analyzer(.exe)".

### Generated Source Code Files

Several of the C-language source files used by SQLite are generated from
other sources rather than being typed in manually by a programmer.  This
section will summarize those automatically-generated files.  To create all
of the automatically-generated files, simply run "make target_source".
The "target_source" make target will create a subdirectory "tsrc/" and
fill it with all the source files needed to build SQLite, both
manually-edited files and automatically-generated files.

The SQLite interface is defined by the **sqlite3.h** header file, which is
generated from src/sqlite.h.in, ./manifest.uuid, and ./VERSION.  The
[Tcl script](http://www.tcl.tk) at tool/mksqlite3h.tcl does the conversion.
The manifest.uuid file contains the SHA1 hash of the particular check-in
and is used to generate the SQLITE\_SOURCE\_ID macro.  The VERSION file
contains the current SQLite version number.  The sqlite3.h header is really
just a copy of src/sqlite.h.in with the source-id and version number inserted
at just the right spots.  Note that comment text in the sqlite3.h file is
used to generate much of the SQLite API documentation.  The Tcl scripts
used to generate that documentation are in a separate source repository.

The SQL language parser is **parse.c** which is generated from a grammar in
the src/parse.y file.  The conversion of "parse.y" into "parse.c" is done
by the [lemon](./doc/lemon.html) LALR(1) parser generator.  The source code
for lemon is at tool/lemon.c.  Lemon uses a
template for generating its parser.  A generic template is in tool/lempar.c,
but SQLite uses a slightly modified template found in src/lempar.c.

Lemon also generates the **parse.h** header file, at the same time it
generates parse.c.  But the parse.h header file is
modified further (to add additional symbols) using the ./addopcodes.awk
AWK script.

The **opcodes.h** header file contains macros that define the numbers
corresponding to opcodes in the "VDBE" virtual machine.  The opcodes.h
file is generated by scanning the src/vdbe.c source file.  The
AWK script at ./mkopcodeh.awk does this scan and generates opcodes.h.
A second AWK script, ./mkopcodec.awk, then scans opcodes.h to generate
the **opcodes.c** source file, which contains a reverse mapping from
opcode-number to opcode-name that is used for EXPLAIN output.

The **keywordhash.h** header file contains the definition of a hash table
that maps SQL language keywords (ex: "CREATE", "SELECT", "INDEX", etc.) into
the numeric codes used by the parse.c parser.  The keywordhash.h file is
generated by a C-language program at tool/mkkeywordhash.c.

### The Amalgamation

All of the individual C source code and header files (both manually-edited
and automatically-generated) can be combined into a single big source file
**sqlite3.c** called "the amalgamation".  The amalgamation is the recommended
way of using SQLite in a larger application.  Combining all individual
source code files into a single big source code file allows the C compiler
to perform more cross-procedure analysis and generate better code.  SQLite
runs about 5% faster when compiled from the amalgamation versus when compiled
from individual source files.

The amalgamation is generated by the tool/mksqlite3c.tcl Tcl script.
First, all of the individual source files must be gathered into the tsrc/
subdirectory (using the equivalent of "make target_source"), then the
tool/mksqlite3c.tcl script is run to copy them all together in just the
right order while resolving internal "#include" references.

The amalgamation source file is more than 100K lines long.  Some symbolic
debuggers (most notably MSVC) are unable to deal with files longer than 64K
lines.  To work around this, a separate Tcl script, tool/split-sqlite3c.tcl,
can be run on the amalgamation to break it up into a single small C file
called **sqlite3-all.c** that does #include on about five other files
named **sqlite3-1.c**, **sqlite3-2.c**, ..., **sqlite3-5.c**.  In this way,
all of the source code is contained within a single translation unit so
that the compiler can do extra cross-procedure optimization, but no
individual source file exceeds 32K lines in length.

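As a minimal sketch of what building against the amalgamation looks like in practice (the file names app.c and test.db and the link flags below are illustrative assumptions, not taken from this repository), an application compiles sqlite3.c as an ordinary translation unit and calls the public API declared in sqlite3.h:

    /* build (typical Unix flags; adjust for your platform):
    **   gcc app.c sqlite3.c -lpthread -ldl -o app
    */
    #include <stdio.h>
    #include "sqlite3.h"

    /* Callback invoked by sqlite3_exec() once per result row. */
    static int print_row(void *unused, int ncol, char **vals, char **names){
      int i;
      (void)unused;
      for(i=0; i<ncol; i++){
        printf("%s = %s\n", names[i], vals[i] ? vals[i] : "NULL");
      }
      return 0;  /* returning non-zero would abort sqlite3_exec() */
    }

    int main(void){
      sqlite3 *db;
      char *errmsg = 0;
      if( sqlite3_open("test.db", &db)!=SQLITE_OK ) return 1;
      sqlite3_exec(db,
        "CREATE TABLE IF NOT EXISTS t(x);"
        "INSERT INTO t VALUES(1);"
        "SELECT x FROM t;",
        print_row, 0, &errmsg);
      if( errmsg ){ fprintf(stderr, "%s\n", errmsg); sqlite3_free(errmsg); }
      sqlite3_close(db);
      return 0;
    }
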
## How It All Fits Together

SQLite is modular in design.
See the [architectural description](http://www.sqlite.org/arch.html)
for details. Other documents that are useful in
helping to understand how SQLite works include the
[file format](http://www.sqlite.org/fileformat2.html) description,
the [virtual machine](http://www.sqlite.org/vdbe.html) that runs
prepared statements, the description of
[how transactions work](http://www.sqlite.org/atomiccommit.html), and
the [overview of the query planner](http://www.sqlite.org/optoverview.html).

Unfortunately, years of effort have gone into optimizing SQLite, both
for small size and high performance.  And optimizations tend to result in
complex code.  So there is a lot of complexity in the SQLite implementation.

Key files:

  *  **sqlite.h.in** - This file defines the public interface to the SQLite
     library.  Readers will need to be familiar with this interface before
     trying to understand how the library works internally.

  *  **sqliteInt.h** - This header file defines many of the data objects
     used internally by SQLite.

  *  **parse.y** - This file describes the LALR(1) grammar that SQLite uses
     to parse SQL statements, and the actions that are taken at each step
     in the parsing process.

  *  **vdbe.c** - This file implements the virtual machine that runs
     prepared statements.  There are various helper files whose names
     begin with "vdbe".  The VDBE has access to the vdbeInt.h header file
     which defines internal data objects.  The rest of SQLite interacts
     with the VDBE through an interface defined by vdbe.h.

  *  **where.c** - This file analyzes the WHERE clause and generates
     virtual machine code to run queries efficiently.  This file is
     sometimes called the "query optimizer".  It has its own private
     header file, whereInt.h, that defines data objects used internally.

  *  **btree.c** - This file contains the implementation of the B-Tree
     storage engine used by SQLite.

  *  **pager.c** - This file contains the "pager" implementation, the
     module that implements transactions.

  *  **os_unix.c** and **os_win.c** - These two files implement the interface
     between SQLite and the underlying operating system using the run-time
     pluggable VFS interface.

  *  **shell.c** - This file is not part of the core SQLite library.  This
     is the file that, when linked against sqlite3.a, generates the
     "sqlite3.exe" command-line shell.

  *  **tclsqlite.c** - This file implements the Tcl bindings for SQLite.  It
     is not part of the core SQLite library.  But as most of the tests in this
     repository are written in Tcl, the Tcl language bindings are important.

There are many other source files.  Each has a succinct header comment that
describes its purpose and role within the larger system.

## Contacts

The main SQLite webpage is [http://www.sqlite.org/](http://www.sqlite.org/)
with geographically distributed backup servers at
[http://www2.sqlite.org/](http://www2.sqlite.org) and
[http://www3.sqlite.org/](http://www3.sqlite.org).
@@ -1 +0,0 @@
3.11.1
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
Binary file not shown (image, 2.2 KiB)
Binary file not shown (image, 79 KiB)
@@ -1,370 +0,0 @@
Installation Instructions
*************************

Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation,
Inc.

Copying and distribution of this file, with or without modification,
are permitted in any medium without royalty provided the copyright
notice and this notice are preserved.  This file is offered as-is,
without warranty of any kind.

Basic Installation
==================

Briefly, the shell commands `./configure; make; make install' should
configure, build, and install this package.  The following
more-detailed instructions are generic; see the `README' file for
instructions specific to this package.  Some packages provide this
`INSTALL' file but do not implement all of the features documented
below.  The lack of an optional feature in a given package is not
necessarily a bug.  More recommendations for GNU packages can be found
in *note Makefile Conventions: (standards)Makefile Conventions.

The `configure' shell script attempts to guess correct values for
various system-dependent variables used during compilation.  It uses
those values to create a `Makefile' in each directory of the package.
It may also create one or more `.h' files containing system-dependent
definitions.  Finally, it creates a shell script `config.status' that
you can run in the future to recreate the current configuration, and a
file `config.log' containing compiler output (useful mainly for
debugging `configure').

It can also use an optional file (typically called `config.cache'
and enabled with `--cache-file=config.cache' or simply `-C') that saves
the results of its tests to speed up reconfiguring.  Caching is
disabled by default to prevent problems with accidental use of stale
cache files.

If you need to do unusual things to compile the package, please try
to figure out how `configure' could check whether to do them, and mail
diffs or instructions to the address given in the `README' so they can
be considered for the next release.  If you are using the cache, and at
some point `config.cache' contains results you don't want to keep, you
may remove or edit it.

The file `configure.ac' (or `configure.in') is used to create
`configure' by a program called `autoconf'.  You need `configure.ac' if
you want to change it or regenerate `configure' using a newer version
of `autoconf'.

The simplest way to compile this package is:

  1. `cd' to the directory containing the package's source code and type
     `./configure' to configure the package for your system.

     Running `configure' might take a while.  While running, it prints
     some messages telling which features it is checking for.

  2. Type `make' to compile the package.

  3. Optionally, type `make check' to run any self-tests that come with
     the package, generally using the just-built uninstalled binaries.

  4. Type `make install' to install the programs and any data files and
     documentation.  When installing into a prefix owned by root, it is
     recommended that the package be configured and built as a regular
     user, and only the `make install' phase executed with root
     privileges.

  5. Optionally, type `make installcheck' to repeat any self-tests, but
     this time using the binaries in their final installed location.
     This target does not install anything.  Running this target as a
     regular user, particularly if the prior `make install' required
     root privileges, verifies that the installation completed
     correctly.

  6. You can remove the program binaries and object files from the
     source code directory by typing `make clean'.  To also remove the
     files that `configure' created (so you can compile the package for
     a different kind of computer), type `make distclean'.  There is
     also a `make maintainer-clean' target, but that is intended mainly
     for the package's developers.  If you use it, you may have to get
     all sorts of other programs in order to regenerate files that came
     with the distribution.

  7. Often, you can also type `make uninstall' to remove the installed
     files again.  In practice, not all packages have tested that
     uninstallation works correctly, even though it is required by the
     GNU Coding Standards.

  8. Some packages, particularly those that use Automake, provide `make
     distcheck', which can be used by developers to test that all other
     targets like `make install' and `make uninstall' work correctly.
     This target is generally not run by end users.

Compilers and Options
=====================

Some systems require unusual options for compilation or linking that
the `configure' script does not know about.  Run `./configure --help'
for details on some of the pertinent environment variables.

You can give `configure' initial values for configuration parameters
by setting variables in the command line or in the environment.  Here
is an example:

     ./configure CC=c99 CFLAGS=-g LIBS=-lposix

*Note Defining Variables::, for more details.

Compiling For Multiple Architectures
====================================

You can compile the package for more than one kind of computer at the
same time, by placing the object files for each architecture in their
own directory.  To do this, you can use GNU `make'.  `cd' to the
directory where you want the object files and executables to go and run
the `configure' script.  `configure' automatically checks for the
source code in the directory that `configure' is in and in `..'.  This
is known as a "VPATH" build.

With a non-GNU `make', it is safer to compile the package for one
architecture at a time in the source code directory.  After you have
installed the package for one architecture, use `make distclean' before
reconfiguring for another architecture.

On MacOS X 10.5 and later systems, you can create libraries and
executables that work on multiple system types--known as "fat" or
"universal" binaries--by specifying multiple `-arch' options to the
compiler but only a single `-arch' option to the preprocessor.  Like
this:

     ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
                 CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
                 CPP="gcc -E" CXXCPP="g++ -E"

This is not guaranteed to produce working output in all cases, you
may have to build one architecture at a time and combine the results
using the `lipo' tool if you have problems.

Installation Names
==================

By default, `make install' installs the package's commands under
`/usr/local/bin', include files under `/usr/local/include', etc.  You
can specify an installation prefix other than `/usr/local' by giving
`configure' the option `--prefix=PREFIX', where PREFIX must be an
absolute file name.

You can specify separate installation prefixes for
architecture-specific files and architecture-independent files.  If you
pass the option `--exec-prefix=PREFIX' to `configure', the package uses
PREFIX as the prefix for installing programs and libraries.
Documentation and other data files still use the regular prefix.

In addition, if you use an unusual directory layout you can give
options like `--bindir=DIR' to specify different values for particular
kinds of files.  Run `configure --help' for a list of the directories
you can set and what kinds of files go in them.  In general, the
default for these options is expressed in terms of `${prefix}', so that
specifying just `--prefix' will affect all of the other directory
specifications that were not explicitly provided.

The most portable way to affect installation locations is to pass the
correct locations to `configure'; however, many packages provide one or
both of the following shortcuts of passing variable assignments to the
`make install' command line to change installation locations without
having to reconfigure or recompile.

The first method involves providing an override variable for each
affected directory.  For example, `make install
prefix=/alternate/directory' will choose an alternate location for all
directory configuration variables that were expressed in terms of
`${prefix}'.  Any directories that were specified during `configure',
but not in terms of `${prefix}', must each be overridden at install
time for the entire installation to be relocated.  The approach of
makefile variable overrides for each directory variable is required by
the GNU Coding Standards, and ideally causes no recompilation.
However, some platforms have known limitations with the semantics of
shared libraries that end up requiring recompilation when using this
method, particularly noticeable in packages that use GNU Libtool.

The second method involves providing the `DESTDIR' variable.  For
example, `make install DESTDIR=/alternate/directory' will prepend
`/alternate/directory' before all installation names.  The approach of
`DESTDIR' overrides is not required by the GNU Coding Standards, and
does not work on platforms that have drive letters.  On the other hand,
it does better at avoiding recompilation issues, and works well even
when some directory options were not specified in terms of `${prefix}'
at `configure' time.

Optional Features
=================

If the package supports it, you can cause programs to be installed
with an extra prefix or suffix on their names by giving `configure' the
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.

Some packages pay attention to `--enable-FEATURE' options to
`configure', where FEATURE indicates an optional part of the package.
They may also pay attention to `--with-PACKAGE' options, where PACKAGE
is something like `gnu-as' or `x' (for the X Window System).  The
`README' should mention any `--enable-' and `--with-' options that the
package recognizes.

For packages that use the X Window System, `configure' can usually
find the X include and library files automatically, but if it doesn't,
you can use the `configure' options `--x-includes=DIR' and
`--x-libraries=DIR' to specify their locations.

Some packages offer the ability to configure how verbose the
execution of `make' will be.  For these packages, running `./configure
--enable-silent-rules' sets the default to minimal output, which can be
overridden with `make V=1'; while running `./configure
--disable-silent-rules' sets the default to verbose, which can be
overridden with `make V=0'.

Particular systems
==================

On HP-UX, the default C compiler is not ANSI C compatible.  If GNU
CC is not installed, it is recommended to use the following options in
order to use an ANSI C compiler:

     ./configure CC="cc -Ae -D_XOPEN_SOURCE=500"

and if that doesn't work, install pre-built binaries of GCC for HP-UX.

HP-UX `make' updates targets which have the same time stamps as
their prerequisites, which makes it generally unusable when shipped
generated files such as `configure' are involved.  Use GNU `make'
instead.

On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
parse its `<wchar.h>' header file.  The option `-nodtk' can be used as
a workaround.  If GNU CC is not installed, it is therefore recommended
to try

     ./configure CC="cc"

and if that doesn't work, try

     ./configure CC="cc -nodtk"

On Solaris, don't put `/usr/ucb' early in your `PATH'.  This
directory contains several dysfunctional programs; working variants of
these programs are available in `/usr/bin'.  So, if you need `/usr/ucb'
in your `PATH', put it _after_ `/usr/bin'.

On Haiku, software installed for all users goes in `/boot/common',
not `/usr/local'.  It is recommended to use the following options:

     ./configure --prefix=/boot/common

Specifying the System Type
==========================

There may be some features `configure' cannot figure out
automatically, but needs to determine by the type of machine the package
will run on.  Usually, assuming the package is built to be run on the
_same_ architectures, `configure' can figure that out, but if it prints
a message saying it cannot guess the machine type, give it the
`--build=TYPE' option.  TYPE can either be a short name for the system
type, such as `sun4', or a canonical name which has the form:

     CPU-COMPANY-SYSTEM

where SYSTEM can have one of these forms:

     OS
     KERNEL-OS

See the file `config.sub' for the possible values of each field.  If
`config.sub' isn't included in this package, then this package doesn't
need to know the machine type.

If you are _building_ compiler tools for cross-compiling, you should
use the option `--target=TYPE' to select the type of system they will
produce code for.

If you want to _use_ a cross compiler, that generates code for a
platform different from the build platform, you should specify the
"host" platform (i.e., that on which the generated programs will
eventually be run) with `--host=TYPE'.

Sharing Defaults
================

If you want to set default values for `configure' scripts to share,
you can create a site shell script called `config.site' that gives
default values for variables like `CC', `cache_file', and `prefix'.
`configure' looks for `PREFIX/share/config.site' if it exists, then
`PREFIX/etc/config.site' if it exists.  Or, you can set the
`CONFIG_SITE' environment variable to the location of the site script.
A warning: not all `configure' scripts look for a site script.

Defining Variables
==================

Variables not defined in a site shell script can be set in the
environment passed to `configure'.  However, some packages may run
configure again during the build, and the customized values of these
variables may be lost.  In order to avoid this problem, you should set
them in the `configure' command line, using `VAR=value'.  For example:

     ./configure CC=/usr/local2/bin/gcc

causes the specified `gcc' to be used as the C compiler (unless it is
overridden in the site shell script).

Unfortunately, this technique does not work for `CONFIG_SHELL' due to
an Autoconf bug.  Until the bug is fixed you can use this workaround:

     CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash

`configure' Invocation
======================

`configure' recognizes the following options to control how it
operates.

`--help'
`-h'
     Print a summary of all of the options to `configure', and exit.

`--help=short'
`--help=recursive'
     Print a summary of the options unique to this package's
     `configure', and exit.  The `short' variant lists options used
     only in the top level, while the `recursive' variant lists options
     also present in any nested packages.

`--version'
`-V'
     Print the version of Autoconf used to generate the `configure'
     script, and exit.

`--cache-file=FILE'
     Enable the cache: use and save the results of the tests in FILE,
     traditionally `config.cache'.  FILE defaults to `/dev/null' to
     disable caching.

`--config-cache'
`-C'
     Alias for `--cache-file=config.cache'.

`--quiet'
`--silent'
`-q'
     Do not print messages saying which checks are being made.  To
     suppress all normal output, redirect it to `/dev/null' (any error
     messages will still be shown).

`--srcdir=DIR'
     Look for the package's source code in directory DIR.  Usually
     `configure' can determine that directory automatically.

`--prefix=DIR'
     Use DIR as the installation prefix.  *note Installation Names::
     for more details, including other options available for fine-tuning
     the installation locations.

`--no-create'
`-n'
     Run the configure checks, but stop before creating any output
     files.

`configure' also accepts some other, not widely useful, options.  Run
`configure --help' for more details.

@@ -1,20 +0,0 @@

AM_CFLAGS = @THREADSAFE_FLAGS@ @DYNAMIC_EXTENSION_FLAGS@ @FTS5_FLAGS@ @JSON1_FLAGS@ -DSQLITE_ENABLE_FTS3 -DSQLITE_ENABLE_RTREE

lib_LTLIBRARIES = libsqlite3.la
libsqlite3_la_SOURCES = sqlite3.c
libsqlite3_la_LDFLAGS = -no-undefined -version-info 8:6:8

bin_PROGRAMS = sqlite3
sqlite3_SOURCES = shell.c sqlite3.c sqlite3.h
sqlite3_LDADD = @READLINE_LIBS@
sqlite3_DEPENDENCIES = @EXTRA_SHELL_OBJ@
sqlite3_CFLAGS = $(AM_CFLAGS) -DSQLITE_ENABLE_EXPLAIN_COMMENTS

include_HEADERS = sqlite3.h sqlite3ext.h

EXTRA_DIST = sqlite3.1 tea Makefile.msc sqlite3.rc README.txt
pkgconfigdir = ${libdir}/pkgconfig
pkgconfig_DATA = sqlite3.pc

man_MANS = sqlite3.1
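
The pkgconfig_DATA entry above installs sqlite3.pc, so programs built outside this tree can locate the installed headers and library through pkg-config. A minimal sketch (the file name version.c and the build command are illustrative assumptions, not part of this makefile):

    /* build: cc version.c $(pkg-config --cflags --libs sqlite3) -o version */
    #include <stdio.h>
    #include <sqlite3.h>

    int main(void){
      /* sqlite3_libversion() reports the version of the library actually linked. */
      printf("SQLite %s\n", sqlite3_libversion());
      return 0;
    }
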
@ -1,930 +0,0 @@
|
||||
#### DO NOT EDIT ####
|
||||
# This makefile is automatically generated from the Makefile.msc at
|
||||
# the root of the canonical SQLite source tree (not the
|
||||
# amalgamation tarball) using the tool/mkmsvcmin.tcl
|
||||
# script.
|
||||
#
|
||||
|
||||
#
|
||||
# nmake Makefile for SQLite
|
||||
#
|
||||
###############################################################################
|
||||
############################## START OF OPTIONS ###############################
|
||||
###############################################################################
|
||||
|
||||
# The toplevel directory of the source tree. This is the directory
|
||||
# that contains this "Makefile.msc".
|
||||
#
|
||||
TOP = .
|
||||
|
||||
|
||||
# Set this non-0 to enable full warnings (-W4, etc) when compiling.
|
||||
#
|
||||
!IFNDEF USE_FULLWARN
|
||||
USE_FULLWARN = 0
|
||||
!ENDIF
|
||||
|
||||
# Set this non-0 to use "stdcall" calling convention for the core library
|
||||
# and shell executable.
|
||||
#
|
||||
!IFNDEF USE_STDCALL
|
||||
USE_STDCALL = 0
|
||||
!ENDIF
|
||||
|
||||
# Set this non-0 to have the shell executable link against the core dynamic
|
||||
# link library.
|
||||
#
|
||||
!IFNDEF DYNAMIC_SHELL
|
||||
DYNAMIC_SHELL = 0
|
||||
!ENDIF
|
||||
|
||||
# Set this non-0 to enable extra code that attempts to detect misuse of the
|
||||
# SQLite API.
|
||||
#
|
||||
!IFNDEF API_ARMOR
|
||||
API_ARMOR = 0
|
||||
!ENDIF
|
||||
|
||||
# If necessary, create a list of harmless compiler warnings to disable when
|
||||
# compiling the various tools. For the SQLite source code itself, warnings,
|
||||
# if any, will be disabled from within it.
|
||||
#
|
||||
!IFNDEF NO_WARN
|
||||
!IF $(USE_FULLWARN)!=0
|
||||
NO_WARN = -wd4054 -wd4055 -wd4100 -wd4127 -wd4130 -wd4152 -wd4189 -wd4206
|
||||
NO_WARN = $(NO_WARN) -wd4210 -wd4232 -wd4305 -wd4306 -wd4702 -wd4706
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# Set this non-0 to use the library paths and other options necessary for
|
||||
# Windows Phone 8.1.
|
||||
#
|
||||
!IFNDEF USE_WP81_OPTS
|
||||
USE_WP81_OPTS = 0
|
||||
!ENDIF
|
||||
|
||||
# Set this non-0 to split the SQLite amalgamation file into chunks to
|
||||
# be used for debugging with Visual Studio.
|
||||
#
|
||||
!IFNDEF SPLIT_AMALGAMATION
|
||||
SPLIT_AMALGAMATION = 0
|
||||
!ENDIF
|
||||
|
||||
|
||||
# Set this non-0 to dynamically link to the MSVC runtime library.
|
||||
#
|
||||
!IFNDEF USE_CRT_DLL
|
||||
USE_CRT_DLL = 0
|
||||
!ENDIF
|
||||
|
||||
# Set this non-0 to link to the RPCRT4 library.
|
||||
#
|
||||
!IFNDEF USE_RPCRT4_LIB
|
||||
USE_RPCRT4_LIB = 0
|
||||
!ENDIF
|
||||
|
||||
# Set this non-0 to generate assembly code listings for the source code
|
||||
# files.
|
||||
#
|
||||
!IFNDEF USE_LISTINGS
|
||||
USE_LISTINGS = 0
|
||||
!ENDIF
|
||||
|
||||
# Set this non-0 to attempt setting the native compiler automatically
|
||||
# for cross-compiling the command line tools needed during the compilation
|
||||
# process.
|
||||
#
|
||||
!IFNDEF XCOMPILE
|
||||
XCOMPILE = 0
|
||||
!ENDIF
|
||||
|
||||
# Set this non-0 to use the native libraries paths for cross-compiling
|
||||
# the command line tools needed during the compilation process.
|
||||
#
|
||||
!IFNDEF USE_NATIVE_LIBPATHS
|
||||
USE_NATIVE_LIBPATHS = 0
|
||||
!ENDIF
|
||||
|
||||
# Set this 0 to skip the compiling and embedding of version resources.
|
||||
#
|
||||
!IFNDEF USE_RC
|
||||
USE_RC = 1
|
||||
!ENDIF
|
||||
|
||||
# Set this non-0 to compile binaries suitable for the WinRT environment.
|
||||
# This setting does not apply to any binaries that require Tcl to operate
|
||||
# properly (i.e. the text fixture, etc).
|
||||
#
|
||||
!IFNDEF FOR_WINRT
|
||||
FOR_WINRT = 0
|
||||
!ENDIF
|
||||
|
||||
# Set this non-0 to compile binaries suitable for the UWP environment.
|
||||
# This setting does not apply to any binaries that require Tcl to operate
|
||||
# properly (i.e. the text fixture, etc).
|
||||
#
|
||||
!IFNDEF FOR_UWP
|
||||
FOR_UWP = 0
|
||||
!ENDIF
|
||||
|
||||
# Set this non-0 to compile binaries suitable for the Windows 10 platform.
|
||||
#
|
||||
!IFNDEF FOR_WIN10
|
||||
FOR_WIN10 = 0
|
||||
!ENDIF
|
||||
|
||||
|
||||
# Set this to non-0 to create and use PDBs.
|
||||
#
|
||||
!IFNDEF SYMBOLS
|
||||
SYMBOLS = 1
|
||||
!ENDIF
|
||||
|
||||
# Set this to non-0 to use the SQLite debugging heap subsystem.
|
||||
#
|
||||
!IFNDEF MEMDEBUG
|
||||
MEMDEBUG = 0
|
||||
!ENDIF
|
||||
|
||||
# Set this to non-0 to use the Win32 native heap subsystem.
|
||||
#
|
||||
!IFNDEF WIN32HEAP
|
||||
WIN32HEAP = 0
|
||||
!ENDIF
|
||||
|
||||
# Set this to non-0 to enable OSTRACE() macros, which can be useful when
|
||||
# debugging.
|
||||
#
|
||||
!IFNDEF OSTRACE
|
||||
OSTRACE = 0
|
||||
!ENDIF
|
||||
|
||||
# Set this to one of the following values to enable various debugging
|
||||
# features. Each level includes the debugging options from the previous
|
||||
# levels. Currently, the recognized values for DEBUG are:
|
||||
#
|
||||
# 0 == NDEBUG: Disables assert() and other runtime diagnostics.
|
||||
# 1 == SQLITE_ENABLE_API_ARMOR: extra attempts to detect misuse of the API.
|
||||
# 2 == Disables NDEBUG and all optimizations and then enables PDBs.
|
||||
# 3 == SQLITE_DEBUG: Enables various diagnostics messages and code.
|
||||
# 4 == SQLITE_WIN32_MALLOC_VALIDATE: Validate the Win32 native heap per call.
|
||||
# 5 == SQLITE_DEBUG_OS_TRACE: Enables output from the OSTRACE() macros.
|
||||
# 6 == SQLITE_ENABLE_IOTRACE: Enables output from the IOTRACE() macros.
|
||||
#
|
||||
!IFNDEF DEBUG
|
||||
DEBUG = 0
|
||||
!ENDIF
|
||||
|
||||
# Enable use of available compiler optimizations? Normally, this should be
|
||||
# non-zero. Setting this to zero, thus disabling all compiler optimizations,
|
||||
# can be useful for testing.
|
||||
#
|
||||
!IFNDEF OPTIMIZATIONS
|
||||
OPTIMIZATIONS = 2
|
||||
!ENDIF
|
||||
|
||||
# Set the source code file to be used by executables and libraries when
|
||||
# they need the amalgamation.
|
||||
#
|
||||
!IFNDEF SQLITE3C
|
||||
!IF $(SPLIT_AMALGAMATION)!=0
|
||||
SQLITE3C = sqlite3-all.c
|
||||
!ELSE
|
||||
SQLITE3C = sqlite3.c
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# Set the include code file to be used by executables and libraries when
|
||||
# they need SQLite.
|
||||
#
|
||||
!IFNDEF SQLITE3H
|
||||
SQLITE3H = sqlite3.h
|
||||
!ENDIF
|
||||
|
||||
# This is the name to use for the SQLite dynamic link library (DLL).
|
||||
#
|
||||
!IFNDEF SQLITE3DLL
|
||||
!IF $(FOR_WIN10)!=0
|
||||
SQLITE3DLL = winsqlite3.dll
|
||||
!ELSE
|
||||
SQLITE3DLL = sqlite3.dll
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# This is the name to use for the SQLite import library (LIB).
|
||||
#
|
||||
!IFNDEF SQLITE3LIB
|
||||
!IF $(FOR_WIN10)!=0
|
||||
SQLITE3LIB = winsqlite3.lib
|
||||
!ELSE
|
||||
SQLITE3LIB = sqlite3.lib
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# This is the name to use for the SQLite shell executable (EXE).
|
||||
#
|
||||
!IFNDEF SQLITE3EXE
|
||||
!IF $(FOR_WIN10)!=0
|
||||
SQLITE3EXE = winsqlite3shell.exe
|
||||
!ELSE
|
||||
SQLITE3EXE = sqlite3.exe
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# This is the argument used to set the program database (PDB) file for the
|
||||
# SQLite shell executable (EXE).
|
||||
#
|
||||
!IFNDEF SQLITE3EXEPDB
|
||||
!IF $(FOR_WIN10)!=0
|
||||
SQLITE3EXEPDB =
|
||||
!ELSE
|
||||
SQLITE3EXEPDB = /pdb:sqlite3sh.pdb
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# These are the "standard" SQLite compilation options used when compiling for
|
||||
# the Windows platform.
|
||||
#
|
||||
!IFNDEF OPT_FEATURE_FLAGS
|
||||
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_FTS3=1
|
||||
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_RTREE=1
|
||||
OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_COLUMN_METADATA=1
|
||||
!ENDIF
|
||||
|
||||
# These are the "extended" SQLite compilation options used when compiling for
|
||||
# the Windows 10 platform.
|
||||
#
|
||||
!IFNDEF EXT_FEATURE_FLAGS
|
||||
!IF $(FOR_WIN10)!=0
|
||||
EXT_FEATURE_FLAGS = $(EXT_FEATURE_FLAGS) -DSQLITE_ENABLE_FTS4=1
|
||||
EXT_FEATURE_FLAGS = $(EXT_FEATURE_FLAGS) -DSQLITE_SYSTEM_MALLOC=1
|
||||
EXT_FEATURE_FLAGS = $(EXT_FEATURE_FLAGS) -DSQLITE_OMIT_LOCALTIME=1
|
||||
!ELSE
|
||||
EXT_FEATURE_FLAGS =
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
###############################################################################
|
||||
############################### END OF OPTIONS ################################
|
||||
###############################################################################
|
||||
|
||||
# When compiling for the Windows 10 platform, the PLATFORM macro must be set
|
||||
# to an appropriate value (e.g. x86, x64, arm, arm64, etc).
|
||||
#
|
||||
!IF $(FOR_WIN10)!=0
|
||||
!IFNDEF PLATFORM
|
||||
!ERROR Using the FOR_WIN10 option requires a value for PLATFORM.
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# This assumes that MSVC is always installed in 32-bit Program Files directory
|
||||
# and sets the variable for use in locating other 32-bit installs accordingly.
|
||||
#
|
||||
PROGRAMFILES_X86 = $(VCINSTALLDIR)\..\..
|
||||
PROGRAMFILES_X86 = $(PROGRAMFILES_X86:\\=\)
|
||||
|
||||
# Check for the predefined command macro CC. This should point to the compiler
|
||||
# binary for the target platform. If it is not defined, simply define it to
|
||||
# the legacy default value 'cl.exe'.
|
||||
#
|
||||
!IFNDEF CC
|
||||
CC = cl.exe
|
||||
!ENDIF
|
||||
|
||||
# Check for the command macro LD. This should point to the linker binary for
|
||||
# the target platform. If it is not defined, simply define it to the legacy
|
||||
# default value 'link.exe'.
|
||||
#
|
||||
!IFNDEF LD
|
||||
LD = link.exe
|
||||
!ENDIF
|
||||
|
||||
# Check for the predefined command macro RC. This should point to the resource
|
||||
# compiler binary for the target platform. If it is not defined, simply define
|
||||
# it to the legacy default value 'rc.exe'.
|
||||
#
|
||||
!IFNDEF RC
|
||||
RC = rc.exe
|
||||
!ENDIF
|
||||
|
||||
# Check for the MSVC runtime library path macro. Otherwise, this value will
|
||||
# default to the 'lib' directory underneath the MSVC installation directory.
|
||||
#
|
||||
!IFNDEF CRTLIBPATH
|
||||
CRTLIBPATH = $(VCINSTALLDIR)\lib
|
||||
!ENDIF
|
||||
|
||||
CRTLIBPATH = $(CRTLIBPATH:\\=\)
|
||||
|
||||
# Check for the command macro NCC. This should point to the compiler binary
|
||||
# for the platform the compilation process is taking place on. If it is not
|
||||
# defined, simply define it to have the same value as the CC macro. When
|
||||
# cross-compiling, it is suggested that this macro be modified via the command
|
||||
# line (since nmake itself does not provide a built-in method to guess it).
|
||||
# For example, to use the x86 compiler when cross-compiling for x64, a command
|
||||
# line similar to the following could be used (all on one line):
|
||||
#
|
||||
# nmake /f Makefile.msc sqlite3.dll
|
||||
# XCOMPILE=1 USE_NATIVE_LIBPATHS=1
|
||||
#
|
||||
# Alternatively, the full path and file name to the compiler binary for the
|
||||
# platform the compilation process is taking place may be specified (all on
|
||||
# one line):
|
||||
#
|
||||
# nmake /f Makefile.msc sqlite3.dll
|
||||
# "NCC=""%VCINSTALLDIR%\bin\cl.exe"""
|
||||
# USE_NATIVE_LIBPATHS=1
|
||||
#
|
||||
!IFDEF NCC
|
||||
NCC = $(NCC:\\=\)
|
||||
!ELSEIF $(XCOMPILE)!=0
|
||||
NCC = "$(VCINSTALLDIR)\bin\$(CC)"
|
||||
NCC = $(NCC:\\=\)
|
||||
!ELSE
|
||||
NCC = $(CC)
|
||||
!ENDIF
|
||||
|
||||
# Check for the MSVC native runtime library path macro. Otherwise,
|
||||
# this value will default to the 'lib' directory underneath the MSVC
|
||||
# installation directory.
|
||||
#
|
||||
!IFNDEF NCRTLIBPATH
|
||||
NCRTLIBPATH = $(VCINSTALLDIR)\lib
|
||||
!ENDIF
|
||||
|
||||
NCRTLIBPATH = $(NCRTLIBPATH:\\=\)
|
||||
|
||||
# Check for the Platform SDK library path macro. Otherwise, this
|
||||
# value will default to the 'lib' directory underneath the Windows
|
||||
# SDK installation directory (the environment variable used appears
|
||||
# to be available when using Visual C++ 2008 or later via the
|
||||
# command line).
|
||||
#
|
||||
!IFNDEF NSDKLIBPATH
|
||||
NSDKLIBPATH = $(WINDOWSSDKDIR)\lib
|
||||
!ENDIF
|
||||
|
||||
NSDKLIBPATH = $(NSDKLIBPATH:\\=\)
|
||||
|
||||
# Check for the UCRT library path macro. Otherwise, this value will
|
||||
# default to the version-specific, platform-specific 'lib' directory
|
||||
# underneath the Windows SDK installation directory.
|
||||
#
|
||||
!IFNDEF UCRTLIBPATH
|
||||
UCRTLIBPATH = $(WINDOWSSDKDIR)\lib\$(WINDOWSSDKLIBVERSION)\ucrt\$(PLATFORM)
|
||||
!ENDIF
|
||||
|
||||
UCRTLIBPATH = $(UCRTLIBPATH:\\=\)
|
||||
|
||||
# C compiler and options for use in building executables that
|
||||
# will run on the platform that is doing the build.
|
||||
#
|
||||
!IF $(USE_FULLWARN)!=0
|
||||
BCC = $(NCC) -nologo -W4 $(CCOPTS) $(BCCOPTS)
|
||||
!ELSE
|
||||
BCC = $(NCC) -nologo -W3 $(CCOPTS) $(BCCOPTS)
|
||||
!ENDIF
|
||||
|
||||
# Check if assembly code listings should be generated for the source
|
||||
# code files to be compiled.
|
||||
#
|
||||
!IF $(USE_LISTINGS)!=0
|
||||
BCC = $(BCC) -FAcs
|
||||
!ENDIF
|
||||
|
||||
# Check if the native library paths should be used when compiling
|
||||
# the command line tools used during the compilation process. If
|
||||
# so, set the necessary macro now.
|
||||
#
|
||||
!IF $(USE_NATIVE_LIBPATHS)!=0
|
||||
NLTLIBPATHS = "/LIBPATH:$(NCRTLIBPATH)" "/LIBPATH:$(NSDKLIBPATH)"
|
||||
|
||||
!IFDEF NUCRTLIBPATH
|
||||
NUCRTLIBPATH = $(NUCRTLIBPATH:\\=\)
|
||||
NLTLIBPATHS = $(NLTLIBPATHS) "/LIBPATH:$(NUCRTLIBPATH)"
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# C compiler and options for use in building executables that
|
||||
# will run on the target platform. (BCC and TCC are usually the
|
||||
# same unless your are cross-compiling.)
|
||||
#
|
||||
!IF $(USE_FULLWARN)!=0
|
||||
TCC = $(CC) -nologo -W4 -DINCLUDE_MSVC_H=1 $(CCOPTS) $(TCCOPTS)
|
||||
!ELSE
|
||||
TCC = $(CC) -nologo -W3 $(CCOPTS) $(TCCOPTS)
|
||||
!ENDIF
|
||||
|
||||
TCC = $(TCC) -DSQLITE_OS_WIN=1 -I. -I$(TOP) -fp:precise
|
||||
RCC = $(RC) -DSQLITE_OS_WIN=1 -I. -I$(TOP) $(RCOPTS) $(RCCOPTS)
|
||||
|
||||
# Check if we want to use the "stdcall" calling convention when compiling.
|
||||
# This is not supported by the compilers for non-x86 platforms. It should
|
||||
# also be noted here that building any target with these "stdcall" options
|
||||
# will most likely fail if the Tcl library is also required. This is due
|
||||
# to how the Tcl library functions are declared and exported (i.e. without
|
||||
# an explicit calling convention, which results in "cdecl").
|
||||
#
|
||||
!IF $(USE_STDCALL)!=0 || $(FOR_WIN10)!=0
|
||||
!IF "$(PLATFORM)"=="x86"
|
||||
CORE_CCONV_OPTS = -Gz -DSQLITE_CDECL=__cdecl -DSQLITE_STDCALL=__stdcall
|
||||
SHELL_CCONV_OPTS = -Gz -DSQLITE_CDECL=__cdecl -DSQLITE_STDCALL=__stdcall
|
||||
!ELSE
|
||||
!IFNDEF PLATFORM
|
||||
CORE_CCONV_OPTS = -Gz -DSQLITE_CDECL=__cdecl -DSQLITE_STDCALL=__stdcall
|
||||
SHELL_CCONV_OPTS = -Gz -DSQLITE_CDECL=__cdecl -DSQLITE_STDCALL=__stdcall
|
||||
!ELSE
|
||||
CORE_CCONV_OPTS =
|
||||
SHELL_CCONV_OPTS =
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
!ELSE
|
||||
CORE_CCONV_OPTS =
|
||||
SHELL_CCONV_OPTS =
|
||||
!ENDIF
|
||||
|
||||
# These are additional compiler options used for the core library.
|
||||
#
|
||||
!IFNDEF CORE_COMPILE_OPTS
|
||||
!IF $(DYNAMIC_SHELL)!=0 || $(FOR_WIN10)!=0
|
||||
CORE_COMPILE_OPTS = $(CORE_CCONV_OPTS) -DSQLITE_API=__declspec(dllexport)
|
||||
!ELSE
|
||||
CORE_COMPILE_OPTS = $(CORE_CCONV_OPTS)
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# These are the additional targets that the core library should depend on
|
||||
# when linking.
|
||||
#
|
||||
!IFNDEF CORE_LINK_DEP
|
||||
!IF $(DYNAMIC_SHELL)!=0
|
||||
CORE_LINK_DEP =
|
||||
!ELSEIF $(FOR_WIN10)==0 || "$(PLATFORM)"=="x86"
|
||||
CORE_LINK_DEP =
|
||||
!ELSE
|
||||
CORE_LINK_DEP =
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# These are additional linker options used for the core library.
|
||||
#
|
||||
!IFNDEF CORE_LINK_OPTS
|
||||
!IF $(DYNAMIC_SHELL)!=0
|
||||
CORE_LINK_OPTS =
|
||||
!ELSEIF $(FOR_WIN10)==0 || "$(PLATFORM)"=="x86"
|
||||
CORE_LINK_OPTS =
|
||||
!ELSE
|
||||
CORE_LINK_OPTS =
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# These are additional compiler options used for the shell executable.
|
||||
#
|
||||
!IFNDEF SHELL_COMPILE_OPTS
|
||||
!IF $(DYNAMIC_SHELL)!=0 || $(FOR_WIN10)!=0
|
||||
SHELL_COMPILE_OPTS = $(SHELL_CCONV_OPTS) -DSQLITE_API=__declspec(dllimport)
|
||||
!ELSE
|
||||
SHELL_COMPILE_OPTS = $(SHELL_CCONV_OPTS)
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# This is the source code that the shell executable should be compiled
|
||||
# with.
|
||||
#
|
||||
!IFNDEF SHELL_CORE_SRC
|
||||
!IF $(DYNAMIC_SHELL)!=0 || $(FOR_WIN10)!=0
|
||||
SHELL_CORE_SRC =
|
||||
!ELSE
|
||||
SHELL_CORE_SRC = $(SQLITE3C)
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# This is the core library that the shell executable should depend on.
|
||||
#
|
||||
!IFNDEF SHELL_CORE_DEP
|
||||
!IF $(DYNAMIC_SHELL)!=0 || $(FOR_WIN10)!=0
|
||||
SHELL_CORE_DEP = $(SQLITE3DLL)
|
||||
!ELSE
|
||||
SHELL_CORE_DEP =
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# This is the core library that the shell executable should link with.
|
||||
#
|
||||
!IFNDEF SHELL_CORE_LIB
|
||||
!IF $(DYNAMIC_SHELL)!=0 || $(FOR_WIN10)!=0
|
||||
SHELL_CORE_LIB = $(SQLITE3LIB)
|
||||
!ELSE
|
||||
SHELL_CORE_LIB =
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# These are additional linker options used for the shell executable.
|
||||
#
|
||||
!IFNDEF SHELL_LINK_OPTS
|
||||
SHELL_LINK_OPTS = $(SHELL_CORE_LIB)
|
||||
!ENDIF
|
||||
|
||||
# Check if assembly code listings should be generated for the source
|
||||
# code files to be compiled.
|
||||
#
|
||||
!IF $(USE_LISTINGS)!=0
|
||||
TCC = $(TCC) -FAcs
|
||||
!ENDIF
|
||||
|
||||
# When compiling the library for use in the WinRT environment,
|
||||
# the following compile-time options must be used as well to
|
||||
# disable use of Win32 APIs that are not available and to enable
|
||||
# use of Win32 APIs that are specific to Windows 8 and/or WinRT.
|
||||
#
|
||||
!IF $(FOR_WINRT)!=0
|
||||
TCC = $(TCC) -DSQLITE_OS_WINRT=1
|
||||
RCC = $(RCC) -DSQLITE_OS_WINRT=1
|
||||
TCC = $(TCC) -DWINAPI_FAMILY=WINAPI_FAMILY_APP
|
||||
RCC = $(RCC) -DWINAPI_FAMILY=WINAPI_FAMILY_APP
|
||||
!ENDIF
|
||||
|
||||
# C compiler options for the Windows 10 platform (needs MSVC 2015).
|
||||
#
|
||||
!IF $(FOR_WIN10)!=0
|
||||
TCC = $(TCC) /d2guard4 -D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE
|
||||
BCC = $(BCC) /d2guard4 -D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE
|
||||
!ENDIF
|
||||
|
||||
# Also, we need to dynamically link to the correct MSVC runtime
|
||||
# when compiling for WinRT (e.g. debug or release) OR if the
|
||||
# USE_CRT_DLL option is set to force dynamically linking to the
|
||||
# MSVC runtime library.
|
||||
#
|
||||
!IF $(FOR_WINRT)!=0 || $(USE_CRT_DLL)!=0
|
||||
!IF $(DEBUG)>1
|
||||
TCC = $(TCC) -MDd
|
||||
BCC = $(BCC) -MDd
|
||||
!ELSE
|
||||
TCC = $(TCC) -MD
|
||||
BCC = $(BCC) -MD
|
||||
!ENDIF
|
||||
!ELSE
|
||||
!IF $(DEBUG)>1
|
||||
TCC = $(TCC) -MTd
|
||||
BCC = $(BCC) -MTd
|
||||
!ELSE
|
||||
TCC = $(TCC) -MT
|
||||
BCC = $(BCC) -MT
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
|
||||
# Define -DNDEBUG to compile without debugging (i.e., for production usage)
|
||||
# Omitting the define will cause extra debugging code to be inserted and
|
||||
# includes extra comments when "EXPLAIN stmt" is used.
|
||||
#
|
||||
!IF $(DEBUG)==0
|
||||
TCC = $(TCC) -DNDEBUG
|
||||
BCC = $(BCC) -DNDEBUG
|
||||
RCC = $(RCC) -DNDEBUG
|
||||
!ENDIF
|
||||
|
||||
!IF $(DEBUG)>0 || $(API_ARMOR)!=0 || $(FOR_WIN10)!=0
|
||||
TCC = $(TCC) -DSQLITE_ENABLE_API_ARMOR=1
|
||||
RCC = $(RCC) -DSQLITE_ENABLE_API_ARMOR=1
|
||||
!ENDIF
|
||||
|
||||
!IF $(DEBUG)>2
|
||||
TCC = $(TCC) -DSQLITE_DEBUG=1
|
||||
RCC = $(RCC) -DSQLITE_DEBUG=1
|
||||
!ENDIF
|
||||
|
||||
!IF $(DEBUG)>4 || $(OSTRACE)!=0
|
||||
TCC = $(TCC) -DSQLITE_FORCE_OS_TRACE=1 -DSQLITE_DEBUG_OS_TRACE=1
|
||||
RCC = $(RCC) -DSQLITE_FORCE_OS_TRACE=1 -DSQLITE_DEBUG_OS_TRACE=1
|
||||
!ENDIF
|
||||
|
||||
!IF $(DEBUG)>5
|
||||
TCC = $(TCC) -DSQLITE_ENABLE_IOTRACE=1
|
||||
RCC = $(RCC) -DSQLITE_ENABLE_IOTRACE=1
|
||||
!ENDIF
|
||||
|
||||
# Prevent warnings about "insecure" MSVC runtime library functions
|
||||
# being used.
|
||||
#
|
||||
TCC = $(TCC) -D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS
|
||||
BCC = $(BCC) -D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS
|
||||
RCC = $(RCC) -D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS
|
||||
|
||||
# Prevent warnings about "deprecated" POSIX functions being used.
|
||||
#
|
||||
TCC = $(TCC) -D_CRT_NONSTDC_NO_DEPRECATE -D_CRT_NONSTDC_NO_WARNINGS
|
||||
BCC = $(BCC) -D_CRT_NONSTDC_NO_DEPRECATE -D_CRT_NONSTDC_NO_WARNINGS
|
||||
RCC = $(RCC) -D_CRT_NONSTDC_NO_DEPRECATE -D_CRT_NONSTDC_NO_WARNINGS
|
||||
|
||||
# Use the SQLite debugging heap subsystem?
|
||||
#
|
||||
!IF $(MEMDEBUG)!=0
|
||||
TCC = $(TCC) -DSQLITE_MEMDEBUG=1
|
||||
RCC = $(RCC) -DSQLITE_MEMDEBUG=1
|
||||
|
||||
# Use native Win32 heap subsystem instead of malloc/free?
|
||||
#
|
||||
!ELSEIF $(WIN32HEAP)!=0
|
||||
TCC = $(TCC) -DSQLITE_WIN32_MALLOC=1
|
||||
RCC = $(RCC) -DSQLITE_WIN32_MALLOC=1
|
||||
|
||||
# Validate the heap on every call into the native Win32 heap subsystem?
|
||||
#
|
||||
!IF $(DEBUG)>3
|
||||
TCC = $(TCC) -DSQLITE_WIN32_MALLOC_VALIDATE=1
|
||||
RCC = $(RCC) -DSQLITE_WIN32_MALLOC_VALIDATE=1
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
|
||||
# Compiler options needed for programs that use the readline() library.
|
||||
#
|
||||
!IFNDEF READLINE_FLAGS
|
||||
READLINE_FLAGS = -DHAVE_READLINE=0
|
||||
!ENDIF
|
||||
|
||||
# The library that programs using readline() must link against.
|
||||
#
|
||||
!IFNDEF LIBREADLINE
|
||||
LIBREADLINE =
|
||||
!ENDIF
|
||||
|
||||
# Should the database engine be compiled threadsafe
|
||||
#
|
||||
TCC = $(TCC) -DSQLITE_THREADSAFE=1
|
||||
RCC = $(RCC) -DSQLITE_THREADSAFE=1
|
||||
|
||||
# Do threads override each other's locks by default (1), or do we test (-1)
|
||||
#
|
||||
TCC = $(TCC) -DSQLITE_THREAD_OVERRIDE_LOCK=-1
|
||||
RCC = $(RCC) -DSQLITE_THREAD_OVERRIDE_LOCK=-1
|
||||
|
||||
# Any target libraries which libsqlite must be linked against
|
||||
#
|
||||
!IFNDEF TLIBS
|
||||
TLIBS =
|
||||
!ENDIF
|
||||
|
||||
# Flags controlling use of the in memory btree implementation
|
||||
#
|
||||
# SQLITE_TEMP_STORE is 0 to force temporary tables to be in a file, 1 to
|
||||
# default to file, 2 to default to memory, and 3 to force temporary
|
||||
# tables to always be in memory.
|
||||
#
|
||||
TCC = $(TCC) -DSQLITE_TEMP_STORE=1
|
||||
RCC = $(RCC) -DSQLITE_TEMP_STORE=1
|
||||
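# With the value of 1 used here, temporary tables default to a file but
# an application can still move them to memory at run time (for example
# with "PRAGMA temp_store=MEMORY;"); values 0 and 3 take that choice
# away.
#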
|
||||
# Enable/disable loadable extensions, and other optional features
|
||||
# based on configuration. (-DSQLITE_OMIT*, -DSQLITE_ENABLE*).
|
||||
# The same set of OMIT and ENABLE flags should be passed to the
|
||||
# LEMON parser generator and the mkkeywordhash tool as well.
|
||||
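# For example (assuming the OPT_FEATURE_FLAGS default is wrapped in an
# !IFNDEF earlier in this makefile, as the other tunables here are), a
# custom set of optional features can be supplied on the NMAKE command
# line; the flag shown is only an illustration:
#
#   nmake /f Makefile.msc all "OPT_FEATURE_FLAGS=-DSQLITE_ENABLE_COLUMN_METADATA=1"
#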
|
||||
# These are the required SQLite compilation options used when compiling for
|
||||
# the Windows platform.
|
||||
#
|
||||
REQ_FEATURE_FLAGS = $(REQ_FEATURE_FLAGS) -DSQLITE_MAX_TRIGGER_DEPTH=100
|
||||
|
||||
# If we are linking to the RPCRT4 library, enable features that need it.
|
||||
#
|
||||
!IF $(USE_RPCRT4_LIB)!=0
|
||||
REQ_FEATURE_FLAGS = $(REQ_FEATURE_FLAGS) -DSQLITE_WIN32_USE_UUID=1
|
||||
!ENDIF
|
||||
|
||||
# Add the required and optional SQLite compilation options into the command
|
||||
# lines used to invoke the MSVC code and resource compilers.
|
||||
#
|
||||
TCC = $(TCC) $(REQ_FEATURE_FLAGS) $(OPT_FEATURE_FLAGS) $(EXT_FEATURE_FLAGS)
|
||||
RCC = $(RCC) $(REQ_FEATURE_FLAGS) $(OPT_FEATURE_FLAGS) $(EXT_FEATURE_FLAGS)
|
||||
|
||||
# Add in any optional parameters specified on the command line, e.g.
|
||||
# nmake /f Makefile.msc all "OPTS=-DSQLITE_ENABLE_FOO=1 -DSQLITE_OMIT_FOO=1"
|
||||
#
|
||||
TCC = $(TCC) $(OPTS)
|
||||
RCC = $(RCC) $(OPTS)
|
||||
|
||||
# If compiling for debugging, add some defines.
|
||||
#
|
||||
!IF $(DEBUG)>1
|
||||
TCC = $(TCC) -D_DEBUG
|
||||
BCC = $(BCC) -D_DEBUG
|
||||
RCC = $(RCC) -D_DEBUG
|
||||
!ENDIF
|
||||
|
||||
# If optimizations are enabled or disabled (either implicitly or
|
||||
# explicitly), add the necessary flags.
|
||||
#
|
||||
!IF $(DEBUG)>1 || $(OPTIMIZATIONS)==0
|
||||
TCC = $(TCC) -Od
|
||||
BCC = $(BCC) -Od
|
||||
!ELSEIF $(OPTIMIZATIONS)>=3
|
||||
TCC = $(TCC) -Ox
|
||||
BCC = $(BCC) -Ox
|
||||
!ELSEIF $(OPTIMIZATIONS)==2
|
||||
TCC = $(TCC) -O2
|
||||
BCC = $(BCC) -O2
|
||||
!ELSEIF $(OPTIMIZATIONS)==1
|
||||
TCC = $(TCC) -O1
|
||||
BCC = $(BCC) -O1
|
||||
!ENDIF
|
||||
|
||||
# If symbols are enabled (or compiling for debugging), enable PDBs.
|
||||
#
|
||||
!IF $(DEBUG)>1 || $(SYMBOLS)!=0
|
||||
TCC = $(TCC) -Zi
|
||||
BCC = $(BCC) -Zi
|
||||
!ENDIF
|
||||
|
||||
|
||||
# Command line prefixes for compiling code, compiling resources,
|
||||
# linking, etc.
|
||||
#
|
||||
LTCOMPILE = $(TCC) -Fo$@
|
||||
LTRCOMPILE = $(RCC) -r
|
||||
LTLIB = lib.exe
|
||||
LTLINK = $(TCC) -Fe$@
|
||||
|
||||
# If requested, link to the RPCRT4 library.
|
||||
#
|
||||
!IF $(USE_RPCRT4_LIB)!=0
|
||||
LTLINK = $(LTLINK) rpcrt4.lib
|
||||
!ENDIF
|
||||
|
||||
# If a platform was set, force the linker to target that.
|
||||
# Note that the vcvars*.bat family of batch files typically
|
||||
# set this for you. Otherwise, the linker will attempt
|
||||
# to deduce the binary type based on the object files.
|
||||
!IFDEF PLATFORM
|
||||
LTLINKOPTS = /NOLOGO /MACHINE:$(PLATFORM)
|
||||
LTLIBOPTS = /NOLOGO /MACHINE:$(PLATFORM)
|
||||
!ELSE
|
||||
LTLINKOPTS = /NOLOGO
|
||||
LTLIBOPTS = /NOLOGO
|
||||
!ENDIF
|
||||
|
||||
# When compiling for use in the WinRT environment, the following
|
||||
# linker option must be used to mark the executable as runnable
|
||||
# only in the context of an application container.
|
||||
#
|
||||
!IF $(FOR_WINRT)!=0
|
||||
LTLINKOPTS = $(LTLINKOPTS) /APPCONTAINER
|
||||
!IF "$(VISUALSTUDIOVERSION)"=="12.0" || "$(VISUALSTUDIOVERSION)"=="14.0"
|
||||
!IFNDEF STORELIBPATH
|
||||
!IF "$(PLATFORM)"=="x86"
|
||||
STORELIBPATH = $(CRTLIBPATH)\store
|
||||
!ELSEIF "$(PLATFORM)"=="x64"
|
||||
STORELIBPATH = $(CRTLIBPATH)\store\amd64
|
||||
!ELSEIF "$(PLATFORM)"=="ARM"
|
||||
STORELIBPATH = $(CRTLIBPATH)\store\arm
|
||||
!ELSE
|
||||
STORELIBPATH = $(CRTLIBPATH)\store
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
STORELIBPATH = $(STORELIBPATH:\\=\)
|
||||
LTLINKOPTS = $(LTLINKOPTS) "/LIBPATH:$(STORELIBPATH)"
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# When compiling for Windows Phone 8.1, an extra library path is
|
||||
# required.
|
||||
#
|
||||
!IF $(USE_WP81_OPTS)!=0
|
||||
!IFNDEF WP81LIBPATH
|
||||
!IF "$(PLATFORM)"=="x86"
|
||||
WP81LIBPATH = $(PROGRAMFILES_X86)\Windows Phone Kits\8.1\lib\x86
|
||||
!ELSEIF "$(PLATFORM)"=="ARM"
|
||||
WP81LIBPATH = $(PROGRAMFILES_X86)\Windows Phone Kits\8.1\lib\ARM
|
||||
!ELSE
|
||||
WP81LIBPATH = $(PROGRAMFILES_X86)\Windows Phone Kits\8.1\lib\x86
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# When compiling for Windows Phone 8.1, some extra linker options
|
||||
# are also required.
|
||||
#
|
||||
!IF $(USE_WP81_OPTS)!=0
|
||||
!IFDEF WP81LIBPATH
|
||||
LTLINKOPTS = $(LTLINKOPTS) "/LIBPATH:$(WP81LIBPATH)"
|
||||
!ENDIF
|
||||
LTLINKOPTS = $(LTLINKOPTS) /DYNAMICBASE
|
||||
LTLINKOPTS = $(LTLINKOPTS) WindowsPhoneCore.lib RuntimeObject.lib PhoneAppModelHost.lib
|
||||
LTLINKOPTS = $(LTLINKOPTS) /NODEFAULTLIB:kernel32.lib /NODEFAULTLIB:ole32.lib
|
||||
!ENDIF
|
||||
|
||||
# When compiling for UWP or the Windows 10 platform, some extra linker
|
||||
# options are also required.
|
||||
#
|
||||
!IF $(FOR_UWP)!=0 || $(FOR_WIN10)!=0
|
||||
LTLINKOPTS = $(LTLINKOPTS) /DYNAMICBASE /NODEFAULTLIB:kernel32.lib
|
||||
LTLINKOPTS = $(LTLINKOPTS) mincore.lib
|
||||
!IFDEF PSDKLIBPATH
|
||||
LTLINKOPTS = $(LTLINKOPTS) "/LIBPATH:$(PSDKLIBPATH)"
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
!IF $(FOR_WIN10)!=0
|
||||
LTLINKOPTS = $(LTLINKOPTS) /guard:cf "/LIBPATH:$(UCRTLIBPATH)"
|
||||
!IF $(DEBUG)>1
|
||||
LTLINKOPTS = $(LTLINKOPTS) /NODEFAULTLIB:libucrtd.lib /DEFAULTLIB:ucrtd.lib
|
||||
!ELSE
|
||||
LTLINKOPTS = $(LTLINKOPTS) /NODEFAULTLIB:libucrt.lib /DEFAULTLIB:ucrt.lib
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
|
||||
# If either debugging or symbols are enabled, enable PDBs.
|
||||
#
|
||||
!IF $(DEBUG)>1 || $(SYMBOLS)!=0
|
||||
LDFLAGS = /DEBUG $(LDOPTS)
|
||||
!ELSE
|
||||
LDFLAGS = $(LDOPTS)
|
||||
!ENDIF
|
||||
|
||||
|
||||
# You should not have to change anything below this line
|
||||
###############################################################################
|
||||
|
||||
|
||||
# Object files for the amalgamation.
|
||||
#
|
||||
LIBOBJS1 = sqlite3.lo
|
||||
|
||||
# Determine the real value of LIBOBJ based on the 'configure' script
|
||||
#
|
||||
LIBOBJ = $(LIBOBJS1)
|
||||
|
||||
# Determine if embedded resource compilation and usage are enabled.
|
||||
#
|
||||
!IF $(USE_RC)!=0
|
||||
LIBRESOBJS = sqlite3res.lo
|
||||
!ELSE
|
||||
LIBRESOBJS =
|
||||
!ENDIF
|
||||
|
||||
|
||||
# Additional compiler options for the shell. These are only effective
|
||||
# when the shell is not being dynamically linked.
|
||||
#
|
||||
!IF $(DYNAMIC_SHELL)==0 && $(FOR_WIN10)==0
|
||||
SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) -DSQLITE_SHELL_JSON1 -DSQLITE_ENABLE_FTS4 -DSQLITE_ENABLE_EXPLAIN_COMMENTS
|
||||
!ENDIF
|
||||
|
||||
|
||||
# This is the default Makefile target. The objects listed here
|
||||
# are what get built when you type just "make" with no arguments.
|
||||
#
|
||||
all: dll shell
|
||||
|
||||
# Dynamic link library section.
|
||||
#
|
||||
dll: $(SQLITE3DLL)
|
||||
|
||||
# Shell executable.
|
||||
#
|
||||
shell: $(SQLITE3EXE)
|
||||
|
||||
|
||||
$(SQLITE3DLL): $(LIBOBJ) $(LIBRESOBJS) $(CORE_LINK_DEP)
|
||||
$(LD) $(LDFLAGS) $(LTLINKOPTS) $(LTLIBPATHS) /DLL $(CORE_LINK_OPTS) /OUT:$@ $(LIBOBJ) $(LIBRESOBJS) $(LTLIBS) $(TLIBS)
|
||||
|
||||
|
||||
$(SQLITE3EXE): $(TOP)\shell.c $(SHELL_CORE_DEP) $(LIBRESOBJS) $(SHELL_CORE_SRC) $(SQLITE3H)
|
||||
$(LTLINK) $(SHELL_COMPILE_OPTS) $(READLINE_FLAGS) $(TOP)\shell.c $(SHELL_CORE_SRC) \
|
||||
/link $(SQLITE3EXEPDB) $(LDFLAGS) $(LTLINKOPTS) $(SHELL_LINK_OPTS) $(LTLIBPATHS) $(LIBRESOBJS) $(LIBREADLINE) $(LTLIBS) $(TLIBS)
|
||||
|
||||
|
||||
# Rule to build the amalgamation
|
||||
#
|
||||
sqlite3.lo: $(SQLITE3C)
|
||||
$(LTCOMPILE) $(CORE_COMPILE_OPTS) -c $(SQLITE3C)
|
||||
|
||||
|
||||
# Rule to build the Win32 resources object file.
|
||||
#
|
||||
!IF $(USE_RC)!=0
|
||||
_HASHCHAR=^#
|
||||
!IF ![echo !IFNDEF VERSION > rcver.vc] && \
|
||||
![for /F "delims=" %V in ('type "$(SQLITE3H)" ^| find "$(_HASHCHAR)define SQLITE_VERSION "') do (echo VERSION = ^^%V >> rcver.vc)] && \
|
||||
![echo !ENDIF >> rcver.vc]
|
||||
!INCLUDE rcver.vc
|
||||
!ENDIF
|
||||
|
||||
RESOURCE_VERSION = $(VERSION:^#=)
|
||||
RESOURCE_VERSION = $(RESOURCE_VERSION:define=)
|
||||
RESOURCE_VERSION = $(RESOURCE_VERSION:SQLITE_VERSION=)
|
||||
RESOURCE_VERSION = $(RESOURCE_VERSION:"=)
|
||||
RESOURCE_VERSION = $(RESOURCE_VERSION:.=,)
|
||||
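# Worked example of the two steps above: the inline commands copy the
# "#define SQLITE_VERSION" line out of sqlite3.h into rcver.vc as a
# VERSION macro, and the substitutions then strip the define keyword,
# the SQLITE_VERSION name and the quotes and turn the dots into commas.
# For a header carrying version 3.11.1 (the number is only an
# illustration) RESOURCE_VERSION ends up as 3,11,1, the comma-separated
# form written into sqlite3rc.h as SQLITE_RESOURCE_VERSION by the rule
# below.
#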
|
||||
$(LIBRESOBJS): $(TOP)\sqlite3.rc rcver.vc $(SQLITE3H)
|
||||
echo #ifndef SQLITE_RESOURCE_VERSION > sqlite3rc.h
|
||||
echo #define SQLITE_RESOURCE_VERSION $(RESOURCE_VERSION) >> sqlite3rc.h
|
||||
echo #endif >> sqlite3rc.h
|
||||
$(LTRCOMPILE) -fo $(LIBRESOBJS) -DRC_VERONLY $(TOP)\sqlite3.rc
|
||||
!ENDIF
|
||||
|
||||
|
||||
clean:
|
||||
del /Q *.exp *.lo *.ilk *.lib *.obj *.ncb *.pdb *.sdf *.suo 2>NUL
|
||||
del /Q *.bsc *.def *.cod *.da *.bb *.bbg *.vc gmon.out 2>NUL
|
||||
del /Q $(SQLITE3EXE) $(SQLITE3DLL) 2>NUL
|
||||
@ -1,11 +0,0 @@
|
||||
This directory contains components used to build an autoconf-ready package
|
||||
of the SQLite amalgamation: sqlite-autoconf-30XXXXXX.tar.gz
|
||||
|
||||
To build the autoconf amalgamation, run from the top-level:
|
||||
|
||||
./configure
|
||||
make amalgamation-tarball
|
||||
|
||||
The amalgamation-tarball target (also available in "main.mk") runs the
|
||||
script tool/mkautoconfamal.sh which does the work. Refer to that script
|
||||
for details.
|
||||
@ -1,113 +0,0 @@
|
||||
This package contains:
|
||||
|
||||
* the SQLite library amalgamation source code file: sqlite3.c
|
||||
* the sqlite3.h and sqlite3ext.h header files that define the C-language
|
||||
interface to the sqlite3.c library file
|
||||
* the shell.c file used to build the sqlite3 command-line shell program
|
||||
* autoconf/automake installation infrastructure for building on POSIX
|
||||
compliant systems
|
||||
* a Makefile.msc and sqlite3.rc for building with Microsoft Visual C++ on
|
||||
Windows
|
||||
|
||||
SUMMARY OF HOW TO BUILD
|
||||
=======================
|
||||
|
||||
Unix: ./configure; make
|
||||
Windows: nmake /f Makefile.msc
|
||||
|
||||
BUILDING ON POSIX
|
||||
=================
|
||||
|
||||
The generic installation instructions for autoconf/automake are found
|
||||
in the INSTALL file.
|
||||
|
||||
The following SQLite specific boolean options are supported:
|
||||
|
||||
--enable-readline use readline in shell tool [default=yes]
|
||||
--enable-threadsafe build a thread-safe library [default=yes]
|
||||
--enable-dynamic-extensions support loadable extensions [default=yes]
|
||||
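For example (a minimal illustration, with the option set chosen
arbitrarily), a build without readline and without loadable extension
support would be configured as:

$ ./configure --disable-readline --disable-dynamic-extensions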
|
||||
The default value for the CFLAGS variable (options passed to the C
|
||||
compiler) includes debugging symbols in the build, resulting in larger
|
||||
binaries than are necessary. Override it on the configure command
|
||||
line like this:
|
||||
|
||||
$ CFLAGS="-Os" ./configure
|
||||
|
||||
to produce a smaller installation footprint.
|
||||
|
||||
Other SQLite compilation parameters can also be set using CFLAGS. For
|
||||
example:
|
||||
|
||||
$ CFLAGS="-Os -DSQLITE_THREADSAFE=0" ./configure
|
||||
|
||||
|
||||
BUILDING WITH MICROSOFT VISUAL C++
|
||||
==================================
|
||||
|
||||
To compile for Windows using Microsoft Visual C++:
|
||||
|
||||
$ nmake /f Makefile.msc
|
||||
|
||||
Using Microsoft Visual C++ 2005 (or later) is recommended. Several Windows
|
||||
platform variants may be built by adding additional macros to the NMAKE
|
||||
command line.
|
||||
|
||||
Building for WinRT 8.0
|
||||
----------------------
|
||||
|
||||
FOR_WINRT=1
|
||||
|
||||
Using Microsoft Visual C++ 2012 (or later) is required. When using the
|
||||
above, something like the following macro will need to be added to the
|
||||
NMAKE command line as well:
|
||||
|
||||
"NSDKLIBPATH=%WindowsSdkDir%\..\8.0\lib\win8\um\x86"
|
||||
|
||||
Building for WinRT 8.1
|
||||
----------------------
|
||||
|
||||
FOR_WINRT=1
|
||||
|
||||
Using Microsoft Visual C++ 2013 (or later) is required. When using the
|
||||
above, something like the following macro will need to be added to the
|
||||
NMAKE command line as well:
|
||||
|
||||
"NSDKLIBPATH=%WindowsSdkDir%\..\8.1\lib\winv6.3\um\x86"
|
||||
|
||||
Building for UWP 10.0
|
||||
---------------------
|
||||
|
||||
FOR_WINRT=1 FOR_UWP=1
|
||||
|
||||
Using Microsoft Visual C++ 2015 (or later) is required. When using the
|
||||
above, something like the following macros will need to be added to the
|
||||
NMAKE command line as well:
|
||||
|
||||
"NSDKLIBPATH=%WindowsSdkDir%\..\10\lib\10.0.10586.0\um\x86"
|
||||
"PSDKLIBPATH=%WindowsSdkDir%\..\10\lib\10.0.10586.0\um\x86"
|
||||
"NUCRTLIBPATH=%UniversalCRTSdkDir%\..\10\lib\10.0.10586.0\ucrt\x86"
|
||||
|
||||
Building for the Windows 10 SDK
|
||||
-------------------------------
|
||||
|
||||
FOR_WIN10=1
|
||||
|
||||
Using Microsoft Visual C++ 2015 (or later) is required. When using the
|
||||
above, no other macros should be needed on the NMAKE command line.
|
||||
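Putting that together, a complete Windows 10 build command line would
look like this:

$ nmake /f Makefile.msc FOR_WIN10=1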
|
||||
Other preprocessor defines
|
||||
--------------------------
|
||||
|
||||
Additionally, preprocessor defines may be specified by using the OPTS macro
|
||||
on the NMAKE command line. However, not all possible preprocessor defines
|
||||
may be specified in this manner as some require the amalgamation to be built
|
||||
with them enabled (see http://www.sqlite.org/compile.html). For example, the
|
||||
following will work:
|
||||
|
||||
"OPTS=-DSQLITE_ENABLE_STAT4=1 -DSQLITE_ENABLE_JSON1=1"
|
||||
|
||||
However, the following will not compile unless the amalgamation was built
|
||||
with it enabled:
|
||||
|
||||
"OPTS=-DSQLITE_ENABLE_UPDATE_DELETE_LIMIT=1"
|
||||
@ -1,167 +0,0 @@
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# Supports the following non-standard switches.
|
||||
#
|
||||
# --enable-threadsafe
|
||||
# --enable-readline
|
||||
# --enable-editline
|
||||
# --enable-static-shell
|
||||
# --enable-dynamic-extensions
|
||||
#
|
||||
|
||||
AC_PREREQ(2.61)
|
||||
AC_INIT(sqlite, --SQLITE-VERSION--, http://www.sqlite.org)
|
||||
AC_CONFIG_SRCDIR([sqlite3.c])
|
||||
|
||||
# Use automake.
|
||||
AM_INIT_AUTOMAKE([foreign])
|
||||
|
||||
AC_SYS_LARGEFILE
|
||||
|
||||
# Check for required programs.
|
||||
AC_PROG_CC
|
||||
AC_PROG_LIBTOOL
|
||||
AC_PROG_MKDIR_P
|
||||
|
||||
# Check for library functions that SQLite can optionally use.
|
||||
AC_CHECK_FUNCS([fdatasync usleep fullfsync localtime_r gmtime_r])
|
||||
AC_FUNC_STRERROR_R
|
||||
|
||||
AC_CONFIG_FILES([Makefile sqlite3.pc])
|
||||
AC_SUBST(BUILD_CFLAGS)
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-editline
|
||||
# --enable-readline
|
||||
#
|
||||
AC_ARG_ENABLE(editline, [AS_HELP_STRING(
|
||||
[--enable-editline],
|
||||
[use BSD libedit])],
|
||||
[], [enable_editline=yes])
|
||||
AC_ARG_ENABLE(readline, [AS_HELP_STRING(
|
||||
[--enable-readline],
|
||||
[use readline])],
|
||||
[], [enable_readline=no])
|
||||
if test x"$enable_editline" != xno ; then
|
||||
sLIBS=$LIBS
|
||||
LIBS=""
|
||||
AC_SEARCH_LIBS([readline],[edit],[enable_readline=no],[enable_editline=no])
|
||||
READLINE_LIBS=$LIBS
|
||||
if test x"$LIBS" != "x"; then
|
||||
AC_DEFINE([HAVE_EDITLINE],1,Define to use BSD editline)
|
||||
else
|
||||
unset ac_cv_search_readline
|
||||
fi
|
||||
LIBS=$sLIBS
|
||||
fi
|
||||
if test x"$enable_readline" != xno ; then
|
||||
sLIBS=$LIBS
|
||||
LIBS=""
|
||||
AC_SEARCH_LIBS(tgetent, curses ncurses ncursesw, [], [])
|
||||
AC_SEARCH_LIBS(readline, readline, [], [enable_readline=no])
|
||||
AC_CHECK_FUNCS(readline, [], [])
|
||||
READLINE_LIBS=$LIBS
|
||||
LIBS=$sLIBS
|
||||
fi
|
||||
AC_SUBST(READLINE_LIBS)
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-threadsafe
|
||||
#
|
||||
AC_ARG_ENABLE(threadsafe, [AS_HELP_STRING(
|
||||
[--enable-threadsafe], [build a thread-safe library [default=yes]])],
|
||||
[], [enable_threadsafe=yes])
|
||||
THREADSAFE_FLAGS=-DSQLITE_THREADSAFE=0
|
||||
if test x"$enable_threadsafe" != "xno"; then
|
||||
THREADSAFE_FLAGS="-D_REENTRANT=1 -DSQLITE_THREADSAFE=1"
|
||||
AC_SEARCH_LIBS(pthread_create, pthread)
|
||||
AC_SEARCH_LIBS(pthread_mutexattr_init, pthread)
|
||||
fi
|
||||
AC_SUBST(THREADSAFE_FLAGS)
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-dynamic-extensions
|
||||
#
|
||||
AC_ARG_ENABLE(dynamic-extensions, [AS_HELP_STRING(
|
||||
[--enable-dynamic-extensions], [support loadable extensions [default=yes]])],
|
||||
[], [enable_dynamic_extensions=yes])
|
||||
if test x"$enable_dynamic_extensions" != "xno"; then
|
||||
AC_SEARCH_LIBS(dlopen, dl)
|
||||
else
|
||||
DYNAMIC_EXTENSION_FLAGS=-DSQLITE_OMIT_LOAD_EXTENSION=1
|
||||
fi
|
||||
AC_MSG_CHECKING([for whether to support dynamic extensions])
|
||||
AC_MSG_RESULT($enable_dynamic_extensions)
|
||||
AC_SUBST(DYNAMIC_EXTENSION_FLAGS)
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-fts5
|
||||
#
|
||||
AC_ARG_ENABLE(fts5, [AS_HELP_STRING(
|
||||
[--enable-fts5], [include fts5 support [default=no]])],
|
||||
[], [enable_fts5=no])
|
||||
if test x"$enable_fts5" == "xyes"; then
|
||||
AC_SEARCH_LIBS(log, m)
|
||||
FTS5_FLAGS=-DSQLITE_ENABLE_FTS5
|
||||
fi
|
||||
AC_SUBST(FTS5_FLAGS)
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-json1
|
||||
#
|
||||
AC_ARG_ENABLE(json1, [AS_HELP_STRING(
|
||||
[--enable-json1], [include json1 support [default=no]])],
|
||||
[], [enable_json1=no])
|
||||
if test x"$enable_json1" == "xyes"; then
|
||||
JSON1_FLAGS=-DSQLITE_ENABLE_JSON1
|
||||
fi
|
||||
AC_SUBST(JSON1_FLAGS)
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# --enable-static-shell
|
||||
#
|
||||
AC_ARG_ENABLE(static-shell, [AS_HELP_STRING(
|
||||
[--enable-static-shell],
|
||||
[statically link libsqlite3 into shell tool [default=yes]])],
|
||||
[], [enable_static_shell=yes])
|
||||
if test x"$enable_static_shell" == "xyes"; then
|
||||
EXTRA_SHELL_OBJ=sqlite3.$OBJEXT
|
||||
else
|
||||
EXTRA_SHELL_OBJ=libsqlite3.la
|
||||
fi
|
||||
AC_SUBST(EXTRA_SHELL_OBJ)
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
AC_CHECK_FUNCS(posix_fallocate)
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# UPDATE: Maybe it's better if users just set CFLAGS before invoking
|
||||
# configure. This option doesn't really add much...
|
||||
#
|
||||
# --enable-tempstore
|
||||
#
|
||||
# AC_ARG_ENABLE(tempstore, [AS_HELP_STRING(
|
||||
# [--enable-tempstore],
|
||||
# [in-memory temporary tables (never, no, yes, always) [default=no]])],
|
||||
# [], [enable_tempstore=no])
|
||||
# AC_MSG_CHECKING([for whether or not to store temp tables in-memory])
|
||||
# case "$enable_tempstore" in
|
||||
# never ) TEMP_STORE=0 ;;
|
||||
# no ) TEMP_STORE=1 ;;
|
||||
# always ) TEMP_STORE=3 ;;
|
||||
# yes ) TEMP_STORE=3 ;;
|
||||
# * )
|
||||
# TEMP_STORE=1
|
||||
# enable_tempstore=yes
|
||||
# ;;
|
||||
# esac
|
||||
# AC_MSG_RESULT($enable_tempstore)
|
||||
# AC_SUBST(TEMP_STORE)
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
AC_OUTPUT
|
||||
@ -1,440 +0,0 @@
|
||||
# Makefile.in --
|
||||
#
|
||||
# This file is a Makefile for Sample TEA Extension. If it has the name
|
||||
# "Makefile.in" then it is a template for a Makefile; to generate the
|
||||
# actual Makefile, run "./configure", which is a configuration script
|
||||
# generated by the "autoconf" program (constructs like "@foo@" will get
|
||||
# replaced in the actual Makefile).
|
||||
#
|
||||
# Copyright (c) 1999 Scriptics Corporation.
|
||||
# Copyright (c) 2002-2005 ActiveState Corporation.
|
||||
#
|
||||
# See the file "license.terms" for information on usage and redistribution
|
||||
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
|
||||
#
|
||||
# RCS: @(#) $Id: Makefile.in,v 1.59 2005/07/26 19:17:02 mdejong Exp $
|
||||
|
||||
#========================================================================
|
||||
# Add additional lines to handle any additional AC_SUBST cases that
|
||||
# have been added in a customized configure script.
|
||||
#========================================================================
|
||||
|
||||
#SAMPLE_NEW_VAR = @SAMPLE_NEW_VAR@
|
||||
|
||||
#========================================================================
|
||||
# None of the variables below this line should need to be changed.
|
||||
# Please check the TARGETS section below to make sure the make targets
|
||||
# are correct.
|
||||
#========================================================================
|
||||
|
||||
#========================================================================
|
||||
# The names of the source files are defined in the configure script.
|
||||
# The object files are used for linking into the final library.
|
||||
# This will be used when a dist target is added to the Makefile.
|
||||
# It is not important to specify the directory, as long as it is in the
|
||||
# $(srcdir) or in the generic, win or unix subdirectory.
|
||||
#========================================================================
|
||||
|
||||
PKG_SOURCES = @PKG_SOURCES@
|
||||
PKG_OBJECTS = @PKG_OBJECTS@
|
||||
|
||||
PKG_STUB_SOURCES = @PKG_STUB_SOURCES@
|
||||
PKG_STUB_OBJECTS = @PKG_STUB_OBJECTS@
|
||||
|
||||
#========================================================================
|
||||
# PKG_TCL_SOURCES identifies Tcl runtime files that are associated with
|
||||
# this package that need to be installed, if any.
|
||||
#========================================================================
|
||||
|
||||
PKG_TCL_SOURCES = @PKG_TCL_SOURCES@
|
||||
|
||||
#========================================================================
|
||||
# This is a list of public header files to be installed, if any.
|
||||
#========================================================================
|
||||
|
||||
PKG_HEADERS = @PKG_HEADERS@
|
||||
|
||||
#========================================================================
|
||||
# "PKG_LIB_FILE" refers to the library (dynamic or static as per
|
||||
# configuration options) composed of the named objects.
|
||||
#========================================================================
|
||||
|
||||
PKG_LIB_FILE = @PKG_LIB_FILE@
|
||||
PKG_STUB_LIB_FILE = @PKG_STUB_LIB_FILE@
|
||||
|
||||
lib_BINARIES = $(PKG_LIB_FILE)
|
||||
BINARIES = $(lib_BINARIES)
|
||||
|
||||
SHELL = @SHELL@
|
||||
|
||||
srcdir = @srcdir@
|
||||
prefix = @prefix@
|
||||
exec_prefix = @exec_prefix@
|
||||
|
||||
bindir = @bindir@
|
||||
libdir = @libdir@
|
||||
datarootdir = @datarootdir@
|
||||
datadir = @datadir@
|
||||
mandir = @mandir@
|
||||
includedir = @includedir@
|
||||
|
||||
DESTDIR =
|
||||
|
||||
PKG_DIR = $(PACKAGE_NAME)$(PACKAGE_VERSION)
|
||||
pkgdatadir = $(datadir)/$(PKG_DIR)
|
||||
pkglibdir = $(libdir)/$(PKG_DIR)
|
||||
pkgincludedir = $(includedir)/$(PKG_DIR)
|
||||
|
||||
top_builddir = .
|
||||
|
||||
INSTALL = @INSTALL@
|
||||
INSTALL_PROGRAM = @INSTALL_PROGRAM@
|
||||
INSTALL_DATA = @INSTALL_DATA@
|
||||
INSTALL_SCRIPT = @INSTALL_SCRIPT@
|
||||
|
||||
PACKAGE_NAME = @PACKAGE_NAME@
|
||||
PACKAGE_VERSION = @PACKAGE_VERSION@
|
||||
CC = @CC@
|
||||
CFLAGS_DEFAULT = @CFLAGS_DEFAULT@
|
||||
CFLAGS_WARNING = @CFLAGS_WARNING@
|
||||
CLEANFILES = @CLEANFILES@
|
||||
EXEEXT = @EXEEXT@
|
||||
LDFLAGS_DEFAULT = @LDFLAGS_DEFAULT@
|
||||
MAKE_LIB = @MAKE_LIB@
|
||||
MAKE_SHARED_LIB = @MAKE_SHARED_LIB@
|
||||
MAKE_STATIC_LIB = @MAKE_STATIC_LIB@
|
||||
MAKE_STUB_LIB = @MAKE_STUB_LIB@
|
||||
OBJEXT = @OBJEXT@
|
||||
RANLIB = @RANLIB@
|
||||
RANLIB_STUB = @RANLIB_STUB@
|
||||
SHLIB_CFLAGS = @SHLIB_CFLAGS@
|
||||
SHLIB_LD = @SHLIB_LD@
|
||||
SHLIB_LD_LIBS = @SHLIB_LD_LIBS@
|
||||
STLIB_LD = @STLIB_LD@
|
||||
#TCL_DEFS = @TCL_DEFS@
|
||||
TCL_BIN_DIR = @TCL_BIN_DIR@
|
||||
TCL_SRC_DIR = @TCL_SRC_DIR@
|
||||
#TK_BIN_DIR = @TK_BIN_DIR@
|
||||
#TK_SRC_DIR = @TK_SRC_DIR@
|
||||
|
||||
# This is no longer necessary even for packages that use private Tcl headers
|
||||
#TCL_TOP_DIR_NATIVE = @TCL_TOP_DIR_NATIVE@
|
||||
# Not used, but retained for reference of what libs Tcl required
|
||||
#TCL_LIBS = @TCL_LIBS@
|
||||
|
||||
#========================================================================
|
||||
# TCLLIBPATH seeds the auto_path in Tcl's init.tcl so we can test our
|
||||
# package without installing. The other environment variables allow us
|
||||
# to test against an uninstalled Tcl. Add special env vars that you
|
||||
# require for testing here (like TCLX_LIBRARY).
|
||||
#========================================================================
|
||||
|
||||
EXTRA_PATH = $(top_builddir):$(TCL_BIN_DIR)
|
||||
#EXTRA_PATH = $(top_builddir):$(TCL_BIN_DIR):$(TK_BIN_DIR)
|
||||
TCLLIBPATH = $(top_builddir)
|
||||
TCLSH_ENV = TCL_LIBRARY=`@CYGPATH@ $(TCL_SRC_DIR)/library` \
|
||||
@LD_LIBRARY_PATH_VAR@="$(EXTRA_PATH):$(@LD_LIBRARY_PATH_VAR@)" \
|
||||
PATH="$(EXTRA_PATH):$(PATH)" \
|
||||
TCLLIBPATH="$(TCLLIBPATH)"
|
||||
# TK_LIBRARY=`@CYGPATH@ $(TK_SRC_DIR)/library`
|
||||
|
||||
TCLSH_PROG = @TCLSH_PROG@
|
||||
TCLSH = $(TCLSH_ENV) $(TCLSH_PROG)
|
||||
|
||||
#WISH_PROG = @WISH_PROG@
|
||||
#WISH = $(TCLSH_ENV) $(WISH_PROG)
|
||||
|
||||
|
||||
SHARED_BUILD = @SHARED_BUILD@
|
||||
|
||||
INCLUDES = @PKG_INCLUDES@ @TCL_INCLUDES@ -I$(srcdir)/..
|
||||
#INCLUDES = @PKG_INCLUDES@ @TCL_INCLUDES@ @TK_INCLUDES@ @TK_XINCLUDES@
|
||||
|
||||
PKG_CFLAGS = @PKG_CFLAGS@
|
||||
|
||||
# TCL_DEFS is not strictly needed here, but if you remove it, then you
|
||||
# must make sure that configure.in checks for the necessary components
|
||||
# that your library may use. TCL_DEFS can actually be a problem if
|
||||
# you do not compile with a similar machine setup as the Tcl core was
|
||||
# compiled with.
|
||||
#DEFS = $(TCL_DEFS) @DEFS@ $(PKG_CFLAGS)
|
||||
DEFS = @DEFS@ $(PKG_CFLAGS)
|
||||
|
||||
CONFIG_CLEAN_FILES = Makefile pkgIndex.tcl
|
||||
|
||||
CPPFLAGS = @CPPFLAGS@
|
||||
LIBS = @PKG_LIBS@ @LIBS@
|
||||
AR = @AR@
|
||||
CFLAGS = @CFLAGS@
|
||||
COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
|
||||
|
||||
#========================================================================
|
||||
# Start of user-definable TARGETS section
|
||||
#========================================================================
|
||||
|
||||
#========================================================================
|
||||
# TEA TARGETS. Please note that the "libraries:" target refers to platform
|
||||
# independent files, and the "binaries:" target includes executable programs and
|
||||
# platform-dependent libraries. Modify these targets so that they install
|
||||
# the various pieces of your package. The make and install rules
|
||||
# for the BINARIES that you specified above have already been done.
|
||||
#========================================================================
|
||||
|
||||
all: binaries libraries doc
|
||||
|
||||
#========================================================================
|
||||
# The binaries target builds executable programs, Windows .dll's, unix
|
||||
# shared/static libraries, and any other platform-dependent files.
|
||||
# The list of targets to build for "binaries:" is specified at the top
|
||||
# of the Makefile, in the "BINARIES" variable.
|
||||
#========================================================================
|
||||
|
||||
binaries: $(BINARIES)
|
||||
|
||||
libraries:
|
||||
|
||||
|
||||
#========================================================================
|
||||
# Your doc target should differentiate between doc builds (by the developer)
|
||||
# and doc installs (see install-doc), which just install the docs on the
|
||||
# end user machine when building from source.
|
||||
#========================================================================
|
||||
|
||||
doc:
|
||||
@echo "If you have documentation to create, place the commands to"
|
||||
@echo "build the docs in the 'doc:' target. For example:"
|
||||
@echo " xml2nroff sample.xml > sample.n"
|
||||
@echo " xml2html sample.xml > sample.html"
|
||||
|
||||
install: all install-binaries install-libraries install-doc
|
||||
|
||||
install-binaries: binaries install-lib-binaries install-bin-binaries
|
||||
|
||||
#========================================================================
|
||||
# This rule installs platform-independent files, such as header files.
|
||||
# The list=...; for p in $$list handles the empty list case x-platform.
|
||||
#========================================================================
|
||||
|
||||
install-libraries: libraries
|
||||
@mkdir -p $(DESTDIR)$(includedir)
|
||||
@echo "Installing header files in $(DESTDIR)$(includedir)"
|
||||
@list='$(PKG_HEADERS)'; for i in $$list; do \
|
||||
echo "Installing $(srcdir)/$$i" ; \
|
||||
$(INSTALL_DATA) $(srcdir)/$$i $(DESTDIR)$(includedir) ; \
|
||||
done;
|
||||
|
||||
#========================================================================
|
||||
# Install documentation. Unix manpages should go in the $(mandir)
|
||||
# directory.
|
||||
#========================================================================
|
||||
|
||||
install-doc: doc
|
||||
@mkdir -p $(DESTDIR)$(mandir)/mann
|
||||
@echo "Installing documentation in $(DESTDIR)$(mandir)"
|
||||
@list='$(srcdir)/doc/*.n'; for i in $$list; do \
|
||||
echo "Installing $$i"; \
|
||||
rm -f $(DESTDIR)$(mandir)/mann/`basename $$i`; \
|
||||
$(INSTALL_DATA) $$i $(DESTDIR)$(mandir)/mann ; \
|
||||
done
|
||||
|
||||
test: binaries libraries
|
||||
@echo "SQLite TEA distribution does not include tests"
|
||||
|
||||
shell: binaries libraries
|
||||
@$(TCLSH) $(SCRIPT)
|
||||
|
||||
gdb:
|
||||
$(TCLSH_ENV) gdb $(TCLSH_PROG) $(SCRIPT)
|
||||
|
||||
depend:
|
||||
|
||||
#========================================================================
|
||||
# $(PKG_LIB_FILE) should be listed as part of the BINARIES variable
|
||||
# mentioned above. That will ensure that this target is built when you
|
||||
# run "make binaries".
|
||||
#
|
||||
# The $(PKG_OBJECTS) objects are created and linked into the final
|
||||
# library. In most cases these object files will correspond to the
|
||||
# source files above.
|
||||
#========================================================================
|
||||
|
||||
$(PKG_LIB_FILE): $(PKG_OBJECTS)
|
||||
-rm -f $(PKG_LIB_FILE)
|
||||
${MAKE_LIB}
|
||||
$(RANLIB) $(PKG_LIB_FILE)
|
||||
|
||||
$(PKG_STUB_LIB_FILE): $(PKG_STUB_OBJECTS)
|
||||
-rm -f $(PKG_STUB_LIB_FILE)
|
||||
${MAKE_STUB_LIB}
|
||||
$(RANLIB_STUB) $(PKG_STUB_LIB_FILE)
|
||||
|
||||
#========================================================================
|
||||
# We need to enumerate the list of .c to .o lines here.
|
||||
#
|
||||
# In the following lines, $(srcdir) refers to the toplevel directory
|
||||
# containing your extension. If your sources are in a subdirectory,
|
||||
# you will have to modify the paths to reflect this:
|
||||
#
|
||||
# sample.$(OBJEXT): $(srcdir)/generic/sample.c
|
||||
# $(COMPILE) -c `@CYGPATH@ $(srcdir)/generic/sample.c` -o $@
|
||||
#
|
||||
# Setting the VPATH variable to a list of paths will cause the makefile
|
||||
# to look into these paths when resolving .c to .obj dependencies.
|
||||
# As necessary, add $(srcdir):$(srcdir)/compat:....
|
||||
#========================================================================
|
||||
|
||||
VPATH = $(srcdir):$(srcdir)/generic:$(srcdir)/unix:$(srcdir)/win
|
||||
|
||||
.c.@OBJEXT@:
|
||||
$(COMPILE) -c `@CYGPATH@ $<` -o $@
|
||||
|
||||
#========================================================================
|
||||
# Distribution creation
|
||||
# You may need to tweak this target to make it work correctly.
|
||||
#========================================================================
|
||||
|
||||
#COMPRESS = tar cvf $(PKG_DIR).tar $(PKG_DIR); compress $(PKG_DIR).tar
|
||||
COMPRESS = gtar zcvf $(PKG_DIR).tar.gz $(PKG_DIR)
|
||||
DIST_ROOT = /tmp/dist
|
||||
DIST_DIR = $(DIST_ROOT)/$(PKG_DIR)
|
||||
|
||||
dist-clean:
|
||||
rm -rf $(DIST_DIR) $(DIST_ROOT)/$(PKG_DIR).tar.*
|
||||
|
||||
dist: dist-clean
|
||||
mkdir -p $(DIST_DIR)
|
||||
cp -p $(srcdir)/README* $(srcdir)/license* \
|
||||
$(srcdir)/aclocal.m4 $(srcdir)/configure $(srcdir)/*.in \
|
||||
$(DIST_DIR)/
|
||||
chmod 664 $(DIST_DIR)/Makefile.in $(DIST_DIR)/aclocal.m4
|
||||
chmod 775 $(DIST_DIR)/configure $(DIST_DIR)/configure.in
|
||||
|
||||
for i in $(srcdir)/*.[ch]; do \
|
||||
if [ -f $$i ]; then \
|
||||
cp -p $$i $(DIST_DIR)/ ; \
|
||||
fi; \
|
||||
done;
|
||||
|
||||
mkdir $(DIST_DIR)/tclconfig
|
||||
cp $(srcdir)/tclconfig/install-sh $(srcdir)/tclconfig/tcl.m4 \
|
||||
$(DIST_DIR)/tclconfig/
|
||||
chmod 664 $(DIST_DIR)/tclconfig/tcl.m4
|
||||
chmod +x $(DIST_DIR)/tclconfig/install-sh
|
||||
|
||||
list='demos doc generic library mac tests unix win'; \
|
||||
for p in $$list; do \
|
||||
if test -d $(srcdir)/$$p ; then \
|
||||
mkdir $(DIST_DIR)/$$p; \
|
||||
cp -p $(srcdir)/$$p/*.* $(DIST_DIR)/$$p/; \
|
||||
fi; \
|
||||
done
|
||||
|
||||
(cd $(DIST_ROOT); $(COMPRESS);)
|
||||
|
||||
#========================================================================
|
||||
# End of user-definable section
|
||||
#========================================================================
|
||||
|
||||
#========================================================================
|
||||
# Don't modify the list of files to clean here. Instead, set the "CLEANFILES"
|
||||
# variable in configure.in
|
||||
#========================================================================
|
||||
|
||||
clean:
|
||||
-test -z "$(BINARIES)" || rm -f $(BINARIES)
|
||||
-rm -f *.$(OBJEXT) core *.core
|
||||
-test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
|
||||
|
||||
distclean: clean
|
||||
-rm -f *.tab.c
|
||||
-rm -f $(CONFIG_CLEAN_FILES)
|
||||
-rm -f config.h config.cache config.log config.status
|
||||
|
||||
#========================================================================
|
||||
# Install binary object libraries. On Windows this includes both .dll and
|
||||
# .lib files. Because the .lib files are not explicitly listed anywhere,
|
||||
# we need to deduce their existence from the .dll file of the same name.
|
||||
# Library files go into the lib directory.
|
||||
# In addition, this will generate the pkgIndex.tcl
|
||||
# file in the install location (assuming it can find a usable tclsh shell)
|
||||
#
|
||||
# You should not have to modify this target.
|
||||
#========================================================================
|
||||
|
||||
install-lib-binaries: binaries
|
||||
@mkdir -p $(DESTDIR)$(pkglibdir)
|
||||
@list='$(lib_BINARIES)'; for p in $$list; do \
|
||||
if test -f $$p; then \
|
||||
echo " $(INSTALL_PROGRAM) $$p $(DESTDIR)$(pkglibdir)/$$p"; \
|
||||
$(INSTALL_PROGRAM) $$p $(DESTDIR)$(pkglibdir)/$$p; \
|
||||
stub=`echo $$p|sed -e "s/.*\(stub\).*/\1/"`; \
|
||||
if test "x$$stub" = "xstub"; then \
|
||||
echo " $(RANLIB_STUB) $(DESTDIR)$(pkglibdir)/$$p"; \
|
||||
$(RANLIB_STUB) $(DESTDIR)$(pkglibdir)/$$p; \
|
||||
else \
|
||||
echo " $(RANLIB) $(DESTDIR)$(pkglibdir)/$$p"; \
|
||||
$(RANLIB) $(DESTDIR)$(pkglibdir)/$$p; \
|
||||
fi; \
|
||||
ext=`echo $$p|sed -e "s/.*\.//"`; \
|
||||
if test "x$$ext" = "xdll"; then \
|
||||
lib=`basename $$p|sed -e 's/.[^.]*$$//'`.lib; \
|
||||
if test -f $$lib; then \
|
||||
echo " $(INSTALL_DATA) $$lib $(DESTDIR)$(pkglibdir)/$$lib"; \
|
||||
$(INSTALL_DATA) $$lib $(DESTDIR)$(pkglibdir)/$$lib; \
|
||||
fi; \
|
||||
fi; \
|
||||
fi; \
|
||||
done
|
||||
@list='$(PKG_TCL_SOURCES)'; for p in $$list; do \
|
||||
if test -f $(srcdir)/$$p; then \
|
||||
destp=`basename $$p`; \
|
||||
echo " Install $$destp $(DESTDIR)$(pkglibdir)/$$destp"; \
|
||||
$(INSTALL_DATA) $(srcdir)/$$p $(DESTDIR)$(pkglibdir)/$$destp; \
|
||||
fi; \
|
||||
done
|
||||
@if test "x$(SHARED_BUILD)" = "x1"; then \
|
||||
echo " Install pkgIndex.tcl $(DESTDIR)$(pkglibdir)"; \
|
||||
$(INSTALL_DATA) pkgIndex.tcl $(DESTDIR)$(pkglibdir); \
|
||||
fi
|
||||
|
||||
#========================================================================
|
||||
# Install binary executables (e.g. .exe files and dependent .dll files)
|
||||
# This is for files that must go in the bin directory (located next to
|
||||
# wish and tclsh), like dependent .dll files on Windows.
|
||||
#
|
||||
# You should not have to modify this target, except to define bin_BINARIES
|
||||
# above if necessary.
|
||||
#========================================================================
|
||||
|
||||
install-bin-binaries: binaries
|
||||
@mkdir -p $(DESTDIR)$(bindir)
|
||||
@list='$(bin_BINARIES)'; for p in $$list; do \
|
||||
if test -f $$p; then \
|
||||
echo " $(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/$$p"; \
|
||||
$(INSTALL_PROGRAM) $$p $(DESTDIR)$(bindir)/$$p; \
|
||||
fi; \
|
||||
done
|
||||
|
||||
.SUFFIXES: .c .$(OBJEXT)
|
||||
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
uninstall-binaries:
|
||||
list='$(lib_BINARIES)'; for p in $$list; do \
|
||||
rm -f $(DESTDIR)$(pkglibdir)/$$p; \
|
||||
done
|
||||
list='$(PKG_TCL_SOURCES)'; for p in $$list; do \
|
||||
p=`basename $$p`; \
|
||||
rm -f $(DESTDIR)$(pkglibdir)/$$p; \
|
||||
done
|
||||
list='$(bin_BINARIES)'; for p in $$list; do \
|
||||
rm -f $(DESTDIR)$(bindir)/$$p; \
|
||||
done
|
||||
|
||||
.PHONY: all binaries clean depend distclean doc install libraries test
|
||||
|
||||
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||
.NOEXPORT:
|
||||
@ -1,36 +0,0 @@
|
||||
This is the SQLite extension for Tcl using the Tcl Extension
|
||||
Architecture (TEA). For additional information on SQLite see
|
||||
|
||||
http://www.sqlite.org/
|
||||
|
||||
|
||||
UNIX BUILD
|
||||
==========
|
||||
|
||||
Building under most UNIX systems is easy: just run the configure script
|
||||
and then run make. For more information about the build process, see
|
||||
the tcl/unix/README file in the Tcl src dist. The following minimal
|
||||
example will install the extension in the /opt/tcl directory.
|
||||
|
||||
$ cd sqlite-*-tea
|
||||
$ ./configure --prefix=/opt/tcl
|
||||
$ make
|
||||
$ make install
|
||||
|
||||
WINDOWS BUILD
|
||||
=============
|
||||
|
||||
The recommended method to build extensions under Windows is to use the
|
||||
Msys + Mingw build process. This provides a Unix-style build while
|
||||
generating native Windows binaries. Using the Msys + Mingw build tools
|
||||
means that you can use the same configure script as per the Unix build
|
||||
to create a Makefile. See the tcl/win/README file for the URL of
|
||||
the Msys + Mingw download.
|
||||
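For example, from an MSYS shell a build might look like the following;
the Tcl installation path is only an illustration:

$ cd sqlite-*-tea
$ ./configure --with-tcl=/c/Tcl/lib
$ make
$ make install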
|
||||
If you have VC++ then you may wish to use the files in the win
|
||||
subdirectory and build the extension using just VC++. These files have
|
||||
been designed to be as generic as possible but will require some
|
||||
additional maintenance by the project developer to synchronise with
|
||||
the TEA configure.in and Makefile.in files. Instructions for using the
|
||||
VC++ makefile are written in the first part of the Makefile.vc
|
||||
file.
|
||||
@ -1,9 +0,0 @@
|
||||
#
|
||||
# Include the TEA standard macro set
|
||||
#
|
||||
|
||||
builtin(include,tclconfig/tcl.m4)
|
||||
|
||||
#
|
||||
# Add here whatever m4 macros you want to define for your package
|
||||
#
|
||||
@ -1,202 +0,0 @@
|
||||
#!/bin/bash -norc
|
||||
dnl This file is an input file used by the GNU "autoconf" program to
|
||||
dnl generate the file "configure", which is run during Tcl installation
|
||||
dnl to configure the system for the local environment.
|
||||
#
|
||||
# RCS: @(#) $Id: configure.in,v 1.43 2005/07/26 19:17:05 mdejong Exp $
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# Sample configure.in for Tcl Extensions. The only places you should
|
||||
# need to modify this file are marked by the string __CHANGE__
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# __CHANGE__
|
||||
# Set your package name and version numbers here.
|
||||
#
|
||||
# This initializes the environment with PACKAGE_NAME and PACKAGE_VERSION
|
||||
# set as provided. These will also be added as -D defs in your Makefile
|
||||
# so you can encode the package version directly into the source files.
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
AC_INIT([sqlite], [3.7.4])
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# Call TEA_INIT as the first TEA_ macro to set up initial vars.
|
||||
# This will define a ${TEA_PLATFORM} variable == "unix" or "windows"
|
||||
# as well as PKG_LIB_FILE and PKG_STUB_LIB_FILE.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
TEA_INIT([3.9])
|
||||
|
||||
AC_CONFIG_AUX_DIR(tclconfig)
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# Load the tclConfig.sh file
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
TEA_PATH_TCLCONFIG
|
||||
TEA_LOAD_TCLCONFIG
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# Load the tkConfig.sh file if necessary (Tk extension)
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
#TEA_PATH_TKCONFIG
|
||||
#TEA_LOAD_TKCONFIG
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# Handle the --prefix=... option by defaulting to what Tcl gave.
|
||||
# Must be called after TEA_LOAD_TCLCONFIG and before TEA_SETUP_COMPILER.
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
TEA_PREFIX
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# Standard compiler checks.
|
||||
# This sets up CC by using the CC env var, or looks for gcc otherwise.
|
||||
# This also calls AC_PROG_CC, AC_PROG_INSTALL and a few others to create
|
||||
# the basic setup necessary to compile executables.
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
TEA_SETUP_COMPILER
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# __CHANGE__
|
||||
# Specify the C source files to compile in TEA_ADD_SOURCES,
|
||||
# public headers that need to be installed in TEA_ADD_HEADERS,
|
||||
# stub library C source files to compile in TEA_ADD_STUB_SOURCES,
|
||||
# and runtime Tcl library files in TEA_ADD_TCL_SOURCES.
|
||||
# This defines PKG(_STUB)_SOURCES, PKG(_STUB)_OBJECTS, PKG_HEADERS
|
||||
# and PKG_TCL_SOURCES.
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
TEA_ADD_SOURCES([tclsqlite3.c])
|
||||
TEA_ADD_HEADERS([])
|
||||
TEA_ADD_INCLUDES([-I\"`\${CYGPATH} \${srcdir}/generic`\"])
|
||||
TEA_ADD_LIBS([])
|
||||
TEA_ADD_CFLAGS([-DSQLITE_ENABLE_FTS3=1])
|
||||
TEA_ADD_CFLAGS([-DSQLITE_3_SUFFIX_ONLY=1])
|
||||
TEA_ADD_CFLAGS([-DSQLITE_ENABLE_RTREE=1])
|
||||
TEA_ADD_CFLAGS([-DSQLITE_OMIT_DEPRECATED=1])
|
||||
TEA_ADD_STUB_SOURCES([])
|
||||
TEA_ADD_TCL_SOURCES([])
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# The --with-system-sqlite causes the TCL bindings to SQLite to use
|
||||
# the system shared library for SQLite rather than statically linking
|
||||
# against its own private copy. This is dangerous and leads to
|
||||
# undesirable dependencies and is not recommended.
|
||||
# Patches from rmax.
|
||||
#--------------------------------------------------------------------
|
||||
AC_ARG_WITH([system-sqlite],
|
||||
[AC_HELP_STRING([--with-system-sqlite],
|
||||
[use a system-supplied libsqlite3 instead of the bundled one])],
|
||||
[], [with_system_sqlite=no])
|
||||
if test x$with_system_sqlite != xno; then
|
||||
AC_CHECK_HEADER([sqlite3.h],
|
||||
[AC_CHECK_LIB([sqlite3],[sqlite3_initialize],
|
||||
[AC_DEFINE(USE_SYSTEM_SQLITE)
|
||||
LIBS="$LIBS -lsqlite3"])])
|
||||
fi
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# __CHANGE__
|
||||
# Choose which headers you need. Extension authors should try very
|
||||
# hard to only rely on the Tcl public header files. Internal headers
|
||||
# contain private data structures and are subject to change without
|
||||
# notice.
|
||||
# This MUST be called after TEA_LOAD_TCLCONFIG / TEA_LOAD_TKCONFIG
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
TEA_PUBLIC_TCL_HEADERS
|
||||
#TEA_PRIVATE_TCL_HEADERS
|
||||
|
||||
#TEA_PUBLIC_TK_HEADERS
|
||||
#TEA_PRIVATE_TK_HEADERS
|
||||
#TEA_PATH_X
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# Check whether --enable-threads or --disable-threads was given.
|
||||
# This auto-enables if Tcl was compiled threaded.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
TEA_ENABLE_THREADS
|
||||
if test "${TCL_THREADS}" = "1" ; then
|
||||
AC_DEFINE(SQLITE_THREADSAFE, 1, [Trigger sqlite threadsafe build])
|
||||
# Not automatically added by Tcl because it's assumed Tcl links to them,
|
||||
# but it may not if it isn't really a threaded build.
|
||||
TEA_ADD_LIBS([$THREADS_LIBS])
|
||||
else
|
||||
AC_DEFINE(SQLITE_THREADSAFE, 0, [Trigger sqlite non-threadsafe build])
|
||||
fi
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# The statement below defines a collection of symbols related to
|
||||
# building as a shared library instead of a static library.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
TEA_ENABLE_SHARED
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# This macro figures out what flags to use with the compiler/linker
|
||||
# when building shared/static debug/optimized objects. This information
|
||||
# can be taken from the tclConfig.sh file, but this figures it all out.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
TEA_CONFIG_CFLAGS
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# Set the default compiler switches based on the --enable-symbols option.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
TEA_ENABLE_SYMBOLS
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# Everyone should be linking against the Tcl stub library. If you
|
||||
# can't for some reason, remove this definition. If you aren't using
|
||||
# stubs, you also need to modify the SHLIB_LD_LIBS setting below to
|
||||
# link against the non-stubbed Tcl library. Add Tk too if necessary.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
AC_DEFINE(USE_TCL_STUBS, 1, [Use Tcl stubs])
|
||||
#AC_DEFINE(USE_TK_STUBS, 1, [Use Tk stubs])
|
||||
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# Redefine fdatasync as fsync on systems that lack fdatasync
|
||||
#--------------------------------------------------------------------
|
||||
#
|
||||
#AC_CHECK_FUNC(fdatasync, , AC_DEFINE(fdatasync, fsync))
|
||||
# Check for library functions that SQLite can optionally use.
|
||||
AC_CHECK_FUNCS([fdatasync usleep fullfsync localtime_r gmtime_r])
|
||||
|
||||
AC_FUNC_STRERROR_R
|
||||
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# This macro generates a line to use when building a library. It
|
||||
# depends on values set by the TEA_ENABLE_SHARED, TEA_ENABLE_SYMBOLS,
|
||||
# and TEA_LOAD_TCLCONFIG macros above.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
TEA_MAKE_LIB
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# Determine the name of the tclsh and/or wish executables in the
|
||||
# Tcl and Tk build directories or the location they were installed
|
||||
# into. These paths are used to support running test cases only;
|
||||
# the Makefile should not be making use of these paths to generate
|
||||
# a pkgIndex.tcl file or anything else at extension build time.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
TEA_PROG_TCLSH
|
||||
#TEA_PROG_WISH
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# Finally, substitute all of the various values into the Makefile.
|
||||
# You may alternatively have a special pkgIndex.tcl.in or other files
|
||||
# which require substituting the AC variables in. Include these here.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
AC_OUTPUT([Makefile pkgIndex.tcl])
|
||||
@ -1,15 +0,0 @@
|
||||
.TH sqlite3 n 4.1 "Tcl-Extensions"
|
||||
.HS sqlite3 tcl
|
||||
.BS
|
||||
.SH NAME
|
||||
sqlite3 \- an interface to the SQLite3 database engine
|
||||
.SH SYNOPSIS
|
||||
\fBsqlite3\fI command_name ?filename?\fR
|
||||
.br
|
||||
.SH DESCRIPTION
|
||||
SQLite3 is a self-contained, zero-configuration, transactional SQL database
|
||||
engine. This extension provides an easy-to-use interface for accessing
|
||||
SQLite database files from Tcl.
|
||||
.PP
|
||||
For full documentation see \fIhttp://www.sqlite.org/\fR and
|
||||
in particular \fIhttp://www.sqlite.org/tclsqlite.html\fR.
|
||||
@ -1,6 +0,0 @@
|
||||
The author disclaims copyright to this source code. In place of
|
||||
a legal notice, here is a blessing:
|
||||
|
||||
May you do good and not evil.
|
||||
May you find forgiveness for yourself and forgive others.
|
||||
May you share freely, never taking more than you give.
|
||||
@ -1,7 +0,0 @@
|
||||
#
|
||||
# Tcl package index file
|
||||
#
|
||||
# Note sqlite*3* init specifically
|
||||
#
|
||||
package ifneeded sqlite3 @PACKAGE_VERSION@ \
|
||||
[list load [file join $dir @PKG_LIB_FILE@] Sqlite3]
|
||||
@ -1,528 +0,0 @@
|
||||
#!/bin/sh
|
||||
# install - install a program, script, or datafile
|
||||
|
||||
scriptversion=2011-04-20.01; # UTC
|
||||
|
||||
# This originates from X11R5 (mit/util/scripts/install.sh), which was
|
||||
# later released in X11R6 (xc/config/util/install.sh) with the
|
||||
# following copyright and license.
|
||||
#
|
||||
# Copyright (C) 1994 X Consortium
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
|
||||
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
# Except as contained in this notice, the name of the X Consortium shall not
|
||||
# be used in advertising or otherwise to promote the sale, use or other deal-
|
||||
# ings in this Software without prior written authorization from the X Consor-
|
||||
# tium.
|
||||
#
|
||||
#
|
||||
# FSF changes to this file are in the public domain.
|
||||
#
|
||||
# Calling this script install-sh is preferred over install.sh, to prevent
|
||||
# `make' implicit rules from creating a file called install from it
|
||||
# when there is no Makefile.
|
||||
#
|
||||
# This script is compatible with the BSD install script, but was written
|
||||
# from scratch.
|
||||
|
||||
nl='
|
||||
'
|
||||
IFS=" "" $nl"
|
||||
|
||||
# set DOITPROG to echo to test this script
|
||||
|
||||
# Don't use :- since 4.3BSD and earlier shells don't like it.
|
||||
doit=${DOITPROG-}
|
||||
if test -z "$doit"; then
|
||||
doit_exec=exec
|
||||
else
|
||||
doit_exec=$doit
|
||||
fi
|
||||
|
||||
# Put in absolute file names if you don't have them in your path;
|
||||
# or use environment vars.
|
||||
|
||||
chgrpprog=${CHGRPPROG-chgrp}
|
||||
chmodprog=${CHMODPROG-chmod}
|
||||
chownprog=${CHOWNPROG-chown}
|
||||
cmpprog=${CMPPROG-cmp}
|
||||
cpprog=${CPPROG-cp}
|
||||
mkdirprog=${MKDIRPROG-mkdir}
|
||||
mvprog=${MVPROG-mv}
|
||||
rmprog=${RMPROG-rm}
|
||||
stripprog=${STRIPPROG-strip}
|
||||
|
||||
posix_glob='?'
|
||||
initialize_posix_glob='
|
||||
test "$posix_glob" != "?" || {
|
||||
if (set -f) 2>/dev/null; then
|
||||
posix_glob=
|
||||
else
|
||||
posix_glob=:
|
||||
fi
|
||||
}
|
||||
'
|
||||
|
||||
posix_mkdir=
|
||||
|
||||
# Desired mode of installed file.
|
||||
mode=0755
|
||||
|
||||
chgrpcmd=
|
||||
chmodcmd=$chmodprog
|
||||
chowncmd=
|
||||
mvcmd=$mvprog
|
||||
rmcmd="$rmprog -f"
|
||||
stripcmd=
|
||||
|
||||
src=
|
||||
dst=
|
||||
dir_arg=
|
||||
dst_arg=
|
||||
|
||||
copy_on_change=false
|
||||
no_target_directory=
|
||||
|
||||
usage="\
|
||||
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
|
||||
or: $0 [OPTION]... SRCFILES... DIRECTORY
|
||||
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
|
||||
or: $0 [OPTION]... -d DIRECTORIES...
|
||||
|
||||
In the 1st form, copy SRCFILE to DSTFILE.
|
||||
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
|
||||
In the 4th, create DIRECTORIES.
|
||||
|
||||
Options:
|
||||
--help display this help and exit.
|
||||
--version display version info and exit.
|
||||
|
||||
-c (ignored)
|
||||
-C install only if different (preserve the last data modification time)
|
||||
-d create directories instead of installing files.
|
||||
-g GROUP $chgrpprog installed files to GROUP.
|
||||
-m MODE $chmodprog installed files to MODE.
|
||||
-o USER $chownprog installed files to USER.
|
||||
-s $stripprog installed files.
|
||||
-S $stripprog installed files.
|
||||
-t DIRECTORY install into DIRECTORY.
|
||||
-T report an error if DSTFILE is a directory.
|
||||
|
||||
Environment variables override the default commands:
|
||||
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
|
||||
RMPROG STRIPPROG
|
||||
"
|
||||
|
||||
while test $# -ne 0; do
|
||||
case $1 in
|
||||
-c) ;;
|
||||
|
||||
-C) copy_on_change=true;;
|
||||
|
||||
-d) dir_arg=true;;
|
||||
|
||||
-g) chgrpcmd="$chgrpprog $2"
|
||||
shift;;
|
||||
|
||||
--help) echo "$usage"; exit $?;;
|
||||
|
||||
-m) mode=$2
|
||||
case $mode in
|
||||
*' '* | *' '* | *'
|
||||
'* | *'*'* | *'?'* | *'['*)
|
||||
echo "$0: invalid mode: $mode" >&2
|
||||
exit 1;;
|
||||
esac
|
||||
shift;;
|
||||
|
||||
-o) chowncmd="$chownprog $2"
|
||||
shift;;
|
||||
|
||||
-s) stripcmd=$stripprog;;
|
||||
|
||||
-S) stripcmd="$stripprog $2"
|
||||
shift;;
|
||||
|
||||
-t) dst_arg=$2
|
||||
shift;;
|
||||
|
||||
-T) no_target_directory=true;;
|
||||
|
||||
--version) echo "$0 $scriptversion"; exit $?;;
|
||||
|
||||
--) shift
|
||||
break;;
|
||||
|
||||
-*) echo "$0: invalid option: $1" >&2
|
||||
exit 1;;
|
||||
|
||||
*) break;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
|
||||
# When -d is used, all remaining arguments are directories to create.
|
||||
# When -t is used, the destination is already specified.
|
||||
# Otherwise, the last argument is the destination. Remove it from $@.
|
||||
for arg
|
||||
do
|
||||
if test -n "$dst_arg"; then
|
||||
# $@ is not empty: it contains at least $arg.
|
||||
set fnord "$@" "$dst_arg"
|
||||
shift # fnord
|
||||
fi
|
||||
shift # arg
|
||||
dst_arg=$arg
|
||||
done
|
||||
fi
|
||||
|
||||
if test $# -eq 0; then
|
||||
if test -z "$dir_arg"; then
|
||||
echo "$0: no input file specified." >&2
|
||||
exit 1
|
||||
fi
|
||||
# It's OK to call `install-sh -d' without argument.
|
||||
# This can happen when creating conditional directories.
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if test -z "$dir_arg"; then
|
||||
do_exit='(exit $ret); exit $ret'
|
||||
trap "ret=129; $do_exit" 1
|
||||
trap "ret=130; $do_exit" 2
|
||||
trap "ret=141; $do_exit" 13
|
||||
trap "ret=143; $do_exit" 15
|
||||
|
||||
# Set umask so as not to create temps with too-generous modes.
|
||||
# However, 'strip' requires both read and write access to temps.
|
||||
case $mode in
|
||||
# Optimize common cases.
|
||||
*644) cp_umask=133;;
|
||||
*755) cp_umask=22;;
|
||||
|
||||
*[0-7])
|
||||
if test -z "$stripcmd"; then
|
||||
u_plus_rw=
|
||||
else
|
||||
u_plus_rw='% 200'
|
||||
fi
|
||||
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
|
||||
*)
|
||||
if test -z "$stripcmd"; then
|
||||
u_plus_rw=
|
||||
else
|
||||
u_plus_rw=,u+rw
|
||||
fi
|
||||
cp_umask=$mode$u_plus_rw;;
|
||||
esac
|
||||
fi
|
||||
|
||||
for src
|
||||
do
|
||||
# Protect names starting with `-'.
|
||||
case $src in
|
||||
-*) src=./$src;;
|
||||
esac
|
||||
|
||||
if test -n "$dir_arg"; then
|
||||
dst=$src
|
||||
dstdir=$dst
|
||||
test -d "$dstdir"
|
||||
dstdir_status=$?
|
||||
else
|
||||
|
||||
# Waiting for this to be detected by the "$cpprog $src $dsttmp" command
|
||||
# might cause directories to be created, which would be especially bad
|
||||
# if $src (and thus $dsttmp) contains '*'.
|
||||
if test ! -f "$src" && test ! -d "$src"; then
|
||||
echo "$0: $src does not exist." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if test -z "$dst_arg"; then
|
||||
echo "$0: no destination specified." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
dst=$dst_arg
|
||||
# Protect names starting with `-'.
|
||||
case $dst in
|
||||
-*) dst=./$dst;;
|
||||
esac
|
||||
|
||||
# If destination is a directory, append the input filename; won't work
|
||||
# if double slashes aren't ignored.
|
||||
if test -d "$dst"; then
|
||||
if test -n "$no_target_directory"; then
|
||||
echo "$0: $dst_arg: Is a directory" >&2
|
||||
exit 1
|
||||
fi
|
||||
dstdir=$dst
|
||||
dst=$dstdir/`basename "$src"`
|
||||
dstdir_status=0
|
||||
else
|
||||
# Prefer dirname, but fall back on a substitute if dirname fails.
|
||||
dstdir=`
|
||||
(dirname "$dst") 2>/dev/null ||
|
||||
expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
|
||||
X"$dst" : 'X\(//\)[^/]' \| \
|
||||
X"$dst" : 'X\(//\)$' \| \
|
||||
X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
|
||||
echo X"$dst" |
|
||||
sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
|
||||
s//\1/
|
||||
q
|
||||
}
|
||||
/^X\(\/\/\)[^/].*/{
|
||||
s//\1/
|
||||
q
|
||||
}
|
||||
/^X\(\/\/\)$/{
|
||||
s//\1/
|
||||
q
|
||||
}
|
||||
/^X\(\/\).*/{
|
||||
s//\1/
|
||||
q
|
||||
}
|
||||
s/.*/./; q'
|
||||
`
|
||||
|
||||
test -d "$dstdir"
|
||||
dstdir_status=$?
|
||||
fi
|
||||
fi
|
||||
|
||||
obsolete_mkdir_used=false
|
||||
|
||||
if test $dstdir_status != 0; then
|
||||
case $posix_mkdir in
|
||||
'')
|
||||
# Create intermediate dirs using mode 755 as modified by the umask.
|
||||
# This is like FreeBSD 'install' as of 1997-10-28.
|
||||
umask=`umask`
|
||||
case $stripcmd.$umask in
|
||||
# Optimize common cases.
|
||||
*[2367][2367]) mkdir_umask=$umask;;
|
||||
.*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
|
||||
|
||||
*[0-7])
|
||||
mkdir_umask=`expr $umask + 22 \
|
||||
- $umask % 100 % 40 + $umask % 20 \
|
||||
- $umask % 10 % 4 + $umask % 2
|
||||
`;;
|
||||
*) mkdir_umask=$umask,go-w;;
|
||||
esac
|
||||
|
||||
# With -d, create the new directory with the user-specified mode.
|
||||
# Otherwise, rely on $mkdir_umask.
|
||||
if test -n "$dir_arg"; then
|
||||
mkdir_mode=-m$mode
|
||||
else
|
||||
mkdir_mode=
|
||||
fi
|
||||
|
||||
posix_mkdir=false
|
||||
case $umask in
|
||||
*[123567][0-7][0-7])
|
||||
# POSIX mkdir -p sets u+wx bits regardless of umask, which
|
||||
# is incompatible with FreeBSD 'install' when (umask & 300) != 0.
|
||||
;;
|
||||
*)
|
||||
tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
|
||||
trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
|
||||
|
||||
if (umask $mkdir_umask &&
|
||||
exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
|
||||
then
|
||||
if test -z "$dir_arg" || {
|
||||
# Check for POSIX incompatibilities with -m.
|
||||
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
|
||||
# other-writeable bit of parent directory when it shouldn't.
|
||||
# FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
|
||||
ls_ld_tmpdir=`ls -ld "$tmpdir"`
|
||||
case $ls_ld_tmpdir in
|
||||
d????-?r-*) different_mode=700;;
|
||||
d????-?--*) different_mode=755;;
|
||||
*) false;;
|
||||
esac &&
|
||||
$mkdirprog -m$different_mode -p -- "$tmpdir" && {
|
||||
ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
|
||||
test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
|
||||
}
|
||||
}
|
||||
then posix_mkdir=:
|
||||
fi
|
||||
rmdir "$tmpdir/d" "$tmpdir"
|
||||
else
|
||||
# Remove any dirs left behind by ancient mkdir implementations.
|
||||
rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
|
||||
fi
|
||||
trap '' 0;;
|
||||
esac;;
|
||||
esac
|
||||
|
||||
if
|
||||
$posix_mkdir && (
|
||||
umask $mkdir_umask &&
|
||||
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
|
||||
)
|
||||
then :
|
||||
else
|
||||
|
||||
# The umask is ridiculous, or mkdir does not conform to POSIX,
|
||||
# or it failed possibly due to a race condition. Create the
|
||||
# directory the slow way, step by step, checking for races as we go.
|
||||
|
||||
case $dstdir in
|
||||
/*) prefix='/';;
|
||||
-*) prefix='./';;
|
||||
*) prefix='';;
|
||||
esac
|
||||
|
||||
eval "$initialize_posix_glob"
|
||||
|
||||
oIFS=$IFS
|
||||
IFS=/
|
||||
$posix_glob set -f
|
||||
set fnord $dstdir
|
||||
shift
|
||||
$posix_glob set +f
|
||||
IFS=$oIFS
|
||||
|
||||
prefixes=
|
||||
|
||||
for d
|
||||
do
|
||||
test -z "$d" && continue
|
||||
|
||||
prefix=$prefix$d
|
||||
if test -d "$prefix"; then
|
||||
prefixes=
|
||||
else
|
||||
if $posix_mkdir; then
|
||||
(umask $mkdir_umask &&
|
||||
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
|
||||
# Don't fail if two instances are running concurrently.
|
||||
test -d "$prefix" || exit 1
|
||||
else
|
||||
case $prefix in
|
||||
*\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
|
||||
*) qprefix=$prefix;;
|
||||
esac
|
||||
prefixes="$prefixes '$qprefix'"
|
||||
fi
|
||||
fi
|
||||
prefix=$prefix/
|
||||
done
|
||||
|
||||
if test -n "$prefixes"; then
|
||||
# Don't fail if two instances are running concurrently.
|
||||
(umask $mkdir_umask &&
|
||||
eval "\$doit_exec \$mkdirprog $prefixes") ||
|
||||
test -d "$dstdir" || exit 1
|
||||
obsolete_mkdir_used=true
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
if test -n "$dir_arg"; then
|
||||
{ test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
|
||||
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
|
||||
{ test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
|
||||
test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
|
||||
else
|
||||
|
||||
# Make a couple of temp file names in the proper directory.
|
||||
dsttmp=$dstdir/_inst.$$_
|
||||
rmtmp=$dstdir/_rm.$$_
|
||||
|
||||
# Trap to clean up those temp files at exit.
|
||||
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
|
||||
|
||||
# Copy the file name to the temp name.
|
||||
(umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
|
||||
|
||||
# and set any options; do chmod last to preserve setuid bits.
|
||||
#
|
||||
# If any of these fail, we abort the whole thing. If we want to
|
||||
# ignore errors from any of these, just make sure not to ignore
|
||||
# errors from the above "$doit $cpprog $src $dsttmp" command.
|
||||
#
|
||||
{ test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
|
||||
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
|
||||
{ test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
|
||||
{ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
|
||||
|
||||
# If -C, don't bother to copy if it wouldn't change the file.
|
||||
if $copy_on_change &&
|
||||
old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
|
||||
new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
|
||||
|
||||
eval "$initialize_posix_glob" &&
|
||||
$posix_glob set -f &&
|
||||
set X $old && old=:$2:$4:$5:$6 &&
|
||||
set X $new && new=:$2:$4:$5:$6 &&
|
||||
$posix_glob set +f &&
|
||||
|
||||
test "$old" = "$new" &&
|
||||
$cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
|
||||
then
|
||||
rm -f "$dsttmp"
|
||||
else
|
||||
# Rename the file to the real destination.
|
||||
$doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
|
||||
|
||||
# The rename failed, perhaps because mv can't rename something else
|
||||
# to itself, or perhaps because mv is so ancient that it does not
|
||||
# support -f.
|
||||
{
|
||||
# Now remove or move aside any old file at destination location.
|
||||
# We try this two ways since rm can't unlink itself on some
|
||||
# systems and the destination file might be busy for other
|
||||
# reasons. In this case, the final cleanup might fail but the new
|
||||
# file should still install successfully.
|
||||
{
|
||||
test ! -f "$dst" ||
|
||||
$doit $rmcmd -f "$dst" 2>/dev/null ||
|
||||
{ $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
|
||||
{ $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
|
||||
} ||
|
||||
{ echo "$0: cannot unlink or rename $dst" >&2
|
||||
(exit 1); exit 1
|
||||
}
|
||||
} &&
|
||||
|
||||
# Now rename the file to the real destination.
|
||||
$doit $mvcmd "$dsttmp" "$dst"
|
||||
}
|
||||
fi || exit 1
|
||||
|
||||
trap '' 0
|
||||
fi
|
||||
done
|
||||
|
||||
# Local variables:
|
||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||
# time-stamp-start: "scriptversion="
|
||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||
# time-stamp-time-zone: "UTC"
|
||||
# time-stamp-end: "; # UTC"
|
||||
# End:
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,414 +0,0 @@
|
||||
# makefile.vc -- -*- Makefile -*-
|
||||
#
|
||||
# Microsoft Visual C++ makefile for use with nmake.exe v1.62+ (VC++ 5.0+)
|
||||
#
|
||||
# This makefile is based upon the Tcl 8.4 Makefile.vc and modified to
# make it suitable as a general package makefile. Look for the word EDIT
# which marks sections that may need modification. As a minimum you will
# need to change the PROJECT, DOTVERSION and DLLOBJS variables to values
# relevant to your package.
|
||||
#
|
||||
# See the file "license.terms" for information on usage and redistribution
|
||||
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
|
||||
#
|
||||
# Copyright (c) 1995-1996 Sun Microsystems, Inc.
|
||||
# Copyright (c) 1998-2000 Ajuba Solutions.
|
||||
# Copyright (c) 2001 ActiveState Corporation.
|
||||
# Copyright (c) 2001-2002 David Gravereaux.
|
||||
# Copyright (c) 2003 Pat Thoyts
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
# RCS: @(#)$Id: makefile.vc,v 1.4 2004/07/26 08:22:05 patthoyts Exp $
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
!if !defined(MSDEVDIR) && !defined(MSVCDIR) && !defined(VCINSTALLDIR) && !defined(MSSDK) && !defined(WINDOWSSDKDIR)
|
||||
MSG = ^
|
||||
You will need to run vcvars32.bat from Developer Studio, first, to setup^
|
||||
the environment. Jump to this line to read the new instructions.
|
||||
!error $(MSG)
|
||||
!endif
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# HOW TO USE this makefile:
|
||||
#
|
||||
# 1) It is now necessary to have %MSVCDir% set in the environment. This is
# used as a check to see whether vcvars32.bat has been run prior to running
# nmake, or whether MSVCDir was set globally and the PATH adjusted during
# the installation of Microsoft Visual C++. Either way is valid.
#
# You'll need to run vcvars32.bat contained in the MsDev's vc(98)/bin
# directory to set up the proper environment, if needed, for your current
# setup. This is a needed bootstrap requirement and makes it easier to
# swap between different environments.
|
||||
#
|
||||
# 2) To use the Platform SDK (not expressly needed), run setenv.bat after
|
||||
# vcvars32.bat according to the instructions for it. This can also turn on
|
||||
# the 64-bit compiler, if your SDK has it.
|
||||
#
|
||||
# 3) Targets are:
|
||||
# all -- Builds everything.
|
||||
# <project> -- Builds the project (eg: nmake sample)
|
||||
# test -- Builds and runs the test suite.
|
||||
# install -- Installs the built binaries and libraries to $(INSTALLDIR)
|
||||
# in an appropriate subdirectory.
|
||||
# clean/realclean/distclean -- varying levels of cleaning.
|
||||
#
|
||||
# 4) Macros usable on the commandline:
|
||||
# INSTALLDIR=<path>
|
||||
# Sets where to install Tcl from the built binaries.
|
||||
# C:\Progra~1\Tcl is assumed when not specified.
|
||||
#
|
||||
# OPTS=static,msvcrt,staticpkg,threads,symbols,profile,loimpact,none
|
||||
# Sets special options for the core. The default is for none.
|
||||
# Any combination of the above may be used (comma separated).
|
||||
# 'none' will over-ride everything to nothing.
|
||||
#
|
||||
# static = Builds a static library of the core instead of a
|
||||
# dll. The shell will be static (and large), as well.
|
||||
# msvcrt = Affects the static option only to switch it from
|
||||
# using libcmt(d) as the C runtime [by default] to
|
||||
# msvcrt(d). This is useful for static embedding
|
||||
# support.
|
||||
# staticpkg = Affects the static option only to switch
|
||||
# tclshXX.exe to have the dde and reg extension linked
|
||||
# inside it.
|
||||
# threads = Turns on full multithreading support.
|
||||
# thrdalloc = Use the thread allocator (shared global free pool).
|
||||
# symbols = Adds symbols for step debugging.
|
||||
# profile = Adds profiling hooks. Map file is assumed.
|
||||
# loimpact = Adds a flag for how NT treats the heap to keep memory
|
||||
# in use low. This is said to impact alloc performance.
|
||||
#
|
||||
# STATS=memdbg,compdbg,none
|
||||
# Sets optional memory and bytecode compiler debugging code added
|
||||
# to the core. The default is for none. Any combination of the
|
||||
# above may be used (comma separated). 'none' will over-ride
|
||||
# everything to nothing.
|
||||
#
|
||||
# memdbg = Enables the debugging memory allocator.
|
||||
# compdbg = Enables byte compilation logging.
|
||||
#
|
||||
# MACHINE=(IX86|IA64|ALPHA)
|
||||
# Set the machine type used for the compiler, linker, and
|
||||
# resource compiler. This hook is needed to tell the tools
|
||||
# when alternate platforms are requested. IX86 is the default
|
||||
# when not specified.
|
||||
#
|
||||
# TMP_DIR=<path>
|
||||
# OUT_DIR=<path>
|
||||
# Hooks to allow the intermediate and output directories to be
|
||||
# changed. $(OUT_DIR) is assumed to be
|
||||
# $(BINROOT)\(Release|Debug) based on if symbols are requested.
|
||||
# $(TMP_DIR) will be $(OUT_DIR)\<buildtype> by default.
|
||||
#
|
||||
# TESTPAT=<file>
|
||||
# Reads the tests requested to be run from this file.
|
||||
#
|
||||
# CFG_ENCODING=encoding
|
||||
# name of encoding for configuration information. Defaults
|
||||
# to cp1252
|
||||
#
|
||||
# 5) Examples:
|
||||
#
|
||||
# Basic syntax of calling nmake looks like this:
|
||||
# nmake [-nologo] -f makefile.vc [target|macrodef [target|macrodef] [...]]
|
||||
#
|
||||
# Standard (no frills)
|
||||
# c:\tcl_src\win\>c:\progra~1\micros~1\vc98\bin\vcvars32.bat
|
||||
# Setting environment for using Microsoft Visual C++ tools.
|
||||
# c:\tcl_src\win\>nmake -f makefile.vc all
|
||||
# c:\tcl_src\win\>nmake -f makefile.vc install INSTALLDIR=c:\progra~1\tcl
|
||||
#
|
||||
# Building for Win64
|
||||
# c:\tcl_src\win\>c:\progra~1\micros~1\vc98\bin\vcvars32.bat
|
||||
# Setting environment for using Microsoft Visual C++ tools.
|
||||
# c:\tcl_src\win\>c:\progra~1\platfo~1\setenv.bat /pre64 /RETAIL
|
||||
# Targeting Windows pre64 RETAIL
|
||||
# c:\tcl_src\win\>nmake -f makefile.vc MACHINE=IA64
|
||||
#
|
||||
#------------------------------------------------------------------------------
|
||||
#==============================================================================
|
||||
###############################################################################
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
!if !exist("makefile.vc")
|
||||
MSG = ^
|
||||
You must run this makefile only from the directory it is in.^
|
||||
Please `cd` to its location first.
|
||||
!error $(MSG)
|
||||
!endif
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# Project specific information (EDIT)
|
||||
#
|
||||
# You should edit this with the name and version of your project. This
|
||||
# information is used to generate the name of the package library and
|
||||
# its install location.
|
||||
#
|
||||
# For example, the sample extension is going to build sample04.dll and
|
||||
# would install it into $(INSTALLDIR)\lib\sample04
|
||||
#
|
||||
# You need to specify the object files that need to be linked into your
|
||||
# binary here.
|
||||
#
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
PROJECT = sqlite3
|
||||
!include "rules.vc"
|
||||
|
||||
# nmakehlp -V <file> <tag> will search the file for <tag>, skip until a
# number is found, and return all characters until a character not in
# [0-9.ab] is read.
|
||||
|
||||
!if [echo REM = This file is generated from Makefile.vc > versions.vc]
|
||||
!endif
|
||||
# get the project version from the line "AC_INIT([sqlite], [3.7.14])"
|
||||
!if [echo DOTVERSION = \>> versions.vc] \
|
||||
&& [nmakehlp -V ..\configure.in AC_INIT >> versions.vc]
|
||||
!endif
|
||||
!include "versions.vc"
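# Illustration only (not part of the original build logic): assuming the
# AC_INIT line quoted above, the generated versions.vc would be expected to
# contain something like
#
#   REM = This file is generated from Makefile.vc
#   DOTVERSION = 3.7.14
#
# so that VERSION below becomes 3714 once the dots are stripped out.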
|
||||
|
||||
VERSION = $(DOTVERSION:.=)
|
||||
STUBPREFIX = $(PROJECT)stub
|
||||
|
||||
DLLOBJS = \
|
||||
$(TMP_DIR)\tclsqlite3.obj
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# Target names and paths ( shouldn't need changing )
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
BINROOT = .
|
||||
ROOT = ..
|
||||
|
||||
PRJIMPLIB = $(OUT_DIR)\$(PROJECT)$(VERSION)$(SUFX).lib
|
||||
PRJLIBNAME = $(PROJECT)$(VERSION)$(SUFX).$(EXT)
|
||||
PRJLIB = $(OUT_DIR)\$(PRJLIBNAME)
|
||||
|
||||
PRJSTUBLIBNAME = $(STUBPREFIX)$(VERSION).lib
|
||||
PRJSTUBLIB = $(OUT_DIR)\$(PRJSTUBLIBNAME)
|
||||
|
||||
### Make sure we use backslash only.
|
||||
PRJ_INSTALL_DIR = $(_INSTALLDIR)\$(PROJECT)$(DOTVERSION)
|
||||
LIB_INSTALL_DIR = $(PRJ_INSTALL_DIR)
|
||||
BIN_INSTALL_DIR = $(PRJ_INSTALL_DIR)
|
||||
DOC_INSTALL_DIR = $(PRJ_INSTALL_DIR)
|
||||
SCRIPT_INSTALL_DIR = $(PRJ_INSTALL_DIR)
|
||||
INCLUDE_INSTALL_DIR = $(_TCLDIR)\include
|
||||
|
||||
### The following paths CANNOT have spaces in them.
|
||||
GENERICDIR = $(ROOT)\generic
|
||||
WINDIR = $(ROOT)\win
|
||||
LIBDIR = $(ROOT)\library
|
||||
DOCDIR = $(ROOT)\doc
|
||||
TOOLSDIR = $(ROOT)\tools
|
||||
COMPATDIR = $(ROOT)\compat
|
||||
|
||||
#---------------------------------------------------------------------
|
||||
# Compile flags
|
||||
#---------------------------------------------------------------------
|
||||
|
||||
!if !$(DEBUG)
|
||||
!if $(OPTIMIZING)
|
||||
### This cranks the optimization level to maximize speed
|
||||
cdebug = -O2 -Op -Gs
|
||||
!else
|
||||
cdebug =
|
||||
!endif
|
||||
!else if "$(MACHINE)" == "IA64"
|
||||
### Too many warnings; we can't turn warnings into errors.
|
||||
cdebug = -Z7 -Od -GZ
|
||||
!else
|
||||
cdebug = -Z7 -WX -Od -GZ
|
||||
!endif
|
||||
|
||||
### Declarations common to all compiler options
|
||||
cflags = -nologo -c -W3 -YX -Fp$(TMP_DIR)^\
|
||||
|
||||
!if $(MSVCRT)
|
||||
!if $(DEBUG)
|
||||
crt = -MDd
|
||||
!else
|
||||
crt = -MD
|
||||
!endif
|
||||
!else
|
||||
!if $(DEBUG)
|
||||
crt = -MTd
|
||||
!else
|
||||
crt = -MT
|
||||
!endif
|
||||
!endif
|
||||
|
||||
INCLUDES = $(TCL_INCLUDES) -I"$(WINDIR)" -I"$(GENERICDIR)" \
|
||||
-I"$(ROOT)\.."
|
||||
BASE_CLFAGS = $(cflags) $(cdebug) $(crt) $(INCLUDES) \
|
||||
-DSQLITE_3_SUFFIX_ONLY=1 -DSQLITE_ENABLE_RTREE=1 \
|
||||
-DSQLITE_ENABLE_FTS3=1 -DSQLITE_OMIT_DEPRECATED=1
|
||||
CON_CFLAGS = $(cflags) $(cdebug) $(crt) -DCONSOLE -DSQLITE_ENABLE_FTS3=1
|
||||
TCL_CFLAGS = -DBUILD_sqlite -DUSE_TCL_STUBS \
|
||||
-DPACKAGE_VERSION="\"$(DOTVERSION)\"" $(BASE_CLFAGS) \
|
||||
$(OPTDEFINES)
|
||||
|
||||
#---------------------------------------------------------------------
|
||||
# Link flags
|
||||
#---------------------------------------------------------------------
|
||||
|
||||
!if $(DEBUG)
|
||||
ldebug = -debug:full -debugtype:cv
|
||||
!else
|
||||
ldebug = -release -opt:ref -opt:icf,3
|
||||
!endif
|
||||
|
||||
### Declarations common to all linker options
|
||||
lflags = -nologo -machine:$(MACHINE) $(ldebug)
|
||||
|
||||
!if $(PROFILE)
|
||||
lflags = $(lflags) -profile
|
||||
!endif
|
||||
|
||||
!if $(ALIGN98_HACK) && !$(STATIC_BUILD)
|
||||
### Align sections for PE size savings.
|
||||
lflags = $(lflags) -opt:nowin98
|
||||
!else if !$(ALIGN98_HACK) && $(STATIC_BUILD)
|
||||
### Align sections for speed in loading by choosing the virtual page size.
|
||||
lflags = $(lflags) -align:4096
|
||||
!endif
|
||||
|
||||
!if $(LOIMPACT)
|
||||
lflags = $(lflags) -ws:aggressive
|
||||
!endif
|
||||
|
||||
dlllflags = $(lflags) -dll
|
||||
conlflags = $(lflags) -subsystem:console
|
||||
guilflags = $(lflags) -subsystem:windows
|
||||
baselibs = $(TCLSTUBLIB)
|
||||
|
||||
#---------------------------------------------------------------------
|
||||
# TclTest flags
|
||||
#---------------------------------------------------------------------
|
||||
|
||||
!IF "$(TESTPAT)" != ""
|
||||
TESTFLAGS = $(TESTFLAGS) -file $(TESTPAT)
|
||||
!ENDIF
|
||||
|
||||
#---------------------------------------------------------------------
|
||||
# Project specific targets (EDIT)
|
||||
#---------------------------------------------------------------------
|
||||
|
||||
all: setup $(PROJECT)
|
||||
$(PROJECT): setup $(PRJLIB)
|
||||
install: install-binaries install-libraries install-docs
|
||||
|
||||
# Tests need to ensure we load the right dll file; we
# have to handle the output differently on Win9x.
|
||||
#
|
||||
!if "$(OS)" == "Windows_NT" || "$(MSVCDIR)" == "IDE"
|
||||
test: setup $(PROJECT)
|
||||
set TCL_LIBRARY=$(ROOT)/library
|
||||
$(TCLSH) <<
|
||||
load $(PRJLIB:\=/)
|
||||
cd "$(ROOT)/tests"
|
||||
set argv "$(TESTFLAGS)"
|
||||
source all.tcl
|
||||
<<
|
||||
!else
|
||||
test: setup $(PROJECT)
|
||||
echo Please wait while the test results are collected
|
||||
set TCL_LIBRARY=$(ROOT)/library
|
||||
$(TCLSH) << >tests.log
|
||||
load $(PRJLIB:\=/)
|
||||
cd "$(ROOT)/tests"
|
||||
set argv "$(TESTFLAGS)"
|
||||
source all.tcl
|
||||
<<
|
||||
type tests.log | more
|
||||
!endif
|
||||
|
||||
setup:
|
||||
@if not exist $(OUT_DIR)\nul mkdir $(OUT_DIR)
|
||||
@if not exist $(TMP_DIR)\nul mkdir $(TMP_DIR)
|
||||
|
||||
$(PRJLIB): $(DLLOBJS)
|
||||
$(link32) $(dlllflags) -out:$@ $(baselibs) @<<
|
||||
$**
|
||||
<<
|
||||
-@del $*.exp
|
||||
|
||||
$(PRJSTUBLIB): $(PRJSTUBOBJS)
|
||||
$(lib32) -nologo -out:$@ $(PRJSTUBOBJS)
|
||||
|
||||
#---------------------------------------------------------------------
|
||||
# Implicit rules
|
||||
#---------------------------------------------------------------------
|
||||
|
||||
{$(WINDIR)}.c{$(TMP_DIR)}.obj::
|
||||
$(cc32) $(TCL_CFLAGS) -DBUILD_$(PROJECT) -Fo$(TMP_DIR)\ @<<
|
||||
$<
|
||||
<<
|
||||
|
||||
{$(GENERICDIR)}.c{$(TMP_DIR)}.obj::
|
||||
$(cc32) $(TCL_CFLAGS) -DBUILD_$(PROJECT) -Fo$(TMP_DIR)\ @<<
|
||||
$<
|
||||
<<
|
||||
|
||||
{$(COMPATDIR)}.c{$(TMP_DIR)}.obj::
|
||||
$(cc32) $(TCL_CFLAGS) -DBUILD_$(PROJECT) -Fo$(TMP_DIR)\ @<<
|
||||
$<
|
||||
<<
|
||||
|
||||
{$(WINDIR)}.rc{$(TMP_DIR)}.res:
|
||||
$(rc32) -fo $@ -r -i "$(GENERICDIR)" -D__WIN32__ \
|
||||
!if $(DEBUG)
|
||||
-d DEBUG \
|
||||
!endif
|
||||
!if $(TCL_THREADS)
|
||||
-d TCL_THREADS \
|
||||
!endif
|
||||
!if $(STATIC_BUILD)
|
||||
-d STATIC_BUILD \
|
||||
!endif
|
||||
$<
|
||||
|
||||
.SUFFIXES:
|
||||
.SUFFIXES:.c .rc
|
||||
|
||||
#---------------------------------------------------------------------
|
||||
# Installation. (EDIT)
|
||||
#
|
||||
# You may need to modify this section to reflect the final distribution
|
||||
# of your files and possibly to generate documentation.
|
||||
#
|
||||
#---------------------------------------------------------------------
|
||||
|
||||
install-binaries:
|
||||
@echo Installing binaries to '$(SCRIPT_INSTALL_DIR)'
|
||||
@if not exist "$(SCRIPT_INSTALL_DIR)" mkdir "$(SCRIPT_INSTALL_DIR)"
|
||||
@$(CPY) $(PRJLIB) "$(SCRIPT_INSTALL_DIR)" >NUL
|
||||
|
||||
install-libraries:
|
||||
@echo Installing libraries to '$(SCRIPT_INSTALL_DIR)'
|
||||
@if exist $(LIBDIR) $(CPY) $(LIBDIR)\*.tcl "$(SCRIPT_INSTALL_DIR)"
|
||||
@echo Installing package index in '$(SCRIPT_INSTALL_DIR)'
|
||||
@type << >"$(SCRIPT_INSTALL_DIR)\pkgIndex.tcl"
|
||||
package ifneeded $(PROJECT) $(DOTVERSION) \
|
||||
[list load [file join $$dir $(PRJLIBNAME)] sqlite3]
|
||||
<<
|
||||
|
||||
install-docs:
|
||||
@echo Installing documentation files to '$(DOC_INSTALL_DIR)'
|
||||
@if exist $(DOCDIR) $(CPY) $(DOCDIR)\*.n "$(DOC_INSTALL_DIR)"
|
||||
|
||||
#---------------------------------------------------------------------
|
||||
# Clean up
|
||||
#---------------------------------------------------------------------
|
||||
|
||||
clean:
|
||||
@if exist $(TMP_DIR)\nul $(RMDIR) $(TMP_DIR)
|
||||
@if exist $(WINDIR)\version.vc del $(WINDIR)\version.vc
|
||||
|
||||
realclean: clean
|
||||
@if exist $(OUT_DIR)\nul $(RMDIR) $(OUT_DIR)
|
||||
|
||||
distclean: realclean
|
||||
@if exist $(WINDIR)\nmakehlp.exe del $(WINDIR)\nmakehlp.exe
|
||||
@if exist $(WINDIR)\nmakehlp.obj del $(WINDIR)\nmakehlp.obj
|
||||
@ -1,694 +0,0 @@
|
||||
/*
|
||||
* ----------------------------------------------------------------------------
|
||||
* nmakehlp.c --
|
||||
*
|
||||
* This is used to fix limitations within nmake and the environment.
|
||||
*
|
||||
* Copyright (c) 2002 by David Gravereaux.
|
||||
* Copyright (c) 2006 by Pat Thoyts
|
||||
*
|
||||
* See the file "license.terms" for information on usage and redistribution of
|
||||
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
|
||||
* ----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#define _CRT_SECURE_NO_DEPRECATE
|
||||
#include <windows.h>
|
||||
#define NO_SHLWAPI_GDI
|
||||
#define NO_SHLWAPI_STREAM
|
||||
#define NO_SHLWAPI_REG
|
||||
#include <shlwapi.h>
|
||||
#pragma comment (lib, "user32.lib")
|
||||
#pragma comment (lib, "kernel32.lib")
|
||||
#pragma comment (lib, "shlwapi.lib")
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
/*
|
||||
* This library is required for x64 builds with _some_ versions of MSVC
|
||||
*/
|
||||
#if defined(_M_IA64) || defined(_M_AMD64)
|
||||
#if _MSC_VER >= 1400 && _MSC_VER < 1500
|
||||
#pragma comment(lib, "bufferoverflowU")
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ISO hack for dumb VC++ */
|
||||
#ifdef _MSC_VER
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* protos */
|
||||
|
||||
static int CheckForCompilerFeature(const char *option);
|
||||
static int CheckForLinkerFeature(const char *option);
|
||||
static int IsIn(const char *string, const char *substring);
|
||||
static int SubstituteFile(const char *substs, const char *filename);
|
||||
static int QualifyPath(const char *path);
|
||||
static const char *GetVersionFromFile(const char *filename, const char *match);
|
||||
static DWORD WINAPI ReadFromPipe(LPVOID args);
|
||||
|
||||
/* globals */
|
||||
|
||||
#define CHUNK 25
|
||||
#define STATICBUFFERSIZE 1000
|
||||
typedef struct {
|
||||
HANDLE pipe;
|
||||
char buffer[STATICBUFFERSIZE];
|
||||
} pipeinfo;
|
||||
|
||||
pipeinfo Out = {INVALID_HANDLE_VALUE, '\0'};
|
||||
pipeinfo Err = {INVALID_HANDLE_VALUE, '\0'};
|
||||
|
||||
/*
|
||||
* exitcodes: 0 == no, 1 == yes, 2 == error
|
||||
*/
|
||||
|
||||
int
|
||||
main(
|
||||
int argc,
|
||||
char *argv[])
|
||||
{
|
||||
char msg[300];
|
||||
DWORD dwWritten;
|
||||
int chars;
|
||||
|
||||
/*
|
||||
* Make sure children (cl.exe and link.exe) are kept quiet.
|
||||
*/
|
||||
|
||||
SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX);
|
||||
|
||||
/*
|
||||
* Make sure the compiler and linker aren't affected by the outside world.
|
||||
*/
|
||||
|
||||
SetEnvironmentVariable("CL", "");
|
||||
SetEnvironmentVariable("LINK", "");
|
||||
|
||||
if (argc > 1 && *argv[1] == '-') {
|
||||
switch (*(argv[1]+1)) {
|
||||
case 'c':
|
||||
if (argc != 3) {
|
||||
chars = snprintf(msg, sizeof(msg) - 1,
|
||||
"usage: %s -c <compiler option>\n"
|
||||
"Tests for whether cl.exe supports an option\n"
|
||||
"exitcodes: 0 == no, 1 == yes, 2 == error\n", argv[0]);
|
||||
WriteFile(GetStdHandle(STD_ERROR_HANDLE), msg, chars,
|
||||
&dwWritten, NULL);
|
||||
return 2;
|
||||
}
|
||||
return CheckForCompilerFeature(argv[2]);
|
||||
case 'l':
|
||||
if (argc != 3) {
|
||||
chars = snprintf(msg, sizeof(msg) - 1,
|
||||
"usage: %s -l <linker option>\n"
|
||||
"Tests for whether link.exe supports an option\n"
|
||||
"exitcodes: 0 == no, 1 == yes, 2 == error\n", argv[0]);
|
||||
WriteFile(GetStdHandle(STD_ERROR_HANDLE), msg, chars,
|
||||
&dwWritten, NULL);
|
||||
return 2;
|
||||
}
|
||||
return CheckForLinkerFeature(argv[2]);
|
||||
case 'f':
|
||||
if (argc == 2) {
|
||||
chars = snprintf(msg, sizeof(msg) - 1,
|
||||
"usage: %s -f <string> <substring>\n"
|
||||
"Find a substring within another\n"
|
||||
"exitcodes: 0 == no, 1 == yes, 2 == error\n", argv[0]);
|
||||
WriteFile(GetStdHandle(STD_ERROR_HANDLE), msg, chars,
|
||||
&dwWritten, NULL);
|
||||
return 2;
|
||||
} else if (argc == 3) {
|
||||
/*
|
||||
* If the string is blank, there is no match.
|
||||
*/
|
||||
|
||||
return 0;
|
||||
} else {
|
||||
return IsIn(argv[2], argv[3]);
|
||||
}
|
||||
case 's':
|
||||
if (argc == 2) {
|
||||
chars = snprintf(msg, sizeof(msg) - 1,
"usage: %s -s <substitutions file> <file>\n"
"Perform a set of string map type substitutions on a file\n"
|
||||
"exitcodes: 0\n",
|
||||
argv[0]);
|
||||
WriteFile(GetStdHandle(STD_ERROR_HANDLE), msg, chars,
|
||||
&dwWritten, NULL);
|
||||
return 2;
|
||||
}
|
||||
return SubstituteFile(argv[2], argv[3]);
|
||||
case 'V':
|
||||
if (argc != 4) {
|
||||
chars = snprintf(msg, sizeof(msg) - 1,
|
||||
"usage: %s -V filename matchstring\n"
|
||||
"Extract a version from a file:\n"
|
||||
"eg: pkgIndex.tcl \"package ifneeded http\"",
|
||||
argv[0]);
|
||||
WriteFile(GetStdHandle(STD_ERROR_HANDLE), msg, chars,
|
||||
&dwWritten, NULL);
|
||||
return 0;
|
||||
}
|
||||
printf("%s\n", GetVersionFromFile(argv[2], argv[3]));
|
||||
return 0;
|
||||
case 'Q':
|
||||
if (argc != 3) {
|
||||
chars = snprintf(msg, sizeof(msg) - 1,
|
||||
"usage: %s -Q path\n"
|
||||
"Emit the fully qualified path\n"
|
||||
"exitcodes: 0 == no, 1 == yes, 2 == error\n", argv[0]);
|
||||
WriteFile(GetStdHandle(STD_ERROR_HANDLE), msg, chars,
|
||||
&dwWritten, NULL);
|
||||
return 2;
|
||||
}
|
||||
return QualifyPath(argv[2]);
|
||||
}
|
||||
}
|
||||
chars = snprintf(msg, sizeof(msg) - 1,
|
||||
"usage: %s -c|-f|-l|-Q|-s|-V ...\n"
|
||||
"This is a little helper app to equalize shell differences between WinNT and\n"
|
||||
"Win9x and get nmake.exe to accomplish its job.\n",
|
||||
argv[0]);
|
||||
WriteFile(GetStdHandle(STD_ERROR_HANDLE), msg, chars, &dwWritten, NULL);
|
||||
return 2;
|
||||
}
|
||||
|
||||
static int
|
||||
CheckForCompilerFeature(
|
||||
const char *option)
|
||||
{
|
||||
STARTUPINFO si;
|
||||
PROCESS_INFORMATION pi;
|
||||
SECURITY_ATTRIBUTES sa;
|
||||
DWORD threadID;
|
||||
char msg[300];
|
||||
BOOL ok;
|
||||
HANDLE hProcess, h, pipeThreads[2];
|
||||
char cmdline[100];
|
||||
|
||||
hProcess = GetCurrentProcess();
|
||||
|
||||
ZeroMemory(&pi, sizeof(PROCESS_INFORMATION));
|
||||
ZeroMemory(&si, sizeof(STARTUPINFO));
|
||||
si.cb = sizeof(STARTUPINFO);
|
||||
si.dwFlags = STARTF_USESTDHANDLES;
|
||||
si.hStdInput = INVALID_HANDLE_VALUE;
|
||||
|
||||
ZeroMemory(&sa, sizeof(SECURITY_ATTRIBUTES));
|
||||
sa.nLength = sizeof(SECURITY_ATTRIBUTES);
|
||||
sa.lpSecurityDescriptor = NULL;
|
||||
sa.bInheritHandle = FALSE;
|
||||
|
||||
/*
|
||||
* Create a non-inheritable pipe.
|
||||
*/
|
||||
|
||||
CreatePipe(&Out.pipe, &h, &sa, 0);
|
||||
|
||||
/*
|
||||
* Dupe the write side, make it inheritable, and close the original.
|
||||
*/
|
||||
|
||||
DuplicateHandle(hProcess, h, hProcess, &si.hStdOutput, 0, TRUE,
|
||||
DUPLICATE_SAME_ACCESS | DUPLICATE_CLOSE_SOURCE);
|
||||
|
||||
/*
|
||||
* Same as above, but for the error side.
|
||||
*/
|
||||
|
||||
CreatePipe(&Err.pipe, &h, &sa, 0);
|
||||
DuplicateHandle(hProcess, h, hProcess, &si.hStdError, 0, TRUE,
|
||||
DUPLICATE_SAME_ACCESS | DUPLICATE_CLOSE_SOURCE);
|
||||
|
||||
/*
|
||||
* Base command line.
|
||||
*/
|
||||
|
||||
lstrcpy(cmdline, "cl.exe -nologo -c -TC -Zs -X -Fp.\\_junk.pch ");
|
||||
|
||||
/*
|
||||
* Append our option for testing
|
||||
*/
|
||||
|
||||
lstrcat(cmdline, option);
|
||||
|
||||
/*
|
||||
* Filename to compile, which exists but is empty.
|
||||
*/
|
||||
|
||||
lstrcat(cmdline, " .\\nul");
|
||||
|
||||
ok = CreateProcess(
|
||||
NULL, /* Module name. */
|
||||
cmdline, /* Command line. */
|
||||
NULL, /* Process handle not inheritable. */
|
||||
NULL, /* Thread handle not inheritable. */
|
||||
TRUE, /* yes, inherit handles. */
|
||||
DETACHED_PROCESS, /* No console for you. */
|
||||
NULL, /* Use parent's environment block. */
|
||||
NULL, /* Use parent's starting directory. */
|
||||
&si, /* Pointer to STARTUPINFO structure. */
|
||||
&pi); /* Pointer to PROCESS_INFORMATION structure. */
|
||||
|
||||
if (!ok) {
|
||||
DWORD err = GetLastError();
|
||||
int chars = snprintf(msg, sizeof(msg) - 1,
|
||||
"Tried to launch: \"%s\", but got error [%u]: ", cmdline, err);
|
||||
|
||||
FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|
|
||||
FORMAT_MESSAGE_MAX_WIDTH_MASK, 0L, err, 0, (LPVOID)&msg[chars],
|
||||
(300-chars), 0);
|
||||
WriteFile(GetStdHandle(STD_ERROR_HANDLE), msg, lstrlen(msg), &err,NULL);
|
||||
return 2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Close our references to the write handles that have now been inherited.
|
||||
*/
|
||||
|
||||
CloseHandle(si.hStdOutput);
|
||||
CloseHandle(si.hStdError);
|
||||
|
||||
WaitForInputIdle(pi.hProcess, 5000);
|
||||
CloseHandle(pi.hThread);
|
||||
|
||||
/*
|
||||
* Start the pipe reader threads.
|
||||
*/
|
||||
|
||||
pipeThreads[0] = CreateThread(NULL, 0, ReadFromPipe, &Out, 0, &threadID);
|
||||
pipeThreads[1] = CreateThread(NULL, 0, ReadFromPipe, &Err, 0, &threadID);
|
||||
|
||||
/*
|
||||
* Block waiting for the process to end.
|
||||
*/
|
||||
|
||||
WaitForSingleObject(pi.hProcess, INFINITE);
|
||||
CloseHandle(pi.hProcess);
|
||||
|
||||
/*
|
||||
* Wait for our pipe to get done reading, should it be a little slow.
|
||||
*/
|
||||
|
||||
WaitForMultipleObjects(2, pipeThreads, TRUE, 500);
|
||||
CloseHandle(pipeThreads[0]);
|
||||
CloseHandle(pipeThreads[1]);
|
||||
|
||||
/*
|
||||
* Look for the commandline warning code in both streams.
|
||||
* - in MSVC 6 & 7 we get D4002, in MSVC 8 we get D9002.
|
||||
*/
|
||||
|
||||
return !(strstr(Out.buffer, "D4002") != NULL
|
||||
|| strstr(Err.buffer, "D4002") != NULL
|
||||
|| strstr(Out.buffer, "D9002") != NULL
|
||||
|| strstr(Err.buffer, "D9002") != NULL
|
||||
|| strstr(Out.buffer, "D2021") != NULL
|
||||
|| strstr(Err.buffer, "D2021") != NULL);
|
||||
}
|
||||
|
||||
static int
|
||||
CheckForLinkerFeature(
|
||||
const char *option)
|
||||
{
|
||||
STARTUPINFO si;
|
||||
PROCESS_INFORMATION pi;
|
||||
SECURITY_ATTRIBUTES sa;
|
||||
DWORD threadID;
|
||||
char msg[300];
|
||||
BOOL ok;
|
||||
HANDLE hProcess, h, pipeThreads[2];
|
||||
char cmdline[100];
|
||||
|
||||
hProcess = GetCurrentProcess();
|
||||
|
||||
ZeroMemory(&pi, sizeof(PROCESS_INFORMATION));
|
||||
ZeroMemory(&si, sizeof(STARTUPINFO));
|
||||
si.cb = sizeof(STARTUPINFO);
|
||||
si.dwFlags = STARTF_USESTDHANDLES;
|
||||
si.hStdInput = INVALID_HANDLE_VALUE;
|
||||
|
||||
ZeroMemory(&sa, sizeof(SECURITY_ATTRIBUTES));
|
||||
sa.nLength = sizeof(SECURITY_ATTRIBUTES);
|
||||
sa.lpSecurityDescriptor = NULL;
|
||||
sa.bInheritHandle = TRUE;
|
||||
|
||||
/*
|
||||
* Create a non-inheritable pipe.
|
||||
*/
|
||||
|
||||
CreatePipe(&Out.pipe, &h, &sa, 0);
|
||||
|
||||
/*
|
||||
* Dupe the write side, make it inheritable, and close the original.
|
||||
*/
|
||||
|
||||
DuplicateHandle(hProcess, h, hProcess, &si.hStdOutput, 0, TRUE,
|
||||
DUPLICATE_SAME_ACCESS | DUPLICATE_CLOSE_SOURCE);
|
||||
|
||||
/*
|
||||
* Same as above, but for the error side.
|
||||
*/
|
||||
|
||||
CreatePipe(&Err.pipe, &h, &sa, 0);
|
||||
DuplicateHandle(hProcess, h, hProcess, &si.hStdError, 0, TRUE,
|
||||
DUPLICATE_SAME_ACCESS | DUPLICATE_CLOSE_SOURCE);
|
||||
|
||||
/*
|
||||
* Base command line.
|
||||
*/
|
||||
|
||||
lstrcpy(cmdline, "link.exe -nologo ");
|
||||
|
||||
/*
|
||||
* Append our option for testing.
|
||||
*/
|
||||
|
||||
lstrcat(cmdline, option);
|
||||
|
||||
ok = CreateProcess(
|
||||
NULL, /* Module name. */
|
||||
cmdline, /* Command line. */
|
||||
NULL, /* Process handle not inheritable. */
|
||||
NULL, /* Thread handle not inheritable. */
|
||||
TRUE, /* yes, inherit handles. */
|
||||
DETACHED_PROCESS, /* No console for you. */
|
||||
NULL, /* Use parent's environment block. */
|
||||
NULL, /* Use parent's starting directory. */
|
||||
&si, /* Pointer to STARTUPINFO structure. */
|
||||
&pi); /* Pointer to PROCESS_INFORMATION structure. */
|
||||
|
||||
if (!ok) {
|
||||
DWORD err = GetLastError();
|
||||
int chars = snprintf(msg, sizeof(msg) - 1,
|
||||
"Tried to launch: \"%s\", but got error [%u]: ", cmdline, err);
|
||||
|
||||
FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|
|
||||
FORMAT_MESSAGE_MAX_WIDTH_MASK, 0L, err, 0, (LPVOID)&msg[chars],
|
||||
(300-chars), 0);
|
||||
WriteFile(GetStdHandle(STD_ERROR_HANDLE), msg, lstrlen(msg), &err,NULL);
|
||||
return 2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Close our references to the write handles that have now been inherited.
|
||||
*/
|
||||
|
||||
CloseHandle(si.hStdOutput);
|
||||
CloseHandle(si.hStdError);
|
||||
|
||||
WaitForInputIdle(pi.hProcess, 5000);
|
||||
CloseHandle(pi.hThread);
|
||||
|
||||
/*
|
||||
* Start the pipe reader threads.
|
||||
*/
|
||||
|
||||
pipeThreads[0] = CreateThread(NULL, 0, ReadFromPipe, &Out, 0, &threadID);
|
||||
pipeThreads[1] = CreateThread(NULL, 0, ReadFromPipe, &Err, 0, &threadID);
|
||||
|
||||
/*
|
||||
* Block waiting for the process to end.
|
||||
*/
|
||||
|
||||
WaitForSingleObject(pi.hProcess, INFINITE);
|
||||
CloseHandle(pi.hProcess);
|
||||
|
||||
/*
|
||||
* Wait for our pipe to get done reading, should it be a little slow.
|
||||
*/
|
||||
|
||||
WaitForMultipleObjects(2, pipeThreads, TRUE, 500);
|
||||
CloseHandle(pipeThreads[0]);
|
||||
CloseHandle(pipeThreads[1]);
|
||||
|
||||
/*
|
||||
* Look for the commandline warning code in the stderr stream.
|
||||
*/
|
||||
|
||||
return !(strstr(Out.buffer, "LNK1117") != NULL ||
|
||||
strstr(Err.buffer, "LNK1117") != NULL ||
|
||||
strstr(Out.buffer, "LNK4044") != NULL ||
|
||||
strstr(Err.buffer, "LNK4044") != NULL);
|
||||
}
|
||||
|
||||
static DWORD WINAPI
|
||||
ReadFromPipe(
|
||||
LPVOID args)
|
||||
{
|
||||
pipeinfo *pi = (pipeinfo *) args;
|
||||
char *lastBuf = pi->buffer;
|
||||
DWORD dwRead;
|
||||
BOOL ok;
|
||||
|
||||
again:
|
||||
if (lastBuf - pi->buffer + CHUNK > STATICBUFFERSIZE) {
|
||||
CloseHandle(pi->pipe);
|
||||
return (DWORD)-1;
|
||||
}
|
||||
ok = ReadFile(pi->pipe, lastBuf, CHUNK, &dwRead, 0L);
|
||||
if (!ok || dwRead == 0) {
|
||||
CloseHandle(pi->pipe);
|
||||
return 0;
|
||||
}
|
||||
lastBuf += dwRead;
|
||||
goto again;
|
||||
|
||||
return 0; /* makes the compiler happy */
|
||||
}
|
||||
|
||||
static int
|
||||
IsIn(
|
||||
const char *string,
|
||||
const char *substring)
|
||||
{
|
||||
return (strstr(string, substring) != NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* GetVersionFromFile --
|
||||
* Looks for a match string in a file and then returns the version
|
||||
* following the match where a version is anything acceptable to
|
||||
* package provide or package ifneeded.
|
||||
*/
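/*
 * Illustrative example (not part of the original source): given a line such
 * as "AC_INIT([sqlite], [3.7.14])" in configure.in, a call like
 * GetVersionFromFile("..\\configure.in", "AC_INIT") would be expected to
 * return the string "3.7.14".
 */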
|
||||
|
||||
static const char *
|
||||
GetVersionFromFile(
|
||||
const char *filename,
|
||||
const char *match)
|
||||
{
|
||||
size_t cbBuffer = 100;
|
||||
static char szBuffer[100];
|
||||
char *szResult = NULL;
|
||||
FILE *fp = fopen(filename, "rt");
|
||||
|
||||
if (fp != NULL) {
|
||||
/*
|
||||
* Read data until we see our match string.
|
||||
*/
|
||||
|
||||
while (fgets(szBuffer, cbBuffer, fp) != NULL) {
|
||||
LPSTR p, q;
|
||||
|
||||
p = strstr(szBuffer, match);
|
||||
if (p != NULL) {
|
||||
/*
|
||||
* Skip to first digit.
|
||||
*/
|
||||
|
||||
while (*p && !isdigit(*p)) {
|
||||
++p;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find ending whitespace.
|
||||
*/
|
||||
|
||||
q = p;
|
||||
while (*q && (isalnum(*q) || *q == '.')) {
|
||||
++q;
|
||||
}
|
||||
|
||||
memcpy(szBuffer, p, q - p);
|
||||
szBuffer[q-p] = 0;
|
||||
szResult = szBuffer;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
return szResult;
|
||||
}
|
||||
|
||||
/*
|
||||
* List helpers for the SubstituteFile function
|
||||
*/
|
||||
|
||||
typedef struct list_item_t {
|
||||
struct list_item_t *nextPtr;
|
||||
char * key;
|
||||
char * value;
|
||||
} list_item_t;
|
||||
|
||||
/* insert a list item into the list (list may be null) */
|
||||
static list_item_t *
|
||||
list_insert(list_item_t **listPtrPtr, const char *key, const char *value)
|
||||
{
|
||||
list_item_t *itemPtr = malloc(sizeof(list_item_t));
|
||||
if (itemPtr) {
|
||||
itemPtr->key = strdup(key);
|
||||
itemPtr->value = strdup(value);
|
||||
itemPtr->nextPtr = NULL;
|
||||
|
||||
while(*listPtrPtr) {
|
||||
listPtrPtr = &(*listPtrPtr)->nextPtr;
|
||||
}
|
||||
*listPtrPtr = itemPtr;
|
||||
}
|
||||
return itemPtr;
|
||||
}
|
||||
|
||||
static void
|
||||
list_free(list_item_t **listPtrPtr)
|
||||
{
|
||||
list_item_t *tmpPtr, *listPtr = *listPtrPtr;
|
||||
while (listPtr) {
|
||||
tmpPtr = listPtr;
|
||||
listPtr = listPtr->nextPtr;
|
||||
free(tmpPtr->key);
|
||||
free(tmpPtr->value);
|
||||
free(tmpPtr);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* SubstituteFile --
|
||||
* As Windows doesn't provide anything useful like sed, and it's unreliable
* to use the tclsh you are building against (consider cross-platform builds,
* e.g. compiling an AMD64 target from IX86), we provide a simple substitution
* option here to handle autoconf-style substitutions.
|
||||
* The substitution file is whitespace and line delimited. The file should
|
||||
* consist of lines matching the regular expression:
|
||||
* \s*\S+\s+\S*$
|
||||
*
|
||||
* Usage is something like:
|
||||
* nmakehlp -s << $** > $@
|
||||
* @PACKAGE_NAME@ $(PACKAGE_NAME)
|
||||
* @PACKAGE_VERSION@ $(PACKAGE_VERSION)
|
||||
* <<
|
||||
*/
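/*
 * Illustrative example (not part of the original source): with a
 * substitutions file containing the single line
 *     @PACKAGE_VERSION@ 3.7.14
 * an input line "VERSION = @PACKAGE_VERSION@" would be emitted as
 * "VERSION = 3.7.14".
 */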
|
||||
|
||||
static int
|
||||
SubstituteFile(
|
||||
const char *substitutions,
|
||||
const char *filename)
|
||||
{
|
||||
size_t cbBuffer = 1024;
|
||||
static char szBuffer[1024], szCopy[1024];
|
||||
char *szResult = NULL;
|
||||
list_item_t *substPtr = NULL;
|
||||
FILE *fp, *sp;
|
||||
|
||||
fp = fopen(filename, "rt");
|
||||
if (fp != NULL) {
|
||||
|
||||
/*
|
||||
* Build a list of substitutions from the first filename
|
||||
*/
|
||||
|
||||
sp = fopen(substitutions, "rt");
|
||||
if (sp != NULL) {
|
||||
while (fgets(szBuffer, cbBuffer, sp) != NULL) {
|
||||
unsigned char *ks, *ke, *vs, *ve;
|
||||
ks = (unsigned char*)szBuffer;
|
||||
while (ks && *ks && isspace(*ks)) ++ks;
|
||||
ke = ks;
|
||||
while (ke && *ke && !isspace(*ke)) ++ke;
|
||||
vs = ke;
|
||||
while (vs && *vs && isspace(*vs)) ++vs;
|
||||
ve = vs;
|
||||
while (ve && *ve && !(*ve == '\r' || *ve == '\n')) ++ve;
|
||||
*ke = 0, *ve = 0;
|
||||
list_insert(&substPtr, (char*)ks, (char*)vs);
|
||||
}
|
||||
fclose(sp);
|
||||
}
|
||||
|
||||
/* debug: dump the list */
|
||||
#ifdef _DEBUG
|
||||
{
|
||||
int n = 0;
|
||||
list_item_t *p = NULL;
|
||||
for (p = substPtr; p != NULL; p = p->nextPtr, ++n) {
|
||||
fprintf(stderr, "% 3d '%s' => '%s'\n", n, p->key, p->value);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Run the substitutions over each line of the input
|
||||
*/
|
||||
|
||||
while (fgets(szBuffer, cbBuffer, fp) != NULL) {
|
||||
list_item_t *p = NULL;
|
||||
for (p = substPtr; p != NULL; p = p->nextPtr) {
|
||||
char *m = strstr(szBuffer, p->key);
|
||||
if (m) {
|
||||
char *cp, *op, *sp;
|
||||
cp = szCopy;
|
||||
op = szBuffer;
|
||||
while (op != m) *cp++ = *op++;
|
||||
sp = p->value;
|
||||
while (sp && *sp) *cp++ = *sp++;
|
||||
op += strlen(p->key);
|
||||
while (*op) *cp++ = *op++;
|
||||
*cp = 0;
|
||||
memcpy(szBuffer, szCopy, sizeof(szCopy));
|
||||
}
|
||||
}
|
||||
printf("%s", szBuffer); /* don't treat the output line as a format string */
|
||||
}
|
||||
|
||||
list_free(&substPtr);
/* Close the input file only when it was successfully opened. */
fclose(fp);
}
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* QualifyPath --
|
||||
*
|
||||
* This composes the current working directory with a provided path
|
||||
* and returns the fully qualified and normalized path.
|
||||
* Mostly needed to setup paths for testing.
|
||||
*/
|
||||
|
||||
static int
|
||||
QualifyPath(
|
||||
const char *szPath)
|
||||
{
|
||||
char szCwd[MAX_PATH + 1];
|
||||
char szTmp[MAX_PATH + 1];
|
||||
char *p;
|
||||
GetCurrentDirectory(MAX_PATH, szCwd);
|
||||
while ((p = strchr(szPath, '/')) && *p)
|
||||
*p = '\\';
|
||||
PathCombine(szTmp, szCwd, szPath);
|
||||
PathCanonicalize(szCwd, szTmp);
|
||||
printf("%s\n", szCwd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Local variables:
|
||||
* mode: c
|
||||
* c-basic-offset: 4
|
||||
* fill-column: 78
|
||||
* indent-tabs-mode: t
|
||||
* tab-width: 8
|
||||
* End:
|
||||
*/
|
||||
@ -1,711 +0,0 @@
|
||||
#------------------------------------------------------------------------------
|
||||
# rules.vc --
|
||||
#
|
||||
# Microsoft Visual C++ makefile include for decoding the commandline
|
||||
# macros. This file does not need editing to build Tcl.
|
||||
#
|
||||
# See the file "license.terms" for information on usage and redistribution
|
||||
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
|
||||
#
|
||||
# Copyright (c) 2001-2003 David Gravereaux.
|
||||
# Copyright (c) 2003-2008 Patrick Thoyts
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
!ifndef _RULES_VC
|
||||
_RULES_VC = 1
|
||||
|
||||
cc32 = $(CC) # built-in default.
|
||||
link32 = link
|
||||
lib32 = lib
|
||||
rc32 = $(RC) # built-in default.
|
||||
|
||||
!ifndef INSTALLDIR
|
||||
### Assume the normal default.
|
||||
_INSTALLDIR = C:\Program Files\Tcl
|
||||
!else
|
||||
### Fix the path separators.
|
||||
_INSTALLDIR = $(INSTALLDIR:/=\)
|
||||
!endif
|
||||
|
||||
#----------------------------------------------------------
|
||||
# Set the proper copy method to avoid overwrite questions
# to the user when copying files, and select the right
# "delete all" method.
|
||||
#----------------------------------------------------------
|
||||
|
||||
!if "$(OS)" == "Windows_NT"
|
||||
RMDIR = rmdir /S /Q
|
||||
ERRNULL = 2>NUL
|
||||
!if ![ver | find "4.0" > nul]
|
||||
CPY = echo y | xcopy /i >NUL
|
||||
COPY = copy >NUL
|
||||
!else
|
||||
CPY = xcopy /i /y >NUL
|
||||
COPY = copy /y >NUL
|
||||
!endif
|
||||
!else # "$(OS)" != "Windows_NT"
|
||||
CPY = xcopy /i >_JUNK.OUT # On Win98 NUL does not work here.
|
||||
COPY = copy >_JUNK.OUT # On Win98 NUL does not work here.
|
||||
RMDIR = deltree /Y
|
||||
NULL = \NUL # Used in testing directory existence
|
||||
ERRNULL = >NUL # Win9x shell cannot redirect stderr
|
||||
!endif
|
||||
MKDIR = mkdir
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Determine the host and target architectures and compiler version.
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
_HASH=^#
|
||||
_VC_MANIFEST_EMBED_EXE=
|
||||
_VC_MANIFEST_EMBED_DLL=
|
||||
VCVER=0
|
||||
!if ![echo VCVERSION=_MSC_VER > vercl.x] \
|
||||
&& ![echo $(_HASH)if defined(_M_IX86) >> vercl.x] \
|
||||
&& ![echo ARCH=IX86 >> vercl.x] \
|
||||
&& ![echo $(_HASH)elif defined(_M_AMD64) >> vercl.x] \
|
||||
&& ![echo ARCH=AMD64 >> vercl.x] \
|
||||
&& ![echo $(_HASH)endif >> vercl.x] \
|
||||
&& ![cl -nologo -TC -P vercl.x $(ERRNULL)]
|
||||
!include vercl.i
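# Illustration only (not in the original file): after the preprocessing step
# above, vercl.i is expected to contain lines of the form
#
#   VCVERSION=<value of _MSC_VER>   (e.g. 1400, a hypothetical example)
#   ARCH=IX86  or  ARCH=AMD64
#
# which this !include then pulls into the makefile.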
|
||||
!if ![echo VCVER= ^\> vercl.vc] \
|
||||
&& ![set /a $(VCVERSION) / 100 - 6 >> vercl.vc]
|
||||
!include vercl.vc
|
||||
!endif
|
||||
!endif
|
||||
!if ![del $(ERRNULL) /q/f vercl.x vercl.i vercl.vc]
|
||||
!endif
|
||||
|
||||
!if ![reg query HKLM\Hardware\Description\System\CentralProcessor\0 /v Identifier | findstr /i x86]
|
||||
NATIVE_ARCH=IX86
|
||||
!else
|
||||
NATIVE_ARCH=AMD64
|
||||
!endif
|
||||
|
||||
# Since MSVC8 we must deal with manifest resources.
|
||||
!if $(VCVERSION) >= 1400
|
||||
_VC_MANIFEST_EMBED_EXE=if exist $@.manifest mt -nologo -manifest $@.manifest -outputresource:$@;1
|
||||
_VC_MANIFEST_EMBED_DLL=if exist $@.manifest mt -nologo -manifest $@.manifest -outputresource:$@;2
|
||||
!endif
|
||||
|
||||
!ifndef MACHINE
|
||||
MACHINE=$(ARCH)
|
||||
!endif
|
||||
|
||||
!ifndef CFG_ENCODING
|
||||
CFG_ENCODING = \"cp1252\"
|
||||
!endif
|
||||
|
||||
!message ===============================================================================
|
||||
|
||||
#----------------------------------------------------------
|
||||
# build the helper app we need to overcome nmake's limiting
|
||||
# environment.
|
||||
#----------------------------------------------------------
|
||||
|
||||
!if !exist(nmakehlp.exe)
|
||||
!if [$(cc32) -nologo nmakehlp.c -link -subsystem:console > nul]
|
||||
!endif
|
||||
!endif
|
||||
|
||||
#----------------------------------------------------------
|
||||
# Test for compiler features
|
||||
#----------------------------------------------------------
|
||||
|
||||
### test for optimizations
|
||||
!if [nmakehlp -c -Ot]
|
||||
!message *** Compiler has 'Optimizations'
|
||||
OPTIMIZING = 1
|
||||
!else
|
||||
!message *** Compiler does not have 'Optimizations'
|
||||
OPTIMIZING = 0
|
||||
!endif
|
||||
|
||||
OPTIMIZATIONS =
|
||||
|
||||
!if [nmakehlp -c -Ot]
|
||||
OPTIMIZATIONS = $(OPTIMIZATIONS) -Ot
|
||||
!endif
|
||||
|
||||
!if [nmakehlp -c -Oi]
|
||||
OPTIMIZATIONS = $(OPTIMIZATIONS) -Oi
|
||||
!endif
|
||||
|
||||
!if [nmakehlp -c -Op]
|
||||
OPTIMIZATIONS = $(OPTIMIZATIONS) -Op
|
||||
!endif
|
||||
|
||||
!if [nmakehlp -c -fp:strict]
|
||||
OPTIMIZATIONS = $(OPTIMIZATIONS) -fp:strict
|
||||
!endif
|
||||
|
||||
!if [nmakehlp -c -Gs]
|
||||
OPTIMIZATIONS = $(OPTIMIZATIONS) -Gs
|
||||
!endif
|
||||
|
||||
!if [nmakehlp -c -GS]
|
||||
OPTIMIZATIONS = $(OPTIMIZATIONS) -GS
|
||||
!endif
|
||||
|
||||
!if [nmakehlp -c -GL]
|
||||
OPTIMIZATIONS = $(OPTIMIZATIONS) -GL
|
||||
!endif
|
||||
|
||||
DEBUGFLAGS =
|
||||
|
||||
!if [nmakehlp -c -RTC1]
|
||||
DEBUGFLAGS = $(DEBUGFLAGS) -RTC1
|
||||
!elseif [nmakehlp -c -GZ]
|
||||
DEBUGFLAGS = $(DEBUGFLAGS) -GZ
|
||||
!endif
|
||||
|
||||
COMPILERFLAGS =-W3 -DUNICODE -D_UNICODE
|
||||
|
||||
# In v13 -GL and -YX are incompatible.
|
||||
!if [nmakehlp -c -YX]
|
||||
!if ![nmakehlp -c -GL]
|
||||
OPTIMIZATIONS = $(OPTIMIZATIONS) -YX
|
||||
!endif
|
||||
!endif
|
||||
|
||||
!if "$(MACHINE)" == "IX86"
|
||||
### test for pentium errata
|
||||
!if [nmakehlp -c -QI0f]
|
||||
!message *** Compiler has 'Pentium 0x0f fix'
|
||||
COMPILERFLAGS = $(COMPILERFLAGS) -QI0f
|
||||
!else
|
||||
!message *** Compiler does not have 'Pentium 0x0f fix'
|
||||
!endif
|
||||
!endif
|
||||
|
||||
!if "$(MACHINE)" == "IA64"
|
||||
### test for Itanium errata
|
||||
!if [nmakehlp -c -QIA64_Bx]
|
||||
!message *** Compiler has 'B-stepping errata workarounds'
|
||||
COMPILERFLAGS = $(COMPILERFLAGS) -QIA64_Bx
|
||||
!else
|
||||
!message *** Compiler does not have 'B-stepping errata workarounds'
|
||||
!endif
|
||||
!endif
|
||||
|
||||
!if "$(MACHINE)" == "IX86"
|
||||
### test for -align:4096, when align:512 will do.
|
||||
!if [nmakehlp -l -opt:nowin98]
|
||||
!message *** Linker has 'Win98 alignment problem'
|
||||
ALIGN98_HACK = 1
|
||||
!else
|
||||
!message *** Linker does not have 'Win98 alignment problem'
|
||||
ALIGN98_HACK = 0
|
||||
!endif
|
||||
!else
|
||||
ALIGN98_HACK = 0
|
||||
!endif
|
||||
|
||||
LINKERFLAGS =
|
||||
|
||||
!if [nmakehlp -l -ltcg]
|
||||
LINKERFLAGS =-ltcg
|
||||
!endif
|
||||
|
||||
#----------------------------------------------------------
|
||||
# Decode the options requested.
|
||||
#----------------------------------------------------------
|
||||
|
||||
!if "$(OPTS)" == "" || [nmakehlp -f "$(OPTS)" "none"]
|
||||
STATIC_BUILD = 0
|
||||
TCL_THREADS = 1
|
||||
DEBUG = 0
|
||||
SYMBOLS = 0
|
||||
PROFILE = 0
|
||||
PGO = 0
|
||||
MSVCRT = 0
|
||||
LOIMPACT = 0
|
||||
TCL_USE_STATIC_PACKAGES = 0
|
||||
USE_THREAD_ALLOC = 1
|
||||
UNCHECKED = 0
|
||||
!else
|
||||
!if [nmakehlp -f $(OPTS) "static"]
|
||||
!message *** Doing static
|
||||
STATIC_BUILD = 1
|
||||
!else
|
||||
STATIC_BUILD = 0
|
||||
!endif
|
||||
!if [nmakehlp -f $(OPTS) "msvcrt"]
|
||||
!message *** Doing msvcrt
|
||||
MSVCRT = 1
|
||||
!else
|
||||
MSVCRT = 0
|
||||
!endif
|
||||
!if [nmakehlp -f $(OPTS) "staticpkg"]
|
||||
!message *** Doing staticpkg
|
||||
TCL_USE_STATIC_PACKAGES = 1
|
||||
!else
|
||||
TCL_USE_STATIC_PACKAGES = 0
|
||||
!endif
|
||||
!if [nmakehlp -f $(OPTS) "nothreads"]
|
||||
!message *** Compile explicitly for non-threaded tcl
|
||||
TCL_THREADS = 0
|
||||
!else
|
||||
TCL_THREADS = 1
|
||||
USE_THREAD_ALLOC= 1
|
||||
!endif
|
||||
!if [nmakehlp -f $(OPTS) "symbols"]
|
||||
!message *** Doing symbols
|
||||
DEBUG = 1
|
||||
!else
|
||||
DEBUG = 0
|
||||
!endif
|
||||
!if [nmakehlp -f $(OPTS) "pdbs"]
|
||||
!message *** Doing pdbs
|
||||
SYMBOLS = 1
|
||||
!else
|
||||
SYMBOLS = 0
|
||||
!endif
|
||||
!if [nmakehlp -f $(OPTS) "profile"]
|
||||
!message *** Doing profile
|
||||
PROFILE = 1
|
||||
!else
|
||||
PROFILE = 0
|
||||
!endif
|
||||
!if [nmakehlp -f $(OPTS) "pgi"]
|
||||
!message *** Doing profile guided optimization instrumentation
|
||||
PGO = 1
|
||||
!elseif [nmakehlp -f $(OPTS) "pgo"]
|
||||
!message *** Doing profile guided optimization
|
||||
PGO = 2
|
||||
!else
|
||||
PGO = 0
|
||||
!endif
|
||||
!if [nmakehlp -f $(OPTS) "loimpact"]
|
||||
!message *** Doing loimpact
|
||||
LOIMPACT = 1
|
||||
!else
|
||||
LOIMPACT = 0
|
||||
!endif
|
||||
!if [nmakehlp -f $(OPTS) "thrdalloc"]
|
||||
!message *** Doing thrdalloc
|
||||
USE_THREAD_ALLOC = 1
|
||||
!endif
|
||||
!if [nmakehlp -f $(OPTS) "tclalloc"]
|
||||
!message *** Doing tclalloc
|
||||
USE_THREAD_ALLOC = 0
|
||||
!endif
|
||||
!if [nmakehlp -f $(OPTS) "unchecked"]
|
||||
!message *** Doing unchecked
|
||||
UNCHECKED = 1
|
||||
!else
|
||||
UNCHECKED = 0
|
||||
!endif
|
||||
!endif
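# [Editor's note, not part of the original rules.vc] The OPTS keywords decoded
# above are normally passed as a comma-separated list on the nmake command
# line; the makefile name and TCLDIR path below are illustrative assumptions:
#
#   nmake -f makefile.vc OPTS=static,msvcrt,symbols TCLDIR=c:\tcl-src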
|
||||
|
||||
|
||||
!if !$(STATIC_BUILD)
|
||||
# Make sure we don't build overly fat DLLs.
|
||||
MSVCRT = 1
|
||||
# We shouldn't statically put the extensions inside the shell when dynamic.
|
||||
TCL_USE_STATIC_PACKAGES = 0
|
||||
!endif
|
||||
|
||||
|
||||
#----------------------------------------------------------
|
||||
# Figure out how to name our intermediate and output directories.
|
||||
# We wouldn't want different builds to use the same .obj files
|
||||
# by accident.
|
||||
#----------------------------------------------------------
|
||||
|
||||
#----------------------------------------
|
||||
# Naming convention:
|
||||
# t = full thread support.
|
||||
# s = static library (as opposed to an
|
||||
# import library)
|
||||
# g = linked to the debug enabled C
|
||||
# run-time.
|
||||
# x = special static build when it
|
||||
# links to the dynamic C run-time.
|
||||
#----------------------------------------
|
||||
SUFX = tsgx
|
||||
|
||||
!if $(DEBUG)
|
||||
BUILDDIRTOP = Debug
|
||||
!else
|
||||
BUILDDIRTOP = Release
|
||||
!endif
|
||||
|
||||
!if "$(MACHINE)" != "IX86"
|
||||
BUILDDIRTOP =$(BUILDDIRTOP)_$(MACHINE)
|
||||
!endif
|
||||
!if $(VCVER) > 6
|
||||
BUILDDIRTOP =$(BUILDDIRTOP)_VC$(VCVER)
|
||||
!endif
|
||||
|
||||
!if !$(DEBUG) || $(DEBUG) && $(UNCHECKED)
|
||||
SUFX = $(SUFX:g=)
|
||||
!endif
|
||||
|
||||
TMP_DIRFULL = .\$(BUILDDIRTOP)\$(PROJECT)_ThreadedDynamicStaticX
|
||||
|
||||
!if !$(STATIC_BUILD)
|
||||
TMP_DIRFULL = $(TMP_DIRFULL:Static=)
|
||||
SUFX = $(SUFX:s=)
|
||||
EXT = dll
|
||||
!if $(MSVCRT)
|
||||
TMP_DIRFULL = $(TMP_DIRFULL:X=)
|
||||
SUFX = $(SUFX:x=)
|
||||
!endif
|
||||
!else
|
||||
TMP_DIRFULL = $(TMP_DIRFULL:Dynamic=)
|
||||
EXT = lib
|
||||
!if !$(MSVCRT)
|
||||
TMP_DIRFULL = $(TMP_DIRFULL:X=)
|
||||
SUFX = $(SUFX:x=)
|
||||
!endif
|
||||
!endif
|
||||
|
||||
!if !$(TCL_THREADS)
|
||||
TMP_DIRFULL = $(TMP_DIRFULL:Threaded=)
|
||||
SUFX = $(SUFX:t=)
|
||||
!endif
|
||||
|
||||
!ifndef TMP_DIR
|
||||
TMP_DIR = $(TMP_DIRFULL)
|
||||
!ifndef OUT_DIR
|
||||
OUT_DIR = .\$(BUILDDIRTOP)
|
||||
!endif
|
||||
!else
|
||||
!ifndef OUT_DIR
|
||||
OUT_DIR = $(TMP_DIR)
|
||||
!endif
|
||||
!endif
|
||||
|
||||
|
||||
#----------------------------------------------------------
|
||||
# Decode the statistics requested.
|
||||
#----------------------------------------------------------
|
||||
|
||||
!if "$(STATS)" == "" || [nmakehlp -f "$(STATS)" "none"]
|
||||
TCL_MEM_DEBUG = 0
|
||||
TCL_COMPILE_DEBUG = 0
|
||||
!else
|
||||
!if [nmakehlp -f $(STATS) "memdbg"]
|
||||
!message *** Doing memdbg
|
||||
TCL_MEM_DEBUG = 1
|
||||
!else
|
||||
TCL_MEM_DEBUG = 0
|
||||
!endif
|
||||
!if [nmakehlp -f $(STATS) "compdbg"]
|
||||
!message *** Doing compdbg
|
||||
TCL_COMPILE_DEBUG = 1
|
||||
!else
|
||||
TCL_COMPILE_DEBUG = 0
|
||||
!endif
|
||||
!endif
|
||||
|
||||
|
||||
#----------------------------------------------------------
|
||||
# Decode the checks requested.
|
||||
#----------------------------------------------------------
|
||||
|
||||
!if "$(CHECKS)" == "" || [nmakehlp -f "$(CHECKS)" "none"]
|
||||
TCL_NO_DEPRECATED = 0
|
||||
WARNINGS = -W3
|
||||
!else
|
||||
!if [nmakehlp -f $(CHECKS) "nodep"]
|
||||
!message *** Doing nodep check
|
||||
TCL_NO_DEPRECATED = 1
|
||||
!else
|
||||
TCL_NO_DEPRECATED = 0
|
||||
!endif
|
||||
!if [nmakehlp -f $(CHECKS) "fullwarn"]
|
||||
!message *** Doing full warnings check
|
||||
WARNINGS = -W4
|
||||
!if [nmakehlp -l -warn:3]
|
||||
LINKERFLAGS = $(LINKERFLAGS) -warn:3
|
||||
!endif
|
||||
!else
|
||||
WARNINGS = -W3
|
||||
!endif
|
||||
!if [nmakehlp -f $(CHECKS) "64bit"] && [nmakehlp -c -Wp64]
|
||||
!message *** Doing 64bit portability warnings
|
||||
WARNINGS = $(WARNINGS) -Wp64
|
||||
!endif
|
||||
!endif
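# [Editor's note, not part of the original rules.vc] STATS and CHECKS are
# decoded the same way as OPTS above; a hypothetical invocation combining them:
#
#   nmake -f makefile.vc OPTS=symbols STATS=memdbg,compdbg CHECKS=fullwarn,nodep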
|
||||
|
||||
!if $(PGO) > 1
|
||||
!if [nmakehlp -l -ltcg:pgoptimize]
|
||||
LINKERFLAGS = $(LINKERFLAGS:-ltcg=) -ltcg:pgoptimize
|
||||
!else
|
||||
MSG=^
|
||||
This compiler does not support profile guided optimization.
|
||||
!error $(MSG)
|
||||
!endif
|
||||
!elseif $(PGO) > 0
|
||||
!if [nmakehlp -l -ltcg:pginstrument]
|
||||
LINKERFLAGS = $(LINKERFLAGS:-ltcg=) -ltcg:pginstrument
|
||||
!else
|
||||
MSG=^
|
||||
This compiler does not support profile guided optimization.
|
||||
!error $(MSG)
|
||||
!endif
|
||||
!endif
|
||||
|
||||
#----------------------------------------------------------
|
||||
# Set our defines now armed with our options.
|
||||
#----------------------------------------------------------
|
||||
|
||||
OPTDEFINES = -DTCL_CFGVAL_ENCODING=$(CFG_ENCODING) -DSTDC_HEADERS
|
||||
|
||||
!if $(TCL_MEM_DEBUG)
|
||||
OPTDEFINES = $(OPTDEFINES) -DTCL_MEM_DEBUG
|
||||
!endif
|
||||
!if $(TCL_COMPILE_DEBUG)
|
||||
OPTDEFINES = $(OPTDEFINES) -DTCL_COMPILE_DEBUG -DTCL_COMPILE_STATS
|
||||
!endif
|
||||
!if $(TCL_THREADS)
|
||||
OPTDEFINES = $(OPTDEFINES) -DTCL_THREADS=1
|
||||
!if $(USE_THREAD_ALLOC)
|
||||
OPTDEFINES = $(OPTDEFINES) -DUSE_THREAD_ALLOC=1
|
||||
!endif
|
||||
!endif
|
||||
!if $(STATIC_BUILD)
|
||||
OPTDEFINES = $(OPTDEFINES) -DSTATIC_BUILD
|
||||
!endif
|
||||
!if $(TCL_NO_DEPRECATED)
|
||||
OPTDEFINES = $(OPTDEFINES) -DTCL_NO_DEPRECATED
|
||||
!endif
|
||||
|
||||
!if !$(DEBUG)
|
||||
OPTDEFINES = $(OPTDEFINES) -DNDEBUG
|
||||
!if $(OPTIMIZING)
|
||||
OPTDEFINES = $(OPTDEFINES) -DTCL_CFG_OPTIMIZED
|
||||
!endif
|
||||
!endif
|
||||
!if $(PROFILE)
|
||||
OPTDEFINES = $(OPTDEFINES) -DTCL_CFG_PROFILED
|
||||
!endif
|
||||
!if "$(MACHINE)" == "IA64" || "$(MACHINE)" == "AMD64"
|
||||
OPTDEFINES = $(OPTDEFINES) -DTCL_CFG_DO64BIT
|
||||
!endif
|
||||
!if $(VCVERSION) < 1300
|
||||
OPTDEFINES = $(OPTDEFINES) -DNO_STRTOI64
|
||||
!endif
|
||||
|
||||
#----------------------------------------------------------
|
||||
# Locate the Tcl headers to build against
|
||||
#----------------------------------------------------------
|
||||
|
||||
!if "$(PROJECT)" == "tcl"
|
||||
|
||||
_TCL_H = ..\generic\tcl.h
|
||||
|
||||
!else
|
||||
|
||||
# If INSTALLDIR set to tcl root dir then reset to the lib dir.
|
||||
!if exist("$(_INSTALLDIR)\include\tcl.h")
|
||||
_INSTALLDIR=$(_INSTALLDIR)\lib
|
||||
!endif
|
||||
|
||||
!if !defined(TCLDIR)
|
||||
!if exist("$(_INSTALLDIR)\..\include\tcl.h")
|
||||
TCLINSTALL = 1
|
||||
_TCLDIR = $(_INSTALLDIR)\..
|
||||
_TCL_H = $(_INSTALLDIR)\..\include\tcl.h
|
||||
TCLDIR = $(_INSTALLDIR)\..
|
||||
!else
|
||||
MSG=^
|
||||
Failed to find tcl.h. Set the TCLDIR macro.
|
||||
!error $(MSG)
|
||||
!endif
|
||||
!else
|
||||
_TCLDIR = $(TCLDIR:/=\)
|
||||
!if exist("$(_TCLDIR)\include\tcl.h")
|
||||
TCLINSTALL = 1
|
||||
_TCL_H = $(_TCLDIR)\include\tcl.h
|
||||
!elseif exist("$(_TCLDIR)\generic\tcl.h")
|
||||
TCLINSTALL = 0
|
||||
_TCL_H = $(_TCLDIR)\generic\tcl.h
|
||||
!else
|
||||
MSG =^
|
||||
Failed to find tcl.h. The TCLDIR macro does not appear correct.
|
||||
!error $(MSG)
|
||||
!endif
|
||||
!endif
|
||||
!endif
|
||||
|
||||
#--------------------------------------------------------------
|
||||
# Extract various version numbers from tcl headers
|
||||
# The generated file is then included in the makefile.
|
||||
#--------------------------------------------------------------
|
||||
|
||||
!if [echo REM = This file is generated from rules.vc > versions.vc]
|
||||
!endif
|
||||
!if [echo TCL_MAJOR_VERSION = \>> versions.vc] \
|
||||
&& [nmakehlp -V "$(_TCL_H)" TCL_MAJOR_VERSION >> versions.vc]
|
||||
!endif
|
||||
!if [echo TCL_MINOR_VERSION = \>> versions.vc] \
|
||||
&& [nmakehlp -V "$(_TCL_H)" TCL_MINOR_VERSION >> versions.vc]
|
||||
!endif
|
||||
!if [echo TCL_PATCH_LEVEL = \>> versions.vc] \
|
||||
&& [nmakehlp -V "$(_TCL_H)" TCL_PATCH_LEVEL >> versions.vc]
|
||||
!endif
|
||||
|
||||
# If building the tcl core then we need additional package versions
|
||||
!if "$(PROJECT)" == "tcl"
|
||||
!if [echo PKG_HTTP_VER = \>> versions.vc] \
|
||||
&& [nmakehlp -V ..\library\http\pkgIndex.tcl http >> versions.vc]
|
||||
!endif
|
||||
!if [echo PKG_TCLTEST_VER = \>> versions.vc] \
|
||||
&& [nmakehlp -V ..\library\tcltest\pkgIndex.tcl tcltest >> versions.vc]
|
||||
!endif
|
||||
!if [echo PKG_MSGCAT_VER = \>> versions.vc] \
|
||||
&& [nmakehlp -V ..\library\msgcat\pkgIndex.tcl msgcat >> versions.vc]
|
||||
!endif
|
||||
!if [echo PKG_PLATFORM_VER = \>> versions.vc] \
|
||||
&& [nmakehlp -V ..\library\platform\pkgIndex.tcl "platform " >> versions.vc]
|
||||
!endif
|
||||
!if [echo PKG_SHELL_VER = \>> versions.vc] \
|
||||
&& [nmakehlp -V ..\library\platform\pkgIndex.tcl "platform::shell" >> versions.vc]
|
||||
!endif
|
||||
!if [echo PKG_DDE_VER = \>> versions.vc] \
|
||||
&& [nmakehlp -V ..\library\dde\pkgIndex.tcl "dde " >> versions.vc]
|
||||
!endif
|
||||
!if [echo PKG_REG_VER =\>> versions.vc] \
|
||||
&& [nmakehlp -V ..\library\reg\pkgIndex.tcl registry >> versions.vc]
|
||||
!endif
|
||||
!endif
|
||||
|
||||
!include versions.vc
|
||||
|
||||
#--------------------------------------------------------------
|
||||
# Setup tcl version dependent stuff headers
|
||||
#--------------------------------------------------------------
|
||||
|
||||
!if "$(PROJECT)" != "tcl"
|
||||
|
||||
TCL_VERSION = $(TCL_MAJOR_VERSION)$(TCL_MINOR_VERSION)
|
||||
|
||||
!if $(TCL_VERSION) < 81
|
||||
TCL_DOES_STUBS = 0
|
||||
!else
|
||||
TCL_DOES_STUBS = 1
|
||||
!endif
|
||||
|
||||
!if $(TCLINSTALL)
|
||||
TCLSH = "$(_TCLDIR)\bin\tclsh$(TCL_VERSION)$(SUFX).exe"
|
||||
!if !exist($(TCLSH)) && $(TCL_THREADS)
|
||||
TCLSH = "$(_TCLDIR)\bin\tclsh$(TCL_VERSION)t$(SUFX).exe"
|
||||
!endif
|
||||
TCLSTUBLIB = "$(_TCLDIR)\lib\tclstub$(TCL_VERSION).lib"
|
||||
TCLIMPLIB = "$(_TCLDIR)\lib\tcl$(TCL_VERSION)$(SUFX).lib"
|
||||
TCL_LIBRARY = $(_TCLDIR)\lib
|
||||
TCLREGLIB = "$(_TCLDIR)\lib\tclreg13$(SUFX:t=).lib"
|
||||
TCLDDELIB = "$(_TCLDIR)\lib\tcldde14$(SUFX:t=).lib"
|
||||
COFFBASE = \must\have\tcl\sources\to\build\this\target
|
||||
TCLTOOLSDIR = \must\have\tcl\sources\to\build\this\target
|
||||
TCL_INCLUDES = -I"$(_TCLDIR)\include"
|
||||
!else
|
||||
TCLSH = "$(_TCLDIR)\win\$(BUILDDIRTOP)\tclsh$(TCL_VERSION)$(SUFX).exe"
|
||||
!if !exist($(TCLSH)) && $(TCL_THREADS)
|
||||
TCLSH = "$(_TCLDIR)\win\$(BUILDDIRTOP)\tclsh$(TCL_VERSION)t$(SUFX).exe"
|
||||
!endif
|
||||
TCLSTUBLIB = "$(_TCLDIR)\win\$(BUILDDIRTOP)\tclstub$(TCL_VERSION).lib"
|
||||
TCLIMPLIB = "$(_TCLDIR)\win\$(BUILDDIRTOP)\tcl$(TCL_VERSION)$(SUFX).lib"
|
||||
TCL_LIBRARY = $(_TCLDIR)\library
|
||||
TCLREGLIB = "$(_TCLDIR)\win\$(BUILDDIRTOP)\tclreg13$(SUFX:t=).lib"
|
||||
TCLDDELIB = "$(_TCLDIR)\win\$(BUILDDIRTOP)\tcldde14$(SUFX:t=).lib"
|
||||
COFFBASE = "$(_TCLDIR)\win\coffbase.txt"
|
||||
TCLTOOLSDIR = $(_TCLDIR)\tools
|
||||
TCL_INCLUDES = -I"$(_TCLDIR)\generic" -I"$(_TCLDIR)\win"
|
||||
!endif
|
||||
|
||||
!endif
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# Locate the Tk headers to build against
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
!if "$(PROJECT)" == "tk"
|
||||
_TK_H = ..\generic\tk.h
|
||||
_INSTALLDIR = $(_INSTALLDIR)\..
|
||||
!endif
|
||||
|
||||
!ifdef PROJECT_REQUIRES_TK
|
||||
!if !defined(TKDIR)
|
||||
!if exist("$(_INSTALLDIR)\..\include\tk.h")
|
||||
TKINSTALL = 1
|
||||
_TKDIR = $(_INSTALLDIR)\..
|
||||
_TK_H = $(_TKDIR)\include\tk.h
|
||||
TKDIR = $(_TKDIR)
|
||||
!elseif exist("$(_TCLDIR)\include\tk.h")
|
||||
TKINSTALL = 1
|
||||
_TKDIR = $(_TCLDIR)
|
||||
_TK_H = $(_TKDIR)\include\tk.h
|
||||
TKDIR = $(_TKDIR)
|
||||
!endif
|
||||
!else
|
||||
_TKDIR = $(TKDIR:/=\)
|
||||
!if exist("$(_TKDIR)\include\tk.h")
|
||||
TKINSTALL = 1
|
||||
_TK_H = $(_TKDIR)\include\tk.h
|
||||
!elseif exist("$(_TKDIR)\generic\tk.h")
|
||||
TKINSTALL = 0
|
||||
_TK_H = $(_TKDIR)\generic\tk.h
|
||||
!else
|
||||
MSG =^
|
||||
Failed to find tk.h. The TKDIR macro does not appear correct.
|
||||
!error $(MSG)
|
||||
!endif
|
||||
!endif
|
||||
!endif
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# Extract Tk version numbers
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
!if defined(PROJECT_REQUIRES_TK) || "$(PROJECT)" == "tk"
|
||||
|
||||
!if [echo TK_MAJOR_VERSION = \>> versions.vc] \
|
||||
&& [nmakehlp -V $(_TK_H) TK_MAJOR_VERSION >> versions.vc]
|
||||
!endif
|
||||
!if [echo TK_MINOR_VERSION = \>> versions.vc] \
|
||||
&& [nmakehlp -V $(_TK_H) TK_MINOR_VERSION >> versions.vc]
|
||||
!endif
|
||||
!if [echo TK_PATCH_LEVEL = \>> versions.vc] \
|
||||
&& [nmakehlp -V $(_TK_H) TK_PATCH_LEVEL >> versions.vc]
|
||||
!endif
|
||||
|
||||
!include versions.vc
|
||||
|
||||
TK_DOTVERSION = $(TK_MAJOR_VERSION).$(TK_MINOR_VERSION)
|
||||
TK_VERSION = $(TK_MAJOR_VERSION)$(TK_MINOR_VERSION)
|
||||
|
||||
!if "$(PROJECT)" != "tk"
|
||||
!if $(TKINSTALL)
|
||||
WISH = "$(_TKDIR)\bin\wish$(TK_VERSION)$(SUFX).exe"
|
||||
TKSTUBLIB = "$(_TKDIR)\lib\tkstub$(TK_VERSION).lib"
|
||||
TKIMPLIB = "$(_TKDIR)\lib\tk$(TK_VERSION)$(SUFX).lib"
|
||||
TK_INCLUDES = -I"$(_TKDIR)\include"
|
||||
!else
|
||||
WISH = "$(_TKDIR)\win\$(BUILDDIRTOP)\wish$(TCL_VERSION)$(SUFX).exe"
|
||||
TKSTUBLIB = "$(_TKDIR)\win\$(BUILDDIRTOP)\tkstub$(TCL_VERSION).lib"
|
||||
TKIMPLIB = "$(_TKDIR)\win\$(BUILDDIRTOP)\tk$(TCL_VERSION)$(SUFX).lib"
|
||||
TK_INCLUDES = -I"$(_TKDIR)\generic" -I"$(_TKDIR)\win" -I"$(_TKDIR)\xlib"
|
||||
!endif
|
||||
!endif
|
||||
|
||||
!endif
|
||||
|
||||
#----------------------------------------------------------
|
||||
# Display stats being used.
|
||||
#----------------------------------------------------------
|
||||
|
||||
!message *** Intermediate directory will be '$(TMP_DIR)'
|
||||
!message *** Output directory will be '$(OUT_DIR)'
|
||||
!message *** Suffix for binaries will be '$(SUFX)'
|
||||
!message *** Optional defines are '$(OPTDEFINES)'
|
||||
!message *** Compiler version $(VCVER). Target machine is $(MACHINE)
|
||||
!message *** Host architecture is $(NATIVE_ARCH)
|
||||
!message *** Compiler options '$(COMPILERFLAGS) $(OPTIMIZATIONS) $(DEBUGFLAGS) $(WARNINGS)'
|
||||
!message *** Link options '$(LINKERFLAGS)'
|
||||
|
||||
!endif
|
||||
|
||||
File diff suppressed because it is too large
@@ -1,119 +0,0 @@
|
||||
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#undef HAVE_DLFCN_H
|
||||
|
||||
/* Define to 1 if you have the `fdatasync' function. */
|
||||
#undef HAVE_FDATASYNC
|
||||
|
||||
/* Define to 1 if you have the `gmtime_r' function. */
|
||||
#undef HAVE_GMTIME_R
|
||||
|
||||
/* Define to 1 if the system has the type `int16_t'. */
|
||||
#undef HAVE_INT16_T
|
||||
|
||||
/* Define to 1 if the system has the type `int32_t'. */
|
||||
#undef HAVE_INT32_T
|
||||
|
||||
/* Define to 1 if the system has the type `int64_t'. */
|
||||
#undef HAVE_INT64_T
|
||||
|
||||
/* Define to 1 if the system has the type `int8_t'. */
|
||||
#undef HAVE_INT8_T
|
||||
|
||||
/* Define to 1 if the system has the type `intptr_t'. */
|
||||
#undef HAVE_INTPTR_T
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the `isnan' function. */
|
||||
#undef HAVE_ISNAN
|
||||
|
||||
/* Define to 1 if you have the `localtime_r' function. */
|
||||
#undef HAVE_LOCALTIME_R
|
||||
|
||||
/* Define to 1 if you have the `localtime_s' function. */
|
||||
#undef HAVE_LOCALTIME_S
|
||||
|
||||
/* Define to 1 if you have the <malloc.h> header file. */
|
||||
#undef HAVE_MALLOC_H
|
||||
|
||||
/* Define to 1 if you have the `malloc_usable_size' function. */
|
||||
#undef HAVE_MALLOC_USABLE_SIZE
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#undef HAVE_MEMORY_H
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#undef HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#undef HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the strchrnul() function */
|
||||
#undef HAVE_STRCHRNUL
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#undef HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#undef HAVE_STRING_H
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#undef HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#undef HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if the system has the type `uint16_t'. */
|
||||
#undef HAVE_UINT16_T
|
||||
|
||||
/* Define to 1 if the system has the type `uint32_t'. */
|
||||
#undef HAVE_UINT32_T
|
||||
|
||||
/* Define to 1 if the system has the type `uint64_t'. */
|
||||
#undef HAVE_UINT64_T
|
||||
|
||||
/* Define to 1 if the system has the type `uint8_t'. */
|
||||
#undef HAVE_UINT8_T
|
||||
|
||||
/* Define to 1 if the system has the type `uintptr_t'. */
|
||||
#undef HAVE_UINTPTR_T
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* Define to 1 if you have the `usleep' function. */
|
||||
#undef HAVE_USLEEP
|
||||
|
||||
/* Define to 1 if you have the utime() library function. */
|
||||
#undef HAVE_UTIME
|
||||
|
||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||
*/
|
||||
#undef LT_OBJDIR
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#undef PACKAGE_BUGREPORT
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#undef PACKAGE_NAME
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#undef PACKAGE_TARNAME
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
/* Number of bits in a file offset, on hosts where this is settable. */
|
||||
#undef _FILE_OFFSET_BITS
|
||||
|
||||
/* Define for large files, on AIX-style hosts. */
|
||||
#undef _LARGE_FILES
|
||||
query_classifier/qc_sqlite/sqlite-src-3110100.old/configure (vendored, 13777 lines): file diff suppressed because it is too large
@@ -1,702 +0,0 @@
|
||||
#
|
||||
# The build process allows for using a cross-compiler. But the default
|
||||
# action is to target the same platform that we are running on. The
|
||||
# configure script needs to discover the following properties of the
|
||||
# build and target systems:
|
||||
#
|
||||
# srcdir
|
||||
#
|
||||
# This is the name of the directory that contains the
|
||||
# "configure" shell script. All source files are
|
||||
# located relative to this directory.
|
||||
#
|
||||
# bindir
|
||||
#
|
||||
# The name of the directory where executables should be
|
||||
# written by the "install" target of the makefile.
|
||||
#
|
||||
# program_prefix
|
||||
#
|
||||
# Add this prefix to the names of all executables that run
|
||||
# on the target machine. Default: ""
|
||||
#
|
||||
# ENABLE_SHARED
|
||||
#
|
||||
# True if shared libraries should be generated.
|
||||
#
|
||||
# BUILD_CC
|
||||
#
|
||||
# The name of a command that is used to convert C
|
||||
# source files into executables that run on the build
|
||||
# platform.
|
||||
#
|
||||
# BUILD_CFLAGS
|
||||
#
|
||||
# Switches that the build compiler needs in order to construct
|
||||
# command-line programs.
|
||||
#
|
||||
# BUILD_LIBS
|
||||
#
|
||||
# Libraries that the build compiler needs in order to construct
|
||||
# command-line programs.
|
||||
#
|
||||
# BUILD_EXEEXT
|
||||
#
|
||||
# The filename extension for executables on the build
|
||||
# platform. "" for Unix and ".exe" for Windows.
|
||||
#
|
||||
# TCL_*
|
||||
#
|
||||
# Lots of values are read in from the tclConfig.sh script,
|
||||
# if that script is available. These values are used for
|
||||
# constructing and installing the TCL extension.
|
||||
#
|
||||
# TARGET_READLINE_LIBS
|
||||
#
|
||||
# These are the library directives passed to the target linker
|
||||
# that cause the executable to link against the readline library.
|
||||
# This might be a switch like "-lreadline" or pathnames of library
|
||||
# file like "../../src/libreadline.a".
|
||||
#
|
||||
# TARGET_READLINE_INC
|
||||
#
|
||||
# This variable defines the directory that contains the header
|
||||
# files for the readline library. If the compiler is able
|
||||
# to find <readline.h> on its own, then this can be blank.
|
||||
#
|
||||
# TARGET_EXEEXT
|
||||
#
|
||||
# The filename extension for executables on the
|
||||
# target platform. "" for Unix and ".exe" for Windows.
|
||||
#
|
||||
# This configure.in file is easy to reuse on other projects. Just
|
||||
# change the argument to AC_INIT(). And disable any features that
|
||||
# you don't need (for example BLT) by erasing or commenting out
|
||||
# the corresponding code.
|
||||
#
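# [Editor's sketch, not part of the original configure.in] When cross-compiling,
# BUILD_CC and BUILD_CFLAGS can be supplied in the environment as described
# above; the host triplet, path, and flags here are only an illustration:
#
#   BUILD_CC=gcc BUILD_CFLAGS="-g -O2" ../sqlite/configure --host=i686-w64-mingw32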
|
||||
AC_INIT(sqlite, m4_esyscmd([cat VERSION | tr -d '\n']))
|
||||
|
||||
dnl Make sure the local VERSION file matches this configure script
|
||||
sqlite_version_sanity_check=`cat $srcdir/VERSION | tr -d '\n'`
|
||||
if test "$PACKAGE_VERSION" != "$sqlite_version_sanity_check" ; then
|
||||
AC_MSG_ERROR([configure script is out of date:
|
||||
configure \$PACKAGE_VERSION = $PACKAGE_VERSION
|
||||
top level VERSION file = $sqlite_version_sanity_check
|
||||
please regen with autoconf])
|
||||
fi
|
||||
|
||||
#########
|
||||
# Programs needed
|
||||
#
|
||||
AC_PROG_LIBTOOL
|
||||
AC_PROG_INSTALL
|
||||
|
||||
#########
|
||||
# Enable large file support (if special flags are necessary)
|
||||
#
|
||||
AC_SYS_LARGEFILE
|
||||
|
||||
#########
|
||||
# Check for needed/wanted data types
|
||||
AC_CHECK_TYPES([int8_t, int16_t, int32_t, int64_t, intptr_t, uint8_t,
|
||||
uint16_t, uint32_t, uint64_t, uintptr_t])
|
||||
|
||||
#########
|
||||
# Check for needed/wanted headers
|
||||
AC_CHECK_HEADERS([sys/types.h stdlib.h stdint.h inttypes.h malloc.h])
|
||||
|
||||
#########
|
||||
# Figure out whether or not we have these functions
|
||||
#
|
||||
AC_CHECK_FUNCS([fdatasync gmtime_r isnan localtime_r localtime_s malloc_usable_size strchrnul usleep utime])
|
||||
|
||||
#########
|
||||
# By default, we use the amalgamation (this may be changed below...)
|
||||
#
|
||||
USE_AMALGAMATION=1
|
||||
|
||||
#########
|
||||
# See whether we can run specific tclsh versions known to work well;
|
||||
# if not, then we fall back to plain tclsh.
|
||||
# TODO: try other versions before falling back?
|
||||
#
|
||||
AC_CHECK_PROGS(TCLSH_CMD, [tclsh8.6 tclsh8.5 tclsh], none)
|
||||
if test "$TCLSH_CMD" = "none"; then
|
||||
# If we can't find a local tclsh, then building the amalgamation will fail.
|
||||
# We act as though --disable-amalgamation has been used.
|
||||
echo "Warning: can't find tclsh - defaulting to non-amalgamation build."
|
||||
USE_AMALGAMATION=0
|
||||
TCLSH_CMD="tclsh"
|
||||
fi
|
||||
AC_SUBST(TCLSH_CMD)
|
||||
|
||||
AC_ARG_VAR([TCLLIBDIR], [Where to install tcl plugin])
|
||||
if test "x${TCLLIBDIR+set}" != "xset" ; then
|
||||
TCLLIBDIR='$(libdir)'
|
||||
for i in `echo 'puts stdout $auto_path' | ${TCLSH_CMD}` ; do
|
||||
TCLLIBDIR=$i
|
||||
break
|
||||
done
|
||||
TCLLIBDIR="${TCLLIBDIR}/sqlite3"
|
||||
fi
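# [Editor's sketch, not part of the original configure.in] The TCLLIBDIR probe
# above amounts to asking tclsh for its package search path, for example:
#
#   echo 'puts stdout $auto_path' | tclsh8.6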
|
||||
|
||||
|
||||
#########
|
||||
# Set up an appropriate program prefix
|
||||
#
|
||||
if test "$program_prefix" = "NONE"; then
|
||||
program_prefix=""
|
||||
fi
|
||||
AC_SUBST(program_prefix)
|
||||
|
||||
VERSION=[`cat $srcdir/VERSION | sed 's/^\([0-9]*\.*[0-9]*\).*/\1/'`]
|
||||
AC_MSG_NOTICE(Version set to $VERSION)
|
||||
AC_SUBST(VERSION)
|
||||
RELEASE=`cat $srcdir/VERSION`
|
||||
AC_MSG_NOTICE(Release set to $RELEASE)
|
||||
AC_SUBST(RELEASE)
|
||||
VERSION_NUMBER=[`cat $srcdir/VERSION \
|
||||
| sed 's/[^0-9]/ /g' \
|
||||
| awk '{printf "%d%03d%03d",$1,$2,$3}'`]
|
||||
AC_MSG_NOTICE(Version number set to $VERSION_NUMBER)
|
||||
AC_SUBST(VERSION_NUMBER)
|
||||
|
||||
#########
|
||||
# Locate a compiler for the build machine. This compiler should
|
||||
# generate command-line programs that run on the build machine.
|
||||
#
|
||||
if test x"$cross_compiling" = xno; then
|
||||
BUILD_CC=$CC
|
||||
BUILD_CFLAGS=$CFLAGS
|
||||
else
|
||||
if test "${BUILD_CC+set}" != set; then
|
||||
AC_CHECK_PROGS(BUILD_CC, gcc cc cl)
|
||||
fi
|
||||
if test "${BUILD_CFLAGS+set}" != set; then
|
||||
BUILD_CFLAGS="-g"
|
||||
fi
|
||||
fi
|
||||
AC_SUBST(BUILD_CC)
|
||||
|
||||
##########
|
||||
# Do we want to support multithreaded use of sqlite
|
||||
#
|
||||
AC_ARG_ENABLE(threadsafe,
|
||||
AC_HELP_STRING([--disable-threadsafe],[Disable mutexing]),,enable_threadsafe=yes)
|
||||
AC_MSG_CHECKING([whether to support threadsafe operation])
|
||||
if test "$enable_threadsafe" = "no"; then
|
||||
SQLITE_THREADSAFE=0
|
||||
AC_MSG_RESULT([no])
|
||||
else
|
||||
SQLITE_THREADSAFE=1
|
||||
AC_MSG_RESULT([yes])
|
||||
fi
|
||||
AC_SUBST(SQLITE_THREADSAFE)
|
||||
|
||||
if test "$SQLITE_THREADSAFE" = "1"; then
|
||||
AC_SEARCH_LIBS(pthread_create, pthread)
|
||||
AC_SEARCH_LIBS(pthread_mutexattr_init, pthread)
|
||||
fi
|
||||
|
||||
##########
|
||||
# Do we want to support release
|
||||
#
|
||||
AC_ARG_ENABLE(releasemode,
|
||||
AC_HELP_STRING([--enable-releasemode],[Support libtool link to release mode]),,enable_releasemode=no)
|
||||
AC_MSG_CHECKING([whether to support shared library linked as release mode or not])
|
||||
if test "$enable_releasemode" = "no"; then
|
||||
ALLOWRELEASE=""
|
||||
AC_MSG_RESULT([no])
|
||||
else
|
||||
ALLOWRELEASE="-release `cat $srcdir/VERSION`"
|
||||
AC_MSG_RESULT([yes])
|
||||
fi
|
||||
AC_SUBST(ALLOWRELEASE)
|
||||
|
||||
##########
|
||||
# Do we want temporary databases in memory
|
||||
#
|
||||
AC_ARG_ENABLE(tempstore,
|
||||
AC_HELP_STRING([--enable-tempstore],[Use an in-ram database for temporary tables (never,no,yes,always)]),,enable_tempstore=no)
|
||||
AC_MSG_CHECKING([whether to use an in-ram database for temporary tables])
|
||||
case "$enable_tempstore" in
|
||||
never )
|
||||
TEMP_STORE=0
|
||||
AC_MSG_RESULT([never])
|
||||
;;
|
||||
no )
|
||||
TEMP_STORE=1
|
||||
AC_MSG_RESULT([no])
|
||||
;;
|
||||
yes )
|
||||
TEMP_STORE=2
|
||||
AC_MSG_RESULT([yes])
|
||||
;;
|
||||
always )
|
||||
TEMP_STORE=3
|
||||
AC_MSG_RESULT([always])
|
||||
;;
|
||||
* )
|
||||
TEMP_STORE=1
|
||||
AC_MSG_RESULT([no])
|
||||
;;
|
||||
esac
|
||||
|
||||
AC_SUBST(TEMP_STORE)
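# [Editor's sketch, not part of the original configure.in] Illustrative
# invocations and the TEMP_STORE value each one selects per the case above:
#
#   ./configure --enable-tempstore=never    # TEMP_STORE=0
#   ./configure --enable-tempstore=always   # TEMP_STORE=3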
|
||||
|
||||
###########
|
||||
# Lots of things are different if we are compiling for Windows using
|
||||
# the CYGWIN environment. So check for that special case and handle
|
||||
# things accordingly.
|
||||
#
|
||||
AC_MSG_CHECKING([if executables have the .exe suffix])
|
||||
if test "$config_BUILD_EXEEXT" = ".exe"; then
|
||||
CYGWIN=yes
|
||||
AC_MSG_RESULT(yes)
|
||||
else
|
||||
AC_MSG_RESULT(unknown)
|
||||
fi
|
||||
if test "$CYGWIN" != "yes"; then
|
||||
AC_CYGWIN
|
||||
fi
|
||||
if test "$CYGWIN" = "yes"; then
|
||||
BUILD_EXEEXT=.exe
|
||||
else
|
||||
BUILD_EXEEXT=$EXEEXT
|
||||
fi
|
||||
if test x"$cross_compiling" = xno; then
|
||||
TARGET_EXEEXT=$BUILD_EXEEXT
|
||||
else
|
||||
TARGET_EXEEXT=$config_TARGET_EXEEXT
|
||||
fi
|
||||
if test "$TARGET_EXEEXT" = ".exe"; then
|
||||
SQLITE_OS_UNIX=0
|
||||
SQLITE_OS_WIN=1
|
||||
CFLAGS="$CFLAGS -DSQLITE_OS_WIN=1"
|
||||
else
|
||||
SQLITE_OS_UNIX=1
|
||||
SQLITE_OS_WIN=0
|
||||
CFLAGS="$CFLAGS -DSQLITE_OS_UNIX=1"
|
||||
fi
|
||||
|
||||
AC_SUBST(BUILD_EXEEXT)
|
||||
AC_SUBST(SQLITE_OS_UNIX)
|
||||
AC_SUBST(SQLITE_OS_WIN)
|
||||
AC_SUBST(TARGET_EXEEXT)
|
||||
|
||||
##########
|
||||
# Figure out all the parameters needed to compile against Tcl.
|
||||
#
|
||||
# This code is derived from the SC_PATH_TCLCONFIG and SC_LOAD_TCLCONFIG
|
||||
# macros in the tcl.m4 file of the standard TCL distribution.
|
||||
# Those macros could not be used directly since we have to make some
|
||||
# minor changes to accommodate systems that do not have TCL installed.
|
||||
#
|
||||
AC_ARG_ENABLE(tcl, AC_HELP_STRING([--disable-tcl],[do not build TCL extension]),
|
||||
[use_tcl=$enableval],[use_tcl=yes])
|
||||
if test "${use_tcl}" = "yes" ; then
|
||||
AC_ARG_WITH(tcl, AC_HELP_STRING([--with-tcl=DIR],[directory containing tcl configuration (tclConfig.sh)]), with_tclconfig=${withval})
|
||||
AC_MSG_CHECKING([for Tcl configuration])
|
||||
AC_CACHE_VAL(ac_cv_c_tclconfig,[
|
||||
# First check to see if --with-tcl was specified.
|
||||
if test x"${with_tclconfig}" != x ; then
|
||||
if test -f "${with_tclconfig}/tclConfig.sh" ; then
|
||||
ac_cv_c_tclconfig=`(cd ${with_tclconfig}; pwd)`
|
||||
else
|
||||
AC_MSG_ERROR([${with_tclconfig} directory doesn't contain tclConfig.sh])
|
||||
fi
|
||||
fi
|
||||
|
||||
# Start autosearch by asking tclsh
|
||||
if test x"${ac_cv_c_tclconfig}" = x ; then
|
||||
if test x"$cross_compiling" = xno; then
|
||||
for i in `echo 'puts stdout $auto_path' | ${TCLSH_CMD}`
|
||||
do
|
||||
if test -f "$i/tclConfig.sh" ; then
|
||||
ac_cv_c_tclconfig="$i"
|
||||
break
|
||||
fi
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# On ubuntu 14.10, $auto_path on tclsh is not quite correct.
|
||||
# So try again after applying corrections.
|
||||
if test x"${ac_cv_c_tclconfig}" = x ; then
|
||||
if test x"$cross_compiling" = xno; then
|
||||
for i in `echo 'puts stdout $auto_path' | ${TCLSH_CMD} | sed 's,/tcltk/tcl,/tcl,g'`
|
||||
do
|
||||
if test -f "$i/tclConfig.sh" ; then
|
||||
ac_cv_c_tclconfig="$i"
|
||||
break
|
||||
fi
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
# then check for a private Tcl installation
|
||||
if test x"${ac_cv_c_tclconfig}" = x ; then
|
||||
for i in \
|
||||
../tcl \
|
||||
`ls -dr ../tcl[[8-9]].[[0-9]].[[0-9]]* 2>/dev/null` \
|
||||
`ls -dr ../tcl[[8-9]].[[0-9]] 2>/dev/null` \
|
||||
`ls -dr ../tcl[[8-9]].[[0-9]]* 2>/dev/null` \
|
||||
../../tcl \
|
||||
`ls -dr ../../tcl[[8-9]].[[0-9]].[[0-9]]* 2>/dev/null` \
|
||||
`ls -dr ../../tcl[[8-9]].[[0-9]] 2>/dev/null` \
|
||||
`ls -dr ../../tcl[[8-9]].[[0-9]]* 2>/dev/null` \
|
||||
../../../tcl \
|
||||
`ls -dr ../../../tcl[[8-9]].[[0-9]].[[0-9]]* 2>/dev/null` \
|
||||
`ls -dr ../../../tcl[[8-9]].[[0-9]] 2>/dev/null` \
|
||||
`ls -dr ../../../tcl[[8-9]].[[0-9]]* 2>/dev/null`
|
||||
do
|
||||
if test -f "$i/unix/tclConfig.sh" ; then
|
||||
ac_cv_c_tclconfig=`(cd $i/unix; pwd)`
|
||||
break
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
# check in a few common install locations
|
||||
if test x"${ac_cv_c_tclconfig}" = x ; then
|
||||
for i in \
|
||||
`ls -d ${libdir} 2>/dev/null` \
|
||||
`ls -d /usr/local/lib 2>/dev/null` \
|
||||
`ls -d /usr/contrib/lib 2>/dev/null` \
|
||||
`ls -d /usr/lib 2>/dev/null`
|
||||
do
|
||||
if test -f "$i/tclConfig.sh" ; then
|
||||
ac_cv_c_tclconfig=`(cd $i; pwd)`
|
||||
break
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
# check in a few other private locations
|
||||
if test x"${ac_cv_c_tclconfig}" = x ; then
|
||||
for i in \
|
||||
${srcdir}/../tcl \
|
||||
`ls -dr ${srcdir}/../tcl[[8-9]].[[0-9]].[[0-9]]* 2>/dev/null` \
|
||||
`ls -dr ${srcdir}/../tcl[[8-9]].[[0-9]] 2>/dev/null` \
|
||||
`ls -dr ${srcdir}/../tcl[[8-9]].[[0-9]]* 2>/dev/null`
|
||||
do
|
||||
if test -f "$i/unix/tclConfig.sh" ; then
|
||||
ac_cv_c_tclconfig=`(cd $i/unix; pwd)`
|
||||
break
|
||||
fi
|
||||
done
|
||||
fi
|
||||
])
|
||||
|
||||
if test x"${ac_cv_c_tclconfig}" = x ; then
|
||||
use_tcl=no
|
||||
AC_MSG_WARN(Can't find Tcl configuration definitions)
|
||||
AC_MSG_WARN(*** Without Tcl the regression tests cannot be executed ***)
|
||||
AC_MSG_WARN(*** Consider using --with-tcl=... to define location of Tcl ***)
|
||||
else
|
||||
TCL_BIN_DIR=${ac_cv_c_tclconfig}
|
||||
AC_MSG_RESULT(found $TCL_BIN_DIR/tclConfig.sh)
|
||||
|
||||
AC_MSG_CHECKING([for existence of $TCL_BIN_DIR/tclConfig.sh])
|
||||
if test -f "$TCL_BIN_DIR/tclConfig.sh" ; then
|
||||
AC_MSG_RESULT([loading])
|
||||
. $TCL_BIN_DIR/tclConfig.sh
|
||||
else
|
||||
AC_MSG_RESULT([file not found])
|
||||
fi
|
||||
|
||||
#
|
||||
# If the TCL_BIN_DIR is the build directory (not the install directory),
|
||||
# then set the common variable name to the value of the build variables.
|
||||
# For example, the variable TCL_LIB_SPEC will be set to the value
|
||||
# of TCL_BUILD_LIB_SPEC. An extension should make use of TCL_LIB_SPEC
|
||||
# instead of TCL_BUILD_LIB_SPEC since it will work with both an
|
||||
# installed and uninstalled version of Tcl.
|
||||
#
|
||||
|
||||
if test -f $TCL_BIN_DIR/Makefile ; then
|
||||
TCL_LIB_SPEC=${TCL_BUILD_LIB_SPEC}
|
||||
TCL_STUB_LIB_SPEC=${TCL_BUILD_STUB_LIB_SPEC}
|
||||
TCL_STUB_LIB_PATH=${TCL_BUILD_STUB_LIB_PATH}
|
||||
fi
|
||||
|
||||
#
|
||||
# eval is required to do the TCL_DBGX substitution
|
||||
#
|
||||
|
||||
eval "TCL_LIB_FILE=\"${TCL_LIB_FILE}\""
|
||||
eval "TCL_LIB_FLAG=\"${TCL_LIB_FLAG}\""
|
||||
eval "TCL_LIB_SPEC=\"${TCL_LIB_SPEC}\""
|
||||
|
||||
eval "TCL_STUB_LIB_FILE=\"${TCL_STUB_LIB_FILE}\""
|
||||
eval "TCL_STUB_LIB_FLAG=\"${TCL_STUB_LIB_FLAG}\""
|
||||
eval "TCL_STUB_LIB_SPEC=\"${TCL_STUB_LIB_SPEC}\""
|
||||
|
||||
AC_SUBST(TCL_VERSION)
|
||||
AC_SUBST(TCL_BIN_DIR)
|
||||
AC_SUBST(TCL_SRC_DIR)
|
||||
AC_SUBST(TCL_INCLUDE_SPEC)
|
||||
|
||||
AC_SUBST(TCL_LIB_FILE)
|
||||
AC_SUBST(TCL_LIB_FLAG)
|
||||
AC_SUBST(TCL_LIB_SPEC)
|
||||
|
||||
AC_SUBST(TCL_STUB_LIB_FILE)
|
||||
AC_SUBST(TCL_STUB_LIB_FLAG)
|
||||
AC_SUBST(TCL_STUB_LIB_SPEC)
|
||||
AC_SUBST(TCL_SHLIB_SUFFIX)
|
||||
fi
|
||||
fi
|
||||
if test "${use_tcl}" = "no" ; then
|
||||
HAVE_TCL=""
|
||||
else
|
||||
HAVE_TCL=1
|
||||
fi
|
||||
AC_SUBST(HAVE_TCL)
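# [Editor's sketch, not part of the original configure.in] If the tclConfig.sh
# search above fails, it can be pointed at a directory explicitly; the path is
# only an example:
#
#   ./configure --with-tcl=/usr/lib/tcl8.6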
|
||||
|
||||
##########
|
||||
# Figure out what C libraries are required to compile programs
|
||||
# that use "readline()" library.
|
||||
#
|
||||
TARGET_READLINE_LIBS=""
|
||||
TARGET_READLINE_INC=""
|
||||
TARGET_HAVE_READLINE=0
|
||||
TARGET_HAVE_EDITLINE=0
|
||||
AC_ARG_ENABLE([editline],
|
||||
[AC_HELP_STRING([--enable-editline],[enable BSD editline support])],
|
||||
[with_editline=$enableval],
|
||||
[with_editline=auto])
|
||||
AC_ARG_ENABLE([readline],
|
||||
[AC_HELP_STRING([--disable-readline],[disable readline support])],
|
||||
[with_readline=$enableval],
|
||||
[with_readline=auto])
|
||||
|
||||
if test x"$with_editline" != xno; then
|
||||
sLIBS=$LIBS
|
||||
LIBS=""
|
||||
TARGET_HAVE_EDITLINE=1
|
||||
AC_SEARCH_LIBS(readline,edit,[with_readline=no],[TARGET_HAVE_EDITLINE=0])
|
||||
TARGET_READLINE_LIBS=$LIBS
|
||||
LIBS=$sLIBS
|
||||
fi
|
||||
if test x"$with_readline" != xno; then
|
||||
found="yes"
|
||||
|
||||
AC_ARG_WITH([readline-lib],
|
||||
[AC_HELP_STRING([--with-readline-lib],[specify readline library])],
|
||||
[with_readline_lib=$withval],
|
||||
[with_readline_lib="auto"])
|
||||
if test "x$with_readline_lib" = xauto; then
|
||||
save_LIBS="$LIBS"
|
||||
LIBS=""
|
||||
AC_SEARCH_LIBS(tgetent, [readline ncurses curses termcap], [term_LIBS="$LIBS"], [term_LIBS=""])
|
||||
AC_CHECK_LIB([readline], [readline], [TARGET_READLINE_LIBS="-lreadline"], [found="no"])
|
||||
TARGET_READLINE_LIBS="$TARGET_READLINE_LIBS $term_LIBS"
|
||||
LIBS="$save_LIBS"
|
||||
else
|
||||
TARGET_READLINE_LIBS="$with_readline_lib"
|
||||
fi
|
||||
|
||||
AC_ARG_WITH([readline-inc],
|
||||
[AC_HELP_STRING([--with-readline-inc],[specify readline include paths])],
|
||||
[with_readline_inc=$withval],
|
||||
[with_readline_inc="auto"])
|
||||
if test "x$with_readline_inc" = xauto; then
|
||||
AC_CHECK_HEADER(readline.h, [found="yes"], [
|
||||
found="no"
|
||||
if test "$cross_compiling" != yes; then
|
||||
for dir in /usr /usr/local /usr/local/readline /usr/contrib /mingw; do
|
||||
for subdir in include include/readline; do
|
||||
AC_CHECK_FILE($dir/$subdir/readline.h, found=yes)
|
||||
if test "$found" = "yes"; then
|
||||
TARGET_READLINE_INC="-I$dir/$subdir"
|
||||
break
|
||||
fi
|
||||
done
|
||||
test "$found" = "yes" && break
|
||||
done
|
||||
fi
|
||||
])
|
||||
else
|
||||
TARGET_READLINE_INC="$with_readline_inc"
|
||||
fi
|
||||
|
||||
if test x"$found" = xno; then
|
||||
TARGET_READLINE_LIBS=""
|
||||
TARGET_READLINE_INC=""
|
||||
TARGET_HAVE_READLINE=0
|
||||
else
|
||||
TARGET_HAVE_READLINE=1
|
||||
fi
|
||||
fi
|
||||
|
||||
AC_SUBST(TARGET_READLINE_LIBS)
|
||||
AC_SUBST(TARGET_READLINE_INC)
|
||||
AC_SUBST(TARGET_HAVE_READLINE)
|
||||
AC_SUBST(TARGET_HAVE_EDITLINE)
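# [Editor's sketch, not part of the original configure.in] The readline probes
# above can be bypassed by naming the library and include path directly; both
# values below are illustrative:
#
#   ./configure --with-readline-lib="-lreadline -lncurses" \
#               --with-readline-inc="-I/usr/include/readline"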
|
||||
|
||||
##########
|
||||
# Figure out what C libraries are required to compile programs
|
||||
# that use "fdatasync()" function.
|
||||
#
|
||||
AC_SEARCH_LIBS(fdatasync, [rt])
|
||||
|
||||
#########
|
||||
# check for debug enabled
|
||||
AC_ARG_ENABLE(debug, AC_HELP_STRING([--enable-debug],[enable debugging & verbose explain]),
|
||||
[use_debug=$enableval],[use_debug=no])
|
||||
if test "${use_debug}" = "yes" ; then
|
||||
TARGET_DEBUG="-DSQLITE_DEBUG=1"
|
||||
else
|
||||
TARGET_DEBUG="-DNDEBUG"
|
||||
fi
|
||||
AC_SUBST(TARGET_DEBUG)
|
||||
|
||||
#########
|
||||
# See whether we should use the amalgamation to build
|
||||
AC_ARG_ENABLE(amalgamation, AC_HELP_STRING([--disable-amalgamation],
|
||||
[Disable the amalgamation and instead build all files separately]),
|
||||
[use_amalgamation=$enableval],[use_amalgamation=yes])
|
||||
if test "${use_amalgamation}" != "yes" ; then
|
||||
USE_AMALGAMATION=0
|
||||
fi
|
||||
AC_SUBST(USE_AMALGAMATION)
|
||||
|
||||
#########
|
||||
# See whether we should allow loadable extensions
|
||||
AC_ARG_ENABLE(load-extension, AC_HELP_STRING([--disable-load-extension],
|
||||
[Disable loading of external extensions]),
|
||||
[use_loadextension=$enableval],[use_loadextension=yes])
|
||||
if test "${use_loadextension}" = "yes" ; then
|
||||
OPT_FEATURE_FLAGS=""
|
||||
AC_SEARCH_LIBS(dlopen, dl)
|
||||
else
|
||||
OPT_FEATURE_FLAGS="-DSQLITE_OMIT_LOAD_EXTENSION=1"
|
||||
fi
|
||||
|
||||
#########
|
||||
# See whether we should enable Full Text Search extensions
|
||||
AC_ARG_ENABLE(fts3, AC_HELP_STRING([--enable-fts3],
|
||||
[Enable the FTS3 extension]),
|
||||
[enable_fts3=yes],[enable_fts3=no])
|
||||
if test "${enable_fts3}" = "yes" ; then
|
||||
OPT_FEATURE_FLAGS+=" -DSQLITE_ENABLE_FTS3"
|
||||
fi
|
||||
AC_ARG_ENABLE(fts4, AC_HELP_STRING([--enable-fts4],
|
||||
[Enable the FTS4 extension]),
|
||||
[enable_fts4=yes],[enable_fts4=no])
|
||||
if test "${enable_fts4}" = "yes" ; then
|
||||
OPT_FEATURE_FLAGS+=" -DSQLITE_ENABLE_FTS4"
|
||||
AC_SEARCH_LIBS([log],[m])
|
||||
fi
|
||||
AC_ARG_ENABLE(fts5, AC_HELP_STRING([--enable-fts5],
|
||||
[Enable the FTS5 extension]),
|
||||
[enable_fts5=yes],[enable_fts5=no])
|
||||
if test "${enable_fts5}" = "yes" ; then
|
||||
OPT_FEATURE_FLAGS+=" -DSQLITE_ENABLE_FTS5"
|
||||
AC_SEARCH_LIBS([log],[m])
|
||||
fi
|
||||
|
||||
#########
|
||||
# See whether we should enable JSON1
|
||||
AC_ARG_ENABLE(json1, AC_HELP_STRING([--enable-json1],
|
||||
[Enable the JSON1 extension]),
|
||||
[enable_json1=yes],[enable_json1=no])
|
||||
if test "${enable_json1}" = "yes" ; then
|
||||
OPT_FEATURE_FLAGS+=" -DSQLITE_ENABLE_JSON1"
|
||||
fi
|
||||
|
||||
#########
|
||||
# See whether we should enable RTREE
|
||||
AC_ARG_ENABLE(rtree, AC_HELP_STRING([--enable-rtree],
|
||||
[Enable the RTREE extension]),
|
||||
[enable_rtree=yes],[enable_rtree=no])
|
||||
if test "${enable_rtree}" = "yes" ; then
|
||||
OPT_FEATURE_FLAGS+=" -DSQLITE_ENABLE_RTREE"
|
||||
fi
|
||||
|
||||
#########
|
||||
# See whether we should enable MaxScale
|
||||
AC_ARG_ENABLE(maxscale, AC_HELP_STRING([--enable-maxscale],
|
||||
[Enable MaxScale extensions]),
|
||||
[enable_maxscale=yes],[enable_maxscale=no])
|
||||
if test "${enable_maxscale}" = "yes" ; then
|
||||
OPT_FEATURE_FLAGS+=" -DMAXSCALE -DSQLITE_ENABLE_UPDATE_DELETE_LIMIT -DSQLITE_OMIT_ATTACH -DSQLITE_OMIT_REINDEX -DSQLITE_OMIT_AUTOVACUUM -DSQLITE_OMIT_PRAGMA"
|
||||
fi
|
||||
|
||||
#########
|
||||
# attempt to duplicate any OMITS and ENABLES into the $(OPT_FEATURE_FLAGS) parameter
|
||||
for option in $CFLAGS $CPPFLAGS
|
||||
do
|
||||
case $option in
|
||||
-DSQLITE_OMIT*) OPT_FEATURE_FLAGS="$OPT_FEATURE_FLAGS $option";;
|
||||
-DSQLITE_ENABLE*) OPT_FEATURE_FLAGS="$OPT_FEATURE_FLAGS $option";;
|
||||
esac
|
||||
done
|
||||
AC_SUBST(OPT_FEATURE_FLAGS)
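# [Editor's sketch, not part of the original configure.in] Feature switches and
# any -DSQLITE_OMIT_*/-DSQLITE_ENABLE_* found in CFLAGS/CPPFLAGS all end up in
# OPT_FEATURE_FLAGS; the OMIT define below is just an example:
#
#   ./configure --enable-fts5 --enable-json1 CFLAGS="-DSQLITE_OMIT_DEPRECATED"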
|
||||
|
||||
|
||||
# attempt to remove any OMITS and ENABLES from the $(CFLAGS) parameter
|
||||
ac_temp_CFLAGS=""
|
||||
for option in $CFLAGS
|
||||
do
|
||||
case $option in
|
||||
-DSQLITE_OMIT*) ;;
|
||||
-DSQLITE_ENABLE*) ;;
|
||||
*) ac_temp_CFLAGS="$ac_temp_CFLAGS $option";;
|
||||
esac
|
||||
done
|
||||
CFLAGS=$ac_temp_CFLAGS
|
||||
|
||||
|
||||
# attempt to remove any OMITS and ENABLES from the $(CPPFLAGS) parameter
|
||||
ac_temp_CPPFLAGS=""
|
||||
for option in $CPPFLAGS
|
||||
do
|
||||
case $option in
|
||||
-DSQLITE_OMIT*) ;;
|
||||
-DSQLITE_ENABLE*) ;;
|
||||
*) ac_temp_CPPFLAGS="$ac_temp_CPPFLAGS $option";;
|
||||
esac
|
||||
done
|
||||
CPPFLAGS=$ac_temp_CPPFLAGS
|
||||
|
||||
|
||||
# attempt to remove any OMITS and ENABLES from the $(BUILD_CFLAGS) parameter
|
||||
ac_temp_BUILD_CFLAGS=""
|
||||
for option in $BUILD_CFLAGS
|
||||
do
|
||||
case $option in
|
||||
-DSQLITE_OMIT*) ;;
|
||||
-DSQLITE_ENABLE*) ;;
|
||||
*) ac_temp_BUILD_CFLAGS="$ac_temp_BUILD_CFLAGS $option";;
|
||||
esac
|
||||
done
|
||||
BUILD_CFLAGS=$ac_temp_BUILD_CFLAGS
|
||||
|
||||
|
||||
#########
|
||||
# See whether we should use GCOV
|
||||
AC_ARG_ENABLE(gcov, AC_HELP_STRING([--enable-gcov],
|
||||
[Enable coverage testing using gcov]),
|
||||
[use_gcov=$enableval],[use_gcov=no])
|
||||
if test "${use_gcov}" = "yes" ; then
|
||||
USE_GCOV=1
|
||||
else
|
||||
USE_GCOV=0
|
||||
fi
|
||||
AC_SUBST(USE_GCOV)
|
||||
|
||||
|
||||
#########
|
||||
# Output the config header
|
||||
AC_CONFIG_HEADERS(config.h)
|
||||
|
||||
#########
|
||||
# Generate the output files.
|
||||
#
|
||||
AC_SUBST(BUILD_CFLAGS)
|
||||
AC_OUTPUT([
|
||||
Makefile
|
||||
sqlite3.pc
|
||||
])
|
||||
@@ -1,679 +0,0 @@
|
||||
# A Tk console widget for SQLite. Invoke sqlitecon::create with a window name,
|
||||
# a prompt string, a title to set a new top-level window, and the SQLite
|
||||
# database handle. For example:
|
||||
#
|
||||
# sqlitecon::create .sqlcon {sql:- } {SQL Console} db
|
||||
#
|
||||
# A toplevel window is created that allows you to type in SQL commands to
|
||||
# be processed on the spot.
|
||||
#
|
||||
# A limited set of dot-commands are supported:
|
||||
#
|
||||
# .table
|
||||
# .schema ?TABLE?
|
||||
# .mode list|column|multicolumn|line
|
||||
# .exit
|
||||
#
|
||||
# In addition, a new SQL function named "edit()" is created. This function
|
||||
# takes a single text argument and returns a text result. Whenever the
|
||||
# function is called, it pops up a new toplevel window containing a
|
||||
# text editor screen initialized to the argument. When the "OK" button
|
||||
# is pressed, whatever revised text is in the text editor is returned as
|
||||
# the result of the edit() function. This allows text fields of SQL tables
|
||||
# to be edited quickly and easily as follows:
|
||||
#
|
||||
# UPDATE table1 SET dscr = edit(dscr) WHERE rowid=15;
|
||||
#
|
||||
|
||||
|
||||
# Create a namespace to work in
|
||||
#
|
||||
namespace eval ::sqlitecon {
|
||||
# do nothing
|
||||
}
|
||||
|
||||
# Create a console widget named $w. The prompt string is $prompt.
|
||||
# The title at the top of the window is $title. The database connection
|
||||
# object is $db
|
||||
#
|
||||
proc sqlitecon::create {w prompt title db} {
|
||||
upvar #0 $w.t v
|
||||
if {[winfo exists $w]} {destroy $w}
|
||||
if {[info exists v]} {unset v}
|
||||
toplevel $w
|
||||
wm title $w $title
|
||||
wm iconname $w $title
|
||||
frame $w.mb -bd 2 -relief raised
|
||||
pack $w.mb -side top -fill x
|
||||
menubutton $w.mb.file -text File -menu $w.mb.file.m
|
||||
menubutton $w.mb.edit -text Edit -menu $w.mb.edit.m
|
||||
pack $w.mb.file $w.mb.edit -side left -padx 8 -pady 1
|
||||
set m [menu $w.mb.file.m -tearoff 0]
|
||||
$m add command -label {Close} -command "destroy $w"
|
||||
sqlitecon::create_child $w $prompt $w.mb.edit.m
|
||||
set v(db) $db
|
||||
$db function edit ::sqlitecon::_edit
|
||||
}
|
||||
|
||||
# This routine creates a console as a child window within a larger
|
||||
# window. It also creates an edit menu named "$editmenu" if $editmenu!="".
|
||||
# The calling function is responsible for posting the edit menu.
|
||||
#
|
||||
proc sqlitecon::create_child {w prompt editmenu} {
|
||||
upvar #0 $w.t v
|
||||
if {$editmenu!=""} {
|
||||
set m [menu $editmenu -tearoff 0]
|
||||
$m add command -label Cut -command "sqlitecon::Cut $w.t"
|
||||
$m add command -label Copy -command "sqlitecon::Copy $w.t"
|
||||
$m add command -label Paste -command "sqlitecon::Paste $w.t"
|
||||
$m add command -label {Clear Screen} -command "sqlitecon::Clear $w.t"
|
||||
$m add separator
|
||||
$m add command -label {Save As...} -command "sqlitecon::SaveFile $w.t"
|
||||
catch {$editmenu config -postcommand "sqlitecon::EnableEditMenu $w"}
|
||||
}
|
||||
scrollbar $w.sb -orient vertical -command "$w.t yview"
|
||||
pack $w.sb -side right -fill y
|
||||
text $w.t -font fixed -yscrollcommand "$w.sb set"
|
||||
pack $w.t -side right -fill both -expand 1
|
||||
bindtags $w.t Sqlitecon
|
||||
set v(editmenu) $editmenu
|
||||
set v(history) 0
|
||||
set v(historycnt) 0
|
||||
set v(current) -1
|
||||
set v(prompt) $prompt
|
||||
set v(prior) {}
|
||||
set v(plength) [string length $v(prompt)]
|
||||
set v(x) 0
|
||||
set v(y) 0
|
||||
set v(mode) column
|
||||
set v(header) on
|
||||
$w.t mark set insert end
|
||||
$w.t tag config ok -foreground blue
|
||||
$w.t tag config err -foreground red
|
||||
$w.t insert end $v(prompt)
|
||||
$w.t mark set out 1.0
|
||||
after idle "focus $w.t"
|
||||
}
|
||||
|
||||
bind Sqlitecon <1> {sqlitecon::Button1 %W %x %y}
|
||||
bind Sqlitecon <B1-Motion> {sqlitecon::B1Motion %W %x %y}
|
||||
bind Sqlitecon <B1-Leave> {sqlitecon::B1Leave %W %x %y}
|
||||
bind Sqlitecon <B1-Enter> {sqlitecon::cancelMotor %W}
|
||||
bind Sqlitecon <ButtonRelease-1> {sqlitecon::cancelMotor %W}
|
||||
bind Sqlitecon <KeyPress> {sqlitecon::Insert %W %A}
|
||||
bind Sqlitecon <Left> {sqlitecon::Left %W}
|
||||
bind Sqlitecon <Control-b> {sqlitecon::Left %W}
|
||||
bind Sqlitecon <Right> {sqlitecon::Right %W}
|
||||
bind Sqlitecon <Control-f> {sqlitecon::Right %W}
|
||||
bind Sqlitecon <BackSpace> {sqlitecon::Backspace %W}
|
||||
bind Sqlitecon <Control-h> {sqlitecon::Backspace %W}
|
||||
bind Sqlitecon <Delete> {sqlitecon::Delete %W}
|
||||
bind Sqlitecon <Control-d> {sqlitecon::Delete %W}
|
||||
bind Sqlitecon <Home> {sqlitecon::Home %W}
|
||||
bind Sqlitecon <Control-a> {sqlitecon::Home %W}
|
||||
bind Sqlitecon <End> {sqlitecon::End %W}
|
||||
bind Sqlitecon <Control-e> {sqlitecon::End %W}
|
||||
bind Sqlitecon <Return> {sqlitecon::Enter %W}
|
||||
bind Sqlitecon <KP_Enter> {sqlitecon::Enter %W}
|
||||
bind Sqlitecon <Up> {sqlitecon::Prior %W}
|
||||
bind Sqlitecon <Control-p> {sqlitecon::Prior %W}
|
||||
bind Sqlitecon <Down> {sqlitecon::Next %W}
|
||||
bind Sqlitecon <Control-n> {sqlitecon::Next %W}
|
||||
bind Sqlitecon <Control-k> {sqlitecon::EraseEOL %W}
|
||||
bind Sqlitecon <<Cut>> {sqlitecon::Cut %W}
|
||||
bind Sqlitecon <<Copy>> {sqlitecon::Copy %W}
|
||||
bind Sqlitecon <<Paste>> {sqlitecon::Paste %W}
|
||||
bind Sqlitecon <<Clear>> {sqlitecon::Clear %W}
|
||||
|
||||
# Insert a single character at the insertion cursor
|
||||
#
|
||||
proc sqlitecon::Insert {w a} {
|
||||
$w insert insert $a
|
||||
$w yview insert
|
||||
}
|
||||
|
||||
# Move the cursor one character to the left
|
||||
#
|
||||
proc sqlitecon::Left {w} {
|
||||
upvar #0 $w v
|
||||
scan [$w index insert] %d.%d row col
|
||||
if {$col>$v(plength)} {
|
||||
$w mark set insert "insert -1c"
|
||||
}
|
||||
}
|
||||
|
||||
# Erase the character to the left of the cursor
|
||||
#
|
||||
proc sqlitecon::Backspace {w} {
|
||||
upvar #0 $w v
|
||||
scan [$w index insert] %d.%d row col
|
||||
if {$col>$v(plength)} {
|
||||
$w delete {insert -1c}
|
||||
}
|
||||
}
|
||||
|
||||
# Erase to the end of the line
|
||||
#
|
||||
proc sqlitecon::EraseEOL {w} {
|
||||
upvar #0 $w v
|
||||
scan [$w index insert] %d.%d row col
|
||||
if {$col>=$v(plength)} {
|
||||
$w delete insert {insert lineend}
|
||||
}
|
||||
}
|
||||
|
||||
# Move the cursor one character to the right
|
||||
#
|
||||
proc sqlitecon::Right {w} {
|
||||
$w mark set insert "insert +1c"
|
||||
}
|
||||
|
||||
# Erase the character to the right of the cursor
|
||||
#
|
||||
proc sqlitecon::Delete w {
|
||||
$w delete insert
|
||||
}
|
||||
|
||||
# Move the cursor to the beginning of the current line
|
||||
#
|
||||
proc sqlitecon::Home w {
|
||||
upvar #0 $w v
|
||||
scan [$w index insert] %d.%d row col
|
||||
$w mark set insert $row.$v(plength)
|
||||
}
|
||||
|
||||
# Move the cursor to the end of the current line
|
||||
#
|
||||
proc sqlitecon::End w {
|
||||
$w mark set insert {insert lineend}
|
||||
}
|
||||
|
||||
# Add a line to the history
|
||||
#
|
||||
proc sqlitecon::addHistory {w line} {
|
||||
upvar #0 $w v
|
||||
if {$v(historycnt)>0} {
|
||||
set last [lindex $v(history) [expr $v(historycnt)-1]]
|
||||
if {[string compare $last $line]} {
|
||||
lappend v(history) $line
|
||||
incr v(historycnt)
|
||||
}
|
||||
} else {
|
||||
set v(history) [list $line]
|
||||
set v(historycnt) 1
|
||||
}
|
||||
set v(current) $v(historycnt)
|
||||
}
|
||||
|
||||
# Called when "Enter" is pressed. Do something with the line
|
||||
# of text that was entered.
|
||||
#
|
||||
proc sqlitecon::Enter w {
|
||||
upvar #0 $w v
|
||||
scan [$w index insert] %d.%d row col
|
||||
set start $row.$v(plength)
|
||||
set line [$w get $start "$start lineend"]
|
||||
$w insert end \n
|
||||
$w mark set out end
|
||||
if {$v(prior)==""} {
|
||||
set cmd $line
|
||||
} else {
|
||||
set cmd $v(prior)\n$line
|
||||
}
|
||||
if {[string index $cmd 0]=="." || [$v(db) complete $cmd]} {
|
||||
regsub -all {\n} [string trim $cmd] { } cmd2
|
||||
addHistory $w $cmd2
|
||||
set rc [catch {DoCommand $w $cmd} res]
|
||||
if {![winfo exists $w]} return
|
||||
if {$rc} {
|
||||
$w insert end $res\n err
|
||||
} elseif {[string length $res]>0} {
|
||||
$w insert end $res\n ok
|
||||
}
|
||||
set v(prior) {}
|
||||
$w insert end $v(prompt)
|
||||
} else {
|
||||
set v(prior) $cmd
|
||||
regsub -all {[^ ]} $v(prompt) . x
|
||||
$w insert end $x
|
||||
}
|
||||
$w mark set insert end
|
||||
$w mark set out {insert linestart}
|
||||
$w yview insert
|
||||
}
|
||||
|
||||
# Execute a single SQL command. Pay special attention to control
|
||||
# directives that begin with "."
|
||||
#
|
||||
# The return value is the text output from the command, properly
|
||||
# formatted.
|
||||
#
|
||||
proc sqlitecon::DoCommand {w cmd} {
|
||||
upvar #0 $w v
|
||||
set mode $v(mode)
|
||||
set header $v(header)
|
||||
if {[regexp {^(\.[a-z]+)} $cmd all word]} {
|
||||
if {$word==".mode"} {
|
||||
regexp {^.[a-z]+ +([a-z]+)} $cmd all v(mode)
|
||||
return {}
|
||||
} elseif {$word==".exit"} {
|
||||
destroy [winfo toplevel $w]
|
||||
return {}
|
||||
} elseif {$word==".header"} {
|
||||
regexp {^.[a-z]+ +([a-z]+)} $cmd all v(header)
|
||||
return {}
|
||||
} elseif {$word==".tables"} {
|
||||
set mode multicolumn
|
||||
set cmd {SELECT name FROM sqlite_master WHERE type='table'
|
||||
UNION ALL
|
||||
SELECT name FROM sqlite_temp_master WHERE type='table'}
|
||||
$v(db) eval {PRAGMA database_list} {
|
||||
if {$name!="temp" && $name!="main"} {
|
||||
append cmd "UNION ALL SELECT name FROM $name.sqlite_master\
|
||||
WHERE type='table'"
|
||||
}
|
||||
}
|
||||
append cmd { ORDER BY 1}
|
||||
} elseif {$word==".fullschema"} {
|
||||
set pattern %
|
||||
regexp {^.[a-z]+ +([^ ]+)} $cmd all pattern
|
||||
set mode list
|
||||
set header 0
|
||||
set cmd "SELECT sql FROM sqlite_master WHERE tbl_name LIKE '$pattern'
|
||||
AND sql NOT NULL UNION ALL SELECT sql FROM sqlite_temp_master
|
||||
WHERE tbl_name LIKE '$pattern' AND sql NOT NULL"
|
||||
$v(db) eval {PRAGMA database_list} {
|
||||
if {$name!="temp" && $name!="main"} {
|
||||
append cmd " UNION ALL SELECT sql FROM $name.sqlite_master\
|
||||
WHERE tbl_name LIKE '$pattern' AND sql NOT NULL"
|
||||
}
|
||||
}
|
||||
} elseif {$word==".schema"} {
|
||||
set pattern %
|
||||
regexp {^.[a-z]+ +([^ ]+)} $cmd all pattern
|
||||
set mode list
|
||||
set header 0
|
||||
set cmd "SELECT sql FROM sqlite_master WHERE name LIKE '$pattern'
|
||||
AND sql NOT NULL UNION ALL SELECT sql FROM sqlite_temp_master
|
||||
WHERE name LIKE '$pattern' AND sql NOT NULL"
|
||||
$v(db) eval {PRAGMA database_list} {
|
||||
if {$name!="temp" && $name!="main"} {
|
||||
append cmd " UNION ALL SELECT sql FROM $name.sqlite_master\
|
||||
WHERE name LIKE '$pattern' AND sql NOT NULL"
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return \
|
||||
".exit\n.mode line|list|column\n.schema ?TABLENAME?\n.tables"
|
||||
}
|
||||
}
|
||||
set res {}
|
||||
if {$mode=="list"} {
|
||||
$v(db) eval $cmd x {
|
||||
set sep {}
|
||||
foreach col $x(*) {
|
||||
append res $sep$x($col)
|
||||
set sep |
|
||||
}
|
||||
append res \n
|
||||
}
|
||||
if {[info exists x(*)] && $header} {
|
||||
set sep {}
|
||||
set hdr {}
|
||||
foreach col $x(*) {
|
||||
append hdr $sep$col
|
||||
set sep |
|
||||
}
|
||||
set res $hdr\n$res
|
||||
}
|
||||
} elseif {[string range $mode 0 2]=="col"} {
|
||||
set y {}
|
||||
$v(db) eval $cmd x {
|
||||
foreach col $x(*) {
|
||||
if {![info exists cw($col)] || $cw($col)<[string length $x($col)]} {
|
||||
set cw($col) [string length $x($col)]
|
||||
}
|
||||
lappend y $x($col)
|
||||
}
|
||||
}
|
||||
if {[info exists x(*)] && $header} {
|
||||
set hdr {}
|
||||
set ln {}
|
||||
set dash ---------------------------------------------------------------
|
||||
append dash ------------------------------------------------------------
|
||||
foreach col $x(*) {
|
||||
if {![info exists cw($col)] || $cw($col)<[string length $col]} {
|
||||
set cw($col) [string length $col]
|
||||
}
|
||||
lappend hdr $col
|
||||
lappend ln [string range $dash 1 $cw($col)]
|
||||
}
|
||||
set y [concat $hdr $ln $y]
|
||||
}
|
||||
if {[info exists x(*)]} {
|
||||
set format {}
|
||||
set arglist {}
|
||||
set arglist2 {}
|
||||
set i 0
|
||||
foreach col $x(*) {
|
||||
lappend arglist x$i
|
||||
append arglist2 " \$x$i"
|
||||
incr i
|
||||
append format " %-$cw($col)s"
|
||||
}
|
||||
set format [string trimleft $format]\n
|
||||
if {[llength $arglist]>0} {
|
||||
foreach $arglist $y "append res \[format [list $format] $arglist2\]"
|
||||
}
|
||||
}
|
||||
} elseif {$mode=="multicolumn"} {
|
||||
set y [$v(db) eval $cmd]
|
||||
set max 0
|
||||
foreach e $y {
|
||||
if {$max<[string length $e]} {set max [string length $e]}
|
||||
}
|
||||
set ncol [expr {int(80/($max+2))}]
|
||||
if {$ncol<1} {set ncol 1}
|
||||
set nelem [llength $y]
|
||||
set nrow [expr {($nelem+$ncol-1)/$ncol}]
|
||||
set format "%-${max}s"
|
||||
for {set i 0} {$i<$nrow} {incr i} {
|
||||
set j $i
|
||||
while 1 {
|
||||
append res [format $format [lindex $y $j]]
|
||||
incr j $nrow
|
||||
if {$j>=$nelem} break
|
||||
append res { }
|
||||
}
|
||||
append res \n
|
||||
}
|
||||
} else {
|
||||
$v(db) eval $cmd x {
|
||||
foreach col $x(*) {append res "$col = $x($col)\n"}
|
||||
append res \n
|
||||
}
|
||||
}
|
||||
return [string trimright $res]
|
||||
}
|
||||
|
||||
# Change the line to the previous line
|
||||
#
|
||||
proc sqlitecon::Prior w {
|
||||
upvar #0 $w v
|
||||
if {$v(current)<=0} return
|
||||
incr v(current) -1
|
||||
set line [lindex $v(history) $v(current)]
|
||||
sqlitecon::SetLine $w $line
|
||||
}
|
||||
|
||||
# Change the line to the next line
|
||||
#
|
||||
proc sqlitecon::Next w {
|
||||
upvar #0 $w v
|
||||
if {$v(current)>=$v(historycnt)} return
|
||||
incr v(current) 1
|
||||
set line [lindex $v(history) $v(current)]
|
||||
sqlitecon::SetLine $w $line
|
||||
}
|
||||
|
||||
# Change the contents of the entry line
|
||||
#
|
||||
proc sqlitecon::SetLine {w line} {
|
||||
upvar #0 $w v
|
||||
scan [$w index insert] %d.%d row col
|
||||
set start $row.$v(plength)
|
||||
$w delete $start end
|
||||
$w insert end $line
|
||||
$w mark set insert end
|
||||
$w yview insert
|
||||
}
|
||||
|
||||
# Called when the mouse button is pressed at position $x,$y on
|
||||
# the console widget.
|
||||
#
|
||||
proc sqlitecon::Button1 {w x y} {
|
||||
global tkPriv
|
||||
upvar #0 $w v
|
||||
set v(mouseMoved) 0
|
||||
set v(pressX) $x
|
||||
set p [sqlitecon::nearestBoundry $w $x $y]
|
||||
scan [$w index insert] %d.%d ix iy
|
||||
scan $p %d.%d px py
|
||||
if {$px==$ix} {
|
||||
$w mark set insert $p
|
||||
}
|
||||
$w mark set anchor $p
|
||||
focus $w
|
||||
}
|
||||
|
||||
# Find the boundary between characters that is nearest
|
||||
# to $x,$y
|
||||
#
|
||||
proc sqlitecon::nearestBoundry {w x y} {
|
||||
set p [$w index @$x,$y]
|
||||
set bb [$w bbox $p]
|
||||
if {![string compare $bb ""]} {return $p}
|
||||
if {($x-[lindex $bb 0])<([lindex $bb 2]/2)} {return $p}
|
||||
$w index "$p + 1 char"
|
||||
}
|
||||
|
||||
# This routine extends the selection to the point specified by $x,$y
|
||||
#
|
||||
proc sqlitecon::SelectTo {w x y} {
|
||||
upvar #0 $w v
|
||||
set cur [sqlitecon::nearestBoundry $w $x $y]
|
||||
if {[catch {$w index anchor}]} {
|
||||
$w mark set anchor $cur
|
||||
}
|
||||
set anchor [$w index anchor]
|
||||
if {[$w compare $cur != $anchor] || (abs($v(pressX) - $x) >= 3)} {
|
||||
if {$v(mouseMoved)==0} {
|
||||
$w tag remove sel 0.0 end
|
||||
}
|
||||
set v(mouseMoved) 1
|
||||
}
|
||||
if {[$w compare $cur < anchor]} {
|
||||
set first $cur
|
||||
set last anchor
|
||||
} else {
|
||||
set first anchor
|
||||
set last $cur
|
||||
}
|
||||
if {$v(mouseMoved)} {
|
||||
$w tag remove sel 0.0 $first
|
||||
$w tag add sel $first $last
|
||||
$w tag remove sel $last end
|
||||
update idletasks
|
||||
}
|
||||
}
|
||||
|
||||
# Called whenever the mouse moves while button-1 is held down.
|
||||
#
|
||||
proc sqlitecon::B1Motion {w x y} {
|
||||
upvar #0 $w v
|
||||
set v(y) $y
|
||||
set v(x) $x
|
||||
sqlitecon::SelectTo $w $x $y
|
||||
}
|
||||
|
||||
# Called whenever the mouse leaves the boundaries of the widget
|
||||
# while button 1 is held down.
|
||||
#
|
||||
proc sqlitecon::B1Leave {w x y} {
|
||||
upvar #0 $w v
|
||||
set v(y) $y
|
||||
set v(x) $x
|
||||
sqlitecon::motor $w
|
||||
}
|
||||
|
||||
# This routine is called to automatically scroll the window when
|
||||
# the mouse drags offscreen.
|
||||
#
|
||||
proc sqlitecon::motor w {
|
||||
upvar #0 $w v
|
||||
if {![winfo exists $w]} return
|
||||
if {$v(y)>=[winfo height $w]} {
|
||||
$w yview scroll 1 units
|
||||
} elseif {$v(y)<0} {
|
||||
$w yview scroll -1 units
|
||||
} else {
|
||||
return
|
||||
}
|
||||
sqlitecon::SelectTo $w $v(x) $v(y)
|
||||
set v(timer) [after 50 sqlitecon::motor $w]
|
||||
}
|
||||
|
||||
# This routine cancels the scrolling motor if it is active
|
||||
#
|
||||
proc sqlitecon::cancelMotor w {
|
||||
upvar #0 $w v
|
||||
catch {after cancel $v(timer)}
|
||||
catch {unset v(timer)}
|
||||
}
|
||||
|
||||
# Do a Copy operation on the stuff currently selected.
|
||||
#
|
||||
proc sqlitecon::Copy w {
|
||||
if {![catch {set text [$w get sel.first sel.last]}]} {
|
||||
clipboard clear -displayof $w
|
||||
clipboard append -displayof $w $text
|
||||
}
|
||||
}
|
||||
|
||||
# Return 1 if the selection exists and is contained
|
||||
# entirely on the input line. Return 2 if the selection
|
||||
# exists but is not entirely on the input line. Return 0
|
||||
# if the selection does not exist.
|
||||
#
|
||||
proc sqlitecon::canCut w {
|
||||
set r [catch {
|
||||
scan [$w index sel.first] %d.%d s1x s1y
|
||||
scan [$w index sel.last] %d.%d s2x s2y
|
||||
scan [$w index insert] %d.%d ix iy
|
||||
}]
|
||||
if {$r==1} {return 0}
|
||||
if {$s1x==$ix && $s2x==$ix} {return 1}
|
||||
return 2
|
||||
}
|
||||
|
||||
# Do a Cut operation if possible. Cuts are only allowed
|
||||
# if the current selection is entirely contained on the
|
||||
# current input line.
|
||||
#
|
||||
proc sqlitecon::Cut w {
|
||||
if {[sqlitecon::canCut $w]==1} {
|
||||
sqlitecon::Copy $w
|
||||
$w delete sel.first sel.last
|
||||
}
|
||||
}
|
||||
|
||||
# Do a paste operation.
|
||||
#
|
||||
proc sqlitecon::Paste w {
|
||||
if {[sqlitecon::canCut $w]==1} {
|
||||
$w delete sel.first sel.last
|
||||
}
|
||||
if {[catch {selection get -displayof $w -selection CLIPBOARD} topaste]
|
||||
&& [catch {selection get -displayof $w -selection PRIMARY} topaste]} {
|
||||
return
|
||||
}
|
||||
if {[info exists ::$w]} {
|
||||
set prior 0
|
||||
foreach line [split $topaste \n] {
|
||||
if {$prior} {
|
||||
sqlitecon::Enter $w
|
||||
update
|
||||
}
|
||||
set prior 1
|
||||
$w insert insert $line
|
||||
}
|
||||
} else {
|
||||
$w insert insert $topaste
|
||||
}
|
||||
}
|
||||
|
||||
# Enable or disable entries in the Edit menu
|
||||
#
|
||||
proc sqlitecon::EnableEditMenu w {
|
||||
upvar #0 $w.t v
|
||||
set m $v(editmenu)
|
||||
if {$m=="" || ![winfo exists $m]} return
|
||||
switch [sqlitecon::canCut $w.t] {
|
||||
0 {
|
||||
$m entryconf Copy -state disabled
|
||||
$m entryconf Cut -state disabled
|
||||
}
|
||||
1 {
|
||||
$m entryconf Copy -state normal
|
||||
$m entryconf Cut -state normal
|
||||
}
|
||||
2 {
|
||||
$m entryconf Copy -state normal
|
||||
$m entryconf Cut -state disabled
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Prompt the user for the name of a writable file. Then write the
|
||||
# entire contents of the console screen to that file.
|
||||
#
|
||||
proc sqlitecon::SaveFile w {
|
||||
set types {
|
||||
{{Text Files} {.txt}}
|
||||
{{All Files} *}
|
||||
}
|
||||
set f [tk_getSaveFile -filetypes $types -title "Write Screen To..."]
|
||||
if {$f!=""} {
|
||||
if {[catch {open $f w} fd]} {
|
||||
tk_messageBox -type ok -icon error -message $fd
|
||||
} else {
|
||||
puts $fd [string trimright [$w get 1.0 end] \n]
|
||||
close $fd
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Erase everything from the console above the insertion line.
|
||||
#
|
||||
proc sqlitecon::Clear w {
|
||||
$w delete 1.0 {insert linestart}
|
||||
}
|
||||
|
||||
# An in-line editor for SQL
|
||||
#
|
||||
proc sqlitecon::_edit {origtxt {title {}}} {
|
||||
for {set i 0} {[winfo exists .ed$i]} {incr i} continue
|
||||
set w .ed$i
|
||||
toplevel $w
|
||||
wm protocol $w WM_DELETE_WINDOW "$w.b.can invoke"
|
||||
wm title $w {Inline SQL Editor}
|
||||
frame $w.b
|
||||
pack $w.b -side bottom -fill x
|
||||
button $w.b.can -text Cancel -width 6 -command [list set ::$w 0]
|
||||
button $w.b.ok -text OK -width 6 -command [list set ::$w 1]
|
||||
button $w.b.cut -text Cut -width 6 -command [list ::sqlitecon::Cut $w.t]
|
||||
button $w.b.copy -text Copy -width 6 -command [list ::sqlitecon::Copy $w.t]
|
||||
button $w.b.paste -text Paste -width 6 -command [list ::sqlitecon::Paste $w.t]
|
||||
set ::$w {}
|
||||
pack $w.b.cut $w.b.copy $w.b.paste $w.b.can $w.b.ok\
|
||||
-side left -padx 5 -pady 5 -expand 1
|
||||
if {$title!=""} {
|
||||
label $w.title -text $title
|
||||
pack $w.title -side top -padx 5 -pady 5
|
||||
}
|
||||
text $w.t -bg white -fg black -yscrollcommand [list $w.sb set]
|
||||
pack $w.t -side left -fill both -expand 1
|
||||
scrollbar $w.sb -orient vertical -command [list $w.t yview]
|
||||
pack $w.sb -side left -fill y
|
||||
$w.t insert end $origtxt
|
||||
|
||||
vwait ::$w
|
||||
|
||||
if {[set ::$w]} {
|
||||
set txt [string trimright [$w.t get 1.0 end]]
|
||||
} else {
|
||||
set txt $origtxt
|
||||
}
|
||||
destroy $w
|
||||
return $txt
|
||||
}
|
||||
@ -1,892 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>The Lemon Parser Generator</title>
|
||||
</head>
|
||||
<body bgcolor=white>
|
||||
<h1 align=center>The Lemon Parser Generator</h1>
|
||||
|
||||
<p>Lemon is an LALR(1) parser generator for C or C++.
|
||||
It does the same job as ``bison'' and ``yacc''.
|
||||
But lemon is not another bison or yacc clone. It
|
||||
uses a different grammar syntax which is designed to
|
||||
reduce the number of coding errors. Lemon also uses a more
|
||||
sophisticated parsing engine that is faster than yacc and
|
||||
bison and which is both reentrant and thread-safe.
|
||||
Furthermore, Lemon implements features that can be used
|
||||
to eliminate resource leaks, making it suitable for use
|
||||
in long-running programs such as graphical user interfaces
|
||||
or embedded controllers.</p>
|
||||
|
||||
<p>This document is an introduction to the Lemon
|
||||
parser generator.</p>
|
||||
|
||||
<h2>Theory of Operation</h2>
|
||||
|
||||
<p>The main goal of Lemon is to translate a context free grammar (CFG)
|
||||
for a particular language into C code that implements a parser for
|
||||
that language.
|
||||
The program has two inputs:
|
||||
<ul>
|
||||
<li>The grammar specification.
|
||||
<li>A parser template file.
|
||||
</ul>
|
||||
Typically, only the grammar specification is supplied by the programmer.
|
||||
Lemon comes with a default parser template which works fine for most
|
||||
applications. But the user is free to substitute a different parser
|
||||
template if desired.</p>
|
||||
|
||||
<p>Depending on command-line options, Lemon will generate between
|
||||
one and three files of outputs.
|
||||
<ul>
|
||||
<li>C code to implement the parser.
|
||||
<li>A header file defining an integer ID for each terminal symbol.
|
||||
<li>An information file that describes the states of the generated parser
|
||||
automaton.
|
||||
</ul>
|
||||
By default, all three of these output files are generated.
|
||||
The header file is suppressed if the ``-m'' command-line option is
|
||||
used and the report file is omitted when ``-q'' is selected.</p>
|
||||
|
||||
<p>The grammar specification file uses a ``.y'' suffix, by convention.
|
||||
In the examples used in this document, we'll assume the name of the
|
||||
grammar file is ``gram.y''. A typical use of Lemon would be the
|
||||
following command:
|
||||
<pre>
|
||||
lemon gram.y
|
||||
</pre>
|
||||
This command will generate three output files named ``gram.c'',
|
||||
``gram.h'' and ``gram.out''.
|
||||
The first is C code to implement the parser. The second
|
||||
is the header file that defines numerical values for all
|
||||
terminal symbols, and the last is the report that explains
|
||||
the states used by the parser automaton.</p>
|
||||
|
||||
<h3>Command Line Options</h3>
|
||||
|
||||
<p>The behavior of Lemon can be modified using command-line options.
|
||||
You can obtain a list of the available command-line options together
|
||||
with a brief explanation of what each does by typing
|
||||
<pre>
|
||||
lemon -?
|
||||
</pre>
|
||||
As of this writing, the following command-line options are supported:
|
||||
<ul>
|
||||
<li><tt>-b</tt>
|
||||
<li><tt>-c</tt>
|
||||
<li><tt>-g</tt>
|
||||
<li><tt>-m</tt>
|
||||
<li><tt>-q</tt>
|
||||
<li><tt>-s</tt>
|
||||
<li><tt>-x</tt>
|
||||
</ul>
|
||||
The ``-b'' option reduces the amount of text in the report file by
|
||||
printing only the basis of each parser state, rather than the full
|
||||
configuration.
|
||||
The ``-c'' option suppresses action table compression. Using -c
|
||||
will make the parser a little larger and slower but it will detect
|
||||
syntax errors sooner.
|
||||
The ``-g'' option causes no output files to be generated at all.
|
||||
Instead, the input grammar file is printed on standard output but
|
||||
with all comments, actions and other extraneous text deleted. This
|
||||
is a useful way to get a quick summary of a grammar.
|
||||
The ``-m'' option causes the output C source file to be compatible
|
||||
with the ``makeheaders'' program.
|
||||
Makeheaders is a program that automatically generates header files
|
||||
from C source code. When the ``-m'' option is used, the header
|
||||
file is not output since the makeheaders program will take care
|
||||
of generating all header files automatically.
|
||||
The ``-q'' option suppresses the report file.
|
||||
Using ``-s'' causes a brief summary of parser statistics to be
|
||||
printed. Like this:
|
||||
<pre>
|
||||
Parser statistics: 74 terminals, 70 nonterminals, 179 rules
|
||||
340 states, 2026 parser table entries, 0 conflicts
|
||||
</pre>
|
||||
Finally, the ``-x'' option causes Lemon to print its version number
|
||||
and then stop without attempting to read the grammar or generate a parser.</p>
|
||||
|
||||
<h3>The Parser Interface</h3>
|
||||
|
||||
<p>Lemon doesn't generate a complete, working program. It only generates
|
||||
a few subroutines that implement a parser. This section describes
|
||||
the interface to those subroutines. It is up to the programmer to
|
||||
call these subroutines in an appropriate way in order to produce a
|
||||
complete system.</p>
|
||||
|
||||
<p>Before a program begins using a Lemon-generated parser, the program
|
||||
must first create the parser.
|
||||
A new parser is created as follows:
|
||||
<pre>
|
||||
void *pParser = ParseAlloc( malloc );
|
||||
</pre>
|
||||
The ParseAlloc() routine allocates and initializes a new parser and
|
||||
returns a pointer to it.
|
||||
The actual data structure used to represent a parser is opaque --
|
||||
its internal structure is not visible or usable by the calling routine.
|
||||
For this reason, the ParseAlloc() routine returns a pointer to void
|
||||
rather than a pointer to some particular structure.
|
||||
The sole argument to the ParseAlloc() routine is a pointer to the
|
||||
subroutine used to allocate memory. Typically this means ``malloc()''.</p>
|
||||
|
||||
<p>After a program is finished using a parser, it can reclaim all
|
||||
memory allocated by that parser by calling
|
||||
<pre>
|
||||
ParseFree(pParser, free);
|
||||
</pre>
|
||||
The first argument is the same pointer returned by ParseAlloc(). The
|
||||
second argument is a pointer to the function used to release bulk
|
||||
memory back to the system.</p>
|
||||
|
||||
<p>After a parser has been allocated using ParseAlloc(), the programmer
|
||||
must supply the parser with a sequence of tokens (terminal symbols) to
|
||||
be parsed. This is accomplished by calling the following function
|
||||
once for each token:
|
||||
<pre>
|
||||
Parse(pParser, hTokenID, sTokenData, pArg);
|
||||
</pre>
|
||||
The first argument to the Parse() routine is the pointer returned by
|
||||
ParseAlloc().
|
||||
The second argument is a small positive integer that tells the parser the
|
||||
type of the next token in the data stream.
|
||||
There is one token type for each terminal symbol in the grammar.
|
||||
The gram.h file generated by Lemon contains #define statements that
|
||||
map symbolic terminal symbol names into appropriate integer values.
|
||||
(A value of 0 for the second argument is a special flag to the
|
||||
parser to indicate that the end of input has been reached.)
|
||||
The third argument is the value of the given token. By default,
|
||||
the type of the third argument is integer, but the grammar will
|
||||
usually redefine this type to be some kind of structure.
|
||||
Typically the second argument will be a broad category of tokens
|
||||
such as ``identifier'' or ``number'' and the third argument will
|
||||
be the name of the identifier or the value of the number.</p>
|
||||
|
||||
<p>The Parse() function may have either three or four arguments,
|
||||
depending on the grammar. If the grammar specification file requests
|
||||
it, the Parse() function will have a fourth parameter that can be
|
||||
of any type chosen by the programmer. The parser doesn't do anything
|
||||
with this argument except to pass it through to action routines.
|
||||
This is a convenient mechanism for passing state information down
|
||||
to the action routines without having to use global variables.</p>
|
||||
|
||||
<p>A typical use of a Lemon parser might look something like the
|
||||
following:
|
||||
<pre>
|
||||
01 ParseTree *ParseFile(const char *zFilename){
|
||||
02 Tokenizer *pTokenizer;
|
||||
03 void *pParser;
|
||||
04 Token sToken;
|
||||
05 int hTokenId;
|
||||
06 ParserState sState;
|
||||
07
|
||||
08 pTokenizer = TokenizerCreate(zFilename);
|
||||
09 pParser = ParseAlloc( malloc );
|
||||
10 InitParserState(&sState);
|
||||
11 while( GetNextToken(pTokenizer, &hTokenId, &sToken) ){
|
||||
12 Parse(pParser, hTokenId, sToken, &sState);
|
||||
13 }
|
||||
14 Parse(pParser, 0, sToken, &sState);
|
||||
15 ParseFree(pParser, free );
|
||||
16 TokenizerFree(pTokenizer);
|
||||
17 return sState.treeRoot;
|
||||
18 }
|
||||
</pre>
|
||||
This example shows a user-written routine that parses a file of
|
||||
text and returns a pointer to the parse tree.
|
||||
(We've omitted all error-handling from this example to keep it
|
||||
simple.)
|
||||
We assume the existence of some kind of tokenizer which is created
|
||||
using TokenizerCreate() on line 8 and deleted by TokenizerFree()
|
||||
on line 16. The GetNextToken() function on line 11 retrieves the
|
||||
next token from the input file and puts its type in the
|
||||
integer variable hTokenId. The sToken variable is assumed to be
|
||||
some kind of structure that contains details about each token,
|
||||
such as its complete text, what line it occurs on, etc. </p>
|
||||
|
||||
<p>This example also assumes the existence of a structure of type
|
||||
ParserState that holds state information about a particular parse.
|
||||
An instance of such a structure is created on line 6 and initialized
|
||||
on line 10. A pointer to this structure is passed into the Parse()
|
||||
routine as the optional 4th argument.
|
||||
The action routine specified by the grammar for the parser can use
|
||||
the ParserState structure to hold whatever information is useful and
|
||||
appropriate. In the example, we note that the treeRoot field of
|
||||
the ParserState structure is left pointing to the root of the parse
|
||||
tree.</p>
|
||||
|
||||
<p>The core of this example as it relates to Lemon is as follows:
|
||||
<pre>
|
||||
ParseFile(){
|
||||
pParser = ParseAlloc( malloc );
|
||||
while( GetNextToken(pTokenizer,&hTokenId, &sToken) ){
|
||||
Parse(pParser, hTokenId, sToken);
|
||||
}
|
||||
Parse(pParser, 0, sToken);
|
||||
ParseFree(pParser, free );
|
||||
}
|
||||
</pre>
|
||||
Basically, what a program has to do to use a Lemon-generated parser
|
||||
is first create the parser, then send it lots of tokens obtained by
|
||||
tokenizing an input source. When the end of input is reached, the
|
||||
Parse() routine should be called one last time with a token type
|
||||
of 0. This step is necessary to inform the parser that the end of
|
||||
input has been reached. Finally, we reclaim memory used by the
|
||||
parser by calling ParseFree().</p>
|
||||
|
||||
<p>There is one other interface routine that should be mentioned
|
||||
before we move on.
|
||||
The ParseTrace() function can be used to generate debugging output
|
||||
from the parser. A prototype for this routine is as follows:
|
||||
<pre>
|
||||
ParseTrace(FILE *stream, char *zPrefix);
|
||||
</pre>
|
||||
After this routine is called, a short (one-line) message is written
|
||||
to the designated output stream every time the parser changes states
|
||||
or calls an action routine. Each such message is prefaced using
|
||||
the text given by zPrefix. This debugging output can be turned off
|
||||
by calling ParseTrace() again with a first argument of NULL (0).</p>
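<p>As a hedged illustration (this fragment is not part of the original
document), a caller might enable tracing only in debug builds and switch it
off again afterwards; the DEBUG macro and the "parser>> " prefix are
arbitrary choices, and pTokenizer, hTokenId and sToken are the same
hypothetical objects used in the earlier example:
<pre>
#ifdef DEBUG
  ParseTrace(stderr, "parser>> ");   /* one line per state change or action */
#endif
  while( GetNextToken(pTokenizer, &hTokenId, &sToken) ){
    Parse(pParser, hTokenId, sToken);
  }
  Parse(pParser, 0, sToken);
#ifdef DEBUG
  ParseTrace(0, 0);                  /* turn the debugging output off again */
#endif
</pre>
</p>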
|
||||
|
||||
<h3>Differences With YACC and BISON</h3>
|
||||
|
||||
<p>Programmers who have previously used the yacc or bison parser
|
||||
generator will notice several important differences between yacc and/or
|
||||
bison and Lemon.
|
||||
<ul>
|
||||
<li>In yacc and bison, the parser calls the tokenizer. In Lemon,
|
||||
the tokenizer calls the parser.
|
||||
<li>Lemon uses no global variables. Yacc and bison use global variables
|
||||
to pass information between the tokenizer and parser.
|
||||
<li>Lemon allows multiple parsers to be running simultaneously. Yacc
|
||||
and bison do not.
|
||||
</ul>
|
||||
These differences may cause some initial confusion for programmers
|
||||
with prior yacc and bison experience.
|
||||
But after years of experience using Lemon, I firmly
|
||||
believe that the Lemon way of doing things is better.</p>
|
||||
|
||||
<h2>Input File Syntax</h2>
|
||||
|
||||
<p>The main purpose of the grammar specification file for Lemon is
|
||||
to define the grammar for the parser. But the input file also
|
||||
specifies additional information Lemon requires to do its job.
|
||||
Most of the work in using Lemon is in writing an appropriate
|
||||
grammar file.</p>
|
||||
|
||||
<p>The grammar file for lemon is, for the most part, free format.
|
||||
It does not have sections or divisions like yacc or bison. Any
|
||||
declaration can occur at any point in the file.
|
||||
Lemon ignores whitespace (except where it is needed to separate
|
||||
tokens) and it honors the same commenting conventions as C and C++.</p>
|
||||
|
||||
<h3>Terminals and Nonterminals</h3>
|
||||
|
||||
<p>A terminal symbol (token) is any string of alphanumeric
|
||||
and underscore characters
|
||||
that begins with an upper case letter.
|
||||
A terminal can contain lowercase letters after the first character,
|
||||
but the usual convention is to make terminals all upper case.
|
||||
A nonterminal, on the other hand, is any string of alphanumeric
|
||||
and underscore characters that begins with a lower case letter.
|
||||
Again, the usual convention is to make nonterminals use all lower
|
||||
case letters.</p>
|
||||
|
||||
<p>In Lemon, terminal and nonterminal symbols do not need to
|
||||
be declared or identified in a separate section of the grammar file.
|
||||
Lemon is able to generate a list of all terminals and nonterminals
|
||||
by examining the grammar rules, and it can always distinguish a
|
||||
terminal from a nonterminal by checking the case of the first
|
||||
character of the name.</p>
|
||||
|
||||
<p>Yacc and bison allow terminal symbols to have either alphanumeric
|
||||
names or to be individual characters included in single quotes, like
|
||||
this: ')' or '$'. Lemon does not allow this alternative form for
|
||||
terminal symbols. With Lemon, all symbols, terminals and nonterminals,
|
||||
must have alphanumeric names.</p>
|
||||
|
||||
<h3>Grammar Rules</h3>
|
||||
|
||||
<p>The main component of a Lemon grammar file is a sequence of grammar
|
||||
rules.
|
||||
Each grammar rule consists of a nonterminal symbol followed by
|
||||
the special symbol ``::='' and then a list of terminals and/or nonterminals.
|
||||
The rule is terminated by a period.
|
||||
The list of terminals and nonterminals on the right-hand side of the
|
||||
rule can be empty.
|
||||
Rules can occur in any order, except that the left-hand side of the
|
||||
first rule is assumed to be the start symbol for the grammar (unless
|
||||
specified otherwise using the <tt>%start</tt> directive described below.)
|
||||
A typical sequence of grammar rules might look something like this:
|
||||
<pre>
|
||||
expr ::= expr PLUS expr.
|
||||
expr ::= expr TIMES expr.
|
||||
expr ::= LPAREN expr RPAREN.
|
||||
expr ::= VALUE.
|
||||
</pre>
|
||||
</p>
|
||||
|
||||
<p>There is one non-terminal in this example, ``expr'', and five
|
||||
terminal symbols or tokens: ``PLUS'', ``TIMES'', ``LPAREN'',
|
||||
``RPAREN'' and ``VALUE''.</p>
|
||||
|
||||
<p>Like yacc and bison, Lemon allows the grammar to specify a block
|
||||
of C code that will be executed whenever a grammar rule is reduced
|
||||
by the parser.
|
||||
In Lemon, this action is specified by putting the C code (contained
|
||||
within curly braces <tt>{...}</tt>) immediately after the
|
||||
period that closes the rule.
|
||||
For example:
|
||||
<pre>
|
||||
expr ::= expr PLUS expr. { printf("Doing an addition...\n"); }
|
||||
</pre>
|
||||
</p>
|
||||
|
||||
<p>In order to be useful, grammar actions must normally be linked to
|
||||
their associated grammar rules.
|
||||
In yacc and bison, this is accomplished by embedding a ``$$'' in the
|
||||
action to stand for the value of the left-hand side of the rule and
|
||||
symbols ``$1'', ``$2'', and so forth to stand for the value of
|
||||
the terminal or nonterminal at position 1, 2 and so forth on the
|
||||
right-hand side of the rule.
|
||||
This idea is very powerful, but it is also very error-prone. The
|
||||
single most common source of errors in a yacc or bison grammar is
|
||||
to miscount the number of symbols on the right-hand side of a grammar
|
||||
rule and say ``$7'' when you really mean ``$8''.</p>
|
||||
|
||||
<p>Lemon avoids the need to count grammar symbols by assigning symbolic
|
||||
names to each symbol in a grammar rule and then using those symbolic
|
||||
names in the action.
|
||||
In yacc or bison, one would write this:
|
||||
<pre>
|
||||
expr -> expr PLUS expr { $$ = $1 + $3; };
|
||||
</pre>
|
||||
But in Lemon, the same rule becomes the following:
|
||||
<pre>
|
||||
expr(A) ::= expr(B) PLUS expr(C). { A = B+C; }
|
||||
</pre>
|
||||
In the Lemon rule, any symbol in parentheses after a grammar rule
|
||||
symbol becomes a place holder for that symbol in the grammar rule.
|
||||
This place holder can then be used in the associated C action to
|
||||
stand for the value of that symbol.</p>
|
||||
|
||||
<p>The Lemon notation for linking a grammar rule with its reduce
|
||||
action is superior to yacc/bison on several counts.
|
||||
First, as mentioned above, the Lemon method avoids the need to
|
||||
count grammar symbols.
|
||||
Secondly, if a terminal or nonterminal in a Lemon grammar rule
|
||||
includes a linking symbol in parentheses but that linking symbol
|
||||
is not actually used in the reduce action, then an error message
|
||||
is generated.
|
||||
For example, the rule
|
||||
<pre>
|
||||
expr(A) ::= expr(B) PLUS expr(C). { A = B; }
|
||||
</pre>
|
||||
will generate an error because the linking symbol ``C'' is used
|
||||
in the grammar rule but not in the reduce action.</p>
|
||||
|
||||
<p>The Lemon notation for linking grammar rules to reduce actions
|
||||
also facilitates the use of destructors for reclaiming memory
|
||||
allocated by the values of terminals and nonterminals on the
|
||||
right-hand side of a rule.</p>
|
||||
|
||||
<h3>Precedence Rules</h3>
|
||||
|
||||
<p>Lemon resolves parsing ambiguities in exactly the same way as
|
||||
yacc and bison. A shift-reduce conflict is resolved in favor
|
||||
of the shift, and a reduce-reduce conflict is resolved by reducing
|
||||
whichever rule comes first in the grammar file.</p>
|
||||
|
||||
<p>Just like in
|
||||
yacc and bison, Lemon allows a measure of control
|
||||
over the resolution of parsing conflicts using precedence rules.
|
||||
A precedence value can be assigned to any terminal symbol
|
||||
using the %left, %right or %nonassoc directives. Terminal symbols
|
||||
mentioned in earlier directives have a lower precedence than
|
||||
terminal symbols mentioned in later directives. For example:</p>
|
||||
|
||||
<p><pre>
|
||||
%left AND.
|
||||
%left OR.
|
||||
%nonassoc EQ NE GT GE LT LE.
|
||||
%left PLUS MINUS.
|
||||
%left TIMES DIVIDE MOD.
|
||||
%right EXP NOT.
|
||||
</pre></p>
|
||||
|
||||
<p>In the preceding sequence of directives, the AND operator is
|
||||
defined to have the lowest precedence. The OR operator is one
|
||||
precedence level higher. And so forth. Hence, the grammar would
|
||||
attempt to group the ambiguous expression
|
||||
<pre>
|
||||
a AND b OR c
|
||||
</pre>
|
||||
like this
|
||||
<pre>
|
||||
a AND (b OR c).
|
||||
</pre>
|
||||
The associativity (left, right or nonassoc) is used to determine
|
||||
the grouping when the precedence is the same. AND is left-associative
|
||||
in our example, so
|
||||
<pre>
|
||||
a AND b AND c
|
||||
</pre>
|
||||
is parsed like this
|
||||
<pre>
|
||||
(a AND b) AND c.
|
||||
</pre>
|
||||
The EXP operator is right-associative, though, so
|
||||
<pre>
|
||||
a EXP b EXP c
|
||||
</pre>
|
||||
is parsed like this
|
||||
<pre>
|
||||
a EXP (b EXP c).
|
||||
</pre>
|
||||
The nonassoc precedence is used for non-associative operators.
|
||||
So
|
||||
<pre>
|
||||
a EQ b EQ c
|
||||
</pre>
|
||||
is an error.</p>
|
||||
|
||||
<p>The precedence of non-terminals is transferred to rules as follows:
|
||||
The precedence of a grammar rule is equal to the precedence of the
|
||||
left-most terminal symbol in the rule for which a precedence is
|
||||
defined. This is normally what you want, but in those cases where
|
||||
you want the precedence of a grammar rule to be something different,
|
||||
you can specify an alternative precedence symbol by putting the
|
||||
symbol in square brackets after the period at the end of the rule and
|
||||
before any C-code. For example:</p>
|
||||
|
||||
<p><pre>
|
||||
expr = MINUS expr. [NOT]
|
||||
</pre></p>
|
||||
|
||||
<p>This rule has a precedence equal to that of the NOT symbol, not the
|
||||
MINUS symbol as would have been the case by default.</p>
|
||||
|
||||
<p>With the knowledge of how precedence is assigned to terminal
|
||||
symbols and individual
|
||||
grammar rules, we can now explain precisely how parsing conflicts
|
||||
are resolved in Lemon. Shift-reduce conflicts are resolved
|
||||
as follows:
|
||||
<ul>
|
||||
<li> If either the token to be shifted or the rule to be reduced
|
||||
lacks precedence information, then resolve in favor of the
|
||||
shift, but report a parsing conflict.
|
||||
<li> If the precedence of the token to be shifted is greater than
|
||||
the precedence of the rule to reduce, then resolve in favor
|
||||
of the shift. No parsing conflict is reported.
|
||||
<li> If the precedence of the token to be shifted is less than the
|
||||
precedence of the rule to reduce, then resolve in favor of the
|
||||
reduce action. No parsing conflict is reported.
|
||||
<li> If the precedences are the same and the shift token is
|
||||
right-associative, then resolve in favor of the shift.
|
||||
No parsing conflict is reported.
|
||||
<li> If the precedences are the same and the shift token is
|
||||
left-associative, then resolve in favor of the reduce.
|
||||
No parsing conflict is reported.
|
||||
<li> Otherwise, resolve the conflict by doing the shift and
|
||||
report the parsing conflict.
|
||||
</ul>
|
||||
Reduce-reduce conflicts are resolved this way:
|
||||
<ul>
|
||||
<li> If either reduce rule
|
||||
lacks precedence information, then resolve in favor of the
|
||||
rule that appears first in the grammar and report a parsing
|
||||
conflict.
|
||||
<li> If both rules have precedence and the precedence is different
|
||||
then resolve the dispute in favor of the rule with the highest
|
||||
precedence and do not report a conflict.
|
||||
<li> Otherwise, resolve the conflict by reducing by the rule that
|
||||
appears first in the grammar and report a parsing conflict.
|
||||
</ul>
|
||||
|
||||
<h3>Special Directives</h3>
|
||||
|
||||
<p>The input grammar to Lemon consists of grammar rules and special
|
||||
directives. We've described all the grammar rules, so now we'll
|
||||
talk about the special directives.</p>
|
||||
|
||||
<p>Directives in lemon can occur in any order. You can put them before
|
||||
the grammar rules, or after the grammar rules, or in the midst of the
|
||||
grammar rules. It doesn't matter. The relative order of
|
||||
directives used to assign precedence to terminals is important, but
|
||||
other than that, the order of directives in Lemon is arbitrary.</p>
|
||||
|
||||
<p>Lemon supports the following special directives:
|
||||
<ul>
|
||||
<li><tt>%code</tt>
|
||||
<li><tt>%default_destructor</tt>
|
||||
<li><tt>%default_type</tt>
|
||||
<li><tt>%destructor</tt>
|
||||
<li><tt>%extra_argument</tt>
|
||||
<li><tt>%include</tt>
|
||||
<li><tt>%left</tt>
|
||||
<li><tt>%name</tt>
|
||||
<li><tt>%nonassoc</tt>
|
||||
<li><tt>%parse_accept</tt>
|
||||
<li><tt>%parse_failure </tt>
|
||||
<li><tt>%right</tt>
|
||||
<li><tt>%stack_overflow</tt>
|
||||
<li><tt>%stack_size</tt>
|
||||
<li><tt>%start_symbol</tt>
|
||||
<li><tt>%syntax_error</tt>
|
||||
<li><tt>%token_destructor</tt>
|
||||
<li><tt>%token_prefix</tt>
|
||||
<li><tt>%token_type</tt>
|
||||
<li><tt>%type</tt>
|
||||
</ul>
|
||||
Each of these directives will be described separately in the
|
||||
following sections:</p>
|
||||
|
||||
<h4>The <tt>%code</tt> directive</h4>
|
||||
|
||||
<p>The %code directive is used to specify additional C/C++ code that
|
||||
is added to the end of the main output file. This is similar to
|
||||
the %include directive except that %include is inserted at the
|
||||
beginning of the main output file.</p>
|
||||
|
||||
<p>%code is typically used to include some action routines or perhaps
|
||||
a tokenizer as part of the output file.</p>
|
||||
|
||||
<h4>The <tt>%default_destructor</tt> directive</h4>
|
||||
|
||||
<p>The %default_destructor directive specifies a destructor to
|
||||
use for non-terminals that do not have their own destructor
|
||||
specified by a separate %destructor directive. See the documentation
|
||||
on the %destructor directive below for additional information.</p>
|
||||
|
||||
<p>In some grammars, many different non-terminal symbols have the
|
||||
same datatype and hence the same destructor. This directive is
|
||||
a convenient way to specify the same destructor for all those
|
||||
non-terminals using a single statement.</p>
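<p>As a hedged sketch (not taken from the original text), a grammar in which
most non-terminals carry malloc()ed values might declare a single shared
destructor once instead of repeating a %destructor line for every symbol:
<pre>
   %default_type {void*}
   %default_destructor { free($$); }
</pre>
Individual non-terminals can still supply their own %type and %destructor
declarations, which take precedence over these defaults.</p>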
|
||||
|
||||
<h4>The <tt>%default_type</tt> directive</h4>
|
||||
|
||||
<p>The %default_type directive specifies the datatype of non-terminal
|
||||
symbols that do not have their own datatype defined using a separate
|
||||
%type directive. See the documentation on %type below for additional
|
||||
information.</p>
|
||||
|
||||
<h4>The <tt>%destructor</tt> directive</h4>
|
||||
|
||||
<p>The %destructor directive is used to specify a destructor for
|
||||
a non-terminal symbol.
|
||||
(See also the %token_destructor directive which is used to
|
||||
specify a destructor for terminal symbols.)</p>
|
||||
|
||||
<p>A non-terminal's destructor is called to dispose of the
|
||||
non-terminal's value whenever the non-terminal is popped from
|
||||
the stack. This includes all of the following circumstances:
|
||||
<ul>
|
||||
<li> When a rule reduces and the value of a non-terminal on
|
||||
the right-hand side is not linked to C code.
|
||||
<li> When the stack is popped during error processing.
|
||||
<li> When the ParseFree() function runs.
|
||||
</ul>
|
||||
The destructor can do whatever it wants with the value of
|
||||
the non-terminal, but its design is to deallocate memory
|
||||
or other resources held by that non-terminal.</p>
|
||||
|
||||
<p>Consider an example:
|
||||
<pre>
|
||||
%type nt {void*}
|
||||
%destructor nt { free($$); }
|
||||
nt(A) ::= ID NUM. { A = malloc( 100 ); }
|
||||
</pre>
|
||||
This example is a bit contrived but it serves to illustrate how
|
||||
destructors work. The example shows a non-terminal named
|
||||
``nt'' that holds values of type ``void*''. When the rule for
|
||||
an ``nt'' reduces, it sets the value of the non-terminal to
|
||||
space obtained from malloc(). Later, when the nt non-terminal
|
||||
is popped from the stack, the destructor will fire and call
|
||||
free() on this malloced space, thus avoiding a memory leak.
|
||||
(Note that the symbol ``$$'' in the destructor code is replaced
|
||||
by the value of the non-terminal.)</p>
|
||||
|
||||
<p>It is important to note that the value of a non-terminal is passed
|
||||
to the destructor whenever the non-terminal is removed from the
|
||||
stack, unless the non-terminal is used in a C-code action. If
|
||||
the non-terminal is used by C-code, then it is assumed that the
|
||||
C-code will take care of destroying it if it should really
|
||||
be destroyed. More commonly, the value is used to build some
|
||||
larger structure and we don't want to destroy it, which is why
|
||||
the destructor is not called in this circumstance.</p>
|
||||
|
||||
<p>By appropriate use of destructors, it is possible to
|
||||
build a parser using Lemon that can be used within a long-running
|
||||
program, such as a GUI, that will not leak memory or other resources.
|
||||
To do the same using yacc or bison is much more difficult.</p>
|
||||
|
||||
<h4>The <tt>%extra_argument</tt> directive</h4>
|
||||
|
||||
<p>The %extra_argument directive instructs Lemon to add a 4th parameter
|
||||
to the parameter list of the Parse() function it generates. Lemon
|
||||
doesn't do anything itself with this extra argument, but it does
|
||||
make the argument available to C-code action routines, destructors,
|
||||
and so forth. For example, if the grammar file contains:</p>
|
||||
|
||||
<p><pre>
|
||||
%extra_argument { MyStruct *pAbc }
|
||||
</pre></p>
|
||||
|
||||
<p>Then the Parse() function generated will have a 4th parameter
|
||||
of type ``MyStruct*'' and all action routines will have access to
|
||||
a variable named ``pAbc'' that is the value of the 4th parameter
|
||||
in the most recent call to Parse().</p>
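<p>The following sketch (not part of the original document; MyStruct and its
nExpr field are invented for illustration) shows both sides of the mechanism:
the caller supplies the pointer as the 4th argument to Parse(), and a reduce
action reads it back through the declared name pAbc:
<pre>
   /* Caller side: */
   MyStruct sAbc;
   memset(&sAbc, 0, sizeof(sAbc));
   Parse(pParser, hTokenId, sToken, &sAbc);

   /* Grammar side: every action sees the most recent pointer as pAbc. */
   expr(A) ::= expr(B) PLUS expr(C).  { A = B + C;  pAbc->nExpr++; }
</pre>
</p>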
|
||||
|
||||
<h4>The <tt>%include</tt> directive</h4>
|
||||
|
||||
<p>The %include directive specifies C code that is included at the
|
||||
top of the generated parser. You can include any text you want --
|
||||
the Lemon parser generator copies it blindly. If you have multiple
|
||||
%include directives in your grammar file the value of the last
|
||||
%include directive overwrites all the others.</p>
|
||||
|
||||
<p>The %include directive is very handy for getting some extra #include
|
||||
preprocessor statements at the beginning of the generated parser.
|
||||
For example:</p>
|
||||
|
||||
<p><pre>
|
||||
%include {#include <unistd.h>}
|
||||
</pre></p>
|
||||
|
||||
<p>This might be needed, for example, if some of the C actions in the
|
||||
grammar call functions that are prototyped in unistd.h.</p>
|
||||
|
||||
<h4>The <tt>%left</tt> directive</h4>
|
||||
|
||||
<p>The %left directive is used (along with the %right and
|
||||
%nonassoc directives) to declare precedences of terminal
|
||||
symbols. Every terminal symbol whose name appears after
|
||||
a %left directive but before the next period (``.'') is
|
||||
given the same left-associative precedence value. Subsequent
|
||||
%left directives have higher precedence. For example:</p>
|
||||
|
||||
<p><pre>
|
||||
%left AND.
|
||||
%left OR.
|
||||
%nonassoc EQ NE GT GE LT LE.
|
||||
%left PLUS MINUS.
|
||||
%left TIMES DIVIDE MOD.
|
||||
%right EXP NOT.
|
||||
</pre></p>
|
||||
|
||||
<p>Note the period that terminates each %left, %right or %nonassoc
|
||||
directive.</p>
|
||||
|
||||
<p>LALR(1) grammars can get into a situation where they require
|
||||
a large amount of stack space if you make heavy use of right-associative
|
||||
operators. For this reason, it is recommended that you use %left
|
||||
rather than %right whenever possible.</p>
|
||||
|
||||
<h4>The <tt>%name</tt> directive</h4>
|
||||
|
||||
<p>By default, the functions generated by Lemon all begin with the
|
||||
five-character string ``Parse''. You can change this string to something
|
||||
different using the %name directive. For instance:</p>
|
||||
|
||||
<p><pre>
|
||||
%name Abcde
|
||||
</pre></p>
|
||||
|
||||
<p>Putting this directive in the grammar file will cause Lemon to generate
|
||||
functions named
|
||||
<ul>
|
||||
<li> AbcdeAlloc(),
|
||||
<li> AbcdeFree(),
|
||||
<li> AbcdeTrace(), and
|
||||
<li> Abcde().
|
||||
</ul>
|
||||
The %name directive allows you to generate two or more different
|
||||
parsers and link them all into the same executable.
|
||||
</p>
|
||||
|
||||
<h4>The <tt>%nonassoc</tt> directive</h4>
|
||||
|
||||
<p>This directive is used to assign non-associative precedence to
|
||||
one or more terminal symbols. See the section on precedence rules
|
||||
or on the %left directive for additional information.</p>
|
||||
|
||||
<h4>The <tt>%parse_accept</tt> directive</h4>
|
||||
|
||||
<p>The %parse_accept directive specifies a block of C code that is
|
||||
executed whenever the parser accepts its input string. To ``accept''
|
||||
an input string means that the parser was able to process all tokens
|
||||
without error.</p>
|
||||
|
||||
<p>For example:</p>
|
||||
|
||||
<p><pre>
|
||||
%parse_accept {
|
||||
printf("parsing complete!\n");
|
||||
}
|
||||
</pre></p>
|
||||
|
||||
|
||||
<h4>The <tt>%parse_failure</tt> directive</h4>
|
||||
|
||||
<p>The %parse_failure directive specifies a block of C code that
|
||||
is executed whenever the parser fails to complete. This code is not
|
||||
executed until the parser has tried and failed to resolve an input
|
||||
error using its usual error recovery strategy. The routine is
|
||||
only invoked when parsing is unable to continue.</p>
|
||||
|
||||
<p><pre>
|
||||
%parse_failure {
|
||||
fprintf(stderr,"Giving up. Parser is hopelessly lost...\n");
|
||||
}
|
||||
</pre></p>
|
||||
|
||||
<h4>The <tt>%right</tt> directive</h4>
|
||||
|
||||
<p>This directive is used to assign right-associative precedence to
|
||||
one or more terminal symbols. See the section on precedence rules
|
||||
or on the %left directive for additional information.</p>
|
||||
|
||||
<h4>The <tt>%stack_overflow</tt> directive</h4>
|
||||
|
||||
<p>The %stack_overflow directive specifies a block of C code that
|
||||
is executed if the parser's internal stack ever overflows. Typically
|
||||
this just prints an error message. After a stack overflow, the parser
|
||||
will be unable to continue and must be reset.</p>
|
||||
|
||||
<p><pre>
|
||||
%stack_overflow {
|
||||
fprintf(stderr,"Giving up. Parser stack overflow\n");
|
||||
}
|
||||
</pre></p>
|
||||
|
||||
<p>You can help prevent parser stack overflows by avoiding the use
|
||||
of right recursion and right-precedence operators in your grammar.
|
||||
Use left recursion and left-precedence operators instead, to
|
||||
encourage rules to reduce sooner and keep the stack size down.
|
||||
For example, do rules like this:
|
||||
<pre>
|
||||
list ::= list element. // left-recursion. Good!
|
||||
list ::= .
|
||||
</pre>
|
||||
Not like this:
|
||||
<pre>
|
||||
list ::= element list. // right-recursion. Bad!
|
||||
list ::= .
|
||||
</pre>
|
||||
|
||||
<h4>The <tt>%stack_size</tt> directive</h4>
|
||||
|
||||
<p>If stack overflow is a problem and you can't resolve the trouble
|
||||
by using left-recursion, then you might want to increase the size
|
||||
of the parser's stack using this directive. Put a positive integer
|
||||
after the %stack_size directive and Lemon will generate a parser
|
||||
with a stack of the requested size. The default value is 100.</p>
|
||||
|
||||
<p><pre>
|
||||
%stack_size 2000
|
||||
</pre></p>
|
||||
|
||||
<h4>The <tt>%start_symbol</tt> directive</h4>
|
||||
|
||||
<p>By default, the start-symbol for the grammar that Lemon generates
|
||||
is the first non-terminal that appears in the grammar file. But you
|
||||
can choose a different start-symbol using the %start_symbol directive.</p>
|
||||
|
||||
<p><pre>
|
||||
%start_symbol prog
|
||||
</pre></p>
|
||||
|
||||
<h4>The <tt>%token_destructor</tt> directive</h4>
|
||||
|
||||
<p>The %destructor directive assigns a destructor to a non-terminal
|
||||
symbol. (See the description of the %destructor directive above.)
|
||||
This directive does the same thing for all terminal symbols.</p>
|
||||
|
||||
<p>Unlike non-terminal symbols which may each have a different data type
|
||||
for their values, terminals all use the same data type (defined by
|
||||
the %token_type directive) and so they use a common destructor. Other
|
||||
than that, the token destructor works just like the non-terminal
|
||||
destructors.</p>
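<p>As a hedged example (not from the original text), if the token values are
heap-allocated Token structures, a single token destructor can release them
whenever the parser discards a terminal; TokenFree() is a hypothetical helper:
<pre>
   %token_type {Token*}
   %token_destructor { TokenFree($$); }
</pre>
</p>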
|
||||
|
||||
<h4>The <tt>%token_prefix</tt> directive</h4>
|
||||
|
||||
<p>Lemon generates #defines that assign small integer constants
|
||||
to each terminal symbol in the grammar. If desired, Lemon will
|
||||
add a prefix specified by this directive
|
||||
to each of the #defines it generates.
|
||||
So if the default output of Lemon looked like this:
|
||||
<pre>
|
||||
#define AND 1
|
||||
#define MINUS 2
|
||||
#define OR 3
|
||||
#define PLUS 4
|
||||
</pre>
|
||||
You can insert a statement into the grammar like this:
|
||||
<pre>
|
||||
%token_prefix TOKEN_
|
||||
</pre>
|
||||
to cause Lemon to produce these symbols instead:
|
||||
<pre>
|
||||
#define TOKEN_AND 1
|
||||
#define TOKEN_MINUS 2
|
||||
#define TOKEN_OR 3
|
||||
#define TOKEN_PLUS 4
|
||||
</pre>
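<p>As an illustrative sketch (the scanner below is hypothetical and not part
of this document), a hand-written tokenizer would then return the prefixed
constants to feed into Parse():
<pre>
   /* Map single-character operators onto the prefixed token IDs. */
   int GetOperatorToken(int c){
     switch( c ){
       case '+':  return TOKEN_PLUS;
       case '-':  return TOKEN_MINUS;
     }
     return 0;   /* not an operator */
   }
</pre>
</p>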
|
||||
|
||||
<h4>The <tt>%token_type</tt> and <tt>%type</tt> directives</h4>
|
||||
|
||||
<p>These directives are used to specify the data types for values
|
||||
on the parser's stack associated with terminal and non-terminal
|
||||
symbols. The values of all terminal symbols must be of the same
|
||||
type. This turns out to be the same data type as the 3rd parameter
|
||||
to the Parse() function generated by Lemon. Typically, you will
|
||||
make the value of a terminal symbol be a pointer to some kind of
|
||||
token structure. Like this:</p>
|
||||
|
||||
<p><pre>
|
||||
%token_type {Token*}
|
||||
</pre></p>
|
||||
|
||||
<p>If the data type of terminals is not specified, the default value
|
||||
is ``int''.</p>
|
||||
|
||||
<p>Non-terminal symbols can each have their own data types. Typically
|
||||
the data type of a non-terminal is a pointer to the root of a parse-tree
|
||||
structure that contains all information about that non-terminal.
|
||||
For example:</p>
|
||||
|
||||
<p><pre>
|
||||
%type expr {Expr*}
|
||||
</pre></p>
|
||||
|
||||
<p>Each entry on the parser's stack is actually a union containing
|
||||
instances of all data types for every non-terminal and terminal symbol.
|
||||
Lemon will automatically use the correct element of this union depending
|
||||
on what the corresponding non-terminal or terminal symbol is. But
|
||||
the grammar designer should keep in mind that the size of the union
|
||||
will be the size of its largest element. So if you have a single
|
||||
non-terminal whose data type requires 1K of storage, then your 100
|
||||
entry parser stack will require 100K of heap space. If you are willing
|
||||
and able to pay that price, fine. You just need to know.</p>
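<p>A hedged sketch tying these pieces together (the struct layouts here are
invented for illustration only): the %include block makes the types visible
to the generated code, %token_type sets the type of the 3rd argument to
Parse(), and %type gives one non-terminal its own pointer type:
<pre>
   %include {
     typedef struct Token { const char *z; int n; } Token;
     typedef struct Expr  { int op; struct Expr *pLeft, *pRight; } Expr;
   }
   %token_type {Token*}
   %type expr  {Expr*}
</pre>
</p>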
|
||||
|
||||
<h3>Error Processing</h3>
|
||||
|
||||
<p>After extensive experimentation over several years, it has been
|
||||
discovered that the error recovery strategy used by yacc is about
|
||||
as good as it gets. And so that is what Lemon uses.</p>
|
||||
|
||||
<p>When a Lemon-generated parser encounters a syntax error, it
|
||||
first invokes the code specified by the %syntax_error directive, if
|
||||
any. It then enters its error recovery strategy. The error recovery
|
||||
strategy is to begin popping the parser's stack until it enters a
|
||||
state where it is permitted to shift a special non-terminal symbol
|
||||
named ``error''. It then shifts this non-terminal and continues
|
||||
parsing. But the %syntax_error routine will not be called again
|
||||
until at least three new tokens have been successfully shifted.</p>
|
||||
|
||||
<p>If the parser pops its stack until the stack is empty, and it still
|
||||
is unable to shift the error symbol, then the %parse_failure routine
|
||||
is invoked and the parser resets itself to its start state, ready
|
||||
to begin parsing a new file. This is what will happen at the very
|
||||
first syntax error, of course, if there are no instances of the
|
||||
``error'' non-terminal in your grammar.</p>
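<p>As a hedged sketch of how this is typically wired up (the SEMI token, the
rule name and the error counter are assumptions, not part of this document),
a grammar can name the special ``error'' non-terminal in a recovery rule and
count errors inside the %syntax_error handler:
<pre>
   %syntax_error {
     pState->nErr++;        /* pState comes from %extra_argument (hypothetical) */
   }
   cmd ::= error SEMI.      /* resync at the end of the current statement */
</pre>
</p>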
|
||||
|
||||
</body>
|
||||
</html>
|
||||
@ -1,76 +0,0 @@
|
||||
*** Throughout this document, a page is deemed to have been synced
|
||||
automatically as soon as it is written when PRAGMA synchronous=OFF.
|
||||
Otherwise, the page is not synced until the xSync method of the VFS
|
||||
is called successfully on the file containing the page.
|
||||
|
||||
*** Definition: A page of the database file is said to be "overwriteable" if
|
||||
one or more of the following are true about the page:
|
||||
|
||||
(a) The original content of the page as it was at the beginning of
|
||||
the transaction has been written into the rollback journal and
|
||||
synced.
|
||||
|
||||
(b) The page was a freelist leaf page at the start of the transaction.
|
||||
|
||||
(c) The page number is greater than the largest page that existed in
|
||||
the database file at the start of the transaction.
|
||||
|
||||
(1) A page of the database file is never overwritten unless one of the
|
||||
following are true:
|
||||
|
||||
(a) The page and all other pages on the same sector are overwriteable.
|
||||
|
||||
(b) The atomic page write optimization is enabled, and the entire
|
||||
transaction other than the update of the transaction sequence
|
||||
number consists of a single page change.
|
||||
|
||||
(2) The content of a page written into the rollback journal exactly matches
|
||||
both the content in the database when the rollback journal was written
|
||||
and the content in the database at the beginning of the current
|
||||
transaction.
|
||||
|
||||
(3) Writes to the database file are an integer multiple of the page size
|
||||
in length and are aligned to a page boundary.
|
||||
|
||||
(4) Reads from the database file are either aligned on a page boundary and
|
||||
an integer multiple of the page size in length or are taken from the
|
||||
first 100 bytes of the database file.
|
||||
|
||||
(5) All writes to the database file are synced prior to the rollback journal
|
||||
being deleted, truncated, or zeroed.
|
||||
|
||||
(6) If a master journal file is used, then all writes to the database file
|
||||
are synced prior to the master journal being deleted.
|
||||
|
||||
*** Definition: Two databases (or the same database at two points in time)
|
||||
are said to be "logically equivalent" if they give the same answer to
|
||||
all queries. Note in particular the content of freelist leaf
|
||||
pages can be changed arbitrarily without affecting the logical equivalence
|
||||
of the database.
|
||||
|
||||
(7) At any time, if any subset, including the empty set and the total set,
|
||||
of the unsynced changes to a rollback journal are removed and the
|
||||
journal is rolled back, the resulting database file will be logically
|
||||
equivalent to the database file at the beginning of the transaction.
|
||||
|
||||
(8) When a transaction is rolled back, the xTruncate method of the VFS
|
||||
is called to restore the database file to the same size it was at
|
||||
the beginning of the transaction. (In some VFSes, the xTruncate
|
||||
method is a no-op, but that does not change the fact that SQLite will
|
||||
invoke it.)
|
||||
|
||||
(9) Whenever the database file is modified, at least one bit in the range
|
||||
of bytes from 24 through 39 inclusive will be changed prior to releasing
|
||||
the EXCLUSIVE lock.
|
||||
|
||||
(10) The pattern of bits in bytes 24 through 39 shall not repeat in less
|
||||
than one billion transactions.
|
||||
|
||||
(11) A database file is well-formed at the beginning and at the conclusion
|
||||
of every transaction.
|
||||
|
||||
(12) An EXCLUSIVE lock must be held on the database file before making
|
||||
any changes to the database file.
|
||||
|
||||
(13) A SHARED lock must be held on the database file before reading any
|
||||
content out of the database file.
|
||||
@ -1,130 +0,0 @@
|
||||
The 5 states of an historical rollback lock as implemented by the
|
||||
xLock, xUnlock, and xCheckReservedLock methods of the sqlite3_io_methods
|
||||
object are:
|
||||
|
||||
UNLOCKED
|
||||
SHARED
|
||||
RESERVED
|
||||
PENDING
|
||||
EXCLUSIVE
|
||||
|
||||
The wal-index file has a similar locking hierarchy implemented using
|
||||
the xShmLock method of the sqlite3_vfs object, but with 7
|
||||
states. Each connection to a wal-index file must be in one of
|
||||
the following 7 states:
|
||||
|
||||
UNLOCKED
|
||||
READ
|
||||
READ_FULL
|
||||
WRITE
|
||||
PENDING
|
||||
CHECKPOINT
|
||||
RECOVER
|
||||
|
||||
These roughly correspond to the 5 states of a rollback lock except
|
||||
that SHARED is split out into 2 states: READ and READ_FULL and
|
||||
there is an extra RECOVER state used for wal-index reconstruction.
|
||||
|
||||
The meanings of the various wal-index locking states are as follows:
|
||||
|
||||
UNLOCKED - The wal-index is not in use.
|
||||
|
||||
READ - Some prefix of the wal-index is being read. Additional
|
||||
wal-index information can be appended at any time. The
|
||||
newly appended content will be ignored by the holder of
|
||||
the READ lock.
|
||||
|
||||
READ_FULL - The entire wal-index is being read. No new information
|
||||
can be added to the wal-index. The holder of a READ_FULL
|
||||
lock promises never to read pages from the database file
|
||||
that are available anywhere in the wal-index.
|
||||
|
||||
WRITE - It is OK to append to the wal-index file and to adjust
|
||||
the header to indicate the new "last valid frame".
|
||||
|
||||
PENDING - Waiting on all READ locks to clear so that a
|
||||
CHECKPOINT lock can be acquired.
|
||||
|
||||
CHECKPOINT - It is OK to write any WAL data into the database file
|
||||
and zero the last valid frame field of the wal-index
|
||||
header. The wal-index file itself may not be changed
|
||||
other than to zero the last valid frame field in the
|
||||
header.
|
||||
|
||||
RECOVER - Held during wal-index recovery. Used to prevent a
|
||||
race if multiple clients try to recover a wal-index at
|
||||
the same time.
|
||||
|
||||
|
||||
A particular lock manager implementation may coalesce one or more of
|
||||
the wal-index locking states, though with a reduction in concurrency.
|
||||
For example, an implementation might implement only exclusive locking,
|
||||
in which case all states would be equivalent to CHECKPOINT, meaning that
|
||||
only one reader or one writer or one checkpointer could be active at a
|
||||
time. Or, an implementation might combine READ and READ_FULL into
|
||||
a single state equivalent to READ, meaning that a writer could
|
||||
coexist with a reader, but no reader or writers could coexist with a
|
||||
checkpointer.
|
||||
|
||||
The lock manager must obey the following rules:
|
||||
|
||||
(1) A READ cannot coexist with CHECKPOINT.
|
||||
(2) A READ_FULL cannot coexist with WRITE.
|
||||
(3) None of WRITE, PENDING, CHECKPOINT, or RECOVER can coexist.
|
||||
|
||||
The SQLite core will obey the next set of rules. These rules are
|
||||
assertions on the behavior of the SQLite core which might be verified
|
||||
during testing using an instrumented lock manager.
|
||||
|
||||
(5) No part of the wal-index will be read without holding either some
|
||||
kind of SHM lock or an EXCLUSIVE lock on the original database.
|
||||
The original database is the file named in the 2nd parameter to
|
||||
the xShmOpen method.
|
||||
|
||||
(6) A holder of a READ_FULL will never read any page of the database
|
||||
file that is contained anywhere in the wal-index.
|
||||
|
||||
(7) No part of the wal-index other than the header will be written nor
|
||||
will the size of the wal-index grow without holding a WRITE or
|
||||
an EXCLUSIVE on the original database file.
|
||||
|
||||
(8) The wal-index header will not be written without holding one of
|
||||
WRITE, CHECKPOINT, or RECOVER on the wal-index or an EXCLUSIVE on
|
||||
the original database files.
|
||||
|
||||
(9) A CHECKPOINT or RECOVER must be held on the wal-index, or an
|
||||
EXCLUSIVE on the original database file, in order to reset the
|
||||
last valid frame counter in the header of the wal-index back to zero.
|
||||
|
||||
(10) A WRITE can only increase the last valid frame pointer in the header.
|
||||
|
||||
The SQLite core will only ever send requests for UNLOCK, READ, WRITE,
|
||||
CHECKPOINT, or RECOVER to the lock manager. The SQLite core will never
|
||||
request a READ_FULL or PENDING lock though the lock manager may deliver
|
||||
those locking states in response to READ and CHECKPOINT requests,
|
||||
respectively, if and only if the requested READ or CHECKPOINT cannot
|
||||
be delivered.
|
||||
|
||||
The following are the allowed lock transitions:
|
||||
|
||||
Original-State Request New-State
|
||||
-------------- ---------- ----------
|
||||
(11a) UNLOCK READ READ
|
||||
(11b) UNLOCK READ READ_FULL
|
||||
(11c) UNLOCK CHECKPOINT PENDING
|
||||
(11d) UNLOCK CHECKPOINT CHECKPOINT
|
||||
(11e) READ UNLOCK UNLOCK
|
||||
(11f) READ WRITE WRITE
|
||||
(11g) READ RECOVER RECOVER
|
||||
(11h) READ_FULL UNLOCK UNLOCK
|
||||
(11i) READ_FULL WRITE WRITE
|
||||
(11j) READ_FULL RECOVER RECOVER
|
||||
(11k) WRITE READ READ
|
||||
(11l) PENDING UNLOCK UNLOCK
|
||||
(11m) PENDING CHECKPOINT CHECKPOINT
|
||||
(11n) CHECKPOINT UNLOCK UNLOCK
|
||||
(11o) RECOVER READ READ
|
||||
|
||||
These 15 transitions are all that needs to be supported. The lock
|
||||
manager implementation can assert that fact. The other 27 possible
|
||||
transitions among the 7 locking states will never occur.
|
||||
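
Because the table above enumerates every legal transition, an instrumented
lock manager can verify rule compliance with a small lookup.  The C fragment
below is an illustrative sketch only and is not part of the original sources;
the enum values and the walLockTransitionOk() helper are invented names.

  #include <assert.h>

  /* The seven wal-index locking states described above. */
  typedef enum {
    WAL_UNLOCK, WAL_READ, WAL_READ_FULL, WAL_WRITE,
    WAL_PENDING, WAL_CHECKPOINT, WAL_RECOVER
  } WalLockState;

  /* Return non-zero if moving from eOld to eNew is one of the fifteen
  ** transitions (11a)-(11o).  All other transitions are rejected. */
  static int walLockTransitionOk(WalLockState eOld, WalLockState eNew){
    switch( eOld ){
      case WAL_UNLOCK:                                     /* 11a-11d */
        return eNew==WAL_READ || eNew==WAL_READ_FULL
            || eNew==WAL_PENDING || eNew==WAL_CHECKPOINT;
      case WAL_READ:                                       /* 11e-11g */
      case WAL_READ_FULL:                                  /* 11h-11j */
        return eNew==WAL_UNLOCK || eNew==WAL_WRITE || eNew==WAL_RECOVER;
      case WAL_WRITE:      return eNew==WAL_READ;          /* 11k */
      case WAL_PENDING:                                    /* 11l, 11m */
        return eNew==WAL_UNLOCK || eNew==WAL_CHECKPOINT;
      case WAL_CHECKPOINT: return eNew==WAL_UNLOCK;        /* 11n */
      case WAL_RECOVER:    return eNew==WAL_READ;          /* 11o */
    }
    return 0;
  }

  /* An instrumented lock-manager entry point could then simply do:
  **    assert( walLockTransitionOk(eOldState, eNewState) );
  */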
@ -1,2 +0,0 @@
Version loadable extensions to SQLite are found in subfolders
of this folder.
@ -1,170 +0,0 @@
NOTE (2012-11-29):

  The functionality implemented by this extension has been superseded
  by WAL-mode.  This module is no longer supported or maintained.  The
  code is retained for historical reference only.

------------------------------------------------------------------------------

Normally, when SQLite writes to a database file, it waits until the write
operation is finished before returning control to the calling application.
Since writing to the file-system is usually very slow compared with CPU
bound operations, this can be a performance bottleneck.  This directory
contains an extension that causes SQLite to perform all write requests
using a separate thread running in the background.  Although this does not
reduce the overall system resources (CPU, disk bandwidth etc.) at all, it
allows SQLite to return control to the caller quickly even when writing to
the database, eliminating the bottleneck.

  1. Functionality

     1.1 How it Works
     1.2 Limitations
     1.3 Locking and Concurrency

  2. Compilation and Usage

  3. Porting


1. FUNCTIONALITY

  With asynchronous I/O, write requests are handled by a separate thread
  running in the background.  This means that the thread that initiates
  a database write does not have to wait for (sometimes slow) disk I/O
  to occur.  The write seems to happen very quickly, though in reality
  it is happening at its usual slow pace in the background.

  Asynchronous I/O appears to give better responsiveness, but at a price.
  You lose the Durable property.  With the default I/O backend of SQLite,
  once a write completes, you know that the information you wrote is
  safely on disk.  With the asynchronous I/O, this is not the case.  If
  your program crashes or if a power loss occurs after the database
  write but before the asynchronous write thread has completed, then the
  database change might never make it to disk and the next user of the
  database might not see your change.

  You lose Durability with asynchronous I/O, but you still retain the
  other parts of ACID:  Atomic, Consistent, and Isolated.  Many
  applications get along fine without the Durability.

1.1 How it Works

  Asynchronous I/O works by creating a special SQLite "vfs" structure
  and registering it with sqlite3_vfs_register().  When files opened via
  this vfs are written to (using the vfs xWrite() method), the data is not
  written directly to disk, but is placed in the "write-queue" to be
  handled by the background thread.

  When files opened with the asynchronous vfs are read from
  (using the vfs xRead() method), the data is read from the file on
  disk and the write-queue, so that from the point of view of
  the vfs reader the xWrite() appears to have already completed.

  The special vfs is registered (and unregistered) by calls to the
  API functions sqlite3async_initialize() and sqlite3async_shutdown().
  See section "Compilation and Usage" below for details.

1.2 Limitations

  In order to gain experience with the main ideas surrounding asynchronous
  IO, this implementation is deliberately kept simple.  Additional
  capabilities may be added in the future.

  For example, as currently implemented, if writes are happening at a
  steady stream that exceeds the I/O capability of the background writer
  thread, the queue of pending write operations will grow without bound.
  If this goes on for long enough, the host system could run out of memory.
  A more sophisticated module could keep track of the quantity of
  pending writes and stop accepting new write requests when the queue of
  pending writes grows too large.

1.3 Locking and Concurrency

  Multiple connections from within a single process that use this
  implementation of asynchronous IO may access a single database
  file concurrently.  From the point of view of the user, if all
  connections are from within a single process, there is no difference
  between the concurrency offered by "normal" SQLite and SQLite
  using the asynchronous backend.

  If file-locking is enabled (it is enabled by default), then connections
  from multiple processes may also read and write the database file.
  However concurrency is reduced as follows:

    * When a connection using asynchronous IO begins a database
      transaction, the database is locked immediately.  However the
      lock is not released until after all relevant operations
      in the write-queue have been flushed to disk.  This means
      (for example) that the database may remain locked for some
      time after a "COMMIT" or "ROLLBACK" is issued.

    * If an application using asynchronous IO executes transactions
      in quick succession, other database users may be effectively
      locked out of the database.  This is because when a BEGIN
      is executed, a database lock is established immediately.  But
      when the corresponding COMMIT or ROLLBACK occurs, the lock
      is not released until the relevant part of the write-queue
      has been flushed through.  As a result, if a COMMIT is followed
      by a BEGIN before the write-queue is flushed through, the database
      is never unlocked, preventing other processes from accessing
      the database.

  File-locking may be disabled at runtime using the sqlite3async_control()
  API (see below).  This may improve performance when using an NFS or other
  network file-system, as the synchronous round-trips to the server
  required to establish file locks are avoided.  However, if multiple
  connections attempt to access the same database file when file-locking
  is disabled, application crashes and database corruption are a likely
  outcome.


2. COMPILATION AND USAGE

  The asynchronous IO extension consists of a single file of C code
  (sqlite3async.c), and a header file (sqlite3async.h) that defines the
  C API used by applications to activate and control the module's
  functionality.

  To use the asynchronous IO extension, compile sqlite3async.c as
  part of the application that uses SQLite.  Then use the API defined
  in sqlite3async.h to initialize and configure the module.

  The asynchronous IO VFS API is described in detail in comments in
  sqlite3async.h.  Using the API usually consists of the following steps,
  which the sketch after the list puts together:

    1. Register the asynchronous IO VFS with SQLite by calling the
       sqlite3async_initialize() function.

    2. Create a background thread to perform write operations and call
       sqlite3async_run().

    3. Use the normal SQLite API to read and write to databases via
       the asynchronous IO VFS.

  Refer to sqlite3async.h for details.
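
  The program below is a hypothetical sketch of the three steps, not part
  of the extension itself: it assumes a pthreads platform, uses only the
  functions declared in sqlite3async.h, and reduces error handling to
  early returns.

    #include <pthread.h>
    #include "sqlite3.h"
    #include "sqlite3async.h"

    /* Step 2: the background thread just services the write-queue. */
    static void *bg_thread(void *pUnused){
      (void)pUnused;
      sqlite3async_run();  /* blocks until the "halt" parameter says return */
      return 0;
    }

    int main(void){
      pthread_t tid;
      sqlite3 *db;

      /* Step 1: register the async VFS; parent VFS = current default. */
      if( sqlite3async_initialize(0, 0)!=SQLITE_OK ) return 1;
      if( pthread_create(&tid, 0, bg_thread, 0) ) return 1;

      /* Step 3: open a database that explicitly requests the async VFS. */
      if( sqlite3_open_v2("test.db", &db,
            SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE,
            SQLITEASYNC_VFSNAME)!=SQLITE_OK ) return 1;
      sqlite3_exec(db,
          "CREATE TABLE IF NOT EXISTS t(x); INSERT INTO t VALUES(1);",
          0, 0, 0);
      sqlite3_close(db);

      /* Shut down: ask sqlite3async_run() to return once the queue is
      ** empty, join the background thread, then unregister the VFS. */
      sqlite3async_control(SQLITEASYNC_HALT, SQLITEASYNC_HALT_IDLE);
      pthread_join(tid, 0);
      sqlite3async_shutdown();
      return 0;
    }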


3. PORTING

  Currently the asynchronous IO extension is compatible with win32 systems
  and systems that support the pthreads interface, including Mac OSX, Linux,
  and other varieties of Unix.

  To port the asynchronous IO extension to another platform, the user must
  implement mutex and condition variable primitives for the new platform.
  Currently there is no externally available interface to allow this, but
  modifying the code within sqlite3async.c to include the new platform's
  concurrency primitives is relatively easy.  Search within sqlite3async.c
  for the comment string "PORTING FUNCTIONS" for details.  Then implement
  new versions of each of the following:

    static void async_mutex_enter(int eMutex);
    static void async_mutex_leave(int eMutex);
    static void async_cond_wait(int eCond, int eMutex);
    static void async_cond_signal(int eCond);
    static void async_sched_yield(void);

  The functionality required of each of the above functions is described
  in comments in sqlite3async.c.
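
  For illustration only, the fragment below sketches what pthreads-based
  versions of these five primitives could look like.  It is not code from
  sqlite3async.c: the array sizes and the idea of indexing static arrays
  of pthreads objects by the integer identifiers are assumptions.

    #include <pthread.h>
    #include <sched.h>

    #define ASYNC_NMUTEX 3   /* assumed number of mutexes */
    #define ASYNC_NCOND  2   /* assumed number of condition variables */

    static pthread_mutex_t aMutex[ASYNC_NMUTEX] = {
      PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
      PTHREAD_MUTEX_INITIALIZER
    };
    static pthread_cond_t aCond[ASYNC_NCOND] = {
      PTHREAD_COND_INITIALIZER, PTHREAD_COND_INITIALIZER
    };

    static void async_mutex_enter(int eMutex){
      pthread_mutex_lock(&aMutex[eMutex]);
    }
    static void async_mutex_leave(int eMutex){
      pthread_mutex_unlock(&aMutex[eMutex]);
    }
    /* Wait on condition eCond; the caller is assumed to hold mutex eMutex. */
    static void async_cond_wait(int eCond, int eMutex){
      pthread_cond_wait(&aCond[eCond], &aMutex[eMutex]);
    }
    static void async_cond_signal(int eCond){
      pthread_cond_signal(&aCond[eCond]);
    }
    static void async_sched_yield(void){
      sched_yield();
    }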
File diff suppressed because it is too large
Load Diff
@ -1,223 +0,0 @@
|
||||
|
||||
#ifndef __SQLITEASYNC_H_
|
||||
#define __SQLITEASYNC_H_ 1
|
||||
|
||||
/*
|
||||
** Make sure we can call this stuff from C++.
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define SQLITEASYNC_VFSNAME "sqlite3async"
|
||||
|
||||
/*
|
||||
** THREAD SAFETY NOTES:
|
||||
**
|
||||
** Of the four API functions in this file, the following are not threadsafe:
|
||||
**
|
||||
** sqlite3async_initialize()
|
||||
** sqlite3async_shutdown()
|
||||
**
|
||||
** Care must be taken that neither of these functions is called while
|
||||
** another thread may be calling either any sqlite3async_XXX() function
|
||||
** or an sqlite3_XXX() API function related to a database handle that
|
||||
** is using the asynchronous IO VFS.
|
||||
**
|
||||
** These functions:
|
||||
**
|
||||
** sqlite3async_run()
|
||||
** sqlite3async_control()
|
||||
**
|
||||
** are threadsafe. It is quite safe to call either of these functions even
|
||||
** if another thread may also be calling one of them or an sqlite3_XXX()
|
||||
** function related to a database handle that uses the asynchronous IO VFS.
|
||||
*/
|
||||
|
||||
/*
|
||||
** Initialize the asynchronous IO VFS and register it with SQLite using
|
||||
** sqlite3_vfs_register(). If the asynchronous VFS is already initialized
|
||||
** and registered, this function is a no-op. The asynchronous IO VFS
|
||||
** is registered as "sqlite3async".
|
||||
**
|
||||
** The asynchronous IO VFS does not make operating system IO requests
|
||||
** directly. Instead, it uses an existing VFS implementation for all
|
||||
** required file-system operations. If the first parameter to this function
|
||||
** is NULL, then the current default VFS is used for IO. If it is not
|
||||
** NULL, then it must be the name of an existing VFS. In other words, the
|
||||
** first argument to this function is passed to sqlite3_vfs_find() to
|
||||
** locate the VFS to use for all real IO operations. This VFS is known
|
||||
** as the "parent VFS".
|
||||
**
|
||||
** If the second parameter to this function is non-zero, then the
|
||||
** asynchronous IO VFS is registered as the default VFS for all SQLite
|
||||
** database connections within the process. Otherwise, the asynchronous IO
|
||||
** VFS is only used by connections opened using sqlite3_open_v2() that
|
||||
** specifically request VFS "sqlite3async".
|
||||
**
|
||||
** If a parent VFS cannot be located, then SQLITE_ERROR is returned.
|
||||
** In the unlikely event that operating system specific initialization
|
||||
** fails (win32 systems create the required critical section and event
|
||||
** objects within this function), then SQLITE_ERROR is also returned.
|
||||
** Finally, if the call to sqlite3_vfs_register() returns an error, then
|
||||
** the error code is returned to the user by this function. In all three
|
||||
** of these cases, initialization has failed and the asynchronous IO VFS
|
||||
** is not registered with SQLite.
|
||||
**
|
||||
** Otherwise, if no error occurs, SQLITE_OK is returned.
|
||||
*/
|
||||
int sqlite3async_initialize(const char *zParent, int isDefault);
|
||||
|
||||
/*
|
||||
** This function unregisters the asynchronous IO VFS using
|
||||
** sqlite3_vfs_unregister().
|
||||
**
|
||||
** On win32 platforms, this function also releases the small number of
|
||||
** critical section and event objects created by sqlite3async_initialize().
|
||||
*/
|
||||
void sqlite3async_shutdown(void);
|
||||
|
||||
/*
|
||||
** This function may only be called when the asynchronous IO VFS is
|
||||
** installed (after a call to sqlite3async_initialize()). It processes
|
||||
** zero or more queued write operations before returning. It is expected
|
||||
** (but not required) that this function will be called by a different
** thread than those threads that use SQLite; this is the "background
** thread" that performs the IO.
|
||||
**
|
||||
** How many queued write operations are performed before returning
|
||||
** depends on the global setting configured by passing the SQLITEASYNC_HALT
|
||||
** verb to sqlite3async_control() (see below for details). By default
|
||||
** this function never returns - it processes all pending operations and
|
||||
** then blocks waiting for new ones.
|
||||
**
|
||||
** If multiple simultaneous calls are made to sqlite3async_run() from two
|
||||
** or more threads, then the calls are serialized internally.
|
||||
*/
|
||||
void sqlite3async_run(void);
|
||||
|
||||
/*
|
||||
** This function may only be called when the asynchronous IO VFS is
|
||||
** installed (after a call to sqlite3async_initialize()). It is used
|
||||
** to query or configure various parameters that affect the operation
|
||||
** of the asynchronous IO VFS. At present there are three parameters
|
||||
** supported:
|
||||
**
|
||||
** * The "halt" parameter, which configures the circumstances under
** which the sqlite3async_run() function returns.
|
||||
**
|
||||
** * The "delay" parameter. Setting the delay parameter to a non-zero
|
||||
** value causes the sqlite3async_run() function to sleep for the
|
||||
** configured number of milliseconds between each queued write
|
||||
** operation.
|
||||
**
|
||||
** * The "lockfiles" parameter. This parameter determines whether or
|
||||
** not the asynchronous IO VFS locks the database files it operates
|
||||
** on. Disabling file locking can improve throughput.
|
||||
**
|
||||
** This function is always passed two arguments. When setting the value
|
||||
** of a parameter, the first argument must be one of SQLITEASYNC_HALT,
|
||||
** SQLITEASYNC_DELAY or SQLITEASYNC_LOCKFILES. The second argument must
|
||||
** be passed the new value for the parameter as type "int".
|
||||
**
|
||||
** When querying the current value of a parameter, the first argument must
|
||||
** be one of SQLITEASYNC_GET_HALT, GET_DELAY or GET_LOCKFILES. The second
|
||||
** argument to this function must be of type (int *). The current value
|
||||
** of the queried parameter is copied to the memory pointed to by the
|
||||
** second argument. For example:
|
||||
**
|
||||
** int eCurrentHalt;
|
||||
** int eNewHalt = SQLITEASYNC_HALT_IDLE;
|
||||
**
|
||||
** sqlite3async_control(SQLITEASYNC_HALT, eNewHalt);
|
||||
** sqlite3async_control(SQLITEASYNC_GET_HALT, &eCurrentHalt);
|
||||
** assert( eNewHalt==eCurrentHalt );
|
||||
**
|
||||
** See below for more detail on each configuration parameter.
|
||||
**
|
||||
** SQLITEASYNC_HALT:
|
||||
**
|
||||
** This is used to set the value of the "halt" parameter. The second
|
||||
** argument must be one of the SQLITEASYNC_HALT_XXX symbols defined
|
||||
** below (one of NEVER, IDLE, or NOW).
|
||||
**
|
||||
** If the parameter is set to NEVER, then calls to sqlite3async_run()
|
||||
** never return. This is the default setting. If the parameter is set
|
||||
** to IDLE, then calls to sqlite3async_run() return as soon as the
|
||||
** queue of pending write operations is empty. If the parameter is set
|
||||
** to NOW, then calls to sqlite3async_run() return as quickly as
|
||||
** possible, without processing any pending write requests.
|
||||
**
|
||||
** If an attempt is made to set this parameter to an integer value other
|
||||
** than SQLITEASYNC_HALT_NEVER, IDLE or NOW, then sqlite3async_control()
|
||||
** returns SQLITE_MISUSE and the current value of the parameter is not
|
||||
** modified.
|
||||
**
|
||||
** Modifying the "halt" parameter affects calls to sqlite3async_run()
|
||||
** made by other threads that are currently in progress.
|
||||
**
|
||||
** SQLITEASYNC_DELAY:
|
||||
**
|
||||
** This is used to set the value of the "delay" parameter. If set to
|
||||
** a non-zero value, then after completing a pending write request, the
|
||||
** sqlite3async_run() function sleeps for the configured number of
|
||||
** milliseconds.
|
||||
**
|
||||
** If an attempt is made to set this parameter to a negative value,
|
||||
** sqlite3async_control() returns SQLITE_MISUSE and the current value
|
||||
** of the parameter is not modified.
|
||||
**
|
||||
** Modifying the "delay" parameter affects calls to sqlite3async_run()
|
||||
** made by other threads that are currently in progress.
|
||||
**
|
||||
** SQLITEASYNC_LOCKFILES:
|
||||
**
|
||||
** This is used to set the value of the "lockfiles" parameter. This
|
||||
** parameter must be set to either 0 or 1. If set to 1, then the
|
||||
** asynchronous IO VFS uses the xLock() and xUnlock() methods of the
|
||||
** parent VFS to lock database files being read and/or written. If
|
||||
** the parameter is set to 0, then these locks are omitted.
|
||||
**
|
||||
** This parameter may only be set when there are no open database
|
||||
** connections using the VFS and the queue of pending write requests
|
||||
** is empty. Attempting to set it when this is not true, or to set it
|
||||
** to a value other than 0 or 1 causes sqlite3async_control() to return
|
||||
** SQLITE_MISUSE and the value of the parameter to remain unchanged.
|
||||
**
|
||||
** If this parameter is set to zero, then it is only safe to access the
|
||||
** database via the asynchronous IO VFS from within a single process. If
|
||||
** while writing to the database via the asynchronous IO VFS the database
|
||||
** is also read or written from within another process, or via another
|
||||
** connection that does not use the asynchronous IO VFS within the same
|
||||
** process, the results are undefined (and may include crashes or database
|
||||
** corruption).
|
||||
**
|
||||
** Alternatively, if this parameter is set to 1, then it is safe to access
|
||||
** the database from multiple connections within multiple processes using
|
||||
** either the asynchronous IO VFS or the parent VFS directly.
|
||||
*/
|
||||
int sqlite3async_control(int op, ...);
|
||||
|
||||
/*
|
||||
** Values that can be used as the first argument to sqlite3async_control().
|
||||
*/
|
||||
#define SQLITEASYNC_HALT 1
|
||||
#define SQLITEASYNC_GET_HALT 2
|
||||
#define SQLITEASYNC_DELAY 3
|
||||
#define SQLITEASYNC_GET_DELAY 4
|
||||
#define SQLITEASYNC_LOCKFILES 5
|
||||
#define SQLITEASYNC_GET_LOCKFILES 6
|
||||
|
||||
/*
|
||||
** If the first argument to sqlite3async_control() is SQLITEASYNC_HALT,
|
||||
** the second argument should be one of the following.
|
||||
*/
|
||||
#define SQLITEASYNC_HALT_NEVER 0 /* Never halt (default value) */
|
||||
#define SQLITEASYNC_HALT_NOW 1 /* Halt as soon as possible */
|
||||
#define SQLITEASYNC_HALT_IDLE 2 /* Halt when write-queue is empty */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* End of the 'extern "C"' block */
|
||||
#endif
|
||||
#endif /* ifndef __SQLITEASYNC_H_ */
|
||||
|
||||
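
As a further illustration of the get/set convention described in the header
above, the short fragment below (not from the original sources) configures a
10 millisecond inter-write delay and disables file locking; the return-code
checks mirror the SQLITE_MISUSE behaviour documented above, and the function
is assumed to run after sqlite3async_initialize() but before any connections
use the "sqlite3async" VFS.

  #include <assert.h>
  #include "sqlite3.h"
  #include "sqlite3async.h"

  static int configure_async_vfs(void){
    int delay = 0;
    int rc;

    /* Set a 10 ms pause between queued writes, then read the value back. */
    rc = sqlite3async_control(SQLITEASYNC_DELAY, 10);
    if( rc!=SQLITE_OK ) return rc;
    rc = sqlite3async_control(SQLITEASYNC_GET_DELAY, &delay);
    assert( rc!=SQLITE_OK || delay==10 );

    /* Single-process use only: omit xLock()/xUnlock() calls entirely. */
    return sqlite3async_control(SQLITEASYNC_LOCKFILES, 0);
  }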
@ -1,2 +0,0 @@
This folder contains source code to the first full-text search
extension for SQLite.
@ -1,404 +0,0 @@
|
||||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the implementation of generic hash-tables used in SQLite.
|
||||
** We've modified it slightly to serve as a standalone hash table
|
||||
** implementation for the full-text indexing module.
|
||||
*/
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "ft_hash.h"
|
||||
|
||||
void *malloc_and_zero(int n){
|
||||
void *p = malloc(n);
|
||||
if( p ){
|
||||
memset(p, 0, n);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Turn bulk memory into a hash table object by initializing the
|
||||
** fields of the Hash structure.
|
||||
**
|
||||
** "pNew" is a pointer to the hash table that is to be initialized.
|
||||
** keyClass is one of the constants HASH_INT, HASH_POINTER,
|
||||
** HASH_BINARY, or HASH_STRING. The value of keyClass
|
||||
** determines what kind of key the hash table will use. "copyKey" is
|
||||
** true if the hash table should make its own private copy of keys and
|
||||
** false if it should just use the supplied pointer. CopyKey only makes
|
||||
** sense for HASH_STRING and HASH_BINARY and is ignored
|
||||
** for other key classes.
|
||||
*/
|
||||
void HashInit(Hash *pNew, int keyClass, int copyKey){
|
||||
assert( pNew!=0 );
|
||||
assert( keyClass>=HASH_STRING && keyClass<=HASH_BINARY );
|
||||
pNew->keyClass = keyClass;
|
||||
#if 0
|
||||
if( keyClass==HASH_POINTER || keyClass==HASH_INT ) copyKey = 0;
|
||||
#endif
|
||||
pNew->copyKey = copyKey;
|
||||
pNew->first = 0;
|
||||
pNew->count = 0;
|
||||
pNew->htsize = 0;
|
||||
pNew->ht = 0;
|
||||
pNew->xMalloc = malloc_and_zero;
|
||||
pNew->xFree = free;
|
||||
}
|
||||
|
||||
/* Remove all entries from a hash table. Reclaim all memory.
|
||||
** Call this routine to delete a hash table or to reset a hash table
|
||||
** to the empty state.
|
||||
*/
|
||||
void HashClear(Hash *pH){
|
||||
HashElem *elem; /* For looping over all elements of the table */
|
||||
|
||||
assert( pH!=0 );
|
||||
elem = pH->first;
|
||||
pH->first = 0;
|
||||
if( pH->ht ) pH->xFree(pH->ht);
|
||||
pH->ht = 0;
|
||||
pH->htsize = 0;
|
||||
while( elem ){
|
||||
HashElem *next_elem = elem->next;
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
pH->xFree(elem->pKey);
|
||||
}
|
||||
pH->xFree(elem);
|
||||
elem = next_elem;
|
||||
}
|
||||
pH->count = 0;
|
||||
}
|
||||
|
||||
#if 0 /* NOT USED */
|
||||
/*
|
||||
** Hash and comparison functions when the mode is HASH_INT
|
||||
*/
|
||||
static int intHash(const void *pKey, int nKey){
|
||||
return nKey ^ (nKey<<8) ^ (nKey>>8);
|
||||
}
|
||||
static int intCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
return n2 - n1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0 /* NOT USED */
|
||||
/*
|
||||
** Hash and comparison functions when the mode is HASH_POINTER
|
||||
*/
|
||||
static int ptrHash(const void *pKey, int nKey){
|
||||
uptr x = Addr(pKey);
|
||||
return x ^ (x<<8) ^ (x>>8);
|
||||
}
|
||||
static int ptrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( pKey1==pKey2 ) return 0;
|
||||
if( pKey1<pKey2 ) return -1;
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is HASH_STRING
|
||||
*/
|
||||
static int strHash(const void *pKey, int nKey){
|
||||
const char *z = (const char *)pKey;
|
||||
int h = 0;
|
||||
if( nKey<=0 ) nKey = (int) strlen(z);
|
||||
while( nKey > 0 ){
|
||||
h = (h<<3) ^ h ^ *z++;
|
||||
nKey--;
|
||||
}
|
||||
return h & 0x7fffffff;
|
||||
}
|
||||
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is HASH_BINARY
|
||||
*/
|
||||
static int binHash(const void *pKey, int nKey){
|
||||
int h = 0;
|
||||
const char *z = (const char *)pKey;
|
||||
while( nKey-- > 0 ){
|
||||
h = (h<<3) ^ h ^ *(z++);
|
||||
}
|
||||
return h & 0x7fffffff;
|
||||
}
|
||||
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return memcmp(pKey1,pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** The C syntax in this function definition may be unfamiliar to some
|
||||
** programmers, so we provide the following additional explanation:
|
||||
**
|
||||
** The name of the function is "hashFunction". The function takes a
|
||||
** single parameter "keyClass". The return value of hashFunction()
|
||||
** is a pointer to another function. Specifically, the return value
|
||||
** of hashFunction() is a pointer to a function that takes two parameters
|
||||
** with types "const void*" and "int" and returns an "int".
|
||||
*/
|
||||
static int (*hashFunction(int keyClass))(const void*,int){
|
||||
#if 0 /* HASH_INT and HASH_POINTER are never used */
|
||||
switch( keyClass ){
|
||||
case HASH_INT: return &intHash;
|
||||
case HASH_POINTER: return &ptrHash;
|
||||
case HASH_STRING: return &strHash;
|
||||
case HASH_BINARY: return &binHash;
|
||||
default: break;
|
||||
}
|
||||
return 0;
|
||||
#else
|
||||
if( keyClass==HASH_STRING ){
|
||||
return &strHash;
|
||||
}else{
|
||||
assert( keyClass==HASH_BINARY );
|
||||
return &binHash;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** For help in interpreting the obscure C code in the function definition,
|
||||
** see the header comment on the previous function.
|
||||
*/
|
||||
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
|
||||
#if 0 /* HASH_INT and HASH_POINTER are never used */
|
||||
switch( keyClass ){
|
||||
case HASH_INT: return &intCompare;
|
||||
case HASH_POINTER: return &ptrCompare;
|
||||
case HASH_STRING: return &strCompare;
|
||||
case HASH_BINARY: return &binCompare;
|
||||
default: break;
|
||||
}
|
||||
return 0;
|
||||
#else
|
||||
if( keyClass==HASH_STRING ){
|
||||
return &strCompare;
|
||||
}else{
|
||||
assert( keyClass==HASH_BINARY );
|
||||
return &binCompare;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Link an element into the hash table
|
||||
*/
|
||||
static void insertElement(
|
||||
Hash *pH, /* The complete hash table */
|
||||
struct _ht *pEntry, /* The entry into which pNew is inserted */
|
||||
HashElem *pNew /* The element to be inserted */
|
||||
){
|
||||
HashElem *pHead; /* First element already in pEntry */
|
||||
pHead = pEntry->chain;
|
||||
if( pHead ){
|
||||
pNew->next = pHead;
|
||||
pNew->prev = pHead->prev;
|
||||
if( pHead->prev ){ pHead->prev->next = pNew; }
|
||||
else { pH->first = pNew; }
|
||||
pHead->prev = pNew;
|
||||
}else{
|
||||
pNew->next = pH->first;
|
||||
if( pH->first ){ pH->first->prev = pNew; }
|
||||
pNew->prev = 0;
|
||||
pH->first = pNew;
|
||||
}
|
||||
pEntry->count++;
|
||||
pEntry->chain = pNew;
|
||||
}
|
||||
|
||||
|
||||
/* Resize the hash table so that it contains "new_size" buckets.
|
||||
** "new_size" must be a power of 2. The hash table might fail
|
||||
** to resize if the xMalloc() allocation fails.
|
||||
*/
|
||||
static void rehash(Hash *pH, int new_size){
|
||||
struct _ht *new_ht; /* The new hash table */
|
||||
HashElem *elem, *next_elem; /* For looping over existing elements */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( (new_size & (new_size-1))==0 );
|
||||
new_ht = (struct _ht *)pH->xMalloc( new_size*sizeof(struct _ht) );
|
||||
if( new_ht==0 ) return;
|
||||
if( pH->ht ) pH->xFree(pH->ht);
|
||||
pH->ht = new_ht;
|
||||
pH->htsize = new_size;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
|
||||
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
|
||||
next_elem = elem->next;
|
||||
insertElement(pH, &new_ht[h], elem);
|
||||
}
|
||||
}
|
||||
|
||||
/* This function (for internal use only) locates an element in a
|
||||
** hash table that matches the given key. The hash for this key has
|
||||
** already been computed and is passed as the 4th parameter.
|
||||
*/
|
||||
static HashElem *findElementGivenHash(
|
||||
const Hash *pH, /* The pH to be searched */
|
||||
const void *pKey, /* The key we are searching for */
|
||||
int nKey,
|
||||
int h /* The hash for this key. */
|
||||
){
|
||||
HashElem *elem; /* Used to loop thru the element list */
|
||||
int count; /* Number of elements left to test */
|
||||
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
|
||||
|
||||
if( pH->ht ){
|
||||
struct _ht *pEntry = &pH->ht[h];
|
||||
elem = pEntry->chain;
|
||||
count = pEntry->count;
|
||||
xCompare = compareFunction(pH->keyClass);
|
||||
while( count-- && elem ){
|
||||
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
|
||||
return elem;
|
||||
}
|
||||
elem = elem->next;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Remove a single entry from the hash table given a pointer to that
|
||||
** element and a hash on the element's key.
|
||||
*/
|
||||
static void removeElementGivenHash(
|
||||
Hash *pH, /* The pH containing "elem" */
|
||||
HashElem* elem, /* The element to be removed from the pH */
|
||||
int h /* Hash value for the element */
|
||||
){
|
||||
struct _ht *pEntry;
|
||||
if( elem->prev ){
|
||||
elem->prev->next = elem->next;
|
||||
}else{
|
||||
pH->first = elem->next;
|
||||
}
|
||||
if( elem->next ){
|
||||
elem->next->prev = elem->prev;
|
||||
}
|
||||
pEntry = &pH->ht[h];
|
||||
if( pEntry->chain==elem ){
|
||||
pEntry->chain = elem->next;
|
||||
}
|
||||
pEntry->count--;
|
||||
if( pEntry->count<=0 ){
|
||||
pEntry->chain = 0;
|
||||
}
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
pH->xFree(elem->pKey);
|
||||
}
|
||||
pH->xFree( elem );
|
||||
pH->count--;
|
||||
if( pH->count<=0 ){
|
||||
assert( pH->first==0 );
|
||||
assert( pH->count==0 );
|
||||
HashClear(pH);
|
||||
}
|
||||
}
|
||||
|
||||
/* Attempt to locate an element of the hash table pH with a key
|
||||
** that matches pKey,nKey. Return the data for this element if it is
|
||||
** found, or NULL if there is no match.
|
||||
*/
|
||||
void *HashFind(const Hash *pH, const void *pKey, int nKey){
|
||||
int h; /* A hash on key */
|
||||
HashElem *elem; /* The element that matches key */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
if( pH==0 || pH->ht==0 ) return 0;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
h = (*xHash)(pKey,nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
|
||||
return elem ? elem->data : 0;
|
||||
}
|
||||
|
||||
/* Insert an element into the hash table pH. The key is pKey,nKey
|
||||
** and the data is "data".
|
||||
**
|
||||
** If no element exists with a matching key, then a new
|
||||
** element is created. A copy of the key is made if the copyKey
|
||||
** flag is set. NULL is returned.
|
||||
**
|
||||
** If another element already exists with the same key, then the
|
||||
** new data replaces the old data and the old data is returned.
|
||||
** The key is not copied in this instance. If a malloc fails, then
|
||||
** the new data is returned and the hash table is unchanged.
|
||||
**
|
||||
** If the "data" parameter to this function is NULL, then the
|
||||
** element corresponding to "key" is removed from the hash table.
|
||||
*/
|
||||
void *HashInsert(Hash *pH, const void *pKey, int nKey, void *data){
|
||||
int hraw; /* Raw hash value of the key */
|
||||
int h; /* the hash of the key modulo hash table size */
|
||||
HashElem *elem; /* Used to loop thru the element list */
|
||||
HashElem *new_elem; /* New element added to the pH */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( pH!=0 );
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
hraw = (*xHash)(pKey, nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
elem = findElementGivenHash(pH,pKey,nKey,h);
|
||||
if( elem ){
|
||||
void *old_data = elem->data;
|
||||
if( data==0 ){
|
||||
removeElementGivenHash(pH,elem,h);
|
||||
}else{
|
||||
elem->data = data;
|
||||
}
|
||||
return old_data;
|
||||
}
|
||||
if( data==0 ) return 0;
|
||||
new_elem = (HashElem*)pH->xMalloc( sizeof(HashElem) );
|
||||
if( new_elem==0 ) return data;
|
||||
if( pH->copyKey && pKey!=0 ){
|
||||
new_elem->pKey = pH->xMalloc( nKey );
|
||||
if( new_elem->pKey==0 ){
|
||||
pH->xFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
memcpy((void*)new_elem->pKey, pKey, nKey);
|
||||
}else{
|
||||
new_elem->pKey = (void*)pKey;
|
||||
}
|
||||
new_elem->nKey = nKey;
|
||||
pH->count++;
|
||||
if( pH->htsize==0 ){
|
||||
rehash(pH,8);
|
||||
if( pH->htsize==0 ){
|
||||
pH->count = 0;
|
||||
pH->xFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
}
|
||||
if( pH->count > pH->htsize ){
|
||||
rehash(pH,pH->htsize*2);
|
||||
}
|
||||
assert( pH->htsize>0 );
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
insertElement(pH, &pH->ht[h], new_elem);
|
||||
new_elem->data = data;
|
||||
return 0;
|
||||
}
|
||||
@ -1,111 +0,0 @@
|
||||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the header file for the generic hash-table implementation
|
||||
** used in SQLite. We've modified it slightly to serve as a standalone
|
||||
** hash table implementation for the full-text indexing module.
|
||||
**
|
||||
*/
|
||||
#ifndef _HASH_H_
|
||||
#define _HASH_H_
|
||||
|
||||
/* Forward declarations of structures. */
|
||||
typedef struct Hash Hash;
|
||||
typedef struct HashElem HashElem;
|
||||
|
||||
/* A complete hash table is an instance of the following structure.
|
||||
** The internals of this structure are intended to be opaque -- client
|
||||
** code should not attempt to access or modify the fields of this structure
|
||||
** directly. Change this structure only by using the routines below.
|
||||
** However, many of the "procedures" and "functions" for modifying and
|
||||
** accessing this structure are really macros, so we can't really make
|
||||
** this structure opaque.
|
||||
*/
|
||||
struct Hash {
|
||||
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
|
||||
char copyKey; /* True if copy of key made on insert */
|
||||
int count; /* Number of entries in this table */
|
||||
HashElem *first; /* The first element of the array */
|
||||
void *(*xMalloc)(int); /* malloc() function to use */
|
||||
void (*xFree)(void *); /* free() function to use */
|
||||
int htsize; /* Number of buckets in the hash table */
|
||||
struct _ht { /* the hash table */
|
||||
int count; /* Number of entries with this hash */
|
||||
HashElem *chain; /* Pointer to first entry with this hash */
|
||||
} *ht;
|
||||
};
|
||||
|
||||
/* Each element in the hash table is an instance of the following
|
||||
** structure. All elements are stored on a single doubly-linked list.
|
||||
**
|
||||
** Again, this structure is intended to be opaque, but it can't really
|
||||
** be opaque because it is used by macros.
|
||||
*/
|
||||
struct HashElem {
|
||||
HashElem *next, *prev; /* Next and previous elements in the table */
|
||||
void *data; /* Data associated with this element */
|
||||
void *pKey; int nKey; /* Key associated with this element */
|
||||
};
|
||||
|
||||
/*
|
||||
** There are 4 different modes of operation for a hash table:
|
||||
**
|
||||
** HASH_INT nKey is used as the key and pKey is ignored.
|
||||
**
|
||||
** HASH_POINTER pKey is used as the key and nKey is ignored.
|
||||
**
|
||||
** HASH_STRING pKey points to a string that is nKey bytes long
|
||||
** (including the null-terminator, if any). Case
|
||||
** is respected in comparisons.
|
||||
**
|
||||
** HASH_BINARY pKey points to binary data nKey bytes long.
|
||||
** memcmp() is used to compare keys.
|
||||
**
|
||||
** A copy of the key is made for HASH_STRING and HASH_BINARY
|
||||
** if the copyKey parameter to HashInit is 1.
|
||||
*/
|
||||
/* #define HASH_INT 1 // NOT USED */
|
||||
/* #define HASH_POINTER 2 // NOT USED */
|
||||
#define HASH_STRING 3
|
||||
#define HASH_BINARY 4
|
||||
|
||||
/*
|
||||
** Access routines. To delete, insert a NULL pointer.
|
||||
*/
|
||||
void HashInit(Hash*, int keytype, int copyKey);
|
||||
void *HashInsert(Hash*, const void *pKey, int nKey, void *pData);
|
||||
void *HashFind(const Hash*, const void *pKey, int nKey);
|
||||
void HashClear(Hash*);
|
||||
|
||||
/*
|
||||
** Macros for looping over all elements of a hash table. The idiom is
|
||||
** like this:
|
||||
**
|
||||
** Hash h;
|
||||
** HashElem *p;
|
||||
** ...
|
||||
** for(p=HashFirst(&h); p; p=HashNext(p)){
|
||||
** SomeStructure *pData = HashData(p);
|
||||
** // do something with pData
|
||||
** }
|
||||
*/
|
||||
#define HashFirst(H) ((H)->first)
|
||||
#define HashNext(E) ((E)->next)
|
||||
#define HashData(E) ((E)->data)
|
||||
#define HashKey(E) ((E)->pKey)
|
||||
#define HashKeysize(E) ((E)->nKey)
|
||||
|
||||
/*
|
||||
** Number of entries in a hash table
|
||||
*/
|
||||
#define HashCount(H) ((H)->count)
|
||||
|
||||
#endif /* _HASH_H_ */
|
||||
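
Since the header above is the whole public surface of this standalone hash
table, a brief hypothetical usage sketch may help.  It is not part of the
original sources; it assumes ft_hash.c is compiled into the same program and
uses only the routines and macros declared above.

  #include <stdio.h>
  #include "ft_hash.h"

  int main(void){
    Hash h;
    HashElem *p;
    static char *azVal[] = { "uno", "dos", "tres" };

    /* String keys, copied into the table (copyKey=1); the key lengths
    ** passed below include the nul-terminator. */
    HashInit(&h, HASH_STRING, 1);
    HashInsert(&h, "one",   4, azVal[0]);
    HashInsert(&h, "two",   4, azVal[1]);
    HashInsert(&h, "three", 6, azVal[2]);

    printf("two -> %s\n", (char*)HashFind(&h, "two", 4)); /* prints "dos" */
    HashInsert(&h, "two", 4, 0);                          /* NULL data deletes */
    printf("count = %d\n", HashCount(&h));                /* now 2 entries */

    /* Iterate using the macros documented in the header. */
    for(p=HashFirst(&h); p; p=HashNext(p)){
      printf("%s -> %s\n", (char*)HashKey(p), (char*)HashData(p));
    }

    HashClear(&h);   /* release everything */
    return 0;
  }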
File diff suppressed because it is too large
Load Diff
@ -1,11 +0,0 @@
|
||||
#include "sqlite3.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
int sqlite3Fts1Init(sqlite3 *db);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
@ -1,369 +0,0 @@
|
||||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the implementation of generic hash-tables used in SQLite.
|
||||
** We've modified it slightly to serve as a standalone hash table
|
||||
** implementation for the full-text indexing module.
|
||||
*/
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS1 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS1 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
|
||||
|
||||
|
||||
#include "fts1_hash.h"
|
||||
|
||||
static void *malloc_and_zero(int n){
|
||||
void *p = malloc(n);
|
||||
if( p ){
|
||||
memset(p, 0, n);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Turn bulk memory into a hash table object by initializing the
|
||||
** fields of the Hash structure.
|
||||
**
|
||||
** "pNew" is a pointer to the hash table that is to be initialized.
|
||||
** keyClass is one of the constants
|
||||
** FTS1_HASH_BINARY or FTS1_HASH_STRING. The value of keyClass
|
||||
** determines what kind of key the hash table will use. "copyKey" is
|
||||
** true if the hash table should make its own private copy of keys and
|
||||
** false if it should just use the supplied pointer.
|
||||
*/
|
||||
void sqlite3Fts1HashInit(fts1Hash *pNew, int keyClass, int copyKey){
|
||||
assert( pNew!=0 );
|
||||
assert( keyClass>=FTS1_HASH_STRING && keyClass<=FTS1_HASH_BINARY );
|
||||
pNew->keyClass = keyClass;
|
||||
pNew->copyKey = copyKey;
|
||||
pNew->first = 0;
|
||||
pNew->count = 0;
|
||||
pNew->htsize = 0;
|
||||
pNew->ht = 0;
|
||||
pNew->xMalloc = malloc_and_zero;
|
||||
pNew->xFree = free;
|
||||
}
|
||||
|
||||
/* Remove all entries from a hash table. Reclaim all memory.
|
||||
** Call this routine to delete a hash table or to reset a hash table
|
||||
** to the empty state.
|
||||
*/
|
||||
void sqlite3Fts1HashClear(fts1Hash *pH){
|
||||
fts1HashElem *elem; /* For looping over all elements of the table */
|
||||
|
||||
assert( pH!=0 );
|
||||
elem = pH->first;
|
||||
pH->first = 0;
|
||||
if( pH->ht ) pH->xFree(pH->ht);
|
||||
pH->ht = 0;
|
||||
pH->htsize = 0;
|
||||
while( elem ){
|
||||
fts1HashElem *next_elem = elem->next;
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
pH->xFree(elem->pKey);
|
||||
}
|
||||
pH->xFree(elem);
|
||||
elem = next_elem;
|
||||
}
|
||||
pH->count = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is FTS1_HASH_STRING
|
||||
*/
|
||||
static int strHash(const void *pKey, int nKey){
|
||||
const char *z = (const char *)pKey;
|
||||
int h = 0;
|
||||
if( nKey<=0 ) nKey = (int) strlen(z);
|
||||
while( nKey > 0 ){
|
||||
h = (h<<3) ^ h ^ *z++;
|
||||
nKey--;
|
||||
}
|
||||
return h & 0x7fffffff;
|
||||
}
|
||||
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is FTS1_HASH_BINARY
|
||||
*/
|
||||
static int binHash(const void *pKey, int nKey){
|
||||
int h = 0;
|
||||
const char *z = (const char *)pKey;
|
||||
while( nKey-- > 0 ){
|
||||
h = (h<<3) ^ h ^ *(z++);
|
||||
}
|
||||
return h & 0x7fffffff;
|
||||
}
|
||||
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return memcmp(pKey1,pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** The C syntax in this function definition may be unfamiliar to some
|
||||
** programmers, so we provide the following additional explanation:
|
||||
**
|
||||
** The name of the function is "hashFunction". The function takes a
|
||||
** single parameter "keyClass". The return value of hashFunction()
|
||||
** is a pointer to another function. Specifically, the return value
|
||||
** of hashFunction() is a pointer to a function that takes two parameters
|
||||
** with types "const void*" and "int" and returns an "int".
|
||||
*/
|
||||
static int (*hashFunction(int keyClass))(const void*,int){
|
||||
if( keyClass==FTS1_HASH_STRING ){
|
||||
return &strHash;
|
||||
}else{
|
||||
assert( keyClass==FTS1_HASH_BINARY );
|
||||
return &binHash;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** For help in interpreting the obscure C code in the function definition,
|
||||
** see the header comment on the previous function.
|
||||
*/
|
||||
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
|
||||
if( keyClass==FTS1_HASH_STRING ){
|
||||
return &strCompare;
|
||||
}else{
|
||||
assert( keyClass==FTS1_HASH_BINARY );
|
||||
return &binCompare;
|
||||
}
|
||||
}
|
||||
|
||||
/* Link an element into the hash table
|
||||
*/
|
||||
static void insertElement(
|
||||
fts1Hash *pH, /* The complete hash table */
|
||||
struct _fts1ht *pEntry, /* The entry into which pNew is inserted */
|
||||
fts1HashElem *pNew /* The element to be inserted */
|
||||
){
|
||||
fts1HashElem *pHead; /* First element already in pEntry */
|
||||
pHead = pEntry->chain;
|
||||
if( pHead ){
|
||||
pNew->next = pHead;
|
||||
pNew->prev = pHead->prev;
|
||||
if( pHead->prev ){ pHead->prev->next = pNew; }
|
||||
else { pH->first = pNew; }
|
||||
pHead->prev = pNew;
|
||||
}else{
|
||||
pNew->next = pH->first;
|
||||
if( pH->first ){ pH->first->prev = pNew; }
|
||||
pNew->prev = 0;
|
||||
pH->first = pNew;
|
||||
}
|
||||
pEntry->count++;
|
||||
pEntry->chain = pNew;
|
||||
}
|
||||
|
||||
|
||||
/* Resize the hash table so that it contains "new_size" buckets.
|
||||
** "new_size" must be a power of 2. The hash table might fail
|
||||
** to resize if the xMalloc() allocation fails.
|
||||
*/
|
||||
static void rehash(fts1Hash *pH, int new_size){
|
||||
struct _fts1ht *new_ht; /* The new hash table */
|
||||
fts1HashElem *elem, *next_elem; /* For looping over existing elements */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( (new_size & (new_size-1))==0 );
|
||||
new_ht = (struct _fts1ht *)pH->xMalloc( new_size*sizeof(struct _fts1ht) );
|
||||
if( new_ht==0 ) return;
|
||||
if( pH->ht ) pH->xFree(pH->ht);
|
||||
pH->ht = new_ht;
|
||||
pH->htsize = new_size;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
|
||||
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
|
||||
next_elem = elem->next;
|
||||
insertElement(pH, &new_ht[h], elem);
|
||||
}
|
||||
}
|
||||
|
||||
/* This function (for internal use only) locates an element in a
|
||||
** hash table that matches the given key. The hash for this key has
|
||||
** already been computed and is passed as the 4th parameter.
|
||||
*/
|
||||
static fts1HashElem *findElementGivenHash(
|
||||
const fts1Hash *pH, /* The pH to be searched */
|
||||
const void *pKey, /* The key we are searching for */
|
||||
int nKey,
|
||||
int h /* The hash for this key. */
|
||||
){
|
||||
fts1HashElem *elem; /* Used to loop thru the element list */
|
||||
int count; /* Number of elements left to test */
|
||||
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
|
||||
|
||||
if( pH->ht ){
|
||||
struct _fts1ht *pEntry = &pH->ht[h];
|
||||
elem = pEntry->chain;
|
||||
count = pEntry->count;
|
||||
xCompare = compareFunction(pH->keyClass);
|
||||
while( count-- && elem ){
|
||||
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
|
||||
return elem;
|
||||
}
|
||||
elem = elem->next;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Remove a single entry from the hash table given a pointer to that
|
||||
** element and a hash on the element's key.
|
||||
*/
|
||||
static void removeElementGivenHash(
|
||||
fts1Hash *pH, /* The pH containing "elem" */
|
||||
fts1HashElem* elem, /* The element to be removed from the pH */
|
||||
int h /* Hash value for the element */
|
||||
){
|
||||
struct _fts1ht *pEntry;
|
||||
if( elem->prev ){
|
||||
elem->prev->next = elem->next;
|
||||
}else{
|
||||
pH->first = elem->next;
|
||||
}
|
||||
if( elem->next ){
|
||||
elem->next->prev = elem->prev;
|
||||
}
|
||||
pEntry = &pH->ht[h];
|
||||
if( pEntry->chain==elem ){
|
||||
pEntry->chain = elem->next;
|
||||
}
|
||||
pEntry->count--;
|
||||
if( pEntry->count<=0 ){
|
||||
pEntry->chain = 0;
|
||||
}
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
pH->xFree(elem->pKey);
|
||||
}
|
||||
pH->xFree( elem );
|
||||
pH->count--;
|
||||
if( pH->count<=0 ){
|
||||
assert( pH->first==0 );
|
||||
assert( pH->count==0 );
|
||||
fts1HashClear(pH);
|
||||
}
|
||||
}
|
||||
|
||||
/* Attempt to locate an element of the hash table pH with a key
|
||||
** that matches pKey,nKey. Return the data for this element if it is
|
||||
** found, or NULL if there is no match.
|
||||
*/
|
||||
void *sqlite3Fts1HashFind(const fts1Hash *pH, const void *pKey, int nKey){
|
||||
int h; /* A hash on key */
|
||||
fts1HashElem *elem; /* The element that matches key */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
if( pH==0 || pH->ht==0 ) return 0;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
h = (*xHash)(pKey,nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
|
||||
return elem ? elem->data : 0;
|
||||
}
|
||||
|
||||
/* Insert an element into the hash table pH. The key is pKey,nKey
|
||||
** and the data is "data".
|
||||
**
|
||||
** If no element exists with a matching key, then a new
|
||||
** element is created. A copy of the key is made if the copyKey
|
||||
** flag is set. NULL is returned.
|
||||
**
|
||||
** If another element already exists with the same key, then the
|
||||
** new data replaces the old data and the old data is returned.
|
||||
** The key is not copied in this instance. If a malloc fails, then
|
||||
** the new data is returned and the hash table is unchanged.
|
||||
**
|
||||
** If the "data" parameter to this function is NULL, then the
|
||||
** element corresponding to "key" is removed from the hash table.
|
||||
*/
|
||||
void *sqlite3Fts1HashInsert(
|
||||
fts1Hash *pH, /* The hash table to insert into */
|
||||
const void *pKey, /* The key */
|
||||
int nKey, /* Number of bytes in the key */
|
||||
void *data /* The data */
|
||||
){
|
||||
int hraw; /* Raw hash value of the key */
|
||||
int h; /* the hash of the key modulo hash table size */
|
||||
fts1HashElem *elem; /* Used to loop thru the element list */
|
||||
fts1HashElem *new_elem; /* New element added to the pH */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( pH!=0 );
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
hraw = (*xHash)(pKey, nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
elem = findElementGivenHash(pH,pKey,nKey,h);
|
||||
if( elem ){
|
||||
void *old_data = elem->data;
|
||||
if( data==0 ){
|
||||
removeElementGivenHash(pH,elem,h);
|
||||
}else{
|
||||
elem->data = data;
|
||||
}
|
||||
return old_data;
|
||||
}
|
||||
if( data==0 ) return 0;
|
||||
new_elem = (fts1HashElem*)pH->xMalloc( sizeof(fts1HashElem) );
|
||||
if( new_elem==0 ) return data;
|
||||
if( pH->copyKey && pKey!=0 ){
|
||||
new_elem->pKey = pH->xMalloc( nKey );
|
||||
if( new_elem->pKey==0 ){
|
||||
pH->xFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
memcpy((void*)new_elem->pKey, pKey, nKey);
|
||||
}else{
|
||||
new_elem->pKey = (void*)pKey;
|
||||
}
|
||||
new_elem->nKey = nKey;
|
||||
pH->count++;
|
||||
if( pH->htsize==0 ){
|
||||
rehash(pH,8);
|
||||
if( pH->htsize==0 ){
|
||||
pH->count = 0;
|
||||
pH->xFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
}
|
||||
if( pH->count > pH->htsize ){
|
||||
rehash(pH,pH->htsize*2);
|
||||
}
|
||||
assert( pH->htsize>0 );
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
insertElement(pH, &pH->ht[h], new_elem);
|
||||
new_elem->data = data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */
|
||||
@ -1,112 +0,0 @@
|
||||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the header file for the generic hash-table implementation
|
||||
** used in SQLite. We've modified it slightly to serve as a standalone
|
||||
** hash table implementation for the full-text indexing module.
|
||||
**
|
||||
*/
|
||||
#ifndef _FTS1_HASH_H_
|
||||
#define _FTS1_HASH_H_
|
||||
|
||||
/* Forward declarations of structures. */
|
||||
typedef struct fts1Hash fts1Hash;
|
||||
typedef struct fts1HashElem fts1HashElem;
|
||||
|
||||
/* A complete hash table is an instance of the following structure.
|
||||
** The internals of this structure are intended to be opaque -- client
|
||||
** code should not attempt to access or modify the fields of this structure
|
||||
** directly. Change this structure only by using the routines below.
|
||||
** However, many of the "procedures" and "functions" for modifying and
|
||||
** accessing this structure are really macros, so we can't really make
|
||||
** this structure opaque.
|
||||
*/
|
||||
struct fts1Hash {
|
||||
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
|
||||
char copyKey; /* True if copy of key made on insert */
|
||||
int count; /* Number of entries in this table */
|
||||
fts1HashElem *first; /* The first element of the array */
|
||||
void *(*xMalloc)(int); /* malloc() function to use */
|
||||
void (*xFree)(void *); /* free() function to use */
|
||||
int htsize; /* Number of buckets in the hash table */
|
||||
struct _fts1ht { /* the hash table */
|
||||
int count; /* Number of entries with this hash */
|
||||
fts1HashElem *chain; /* Pointer to first entry with this hash */
|
||||
} *ht;
|
||||
};
|
||||
|
||||
/* Each element in the hash table is an instance of the following
|
||||
** structure. All elements are stored on a single doubly-linked list.
|
||||
**
|
||||
** Again, this structure is intended to be opaque, but it can't really
|
||||
** be opaque because it is used by macros.
|
||||
*/
|
||||
struct fts1HashElem {
|
||||
fts1HashElem *next, *prev; /* Next and previous elements in the table */
|
||||
void *data; /* Data associated with this element */
|
||||
void *pKey; int nKey; /* Key associated with this element */
|
||||
};
|
||||
|
||||
/*
|
||||
** There are 2 different modes of operation for a hash table:
|
||||
**
|
||||
** FTS1_HASH_STRING pKey points to a string that is nKey bytes long
|
||||
** (including the null-terminator, if any). Case
|
||||
** is respected in comparisons.
|
||||
**
|
||||
** FTS1_HASH_BINARY pKey points to binary data nKey bytes long.
|
||||
** memcmp() is used to compare keys.
|
||||
**
|
||||
** A copy of the key is made if the copyKey parameter to fts1HashInit is 1.
|
||||
*/
|
||||
#define FTS1_HASH_STRING 1
|
||||
#define FTS1_HASH_BINARY 2
|
||||
|
||||
/*
|
||||
** Access routines. To delete, insert a NULL pointer.
|
||||
*/
|
||||
void sqlite3Fts1HashInit(fts1Hash*, int keytype, int copyKey);
|
||||
void *sqlite3Fts1HashInsert(fts1Hash*, const void *pKey, int nKey, void *pData);
|
||||
void *sqlite3Fts1HashFind(const fts1Hash*, const void *pKey, int nKey);
|
||||
void sqlite3Fts1HashClear(fts1Hash*);
|
||||
|
||||
/*
|
||||
** Shorthand for the functions above
|
||||
*/
|
||||
#define fts1HashInit sqlite3Fts1HashInit
|
||||
#define fts1HashInsert sqlite3Fts1HashInsert
|
||||
#define fts1HashFind sqlite3Fts1HashFind
|
||||
#define fts1HashClear sqlite3Fts1HashClear
|
||||
|
||||
/*
|
||||
** Macros for looping over all elements of a hash table. The idiom is
|
||||
** like this:
|
||||
**
|
||||
** fts1Hash h;
|
||||
** fts1HashElem *p;
|
||||
** ...
|
||||
** for(p=fts1HashFirst(&h); p; p=fts1HashNext(p)){
|
||||
** SomeStructure *pData = fts1HashData(p);
|
||||
** // do something with pData
|
||||
** }
|
||||
*/
|
||||
#define fts1HashFirst(H) ((H)->first)
|
||||
#define fts1HashNext(E) ((E)->next)
|
||||
#define fts1HashData(E) ((E)->data)
|
||||
#define fts1HashKey(E) ((E)->pKey)
|
||||
#define fts1HashKeysize(E) ((E)->nKey)
|
||||
|
||||
/*
|
||||
** Number of entries in a hash table
|
||||
*/
|
||||
#define fts1HashCount(H) ((H)->count)
|
||||
|
||||
#endif /* _FTS1_HASH_H_ */
|
||||
@ -1,643 +0,0 @@
|
||||
/*
|
||||
** 2006 September 30
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** Implementation of the full-text-search tokenizer that implements
|
||||
** a Porter stemmer.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS1 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS1 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "fts1_tokenizer.h"
|
||||
|
||||
/*
|
||||
** Class derived from sqlite3_tokenizer
|
||||
*/
|
||||
typedef struct porter_tokenizer {
|
||||
sqlite3_tokenizer base; /* Base class */
|
||||
} porter_tokenizer;
|
||||
|
||||
/*
** Class derived from sqlite3_tokenizer_cursor
*/
|
||||
typedef struct porter_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *zInput; /* input we are tokenizing */
|
||||
int nInput; /* size of the input */
|
||||
int iOffset; /* current position in zInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *zToken; /* storage for current token */
|
||||
int nAllocated; /* space allocated to zToken buffer */
|
||||
} porter_tokenizer_cursor;
|
||||
|
||||
|
||||
/* Forward declaration */
|
||||
static const sqlite3_tokenizer_module porterTokenizerModule;
|
||||
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int porterCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
porter_tokenizer *t;
|
||||
t = (porter_tokenizer *) calloc(sizeof(*t), 1);
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
free(pTokenizer);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is zInput[0..nInput-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int porterOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *zInput, int nInput, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
porter_tokenizer_cursor *c;
|
||||
|
||||
c = (porter_tokenizer_cursor *) malloc(sizeof(*c));
|
||||
if( c==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
c->zInput = zInput;
|
||||
if( zInput==0 ){
|
||||
c->nInput = 0;
|
||||
}else if( nInput<0 ){
|
||||
c->nInput = (int)strlen(zInput);
|
||||
}else{
|
||||
c->nInput = nInput;
|
||||
}
|
||||
c->iOffset = 0; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->zToken = NULL; /* no space allocated, yet. */
|
||||
c->nAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** porterOpen() above.
|
||||
*/
|
||||
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
|
||||
free(c->zToken);
|
||||
free(c);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
/*
|
||||
** Vowel or consonant
|
||||
*/
|
||||
static const char cType[] = {
|
||||
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
|
||||
1, 1, 1, 2, 1
|
||||
};
|
||||
|
||||
/*
** isConsonant() and isVowel() determine if the first character of
** the string they point to is a consonant or a vowel, according
** to Porter's rules.
**
** A consonant is any letter other than 'a', 'e', 'i', 'o', or 'u'.
** 'Y' is a consonant unless it follows another consonant,
** in which case it is a vowel.
**
** In these routines, the letters are in reverse order.  So the 'y' rule
** is that 'y' is a consonant unless it is followed by another
** consonant.
*/
|
||||
static int isVowel(const char*);
|
||||
static int isConsonant(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return j;
|
||||
return z[1]==0 || isVowel(z + 1);
|
||||
}
|
||||
static int isVowel(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return 1-j;
|
||||
return isConsonant(z + 1);
|
||||
}
|
||||
|
||||
/*
** Let any sequence of one or more vowels be represented by V and let
** C be a sequence of one or more consonants.  Then every word can be
** represented as:
**
**           [C] (VC){m} [V]
**
** In prose:  A word is an optional consonant sequence followed by zero
** or more vowel-consonant pairs followed by an optional vowel.  "m" is
** the number of vowel-consonant pairs.  This routine computes the
** value of m for the first i bytes of a word.
**
** Return true if the m-value for z is 1 or more.  In other words,
** return true if z contains at least one vowel that is followed
** by a consonant.
**
** In this routine z[] is in reverse order.  So we are really looking
** for an instance of a consonant followed by a vowel.
*/
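/* Worked examples of the m-value above (illustrative; not part of the
** original source), shown with the words in normal, un-reversed order:
**
**     "tree"      ->  [tr][ee]              m = 0
**     "trouble"   ->  [tr][ou][bl][e]       m = 1
**     "troubles"  ->  [tr][ou][bl][e][s]    m = 2
**
** m_gt_0() below answers "is m >= 1?" for the reversed word it is given.
*/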
|
||||
static int m_gt_0(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/* Like m_gt_0() above except we are looking for a value of m which is
** exactly 1
*/
|
||||
static int m_eq_1(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 1;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z==0;
|
||||
}
|
||||
|
||||
/* Like m_gt_0() above except we are looking for a value of m>1 instead
** of m>0
*/
|
||||
static int m_gt_1(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if there is a vowel anywhere within z[0..n-1]
|
||||
*/
|
||||
static int hasVowel(const char *z){
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if the word ends in a double consonant.
|
||||
**
|
||||
** The text is reversed here. So we are really looking at
|
||||
** the first two characters of z[].
|
||||
*/
|
||||
static int doubleConsonant(const char *z){
|
||||
return isConsonant(z) && z[0]==z[1] && isConsonant(z+1);
|
||||
}
|
||||
|
||||
/*
** Return TRUE if the word ends with three letters which
** are consonant-vowel-consonant and where the final consonant
** is not 'w', 'x', or 'y'.
**
** The word is reversed here.  So we are really checking the
** first three letters and the first one cannot be in [wxy].
*/
|
||||
static int star_oh(const char *z){
|
||||
return
|
||||
z[0]!=0 && isConsonant(z) &&
|
||||
z[0]!='w' && z[0]!='x' && z[0]!='y' &&
|
||||
z[1]!=0 && isVowel(z+1) &&
|
||||
z[2]!=0 && isConsonant(z+2);
|
||||
}
|
||||
|
||||
/*
** If the word ends with zFrom and xCond() is true for the stem
** of the word that precedes the zFrom ending, then change the
** ending to zTo.
**
** The input word *pz and zFrom are both in reverse order.  zTo
** is in normal order.
**
** Return TRUE if zFrom matches.  Return FALSE if zFrom does not
** match.  Note that TRUE is returned even if xCond() fails and
** no substitution occurs.
*/
|
||||
static int stem(
|
||||
char **pz, /* The word being stemmed (Reversed) */
|
||||
const char *zFrom, /* If the ending matches this... (Reversed) */
|
||||
const char *zTo, /* ... change the ending to this (not reversed) */
|
||||
int (*xCond)(const char*) /* Condition that must be true */
|
||||
){
|
||||
char *z = *pz;
|
||||
while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
|
||||
if( *zFrom!=0 ) return 0;
|
||||
if( xCond && !xCond(z) ) return 1;
|
||||
while( *zTo ){
|
||||
*(--z) = *(zTo++);
|
||||
}
|
||||
*pz = z;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
** This is the fallback stemmer used when the porter stemmer is
** inappropriate.  The input word is copied into the output with
** US-ASCII case folding.  If the input word is too long (more
** than 20 bytes if it contains no digits or more than 6 bytes if
** it contains digits) then the word is truncated to 20 or 6 bytes
** by taking 10 or 3 bytes from the beginning and end.
*/
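/* Illustrative traces of the truncation rule just described (not part
** of the original source):
**
**     "counterrevolutionaries"   22 bytes, no digits
**         -> first 10 + last 10 bytes -> "counterrevutionaries"
**     "abc12345xyz"              11 bytes, contains digits
**         -> first 3 + last 3 bytes   -> "abcxyz"
*/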
|
||||
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
|
||||
int i, mx, j;
|
||||
int hasDigit = 0;
|
||||
for(i=0; i<nIn; i++){
|
||||
int c = zIn[i];
|
||||
if( c>='A' && c<='Z' ){
|
||||
zOut[i] = c - 'A' + 'a';
|
||||
}else{
|
||||
if( c>='0' && c<='9' ) hasDigit = 1;
|
||||
zOut[i] = c;
|
||||
}
|
||||
}
|
||||
mx = hasDigit ? 3 : 10;
|
||||
if( nIn>mx*2 ){
|
||||
for(j=mx, i=nIn-mx; i<nIn; i++, j++){
|
||||
zOut[j] = zOut[i];
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
zOut[i] = 0;
|
||||
*pnOut = i;
|
||||
}
|
||||
|
||||
|
||||
/*
** Stem the input word zIn[0..nIn-1].  Store the output in zOut.
** zOut is at least big enough to hold nIn bytes.  Write the actual
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
**
** Any upper-case characters in the US-ASCII character set ([A-Z])
** are converted to lower case.  Upper-case UTF characters are
** unchanged.
**
** Words that are longer than about 20 bytes are stemmed by retaining
** a few bytes from the beginning and the end of the word.  If the
** word contains digits, 3 bytes are taken from the beginning and
** 3 bytes from the end.  For long words without digits, 10 bytes
** are taken from each end.  US-ASCII case folding still applies.
**
** If the input word contains no digits but does contain characters
** not in [a-zA-Z] then no stemming is attempted and this routine
** just copies the input into the output with US-ASCII case folding.
**
** Stemming never increases the length of the word.  So there is
** no chance of overflowing the zOut buffer.
*/
|
||||
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
|
||||
int i, j, c;
|
||||
char zReverse[28];
|
||||
char *z, *z2;
|
||||
if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
|
||||
/* The word is too big or too small for the porter stemmer.
|
||||
** Fallback to the copy stemmer */
|
||||
copy_stemmer(zIn, nIn, zOut, pnOut);
|
||||
return;
|
||||
}
|
||||
for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
|
||||
c = zIn[i];
|
||||
if( c>='A' && c<='Z' ){
|
||||
zReverse[j] = c + 'a' - 'A';
|
||||
}else if( c>='a' && c<='z' ){
|
||||
zReverse[j] = c;
|
||||
}else{
|
||||
/* The use of a character not in [a-zA-Z] means that we fallback
|
||||
** to the copy stemmer */
|
||||
copy_stemmer(zIn, nIn, zOut, pnOut);
|
||||
return;
|
||||
}
|
||||
}
|
||||
memset(&zReverse[sizeof(zReverse)-5], 0, 5);
|
||||
z = &zReverse[j+1];
|
||||
|
||||
|
||||
/* Step 1a */
|
||||
if( z[0]=='s' ){
|
||||
if(
|
||||
!stem(&z, "sess", "ss", 0) &&
|
||||
!stem(&z, "sei", "i", 0) &&
|
||||
!stem(&z, "ss", "ss", 0)
|
||||
){
|
||||
z++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 1b */
|
||||
z2 = z;
|
||||
if( stem(&z, "dee", "ee", m_gt_0) ){
|
||||
/* Do nothing. The work was all in the test */
|
||||
}else if(
|
||||
(stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
|
||||
&& z!=z2
|
||||
){
|
||||
if( stem(&z, "ta", "ate", 0) ||
|
||||
stem(&z, "lb", "ble", 0) ||
|
||||
stem(&z, "zi", "ize", 0) ){
|
||||
/* Do nothing. The work was all in the test */
|
||||
}else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
|
||||
z++;
|
||||
}else if( m_eq_1(z) && star_oh(z) ){
|
||||
*(--z) = 'e';
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 1c */
|
||||
if( z[0]=='y' && hasVowel(z+1) ){
|
||||
z[0] = 'i';
|
||||
}
|
||||
|
||||
/* Step 2 */
|
||||
switch( z[1] ){
|
||||
case 'a':
|
||||
stem(&z, "lanoita", "ate", m_gt_0) ||
|
||||
stem(&z, "lanoit", "tion", m_gt_0);
|
||||
break;
|
||||
case 'c':
|
||||
stem(&z, "icne", "ence", m_gt_0) ||
|
||||
stem(&z, "icna", "ance", m_gt_0);
|
||||
break;
|
||||
case 'e':
|
||||
stem(&z, "rezi", "ize", m_gt_0);
|
||||
break;
|
||||
case 'g':
|
||||
stem(&z, "igol", "log", m_gt_0);
|
||||
break;
|
||||
case 'l':
|
||||
stem(&z, "ilb", "ble", m_gt_0) ||
|
||||
stem(&z, "illa", "al", m_gt_0) ||
|
||||
stem(&z, "iltne", "ent", m_gt_0) ||
|
||||
stem(&z, "ile", "e", m_gt_0) ||
|
||||
stem(&z, "ilsuo", "ous", m_gt_0);
|
||||
break;
|
||||
case 'o':
|
||||
stem(&z, "noitazi", "ize", m_gt_0) ||
|
||||
stem(&z, "noita", "ate", m_gt_0) ||
|
||||
stem(&z, "rota", "ate", m_gt_0);
|
||||
break;
|
||||
case 's':
|
||||
stem(&z, "msila", "al", m_gt_0) ||
|
||||
stem(&z, "ssenevi", "ive", m_gt_0) ||
|
||||
stem(&z, "ssenluf", "ful", m_gt_0) ||
|
||||
stem(&z, "ssensuo", "ous", m_gt_0);
|
||||
break;
|
||||
case 't':
|
||||
stem(&z, "itila", "al", m_gt_0) ||
|
||||
stem(&z, "itivi", "ive", m_gt_0) ||
|
||||
stem(&z, "itilib", "ble", m_gt_0);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 3 */
|
||||
switch( z[0] ){
|
||||
case 'e':
|
||||
stem(&z, "etaci", "ic", m_gt_0) ||
|
||||
stem(&z, "evita", "", m_gt_0) ||
|
||||
stem(&z, "ezila", "al", m_gt_0);
|
||||
break;
|
||||
case 'i':
|
||||
stem(&z, "itici", "ic", m_gt_0);
|
||||
break;
|
||||
case 'l':
|
||||
stem(&z, "laci", "ic", m_gt_0) ||
|
||||
stem(&z, "luf", "", m_gt_0);
|
||||
break;
|
||||
case 's':
|
||||
stem(&z, "ssen", "", m_gt_0);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 4 */
|
||||
switch( z[1] ){
|
||||
case 'a':
|
||||
if( z[0]=='l' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'c':
|
||||
if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
|
||||
z += 4;
|
||||
}
|
||||
break;
|
||||
case 'e':
|
||||
if( z[0]=='r' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'i':
|
||||
if( z[0]=='c' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'l':
|
||||
if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
|
||||
z += 4;
|
||||
}
|
||||
break;
|
||||
case 'n':
|
||||
if( z[0]=='t' ){
|
||||
if( z[2]=='a' ){
|
||||
if( m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
}else if( z[2]=='e' ){
|
||||
stem(&z, "tneme", "", m_gt_1) ||
|
||||
stem(&z, "tnem", "", m_gt_1) ||
|
||||
stem(&z, "tne", "", m_gt_1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'o':
|
||||
if( z[0]=='u' ){
|
||||
if( m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
}else if( z[3]=='s' || z[3]=='t' ){
|
||||
stem(&z, "noi", "", m_gt_1);
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
stem(&z, "eta", "", m_gt_1) ||
|
||||
stem(&z, "iti", "", m_gt_1);
|
||||
break;
|
||||
case 'u':
|
||||
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
case 'v':
|
||||
case 'z':
|
||||
if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 5a */
|
||||
if( z[0]=='e' ){
|
||||
if( m_gt_1(z+1) ){
|
||||
z++;
|
||||
}else if( m_eq_1(z+1) && !star_oh(z+1) ){
|
||||
z++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 5b */
|
||||
if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
|
||||
z++;
|
||||
}
|
||||
|
||||
/* z[] is now the stemmed word in reverse order. Flip it back
|
||||
** around into forward order and return.
|
||||
*/
|
||||
*pnOut = i = strlen(z);
|
||||
zOut[i] = 0;
|
||||
while( *z ){
|
||||
zOut[--i] = *(z++);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Characters that can be part of a token. We assume any character
|
||||
** whose value is greater than 0x80 (any UTF character) can be
|
||||
** part of a token. In other words, delimiters all must have
|
||||
** values of 0x7f or lower.
|
||||
*/
|
||||
static const char isIdChar[] = {
|
||||
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
|
||||
};
|
||||
#define idChar(C) (((ch=C)&0x80)!=0 || (ch>0x2f && isIdChar[ch-0x30]))
|
||||
#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !isIdChar[ch-0x30]))
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to porterOpen().
|
||||
*/
|
||||
static int porterNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
|
||||
const char **pzToken, /* OUT: *pzToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
|
||||
const char *z = c->zInput;
|
||||
|
||||
while( c->iOffset<c->nInput ){
|
||||
int iStartOffset, ch;
|
||||
|
||||
/* Scan past delimiter characters */
|
||||
while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
/* Count non-delimiter characters. */
|
||||
iStartOffset = c->iOffset;
|
||||
while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
if( c->iOffset>iStartOffset ){
|
||||
int n = c->iOffset-iStartOffset;
|
||||
if( n>c->nAllocated ){
|
||||
c->nAllocated = n+20;
|
||||
c->zToken = realloc(c->zToken, c->nAllocated);
|
||||
if( c->zToken==NULL ) return SQLITE_NOMEM;
|
||||
}
|
||||
porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
|
||||
*pzToken = c->zToken;
|
||||
*piStartOffset = iStartOffset;
|
||||
*piEndOffset = c->iOffset;
|
||||
*piPosition = c->iToken++;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the porter-stemmer tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module porterTokenizerModule = {
|
||||
0,
|
||||
porterCreate,
|
||||
porterDestroy,
|
||||
porterOpen,
|
||||
porterClose,
|
||||
porterNext,
|
||||
};
|
||||
|
||||
/*
|
||||
** Allocate a new porter tokenizer. Return a pointer to the new
|
||||
** tokenizer in *ppModule
|
||||
*/
|
||||
void sqlite3Fts1PorterTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &porterTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */
|
||||
@ -1,90 +0,0 @@
|
||||
/*
|
||||
** 2006 July 10
|
||||
**
|
||||
** The author disclaims copyright to this source code.
|
||||
**
|
||||
*************************************************************************
|
||||
** Defines the interface to tokenizers used by fulltext-search. There
|
||||
** are three basic components:
|
||||
**
|
||||
** sqlite3_tokenizer_module is a singleton defining the tokenizer
|
||||
** interface functions. This is essentially the class structure for
|
||||
** tokenizers.
|
||||
**
|
||||
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
|
||||
** including customization information defined at creation time.
|
||||
**
|
||||
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
|
||||
** tokens from a particular input.
|
||||
*/
|
||||
#ifndef _FTS1_TOKENIZER_H_
|
||||
#define _FTS1_TOKENIZER_H_
|
||||
|
||||
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
|
||||
** If tokenizers are to be allowed to call sqlite3_*() functions, then
|
||||
** we will need a way to register the API consistently.
|
||||
*/
|
||||
#include "sqlite3.h"
|
||||
|
||||
/*
|
||||
** Structures used by the tokenizer interface.
|
||||
*/
|
||||
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
|
||||
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
|
||||
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
|
||||
|
||||
struct sqlite3_tokenizer_module {
|
||||
int iVersion; /* currently 0 */
|
||||
|
||||
/*
|
||||
** Create and destroy a tokenizer. argc/argv are passed down from
|
||||
** the fulltext virtual table creation to allow customization.
|
||||
*/
|
||||
int (*xCreate)(int argc, const char *const*argv,
|
||||
sqlite3_tokenizer **ppTokenizer);
|
||||
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
|
||||
|
||||
/*
|
||||
** Tokenize a particular input. Call xOpen() to prepare to
|
||||
** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then
|
||||
** xClose() to free any internal state. The pInput passed to
|
||||
** xOpen() must exist until the cursor is closed. The ppToken
|
||||
** result from xNext() is only valid until the next call to xNext()
|
||||
** or until xClose() is called.
|
||||
*/
|
||||
/* TODO(shess) current implementation requires pInput to be
|
||||
** nul-terminated. This should either be fixed, or pInput/nBytes
|
||||
** should be converted to zInput.
|
||||
*/
|
||||
int (*xOpen)(sqlite3_tokenizer *pTokenizer,
|
||||
const char *pInput, int nBytes,
|
||||
sqlite3_tokenizer_cursor **ppCursor);
|
||||
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
|
||||
int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
|
||||
const char **ppToken, int *pnBytes,
|
||||
int *piStartOffset, int *piEndOffset, int *piPosition);
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer {
|
||||
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer_cursor {
|
||||
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
/*
|
||||
** Get the module for a tokenizer which generates tokens based on a
|
||||
** set of non-token characters. The default is to break tokens at any
|
||||
** non-alnum character, though the set of delimiters can also be
|
||||
** specified by the first argv argument to xCreate().
|
||||
*/
|
||||
/* TODO(shess) This doesn't belong here. Need some sort of
|
||||
** registration process.
|
||||
*/
|
||||
void sqlite3Fts1SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
|
||||
void sqlite3Fts1PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
|
||||
|
||||
#endif /* _FTS1_TOKENIZER_H_ */
|
||||
@ -1,221 +0,0 @@
|
||||
/*
|
||||
** The author disclaims copyright to this source code.
|
||||
**
|
||||
*************************************************************************
|
||||
** Implementation of the "simple" full-text-search tokenizer.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS1 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS1 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "fts1_tokenizer.h"
|
||||
|
||||
typedef struct simple_tokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
char delim[128]; /* flag ASCII delimiters */
|
||||
} simple_tokenizer;
|
||||
|
||||
typedef struct simple_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *pInput; /* input we are tokenizing */
|
||||
int nBytes; /* size of the input */
|
||||
int iOffset; /* current position in pInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *pToken; /* storage for current token */
|
||||
int nTokenAllocated; /* space allocated to zToken buffer */
|
||||
} simple_tokenizer_cursor;
|
||||
|
||||
|
||||
/* Forward declaration */
|
||||
static const sqlite3_tokenizer_module simpleTokenizerModule;
|
||||
|
||||
static int isDelim(simple_tokenizer *t, unsigned char c){
|
||||
return c<0x80 && t->delim[c];
|
||||
}
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int simpleCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
simple_tokenizer *t;
|
||||
|
||||
t = (simple_tokenizer *) calloc(sizeof(*t), 1);
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
/* TODO(shess) Delimiters need to remain the same from run to run,
|
||||
** else we need to reindex. One solution would be a meta-table to
|
||||
** track such information in the database, then we'd only want this
|
||||
** information on the initial create.
|
||||
*/
|
||||
if( argc>1 ){
|
||||
int i, n = strlen(argv[1]);
|
||||
for(i=0; i<n; i++){
|
||||
unsigned char ch = argv[1][i];
|
||||
/* We explicitly don't support UTF-8 delimiters for now. */
|
||||
if( ch>=0x80 ){
|
||||
free(t);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
t->delim[ch] = 1;
|
||||
}
|
||||
} else {
|
||||
/* Mark non-alphanumeric ASCII characters as delimiters */
|
||||
int i;
|
||||
for(i=1; i<0x80; i++){
|
||||
t->delim[i] = !isalnum(i);
|
||||
}
|
||||
}
|
||||
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
free(pTokenizer);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is pInput[0..nBytes-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int simpleOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *pInput, int nBytes, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
simple_tokenizer_cursor *c;
|
||||
|
||||
c = (simple_tokenizer_cursor *) malloc(sizeof(*c));
|
||||
if( c==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
c->pInput = pInput;
|
||||
if( pInput==0 ){
|
||||
c->nBytes = 0;
|
||||
}else if( nBytes<0 ){
|
||||
c->nBytes = (int)strlen(pInput);
|
||||
}else{
|
||||
c->nBytes = nBytes;
|
||||
}
|
||||
c->iOffset = 0; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->pToken = NULL; /* no space allocated, yet. */
|
||||
c->nTokenAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** simpleOpen() above.
|
||||
*/
|
||||
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
free(c->pToken);
|
||||
free(c);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to simpleOpen().
|
||||
*/
|
||||
static int simpleNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
|
||||
const char **ppToken, /* OUT: *ppToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
|
||||
unsigned char *p = (unsigned char *)c->pInput;
|
||||
|
||||
while( c->iOffset<c->nBytes ){
|
||||
int iStartOffset;
|
||||
|
||||
/* Scan past delimiter characters */
|
||||
while( c->iOffset<c->nBytes && isDelim(t, p[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
/* Count non-delimiter characters. */
|
||||
iStartOffset = c->iOffset;
|
||||
while( c->iOffset<c->nBytes && !isDelim(t, p[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
if( c->iOffset>iStartOffset ){
|
||||
int i, n = c->iOffset-iStartOffset;
|
||||
if( n>c->nTokenAllocated ){
|
||||
c->nTokenAllocated = n+20;
|
||||
c->pToken = realloc(c->pToken, c->nTokenAllocated);
|
||||
if( c->pToken==NULL ) return SQLITE_NOMEM;
|
||||
}
|
||||
for(i=0; i<n; i++){
|
||||
/* TODO(shess) This needs expansion to handle UTF-8
|
||||
** case-insensitivity.
|
||||
*/
|
||||
unsigned char ch = p[iStartOffset+i];
|
||||
c->pToken[i] = ch<0x80 ? tolower(ch) : ch;
|
||||
}
|
||||
*ppToken = c->pToken;
|
||||
*pnBytes = n;
|
||||
*piStartOffset = iStartOffset;
|
||||
*piEndOffset = c->iOffset;
|
||||
*piPosition = c->iToken++;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the simple tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module simpleTokenizerModule = {
|
||||
0,
|
||||
simpleCreate,
|
||||
simpleDestroy,
|
||||
simpleOpen,
|
||||
simpleClose,
|
||||
simpleNext,
|
||||
};
|
||||
|
||||
/*
|
||||
** Allocate a new simple tokenizer. Return a pointer to the new
|
||||
** tokenizer in *ppModule
|
||||
*/
|
||||
void sqlite3Fts1SimpleTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &simpleTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */
|
||||
File diff suppressed because it is too large
@ -1,11 +0,0 @@
|
||||
#include "sqlite3.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
int fulltext_init(sqlite3 *db);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
@ -1,174 +0,0 @@
|
||||
/*
|
||||
** The author disclaims copyright to this source code.
|
||||
**
|
||||
*************************************************************************
|
||||
** Implementation of the "simple" full-text-search tokenizer.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#if !defined(__APPLE__)
|
||||
#include <malloc.h>
|
||||
#else
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "tokenizer.h"
|
||||
|
||||
/* Duplicate a string; the caller must free() the returned string.
|
||||
* (We don't use strdup() since it's not part of the standard C library and
|
||||
* may not be available everywhere.) */
|
||||
/* TODO(shess) Copied from fulltext.c, consider util.c for such
|
||||
** things. */
|
||||
static char *string_dup(const char *s){
|
||||
char *str = malloc(strlen(s) + 1);
|
||||
strcpy(str, s);
|
||||
return str;
|
||||
}
|
||||
|
||||
typedef struct simple_tokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
const char *zDelim; /* token delimiters */
|
||||
} simple_tokenizer;
|
||||
|
||||
typedef struct simple_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *pInput; /* input we are tokenizing */
|
||||
int nBytes; /* size of the input */
|
||||
const char *pCurrent; /* current position in pInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *zToken; /* storage for current token */
|
||||
int nTokenBytes; /* actual size of current token */
|
||||
int nTokenAllocated; /* space allocated to zToken buffer */
|
||||
} simple_tokenizer_cursor;
|
||||
|
||||
static sqlite3_tokenizer_module simpleTokenizerModule;/* forward declaration */
|
||||
|
||||
static int simpleCreate(
|
||||
int argc, const char **argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
simple_tokenizer *t;
|
||||
|
||||
t = (simple_tokenizer *) malloc(sizeof(simple_tokenizer));
|
||||
/* TODO(shess) Delimiters need to remain the same from run to run,
|
||||
** else we need to reindex. One solution would be a meta-table to
|
||||
** track such information in the database, then we'd only want this
|
||||
** information on the initial create.
|
||||
*/
|
||||
if( argc>1 ){
|
||||
t->zDelim = string_dup(argv[1]);
|
||||
} else {
|
||||
/* Build a string excluding alphanumeric ASCII characters */
|
||||
char zDelim[0x80]; /* nul-terminated, so nul not a member */
|
||||
int i, j;
|
||||
for(i=1, j=0; i<0x80; i++){
|
||||
if( !isalnum(i) ){
|
||||
zDelim[j++] = i;
|
||||
}
|
||||
}
|
||||
zDelim[j++] = '\0';
|
||||
assert( j<=sizeof(zDelim) );
|
||||
t->zDelim = string_dup(zDelim);
|
||||
}
|
||||
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
simple_tokenizer *t = (simple_tokenizer *) pTokenizer;
|
||||
|
||||
free((void *) t->zDelim);
|
||||
free(t);
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int simpleOpen(
|
||||
sqlite3_tokenizer *pTokenizer,
|
||||
const char *pInput, int nBytes,
|
||||
sqlite3_tokenizer_cursor **ppCursor
|
||||
){
|
||||
simple_tokenizer_cursor *c;
|
||||
|
||||
c = (simple_tokenizer_cursor *) malloc(sizeof(simple_tokenizer_cursor));
|
||||
c->pInput = pInput;
|
||||
c->nBytes = nBytes<0 ? (int) strlen(pInput) : nBytes;
|
||||
c->pCurrent = c->pInput; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->zToken = NULL; /* no space allocated, yet. */
|
||||
c->nTokenBytes = 0;
|
||||
c->nTokenAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
|
||||
if( NULL!=c->zToken ){
|
||||
free(c->zToken);
|
||||
}
|
||||
free(c);
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int simpleNext(
|
||||
sqlite3_tokenizer_cursor *pCursor,
|
||||
const char **ppToken, int *pnBytes,
|
||||
int *piStartOffset, int *piEndOffset, int *piPosition
|
||||
){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
|
||||
int ii;
|
||||
|
||||
while( c->pCurrent-c->pInput<c->nBytes ){
|
||||
int n = (int) strcspn(c->pCurrent, t->zDelim);
|
||||
if( n>0 ){
|
||||
if( n+1>c->nTokenAllocated ){
|
||||
c->zToken = realloc(c->zToken, n+1);
|
||||
}
|
||||
for(ii=0; ii<n; ii++){
|
||||
/* TODO(shess) This needs expansion to handle UTF-8
|
||||
** case-insensitivity.
|
||||
*/
|
||||
char ch = c->pCurrent[ii];
|
||||
c->zToken[ii] = (unsigned char)ch<0x80 ? tolower((unsigned char)ch):ch;
|
||||
}
|
||||
c->zToken[n] = '\0';
|
||||
*ppToken = c->zToken;
|
||||
*pnBytes = n;
|
||||
*piStartOffset = (int) (c->pCurrent-c->pInput);
|
||||
*piEndOffset = *piStartOffset+n;
|
||||
*piPosition = c->iToken++;
|
||||
c->pCurrent += n + 1;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
c->pCurrent += n + 1;
|
||||
/* TODO(shess) could strspn() to skip delimiters en masse. Needs
|
||||
** to happen in two places, though, which is annoying.
|
||||
*/
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
static sqlite3_tokenizer_module simpleTokenizerModule = {
|
||||
0,
|
||||
simpleCreate,
|
||||
simpleDestroy,
|
||||
simpleOpen,
|
||||
simpleClose,
|
||||
simpleNext,
|
||||
};
|
||||
|
||||
void get_simple_tokenizer_module(
|
||||
sqlite3_tokenizer_module **ppModule
|
||||
){
|
||||
*ppModule = &simpleTokenizerModule;
|
||||
}
|
||||
@ -1,89 +0,0 @@
|
||||
/*
|
||||
** 2006 July 10
|
||||
**
|
||||
** The author disclaims copyright to this source code.
|
||||
**
|
||||
*************************************************************************
|
||||
** Defines the interface to tokenizers used by fulltext-search. There
|
||||
** are three basic components:
|
||||
**
|
||||
** sqlite3_tokenizer_module is a singleton defining the tokenizer
|
||||
** interface functions. This is essentially the class structure for
|
||||
** tokenizers.
|
||||
**
|
||||
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
|
||||
** including customization information defined at creation time.
|
||||
**
|
||||
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
|
||||
** tokens from a particular input.
|
||||
*/
|
||||
#ifndef _TOKENIZER_H_
|
||||
#define _TOKENIZER_H_
|
||||
|
||||
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
|
||||
** If tokenizers are to be allowed to call sqlite3_*() functions, then
|
||||
** we will need a way to register the API consistently.
|
||||
*/
|
||||
#include "sqlite3.h"
|
||||
|
||||
/*
|
||||
** Structures used by the tokenizer interface.
|
||||
*/
|
||||
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
|
||||
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
|
||||
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
|
||||
|
||||
struct sqlite3_tokenizer_module {
|
||||
int iVersion; /* currently 0 */
|
||||
|
||||
/*
|
||||
** Create and destroy a tokenizer. argc/argv are passed down from
|
||||
** the fulltext virtual table creation to allow customization.
|
||||
*/
|
||||
int (*xCreate)(int argc, const char **argv,
|
||||
sqlite3_tokenizer **ppTokenizer);
|
||||
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
|
||||
|
||||
/*
|
||||
** Tokenize a particular input. Call xOpen() to prepare to
|
||||
** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then
|
||||
** xClose() to free any internal state. The pInput passed to
|
||||
** xOpen() must exist until the cursor is closed. The ppToken
|
||||
** result from xNext() is only valid until the next call to xNext()
|
||||
** or until xClose() is called.
|
||||
*/
|
||||
/* TODO(shess) current implementation requires pInput to be
|
||||
** nul-terminated. This should either be fixed, or pInput/nBytes
|
||||
** should be converted to zInput.
|
||||
*/
|
||||
int (*xOpen)(sqlite3_tokenizer *pTokenizer,
|
||||
const char *pInput, int nBytes,
|
||||
sqlite3_tokenizer_cursor **ppCursor);
|
||||
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
|
||||
int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
|
||||
const char **ppToken, int *pnBytes,
|
||||
int *piStartOffset, int *piEndOffset, int *piPosition);
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer {
|
||||
sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer_cursor {
|
||||
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
/*
|
||||
** Get the module for a tokenizer which generates tokens based on a
|
||||
** set of non-token characters. The default is to break tokens at any
|
||||
** non-alnum character, though the set of delimiters can also be
|
||||
** specified by the first argv argument to xCreate().
|
||||
*/
|
||||
/* TODO(shess) This doesn't belong here. Need some sort of
|
||||
** registration process.
|
||||
*/
|
||||
void get_simple_tokenizer_module(sqlite3_tokenizer_module **ppModule);
|
||||
|
||||
#endif /* _TOKENIZER_H_ */
|
||||
@ -1,133 +0,0 @@
|
||||
|
||||
1. FTS2 Tokenizers
|
||||
|
||||
When creating a new full-text table, FTS2 allows the user to select
|
||||
the text tokenizer implementation to be used when indexing text
|
||||
by specifying a "tokenizer" clause as part of the CREATE VIRTUAL TABLE
|
||||
statement:
|
||||
|
||||
CREATE VIRTUAL TABLE <table-name> USING fts2(
|
||||
<columns ...> [, tokenizer <tokenizer-name> [<tokenizer-args>]]
|
||||
);
|
||||
|
||||
The built-in tokenizers (valid values to pass as <tokenizer name>) are
|
||||
"simple" and "porter".
|
||||
|
||||
<tokenizer-args> should consist of zero or more white-space separated
|
||||
arguments to pass to the selected tokenizer implementation. The
|
||||
interpretation of the arguments, if any, depends on the individual
|
||||
tokenizer.
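
For example (the table and column names here are only illustrative and
do not appear elsewhere in this tree), the following statement indexes
two columns with the built-in Porter stemming tokenizer:

  CREATE VIRTUAL TABLE papers USING fts2(title, body, tokenizer porter);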
|
||||
|
||||
2. Custom Tokenizers
|
||||
|
||||
FTS2 allows users to provide custom tokenizer implementations. The
|
||||
interface used to create a new tokenizer is defined and described in
|
||||
the fts2_tokenizer.h source file.
|
||||
|
||||
Registering a new FTS2 tokenizer is similar to registering a new
|
||||
virtual table module with SQLite. The user passes a pointer to a
|
||||
structure containing pointers to various callback functions that
|
||||
make up the implementation of the new tokenizer type. For tokenizers,
|
||||
the structure (defined in fts2_tokenizer.h) is called
|
||||
"sqlite3_tokenizer_module".
|
||||
|
||||
FTS2 does not expose a C-function that users call to register new
|
||||
tokenizer types with a database handle. Instead, the pointer must
|
||||
be encoded as an SQL blob value and passed to FTS2 through the SQL
|
||||
engine by evaluating a special scalar function, "fts2_tokenizer()".
|
||||
The fts2_tokenizer() function may be called with one or two arguments,
|
||||
as follows:
|
||||
|
||||
SELECT fts2_tokenizer(<tokenizer-name>);
|
||||
SELECT fts2_tokenizer(<tokenizer-name>, <sqlite3_tokenizer_module ptr>);
|
||||
|
||||
Where <tokenizer-name> is a string identifying the tokenizer and
|
||||
<sqlite3_tokenizer_module ptr> is a pointer to an sqlite3_tokenizer_module
|
||||
structure encoded as an SQL blob. If the second argument is present,
it is registered as tokenizer <tokenizer-name> and a copy of it is
returned. If only one argument is passed, a pointer to the tokenizer
|
||||
implementation currently registered as <tokenizer-name> is returned,
|
||||
encoded as a blob. Or, if no such tokenizer exists, an SQL exception
|
||||
(error) is raised.
|
||||
|
||||
SECURITY: If the fts2 extension is used in an environment where potentially
|
||||
malicious users may execute arbitrary SQL (i.e. gears), they should be
|
||||
prevented from invoking the fts2_tokenizer() function, possibly using the
|
||||
authorisation callback.
|
||||
|
||||
See "Sample code" below for an example of calling the fts2_tokenizer()
|
||||
function from C code.
|
||||
|
||||
3. ICU Library Tokenizers
|
||||
|
||||
If this extension is compiled with the SQLITE_ENABLE_ICU pre-processor
|
||||
symbol defined, then there exists a built-in tokenizer named "icu"
|
||||
implemented using the ICU library. The first argument passed to the
|
||||
xCreate() method (see fts2_tokenizer.h) of this tokenizer may be
|
||||
an ICU locale identifier. For example "tr_TR" for Turkish as used
|
||||
in Turkey, or "en_AU" for English as used in Australia. For example:
|
||||
|
||||
"CREATE VIRTUAL TABLE thai_text USING fts2(text, tokenizer icu th_TH)"
|
||||
|
||||
The ICU tokenizer implementation is very simple. It splits the input
|
||||
text according to the ICU rules for finding word boundaries and discards
|
||||
any tokens that consist entirely of white-space. This may be suitable
|
||||
for some applications in some locales, but not all. If more complex
|
||||
processing is required, for example to implement stemming or
|
||||
discard punctuation, this can be done by creating a tokenizer
|
||||
implementation that uses the ICU tokenizer as part of its implementation.
|
||||
|
||||
When using the ICU tokenizer this way, it is safe to overwrite the
|
||||
contents of the strings returned by the xNext() method (see
|
||||
fts2_tokenizer.h).
|
||||
|
||||
4. Sample code.
|
||||
|
||||
The following two code samples illustrate the way C code should invoke
|
||||
the fts2_tokenizer() scalar function:
|
||||
|
||||
int registerTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module *p
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
|
||||
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
|
||||
sqlite3_step(pStmt);
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
int queryTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module **pp
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts2_tokenizer(?)";
|
||||
|
||||
*pp = 0;
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
if( SQLITE_ROW==sqlite3_step(pStmt) ){
|
||||
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
|
||||
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
|
||||
}
|
||||
}
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
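
The following sketch (not part of the original document) shows how the
two helpers above might be combined; "mytok" and pModule stand in for a
real tokenizer name and module pointer supplied by the caller:

  int installTokenizer(sqlite3 *db, const sqlite3_tokenizer_module *pModule){
    char zName[] = "mytok";
    const sqlite3_tokenizer_module *pCheck = 0;
    int rc;

    rc = registerTokenizer(db, zName, pModule);
    if( rc!=SQLITE_OK ) return rc;

    /* Read the registration back and check the same pointer is returned. */
    rc = queryTokenizer(db, zName, &pCheck);
    if( rc!=SQLITE_OK ) return rc;
    return (pCheck==pModule) ? SQLITE_OK : SQLITE_ERROR;
  }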
|
||||
@ -1,4 +0,0 @@
|
||||
This folder contains source code to the second full-text search
|
||||
extension for SQLite. While the API is the same, this version uses a
|
||||
substantially different storage schema from fts1, so tables will need
|
||||
to be rebuilt.
|
||||
File diff suppressed because it is too large
@ -1,26 +0,0 @@
|
||||
/*
|
||||
** 2006 Oct 10
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This header file is used by programs that want to link against the
|
||||
** FTS2 library. All it does is declare the sqlite3Fts2Init() interface.
|
||||
*/
|
||||
#include "sqlite3.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
int sqlite3Fts2Init(sqlite3 *db);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
@ -1,376 +0,0 @@
|
||||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the implementation of generic hash-tables used in SQLite.
|
||||
** We've modified it slightly to serve as a standalone hash table
|
||||
** implementation for the full-text indexing module.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS2 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS2 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sqlite3.h"
|
||||
#include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT3
|
||||
#include "fts2_hash.h"
|
||||
|
||||
/*
|
||||
** Malloc and Free functions
|
||||
*/
|
||||
static void *fts2HashMalloc(int n){
|
||||
void *p = sqlite3_malloc(n);
|
||||
if( p ){
|
||||
memset(p, 0, n);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
static void fts2HashFree(void *p){
|
||||
sqlite3_free(p);
|
||||
}
|
||||
|
||||
/* Turn bulk memory into a hash table object by initializing the
|
||||
** fields of the Hash structure.
|
||||
**
|
||||
** "pNew" is a pointer to the hash table that is to be initialized.
|
||||
** keyClass is one of the constants
|
||||
** FTS2_HASH_BINARY or FTS2_HASH_STRING. The value of keyClass
|
||||
** determines what kind of key the hash table will use. "copyKey" is
|
||||
** true if the hash table should make its own private copy of keys and
|
||||
** false if it should just use the supplied pointer.
|
||||
*/
|
||||
void sqlite3Fts2HashInit(fts2Hash *pNew, int keyClass, int copyKey){
|
||||
assert( pNew!=0 );
|
||||
assert( keyClass>=FTS2_HASH_STRING && keyClass<=FTS2_HASH_BINARY );
|
||||
pNew->keyClass = keyClass;
|
||||
pNew->copyKey = copyKey;
|
||||
pNew->first = 0;
|
||||
pNew->count = 0;
|
||||
pNew->htsize = 0;
|
||||
pNew->ht = 0;
|
||||
}
|
||||
|
||||
/* Remove all entries from a hash table. Reclaim all memory.
|
||||
** Call this routine to delete a hash table or to reset a hash table
|
||||
** to the empty state.
|
||||
*/
|
||||
void sqlite3Fts2HashClear(fts2Hash *pH){
|
||||
fts2HashElem *elem; /* For looping over all elements of the table */
|
||||
|
||||
assert( pH!=0 );
|
||||
elem = pH->first;
|
||||
pH->first = 0;
|
||||
fts2HashFree(pH->ht);
|
||||
pH->ht = 0;
|
||||
pH->htsize = 0;
|
||||
while( elem ){
|
||||
fts2HashElem *next_elem = elem->next;
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
fts2HashFree(elem->pKey);
|
||||
}
|
||||
fts2HashFree(elem);
|
||||
elem = next_elem;
|
||||
}
|
||||
pH->count = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is FTS2_HASH_STRING
|
||||
*/
|
||||
static int strHash(const void *pKey, int nKey){
|
||||
const char *z = (const char *)pKey;
|
||||
int h = 0;
|
||||
if( nKey<=0 ) nKey = (int) strlen(z);
|
||||
while( nKey > 0 ){
|
||||
h = (h<<3) ^ h ^ *z++;
|
||||
nKey--;
|
||||
}
|
||||
return h & 0x7fffffff;
|
||||
}
|
||||
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is FTS2_HASH_BINARY
|
||||
*/
|
||||
static int binHash(const void *pKey, int nKey){
|
||||
int h = 0;
|
||||
const char *z = (const char *)pKey;
|
||||
while( nKey-- > 0 ){
|
||||
h = (h<<3) ^ h ^ *(z++);
|
||||
}
|
||||
return h & 0x7fffffff;
|
||||
}
|
||||
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return memcmp(pKey1,pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** The C syntax in this function definition may be unfamiliar to some
** programmers, so we provide the following additional explanation:
|
||||
**
|
||||
** The name of the function is "hashFunction". The function takes a
|
||||
** single parameter "keyClass". The return value of hashFunction()
|
||||
** is a pointer to another function. Specifically, the return value
|
||||
** of hashFunction() is a pointer to a function that takes two parameters
|
||||
** with types "const void*" and "int" and returns an "int".
|
||||
*/
|
||||
static int (*hashFunction(int keyClass))(const void*,int){
|
||||
if( keyClass==FTS2_HASH_STRING ){
|
||||
return &strHash;
|
||||
}else{
|
||||
assert( keyClass==FTS2_HASH_BINARY );
|
||||
return &binHash;
|
||||
}
|
||||
}
|
||||
|
||||
/*
** Return a pointer to the appropriate comparison function given the
** key class.
**
** For help in interpreting the obscure C code in the function definition,
** see the header comment on the previous function.
*/
|
||||
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
|
||||
if( keyClass==FTS2_HASH_STRING ){
|
||||
return &strCompare;
|
||||
}else{
|
||||
assert( keyClass==FTS2_HASH_BINARY );
|
||||
return &binCompare;
|
||||
}
|
||||
}
|
||||
|
||||
/* Link an element into the hash table
|
||||
*/
|
||||
static void insertElement(
|
||||
fts2Hash *pH, /* The complete hash table */
|
||||
struct _fts2ht *pEntry, /* The entry into which pNew is inserted */
|
||||
fts2HashElem *pNew /* The element to be inserted */
|
||||
){
|
||||
fts2HashElem *pHead; /* First element already in pEntry */
|
||||
pHead = pEntry->chain;
|
||||
if( pHead ){
|
||||
pNew->next = pHead;
|
||||
pNew->prev = pHead->prev;
|
||||
if( pHead->prev ){ pHead->prev->next = pNew; }
|
||||
else { pH->first = pNew; }
|
||||
pHead->prev = pNew;
|
||||
}else{
|
||||
pNew->next = pH->first;
|
||||
if( pH->first ){ pH->first->prev = pNew; }
|
||||
pNew->prev = 0;
|
||||
pH->first = pNew;
|
||||
}
|
||||
pEntry->count++;
|
||||
pEntry->chain = pNew;
|
||||
}
|
||||
|
||||
|
||||
/* Resize the hash table so that it contains "new_size" buckets.
** "new_size" must be a power of 2.  The hash table might fail
** to resize if fts2HashMalloc() fails.
*/
|
||||
static void rehash(fts2Hash *pH, int new_size){
|
||||
struct _fts2ht *new_ht; /* The new hash table */
|
||||
fts2HashElem *elem, *next_elem; /* For looping over existing elements */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( (new_size & (new_size-1))==0 );
|
||||
new_ht = (struct _fts2ht *)fts2HashMalloc( new_size*sizeof(struct _fts2ht) );
|
||||
if( new_ht==0 ) return;
|
||||
fts2HashFree(pH->ht);
|
||||
pH->ht = new_ht;
|
||||
pH->htsize = new_size;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
|
||||
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
|
||||
next_elem = elem->next;
|
||||
insertElement(pH, &new_ht[h], elem);
|
||||
}
|
||||
}
|
||||
|
||||
/* This function (for internal use only) locates an element in a
** hash table that matches the given key.  The hash for this key has
|
||||
** already been computed and is passed as the 4th parameter.
|
||||
*/
|
||||
static fts2HashElem *findElementGivenHash(
|
||||
const fts2Hash *pH, /* The pH to be searched */
|
||||
const void *pKey, /* The key we are searching for */
|
||||
int nKey,
|
||||
int h /* The hash for this key. */
|
||||
){
|
||||
fts2HashElem *elem; /* Used to loop thru the element list */
|
||||
int count; /* Number of elements left to test */
|
||||
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
|
||||
|
||||
if( pH->ht ){
|
||||
struct _fts2ht *pEntry = &pH->ht[h];
|
||||
elem = pEntry->chain;
|
||||
count = pEntry->count;
|
||||
xCompare = compareFunction(pH->keyClass);
|
||||
while( count-- && elem ){
|
||||
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
|
||||
return elem;
|
||||
}
|
||||
elem = elem->next;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Remove a single entry from the hash table given a pointer to that
|
||||
** element and a hash on the element's key.
|
||||
*/
|
||||
static void removeElementGivenHash(
|
||||
fts2Hash *pH, /* The pH containing "elem" */
|
||||
fts2HashElem* elem, /* The element to be removed from the pH */
|
||||
int h /* Hash value for the element */
|
||||
){
|
||||
struct _fts2ht *pEntry;
|
||||
if( elem->prev ){
|
||||
elem->prev->next = elem->next;
|
||||
}else{
|
||||
pH->first = elem->next;
|
||||
}
|
||||
if( elem->next ){
|
||||
elem->next->prev = elem->prev;
|
||||
}
|
||||
pEntry = &pH->ht[h];
|
||||
if( pEntry->chain==elem ){
|
||||
pEntry->chain = elem->next;
|
||||
}
|
||||
pEntry->count--;
|
||||
if( pEntry->count<=0 ){
|
||||
pEntry->chain = 0;
|
||||
}
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
fts2HashFree(elem->pKey);
|
||||
}
|
||||
fts2HashFree( elem );
|
||||
pH->count--;
|
||||
if( pH->count<=0 ){
|
||||
assert( pH->first==0 );
|
||||
assert( pH->count==0 );
|
||||
fts2HashClear(pH);
|
||||
}
|
||||
}
|
||||
|
||||
/* Attempt to locate an element of the hash table pH with a key
|
||||
** that matches pKey,nKey. Return the data for this element if it is
|
||||
** found, or NULL if there is no match.
|
||||
*/
|
||||
void *sqlite3Fts2HashFind(const fts2Hash *pH, const void *pKey, int nKey){
|
||||
int h; /* A hash on key */
|
||||
fts2HashElem *elem; /* The element that matches key */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
if( pH==0 || pH->ht==0 ) return 0;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
h = (*xHash)(pKey,nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
|
||||
return elem ? elem->data : 0;
|
||||
}
|
||||
|
||||
/* Insert an element into the hash table pH. The key is pKey,nKey
|
||||
** and the data is "data".
|
||||
**
|
||||
** If no element exists with a matching key, then a new
|
||||
** element is created. A copy of the key is made if the copyKey
|
||||
** flag is set. NULL is returned.
|
||||
**
|
||||
** If another element already exists with the same key, then the
|
||||
** new data replaces the old data and the old data is returned.
|
||||
** The key is not copied in this instance. If a malloc fails, then
|
||||
** the new data is returned and the hash table is unchanged.
|
||||
**
|
||||
** If the "data" parameter to this function is NULL, then the
|
||||
** element corresponding to "key" is removed from the hash table.
|
||||
*/
|
||||
void *sqlite3Fts2HashInsert(
|
||||
fts2Hash *pH, /* The hash table to insert into */
|
||||
const void *pKey, /* The key */
|
||||
int nKey, /* Number of bytes in the key */
|
||||
void *data /* The data */
|
||||
){
|
||||
int hraw; /* Raw hash value of the key */
|
||||
int h; /* the hash of the key modulo hash table size */
|
||||
fts2HashElem *elem; /* Used to loop thru the element list */
|
||||
fts2HashElem *new_elem; /* New element added to the pH */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( pH!=0 );
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
hraw = (*xHash)(pKey, nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
elem = findElementGivenHash(pH,pKey,nKey,h);
|
||||
if( elem ){
|
||||
void *old_data = elem->data;
|
||||
if( data==0 ){
|
||||
removeElementGivenHash(pH,elem,h);
|
||||
}else{
|
||||
elem->data = data;
|
||||
}
|
||||
return old_data;
|
||||
}
|
||||
if( data==0 ) return 0;
|
||||
new_elem = (fts2HashElem*)fts2HashMalloc( sizeof(fts2HashElem) );
|
||||
if( new_elem==0 ) return data;
|
||||
if( pH->copyKey && pKey!=0 ){
|
||||
new_elem->pKey = fts2HashMalloc( nKey );
|
||||
if( new_elem->pKey==0 ){
|
||||
fts2HashFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
memcpy((void*)new_elem->pKey, pKey, nKey);
|
||||
}else{
|
||||
new_elem->pKey = (void*)pKey;
|
||||
}
|
||||
new_elem->nKey = nKey;
|
||||
pH->count++;
|
||||
if( pH->htsize==0 ){
|
||||
rehash(pH,8);
|
||||
if( pH->htsize==0 ){
|
||||
pH->count = 0;
|
||||
fts2HashFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
}
|
||||
if( pH->count > pH->htsize ){
|
||||
rehash(pH,pH->htsize*2);
|
||||
}
|
||||
assert( pH->htsize>0 );
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
insertElement(pH, &pH->ht[h], new_elem);
|
||||
new_elem->data = data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
|
||||
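
/* [Editor's illustration -- not part of the original fts2_hash.c.  A hedged
** sketch of how the routines above are meant to be driven; the key and the
** payload strings are made up.  Note that the bucket count is always kept a
** power of two, so "h & (htsize-1)" above is simply a cheap "h % htsize".
*/
static void fts2HashUsageSketch(void){
  fts2Hash terms;          /* Maps a term to an application-owned pointer */
  void *pOld;

  /* String keys; copyKey=1 so the table keeps private copies of the keys. */
  fts2HashInit(&terms, FTS2_HASH_STRING, 1);

  /* A brand-new key: insert returns NULL ("linux" plus its nul is 6 bytes). */
  pOld = fts2HashInsert(&terms, "linux", 6, sqlite3_mprintf("payload-1"));

  /* Re-inserting the same key replaces the data and hands back the old
  ** data pointer, which the caller still owns and must free. */
  pOld = fts2HashInsert(&terms, "linux", 6, sqlite3_mprintf("payload-2"));
  sqlite3_free(pOld);      /* pOld is "payload-1" here */

  /* Lookup, then delete the entry by inserting a NULL data pointer;
  ** the delete also returns the displaced data ("payload-2"). */
  pOld = fts2HashFind(&terms, "linux", 6);
  sqlite3_free(fts2HashInsert(&terms, "linux", 6, 0));
  (void)pOld;

  fts2HashClear(&terms);
}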
@ -1,110 +0,0 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This is the header file for the generic hash-table implementation
** used in SQLite.  We've modified it slightly to serve as a standalone
** hash table implementation for the full-text indexing module.
**
*/
#ifndef _FTS2_HASH_H_
#define _FTS2_HASH_H_

/* Forward declarations of structures. */
typedef struct fts2Hash fts2Hash;
typedef struct fts2HashElem fts2HashElem;

/* A complete hash table is an instance of the following structure.
** The internals of this structure are intended to be opaque -- client
** code should not attempt to access or modify the fields of this structure
** directly.  Change this structure only by using the routines below.
** However, many of the "procedures" and "functions" for modifying and
** accessing this structure are really macros, so we can't really make
** this structure opaque.
*/
struct fts2Hash {
  char keyClass;          /* HASH_INT, _POINTER, _STRING, _BINARY */
  char copyKey;           /* True if copy of key made on insert */
  int count;              /* Number of entries in this table */
  fts2HashElem *first;    /* The first element of the array */
  int htsize;             /* Number of buckets in the hash table */
  struct _fts2ht {        /* the hash table */
    int count;               /* Number of entries with this hash */
    fts2HashElem *chain;      /* Pointer to first entry with this hash */
  } *ht;
};

/* Each element in the hash table is an instance of the following
** structure.  All elements are stored on a single doubly-linked list.
**
** Again, this structure is intended to be opaque, but it can't really
** be opaque because it is used by macros.
*/
struct fts2HashElem {
  fts2HashElem *next, *prev; /* Next and previous elements in the table */
  void *data;                /* Data associated with this element */
  void *pKey; int nKey;      /* Key associated with this element */
};

/*
** There are 2 different modes of operation for a hash table:
**
**   FTS2_HASH_STRING        pKey points to a string that is nKey bytes long
**                           (including the null-terminator, if any).  Case
**                           is respected in comparisons.
**
**   FTS2_HASH_BINARY        pKey points to binary data nKey bytes long.
**                           memcmp() is used to compare keys.
**
** A copy of the key is made if the copyKey parameter to fts2HashInit is 1.
*/
#define FTS2_HASH_STRING    1
#define FTS2_HASH_BINARY    2

/*
** Access routines.  To delete, insert a NULL pointer.
*/
void sqlite3Fts2HashInit(fts2Hash*, int keytype, int copyKey);
void *sqlite3Fts2HashInsert(fts2Hash*, const void *pKey, int nKey, void *pData);
void *sqlite3Fts2HashFind(const fts2Hash*, const void *pKey, int nKey);
void sqlite3Fts2HashClear(fts2Hash*);

/*
** Shorthand for the functions above
*/
#define fts2HashInit   sqlite3Fts2HashInit
#define fts2HashInsert sqlite3Fts2HashInsert
#define fts2HashFind   sqlite3Fts2HashFind
#define fts2HashClear  sqlite3Fts2HashClear

/*
** Macros for looping over all elements of a hash table.  The idiom is
** like this:
**
**   fts2Hash h;
**   fts2HashElem *p;
**   ...
**   for(p=fts2HashFirst(&h); p; p=fts2HashNext(p)){
**     SomeStructure *pData = fts2HashData(p);
**     // do something with pData
**   }
*/
#define fts2HashFirst(H)   ((H)->first)
#define fts2HashNext(E)    ((E)->next)
#define fts2HashData(E)    ((E)->data)
#define fts2HashKey(E)     ((E)->pKey)
#define fts2HashKeysize(E) ((E)->nKey)

/*
** Number of entries in a hash table
*/
#define fts2HashCount(H)  ((H)->count)

#endif /* _FTS2_HASH_H_ */
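
/* [Editor's illustration -- not part of the original fts2_hash.h.  A hedged
** sketch of the iteration idiom described above combined with the access
** routines; the payload handling is hypothetical.]
*/
static int fts2HashIterationSketch(fts2Hash *pTerms){
  fts2HashElem *p;
  int nSeen = 0;

  /* Visit every element using the accessor macros defined above. */
  for(p=fts2HashFirst(pTerms); p; p=fts2HashNext(p)){
    const char *zTerm = (const char *)fts2HashKey(p);  /* the key ...        */
    int nTerm = fts2HashKeysize(p);                    /* ... and its size   */
    void *pPayload = fts2HashData(p);                  /* the stored data    */
    (void)zTerm; (void)nTerm; (void)pPayload;          /* sketch only        */
    nSeen++;
  }

  /* The loop visits exactly fts2HashCount(pTerms) elements. */
  return nSeen==fts2HashCount(pTerms);
}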
@ -1,260 +0,0 @@
/*
** 2007 June 22
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements a tokenizer for fts2 based on the ICU library.
**
** $Id: fts2_icu.c,v 1.3 2008/12/18 05:30:26 danielk1977 Exp $
*/

#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#ifdef SQLITE_ENABLE_ICU

#include <assert.h>
#include <string.h>
#include "fts2_tokenizer.h"

#include <unicode/ubrk.h>
#include <unicode/ucol.h>
#include <unicode/ustring.h>
#include <unicode/utf16.h>

typedef struct IcuTokenizer IcuTokenizer;
typedef struct IcuCursor IcuCursor;

struct IcuTokenizer {
  sqlite3_tokenizer base;
  char *zLocale;
};

struct IcuCursor {
  sqlite3_tokenizer_cursor base;

  UBreakIterator *pIter;      /* ICU break-iterator object */
  int nChar;                  /* Number of UChar elements in pInput */
  UChar *aChar;               /* Copy of input using utf-16 encoding */
  int *aOffset;               /* Offsets of each character in utf-8 input */

  int nBuffer;
  char *zBuffer;

  int iToken;
};

/*
** Create a new tokenizer instance.
*/
static int icuCreate(
  int argc,                            /* Number of entries in argv[] */
  const char * const *argv,            /* Tokenizer creation arguments */
  sqlite3_tokenizer **ppTokenizer      /* OUT: Created tokenizer */
){
  IcuTokenizer *p;
  int n = 0;

  if( argc>0 ){
    n = strlen(argv[0])+1;
  }
  p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n);
  if( !p ){
    return SQLITE_NOMEM;
  }
  memset(p, 0, sizeof(IcuTokenizer));

  if( n ){
    p->zLocale = (char *)&p[1];
    memcpy(p->zLocale, argv[0], n);
  }

  *ppTokenizer = (sqlite3_tokenizer *)p;

  return SQLITE_OK;
}

/*
** Destroy a tokenizer
*/
static int icuDestroy(sqlite3_tokenizer *pTokenizer){
  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
  sqlite3_free(p);
  return SQLITE_OK;
}

/*
** Prepare to begin tokenizing a particular string.  The input
** string to be tokenized is pInput[0..nBytes-1].  A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int icuOpen(
  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
  const char *zInput,                    /* Input string */
  int nInput,                            /* Length of zInput in bytes */
  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
){
  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
  IcuCursor *pCsr;

  const int32_t opt = U_FOLD_CASE_DEFAULT;
  UErrorCode status = U_ZERO_ERROR;
  int nChar;

  UChar32 c;
  int iInput = 0;
  int iOut = 0;

  *ppCursor = 0;

  if( nInput<0 ){
    nInput = strlen(zInput);
  }
  nChar = nInput+1;
  pCsr = (IcuCursor *)sqlite3_malloc(
      sizeof(IcuCursor) +                /* IcuCursor */
      ((nChar+3)&~3) * sizeof(UChar) +   /* IcuCursor.aChar[] */
      (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */
  );
  if( !pCsr ){
    return SQLITE_NOMEM;
  }
  memset(pCsr, 0, sizeof(IcuCursor));
  pCsr->aChar = (UChar *)&pCsr[1];
  pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];

  pCsr->aOffset[iOut] = iInput;
  U8_NEXT(zInput, iInput, nInput, c);
  while( c>0 ){
    int isError = 0;
    c = u_foldCase(c, opt);
    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
    if( isError ){
      sqlite3_free(pCsr);
      return SQLITE_ERROR;
    }
    pCsr->aOffset[iOut] = iInput;

    if( iInput<nInput ){
      U8_NEXT(zInput, iInput, nInput, c);
    }else{
      c = 0;
    }
  }

  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
  if( !U_SUCCESS(status) ){
    sqlite3_free(pCsr);
    return SQLITE_ERROR;
  }
  pCsr->nChar = iOut;

  ubrk_first(pCsr->pIter);
  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
  return SQLITE_OK;
}

/*
** Close a tokenization cursor previously opened by a call to icuOpen().
*/
static int icuClose(sqlite3_tokenizer_cursor *pCursor){
  IcuCursor *pCsr = (IcuCursor *)pCursor;
  ubrk_close(pCsr->pIter);
  sqlite3_free(pCsr->zBuffer);
  sqlite3_free(pCsr);
  return SQLITE_OK;
}

/*
** Extract the next token from a tokenization cursor.
*/
static int icuNext(
  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by simpleOpen */
  const char **ppToken,               /* OUT: *ppToken is the token text */
  int *pnBytes,                       /* OUT: Number of bytes in token */
  int *piStartOffset,                 /* OUT: Starting offset of token */
  int *piEndOffset,                   /* OUT: Ending offset of token */
  int *piPosition                     /* OUT: Position integer of token */
){
  IcuCursor *pCsr = (IcuCursor *)pCursor;

  int iStart = 0;
  int iEnd = 0;
  int nByte = 0;

  while( iStart==iEnd ){
    UChar32 c;

    iStart = ubrk_current(pCsr->pIter);
    iEnd = ubrk_next(pCsr->pIter);
    if( iEnd==UBRK_DONE ){
      return SQLITE_DONE;
    }

    while( iStart<iEnd ){
      int iWhite = iStart;
      U8_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
      if( u_isspace(c) ){
        iStart = iWhite;
      }else{
        break;
      }
    }
    assert(iStart<=iEnd);
  }

  do {
    UErrorCode status = U_ZERO_ERROR;
    if( nByte ){
      char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
      if( !zNew ){
        return SQLITE_NOMEM;
      }
      pCsr->zBuffer = zNew;
      pCsr->nBuffer = nByte;
    }

    u_strToUTF8(
        pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
        &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
        &status                                  /* Output success/failure */
    );
  } while( nByte>pCsr->nBuffer );

  *ppToken = pCsr->zBuffer;
  *pnBytes = nByte;
  *piStartOffset = pCsr->aOffset[iStart];
  *piEndOffset = pCsr->aOffset[iEnd];
  *piPosition = pCsr->iToken++;

  return SQLITE_OK;
}

/*
** The set of routines that implement the simple tokenizer
*/
static const sqlite3_tokenizer_module icuTokenizerModule = {
  0,                           /* iVersion */
  icuCreate,                   /* xCreate  */
  icuDestroy,                  /* xCreate  */
  icuOpen,                     /* xOpen    */
  icuClose,                    /* xClose   */
  icuNext,                     /* xNext    */
};

/*
** Set *ppModule to point at the implementation of the ICU tokenizer.
*/
void sqlite3Fts2IcuTokenizerModule(
  sqlite3_tokenizer_module const**ppModule
){
  *ppModule = &icuTokenizerModule;
}

#endif /* defined(SQLITE_ENABLE_ICU) */
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
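
/* [Editor's illustration -- not part of the original fts2_icu.c.  A hedged,
** standalone sketch of the ICU calls that icuOpen()/icuNext() above are
** built around: open a word break-iterator over a UTF-16 buffer and walk
** the boundary pairs.  Error handling is reduced to a bare minimum.]
*/
#ifdef SQLITE_ENABLE_ICU
static void icuBoundarySketch(const UChar *aChar, int nChar, const char *zLocale){
  UErrorCode status = U_ZERO_ERROR;
  UBreakIterator *pIter;
  int32_t iStart, iEnd;

  pIter = ubrk_open(UBRK_WORD, zLocale, aChar, nChar, &status);
  if( !U_SUCCESS(status) ) return;

  /* ubrk_first() rewinds to offset 0; each ubrk_next() returns the next
  ** boundary, so consecutive values bracket one candidate token.  icuNext()
  ** additionally skips pairs that contain only whitespace and converts the
  ** token back to UTF-8 with u_strToUTF8(). */
  iStart = ubrk_first(pIter);
  while( (iEnd = ubrk_next(pIter))!=UBRK_DONE ){
    /* aChar[iStart..iEnd-1] is one candidate token (UTF-16 code units). */
    iStart = iEnd;
  }
  ubrk_close(pIter);
}
#endif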
@ -1,644 +0,0 @@
|
||||
/*
|
||||
** 2006 September 30
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** Implementation of the full-text-search tokenizer that implements
|
||||
** a Porter stemmer.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS2 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS2 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sqlite3.h"
|
||||
#include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT3
|
||||
#include "fts2_tokenizer.h"
|
||||
|
||||
/*
|
||||
** Class derived from sqlite3_tokenizer
|
||||
*/
|
||||
typedef struct porter_tokenizer {
|
||||
sqlite3_tokenizer base; /* Base class */
|
||||
} porter_tokenizer;
|
||||
|
||||
/*
|
||||
** Class derived from sqlit3_tokenizer_cursor
|
||||
*/
|
||||
typedef struct porter_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *zInput; /* input we are tokenizing */
|
||||
int nInput; /* size of the input */
|
||||
int iOffset; /* current position in zInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *zToken; /* storage for current token */
|
||||
int nAllocated; /* space allocated to zToken buffer */
|
||||
} porter_tokenizer_cursor;
|
||||
|
||||
|
||||
/* Forward declaration */
|
||||
static const sqlite3_tokenizer_module porterTokenizerModule;
|
||||
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int porterCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
porter_tokenizer *t;
|
||||
t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t));
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
memset(t, 0, sizeof(*t));
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
sqlite3_free(pTokenizer);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is zInput[0..nInput-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int porterOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *zInput, int nInput, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
porter_tokenizer_cursor *c;
|
||||
|
||||
c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
|
||||
if( c==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
c->zInput = zInput;
|
||||
if( zInput==0 ){
|
||||
c->nInput = 0;
|
||||
}else if( nInput<0 ){
|
||||
c->nInput = (int)strlen(zInput);
|
||||
}else{
|
||||
c->nInput = nInput;
|
||||
}
|
||||
c->iOffset = 0; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->zToken = NULL; /* no space allocated, yet. */
|
||||
c->nAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** porterOpen() above.
|
||||
*/
|
||||
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
|
||||
sqlite3_free(c->zToken);
|
||||
sqlite3_free(c);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
/*
|
||||
** Vowel or consonant
|
||||
*/
|
||||
static const char cType[] = {
|
||||
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
|
||||
1, 1, 1, 2, 1
|
||||
};
|
||||
|
||||
/*
|
||||
** isConsonant() and isVowel() determine if their first character in
|
||||
** the string they point to is a consonant or a vowel, according
|
||||
** to Porter ruls.
|
||||
**
|
||||
** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'.
|
||||
** 'Y' is a consonant unless it follows another consonant,
|
||||
** in which case it is a vowel.
|
||||
**
|
||||
** In these routine, the letters are in reverse order. So the 'y' rule
|
||||
** is that 'y' is a consonant unless it is followed by another
|
||||
** consonent.
|
||||
*/
|
||||
static int isVowel(const char*);
|
||||
static int isConsonant(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return j;
|
||||
return z[1]==0 || isVowel(z + 1);
|
||||
}
|
||||
static int isVowel(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return 1-j;
|
||||
return isConsonant(z + 1);
|
||||
}
|
||||
|
||||
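/* [Editor's note -- worked example, not part of the original file.]
** Because the buffer is reversed, the 'y' rule above reads "consonant unless
** the *next* byte is a consonant".  Two concrete cases:
**
**   word "toy" -> reversed "yot":  isConsonant("yot") sees 'y' followed by
**                 the vowel 'o', so the final 'y' of "toy" is a consonant.
**   word "sky" -> reversed "yks":  'y' is followed by the consonant 'k',
**                 so the final 'y' of "sky" counts as a vowel.
*/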
/*
|
||||
** Let any sequence of one or more vowels be represented by V and let
|
||||
** C be sequence of one or more consonants. Then every word can be
|
||||
** represented as:
|
||||
**
|
||||
** [C] (VC){m} [V]
|
||||
**
|
||||
** In prose: A word is an optional consonant followed by zero or
|
||||
** vowel-consonant pairs followed by an optional vowel. "m" is the
|
||||
** number of vowel consonant pairs. This routine computes the value
|
||||
** of m for the first i bytes of a word.
|
||||
**
|
||||
** Return true if the m-value for z is 1 or more. In other words,
|
||||
** return true if z contains at least one vowel that is followed
|
||||
** by a consonant.
|
||||
**
|
||||
** In this routine z[] is in reverse order. So we are really looking
|
||||
** for an instance of of a consonant followed by a vowel.
|
||||
*/
|
||||
static int m_gt_0(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
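/* [Editor's note -- worked example, not part of the original file.]
** Using the [C](VC){m}[V] decomposition that m_gt_0() above and the
** m_eq_1()/m_gt_1() routines below test against (words in normal order):
**
**   "tree"     = C(tr) V(ee)                  -> m = 0
**   "trouble"  = C(tr) V(ou) C(bl) V(e)       -> m = 1
**   "troubles" = C(tr) V(ou) C(bl) V(e) C(s)  -> m = 2
**
** So m_gt_0("eert") is false while m_gt_0("elbuort") is true (the arguments
** are reversed, as everywhere in this stemmer).
*/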
/* Like mgt0 above except we are looking for a value of m which is
|
||||
** exactly 1
|
||||
*/
|
||||
static int m_eq_1(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 1;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z==0;
|
||||
}
|
||||
|
||||
/* Like mgt0 above except we are looking for a value of m>1 instead
|
||||
** or m>0
|
||||
*/
|
||||
static int m_gt_1(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if there is a vowel anywhere within z[0..n-1]
|
||||
*/
|
||||
static int hasVowel(const char *z){
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if the word ends in a double consonant.
|
||||
**
|
||||
** The text is reversed here. So we are really looking at
|
||||
** the first two characters of z[].
|
||||
*/
|
||||
static int doubleConsonant(const char *z){
|
||||
return isConsonant(z) && z[0]==z[1] && isConsonant(z+1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if the word ends with three letters which
|
||||
** are consonant-vowel-consonent and where the final consonant
|
||||
** is not 'w', 'x', or 'y'.
|
||||
**
|
||||
** The word is reversed here. So we are really checking the
|
||||
** first three letters and the first one cannot be in [wxy].
|
||||
*/
|
||||
static int star_oh(const char *z){
|
||||
return
|
||||
z[0]!=0 && isConsonant(z) &&
|
||||
z[0]!='w' && z[0]!='x' && z[0]!='y' &&
|
||||
z[1]!=0 && isVowel(z+1) &&
|
||||
z[2]!=0 && isConsonant(z+2);
|
||||
}
|
||||
|
||||
/*
|
||||
** If the word ends with zFrom and xCond() is true for the stem
|
||||
** of the word that preceeds the zFrom ending, then change the
|
||||
** ending to zTo.
|
||||
**
|
||||
** The input word *pz and zFrom are both in reverse order. zTo
|
||||
** is in normal order.
|
||||
**
|
||||
** Return TRUE if zFrom matches. Return FALSE if zFrom does not
|
||||
** match. Not that TRUE is returned even if xCond() fails and
|
||||
** no substitution occurs.
|
||||
*/
|
||||
static int stem(
|
||||
char **pz, /* The word being stemmed (Reversed) */
|
||||
const char *zFrom, /* If the ending matches this... (Reversed) */
|
||||
const char *zTo, /* ... change the ending to this (not reversed) */
|
||||
int (*xCond)(const char*) /* Condition that must be true */
|
||||
){
|
||||
char *z = *pz;
|
||||
while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
|
||||
if( *zFrom!=0 ) return 0;
|
||||
if( xCond && !xCond(z) ) return 1;
|
||||
while( *zTo ){
|
||||
*(--z) = *(zTo++);
|
||||
}
|
||||
*pz = z;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
** This is the fallback stemmer used when the porter stemmer is
|
||||
** inappropriate. The input word is copied into the output with
|
||||
** US-ASCII case folding. If the input word is too long (more
|
||||
** than 20 bytes if it contains no digits or more than 6 bytes if
|
||||
** it contains digits) then word is truncated to 20 or 6 bytes
|
||||
** by taking 10 or 3 bytes from the beginning and end.
|
||||
*/
|
||||
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
|
||||
int i, mx, j;
|
||||
int hasDigit = 0;
|
||||
for(i=0; i<nIn; i++){
|
||||
int c = zIn[i];
|
||||
if( c>='A' && c<='Z' ){
|
||||
zOut[i] = c - 'A' + 'a';
|
||||
}else{
|
||||
if( c>='0' && c<='9' ) hasDigit = 1;
|
||||
zOut[i] = c;
|
||||
}
|
||||
}
|
||||
mx = hasDigit ? 3 : 10;
|
||||
if( nIn>mx*2 ){
|
||||
for(j=mx, i=nIn-mx; i<nIn; i++, j++){
|
||||
zOut[j] = zOut[i];
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
zOut[i] = 0;
|
||||
*pnOut = i;
|
||||
}
|
||||
|
||||
|
||||
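/* [Editor's note -- worked example, not part of the original file.]
** copy_stemmer() truncation in practice: a 26-byte digit-free token such as
** "abcdefghijklmnopqrstuvwxyz" exceeds 2*10 bytes, so it is folded to the
** first 10 plus the last 10 bytes, "abcdefghijqrstuvwxyz" (20 bytes).  If
** the token contains a digit the limits drop to 3+3 bytes, so the 10-byte
** token "abc123xyz9" becomes "abcyz9" (6 bytes).
*/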
/*
|
||||
** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
|
||||
** zOut is at least big enough to hold nIn bytes. Write the actual
|
||||
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
|
||||
**
|
||||
** Any upper-case characters in the US-ASCII character set ([A-Z])
|
||||
** are converted to lower case. Upper-case UTF characters are
|
||||
** unchanged.
|
||||
**
|
||||
** Words that are longer than about 20 bytes are stemmed by retaining
|
||||
** a few bytes from the beginning and the end of the word. If the
|
||||
** word contains digits, 3 bytes are taken from the beginning and
|
||||
** 3 bytes from the end. For long words without digits, 10 bytes
|
||||
** are taken from each end. US-ASCII case folding still applies.
|
||||
**
|
||||
** If the input word contains not digits but does characters not
|
||||
** in [a-zA-Z] then no stemming is attempted and this routine just
|
||||
** copies the input into the input into the output with US-ASCII
|
||||
** case folding.
|
||||
**
|
||||
** Stemming never increases the length of the word. So there is
|
||||
** no chance of overflowing the zOut buffer.
|
||||
*/
|
||||
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
|
||||
int i, j, c;
|
||||
char zReverse[28];
|
||||
char *z, *z2;
|
||||
if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
|
||||
/* The word is too big or too small for the porter stemmer.
|
||||
** Fallback to the copy stemmer */
|
||||
copy_stemmer(zIn, nIn, zOut, pnOut);
|
||||
return;
|
||||
}
|
||||
for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
|
||||
c = zIn[i];
|
||||
if( c>='A' && c<='Z' ){
|
||||
zReverse[j] = c + 'a' - 'A';
|
||||
}else if( c>='a' && c<='z' ){
|
||||
zReverse[j] = c;
|
||||
}else{
|
||||
/* The use of a character not in [a-zA-Z] means that we fallback
|
||||
** to the copy stemmer */
|
||||
copy_stemmer(zIn, nIn, zOut, pnOut);
|
||||
return;
|
||||
}
|
||||
}
|
||||
memset(&zReverse[sizeof(zReverse)-5], 0, 5);
|
||||
z = &zReverse[j+1];
|
||||
|
||||
|
||||
/* Step 1a */
|
||||
if( z[0]=='s' ){
|
||||
if(
|
||||
!stem(&z, "sess", "ss", 0) &&
|
||||
!stem(&z, "sei", "i", 0) &&
|
||||
!stem(&z, "ss", "ss", 0)
|
||||
){
|
||||
z++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 1b */
|
||||
z2 = z;
|
||||
if( stem(&z, "dee", "ee", m_gt_0) ){
|
||||
/* Do nothing. The work was all in the test */
|
||||
}else if(
|
||||
(stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
|
||||
&& z!=z2
|
||||
){
|
||||
if( stem(&z, "ta", "ate", 0) ||
|
||||
stem(&z, "lb", "ble", 0) ||
|
||||
stem(&z, "zi", "ize", 0) ){
|
||||
/* Do nothing. The work was all in the test */
|
||||
}else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
|
||||
z++;
|
||||
}else if( m_eq_1(z) && star_oh(z) ){
|
||||
*(--z) = 'e';
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 1c */
|
||||
if( z[0]=='y' && hasVowel(z+1) ){
|
||||
z[0] = 'i';
|
||||
}
|
||||
|
||||
/* Step 2 */
|
||||
switch( z[1] ){
|
||||
case 'a':
|
||||
stem(&z, "lanoita", "ate", m_gt_0) ||
|
||||
stem(&z, "lanoit", "tion", m_gt_0);
|
||||
break;
|
||||
case 'c':
|
||||
stem(&z, "icne", "ence", m_gt_0) ||
|
||||
stem(&z, "icna", "ance", m_gt_0);
|
||||
break;
|
||||
case 'e':
|
||||
stem(&z, "rezi", "ize", m_gt_0);
|
||||
break;
|
||||
case 'g':
|
||||
stem(&z, "igol", "log", m_gt_0);
|
||||
break;
|
||||
case 'l':
|
||||
stem(&z, "ilb", "ble", m_gt_0) ||
|
||||
stem(&z, "illa", "al", m_gt_0) ||
|
||||
stem(&z, "iltne", "ent", m_gt_0) ||
|
||||
stem(&z, "ile", "e", m_gt_0) ||
|
||||
stem(&z, "ilsuo", "ous", m_gt_0);
|
||||
break;
|
||||
case 'o':
|
||||
stem(&z, "noitazi", "ize", m_gt_0) ||
|
||||
stem(&z, "noita", "ate", m_gt_0) ||
|
||||
stem(&z, "rota", "ate", m_gt_0);
|
||||
break;
|
||||
case 's':
|
||||
stem(&z, "msila", "al", m_gt_0) ||
|
||||
stem(&z, "ssenevi", "ive", m_gt_0) ||
|
||||
stem(&z, "ssenluf", "ful", m_gt_0) ||
|
||||
stem(&z, "ssensuo", "ous", m_gt_0);
|
||||
break;
|
||||
case 't':
|
||||
stem(&z, "itila", "al", m_gt_0) ||
|
||||
stem(&z, "itivi", "ive", m_gt_0) ||
|
||||
stem(&z, "itilib", "ble", m_gt_0);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 3 */
|
||||
switch( z[0] ){
|
||||
case 'e':
|
||||
stem(&z, "etaci", "ic", m_gt_0) ||
|
||||
stem(&z, "evita", "", m_gt_0) ||
|
||||
stem(&z, "ezila", "al", m_gt_0);
|
||||
break;
|
||||
case 'i':
|
||||
stem(&z, "itici", "ic", m_gt_0);
|
||||
break;
|
||||
case 'l':
|
||||
stem(&z, "laci", "ic", m_gt_0) ||
|
||||
stem(&z, "luf", "", m_gt_0);
|
||||
break;
|
||||
case 's':
|
||||
stem(&z, "ssen", "", m_gt_0);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 4 */
|
||||
switch( z[1] ){
|
||||
case 'a':
|
||||
if( z[0]=='l' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'c':
|
||||
if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
|
||||
z += 4;
|
||||
}
|
||||
break;
|
||||
case 'e':
|
||||
if( z[0]=='r' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'i':
|
||||
if( z[0]=='c' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'l':
|
||||
if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
|
||||
z += 4;
|
||||
}
|
||||
break;
|
||||
case 'n':
|
||||
if( z[0]=='t' ){
|
||||
if( z[2]=='a' ){
|
||||
if( m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
}else if( z[2]=='e' ){
|
||||
stem(&z, "tneme", "", m_gt_1) ||
|
||||
stem(&z, "tnem", "", m_gt_1) ||
|
||||
stem(&z, "tne", "", m_gt_1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'o':
|
||||
if( z[0]=='u' ){
|
||||
if( m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
}else if( z[3]=='s' || z[3]=='t' ){
|
||||
stem(&z, "noi", "", m_gt_1);
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
stem(&z, "eta", "", m_gt_1) ||
|
||||
stem(&z, "iti", "", m_gt_1);
|
||||
break;
|
||||
case 'u':
|
||||
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
case 'v':
|
||||
case 'z':
|
||||
if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 5a */
|
||||
if( z[0]=='e' ){
|
||||
if( m_gt_1(z+1) ){
|
||||
z++;
|
||||
}else if( m_eq_1(z+1) && !star_oh(z+1) ){
|
||||
z++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 5b */
|
||||
if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
|
||||
z++;
|
||||
}
|
||||
|
||||
/* z[] is now the stemmed word in reverse order. Flip it back
|
||||
** around into forward order and return.
|
||||
*/
|
||||
*pnOut = i = strlen(z);
|
||||
zOut[i] = 0;
|
||||
while( *z ){
|
||||
zOut[--i] = *(z++);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Characters that can be part of a token. We assume any character
|
||||
** whose value is greater than 0x80 (any UTF character) can be
|
||||
** part of a token. In other words, delimiters all must have
|
||||
** values of 0x7f or lower.
|
||||
*/
|
||||
static const char porterIdChar[] = {
|
||||
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
|
||||
};
|
||||
#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30]))
|
||||
|
||||
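/* [Editor's note -- example, not part of the original file.]
** With the table and macro above, letters, digits and '_' are token
** characters, so "porter_stemmer2" is one token, while "right-handed"
** splits into "right" and "handed" because '-' is below 0x30.  Bytes with
** the high bit set are never delimiters, so non-ASCII UTF-8 text always
** stays inside a token.
*/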
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to porterOpen().
|
||||
*/
|
||||
static int porterNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
|
||||
const char **pzToken, /* OUT: *pzToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
|
||||
const char *z = c->zInput;
|
||||
|
||||
while( c->iOffset<c->nInput ){
|
||||
int iStartOffset, ch;
|
||||
|
||||
/* Scan past delimiter characters */
|
||||
while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
/* Count non-delimiter characters. */
|
||||
iStartOffset = c->iOffset;
|
||||
while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
if( c->iOffset>iStartOffset ){
|
||||
int n = c->iOffset-iStartOffset;
|
||||
if( n>c->nAllocated ){
|
||||
c->nAllocated = n+20;
|
||||
c->zToken = sqlite3_realloc(c->zToken, c->nAllocated);
|
||||
if( c->zToken==NULL ) return SQLITE_NOMEM;
|
||||
}
|
||||
porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
|
||||
*pzToken = c->zToken;
|
||||
*piStartOffset = iStartOffset;
|
||||
*piEndOffset = c->iOffset;
|
||||
*piPosition = c->iToken++;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the porter-stemmer tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module porterTokenizerModule = {
|
||||
0,
|
||||
porterCreate,
|
||||
porterDestroy,
|
||||
porterOpen,
|
||||
porterClose,
|
||||
porterNext,
|
||||
};
|
||||
|
||||
/*
|
||||
** Allocate a new porter tokenizer. Return a pointer to the new
|
||||
** tokenizer in *ppModule
|
||||
*/
|
||||
void sqlite3Fts2PorterTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &porterTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
|
||||
@ -1,371 +0,0 @@
|
||||
/*
|
||||
** 2007 June 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This is part of an SQLite module implementing full-text search.
|
||||
** This particular file implements the generic tokenizer interface.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS2 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS2 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
|
||||
|
||||
|
||||
#include "sqlite3.h"
|
||||
#include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT3
|
||||
|
||||
#include "fts2_hash.h"
|
||||
#include "fts2_tokenizer.h"
|
||||
#include <assert.h>
|
||||
|
||||
/*
|
||||
** Implementation of the SQL scalar function for accessing the underlying
|
||||
** hash table. This function may be called as follows:
|
||||
**
|
||||
** SELECT <function-name>(<key-name>);
|
||||
** SELECT <function-name>(<key-name>, <pointer>);
|
||||
**
|
||||
** where <function-name> is the name passed as the second argument
|
||||
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer').
|
||||
**
|
||||
** If the <pointer> argument is specified, it must be a blob value
|
||||
** containing a pointer to be stored as the hash data corresponding
|
||||
** to the string <key-name>. If <pointer> is not specified, then
|
||||
** the string <key-name> must already exist in the has table. Otherwise,
|
||||
** an error is returned.
|
||||
**
|
||||
** Whether or not the <pointer> argument is specified, the value returned
|
||||
** is a blob containing the pointer stored as the hash data corresponding
|
||||
** to string <key-name> (after the hash-table is updated, if applicable).
|
||||
*/
|
||||
static void scalarFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
fts2Hash *pHash;
|
||||
void *pPtr = 0;
|
||||
const unsigned char *zName;
|
||||
int nName;
|
||||
|
||||
assert( argc==1 || argc==2 );
|
||||
|
||||
pHash = (fts2Hash *)sqlite3_user_data(context);
|
||||
|
||||
zName = sqlite3_value_text(argv[0]);
|
||||
nName = sqlite3_value_bytes(argv[0])+1;
|
||||
|
||||
if( argc==2 ){
|
||||
void *pOld;
|
||||
int n = sqlite3_value_bytes(argv[1]);
|
||||
if( n!=sizeof(pPtr) ){
|
||||
sqlite3_result_error(context, "argument type mismatch", -1);
|
||||
return;
|
||||
}
|
||||
pPtr = *(void **)sqlite3_value_blob(argv[1]);
|
||||
pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr);
|
||||
if( pOld==pPtr ){
|
||||
sqlite3_result_error(context, "out of memory", -1);
|
||||
return;
|
||||
}
|
||||
}else{
|
||||
pPtr = sqlite3Fts2HashFind(pHash, zName, nName);
|
||||
if( !pPtr ){
|
||||
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
|
||||
sqlite3_result_error(context, zErr, -1);
|
||||
sqlite3_free(zErr);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
|
||||
}
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
|
||||
#include <tcl.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
** Implementation of a special SQL scalar function for testing tokenizers
|
||||
** designed to be used in concert with the Tcl testing framework. This
|
||||
** function must be called with two arguments:
|
||||
**
|
||||
** SELECT <function-name>(<key-name>, <input-string>);
|
||||
** SELECT <function-name>(<key-name>, <pointer>);
|
||||
**
|
||||
** where <function-name> is the name passed as the second argument
|
||||
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer')
|
||||
** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test').
|
||||
**
|
||||
** The return value is a string that may be interpreted as a Tcl
|
||||
** list. For each token in the <input-string>, three elements are
|
||||
** added to the returned list. The first is the token position, the
|
||||
** second is the token text (folded, stemmed, etc.) and the third is the
|
||||
** substring of <input-string> associated with the token. For example,
|
||||
** using the built-in "simple" tokenizer:
|
||||
**
|
||||
** SELECT fts_tokenizer_test('simple', 'I don't see how');
|
||||
**
|
||||
** will return the string:
|
||||
**
|
||||
** "{0 i I 1 dont don't 2 see see 3 how how}"
|
||||
**
|
||||
*/
|
||||
static void testFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
fts2Hash *pHash;
|
||||
sqlite3_tokenizer_module *p;
|
||||
sqlite3_tokenizer *pTokenizer = 0;
|
||||
sqlite3_tokenizer_cursor *pCsr = 0;
|
||||
|
||||
const char *zErr = 0;
|
||||
|
||||
const char *zName;
|
||||
int nName;
|
||||
const char *zInput;
|
||||
int nInput;
|
||||
|
||||
const char *zArg = 0;
|
||||
|
||||
const char *zToken;
|
||||
int nToken;
|
||||
int iStart;
|
||||
int iEnd;
|
||||
int iPos;
|
||||
|
||||
Tcl_Obj *pRet;
|
||||
|
||||
assert( argc==2 || argc==3 );
|
||||
|
||||
nName = sqlite3_value_bytes(argv[0]);
|
||||
zName = (const char *)sqlite3_value_text(argv[0]);
|
||||
nInput = sqlite3_value_bytes(argv[argc-1]);
|
||||
zInput = (const char *)sqlite3_value_text(argv[argc-1]);
|
||||
|
||||
if( argc==3 ){
|
||||
zArg = (const char *)sqlite3_value_text(argv[1]);
|
||||
}
|
||||
|
||||
pHash = (fts2Hash *)sqlite3_user_data(context);
|
||||
p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1);
|
||||
|
||||
if( !p ){
|
||||
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
|
||||
sqlite3_result_error(context, zErr, -1);
|
||||
sqlite3_free(zErr);
|
||||
return;
|
||||
}
|
||||
|
||||
pRet = Tcl_NewObj();
|
||||
Tcl_IncrRefCount(pRet);
|
||||
|
||||
if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
|
||||
zErr = "error in xCreate()";
|
||||
goto finish;
|
||||
}
|
||||
pTokenizer->pModule = p;
|
||||
if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
|
||||
zErr = "error in xOpen()";
|
||||
goto finish;
|
||||
}
|
||||
pCsr->pTokenizer = pTokenizer;
|
||||
|
||||
while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
|
||||
Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
|
||||
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
|
||||
zToken = &zInput[iStart];
|
||||
nToken = iEnd-iStart;
|
||||
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
|
||||
}
|
||||
|
||||
if( SQLITE_OK!=p->xClose(pCsr) ){
|
||||
zErr = "error in xClose()";
|
||||
goto finish;
|
||||
}
|
||||
if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
|
||||
zErr = "error in xDestroy()";
|
||||
goto finish;
|
||||
}
|
||||
|
||||
finish:
|
||||
if( zErr ){
|
||||
sqlite3_result_error(context, zErr, -1);
|
||||
}else{
|
||||
sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
|
||||
}
|
||||
Tcl_DecrRefCount(pRet);
|
||||
}
|
||||
|
||||
static
|
||||
int registerTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module *p
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
|
||||
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
|
||||
sqlite3_step(pStmt);
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
static
|
||||
int queryFts2Tokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module **pp
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts2_tokenizer(?)";
|
||||
|
||||
*pp = 0;
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
if( SQLITE_ROW==sqlite3_step(pStmt) ){
|
||||
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
|
||||
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
|
||||
}
|
||||
}
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
|
||||
|
||||
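/* [Editor's illustration -- not part of the original file.  A hedged sketch
** of the SQL-level flow the helpers above enable: register a tokenizer
** implementation under a name with the fts2_tokenizer() scalar (as in
** registerTokenizer()), then name it in the "tokenizer" clause of an fts2
** table.  The table, column and tokenizer names are made up.]
*/
static int createTableWithTokenizer(sqlite3 *db){
  int rc;
  const sqlite3_tokenizer_module *pModule = 0;

  sqlite3Fts2SimpleTokenizerModule(&pModule);
  rc = registerTokenizer(db, (char *)"simple_example", pModule);
  if( rc!=SQLITE_OK ) return rc;

  /* The tokenizer clause below is forwarded to the xCreate() method of the
  ** module registered above (see the comments in fts2_tokenizer.h). */
  return sqlite3_exec(db,
      "CREATE VIRTUAL TABLE docs USING fts2(body, tokenizer simple_example);",
      0, 0, 0);
}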
/*
|
||||
** Implementation of the scalar function fts2_tokenizer_internal_test().
|
||||
** This function is used for testing only, it is not included in the
|
||||
** build unless SQLITE_TEST is defined.
|
||||
**
|
||||
** The purpose of this is to test that the fts2_tokenizer() function
|
||||
** can be used as designed by the C-code in the queryFts2Tokenizer and
|
||||
** registerTokenizer() functions above. These two functions are repeated
|
||||
** in the README.tokenizer file as an example, so it is important to
|
||||
** test them.
|
||||
**
|
||||
** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar
|
||||
** function with no arguments. An assert() will fail if a problem is
|
||||
** detected. i.e.:
|
||||
**
|
||||
** SELECT fts2_tokenizer_internal_test();
|
||||
**
|
||||
*/
|
||||
static void intTestFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
int rc;
|
||||
const sqlite3_tokenizer_module *p1;
|
||||
const sqlite3_tokenizer_module *p2;
|
||||
sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
|
||||
|
||||
/* Test the query function */
|
||||
sqlite3Fts2SimpleTokenizerModule(&p1);
|
||||
rc = queryFts2Tokenizer(db, "simple", &p2);
|
||||
assert( rc==SQLITE_OK );
|
||||
assert( p1==p2 );
|
||||
rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
|
||||
assert( rc==SQLITE_ERROR );
|
||||
assert( p2==0 );
|
||||
assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
|
||||
|
||||
/* Test the storage function */
|
||||
rc = registerTokenizer(db, "nosuchtokenizer", p1);
|
||||
assert( rc==SQLITE_OK );
|
||||
rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
|
||||
assert( rc==SQLITE_OK );
|
||||
assert( p2==p1 );
|
||||
|
||||
sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Set up SQL objects in database db used to access the contents of
|
||||
** the hash table pointed to by argument pHash. The hash table must
|
||||
** been initialized to use string keys, and to take a private copy
|
||||
** of the key when a value is inserted. i.e. by a call similar to:
|
||||
**
|
||||
** sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
|
||||
**
|
||||
** This function adds a scalar function (see header comment above
|
||||
** scalarFunc() in this file for details) and, if ENABLE_TABLE is
|
||||
** defined at compilation time, a temporary virtual table (see header
|
||||
** comment above struct HashTableVtab) to the database schema. Both
|
||||
** provide read/write access to the contents of *pHash.
|
||||
**
|
||||
** The third argument to this function, zName, is used as the name
|
||||
** of both the scalar and, if created, the virtual table.
|
||||
*/
|
||||
int sqlite3Fts2InitHashTable(
|
||||
sqlite3 *db,
|
||||
fts2Hash *pHash,
|
||||
const char *zName
|
||||
){
|
||||
int rc = SQLITE_OK;
|
||||
void *p = (void *)pHash;
|
||||
const int any = SQLITE_ANY;
|
||||
char *zTest = 0;
|
||||
char *zTest2 = 0;
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
void *pdb = (void *)db;
|
||||
zTest = sqlite3_mprintf("%s_test", zName);
|
||||
zTest2 = sqlite3_mprintf("%s_internal_test", zName);
|
||||
if( !zTest || !zTest2 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
if( rc!=SQLITE_OK
|
||||
|| (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0))
|
||||
|| (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0))
|
||||
#ifdef SQLITE_TEST
|
||||
|| (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0))
|
||||
|| (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0))
|
||||
|| (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0))
|
||||
#endif
|
||||
);
|
||||
|
||||
sqlite3_free(zTest);
|
||||
sqlite3_free(zTest2);
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
|
||||
@ -1,145 +0,0 @@
/*
** 2006 July 10
**
** The author disclaims copyright to this source code.
**
*************************************************************************
** Defines the interface to tokenizers used by fulltext-search.  There
** are three basic components:
**
** sqlite3_tokenizer_module is a singleton defining the tokenizer
** interface functions.  This is essentially the class structure for
** tokenizers.
**
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
** including customization information defined at creation time.
**
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
** tokens from a particular input.
*/
#ifndef _FTS2_TOKENIZER_H_
#define _FTS2_TOKENIZER_H_

/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
** If tokenizers are to be allowed to call sqlite3_*() functions, then
** we will need a way to register the API consistently.
*/
#include "sqlite3.h"

/*
** Structures used by the tokenizer interface. When a new tokenizer
** implementation is registered, the caller provides a pointer to
** an sqlite3_tokenizer_module containing pointers to the callback
** functions that make up an implementation.
**
** When an fts2 table is created, it passes any arguments passed to
** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
** implementation. The xCreate() function in turn returns an
** sqlite3_tokenizer structure representing the specific tokenizer to
** be used for the fts2 table (customized by the tokenizer clause arguments).
**
** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
** method is called. It returns an sqlite3_tokenizer_cursor object
** that may be used to tokenize a specific input buffer based on
** the tokenization rules supplied by a specific sqlite3_tokenizer
** object.
*/
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;

struct sqlite3_tokenizer_module {

  /*
  ** Structure version. Should always be set to 0.
  */
  int iVersion;

  /*
  ** Create a new tokenizer. The values in the argv[] array are the
  ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
  ** TABLE statement that created the fts2 table. For example, if
  ** the following SQL is executed:
  **
  **   CREATE .. USING fts2( ... , tokenizer <tokenizer-name> arg1 arg2)
  **
  ** then argc is set to 2, and the argv[] array contains pointers
  ** to the strings "arg1" and "arg2".
  **
  ** This method should return either SQLITE_OK (0), or an SQLite error
  ** code. If SQLITE_OK is returned, then *ppTokenizer should be set
  ** to point at the newly created tokenizer structure. The generic
  ** sqlite3_tokenizer.pModule variable should not be initialized by
  ** this callback. The caller will do so.
  */
  int (*xCreate)(
    int argc,                           /* Size of argv array */
    const char *const*argv,             /* Tokenizer argument strings */
    sqlite3_tokenizer **ppTokenizer     /* OUT: Created tokenizer */
  );

  /*
  ** Destroy an existing tokenizer. The fts2 module calls this method
  ** exactly once for each successful call to xCreate().
  */
  int (*xDestroy)(sqlite3_tokenizer *pTokenizer);

  /*
  ** Create a tokenizer cursor to tokenize an input buffer. The caller
  ** is responsible for ensuring that the input buffer remains valid
  ** until the cursor is closed (using the xClose() method).
  */
  int (*xOpen)(
    sqlite3_tokenizer *pTokenizer,       /* Tokenizer object */
    const char *pInput, int nBytes,      /* Input buffer */
    sqlite3_tokenizer_cursor **ppCursor  /* OUT: Created tokenizer cursor */
  );

  /*
  ** Destroy an existing tokenizer cursor. The fts2 module calls this
  ** method exactly once for each successful call to xOpen().
  */
  int (*xClose)(sqlite3_tokenizer_cursor *pCursor);

  /*
  ** Retrieve the next token from the tokenizer cursor pCursor. This
  ** method should either return SQLITE_OK and set the values of the
  ** "OUT" variables identified below, or SQLITE_DONE to indicate that
  ** the end of the buffer has been reached, or an SQLite error code.
  **
  ** *ppToken should be set to point at a buffer containing the
  ** normalized version of the token (i.e. after any case-folding and/or
  ** stemming has been performed). *pnBytes should be set to the length
  ** of this buffer in bytes. The input text that generated the token is
  ** identified by the byte offsets returned in *piStartOffset and
  ** *piEndOffset.
  **
  ** The buffer *ppToken is set to point at is managed by the tokenizer
  ** implementation. It is only required to be valid until the next call
  ** to xNext() or xClose().
  */
  /* TODO(shess) current implementation requires pInput to be
  ** nul-terminated.  This should either be fixed, or pInput/nBytes
  ** should be converted to zInput.
  */
  int (*xNext)(
    sqlite3_tokenizer_cursor *pCursor,   /* Tokenizer cursor */
    const char **ppToken, int *pnBytes,  /* OUT: Normalized text for token */
    int *piStartOffset,  /* OUT: Byte offset of token in input buffer */
    int *piEndOffset,    /* OUT: Byte offset of end of token in input buffer */
    int *piPosition      /* OUT: Number of tokens returned before this one */
  );
};

struct sqlite3_tokenizer {
  const sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer */
  /* Tokenizer implementations will typically add additional fields */
};

struct sqlite3_tokenizer_cursor {
  sqlite3_tokenizer *pTokenizer;       /* Tokenizer for this cursor. */
  /* Tokenizer implementations will typically add additional fields */
};

#endif /* _FTS2_TOKENIZER_H_ */
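
/* [Editor's illustration -- not part of the original fts2_tokenizer.h.  A
** hedged sketch of how a caller drives the interface declared above: one
** xCreate(), one xOpen() per input buffer, xNext() until SQLITE_DONE, then
** xClose() and xDestroy().  Error handling is reduced for brevity.]
*/
static int tokenizeOnce(const sqlite3_tokenizer_module *pModule, const char *zText){
  sqlite3_tokenizer *pTokenizer = 0;
  sqlite3_tokenizer_cursor *pCsr = 0;
  const char *zToken; int nToken;
  int iStart, iEnd, iPos;
  int rc;

  rc = pModule->xCreate(0, 0, &pTokenizer);
  if( rc!=SQLITE_OK ) return rc;
  pTokenizer->pModule = pModule;            /* the caller fills in pModule */

  rc = pModule->xOpen(pTokenizer, zText, -1, &pCsr);
  if( rc==SQLITE_OK ){
    pCsr->pTokenizer = pTokenizer;
    while( pModule->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos)==SQLITE_OK ){
      /* zToken[0..nToken-1] is the normalized token; zText[iStart..iEnd-1]
      ** is the raw input it came from; iPos counts tokens from zero. */
    }
    pModule->xClose(pCsr);
  }
  return pModule->xDestroy(pTokenizer);
}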
@ -1,233 +0,0 @@
|
||||
/*
|
||||
** 2006 Oct 10
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** Implementation of the "simple" full-text-search tokenizer.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS2 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS2 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sqlite3.h"
|
||||
#include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT3
|
||||
#include "fts2_tokenizer.h"
|
||||
|
||||
typedef struct simple_tokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
char delim[128]; /* flag ASCII delimiters */
|
||||
} simple_tokenizer;
|
||||
|
||||
typedef struct simple_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *pInput; /* input we are tokenizing */
|
||||
int nBytes; /* size of the input */
|
||||
int iOffset; /* current position in pInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *pToken; /* storage for current token */
|
||||
int nTokenAllocated; /* space allocated to zToken buffer */
|
||||
} simple_tokenizer_cursor;
|
||||
|
||||
|
||||
/* Forward declaration */
|
||||
static const sqlite3_tokenizer_module simpleTokenizerModule;
|
||||
|
||||
static int simpleDelim(simple_tokenizer *t, unsigned char c){
|
||||
return c<0x80 && t->delim[c];
|
||||
}
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int simpleCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
simple_tokenizer *t;
|
||||
|
||||
t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t));
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
memset(t, 0, sizeof(*t));
|
||||
|
||||
/* TODO(shess) Delimiters need to remain the same from run to run,
|
||||
** else we need to reindex. One solution would be a meta-table to
|
||||
** track such information in the database, then we'd only want this
|
||||
** information on the initial create.
|
||||
*/
|
||||
if( argc>1 ){
|
||||
int i, n = strlen(argv[1]);
|
||||
for(i=0; i<n; i++){
|
||||
unsigned char ch = argv[1][i];
|
||||
/* We explicitly don't support UTF-8 delimiters for now. */
|
||||
if( ch>=0x80 ){
|
||||
sqlite3_free(t);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
t->delim[ch] = 1;
|
||||
}
|
||||
} else {
|
||||
/* Mark non-alphanumeric ASCII characters as delimiters */
|
||||
int i;
|
||||
for(i=1; i<0x80; i++){
|
||||
t->delim[i] = !((i>='0' && i<='9') || (i>='A' && i<='Z') ||
|
||||
(i>='a' && i<='z'));
|
||||
}
|
||||
}
|
||||
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
sqlite3_free(pTokenizer);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is pInput[0..nBytes-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int simpleOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *pInput, int nBytes, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
simple_tokenizer_cursor *c;
|
||||
|
||||
c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
|
||||
if( c==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
c->pInput = pInput;
|
||||
if( pInput==0 ){
|
||||
c->nBytes = 0;
|
||||
}else if( nBytes<0 ){
|
||||
c->nBytes = (int)strlen(pInput);
|
||||
}else{
|
||||
c->nBytes = nBytes;
|
||||
}
|
||||
c->iOffset = 0; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->pToken = NULL; /* no space allocated, yet. */
|
||||
c->nTokenAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** simpleOpen() above.
|
||||
*/
|
||||
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
sqlite3_free(c->pToken);
|
||||
sqlite3_free(c);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to simpleOpen().
|
||||
*/
|
||||
static int simpleNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
|
||||
const char **ppToken, /* OUT: *ppToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
|
||||
unsigned char *p = (unsigned char *)c->pInput;
|
||||
|
||||
while( c->iOffset<c->nBytes ){
|
||||
int iStartOffset;
|
||||
|
||||
/* Scan past delimiter characters */
|
||||
while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
/* Count non-delimiter characters. */
|
||||
iStartOffset = c->iOffset;
|
||||
while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
if( c->iOffset>iStartOffset ){
|
||||
int i, n = c->iOffset-iStartOffset;
|
||||
if( n>c->nTokenAllocated ){
|
||||
c->nTokenAllocated = n+20;
|
||||
c->pToken = sqlite3_realloc(c->pToken, c->nTokenAllocated);
|
||||
if( c->pToken==NULL ) return SQLITE_NOMEM;
|
||||
}
|
||||
for(i=0; i<n; i++){
|
||||
/* TODO(shess) This needs expansion to handle UTF-8
|
||||
** case-insensitivity.
|
||||
*/
|
||||
unsigned char ch = p[iStartOffset+i];
|
||||
c->pToken[i] = (ch>='A' && ch<='Z') ? (ch - 'A' + 'a') : ch;
|
||||
}
|
||||
*ppToken = c->pToken;
|
||||
*pnBytes = n;
|
||||
*piStartOffset = iStartOffset;
|
||||
*piEndOffset = c->iOffset;
|
||||
*piPosition = c->iToken++;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the simple tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module simpleTokenizerModule = {
|
||||
0,
|
||||
simpleCreate,
|
||||
simpleDestroy,
|
||||
simpleOpen,
|
||||
simpleClose,
|
||||
simpleNext,
|
||||
};
|
||||
|
||||
/*
|
||||
** Allocate a new simple tokenizer. Return a pointer to the new
|
||||
** tokenizer in *ppModule
|
||||
*/
|
||||
void sqlite3Fts2SimpleTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &simpleTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
|
||||
@ -1,116 +0,0 @@
|
||||
#!/usr/bin/tclsh
|
||||
#
|
||||
# This script builds a single C code file holding all of FTS2 code.
|
||||
# The name of the output file is fts2amal.c. To build this file,
|
||||
# first do:
|
||||
#
|
||||
# make target_source
|
||||
#
|
||||
# The make target above moves all of the source code files into
|
||||
# a subdirectory named "tsrc". (This script expects to find the files
|
||||
# there and will not work if they are not found.)
|
||||
#
|
||||
# After the "tsrc" directory has been created and populated, run
|
||||
# this script:
|
||||
#
|
||||
# tclsh mkfts2amal.tcl
|
||||
#
|
||||
# The amalgamated FTS2 code will be written into fts2amal.c
|
||||
#
|
||||
|
||||
# Open the output file and write a header comment at the beginning
|
||||
# of the file.
|
||||
#
|
||||
set out [open fts2amal.c w]
|
||||
set today [clock format [clock seconds] -format "%Y-%m-%d %H:%M:%S UTC" -gmt 1]
|
||||
puts $out [subst \
|
||||
{/******************************************************************************
|
||||
** This file is an amalgamation of separate C source files from the SQLite
|
||||
** Full Text Search extension 2 (fts2). By combining all the individual C
|
||||
** code files into this single large file, the entire code can be compiled
|
||||
** as one translation unit. This allows many compilers to do optimizations
|
||||
** that would not be possible if the files were compiled separately. It also
|
||||
** makes the code easier to import into other projects.
|
||||
**
|
||||
** This amalgamation was generated on $today.
|
||||
*/}]
|
||||
|
||||
# These are the header files used by FTS2. The first time any of these
|
||||
# files are seen in a #include statement in the C code, include the complete
|
||||
# text of the file in-line. The file only needs to be included once.
|
||||
#
|
||||
foreach hdr {
|
||||
fts2.h
|
||||
fts2_hash.h
|
||||
fts2_tokenizer.h
|
||||
sqlite3.h
|
||||
sqlite3ext.h
|
||||
} {
|
||||
set available_hdr($hdr) 1
|
||||
}
|
||||
|
||||
# 78 stars used for comment formatting.
|
||||
set s78 \
|
||||
{*****************************************************************************}
|
||||
|
||||
# Insert a comment into the code
|
||||
#
|
||||
proc section_comment {text} {
|
||||
global out s78
|
||||
set n [string length $text]
|
||||
set nstar [expr {60 - $n}]
|
||||
set stars [string range $s78 0 $nstar]
|
||||
puts $out "/************** $text $stars/"
|
||||
}
|
||||
|
||||
# Read the source file named $filename and write it into the
|
||||
# fts2amal.c output file. If any #include statements are seen,
|
||||
# process them appropriately.
|
||||
#
|
||||
proc copy_file {filename} {
|
||||
global seen_hdr available_hdr out
|
||||
set tail [file tail $filename]
|
||||
section_comment "Begin file $tail"
|
||||
set in [open $filename r]
|
||||
while {![eof $in]} {
|
||||
set line [gets $in]
|
||||
if {[regexp {^#\s*include\s+["<]([^">]+)[">]} $line all hdr]} {
|
||||
if {[info exists available_hdr($hdr)]} {
|
||||
if {$available_hdr($hdr)} {
|
||||
section_comment "Include $hdr in the middle of $tail"
|
||||
copy_file tsrc/$hdr
|
||||
section_comment "Continuing where we left off in $tail"
|
||||
}
|
||||
} elseif {![info exists seen_hdr($hdr)]} {
|
||||
set seen_hdr($hdr) 1
|
||||
puts $out $line
|
||||
}
|
||||
} elseif {[regexp {^#ifdef __cplusplus} $line]} {
|
||||
puts $out "#if 0"
|
||||
} elseif {[regexp {^#line} $line]} {
|
||||
# Skip #line directives.
|
||||
} else {
|
||||
puts $out $line
|
||||
}
|
||||
}
|
||||
close $in
|
||||
section_comment "End of $tail"
|
||||
}
|
||||
|
||||
|
||||
# Process the source files. Process files containing commonly
|
||||
# used subroutines first in order to help the compiler find
|
||||
# inlining opportunities.
|
||||
#
|
||||
foreach file {
|
||||
fts2.c
|
||||
fts2_hash.c
|
||||
fts2_porter.c
|
||||
fts2_tokenizer.c
|
||||
fts2_tokenizer1.c
|
||||
fts2_icu.c
|
||||
} {
|
||||
copy_file tsrc/$file
|
||||
}
|
||||
|
||||
close $out
|
||||
@ -1,178 +0,0 @@
|
||||
|
||||
FTS4 CONTENT OPTION
|
||||
|
||||
Normally, in order to create a full-text index on a dataset, the FTS4
|
||||
module stores a copy of all indexed documents in a specially created
|
||||
database table.
|
||||
|
||||
As of SQLite version 3.7.9, FTS4 supports a new option - "content" -
|
||||
designed to extend FTS4 to support the creation of full-text indexes where:
|
||||
|
||||
* The indexed documents are not stored within the SQLite database
|
||||
at all (a "contentless" FTS4 table), or
|
||||
|
||||
* The indexed documents are stored in a database table created and
|
||||
managed by the user (an "external content" FTS4 table).
|
||||
|
||||
Because the indexed documents themselves are usually much larger than
|
||||
the full-text index, the content option can sometimes be used to achieve
|
||||
significant space savings.
|
||||
|
||||
CONTENTLESS FTS4 TABLES
|
||||
|
||||
In order to create an FTS4 table that does not store a copy of the indexed
|
||||
documents at all, the content option should be set to an empty string.
|
||||
For example, the following SQL creates such an FTS4 table with three
|
||||
columns - "a", "b", and "c":
|
||||
|
||||
CREATE VIRTUAL TABLE t1 USING fts4(content="", a, b, c);
|
||||
|
||||
Data can be inserted into such an FTS4 table using INSERT statements.
|
||||
However, unlike ordinary FTS4 tables, the user must supply an explicit
|
||||
integer docid value. For example:
|
||||
|
||||
-- This statement is Ok:
|
||||
INSERT INTO t1(docid, a, b, c) VALUES(1, 'a b c', 'd e f', 'g h i');
|
||||
|
||||
-- This statement causes an error, as no docid value has been provided:
|
||||
INSERT INTO t1(a, b, c) VALUES('j k l', 'm n o', 'p q r');
|
||||
|
||||
It is not possible to UPDATE or DELETE a row stored in a contentless FTS4
|
||||
table. Attempting to do so is an error.
|
||||
|
||||
Contentless FTS4 tables also support SELECT statements. However, it is
|
||||
an error to attempt to retrieve the value of any table column other than
|
||||
the docid column. The auxiliary function matchinfo() may be used, but
|
||||
snippet() and offsets() may not. For example:
|
||||
|
||||
-- The following statements are Ok:
|
||||
SELECT docid FROM t1 WHERE t1 MATCH 'xxx';
|
||||
SELECT docid FROM t1 WHERE a MATCH 'xxx';
|
||||
SELECT matchinfo(t1) FROM t1 WHERE t1 MATCH 'xxx';
|
||||
|
||||
-- The following statements all cause errors, as the value of columns
|
||||
-- other than docid are required to evaluate them.
|
||||
SELECT * FROM t1;
|
||||
SELECT a, b FROM t1 WHERE t1 MATCH 'xxx';
|
||||
SELECT docid FROM t1 WHERE a LIKE 'xxx%';
|
||||
SELECT snippet(t1) FROM t1 WHERE t1 MATCH 'xxx';
|
||||
|
||||
Errors related to attempting to retrieve column values other than docid
|
||||
are runtime errors that occur within sqlite3_step(). In some cases, for
|
||||
example if the MATCH expression in a SELECT query matches zero rows, there
|
||||
may be no error at all even if a statement does refer to column values
|
||||
other than docid.
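
For illustration only, here is a minimal C sketch (assuming a build with the
FTS4 extension enabled; the helper is hypothetical and the table is the
contentless "t1" from the example above) showing that the restriction is
reported by sqlite3_step(), not at prepare time:

  #include <stdio.h>
  #include "sqlite3.h"

  int main(void){
    sqlite3 *db;
    sqlite3_stmt *pStmt;
    int rc;

    sqlite3_open(":memory:", &db);
    sqlite3_exec(db,
        "CREATE VIRTUAL TABLE t1 USING fts4(content=\"\", a, b, c);"
        "INSERT INTO t1(docid, a, b, c) VALUES(1, 'a b c', 'd e f', 'g h i');",
        0, 0, 0);

    /* Preparing a statement that reads column "a" succeeds... */
    rc = sqlite3_prepare_v2(db,
        "SELECT a FROM t1 WHERE t1 MATCH 'b'", -1, &pStmt, 0);
    printf("prepare: %d\n", rc);         /* expected: SQLITE_OK (0) */

    /* ...the error is only reported when the statement is stepped. */
    rc = sqlite3_step(pStmt);
    printf("step: %d (%s)\n", rc, sqlite3_errmsg(db));

    sqlite3_finalize(pStmt);
    sqlite3_close(db);
    return 0;
  }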
|
||||
|
||||
EXTERNAL CONTENT FTS4 TABLES
|
||||
|
||||
An "external content" FTS4 table is similar to a contentless table, except
|
||||
that if evaluation of a query requires the value of a column other than
|
||||
docid, FTS4 attempts to retrieve that value from a table (or view, or
|
||||
virtual table) nominated by the user (hereafter referred to as the "content
|
||||
table"). The FTS4 module never writes to the content table, and writing
|
||||
to the content table does not affect the full-text index. It is the
|
||||
responsibility of the user to ensure that the content table and the
|
||||
full-text index are consistent.
|
||||
|
||||
An external content FTS4 table is created by setting the content option
|
||||
to the name of a table (or view, or virtual table) that may be queried by
|
||||
FTS4 to retrieve column values when required. If the nominated table does
|
||||
not exist, then an external content table behaves in the same way as
|
||||
a contentless table. For example:
|
||||
|
||||
CREATE TABLE t2(id INTEGER PRIMARY KEY, a, b, c);
|
||||
CREATE VIRTUAL TABLE t3 USING fts4(content="t2", a, c);
|
||||
|
||||
Assuming the nominated table does exist, then its columns must be the same
|
||||
as or a superset of those defined for the FTS table.
|
||||
|
||||
When a user's query on the FTS table requires a column value other than
|
||||
docid, FTS attempts to read this value from the corresponding column of
|
||||
the row in the content table with a rowid value equal to the current FTS
|
||||
docid. Or, if such a row cannot be found in the content table, a NULL
|
||||
value is used instead. For example:
|
||||
|
||||
CREATE TABLE t2(id INTEGER PRIMARY KEY, a, b, c, d);
|
||||
CREATE VIRTUAL TABLE t3 USING fts4(content="t2", b, c);
|
||||
|
||||
INSERT INTO t2 VALUES(2, 'a b', 'c d', 'e f');
|
||||
INSERT INTO t2 VALUES(3, 'g h', 'i j', 'k l');
|
||||
INSERT INTO t3(docid, b, c) SELECT id, b, c FROM t2;
|
||||
|
||||
-- The following query returns a single row with two columns containing
|
||||
-- the text values "i j" and "k l".
|
||||
--
|
||||
-- The query uses the full-text index to discover that the MATCH
|
||||
-- term matches the row with docid=3. It then retrieves the values
|
||||
-- of columns b and c from the row with rowid=3 in the content table
|
||||
-- to return.
|
||||
--
|
||||
SELECT * FROM t3 WHERE t3 MATCH 'k';
|
||||
|
||||
-- Following the UPDATE, the query still returns a single row, this
|
||||
-- time containing the text values "xxx" and "yyy". This is because the
|
||||
-- full-text index still indicates that the row with docid=3 matches
|
||||
-- the FTS4 query 'k', even though the documents stored in the content
|
||||
-- table have been modified.
|
||||
--
|
||||
UPDATE t2 SET b = 'xxx', c = 'yyy' WHERE rowid = 3;
|
||||
SELECT * FROM t3 WHERE t3 MATCH 'k';
|
||||
|
||||
-- Following the DELETE below, the query returns one row containing two
|
||||
-- NULL values. NULL values are returned because FTS is unable to find
|
||||
-- a row with rowid=3 within the content table.
|
||||
--
|
||||
DELETE FROM t2;
|
||||
SELECT * FROM t3 WHERE t3 MATCH 'k';
|
||||
|
||||
When a row is deleted from an external content FTS4 table, FTS4 needs to
|
||||
retrieve the column values of the row being deleted from the content table.
|
||||
This is so that FTS4 can update the full-text index entries for each token
|
||||
that occurs within the deleted row to indicate that that row has been
|
||||
deleted. If the content table row cannot be found, or if it contains values
|
||||
inconsistent with the contents of the FTS index, the results can be difficult
|
||||
to predict. The FTS index may be left containing entries corresponding to the
|
||||
deleted row, which can lead to seemingly nonsensical results being returned
|
||||
by subsequent SELECT queries. The same applies when a row is updated, as
|
||||
internally an UPDATE is the same as a DELETE followed by an INSERT.
|
||||
|
||||
Instead of writing separately to the full-text index and the content table,
|
||||
some users may wish to use database triggers to keep the full-text index
|
||||
up to date with respect to the set of documents stored in the content table.
|
||||
For example, using the tables from earlier examples:
|
||||
|
||||
CREATE TRIGGER t2_bu BEFORE UPDATE ON t2 BEGIN
|
||||
DELETE FROM t3 WHERE docid=old.rowid;
|
||||
END;
|
||||
CREATE TRIGGER t2_bd BEFORE DELETE ON t2 BEGIN
|
||||
DELETE FROM t3 WHERE docid=old.rowid;
|
||||
END;
|
||||
|
||||
CREATE TRIGGER t2_au AFTER UPDATE ON t2 BEGIN
|
||||
INSERT INTO t3(docid, b, c) VALUES(new.rowid, new.b, new.c);
|
||||
END;
|
||||
CREATE TRIGGER t2_ai AFTER INSERT ON t2 BEGIN
|
||||
INSERT INTO t3(docid, b, c) VALUES(new.rowid, new.b, new.c);
|
||||
END;
|
||||
|
||||
The DELETE trigger must be fired before the actual delete takes place
|
||||
on the content table. This is so that FTS4 can still retrieve the original
|
||||
values in order to update the full-text index. And the INSERT trigger must
|
||||
be fired after the new row is inserted, so as to handle the case where the
|
||||
rowid is assigned automatically within the system. The UPDATE trigger must
|
||||
be split into two parts, one fired before and one after the update of the
|
||||
content table, for the same reasons.
|
||||
|
||||
FTS4 features a special command similar to the 'optimize' command that
|
||||
deletes the entire full-text index and rebuilds it based on the current
|
||||
set of documents in the content table. Assuming again that "t3" is the
|
||||
name of the external content FTS4 table, the command is:
|
||||
|
||||
INSERT INTO t3(t3) VALUES('rebuild');
|
||||
|
||||
This command may also be used with ordinary FTS4 tables, although it may
|
||||
only be useful if the full-text index has somehow become corrupt. It is an
|
||||
error to attempt to rebuild the full-text index maintained by a contentless
|
||||
FTS4 table.
|
||||
|
||||
|
||||
@ -1,209 +0,0 @@
|
||||
|
||||
1. OVERVIEW
|
||||
|
||||
This README file describes the syntax of the arguments that may be passed to
|
||||
the FTS3 MATCH operator used for full-text queries. For example, if table
|
||||
"t1" is an Fts3 virtual table, the following SQL query:
|
||||
|
||||
SELECT * FROM t1 WHERE <col> MATCH <full-text query>
|
||||
|
||||
may be used to retrieve all rows that match a specified full-text query.
|
||||
The text "<col>" should be replaced by either the name of the fts3 table
|
||||
(in this case "t1"), or by the name of one of the columns of the fts3
|
||||
table. <full-text-query> should be replaced by an SQL expression that
|
||||
computes to a string containing an Fts3 query.
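
Because the right-hand side of MATCH is simply a string-valued SQL
expression, the query can also be supplied through a bound parameter. The
following is a minimal, illustrative C sketch (assuming an open connection
"db" and an fts3 table named "t1"; the helper name countMatches is
hypothetical):

  #include "sqlite3.h"

  /* Count the rows of t1 that match the full-text query zQuery. */
  static int countMatches(sqlite3 *db, const char *zQuery, int *pnMatch){
    sqlite3_stmt *pStmt;
    int rc;

    *pnMatch = 0;
    rc = sqlite3_prepare_v2(db,
        "SELECT count(*) FROM t1 WHERE t1 MATCH ?", -1, &pStmt, 0);
    if( rc!=SQLITE_OK ) return rc;

    sqlite3_bind_text(pStmt, 1, zQuery, -1, SQLITE_TRANSIENT);
    if( sqlite3_step(pStmt)==SQLITE_ROW ){
      *pnMatch = sqlite3_column_int(pStmt, 0);
    }
    return sqlite3_finalize(pStmt);
  }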
|
||||
|
||||
If the left-hand-side of the MATCH operator is set to the name of the
|
||||
fts3 table, then by default the query may be matched against any column
|
||||
of the table. If it is set to a column name, then by default the query
|
||||
may only match the specified column. In both cases this may be overridden
|
||||
as part of the query text (see sections 2 and 3 below).
|
||||
|
||||
As of SQLite version 3.6.8, Fts3 supports two slightly different query
|
||||
formats; the standard syntax, which is used by default, and the enhanced
|
||||
query syntax which can be selected by compiling with the pre-processor
|
||||
symbol SQLITE_ENABLE_FTS3_PARENTHESIS defined.
|
||||
|
||||
-DSQLITE_ENABLE_FTS3_PARENTHESIS
|
||||
|
||||
2. STANDARD QUERY SYNTAX
|
||||
|
||||
When using the standard Fts3 query syntax, a query usually consists of a
|
||||
list of terms (words) separated by white-space characters. To match a
|
||||
query, a row (or column) of an Fts3 table must contain each of the specified
|
||||
terms. For example, the following query:
|
||||
|
||||
<col> MATCH 'hello world'
|
||||
|
||||
matches rows (or columns, if <col> is the name of a column) that
|
||||
contain at least one instance of the token "hello", and at least one
|
||||
instance of the token "world". Tokens may be grouped into phrases using
|
||||
quotation marks. In this case, a matching row or column must contain each
|
||||
of the tokens in the phrase in the order specified, with no intervening
|
||||
tokens. For example, the query:
|
||||
|
||||
<col> MATCH '"hello world" joe"
|
||||
|
||||
matches the first of the following two documents, but not the second or
|
||||
third:
|
||||
|
||||
"'Hello world', said Joe."
|
||||
"One should always greet the world with a cheery hello, thought Joe."
|
||||
"How many hello world programs could their be?"
|
||||
|
||||
As well as grouping tokens together by phrase, the binary NEAR operator
|
||||
may be used to search for rows that contain two or more specified tokens
|
||||
or phrases within a specified proximity of each other. The NEAR operator
|
||||
must always be specified in upper case. The word "near" in lower or mixed
|
||||
case is treated as an ordinary token. For example, the following query:
|
||||
|
||||
<col> MATCH 'engineering NEAR consultancy'
|
||||
|
||||
matches rows that contain both the "engineering" and "consultancy" tokens
|
||||
in the same column with not more than 10 other words between them. It does
|
||||
not matter which of the two terms occurs first in the document, only that
|
||||
they be separated by 10 tokens or fewer. The user may also specify
|
||||
a different required proximity by adding "/N" immediately after the NEAR
|
||||
operator, where N is an integer. For example:
|
||||
|
||||
<col> MATCH 'engineering NEAR/5 consultancy'
|
||||
|
||||
searches for a row containing an instance of each specified token separated
|
||||
by not more than 5 other tokens. More than one NEAR operator can be used
|
||||
in a sequence. For example, this query:
|
||||
|
||||
<col> MATCH 'reliable NEAR/2 engineering NEAR/5 consultancy'
|
||||
|
||||
searches for a row that contains an instance of the token "reliable"
|
||||
separated by not more than two tokens from an instance of "engineering",
|
||||
which is in turn separated by not more than 5 other tokens from an
|
||||
instance of the term "consultancy". Phrases enclosed in quotes may
|
||||
also be used as arguments to the NEAR operator.
|
||||
|
||||
Similar to the NEAR operator, one or more tokens or phrases may be
|
||||
separated by OR operators. In this case, only one of the specified tokens
|
||||
or phrases must appear in the document. For example, the query:
|
||||
|
||||
<col> MATCH 'hello OR world'
|
||||
|
||||
matches rows that contain either the term "hello", or the term "world",
|
||||
or both. Note that unlike in many programming languages, the OR operator
|
||||
has a higher precedence than the AND operators implied between white-space
|
||||
separated tokens. The following query matches documents that contain the
|
||||
term 'sqlite' and at least one of the terms 'fantastic' or 'impressive',
|
||||
not those that contain both 'sqlite' and 'fantastic' or 'impressive':
|
||||
|
||||
<col> MATCH 'sqlite fantastic OR impressive'
|
||||
|
||||
Any token that is part of an Fts3 query expression, whether or not it is
|
||||
part of a phrase enclosed in quotes, may have a '*' character appended to
|
||||
it. In this case, the token matches all terms that begin with the characters
|
||||
of the token, not just those that exactly match it. For example, the
|
||||
following query:
|
||||
|
||||
<col> MATCH 'sql*'
|
||||
|
||||
matches all rows that contain the term "SQLite", as well as those that
|
||||
contain "SQL".
|
||||
|
||||
A token that is not part of a quoted phrase may be preceded by a '-'
|
||||
character, which indicates that matching rows must not contain the
|
||||
specified term. For example, the following:
|
||||
|
||||
<col> MATCH '"database engine" -sqlite'
|
||||
|
||||
matches rows that contain the phrase "database engine" but do not contain
|
||||
the term "sqlite". If the '-' character occurs inside a quoted phrase,
|
||||
it is ignored. It is possible to use both the '-' prefix and the '*' postfix
|
||||
on a single term. At this time, all Fts3 queries must contain at least
|
||||
one term or phrase that is not preceded by the '-' prefix.
|
||||
|
||||
Regardless of whether or not a table name or column name is used on the
|
||||
left hand side of the MATCH operator, a specific column of the fts3 table
|
||||
may be associated with each token in a query by preceding a token with
|
||||
a column name followed by a ':' character. For example, regardless of what
|
||||
is specified for <col>, the following query requires that column "col1"
|
||||
of the table contains the term "hello", and that column "col2" of the
|
||||
table contains the term "world". If the table does not contain columns
|
||||
named "col1" and "col2", then an error is returned and the query is
|
||||
not run.
|
||||
|
||||
<col> MATCH 'col1:hello col2:world'
|
||||
|
||||
It is not possible to associate a specific table column with a quoted
|
||||
phrase or a term preceded by a '-' operator. A '*' character may be
|
||||
appended to a term associated with a specific column for prefix matching.
|
||||
|
||||
3. ENHANCED QUERY SYNTAX
|
||||
|
||||
The enhanced query syntax is quite similar to the standard query syntax,
|
||||
with the following four differences:
|
||||
|
||||
1) Parentheses are supported. When using the enhanced query syntax,
|
||||
parentheses may be used to overcome the built-in precedence of the
|
||||
supplied binary operators. For example, the following query:
|
||||
|
||||
<col> MATCH '(hello world) OR (simple example)'
|
||||
|
||||
matches documents that contain both "hello" and "world", and documents
|
||||
that contain both "simple" and "example". It is not possible to formulate
|
||||
such a query using the standard syntax.
|
||||
|
||||
2) Instead of separating tokens and phrases by whitespace, an AND operator
|
||||
may be explicitly specified. This does not change query processing at
|
||||
all, but may be used to improve readability. For example, the following
|
||||
query is handled identically to the one above:
|
||||
|
||||
<col> MATCH '(hello AND world) OR (simple AND example)'
|
||||
|
||||
As with the OR and NEAR operators, the AND operator must be specified
|
||||
in upper case. The word "and" specified in lower or mixed case is
|
||||
handled as a regular token.
|
||||
|
||||
3) The '-' token prefix is not supported. Instead, a new binary operator,
|
||||
NOT, is included. The NOT operator requires that the query specified
|
||||
as its left-hand operand matches, but that the query specified as the
|
||||
right-hand operand does not. For example, to query for all rows that
|
||||
contain the term "example" but not the term "simple", the following
|
||||
query could be used:
|
||||
|
||||
<col> MATCH 'example NOT simple'
|
||||
|
||||
As for all other operators, the NOT operator must be specified in
|
||||
upper case. Otherwise it will be treated as a regular token.
|
||||
|
||||
4) Unlike in the standard syntax, where the OR operator has a higher
|
||||
precedence than the implicit AND operator, when using the enhanced
|
||||
syntax implicit and explicit AND operators have a higher precedence
|
||||
than OR operators. Using the enhanced syntax, the following two
|
||||
queries are equivalent:
|
||||
|
||||
<col> MATCH 'sqlite fantastic OR impressive'
|
||||
<col> MATCH '(sqlite AND fantastic) OR impressive'
|
||||
|
||||
however, when using the standard syntax, the query:
|
||||
|
||||
<col> MATCH 'sqlite fantastic OR impressive'
|
||||
|
||||
is equivalent to the enhanced syntax query:
|
||||
|
||||
<col> MATCH 'sqlite AND (fantastic OR impressive)'
|
||||
|
||||
The precedence of all enhanced syntax operators, in order from highest
|
||||
to lowest, is:
|
||||
|
||||
NEAR (highest precedence, tightest grouping)
|
||||
NOT
|
||||
AND
|
||||
OR (lowest precedence, loosest grouping)
|
||||
|
||||
Using the enhanced syntax, it is possible to specify expressions enclosed
|
||||
in parentheses as operands to the NOT, AND and OR operators. However, both
|
||||
the left and right hand side operands of NEAR operators must be either
|
||||
tokens or phrases. Attempting the following query will return an error:
|
||||
|
||||
<col> MATCH 'sqlite NEAR (fantastic OR impressive)'
|
||||
|
||||
Queries of this form must be re-written as:
|
||||
|
||||
<col> MATCH 'sqlite NEAR fantastic OR sqlite NEAR impressive'
|
||||
@ -1,133 +0,0 @@
|
||||
|
||||
1. FTS3 Tokenizers
|
||||
|
||||
When creating a new full-text table, FTS3 allows the user to select
|
||||
the text tokenizer implementation to be used when indexing text
|
||||
by specifying a "tokenize" clause as part of the CREATE VIRTUAL TABLE
|
||||
statement:
|
||||
|
||||
CREATE VIRTUAL TABLE <table-name> USING fts3(
|
||||
<columns ...> [, tokenize <tokenizer-name> [<tokenizer-args>]]
|
||||
);
|
||||
|
||||
The built-in tokenizers (valid values to pass as <tokenizer name>) are
|
||||
"simple", "porter" and "unicode".
|
||||
|
||||
<tokenizer-args> should consist of zero or more white-space separated
|
||||
arguments to pass to the selected tokenizer implementation. The
|
||||
interpretation of the arguments, if any, depends on the individual
|
||||
tokenizer.
|
||||
|
||||
2. Custom Tokenizers
|
||||
|
||||
FTS3 allows users to provide custom tokenizer implementations. The
|
||||
interface used to create a new tokenizer is defined and described in
|
||||
the fts3_tokenizer.h source file.
|
||||
|
||||
Registering a new FTS3 tokenizer is similar to registering a new
|
||||
virtual table module with SQLite. The user passes a pointer to a
|
||||
structure containing pointers to various callback functions that
|
||||
make up the implementation of the new tokenizer type. For tokenizers,
|
||||
the structure (defined in fts3_tokenizer.h) is called
|
||||
"sqlite3_tokenizer_module".
|
||||
|
||||
FTS3 does not expose a C-function that users call to register new
|
||||
tokenizer types with a database handle. Instead, the pointer must
|
||||
be encoded as an SQL blob value and passed to FTS3 through the SQL
|
||||
engine by evaluating a special scalar function, "fts3_tokenizer()".
|
||||
The fts3_tokenizer() function may be called with one or two arguments,
|
||||
as follows:
|
||||
|
||||
SELECT fts3_tokenizer(<tokenizer-name>);
|
||||
SELECT fts3_tokenizer(<tokenizer-name>, <sqlite3_tokenizer_module ptr>);
|
||||
|
||||
Where <tokenizer-name> is a string identifying the tokenizer and
|
||||
<sqlite3_tokenizer_module ptr> is a pointer to an sqlite3_tokenizer_module
|
||||
structure encoded as an SQL blob. If the second argument is present,
|
||||
it is registered as tokenizer <tokenizer-name> and a copy of it
|
||||
returned. If only one argument is passed, a pointer to the tokenizer
|
||||
implementation currently registered as <tokenizer-name> is returned,
|
||||
encoded as a blob. Or, if no such tokenizer exists, an SQL exception
|
||||
(error) is raised.
|
||||
|
||||
SECURITY: If the fts3 extension is used in an environment where potentially
|
||||
malicious users may execute arbitrary SQL (e.g. gears), they should be
|
||||
prevented from invoking the fts3_tokenizer() function, possibly using the
|
||||
authorisation callback.
|
||||
|
||||
See "Sample code" below for an example of calling the fts3_tokenizer()
|
||||
function from C code.
|
||||
|
||||
3. ICU Library Tokenizers
|
||||
|
||||
If this extension is compiled with the SQLITE_ENABLE_ICU pre-processor
|
||||
symbol defined, then there exists a built-in tokenizer named "icu"
|
||||
implemented using the ICU library. The first argument passed to the
|
||||
xCreate() method (see fts3_tokenizer.h) of this tokenizer may be
|
||||
an ICU locale identifier. For example "tr_TR" for Turkish as used
|
||||
in Turkey, or "en_AU" for English as used in Australia. For example:
|
||||
|
||||
"CREATE VIRTUAL TABLE thai_text USING fts3(text, tokenizer icu th_TH)"
|
||||
|
||||
The ICU tokenizer implementation is very simple. It splits the input
|
||||
text according to the ICU rules for finding word boundaries and discards
|
||||
any tokens that consist entirely of white-space. This may be suitable
|
||||
for some applications in some locales, but not all. If more complex
|
||||
processing is required, for example to implement stemming or
|
||||
discard punctuation, this can be done by creating a tokenizer
|
||||
implementation that uses the ICU tokenizer as part of its implementation.
|
||||
|
||||
When using the ICU tokenizer this way, it is safe to overwrite the
|
||||
contents of the strings returned by the xNext() method (see
|
||||
fts3_tokenizer.h).
|
||||
|
||||
4. Sample code.
|
||||
|
||||
The following two code samples illustrate the way C code should invoke
|
||||
the fts3_tokenizer() scalar function:
|
||||
|
||||
int registerTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module *p
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts3_tokenizer(?, ?)";
|
||||
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
|
||||
sqlite3_step(pStmt);
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
int queryTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module **pp
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts3_tokenizer(?)";
|
||||
|
||||
*pp = 0;
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
if( SQLITE_ROW==sqlite3_step(pStmt) ){
|
||||
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
|
||||
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
|
||||
}
|
||||
}
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
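
The following usage sketch (not part of the original text) combines the two
helpers above: it looks up the module pointer registered for the built-in
"simple" tokenizer and registers that same module again under a second,
hypothetical name ("simple_alias"):

  int registerSimpleAlias(sqlite3 *db){
    char zSimple[] = "simple";
    char zAlias[] = "simple_alias";      /* hypothetical alias name */
    const sqlite3_tokenizer_module *p = 0;
    int rc;

    rc = queryTokenizer(db, zSimple, &p);
    if( rc!=SQLITE_OK ) return rc;
    if( p==0 ) return SQLITE_ERROR;      /* "simple" tokenizer not found */

    return registerTokenizer(db, zAlias, p);
  }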
|
||||
@ -1,4 +0,0 @@
|
||||
This folder contains source code to the second full-text search
|
||||
extension for SQLite. While the API is the same, this version uses a
|
||||
substantially different storage schema from fts1, so tables will need
|
||||
to be rebuilt.
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,26 +0,0 @@
|
||||
/*
|
||||
** 2006 Oct 10
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This header file is used by programs that want to link against the
|
||||
** FTS3 library. All it does is declare the sqlite3Fts3Init() interface.
|
||||
*/
|
||||
#include "sqlite3.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
int sqlite3Fts3Init(sqlite3 *db);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
@ -1,617 +0,0 @@
|
||||
/*
|
||||
** 2009 Nov 12
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
*/
|
||||
#ifndef _FTSINT_H
|
||||
#define _FTSINT_H
|
||||
|
||||
#if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
|
||||
# define NDEBUG 1
|
||||
#endif
|
||||
|
||||
/* FTS3/FTS4 require virtual tables */
|
||||
#ifdef SQLITE_OMIT_VIRTUALTABLE
|
||||
# undef SQLITE_ENABLE_FTS3
|
||||
# undef SQLITE_ENABLE_FTS4
|
||||
#endif
|
||||
|
||||
/*
|
||||
** FTS4 is really an extension for FTS3. It is enabled using the
|
||||
** SQLITE_ENABLE_FTS3 macro. But to avoid confusion we also allow
|
||||
** the SQLITE_ENABLE_FTS4 macro to serve as an alias for SQLITE_ENABLE_FTS3.
|
||||
*/
|
||||
#if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3)
|
||||
# define SQLITE_ENABLE_FTS3
|
||||
#endif
|
||||
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
/* If not building as part of the core, include sqlite3ext.h. */
|
||||
#ifndef SQLITE_CORE
|
||||
# include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT3
|
||||
#endif
|
||||
|
||||
#include "sqlite3.h"
|
||||
#include "fts3_tokenizer.h"
|
||||
#include "fts3_hash.h"
|
||||
|
||||
/*
|
||||
** This constant determines the maximum depth of an FTS expression tree
|
||||
** that the library will create and use. FTS uses recursion to perform
|
||||
** various operations on the query tree, so the disadvantage of a large
|
||||
** limit is that it may allow very large queries to use large amounts
|
||||
** of stack space (perhaps causing a stack overflow).
|
||||
*/
|
||||
#ifndef SQLITE_FTS3_MAX_EXPR_DEPTH
|
||||
# define SQLITE_FTS3_MAX_EXPR_DEPTH 12
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
** This constant controls how often segments are merged. Once there are
|
||||
** FTS3_MERGE_COUNT segments of level N, they are merged into a single
|
||||
** segment of level N+1.
|
||||
*/
|
||||
#define FTS3_MERGE_COUNT 16
|
||||
|
||||
/*
|
||||
** This is the maximum amount of data (in bytes) to store in the
|
||||
** Fts3Table.pendingTerms hash table. Normally, the hash table is
|
||||
** populated as documents are inserted/updated/deleted in a transaction
|
||||
** and used to create a new segment when the transaction is committed.
|
||||
** However if this limit is reached midway through a transaction, a new
|
||||
** segment is created and the hash table cleared immediately.
|
||||
*/
|
||||
#define FTS3_MAX_PENDING_DATA (1*1024*1024)
|
||||
|
||||
/*
|
||||
** Macro to return the number of elements in an array. SQLite has a
|
||||
** similar macro called ArraySize(). Use a different name to avoid
|
||||
** a collision when building an amalgamation with built-in FTS3.
|
||||
*/
|
||||
#define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0])))
|
||||
|
||||
|
||||
#ifndef MIN
|
||||
# define MIN(x,y) ((x)<(y)?(x):(y))
|
||||
#endif
|
||||
#ifndef MAX
|
||||
# define MAX(x,y) ((x)>(y)?(x):(y))
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Maximum length of a varint encoded integer. The varint format is different
|
||||
** from that used by SQLite, so the maximum length is 10, not 9.
|
||||
*/
|
||||
#define FTS3_VARINT_MAX 10
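
/*
** Illustrative sketch only (not part of the original header): a writer for
** a varint of this kind - the low seven bits of each byte carry data and the
** high bit means "another byte follows".  A full 64-bit value therefore
** needs at most ceil(64/7) = 10 bytes, which is where FTS3_VARINT_MAX comes
** from.  The library's own encoder is sqlite3Fts3PutVarint(), declared
** later in this file.
*/
static int fts3SketchPutVarint(char *p, sqlite3_uint64 v){
  unsigned char *q = (unsigned char *)p;
  do{
    *q++ = (unsigned char)((v & 0x7f) | 0x80);  /* emit 7 bits, set high bit */
    v >>= 7;
  }while( v!=0 );
  q[-1] &= 0x7f;              /* clear the "more follows" bit on the last byte */
  return (int)(q - (unsigned char *)p);         /* number of bytes written */
}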
|
||||
|
||||
/*
|
||||
** FTS4 virtual tables may maintain multiple indexes - one index of all terms
|
||||
** in the document set and zero or more prefix indexes. All indexes are stored
|
||||
** as one or more b+-trees in the %_segments and %_segdir tables.
|
||||
**
|
||||
** It is possible to determine which index a b+-tree belongs to based on the
|
||||
** value stored in the "%_segdir.level" column. Given this value L, the index
|
||||
** that the b+-tree belongs to is (L>>10), as illustrated by the sketch
** following the defines below. In other words, all b+-trees with
|
||||
** level values between 0 and 1023 (inclusive) belong to index 0, all levels
|
||||
** between 1024 and 2047 to index 1, and so on.
|
||||
**
|
||||
** It is considered impossible for an index to use more than 1024 levels. In
|
||||
** theory though this may happen, but only after at least
|
||||
** (FTS3_MERGE_COUNT^1024) separate flushes of the pending-terms tables.
|
||||
*/
|
||||
#define FTS3_SEGDIR_MAXLEVEL 1024
|
||||
#define FTS3_SEGDIR_MAXLEVEL_STR "1024"
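
/*
** Illustrative sketch (not part of the original header): mapping a
** %_segdir.level value to the index it belongs to, as described in the
** comment above.  For example, level 7 maps to index 0 and level 1500
** maps to index 1.
*/
#define FTS3_SKETCH_LEVEL_TO_INDEX(L) ((int)((L) / FTS3_SEGDIR_MAXLEVEL))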
|
||||
|
||||
/*
|
||||
** The testcase() macro is only used by the amalgamation. If undefined,
|
||||
** make it a no-op.
|
||||
*/
|
||||
#ifndef testcase
|
||||
# define testcase(X)
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Terminator values for position-lists and column-lists.
|
||||
*/
|
||||
#define POS_COLUMN (1) /* Column-list terminator */
|
||||
#define POS_END (0) /* Position-list terminator */
|
||||
|
||||
/*
|
||||
** This section provides definitions to allow the
|
||||
** FTS3 extension to be compiled outside of the
|
||||
** amalgamation.
|
||||
*/
|
||||
#ifndef SQLITE_AMALGAMATION
|
||||
/*
|
||||
** Macros indicating that conditional expressions are always true or
|
||||
** false.
|
||||
*/
|
||||
#ifdef SQLITE_COVERAGE_TEST
|
||||
# define ALWAYS(x) (1)
|
||||
# define NEVER(X) (0)
|
||||
#elif defined(SQLITE_DEBUG)
|
||||
# define ALWAYS(x) sqlite3Fts3Always((x)!=0)
|
||||
# define NEVER(x) sqlite3Fts3Never((x)!=0)
|
||||
int sqlite3Fts3Always(int b);
|
||||
int sqlite3Fts3Never(int b);
|
||||
#else
|
||||
# define ALWAYS(x) (x)
|
||||
# define NEVER(x) (x)
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Internal types used by SQLite.
|
||||
*/
|
||||
typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */
|
||||
typedef short int i16; /* 2-byte (or larger) signed integer */
|
||||
typedef unsigned int u32; /* 4-byte unsigned integer */
|
||||
typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */
|
||||
typedef sqlite3_int64 i64; /* 8-byte signed integer */
|
||||
|
||||
/*
|
||||
** Macro used to suppress compiler warnings for unused parameters.
|
||||
*/
|
||||
#define UNUSED_PARAMETER(x) (void)(x)
|
||||
|
||||
/*
|
||||
** Activate assert() only if SQLITE_TEST is enabled.
|
||||
*/
|
||||
#if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
|
||||
# define NDEBUG 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
** The TESTONLY macro is used to enclose variable declarations or
|
||||
** other bits of code that are needed to support the arguments
|
||||
** within testcase() and assert() macros.
|
||||
*/
|
||||
#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
|
||||
# define TESTONLY(X) X
|
||||
#else
|
||||
# define TESTONLY(X)
|
||||
#endif
|
||||
|
||||
#endif /* SQLITE_AMALGAMATION */
|
||||
|
||||
#ifdef SQLITE_DEBUG
|
||||
int sqlite3Fts3Corrupt(void);
|
||||
# define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt()
|
||||
#else
|
||||
# define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB
|
||||
#endif
|
||||
|
||||
typedef struct Fts3Table Fts3Table;
|
||||
typedef struct Fts3Cursor Fts3Cursor;
|
||||
typedef struct Fts3Expr Fts3Expr;
|
||||
typedef struct Fts3Phrase Fts3Phrase;
|
||||
typedef struct Fts3PhraseToken Fts3PhraseToken;
|
||||
|
||||
typedef struct Fts3Doclist Fts3Doclist;
|
||||
typedef struct Fts3SegFilter Fts3SegFilter;
|
||||
typedef struct Fts3DeferredToken Fts3DeferredToken;
|
||||
typedef struct Fts3SegReader Fts3SegReader;
|
||||
typedef struct Fts3MultiSegReader Fts3MultiSegReader;
|
||||
|
||||
typedef struct MatchinfoBuffer MatchinfoBuffer;
|
||||
|
||||
/*
|
||||
** A connection to a fulltext index is an instance of the following
|
||||
** structure. The xCreate and xConnect methods create an instance
|
||||
** of this structure and xDestroy and xDisconnect free that instance.
|
||||
** All other methods receive a pointer to the structure as one of their
|
||||
** arguments.
|
||||
*/
|
||||
struct Fts3Table {
|
||||
sqlite3_vtab base; /* Base class used by SQLite core */
|
||||
sqlite3 *db; /* The database connection */
|
||||
const char *zDb; /* logical database name */
|
||||
const char *zName; /* virtual table name */
|
||||
int nColumn; /* number of named columns in virtual table */
|
||||
char **azColumn; /* column names. malloced */
|
||||
u8 *abNotindexed; /* True for 'notindexed' columns */
|
||||
sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
|
||||
char *zContentTbl; /* content=xxx option, or NULL */
|
||||
char *zLanguageid; /* languageid=xxx option, or NULL */
|
||||
int nAutoincrmerge; /* Value configured by 'automerge' */
|
||||
u32 nLeafAdd; /* Number of leaf blocks added this trans */
|
||||
|
||||
/* Precompiled statements used by the implementation. Each of these
|
||||
** statements is run and reset within a single virtual table API call.
|
||||
*/
|
||||
sqlite3_stmt *aStmt[40];
|
||||
|
||||
char *zReadExprlist;
|
||||
char *zWriteExprlist;
|
||||
|
||||
int nNodeSize; /* Soft limit for node size */
|
||||
u8 bFts4; /* True for FTS4, false for FTS3 */
|
||||
u8 bHasStat; /* True if %_stat table exists (2==unknown) */
|
||||
u8 bHasDocsize; /* True if %_docsize table exists */
|
||||
u8 bDescIdx; /* True if doclists are in reverse order */
|
||||
u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */
|
||||
int nPgsz; /* Page size for host database */
|
||||
char *zSegmentsTbl; /* Name of %_segments table */
|
||||
sqlite3_blob *pSegments; /* Blob handle open on %_segments table */
|
||||
|
||||
/*
|
||||
** The following array of hash tables is used to buffer pending index
|
||||
** updates during transactions. All pending updates buffered at any one
|
||||
** time must share a common language-id (see the FTS4 langid= feature).
|
||||
** The current language id is stored in variable iPrevLangid.
|
||||
**
|
||||
** A single FTS4 table may have multiple full-text indexes. For each index
|
||||
** there is an entry in the aIndex[] array. Index 0 is an index of all the
|
||||
** terms that appear in the document set. Each subsequent index in aIndex[]
|
||||
** is an index of prefixes of a specific length.
|
||||
**
|
||||
** Variable nPendingData contains an estimate of the memory consumed by the
|
||||
** pending data structures, including hash table overhead, but not including
|
||||
** malloc overhead. When nPendingData exceeds nMaxPendingData, all hash
|
||||
** tables are flushed to disk. Variable iPrevDocid is the docid of the most
|
||||
** recently inserted record.
|
||||
*/
|
||||
int nIndex; /* Size of aIndex[] */
|
||||
struct Fts3Index {
|
||||
int nPrefix; /* Prefix length (0 for main terms index) */
|
||||
Fts3Hash hPending; /* Pending terms table for this index */
|
||||
} *aIndex;
|
||||
int nMaxPendingData; /* Max pending data before flush to disk */
|
||||
int nPendingData; /* Current bytes of pending data */
|
||||
sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */
|
||||
int iPrevLangid; /* Langid of recently inserted document */
|
||||
int bPrevDelete; /* True if last operation was a delete */
|
||||
|
||||
#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
|
||||
/* State variables used for validating that the transaction control
|
||||
** methods of the virtual table are called at appropriate times. These
|
||||
** values do not contribute to FTS functionality; they are used for
|
||||
** verifying the operation of the SQLite core.
|
||||
*/
|
||||
int inTransaction; /* True after xBegin but before xCommit/xRollback */
|
||||
int mxSavepoint; /* Largest valid xSavepoint integer */
|
||||
#endif
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
/* True to disable the incremental doclist optimization. This is controlled
|
||||
** by special insert command 'test-no-incr-doclist'. */
|
||||
int bNoIncrDoclist;
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
** When the core wants to read from the virtual table, it creates a
|
||||
** virtual table cursor (an instance of the following structure) using
|
||||
** the xOpen method. Cursors are destroyed using the xClose method.
|
||||
*/
|
||||
struct Fts3Cursor {
|
||||
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
|
||||
i16 eSearch; /* Search strategy (see below) */
|
||||
u8 isEof; /* True if at End Of Results */
|
||||
u8 isRequireSeek; /* True if must seek pStmt to %_content row */
|
||||
sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
|
||||
Fts3Expr *pExpr; /* Parsed MATCH query string */
|
||||
int iLangid; /* Language being queried for */
|
||||
int nPhrase; /* Number of matchable phrases in query */
|
||||
Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */
|
||||
sqlite3_int64 iPrevId; /* Previous id read from aDoclist */
|
||||
char *pNextId; /* Pointer into the body of aDoclist */
|
||||
char *aDoclist; /* List of docids for full-text queries */
|
||||
int nDoclist; /* Size of buffer at aDoclist */
|
||||
u8 bDesc; /* True to sort in descending order */
|
||||
int eEvalmode; /* An FTS3_EVAL_XX constant */
|
||||
int nRowAvg; /* Average size of database rows, in pages */
|
||||
sqlite3_int64 nDoc; /* Documents in table */
|
||||
i64 iMinDocid; /* Minimum docid to return */
|
||||
i64 iMaxDocid; /* Maximum docid to return */
|
||||
int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */
|
||||
MatchinfoBuffer *pMIBuffer; /* Buffer for matchinfo data */
|
||||
};
|
||||
|
||||
#define FTS3_EVAL_FILTER 0
|
||||
#define FTS3_EVAL_NEXT 1
|
||||
#define FTS3_EVAL_MATCHINFO 2
|
||||
|
||||
/*
|
||||
** The Fts3Cursor.eSearch member is always set to one of the following.
|
||||
** Actually, Fts3Cursor.eSearch can be greater than or equal to
|
||||
** FTS3_FULLTEXT_SEARCH. If so, then Fts3Cursor.eSearch - 2 is the index
|
||||
** of the column to be searched. For example, in
|
||||
**
|
||||
** CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d);
|
||||
** SELECT docid FROM ex1 WHERE b MATCH 'one two three';
|
||||
**
|
||||
** Because the LHS of the MATCH operator is the 2nd column "b",
|
||||
** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1. (+0 for a,
|
||||
** +1 for b, +2 for c, +3 for d.) If the LHS of MATCH were "ex1"
|
||||
** indicating that all columns should be searched,
|
||||
** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4.
|
||||
*/
|
||||
#define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */
|
||||
#define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */
|
||||
#define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */
|
||||
|
||||
/*
|
||||
** The lower 16-bits of the sqlite3_index_info.idxNum value set by
|
||||
** the xBestIndex() method contains the Fts3Cursor.eSearch value described
|
||||
** above. The upper 16-bits contain a combination of the following
|
||||
** bits, used to describe extra constraints on full-text searches.
|
||||
*/
|
||||
#define FTS3_HAVE_LANGID 0x00010000 /* languageid=? */
|
||||
#define FTS3_HAVE_DOCID_GE 0x00020000 /* docid>=? */
|
||||
#define FTS3_HAVE_DOCID_LE 0x00040000 /* docid<=? */
|
||||
|
||||
struct Fts3Doclist {
|
||||
char *aAll; /* Array containing doclist (or NULL) */
|
||||
int nAll; /* Size of a[] in bytes */
|
||||
char *pNextDocid; /* Pointer to next docid */
|
||||
|
||||
sqlite3_int64 iDocid; /* Current docid (if pList!=0) */
|
||||
int bFreeList; /* True if pList should be sqlite3_free()d */
|
||||
char *pList; /* Pointer to position list following iDocid */
|
||||
int nList; /* Length of position list */
|
||||
};
|
||||
|
||||
/*
|
||||
** A "phrase" is a sequence of one or more tokens that must match in
|
||||
** sequence. A single token is the base case and the most common case.
|
||||
** For a sequence of tokens contained in double-quotes (i.e. "one two three")
|
||||
** nToken will be the number of tokens in the string.
|
||||
*/
|
||||
struct Fts3PhraseToken {
|
||||
char *z; /* Text of the token */
|
||||
int n; /* Number of bytes in buffer z */
|
||||
int isPrefix; /* True if token ends with a "*" character */
|
||||
int bFirst; /* True if token must appear at position 0 */
|
||||
|
||||
/* Variables above this point are populated when the expression is
|
||||
** parsed (by code in fts3_expr.c). Below this point the variables are
|
||||
** used when evaluating the expression. */
|
||||
Fts3DeferredToken *pDeferred; /* Deferred token object for this token */
|
||||
Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */
|
||||
};
|
||||
|
||||
struct Fts3Phrase {
|
||||
/* Cache of doclist for this phrase. */
|
||||
Fts3Doclist doclist;
|
||||
int bIncr; /* True if doclist is loaded incrementally */
|
||||
int iDoclistToken;
|
||||
|
||||
/* Used by sqlite3Fts3EvalPhrasePoslist() if this is a descendant of an
|
||||
** OR condition. */
|
||||
char *pOrPoslist;
|
||||
i64 iOrDocid;
|
||||
|
||||
/* Variables below this point are populated by fts3_expr.c when parsing
|
||||
** a MATCH expression. Everything above is part of the evaluation phase.
|
||||
*/
|
||||
int nToken; /* Number of tokens in the phrase */
|
||||
int iColumn; /* Index of column this phrase must match */
|
||||
Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */
|
||||
};
|
||||
|
||||
/*
|
||||
** A tree of these objects forms the RHS of a MATCH operator.
|
||||
**
|
||||
** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist
|
||||
** points to a malloced buffer, size nDoclist bytes, containing the results
|
||||
** of this phrase query in FTS3 doclist format. As usual, the initial
|
||||
** "Length" field found in doclists stored on disk is omitted from this
|
||||
** buffer.
|
||||
**
|
||||
** Variable aMI is used only for FTSQUERY_NEAR nodes to store the global
|
||||
** matchinfo data. If it is not NULL, it points to an array of size nCol*3,
|
||||
** where nCol is the number of columns in the queried FTS table. The array
|
||||
** is populated as follows:
|
||||
**
|
||||
** aMI[iCol*3 + 0] = Undefined
|
||||
** aMI[iCol*3 + 1] = Number of occurrences
|
||||
** aMI[iCol*3 + 2] = Number of rows containing at least one instance
|
||||
**
|
||||
** The aMI array is allocated using sqlite3_malloc(). It should be freed
|
||||
** when the expression node is.
|
||||
*/
|
||||
struct Fts3Expr {
|
||||
int eType; /* One of the FTSQUERY_XXX values defined below */
|
||||
int nNear; /* Valid if eType==FTSQUERY_NEAR */
|
||||
Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */
|
||||
Fts3Expr *pLeft; /* Left operand */
|
||||
Fts3Expr *pRight; /* Right operand */
|
||||
Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */
|
||||
|
||||
/* The following are used by the fts3_eval.c module. */
|
||||
sqlite3_int64 iDocid; /* Current docid */
|
||||
u8 bEof; /* True if this expression is at EOF already */
|
||||
u8 bStart; /* True if iDocid is valid */
|
||||
u8 bDeferred; /* True if this expression is entirely deferred */
|
||||
|
||||
/* The following are used by the fts3_snippet.c module. */
|
||||
int iPhrase; /* Index of this phrase in matchinfo() results */
|
||||
u32 *aMI; /* See above */
|
||||
};
|
||||
|
||||
/*
|
||||
** Candidate values for Fts3Query.eType. Note that the order of the first
|
||||
** four values is in order of precedence when parsing expressions. For
|
||||
** example, the following:
|
||||
**
|
||||
** "a OR b AND c NOT d NEAR e"
|
||||
**
|
||||
** is equivalent to:
|
||||
**
|
||||
** "a OR (b AND (c NOT (d NEAR e)))"
|
||||
*/
|
||||
#define FTSQUERY_NEAR 1
|
||||
#define FTSQUERY_NOT 2
|
||||
#define FTSQUERY_AND 3
|
||||
#define FTSQUERY_OR 4
|
||||
#define FTSQUERY_PHRASE 5
|
||||
|
||||
|
||||
/* fts3_write.c */
|
||||
int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*);
|
||||
int sqlite3Fts3PendingTermsFlush(Fts3Table *);
|
||||
void sqlite3Fts3PendingTermsClear(Fts3Table *);
|
||||
int sqlite3Fts3Optimize(Fts3Table *);
|
||||
int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64,
|
||||
sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
|
||||
int sqlite3Fts3SegReaderPending(
|
||||
Fts3Table*,int,const char*,int,int,Fts3SegReader**);
|
||||
void sqlite3Fts3SegReaderFree(Fts3SegReader *);
|
||||
int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **);
|
||||
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);
|
||||
|
||||
int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
|
||||
int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **);
|
||||
|
||||
#ifndef SQLITE_DISABLE_FTS4_DEFERRED
|
||||
void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);
|
||||
int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int);
|
||||
int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *);
|
||||
void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *);
|
||||
int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *);
|
||||
#else
|
||||
# define sqlite3Fts3FreeDeferredTokens(x)
|
||||
# define sqlite3Fts3DeferToken(x,y,z) SQLITE_OK
|
||||
# define sqlite3Fts3CacheDeferredDoclists(x) SQLITE_OK
|
||||
# define sqlite3Fts3FreeDeferredDoclists(x)
|
||||
# define sqlite3Fts3DeferredTokenList(x,y,z) SQLITE_OK
|
||||
#endif
|
||||
|
||||
void sqlite3Fts3SegmentsClose(Fts3Table *);
|
||||
int sqlite3Fts3MaxLevel(Fts3Table *, int *);
|
||||
|
||||
/* Special values interpreted by sqlite3SegReaderCursor() */
|
||||
#define FTS3_SEGCURSOR_PENDING -1
|
||||
#define FTS3_SEGCURSOR_ALL -2
|
||||
|
||||
int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*);
|
||||
int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *);
|
||||
void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *);
|
||||
|
||||
int sqlite3Fts3SegReaderCursor(Fts3Table *,
|
||||
int, int, int, const char *, int, int, int, Fts3MultiSegReader *);
|
||||
|
||||
/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
|
||||
#define FTS3_SEGMENT_REQUIRE_POS   0x00000001
#define FTS3_SEGMENT_IGNORE_EMPTY  0x00000002
#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
#define FTS3_SEGMENT_PREFIX        0x00000008
#define FTS3_SEGMENT_SCAN          0x00000010
#define FTS3_SEGMENT_FIRST         0x00000020
|
||||
|
||||
/* Type passed as 4th argument to SegmentReaderIterate() */
|
||||
struct Fts3SegFilter {
|
||||
const char *zTerm;
|
||||
int nTerm;
|
||||
int iCol;
|
||||
int flags;
|
||||
};
|
||||
|
||||
struct Fts3MultiSegReader {
|
||||
/* Used internally by sqlite3Fts3SegReaderXXX() calls */
|
||||
Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */
|
||||
int nSegment; /* Size of apSegment array */
|
||||
int nAdvance; /* How many seg-readers to advance */
|
||||
Fts3SegFilter *pFilter; /* Pointer to filter object */
|
||||
char *aBuffer; /* Buffer to merge doclists in */
|
||||
int nBuffer; /* Allocated size of aBuffer[] in bytes */
|
||||
|
||||
int iColFilter; /* If >=0, filter for this column */
|
||||
int bRestart;
|
||||
|
||||
/* Used by fts3.c only. */
|
||||
int nCost; /* Cost of running iterator */
|
||||
int bLookup; /* True if a lookup of a single entry. */
|
||||
|
||||
/* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */
|
||||
char *zTerm; /* Pointer to term buffer */
|
||||
int nTerm; /* Size of zTerm in bytes */
|
||||
char *aDoclist; /* Pointer to doclist buffer */
|
||||
int nDoclist; /* Size of aDoclist[] in bytes */
|
||||
};
|
||||
|
||||
int sqlite3Fts3Incrmerge(Fts3Table*,int,int);
|
||||
|
||||
#define fts3GetVarint32(p, piVal) ( \
|
||||
(*(u8*)(p)&0x80) ? sqlite3Fts3GetVarint32(p, piVal) : (*piVal=*(u8*)(p), 1) \
|
||||
)
|
||||
|
||||
/* fts3.c */
|
||||
void sqlite3Fts3ErrMsg(char**,const char*,...);
|
||||
int sqlite3Fts3PutVarint(char *, sqlite3_int64);
|
||||
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
|
||||
int sqlite3Fts3GetVarint32(const char *, int *);
|
||||
int sqlite3Fts3VarintLen(sqlite3_uint64);
|
||||
void sqlite3Fts3Dequote(char *);
|
||||
void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);
|
||||
int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);
|
||||
int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *);
|
||||
void sqlite3Fts3CreateStatTable(int*, Fts3Table*);
|
||||
int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc);
|
||||
|
||||
/* fts3_tokenizer.c */
|
||||
const char *sqlite3Fts3NextToken(const char *, int *);
|
||||
int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
|
||||
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *,
|
||||
sqlite3_tokenizer **, char **
|
||||
);
|
||||
int sqlite3Fts3IsIdChar(char);
|
||||
|
||||
/* fts3_snippet.c */
|
||||
void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
|
||||
void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
|
||||
const char *, const char *, int, int
|
||||
);
|
||||
void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);
|
||||
void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p);
|
||||
|
||||
/* fts3_expr.c */
|
||||
int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int,
|
||||
char **, int, int, int, const char *, int, Fts3Expr **, char **
|
||||
);
|
||||
void sqlite3Fts3ExprFree(Fts3Expr *);
|
||||
#ifdef SQLITE_TEST
|
||||
int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
|
||||
int sqlite3Fts3InitTerm(sqlite3 *db);
|
||||
#endif
|
||||
|
||||
int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int,
|
||||
sqlite3_tokenizer_cursor **
|
||||
);
|
||||
|
||||
/* fts3_aux.c */
|
||||
int sqlite3Fts3InitAux(sqlite3 *db);
|
||||
|
||||
void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);
|
||||
|
||||
int sqlite3Fts3MsrIncrStart(
|
||||
Fts3Table*, Fts3MultiSegReader*, int, const char*, int);
|
||||
int sqlite3Fts3MsrIncrNext(
|
||||
Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *);
|
||||
int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **);
|
||||
int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *);
|
||||
int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
|
||||
|
||||
/* fts3_tokenize_vtab.c */
|
||||
int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *);
|
||||
|
||||
/* fts3_unicode2.c (functions generated by parsing unicode text files) */
|
||||
#ifndef SQLITE_DISABLE_FTS3_UNICODE
|
||||
int sqlite3FtsUnicodeFold(int, int);
|
||||
int sqlite3FtsUnicodeIsalnum(int);
|
||||
int sqlite3FtsUnicodeIsdiacritic(int);
|
||||
#endif
|
||||
|
||||
#endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */
|
||||
#endif /* _FTSINT_H */
|
||||
@ -1,550 +0,0 @@
|
||||
/*
|
||||
** 2011 Jan 27
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
*/
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
typedef struct Fts3auxTable Fts3auxTable;
|
||||
typedef struct Fts3auxCursor Fts3auxCursor;
|
||||
|
||||
struct Fts3auxTable {
|
||||
sqlite3_vtab base; /* Base class used by SQLite core */
|
||||
Fts3Table *pFts3Tab;
|
||||
};
|
||||
|
||||
struct Fts3auxCursor {
|
||||
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
|
||||
Fts3MultiSegReader csr; /* Must be right after "base" */
|
||||
Fts3SegFilter filter;
|
||||
char *zStop;
|
||||
int nStop; /* Byte-length of string zStop */
|
||||
int iLangid; /* Language id to query */
|
||||
int isEof; /* True if cursor is at EOF */
|
||||
sqlite3_int64 iRowid; /* Current rowid */
|
||||
|
||||
int iCol; /* Current value of 'col' column */
|
||||
int nStat; /* Size of aStat[] array */
|
||||
struct Fts3auxColstats {
|
||||
sqlite3_int64 nDoc; /* 'documents' values for current csr row */
|
||||
sqlite3_int64 nOcc; /* 'occurrences' values for current csr row */
|
||||
} *aStat;
|
||||
};
|
||||
|
||||
/*
|
||||
** Schema of the terms table.
|
||||
*/
|
||||
#define FTS3_AUX_SCHEMA \
|
||||
"CREATE TABLE x(term, col, documents, occurrences, languageid HIDDEN)"
|
||||
|
||||
/*
|
||||
** This function does all the work for both the xConnect and xCreate methods.
|
||||
** These tables have no persistent representation of their own, so xConnect
|
||||
** and xCreate are identical operations.
|
||||
*/
|
||||
static int fts3auxConnectMethod(
|
||||
sqlite3 *db, /* Database connection */
|
||||
void *pUnused, /* Unused */
|
||||
int argc, /* Number of elements in argv array */
|
||||
const char * const *argv, /* xCreate/xConnect argument array */
|
||||
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
|
||||
char **pzErr /* OUT: sqlite3_malloc'd error message */
|
||||
){
|
||||
char const *zDb; /* Name of database (e.g. "main") */
|
||||
char const *zFts3; /* Name of fts3 table */
|
||||
int nDb; /* Result of strlen(zDb) */
|
||||
int nFts3; /* Result of strlen(zFts3) */
|
||||
int nByte; /* Bytes of space to allocate here */
|
||||
int rc; /* value returned by declare_vtab() */
|
||||
Fts3auxTable *p; /* Virtual table object to return */
|
||||
|
||||
UNUSED_PARAMETER(pUnused);
|
||||
|
||||
/* The user should invoke this in one of two forms:
|
||||
**
|
||||
** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table);
|
||||
** CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table);
|
||||
*/
|
||||
if( argc!=4 && argc!=5 ) goto bad_args;
|
||||
|
||||
zDb = argv[1];
|
||||
nDb = (int)strlen(zDb);
|
||||
if( argc==5 ){
|
||||
if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){
|
||||
zDb = argv[3];
|
||||
nDb = (int)strlen(zDb);
|
||||
zFts3 = argv[4];
|
||||
}else{
|
||||
goto bad_args;
|
||||
}
|
||||
}else{
|
||||
zFts3 = argv[3];
|
||||
}
|
||||
nFts3 = (int)strlen(zFts3);
|
||||
|
||||
rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA);
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
|
||||
nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2;
|
||||
p = (Fts3auxTable *)sqlite3_malloc(nByte);
|
||||
if( !p ) return SQLITE_NOMEM;
|
||||
memset(p, 0, nByte);
|
||||
|
||||
p->pFts3Tab = (Fts3Table *)&p[1];
|
||||
p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1];
|
||||
p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1];
|
||||
p->pFts3Tab->db = db;
|
||||
p->pFts3Tab->nIndex = 1;
|
||||
|
||||
memcpy((char *)p->pFts3Tab->zDb, zDb, nDb);
|
||||
memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3);
|
||||
sqlite3Fts3Dequote((char *)p->pFts3Tab->zName);
|
||||
|
||||
*ppVtab = (sqlite3_vtab *)p;
|
||||
return SQLITE_OK;
|
||||
|
||||
bad_args:
|
||||
sqlite3Fts3ErrMsg(pzErr, "invalid arguments to fts4aux constructor");
|
||||
return SQLITE_ERROR;
|
||||
}
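/* Illustrative sketch (not part of the original source): how an application
** might exercise the two constructor forms documented in
** fts3auxConnectMethod() above and read the per-term statistics back out.
** The fts4 table name "docs" is hypothetical, and <sqlite3.h> plus
** <stdio.h> are assumed to be available.
*/
static int fts4auxDemo(sqlite3 *db){
  sqlite3_stmt *pStmt = 0;
  int rc;

  /* Second form: name the source database ("main") and the fts4 table. */
  rc = sqlite3_exec(db,
      "CREATE VIRTUAL TABLE temp.terms USING fts4aux(main, docs);", 0, 0, 0);
  if( rc!=SQLITE_OK ) return rc;

  /* Rows with col='*' aggregate the statistics over all indexed columns. */
  rc = sqlite3_prepare_v2(db,
      "SELECT term, documents, occurrences FROM temp.terms WHERE col='*';",
      -1, &pStmt, 0);
  if( rc!=SQLITE_OK ) return rc;
  while( sqlite3_step(pStmt)==SQLITE_ROW ){
    printf("%s docs=%lld occ=%lld\n",
        (const char *)sqlite3_column_text(pStmt, 0),
        (long long)sqlite3_column_int64(pStmt, 1),
        (long long)sqlite3_column_int64(pStmt, 2));
  }
  return sqlite3_finalize(pStmt);
}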
|
||||
|
||||
/*
|
||||
** This function does the work for both the xDisconnect and xDestroy methods.
|
||||
** These tables have no persistent representation of their own, so xDisconnect
|
||||
** and xDestroy are identical operations.
|
||||
*/
|
||||
static int fts3auxDisconnectMethod(sqlite3_vtab *pVtab){
|
||||
Fts3auxTable *p = (Fts3auxTable *)pVtab;
|
||||
Fts3Table *pFts3 = p->pFts3Tab;
|
||||
int i;
|
||||
|
||||
/* Free any prepared statements held */
|
||||
for(i=0; i<SizeofArray(pFts3->aStmt); i++){
|
||||
sqlite3_finalize(pFts3->aStmt[i]);
|
||||
}
|
||||
sqlite3_free(pFts3->zSegmentsTbl);
|
||||
sqlite3_free(p);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
#define FTS4AUX_EQ_CONSTRAINT 1
|
||||
#define FTS4AUX_GE_CONSTRAINT 2
|
||||
#define FTS4AUX_LE_CONSTRAINT 4
|
||||
|
||||
/*
|
||||
** xBestIndex - Analyze a WHERE and ORDER BY clause.
|
||||
*/
|
||||
static int fts3auxBestIndexMethod(
|
||||
sqlite3_vtab *pVTab,
|
||||
sqlite3_index_info *pInfo
|
||||
){
|
||||
int i;
|
||||
int iEq = -1;
|
||||
int iGe = -1;
|
||||
int iLe = -1;
|
||||
int iLangid = -1;
|
||||
int iNext = 1; /* Next free argvIndex value */
|
||||
|
||||
UNUSED_PARAMETER(pVTab);
|
||||
|
||||
/* This vtab always delivers results in "ORDER BY term ASC" order. */
|
||||
if( pInfo->nOrderBy==1
|
||||
&& pInfo->aOrderBy[0].iColumn==0
|
||||
&& pInfo->aOrderBy[0].desc==0
|
||||
){
|
||||
pInfo->orderByConsumed = 1;
|
||||
}
|
||||
|
||||
/* Search for equality and range constraints on the "term" column.
|
||||
** And equality constraints on the hidden "languageid" column. */
|
||||
for(i=0; i<pInfo->nConstraint; i++){
|
||||
if( pInfo->aConstraint[i].usable ){
|
||||
int op = pInfo->aConstraint[i].op;
|
||||
int iCol = pInfo->aConstraint[i].iColumn;
|
||||
|
||||
if( iCol==0 ){
|
||||
if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i;
|
||||
if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i;
|
||||
if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i;
|
||||
if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i;
|
||||
if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i;
|
||||
}
|
||||
if( iCol==4 ){
|
||||
if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iLangid = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( iEq>=0 ){
|
||||
pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT;
|
||||
pInfo->aConstraintUsage[iEq].argvIndex = iNext++;
|
||||
pInfo->estimatedCost = 5;
|
||||
}else{
|
||||
pInfo->idxNum = 0;
|
||||
pInfo->estimatedCost = 20000;
|
||||
if( iGe>=0 ){
|
||||
pInfo->idxNum += FTS4AUX_GE_CONSTRAINT;
|
||||
pInfo->aConstraintUsage[iGe].argvIndex = iNext++;
|
||||
pInfo->estimatedCost /= 2;
|
||||
}
|
||||
if( iLe>=0 ){
|
||||
pInfo->idxNum += FTS4AUX_LE_CONSTRAINT;
|
||||
pInfo->aConstraintUsage[iLe].argvIndex = iNext++;
|
||||
pInfo->estimatedCost /= 2;
|
||||
}
|
||||
}
|
||||
if( iLangid>=0 ){
|
||||
pInfo->aConstraintUsage[iLangid].argvIndex = iNext++;
|
||||
pInfo->estimatedCost--;
|
||||
}
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xOpen - Open a cursor.
|
||||
*/
|
||||
static int fts3auxOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
|
||||
Fts3auxCursor *pCsr; /* Pointer to cursor object to return */
|
||||
|
||||
UNUSED_PARAMETER(pVTab);
|
||||
|
||||
pCsr = (Fts3auxCursor *)sqlite3_malloc(sizeof(Fts3auxCursor));
|
||||
if( !pCsr ) return SQLITE_NOMEM;
|
||||
memset(pCsr, 0, sizeof(Fts3auxCursor));
|
||||
|
||||
*ppCsr = (sqlite3_vtab_cursor *)pCsr;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xClose - Close a cursor.
|
||||
*/
|
||||
static int fts3auxCloseMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
|
||||
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
|
||||
|
||||
sqlite3Fts3SegmentsClose(pFts3);
|
||||
sqlite3Fts3SegReaderFinish(&pCsr->csr);
|
||||
sqlite3_free((void *)pCsr->filter.zTerm);
|
||||
sqlite3_free(pCsr->zStop);
|
||||
sqlite3_free(pCsr->aStat);
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){
|
||||
if( nSize>pCsr->nStat ){
|
||||
struct Fts3auxColstats *aNew;
|
||||
aNew = (struct Fts3auxColstats *)sqlite3_realloc(pCsr->aStat,
|
||||
sizeof(struct Fts3auxColstats) * nSize
|
||||
);
|
||||
if( aNew==0 ) return SQLITE_NOMEM;
|
||||
memset(&aNew[pCsr->nStat], 0,
|
||||
sizeof(struct Fts3auxColstats) * (nSize - pCsr->nStat)
|
||||
);
|
||||
pCsr->aStat = aNew;
|
||||
pCsr->nStat = nSize;
|
||||
}
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xNext - Advance the cursor to the next row, if any.
|
||||
*/
|
||||
static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
|
||||
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
|
||||
int rc;
|
||||
|
||||
/* Increment our pretend rowid value. */
|
||||
pCsr->iRowid++;
|
||||
|
||||
for(pCsr->iCol++; pCsr->iCol<pCsr->nStat; pCsr->iCol++){
|
||||
if( pCsr->aStat[pCsr->iCol].nDoc>0 ) return SQLITE_OK;
|
||||
}
|
||||
|
||||
rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr);
|
||||
if( rc==SQLITE_ROW ){
|
||||
int i = 0;
|
||||
int nDoclist = pCsr->csr.nDoclist;
|
||||
char *aDoclist = pCsr->csr.aDoclist;
|
||||
int iCol;
|
||||
|
||||
int eState = 0;
|
||||
|
||||
if( pCsr->zStop ){
|
||||
int n = (pCsr->nStop<pCsr->csr.nTerm) ? pCsr->nStop : pCsr->csr.nTerm;
|
||||
int mc = memcmp(pCsr->zStop, pCsr->csr.zTerm, n);
|
||||
if( mc<0 || (mc==0 && pCsr->csr.nTerm>pCsr->nStop) ){
|
||||
pCsr->isEof = 1;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
|
||||
if( fts3auxGrowStatArray(pCsr, 2) ) return SQLITE_NOMEM;
|
||||
memset(pCsr->aStat, 0, sizeof(struct Fts3auxColstats) * pCsr->nStat);
|
||||
iCol = 0;
|
||||
|
||||
while( i<nDoclist ){
|
||||
sqlite3_int64 v = 0;
|
||||
|
||||
i += sqlite3Fts3GetVarint(&aDoclist[i], &v);
|
||||
switch( eState ){
|
||||
/* State 0. In this state the integer just read was a docid. */
|
||||
case 0:
|
||||
pCsr->aStat[0].nDoc++;
|
||||
eState = 1;
|
||||
iCol = 0;
|
||||
break;
|
||||
|
||||
/* State 1. In this state we are expecting either a 1, indicating
|
||||
** that the following integer will be a column number, or the
|
||||
** start of a position list for column 0.
|
||||
**
|
||||
** The only difference between state 1 and state 2 is that if the
|
||||
** integer encountered in state 1 is not 0 or 1, then we need to
|
||||
** increment the column 0 "nDoc" count for this term.
|
||||
*/
|
||||
case 1:
|
||||
assert( iCol==0 );
|
||||
if( v>1 ){
|
||||
pCsr->aStat[1].nDoc++;
|
||||
}
|
||||
eState = 2;
|
||||
/* fall through */
|
||||
|
||||
case 2:
|
||||
if( v==0 ){ /* 0x00. Next integer will be a docid. */
|
||||
eState = 0;
|
||||
}else if( v==1 ){ /* 0x01. Next integer will be a column number. */
|
||||
eState = 3;
|
||||
}else{ /* 2 or greater. A position. */
|
||||
pCsr->aStat[iCol+1].nOcc++;
|
||||
pCsr->aStat[0].nOcc++;
|
||||
}
|
||||
break;
|
||||
|
||||
/* State 3. The integer just read is a column number. */
|
||||
default: assert( eState==3 );
|
||||
iCol = (int)v;
|
||||
if( fts3auxGrowStatArray(pCsr, iCol+2) ) return SQLITE_NOMEM;
|
||||
pCsr->aStat[iCol+1].nDoc++;
|
||||
eState = 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
pCsr->iCol = 0;
|
||||
rc = SQLITE_OK;
|
||||
}else{
|
||||
pCsr->isEof = 1;
|
||||
}
|
||||
return rc;
|
||||
}
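/* Illustrative sketch (not part of the original source): the same doclist
** walk as the state machine above, reduced to counting total documents and
** total position entries. A minimal varint reader is inlined so the
** fragment stands alone; like the code above it only inspects the varint
** values, so the delta encoding of docids and positions does not matter
** for counting.
*/
static void doclistCountDemo(
  const char *aDoclist, int nDoclist,          /* Doclist to scan */
  sqlite3_int64 *pnDoc, sqlite3_int64 *pnOcc   /* OUT: docs and positions */
){
  int i = 0;
  int eState = 0;            /* 0==docid, 2==position list, 3==column number */
  *pnDoc = 0;
  *pnOcc = 0;
  while( i<nDoclist ){
    sqlite3_int64 v = 0;
    int iShift = 0;
    /* Read one varint: 7 bits per byte, high bit set on all but the last. */
    do {
      v |= (sqlite3_int64)(aDoclist[i] & 0x7f) << iShift;
      iShift += 7;
    }while( (aDoclist[i++] & 0x80) && i<nDoclist );

    switch( eState ){
      case 0:                /* The varint just read was a docid */
        (*pnDoc)++;
        eState = 2;
        break;
      case 3:                /* The varint just read was a column number */
        eState = 2;
        break;
      default:               /* Inside a position list */
        if( v==0 ){
          eState = 0;        /* 0x00 terminates the entry */
        }else if( v==1 ){
          eState = 3;        /* 0x01 introduces a column number */
        }else{
          (*pnOcc)++;        /* Any other value is a position */
        }
        break;
    }
  }
}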
|
||||
|
||||
/*
|
||||
** xFilter - Initialize a cursor to point at the start of its data.
|
||||
*/
|
||||
static int fts3auxFilterMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
|
||||
int idxNum, /* Strategy index */
|
||||
const char *idxStr, /* Unused */
|
||||
int nVal, /* Number of elements in apVal */
|
||||
sqlite3_value **apVal /* Arguments for the indexing scheme */
|
||||
){
|
||||
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
|
||||
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
|
||||
int rc;
|
||||
int isScan = 0;
|
||||
int iLangVal = 0; /* Language id to query */
|
||||
|
||||
int iEq = -1; /* Index of term=? value in apVal */
|
||||
int iGe = -1; /* Index of term>=? value in apVal */
|
||||
int iLe = -1; /* Index of term<=? value in apVal */
|
||||
int iLangid = -1; /* Index of languageid=? value in apVal */
|
||||
int iNext = 0;
|
||||
|
||||
UNUSED_PARAMETER(nVal);
|
||||
UNUSED_PARAMETER(idxStr);
|
||||
|
||||
assert( idxStr==0 );
|
||||
assert( idxNum==FTS4AUX_EQ_CONSTRAINT || idxNum==0
|
||||
|| idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT
|
||||
|| idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT)
|
||||
);
|
||||
|
||||
if( idxNum==FTS4AUX_EQ_CONSTRAINT ){
|
||||
iEq = iNext++;
|
||||
}else{
|
||||
isScan = 1;
|
||||
if( idxNum & FTS4AUX_GE_CONSTRAINT ){
|
||||
iGe = iNext++;
|
||||
}
|
||||
if( idxNum & FTS4AUX_LE_CONSTRAINT ){
|
||||
iLe = iNext++;
|
||||
}
|
||||
}
|
||||
if( iNext<nVal ){
|
||||
iLangid = iNext++;
|
||||
}
|
||||
|
||||
/* In case this cursor is being reused, close and zero it. */
|
||||
testcase(pCsr->filter.zTerm);
|
||||
sqlite3Fts3SegReaderFinish(&pCsr->csr);
|
||||
sqlite3_free((void *)pCsr->filter.zTerm);
|
||||
sqlite3_free(pCsr->aStat);
|
||||
memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
|
||||
|
||||
pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
|
||||
if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
|
||||
|
||||
if( iEq>=0 || iGe>=0 ){
|
||||
const unsigned char *zStr = sqlite3_value_text(apVal[0]);
|
||||
assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) );
|
||||
if( zStr ){
|
||||
pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr);
|
||||
pCsr->filter.nTerm = sqlite3_value_bytes(apVal[0]);
|
||||
if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
if( iLe>=0 ){
|
||||
pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe]));
|
||||
pCsr->nStop = sqlite3_value_bytes(apVal[iLe]);
|
||||
if( pCsr->zStop==0 ) return SQLITE_NOMEM;
|
||||
}
|
||||
|
||||
if( iLangid>=0 ){
|
||||
iLangVal = sqlite3_value_int(apVal[iLangid]);
|
||||
|
||||
/* If the user specified a negative value for the languageid, use zero
|
||||
** instead. This works, as the "languageid=?" constraint will also
|
||||
** be tested by the VDBE layer. The test will always be false (since
|
||||
** this module will not return a row with a negative languageid), and
|
||||
** so the overall query will return zero rows. */
|
||||
if( iLangVal<0 ) iLangVal = 0;
|
||||
}
|
||||
pCsr->iLangid = iLangVal;
|
||||
|
||||
rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL,
|
||||
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
|
||||
);
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** xEof - Return true if the cursor is at EOF, or false otherwise.
|
||||
*/
|
||||
static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
|
||||
return pCsr->isEof;
|
||||
}
|
||||
|
||||
/*
|
||||
** xColumn - Return a column value.
|
||||
*/
|
||||
static int fts3auxColumnMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
|
||||
sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
|
||||
int iCol /* Index of column to read value from */
|
||||
){
|
||||
Fts3auxCursor *p = (Fts3auxCursor *)pCursor;
|
||||
|
||||
assert( p->isEof==0 );
|
||||
switch( iCol ){
|
||||
case 0: /* term */
|
||||
sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT);
|
||||
break;
|
||||
|
||||
case 1: /* col */
|
||||
if( p->iCol ){
|
||||
sqlite3_result_int(pCtx, p->iCol-1);
|
||||
}else{
|
||||
sqlite3_result_text(pCtx, "*", -1, SQLITE_STATIC);
|
||||
}
|
||||
break;
|
||||
|
||||
case 2: /* documents */
|
||||
sqlite3_result_int64(pCtx, p->aStat[p->iCol].nDoc);
|
||||
break;
|
||||
|
||||
case 3: /* occurrences */
|
||||
sqlite3_result_int64(pCtx, p->aStat[p->iCol].nOcc);
|
||||
break;
|
||||
|
||||
default: /* languageid */
|
||||
assert( iCol==4 );
|
||||
sqlite3_result_int(pCtx, p->iLangid);
|
||||
break;
|
||||
}
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xRowid - Return the current rowid for the cursor.
|
||||
*/
|
||||
static int fts3auxRowidMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
|
||||
sqlite_int64 *pRowid /* OUT: Rowid value */
|
||||
){
|
||||
Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
|
||||
*pRowid = pCsr->iRowid;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Register the fts3aux module with database connection db. Return SQLITE_OK
|
||||
** if successful or an error code if sqlite3_create_module() fails.
|
||||
*/
|
||||
int sqlite3Fts3InitAux(sqlite3 *db){
|
||||
static const sqlite3_module fts3aux_module = {
|
||||
0, /* iVersion */
|
||||
fts3auxConnectMethod, /* xCreate */
|
||||
fts3auxConnectMethod, /* xConnect */
|
||||
fts3auxBestIndexMethod, /* xBestIndex */
|
||||
fts3auxDisconnectMethod, /* xDisconnect */
|
||||
fts3auxDisconnectMethod, /* xDestroy */
|
||||
fts3auxOpenMethod, /* xOpen */
|
||||
fts3auxCloseMethod, /* xClose */
|
||||
fts3auxFilterMethod, /* xFilter */
|
||||
fts3auxNextMethod, /* xNext */
|
||||
fts3auxEofMethod, /* xEof */
|
||||
fts3auxColumnMethod, /* xColumn */
|
||||
fts3auxRowidMethod, /* xRowid */
|
||||
0, /* xUpdate */
|
||||
0, /* xBegin */
|
||||
0, /* xSync */
|
||||
0, /* xCommit */
|
||||
0, /* xRollback */
|
||||
0, /* xFindFunction */
|
||||
0, /* xRename */
|
||||
0, /* xSavepoint */
|
||||
0, /* xRelease */
|
||||
0 /* xRollbackTo */
|
||||
};
|
||||
int rc; /* Return code */
|
||||
|
||||
rc = sqlite3_create_module(db, "fts4aux", &fts3aux_module, 0);
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|
||||
File diff suppressed because it is too large
@ -1,383 +0,0 @@
|
||||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the implementation of generic hash-tables used in SQLite.
|
||||
** We've modified it slightly to serve as a standalone hash table
|
||||
** implementation for the full-text indexing module.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS3 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS3 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
|
||||
*/
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "fts3_hash.h"
|
||||
|
||||
/*
|
||||
** Malloc and Free functions
|
||||
*/
|
||||
static void *fts3HashMalloc(int n){
|
||||
void *p = sqlite3_malloc(n);
|
||||
if( p ){
|
||||
memset(p, 0, n);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
static void fts3HashFree(void *p){
|
||||
sqlite3_free(p);
|
||||
}
|
||||
|
||||
/* Turn bulk memory into a hash table object by initializing the
|
||||
** fields of the Hash structure.
|
||||
**
|
||||
** "pNew" is a pointer to the hash table that is to be initialized.
|
||||
** keyClass is one of the constants
|
||||
** FTS3_HASH_BINARY or FTS3_HASH_STRING. The value of keyClass
|
||||
** determines what kind of key the hash table will use. "copyKey" is
|
||||
** true if the hash table should make its own private copy of keys and
|
||||
** false if it should just use the supplied pointer.
|
||||
*/
|
||||
void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey){
|
||||
assert( pNew!=0 );
|
||||
assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY );
|
||||
pNew->keyClass = keyClass;
|
||||
pNew->copyKey = copyKey;
|
||||
pNew->first = 0;
|
||||
pNew->count = 0;
|
||||
pNew->htsize = 0;
|
||||
pNew->ht = 0;
|
||||
}
|
||||
|
||||
/* Remove all entries from a hash table. Reclaim all memory.
|
||||
** Call this routine to delete a hash table or to reset a hash table
|
||||
** to the empty state.
|
||||
*/
|
||||
void sqlite3Fts3HashClear(Fts3Hash *pH){
|
||||
Fts3HashElem *elem; /* For looping over all elements of the table */
|
||||
|
||||
assert( pH!=0 );
|
||||
elem = pH->first;
|
||||
pH->first = 0;
|
||||
fts3HashFree(pH->ht);
|
||||
pH->ht = 0;
|
||||
pH->htsize = 0;
|
||||
while( elem ){
|
||||
Fts3HashElem *next_elem = elem->next;
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
fts3HashFree(elem->pKey);
|
||||
}
|
||||
fts3HashFree(elem);
|
||||
elem = next_elem;
|
||||
}
|
||||
pH->count = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is FTS3_HASH_STRING
|
||||
*/
|
||||
static int fts3StrHash(const void *pKey, int nKey){
|
||||
const char *z = (const char *)pKey;
|
||||
unsigned h = 0;
|
||||
if( nKey<=0 ) nKey = (int) strlen(z);
|
||||
while( nKey > 0 ){
|
||||
h = (h<<3) ^ h ^ *z++;
|
||||
nKey--;
|
||||
}
|
||||
return (int)(h & 0x7fffffff);
|
||||
}
|
||||
static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is FTS3_HASH_BINARY
|
||||
*/
|
||||
static int fts3BinHash(const void *pKey, int nKey){
|
||||
int h = 0;
|
||||
const char *z = (const char *)pKey;
|
||||
while( nKey-- > 0 ){
|
||||
h = (h<<3) ^ h ^ *(z++);
|
||||
}
|
||||
return h & 0x7fffffff;
|
||||
}
|
||||
static int fts3BinCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( n1!=n2 ) return 1;
|
||||
return memcmp(pKey1,pKey2,n1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** The C syntax in this function definition may be unfamiliar to some
|
||||
** programmers, so we provide the following additional explanation:
|
||||
**
|
||||
** The name of the function is "ftsHashFunction". The function takes a
|
||||
** single parameter "keyClass". The return value of ftsHashFunction()
|
||||
** is a pointer to another function. Specifically, the return value
|
||||
** of ftsHashFunction() is a pointer to a function that takes two parameters
|
||||
** with types "const void*" and "int" and returns an "int".
|
||||
*/
|
||||
static int (*ftsHashFunction(int keyClass))(const void*,int){
|
||||
if( keyClass==FTS3_HASH_STRING ){
|
||||
return &fts3StrHash;
|
||||
}else{
|
||||
assert( keyClass==FTS3_HASH_BINARY );
|
||||
return &fts3BinHash;
|
||||
}
|
||||
}
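/* Illustrative sketch (not part of the original source): the same selection
** written with a typedef for the returned pointer type, which some readers
** find easier to parse than the inline function-pointer syntax above. The
** names Fts3HashFn and ftsHashFunctionAlt are hypothetical.
*/
typedef int (*Fts3HashFn)(const void*, int);
static Fts3HashFn ftsHashFunctionAlt(int keyClass){
  return keyClass==FTS3_HASH_STRING ? &fts3StrHash : &fts3BinHash;
}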
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** For help in interpreting the obscure C code in the function definition,
|
||||
** see the header comment on the previous function.
|
||||
*/
|
||||
static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){
|
||||
if( keyClass==FTS3_HASH_STRING ){
|
||||
return &fts3StrCompare;
|
||||
}else{
|
||||
assert( keyClass==FTS3_HASH_BINARY );
|
||||
return &fts3BinCompare;
|
||||
}
|
||||
}
|
||||
|
||||
/* Link an element into the hash table
|
||||
*/
|
||||
static void fts3HashInsertElement(
|
||||
Fts3Hash *pH, /* The complete hash table */
|
||||
struct _fts3ht *pEntry, /* The entry into which pNew is inserted */
|
||||
Fts3HashElem *pNew /* The element to be inserted */
|
||||
){
|
||||
Fts3HashElem *pHead; /* First element already in pEntry */
|
||||
pHead = pEntry->chain;
|
||||
if( pHead ){
|
||||
pNew->next = pHead;
|
||||
pNew->prev = pHead->prev;
|
||||
if( pHead->prev ){ pHead->prev->next = pNew; }
|
||||
else { pH->first = pNew; }
|
||||
pHead->prev = pNew;
|
||||
}else{
|
||||
pNew->next = pH->first;
|
||||
if( pH->first ){ pH->first->prev = pNew; }
|
||||
pNew->prev = 0;
|
||||
pH->first = pNew;
|
||||
}
|
||||
pEntry->count++;
|
||||
pEntry->chain = pNew;
|
||||
}
|
||||
|
||||
|
||||
/* Resize the hash table so that it contains "new_size" buckets.
|
||||
** "new_size" must be a power of 2. The hash table might fail
|
||||
** to resize if sqlite3_malloc() fails.
|
||||
**
|
||||
** Return non-zero if a memory allocation error occurs.
|
||||
*/
|
||||
static int fts3Rehash(Fts3Hash *pH, int new_size){
|
||||
struct _fts3ht *new_ht; /* The new hash table */
|
||||
Fts3HashElem *elem, *next_elem; /* For looping over existing elements */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( (new_size & (new_size-1))==0 );
|
||||
new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) );
|
||||
if( new_ht==0 ) return 1;
|
||||
fts3HashFree(pH->ht);
|
||||
pH->ht = new_ht;
|
||||
pH->htsize = new_size;
|
||||
xHash = ftsHashFunction(pH->keyClass);
|
||||
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
|
||||
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
|
||||
next_elem = elem->next;
|
||||
fts3HashInsertElement(pH, &new_ht[h], elem);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* This function (for internal use only) locates an element in a
|
||||
** hash table that matches the given key. The hash for this key has
|
||||
** already been computed and is passed as the 4th parameter.
|
||||
*/
|
||||
static Fts3HashElem *fts3FindElementByHash(
|
||||
const Fts3Hash *pH, /* The pH to be searched */
|
||||
const void *pKey, /* The key we are searching for */
|
||||
int nKey,
|
||||
int h /* The hash for this key. */
|
||||
){
|
||||
Fts3HashElem *elem; /* Used to loop thru the element list */
|
||||
int count; /* Number of elements left to test */
|
||||
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
|
||||
|
||||
if( pH->ht ){
|
||||
struct _fts3ht *pEntry = &pH->ht[h];
|
||||
elem = pEntry->chain;
|
||||
count = pEntry->count;
|
||||
xCompare = ftsCompareFunction(pH->keyClass);
|
||||
while( count-- && elem ){
|
||||
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
|
||||
return elem;
|
||||
}
|
||||
elem = elem->next;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Remove a single entry from the hash table given a pointer to that
|
||||
** element and a hash on the element's key.
|
||||
*/
|
||||
static void fts3RemoveElementByHash(
|
||||
Fts3Hash *pH, /* The pH containing "elem" */
|
||||
Fts3HashElem* elem, /* The element to be removed from the pH */
|
||||
int h /* Hash value for the element */
|
||||
){
|
||||
struct _fts3ht *pEntry;
|
||||
if( elem->prev ){
|
||||
elem->prev->next = elem->next;
|
||||
}else{
|
||||
pH->first = elem->next;
|
||||
}
|
||||
if( elem->next ){
|
||||
elem->next->prev = elem->prev;
|
||||
}
|
||||
pEntry = &pH->ht[h];
|
||||
if( pEntry->chain==elem ){
|
||||
pEntry->chain = elem->next;
|
||||
}
|
||||
pEntry->count--;
|
||||
if( pEntry->count<=0 ){
|
||||
pEntry->chain = 0;
|
||||
}
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
fts3HashFree(elem->pKey);
|
||||
}
|
||||
fts3HashFree( elem );
|
||||
pH->count--;
|
||||
if( pH->count<=0 ){
|
||||
assert( pH->first==0 );
|
||||
assert( pH->count==0 );
|
||||
fts3HashClear(pH);
|
||||
}
|
||||
}
|
||||
|
||||
Fts3HashElem *sqlite3Fts3HashFindElem(
|
||||
const Fts3Hash *pH,
|
||||
const void *pKey,
|
||||
int nKey
|
||||
){
|
||||
int h; /* A hash on key */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
if( pH==0 || pH->ht==0 ) return 0;
|
||||
xHash = ftsHashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
h = (*xHash)(pKey,nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
return fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1));
|
||||
}
|
||||
|
||||
/*
|
||||
** Attempt to locate an element of the hash table pH with a key
|
||||
** that matches pKey,nKey. Return the data for this element if it is
|
||||
** found, or NULL if there is no match.
|
||||
*/
|
||||
void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, int nKey){
|
||||
Fts3HashElem *pElem; /* The element that matches key (if any) */
|
||||
|
||||
pElem = sqlite3Fts3HashFindElem(pH, pKey, nKey);
|
||||
return pElem ? pElem->data : 0;
|
||||
}
|
||||
|
||||
/* Insert an element into the hash table pH. The key is pKey,nKey
|
||||
** and the data is "data".
|
||||
**
|
||||
** If no element exists with a matching key, then a new
|
||||
** element is created. A copy of the key is made if the copyKey
|
||||
** flag is set. NULL is returned.
|
||||
**
|
||||
** If another element already exists with the same key, then the
|
||||
** new data replaces the old data and the old data is returned.
|
||||
** The key is not copied in this instance. If a malloc fails, then
|
||||
** the new data is returned and the hash table is unchanged.
|
||||
**
|
||||
** If the "data" parameter to this function is NULL, then the
|
||||
** element corresponding to "key" is removed from the hash table.
|
||||
*/
|
||||
void *sqlite3Fts3HashInsert(
|
||||
Fts3Hash *pH, /* The hash table to insert into */
|
||||
const void *pKey, /* The key */
|
||||
int nKey, /* Number of bytes in the key */
|
||||
void *data /* The data */
|
||||
){
|
||||
int hraw; /* Raw hash value of the key */
|
||||
int h; /* the hash of the key modulo hash table size */
|
||||
Fts3HashElem *elem; /* Used to loop thru the element list */
|
||||
Fts3HashElem *new_elem; /* New element added to the pH */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( pH!=0 );
|
||||
xHash = ftsHashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
hraw = (*xHash)(pKey, nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
elem = fts3FindElementByHash(pH,pKey,nKey,h);
|
||||
if( elem ){
|
||||
void *old_data = elem->data;
|
||||
if( data==0 ){
|
||||
fts3RemoveElementByHash(pH,elem,h);
|
||||
}else{
|
||||
elem->data = data;
|
||||
}
|
||||
return old_data;
|
||||
}
|
||||
if( data==0 ) return 0;
|
||||
if( (pH->htsize==0 && fts3Rehash(pH,8))
|
||||
|| (pH->count>=pH->htsize && fts3Rehash(pH, pH->htsize*2))
|
||||
){
|
||||
pH->count = 0;
|
||||
return data;
|
||||
}
|
||||
assert( pH->htsize>0 );
|
||||
new_elem = (Fts3HashElem*)fts3HashMalloc( sizeof(Fts3HashElem) );
|
||||
if( new_elem==0 ) return data;
|
||||
if( pH->copyKey && pKey!=0 ){
|
||||
new_elem->pKey = fts3HashMalloc( nKey );
|
||||
if( new_elem->pKey==0 ){
|
||||
fts3HashFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
memcpy((void*)new_elem->pKey, pKey, nKey);
|
||||
}else{
|
||||
new_elem->pKey = (void*)pKey;
|
||||
}
|
||||
new_elem->nKey = nKey;
|
||||
pH->count++;
|
||||
assert( pH->htsize>0 );
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
fts3HashInsertElement(pH, &pH->ht[h], new_elem);
|
||||
new_elem->data = data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|
||||
@ -1,112 +0,0 @@
|
||||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the header file for the generic hash-table implementation
|
||||
** used in SQLite. We've modified it slightly to serve as a standalone
|
||||
** hash table implementation for the full-text indexing module.
|
||||
**
|
||||
*/
|
||||
#ifndef _FTS3_HASH_H_
|
||||
#define _FTS3_HASH_H_
|
||||
|
||||
/* Forward declarations of structures. */
|
||||
typedef struct Fts3Hash Fts3Hash;
|
||||
typedef struct Fts3HashElem Fts3HashElem;
|
||||
|
||||
/* A complete hash table is an instance of the following structure.
|
||||
** The internals of this structure are intended to be opaque -- client
|
||||
** code should not attempt to access or modify the fields of this structure
|
||||
** directly. Change this structure only by using the routines below.
|
||||
** However, many of the "procedures" and "functions" for modifying and
|
||||
** accessing this structure are really macros, so we can't really make
|
||||
** this structure opaque.
|
||||
*/
|
||||
struct Fts3Hash {
|
||||
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
|
||||
char copyKey; /* True if copy of key made on insert */
|
||||
int count; /* Number of entries in this table */
|
||||
Fts3HashElem *first; /* The first element of the array */
|
||||
int htsize; /* Number of buckets in the hash table */
|
||||
struct _fts3ht { /* the hash table */
|
||||
int count; /* Number of entries with this hash */
|
||||
Fts3HashElem *chain; /* Pointer to first entry with this hash */
|
||||
} *ht;
|
||||
};
|
||||
|
||||
/* Each element in the hash table is an instance of the following
|
||||
** structure. All elements are stored on a single doubly-linked list.
|
||||
**
|
||||
** Again, this structure is intended to be opaque, but it can't really
|
||||
** be opaque because it is used by macros.
|
||||
*/
|
||||
struct Fts3HashElem {
|
||||
Fts3HashElem *next, *prev; /* Next and previous elements in the table */
|
||||
void *data; /* Data associated with this element */
|
||||
void *pKey; int nKey; /* Key associated with this element */
|
||||
};
|
||||
|
||||
/*
|
||||
** There are 2 different modes of operation for a hash table:
|
||||
**
|
||||
** FTS3_HASH_STRING pKey points to a string that is nKey bytes long
|
||||
** (including the null-terminator, if any). Case
|
||||
** is respected in comparisons.
|
||||
**
|
||||
** FTS3_HASH_BINARY pKey points to binary data nKey bytes long.
|
||||
** memcmp() is used to compare keys.
|
||||
**
|
||||
** A copy of the key is made if the copyKey parameter to fts3HashInit is 1.
|
||||
*/
|
||||
#define FTS3_HASH_STRING 1
|
||||
#define FTS3_HASH_BINARY 2
|
||||
|
||||
/*
|
||||
** Access routines. To delete, insert a NULL pointer.
|
||||
*/
|
||||
void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey);
|
||||
void *sqlite3Fts3HashInsert(Fts3Hash*, const void *pKey, int nKey, void *pData);
|
||||
void *sqlite3Fts3HashFind(const Fts3Hash*, const void *pKey, int nKey);
|
||||
void sqlite3Fts3HashClear(Fts3Hash*);
|
||||
Fts3HashElem *sqlite3Fts3HashFindElem(const Fts3Hash *, const void *, int);
|
||||
|
||||
/*
|
||||
** Shorthand for the functions above
|
||||
*/
|
||||
#define fts3HashInit     sqlite3Fts3HashInit
#define fts3HashInsert   sqlite3Fts3HashInsert
#define fts3HashFind     sqlite3Fts3HashFind
#define fts3HashClear    sqlite3Fts3HashClear
#define fts3HashFindElem sqlite3Fts3HashFindElem
|
||||
|
||||
/*
|
||||
** Macros for looping over all elements of a hash table. The idiom is
|
||||
** like this:
|
||||
**
|
||||
** Fts3Hash h;
|
||||
** Fts3HashElem *p;
|
||||
** ...
|
||||
** for(p=fts3HashFirst(&h); p; p=fts3HashNext(p)){
|
||||
** SomeStructure *pData = fts3HashData(p);
|
||||
** // do something with pData
|
||||
** }
|
||||
*/
|
||||
#define fts3HashFirst(H)   ((H)->first)
#define fts3HashNext(E)    ((E)->next)
#define fts3HashData(E)    ((E)->data)
#define fts3HashKey(E)     ((E)->pKey)
#define fts3HashKeysize(E) ((E)->nKey)
|
||||
|
||||
/*
|
||||
** Number of entries in a hash table
|
||||
*/
|
||||
#define fts3HashCount(H) ((H)->count)
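/* Illustrative sketch (not part of the original header): minimal use of the
** routines and shorthand macros declared above. Keys here are
** NUL-terminated strings (the length 6 includes the terminator) and
** copyKey==1, so the table keeps its own copies of the keys. The expected
** return values are noted in the comments.
*/
static void fts3HashDemo(void){
  Fts3Hash h;
  void *pOld;
  fts3HashInit(&h, FTS3_HASH_STRING, 1);
  pOld = fts3HashInsert(&h, "alpha", 6, (void*)1); /* 0 returned: new key */
  pOld = fts3HashInsert(&h, "alpha", 6, (void*)2); /* (void*)1: data replaced */
  pOld = fts3HashFind(&h, "alpha", 6);             /* (void*)2: current data */
  pOld = fts3HashInsert(&h, "alpha", 6, 0);        /* NULL data deletes entry */
  fts3HashClear(&h);                               /* back to the empty state */
  (void)pOld;
}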
|
||||
|
||||
#endif /* _FTS3_HASH_H_ */
|
||||
@ -1,262 +0,0 @@
|
||||
/*
|
||||
** 2007 June 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This file implements a tokenizer for fts3 based on the ICU library.
|
||||
*/
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
#ifdef SQLITE_ENABLE_ICU
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "fts3_tokenizer.h"
|
||||
|
||||
#include <unicode/ubrk.h>
|
||||
#include <unicode/ucol.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/utf16.h>
|
||||
|
||||
typedef struct IcuTokenizer IcuTokenizer;
|
||||
typedef struct IcuCursor IcuCursor;
|
||||
|
||||
struct IcuTokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
char *zLocale;
|
||||
};
|
||||
|
||||
struct IcuCursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
|
||||
UBreakIterator *pIter; /* ICU break-iterator object */
|
||||
int nChar; /* Number of UChar elements in pInput */
|
||||
UChar *aChar; /* Copy of input using utf-16 encoding */
|
||||
int *aOffset; /* Offsets of each character in utf-8 input */
|
||||
|
||||
int nBuffer;
|
||||
char *zBuffer;
|
||||
|
||||
int iToken;
|
||||
};
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int icuCreate(
|
||||
int argc, /* Number of entries in argv[] */
|
||||
const char * const *argv, /* Tokenizer creation arguments */
|
||||
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
|
||||
){
|
||||
IcuTokenizer *p;
|
||||
int n = 0;
|
||||
|
||||
if( argc>0 ){
|
||||
n = strlen(argv[0])+1;
|
||||
}
|
||||
p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n);
|
||||
if( !p ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(p, 0, sizeof(IcuTokenizer));
|
||||
|
||||
if( n ){
|
||||
p->zLocale = (char *)&p[1];
|
||||
memcpy(p->zLocale, argv[0], n);
|
||||
}
|
||||
|
||||
*ppTokenizer = (sqlite3_tokenizer *)p;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int icuDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
|
||||
sqlite3_free(p);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is zInput[0..nInput-1].  A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int icuOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *zInput, /* Input string */
|
||||
int nInput, /* Length of zInput in bytes */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
|
||||
IcuCursor *pCsr;
|
||||
|
||||
const int32_t opt = U_FOLD_CASE_DEFAULT;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int nChar;
|
||||
|
||||
UChar32 c;
|
||||
int iInput = 0;
|
||||
int iOut = 0;
|
||||
|
||||
*ppCursor = 0;
|
||||
|
||||
if( zInput==0 ){
|
||||
nInput = 0;
|
||||
zInput = "";
|
||||
}else if( nInput<0 ){
|
||||
nInput = strlen(zInput);
|
||||
}
|
||||
nChar = nInput+1;
|
||||
pCsr = (IcuCursor *)sqlite3_malloc(
|
||||
sizeof(IcuCursor) + /* IcuCursor */
|
||||
((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */
|
||||
(nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */
|
||||
);
|
||||
if( !pCsr ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(pCsr, 0, sizeof(IcuCursor));
|
||||
pCsr->aChar = (UChar *)&pCsr[1];
|
||||
pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
|
||||
|
||||
pCsr->aOffset[iOut] = iInput;
|
||||
U8_NEXT(zInput, iInput, nInput, c);
|
||||
while( c>0 ){
|
||||
int isError = 0;
|
||||
c = u_foldCase(c, opt);
|
||||
U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
|
||||
if( isError ){
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
pCsr->aOffset[iOut] = iInput;
|
||||
|
||||
if( iInput<nInput ){
|
||||
U8_NEXT(zInput, iInput, nInput, c);
|
||||
}else{
|
||||
c = 0;
|
||||
}
|
||||
}
|
||||
|
||||
pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
|
||||
if( !U_SUCCESS(status) ){
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
pCsr->nChar = iOut;
|
||||
|
||||
ubrk_first(pCsr->pIter);
|
||||
*ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to icuOpen().
|
||||
*/
|
||||
static int icuClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
IcuCursor *pCsr = (IcuCursor *)pCursor;
|
||||
ubrk_close(pCsr->pIter);
|
||||
sqlite3_free(pCsr->zBuffer);
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor.
|
||||
*/
|
||||
static int icuNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by icuOpen */
|
||||
const char **ppToken, /* OUT: *ppToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
IcuCursor *pCsr = (IcuCursor *)pCursor;
|
||||
|
||||
int iStart = 0;
|
||||
int iEnd = 0;
|
||||
int nByte = 0;
|
||||
|
||||
while( iStart==iEnd ){
|
||||
UChar32 c;
|
||||
|
||||
iStart = ubrk_current(pCsr->pIter);
|
||||
iEnd = ubrk_next(pCsr->pIter);
|
||||
if( iEnd==UBRK_DONE ){
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
while( iStart<iEnd ){
|
||||
int iWhite = iStart;
|
||||
U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
|
||||
if( u_isspace(c) ){
|
||||
iStart = iWhite;
|
||||
}else{
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert(iStart<=iEnd);
|
||||
}
|
||||
|
||||
do {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
if( nByte ){
|
||||
char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
|
||||
if( !zNew ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
pCsr->zBuffer = zNew;
|
||||
pCsr->nBuffer = nByte;
|
||||
}
|
||||
|
||||
u_strToUTF8(
|
||||
pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */
|
||||
&pCsr->aChar[iStart], iEnd-iStart, /* Input vars */
|
||||
&status /* Output success/failure */
|
||||
);
|
||||
} while( nByte>pCsr->nBuffer );
|
||||
|
||||
*ppToken = pCsr->zBuffer;
|
||||
*pnBytes = nByte;
|
||||
*piStartOffset = pCsr->aOffset[iStart];
|
||||
*piEndOffset = pCsr->aOffset[iEnd];
|
||||
*piPosition = pCsr->iToken++;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the simple tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module icuTokenizerModule = {
|
||||
0, /* iVersion */
|
||||
icuCreate, /* xCreate */
|
||||
icuDestroy, /* xDestroy */
|
||||
icuOpen, /* xOpen */
|
||||
icuClose, /* xClose */
|
||||
icuNext, /* xNext */
|
||||
0, /* xLanguageid */
|
||||
};
|
||||
|
||||
/*
|
||||
** Set *ppModule to point at the implementation of the ICU tokenizer.
|
||||
*/
|
||||
void sqlite3Fts3IcuTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &icuTokenizerModule;
|
||||
}
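/* Illustrative sketch (not part of the original source): driving the module
** above by hand through its xCreate/xOpen/xNext/xClose/xDestroy methods to
** count the tokens in a document. The locale string is hypothetical and
** error handling is abbreviated; the structure layouts come from
** fts3_tokenizer.h. The pModule/pTokenizer back-pointers are filled in the
** same way FTS3 itself does.
*/
static int icuTokenizeCountDemo(const char *zDoc, int *pnToken){
  const sqlite3_tokenizer_module *pMod;
  sqlite3_tokenizer *pTok = 0;
  sqlite3_tokenizer_cursor *pCsr = 0;
  const char *const azArg[] = { "en_US" };   /* hypothetical locale */
  const char *zTok;
  int nTok, iStart, iEnd, iPos;
  int rc;

  *pnToken = 0;
  sqlite3Fts3IcuTokenizerModule(&pMod);
  rc = pMod->xCreate(1, azArg, &pTok);
  if( rc!=SQLITE_OK ) return rc;
  pTok->pModule = pMod;

  rc = pMod->xOpen(pTok, zDoc, -1, &pCsr);
  if( rc==SQLITE_OK ){
    pCsr->pTokenizer = pTok;
    while( pMod->xNext(pCsr, &zTok, &nTok, &iStart, &iEnd, &iPos)==SQLITE_OK ){
      (*pnToken)++;                          /* one token per SQLITE_OK */
    }
    pMod->xClose(pCsr);
  }
  pMod->xDestroy(pTok);
  return rc;
}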
|
||||
|
||||
#endif /* defined(SQLITE_ENABLE_ICU) */
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|
||||
@ -1,662 +0,0 @@
|
||||
/*
|
||||
** 2006 September 30
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** Implementation of the full-text-search tokenizer that implements
|
||||
** a Porter stemmer.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS3 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS3 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
|
||||
*/
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "fts3_tokenizer.h"
|
||||
|
||||
/*
|
||||
** Class derived from sqlite3_tokenizer
|
||||
*/
|
||||
typedef struct porter_tokenizer {
|
||||
sqlite3_tokenizer base; /* Base class */
|
||||
} porter_tokenizer;
|
||||
|
||||
/*
|
||||
** Class derived from sqlite3_tokenizer_cursor
|
||||
*/
|
||||
typedef struct porter_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *zInput; /* input we are tokenizing */
|
||||
int nInput; /* size of the input */
|
||||
int iOffset; /* current position in zInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *zToken; /* storage for current token */
|
||||
int nAllocated; /* space allocated to zToken buffer */
|
||||
} porter_tokenizer_cursor;
|
||||
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int porterCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
porter_tokenizer *t;
|
||||
|
||||
UNUSED_PARAMETER(argc);
|
||||
UNUSED_PARAMETER(argv);
|
||||
|
||||
t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t));
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
memset(t, 0, sizeof(*t));
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
sqlite3_free(pTokenizer);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is zInput[0..nInput-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int porterOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *zInput, int nInput, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
porter_tokenizer_cursor *c;
|
||||
|
||||
UNUSED_PARAMETER(pTokenizer);
|
||||
|
||||
c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
|
||||
if( c==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
c->zInput = zInput;
|
||||
if( zInput==0 ){
|
||||
c->nInput = 0;
|
||||
}else if( nInput<0 ){
|
||||
c->nInput = (int)strlen(zInput);
|
||||
}else{
|
||||
c->nInput = nInput;
|
||||
}
|
||||
c->iOffset = 0; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->zToken = NULL; /* no space allocated, yet. */
|
||||
c->nAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** porterOpen() above.
|
||||
*/
|
||||
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
|
||||
sqlite3_free(c->zToken);
|
||||
sqlite3_free(c);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
/*
|
||||
** Vowel or consonant
|
||||
*/
|
||||
static const char cType[] = {
|
||||
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
|
||||
1, 1, 1, 2, 1
|
||||
};
|
||||
|
||||
/*
|
||||
** isConsonant() and isVowel() determine whether the first character of
** the string they point to is a consonant or a vowel, according
** to Porter rules.
**
** A consonant is any letter other than 'a', 'e', 'i', 'o', or 'u'.
** 'Y' is a consonant unless it follows another consonant,
** in which case it is a vowel.
**
** In these routines, the letters are in reverse order. So the 'y' rule
** is that 'y' is a consonant unless it is followed by another
** consonant.
|
||||
*/
|
||||
static int isVowel(const char*);
|
||||
static int isConsonant(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return j;
|
||||
return z[1]==0 || isVowel(z + 1);
|
||||
}
|
||||
static int isVowel(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return 1-j;
|
||||
return isConsonant(z + 1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Let any sequence of one or more vowels be represented by V and let
|
||||
** C be sequence of one or more consonants. Then every word can be
|
||||
** represented as:
|
||||
**
|
||||
** [C] (VC){m} [V]
|
||||
**
|
||||
** In prose: A word is an optional consonant followed by zero or more
** vowel-consonant pairs followed by an optional vowel. "m" is the
|
||||
** number of vowel consonant pairs. This routine computes the value
|
||||
** of m for the first i bytes of a word.
|
||||
**
|
||||
** Return true if the m-value for z is 1 or more. In other words,
|
||||
** return true if z contains at least one vowel that is followed
|
||||
** by a consonant.
|
||||
**
|
||||
** In this routine z[] is in reverse order. So we are really looking
|
||||
** for an instance of a consonant followed by a vowel.
|
||||
*/
|
||||
static int m_gt_0(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/* Like m_gt_0 above except we are looking for a value of m which is
|
||||
** exactly 1
|
||||
*/
|
||||
static int m_eq_1(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 1;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z==0;
|
||||
}
|
||||
|
||||
/* Like m_gt_0 above except we are looking for a value of m>1 instead
** of m>0
|
||||
*/
|
||||
static int m_gt_1(const char *z){
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isVowel(z) ){ z++; }
|
||||
if( *z==0 ) return 0;
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
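/* Illustrative sketch (not part of the original source): the m-values
** described in the comment above, checked against the helpers. The helpers
** expect the word reversed and lower-cased, so "tree" is passed as "eert",
** "trees" as "seert" and "troubles" as "selbuort". The function name is
** hypothetical; <assert.h> is already included by this file.
*/
static void porterMValueDemo(void){
  assert( m_gt_0("eert")==0 );      /* "tree":     [C][V],      m==0 */
  assert( m_gt_0("seert")==1 );     /* "trees":    [C](VC){1},  m==1 */
  assert( m_eq_1("seert")==1 );
  assert( m_gt_1("seert")==0 );
  assert( m_gt_1("selbuort")==1 );  /* "troubles": [C](VC){2},  m==2 */
}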
|
||||
|
||||
/*
|
||||
** Return TRUE if there is a vowel anywhere within the string z[]
|
||||
*/
|
||||
static int hasVowel(const char *z){
|
||||
while( isConsonant(z) ){ z++; }
|
||||
return *z!=0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if the word ends in a double consonant.
|
||||
**
|
||||
** The text is reversed here. So we are really looking at
|
||||
** the first two characters of z[].
|
||||
*/
|
||||
static int doubleConsonant(const char *z){
|
||||
return isConsonant(z) && z[0]==z[1];
|
||||
}
|
||||
|
||||
/*
|
||||
** Return TRUE if the word ends with three letters which
|
||||
** are consonant-vowel-consonant and where the final consonant
|
||||
** is not 'w', 'x', or 'y'.
|
||||
**
|
||||
** The word is reversed here. So we are really checking the
|
||||
** first three letters and the first one cannot be in [wxy].
|
||||
*/
|
||||
static int star_oh(const char *z){
|
||||
return
|
||||
isConsonant(z) &&
|
||||
z[0]!='w' && z[0]!='x' && z[0]!='y' &&
|
||||
isVowel(z+1) &&
|
||||
isConsonant(z+2);
|
||||
}
|
||||
|
||||
/*
|
||||
** If the word ends with zFrom and xCond() is true for the stem
|
||||
** of the word that precedes the zFrom ending, then change the
|
||||
** ending to zTo.
|
||||
**
|
||||
** The input word *pz and zFrom are both in reverse order. zTo
|
||||
** is in normal order.
|
||||
**
|
||||
** Return TRUE if zFrom matches. Return FALSE if zFrom does not
|
||||
** match. Not that TRUE is returned even if xCond() fails and
|
||||
** no substitution occurs.
|
||||
*/
|
||||
static int stem(
|
||||
char **pz, /* The word being stemmed (Reversed) */
|
||||
const char *zFrom, /* If the ending matches this... (Reversed) */
|
||||
const char *zTo, /* ... change the ending to this (not reversed) */
|
||||
int (*xCond)(const char*) /* Condition that must be true */
|
||||
){
|
||||
char *z = *pz;
|
||||
while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
|
||||
if( *zFrom!=0 ) return 0;
|
||||
if( xCond && !xCond(z) ) return 1;
|
||||
while( *zTo ){
|
||||
*(--z) = *(zTo++);
|
||||
}
|
||||
*pz = z;
|
||||
return 1;
|
||||
}
|
||||
|
||||
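/* Editor's note: a minimal usage sketch, not part of the original file.
** Both the word buffer and zFrom are reversed; zTo is written back into the
** buffer in reverse, so in this stand-alone example zTo must be no longer
** than zFrom (the real caller keeps slack space in front of the buffer).
*/
static void stemUsageSketch(void){
  char zRev[] = "sesserac";          /* "caresses" reversed */
  char *z = zRev;
  stem(&z, "sess", "ss", 0);         /* Step 1a rule: "-sses" -> "-ss" */
  assert( strcmp(z, "sserac")==0 );  /* i.e. "caress" reversed */
}
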
/*
** This is the fallback stemmer used when the porter stemmer is
** inappropriate. The input word is copied into the output with
** US-ASCII case folding. If the input word is too long (more
** than 20 bytes if it contains no digits or more than 6 bytes if
** it contains digits) then the word is truncated to 20 or 6 bytes
** by taking 10 or 3 bytes from the beginning and end.
*/
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
  int i, mx, j;
  int hasDigit = 0;
  for(i=0; i<nIn; i++){
    char c = zIn[i];
    if( c>='A' && c<='Z' ){
      zOut[i] = c - 'A' + 'a';
    }else{
      if( c>='0' && c<='9' ) hasDigit = 1;
      zOut[i] = c;
    }
  }
  mx = hasDigit ? 3 : 10;
  if( nIn>mx*2 ){
    for(j=mx, i=nIn-mx; i<nIn; i++, j++){
      zOut[j] = zOut[i];
    }
    i = j;
  }
  zOut[i] = 0;
  *pnOut = i;
}


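/* Editor's note: an illustrative sketch, not part of the original file.
** A 26 byte word containing no digits keeps its first 10 and last 10 bytes.
*/
static void copyStemmerSketch(void){
  char zOut[32];
  int nOut;
  copy_stemmer("abcdefghijklmnopqrstuvwxyz", 26, zOut, &nOut);
  assert( nOut==20 );
  assert( strcmp(zOut, "abcdefghijqrstuvwxyz")==0 );
}
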
/*
** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
** zOut is at least big enough to hold nIn bytes. Write the actual
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
**
** Any upper-case characters in the US-ASCII character set ([A-Z])
** are converted to lower case. Upper-case UTF characters are
** unchanged.
**
** Words that are longer than about 20 bytes are stemmed by retaining
** a few bytes from the beginning and the end of the word. If the
** word contains digits, 3 bytes are taken from the beginning and
** 3 bytes from the end. For long words without digits, 10 bytes
** are taken from each end. US-ASCII case folding still applies.
**
** If the input word contains no digits but does contain characters not
** in [a-zA-Z] then no stemming is attempted and this routine simply
** copies the input into the output with US-ASCII case folding.
**
** Stemming never increases the length of the word. So there is
** no chance of overflowing the zOut buffer.
*/
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
  int i, j;
  char zReverse[28];
  char *z, *z2;
  if( nIn<3 || nIn>=(int)sizeof(zReverse)-7 ){
    /* The word is too big or too small for the porter stemmer.
    ** Fall back to the copy stemmer */
    copy_stemmer(zIn, nIn, zOut, pnOut);
    return;
  }
  for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
    char c = zIn[i];
    if( c>='A' && c<='Z' ){
      zReverse[j] = c + 'a' - 'A';
    }else if( c>='a' && c<='z' ){
      zReverse[j] = c;
    }else{
      /* The use of a character not in [a-zA-Z] means that we fall back
      ** to the copy stemmer */
      copy_stemmer(zIn, nIn, zOut, pnOut);
      return;
    }
  }
  memset(&zReverse[sizeof(zReverse)-5], 0, 5);
  z = &zReverse[j+1];


  /* Step 1a */
  if( z[0]=='s' ){
    if(
     !stem(&z, "sess", "ss", 0) &&
     !stem(&z, "sei", "i", 0) &&
     !stem(&z, "ss", "ss", 0)
    ){
      z++;
    }
  }

  /* Step 1b */
  z2 = z;
  if( stem(&z, "dee", "ee", m_gt_0) ){
    /* Do nothing. The work was all in the test */
  }else if(
     (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
      && z!=z2
  ){
     if( stem(&z, "ta", "ate", 0) ||
         stem(&z, "lb", "ble", 0) ||
         stem(&z, "zi", "ize", 0) ){
       /* Do nothing. The work was all in the test */
     }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
       z++;
     }else if( m_eq_1(z) && star_oh(z) ){
       *(--z) = 'e';
     }
  }

  /* Step 1c */
  if( z[0]=='y' && hasVowel(z+1) ){
    z[0] = 'i';
  }

  /* Step 2 */
  switch( z[1] ){
    case 'a':
      if( !stem(&z, "lanoita", "ate", m_gt_0) ){
        stem(&z, "lanoit", "tion", m_gt_0);
      }
      break;
    case 'c':
      if( !stem(&z, "icne", "ence", m_gt_0) ){
        stem(&z, "icna", "ance", m_gt_0);
      }
      break;
    case 'e':
      stem(&z, "rezi", "ize", m_gt_0);
      break;
    case 'g':
      stem(&z, "igol", "log", m_gt_0);
      break;
    case 'l':
      if( !stem(&z, "ilb", "ble", m_gt_0)
       && !stem(&z, "illa", "al", m_gt_0)
       && !stem(&z, "iltne", "ent", m_gt_0)
       && !stem(&z, "ile", "e", m_gt_0)
      ){
        stem(&z, "ilsuo", "ous", m_gt_0);
      }
      break;
    case 'o':
      if( !stem(&z, "noitazi", "ize", m_gt_0)
       && !stem(&z, "noita", "ate", m_gt_0)
      ){
        stem(&z, "rota", "ate", m_gt_0);
      }
      break;
    case 's':
      if( !stem(&z, "msila", "al", m_gt_0)
       && !stem(&z, "ssenevi", "ive", m_gt_0)
       && !stem(&z, "ssenluf", "ful", m_gt_0)
      ){
        stem(&z, "ssensuo", "ous", m_gt_0);
      }
      break;
    case 't':
      if( !stem(&z, "itila", "al", m_gt_0)
       && !stem(&z, "itivi", "ive", m_gt_0)
      ){
        stem(&z, "itilib", "ble", m_gt_0);
      }
      break;
  }

  /* Step 3 */
  switch( z[0] ){
    case 'e':
      if( !stem(&z, "etaci", "ic", m_gt_0)
       && !stem(&z, "evita", "", m_gt_0)
      ){
        stem(&z, "ezila", "al", m_gt_0);
      }
      break;
    case 'i':
      stem(&z, "itici", "ic", m_gt_0);
      break;
    case 'l':
      if( !stem(&z, "laci", "ic", m_gt_0) ){
        stem(&z, "luf", "", m_gt_0);
      }
      break;
    case 's':
      stem(&z, "ssen", "", m_gt_0);
      break;
  }

  /* Step 4 */
  switch( z[1] ){
    case 'a':
      if( z[0]=='l' && m_gt_1(z+2) ){
        z += 2;
      }
      break;
    case 'c':
      if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
        z += 4;
      }
      break;
    case 'e':
      if( z[0]=='r' && m_gt_1(z+2) ){
        z += 2;
      }
      break;
    case 'i':
      if( z[0]=='c' && m_gt_1(z+2) ){
        z += 2;
      }
      break;
    case 'l':
      if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
        z += 4;
      }
      break;
    case 'n':
      if( z[0]=='t' ){
        if( z[2]=='a' ){
          if( m_gt_1(z+3) ){
            z += 3;
          }
        }else if( z[2]=='e' ){
          if( !stem(&z, "tneme", "", m_gt_1)
           && !stem(&z, "tnem", "", m_gt_1)
          ){
            stem(&z, "tne", "", m_gt_1);
          }
        }
      }
      break;
    case 'o':
      if( z[0]=='u' ){
        if( m_gt_1(z+2) ){
          z += 2;
        }
      }else if( z[3]=='s' || z[3]=='t' ){
        stem(&z, "noi", "", m_gt_1);
      }
      break;
    case 's':
      if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
        z += 3;
      }
      break;
    case 't':
      if( !stem(&z, "eta", "", m_gt_1) ){
        stem(&z, "iti", "", m_gt_1);
      }
      break;
    case 'u':
      if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
        z += 3;
      }
      break;
    case 'v':
    case 'z':
      if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
        z += 3;
      }
      break;
  }

  /* Step 5a */
  if( z[0]=='e' ){
    if( m_gt_1(z+1) ){
      z++;
    }else if( m_eq_1(z+1) && !star_oh(z+1) ){
      z++;
    }
  }

  /* Step 5b */
  if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
    z++;
  }

  /* z[] is now the stemmed word in reverse order. Flip it back
  ** around into forward order and return.
  */
  *pnOut = i = (int)strlen(z);
  zOut[i] = 0;
  while( *z ){
    zOut[--i] = *(z++);
  }
}

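/* Editor's note: a small usage sketch, not part of the original file.
** "Connection" is case folded and reversed internally, and step 4 above
** strips the "-ion" suffix (preceded by 't' with m>1), leaving "connect".
*/
static void porterStemmerSketch(void){
  char zOut[64];
  int nOut;
  porter_stemmer("Connection", 10, zOut, &nOut);
  assert( nOut==7 );
  assert( strcmp(zOut, "connect")==0 );
}
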
/*
** Characters that can be part of a token. We assume any character
** whose value is 0x80 or greater (any UTF character) can be
** part of a token. In other words, delimiters all must have
** values of 0x7f or lower.
*/
static const char porterIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
};
#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30]))

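/* Editor's note: an illustrative check, not part of the original file.
** With the table above, token characters are [0-9A-Za-z_] plus any byte
** with the high bit set; everything else is treated as a delimiter.
** (The macro expects an "int ch" to be in scope at its point of use.)
*/
static void isDelimSketch(void){
  int ch;
  assert( !isDelim('a') && !isDelim('Z') && !isDelim('7') && !isDelim('_') );
  assert( isDelim(' ') && isDelim('-') && isDelim('.') && isDelim(',') );
}
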
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to porterOpen().
*/
static int porterNext(
  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by porterOpen */
  const char **pzToken,               /* OUT: *pzToken is the token text */
  int *pnBytes,                       /* OUT: Number of bytes in token */
  int *piStartOffset,                 /* OUT: Starting offset of token */
  int *piEndOffset,                   /* OUT: Ending offset of token */
  int *piPosition                     /* OUT: Position integer of token */
){
  porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
  const char *z = c->zInput;

  while( c->iOffset<c->nInput ){
    int iStartOffset, ch;

    /* Scan past delimiter characters */
    while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
      c->iOffset++;
    }

    /* Count non-delimiter characters. */
    iStartOffset = c->iOffset;
    while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
      c->iOffset++;
    }

    if( c->iOffset>iStartOffset ){
      int n = c->iOffset-iStartOffset;
      if( n>c->nAllocated ){
        char *pNew;
        c->nAllocated = n+20;
        pNew = sqlite3_realloc(c->zToken, c->nAllocated);
        if( !pNew ) return SQLITE_NOMEM;
        c->zToken = pNew;
      }
      porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
      *pzToken = c->zToken;
      *piStartOffset = iStartOffset;
      *piEndOffset = c->iOffset;
      *piPosition = c->iToken++;
      return SQLITE_OK;
    }
  }
  return SQLITE_DONE;
}

/*
** The set of routines that implement the porter-stemmer tokenizer
*/
static const sqlite3_tokenizer_module porterTokenizerModule = {
  0,
  porterCreate,
  porterDestroy,
  porterOpen,
  porterClose,
  porterNext,
  0
};

/*
** Allocate a new porter tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts3PorterTokenizerModule(
  sqlite3_tokenizer_module const**ppModule
){
  *ppModule = &porterTokenizerModule;
}

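/* Editor's note: an illustrative sketch, not part of the original file.
** This is how a caller (normally the FTS3 core) obtains the module pointer.
*/
static void porterModuleSketch(void){
  const sqlite3_tokenizer_module *pModule = 0;
  sqlite3Fts3PorterTokenizerModule(&pModule);
  assert( pModule!=0 && pModule->xNext==porterNext );
}
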
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
File diff suppressed because it is too large
Load Diff
@ -1,373 +0,0 @@
|
||||
/*
|
||||
** 2011 Jan 27
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This file is not part of the production FTS code. It is only used for
|
||||
** testing. It contains a virtual table implementation that provides direct
|
||||
** access to the full-text index of an FTS table.
|
||||
*/
|
||||
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
#ifdef SQLITE_TEST
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
typedef struct Fts3termTable Fts3termTable;
|
||||
typedef struct Fts3termCursor Fts3termCursor;
|
||||
|
||||
struct Fts3termTable {
|
||||
sqlite3_vtab base; /* Base class used by SQLite core */
|
||||
int iIndex; /* Index for Fts3Table.aIndex[] */
|
||||
Fts3Table *pFts3Tab;
|
||||
};
|
||||
|
||||
struct Fts3termCursor {
|
||||
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
|
||||
Fts3MultiSegReader csr; /* Must be right after "base" */
|
||||
Fts3SegFilter filter;
|
||||
|
||||
int isEof; /* True if cursor is at EOF */
|
||||
char *pNext;
|
||||
|
||||
sqlite3_int64 iRowid; /* Current 'rowid' value */
|
||||
sqlite3_int64 iDocid; /* Current 'docid' value */
|
||||
int iCol; /* Current 'col' value */
|
||||
int iPos; /* Current 'pos' value */
|
||||
};
|
||||
|
||||
/*
|
||||
** Schema of the terms table.
|
||||
*/
|
||||
#define FTS3_TERMS_SCHEMA "CREATE TABLE x(term, docid, col, pos)"
|
||||
|
||||
/*
|
||||
** This function does all the work for both the xConnect and xCreate methods.
|
||||
** These tables have no persistent representation of their own, so xConnect
|
||||
** and xCreate are identical operations.
|
||||
*/
|
||||
static int fts3termConnectMethod(
|
||||
sqlite3 *db, /* Database connection */
|
||||
void *pCtx, /* Non-zero for an fts4prefix table */
|
||||
int argc, /* Number of elements in argv array */
|
||||
const char * const *argv, /* xCreate/xConnect argument array */
|
||||
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
|
||||
char **pzErr /* OUT: sqlite3_malloc'd error message */
|
||||
){
|
||||
char const *zDb; /* Name of database (e.g. "main") */
|
||||
char const *zFts3; /* Name of fts3 table */
|
||||
int nDb; /* Result of strlen(zDb) */
|
||||
int nFts3; /* Result of strlen(zFts3) */
|
||||
int nByte; /* Bytes of space to allocate here */
|
||||
int rc; /* value returned by declare_vtab() */
|
||||
Fts3termTable *p; /* Virtual table object to return */
|
||||
int iIndex = 0;
|
||||
|
||||
UNUSED_PARAMETER(pCtx);
|
||||
if( argc==5 ){
|
||||
iIndex = atoi(argv[4]);
|
||||
argc--;
|
||||
}
|
||||
|
||||
/* The user should specify a single argument - the name of an fts3 table. */
|
||||
if( argc!=4 ){
|
||||
sqlite3Fts3ErrMsg(pzErr,
|
||||
"wrong number of arguments to fts4term constructor"
|
||||
);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
|
||||
zDb = argv[1];
|
||||
nDb = (int)strlen(zDb);
|
||||
zFts3 = argv[3];
|
||||
nFts3 = (int)strlen(zFts3);
|
||||
|
||||
rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA);
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
|
||||
nByte = sizeof(Fts3termTable) + sizeof(Fts3Table) + nDb + nFts3 + 2;
|
||||
p = (Fts3termTable *)sqlite3_malloc(nByte);
|
||||
if( !p ) return SQLITE_NOMEM;
|
||||
memset(p, 0, nByte);
|
||||
|
||||
p->pFts3Tab = (Fts3Table *)&p[1];
|
||||
p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1];
|
||||
p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1];
|
||||
p->pFts3Tab->db = db;
|
||||
p->pFts3Tab->nIndex = iIndex+1;
|
||||
p->iIndex = iIndex;
|
||||
|
||||
memcpy((char *)p->pFts3Tab->zDb, zDb, nDb);
|
||||
memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3);
|
||||
sqlite3Fts3Dequote((char *)p->pFts3Tab->zName);
|
||||
|
||||
*ppVtab = (sqlite3_vtab *)p;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** This function does the work for both the xDisconnect and xDestroy methods.
|
||||
** These tables have no persistent representation of their own, so xDisconnect
|
||||
** and xDestroy are identical operations.
|
||||
*/
|
||||
static int fts3termDisconnectMethod(sqlite3_vtab *pVtab){
|
||||
Fts3termTable *p = (Fts3termTable *)pVtab;
|
||||
Fts3Table *pFts3 = p->pFts3Tab;
|
||||
int i;
|
||||
|
||||
/* Free any prepared statements held */
|
||||
for(i=0; i<SizeofArray(pFts3->aStmt); i++){
|
||||
sqlite3_finalize(pFts3->aStmt[i]);
|
||||
}
|
||||
sqlite3_free(pFts3->zSegmentsTbl);
|
||||
sqlite3_free(p);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
#define FTS4AUX_EQ_CONSTRAINT 1
|
||||
#define FTS4AUX_GE_CONSTRAINT 2
|
||||
#define FTS4AUX_LE_CONSTRAINT 4
|
||||
|
||||
/*
|
||||
** xBestIndex - Analyze a WHERE and ORDER BY clause.
|
||||
*/
|
||||
static int fts3termBestIndexMethod(
|
||||
sqlite3_vtab *pVTab,
|
||||
sqlite3_index_info *pInfo
|
||||
){
|
||||
UNUSED_PARAMETER(pVTab);
|
||||
|
||||
/* This vtab naturally does "ORDER BY term, docid, col, pos". */
|
||||
if( pInfo->nOrderBy ){
|
||||
int i;
|
||||
for(i=0; i<pInfo->nOrderBy; i++){
|
||||
if( pInfo->aOrderBy[i].iColumn!=i || pInfo->aOrderBy[i].desc ) break;
|
||||
}
|
||||
if( i==pInfo->nOrderBy ){
|
||||
pInfo->orderByConsumed = 1;
|
||||
}
|
||||
}
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xOpen - Open a cursor.
|
||||
*/
|
||||
static int fts3termOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
|
||||
Fts3termCursor *pCsr; /* Pointer to cursor object to return */
|
||||
|
||||
UNUSED_PARAMETER(pVTab);
|
||||
|
||||
pCsr = (Fts3termCursor *)sqlite3_malloc(sizeof(Fts3termCursor));
|
||||
if( !pCsr ) return SQLITE_NOMEM;
|
||||
memset(pCsr, 0, sizeof(Fts3termCursor));
|
||||
|
||||
*ppCsr = (sqlite3_vtab_cursor *)pCsr;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xClose - Close a cursor.
|
||||
*/
|
||||
static int fts3termCloseMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3Table *pFts3 = ((Fts3termTable *)pCursor->pVtab)->pFts3Tab;
|
||||
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
|
||||
|
||||
sqlite3Fts3SegmentsClose(pFts3);
|
||||
sqlite3Fts3SegReaderFinish(&pCsr->csr);
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xNext - Advance the cursor to the next row, if any.
|
||||
*/
|
||||
static int fts3termNextMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
|
||||
Fts3Table *pFts3 = ((Fts3termTable *)pCursor->pVtab)->pFts3Tab;
|
||||
int rc;
|
||||
sqlite3_int64 v;
|
||||
|
||||
/* Increment our pretend rowid value. */
|
||||
pCsr->iRowid++;
|
||||
|
||||
/* Advance to the next term in the full-text index. */
|
||||
if( pCsr->csr.aDoclist==0
|
||||
|| pCsr->pNext>=&pCsr->csr.aDoclist[pCsr->csr.nDoclist-1]
|
||||
){
|
||||
rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr);
|
||||
if( rc!=SQLITE_ROW ){
|
||||
pCsr->isEof = 1;
|
||||
return rc;
|
||||
}
|
||||
|
||||
pCsr->iCol = 0;
|
||||
pCsr->iPos = 0;
|
||||
pCsr->iDocid = 0;
|
||||
pCsr->pNext = pCsr->csr.aDoclist;
|
||||
|
||||
/* Read docid */
|
||||
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &pCsr->iDocid);
|
||||
}
|
||||
|
||||
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
|
||||
if( v==0 ){
|
||||
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
|
||||
pCsr->iDocid += v;
|
||||
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
|
||||
pCsr->iCol = 0;
|
||||
pCsr->iPos = 0;
|
||||
}
|
||||
|
||||
if( v==1 ){
|
||||
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
|
||||
pCsr->iCol += (int)v;
|
||||
pCsr->iPos = 0;
|
||||
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
|
||||
}
|
||||
|
||||
pCsr->iPos += (int)(v - 2);
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xFilter - Initialize a cursor to point at the start of its data.
|
||||
*/
|
||||
static int fts3termFilterMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
|
||||
int idxNum, /* Strategy index */
|
||||
const char *idxStr, /* Unused */
|
||||
int nVal, /* Number of elements in apVal */
|
||||
sqlite3_value **apVal /* Arguments for the indexing scheme */
|
||||
){
|
||||
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
|
||||
Fts3termTable *p = (Fts3termTable *)pCursor->pVtab;
|
||||
Fts3Table *pFts3 = p->pFts3Tab;
|
||||
int rc;
|
||||
|
||||
UNUSED_PARAMETER(nVal);
|
||||
UNUSED_PARAMETER(idxNum);
|
||||
UNUSED_PARAMETER(idxStr);
|
||||
UNUSED_PARAMETER(apVal);
|
||||
|
||||
assert( idxStr==0 && idxNum==0 );
|
||||
|
||||
/* In case this cursor is being reused, close and zero it. */
|
||||
testcase(pCsr->filter.zTerm);
|
||||
sqlite3Fts3SegReaderFinish(&pCsr->csr);
|
||||
memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
|
||||
|
||||
pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
|
||||
pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
|
||||
|
||||
rc = sqlite3Fts3SegReaderCursor(pFts3, 0, p->iIndex, FTS3_SEGCURSOR_ALL,
|
||||
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr
|
||||
);
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = fts3termNextMethod(pCursor);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** xEof - Return true if the cursor is at EOF, or false otherwise.
|
||||
*/
|
||||
static int fts3termEofMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
|
||||
return pCsr->isEof;
|
||||
}
|
||||
|
||||
/*
|
||||
** xColumn - Return a column value.
|
||||
*/
|
||||
static int fts3termColumnMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
|
||||
sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
|
||||
int iCol /* Index of column to read value from */
|
||||
){
|
||||
Fts3termCursor *p = (Fts3termCursor *)pCursor;
|
||||
|
||||
assert( iCol>=0 && iCol<=3 );
|
||||
switch( iCol ){
|
||||
case 0:
|
||||
sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT);
|
||||
break;
|
||||
case 1:
|
||||
sqlite3_result_int64(pCtx, p->iDocid);
|
||||
break;
|
||||
case 2:
|
||||
sqlite3_result_int64(pCtx, p->iCol);
|
||||
break;
|
||||
default:
|
||||
sqlite3_result_int64(pCtx, p->iPos);
|
||||
break;
|
||||
}
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xRowid - Return the current rowid for the cursor.
|
||||
*/
|
||||
static int fts3termRowidMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
|
||||
sqlite_int64 *pRowid /* OUT: Rowid value */
|
||||
){
|
||||
Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
|
||||
*pRowid = pCsr->iRowid;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Register the fts3term module with database connection db. Return SQLITE_OK
|
||||
** if successful or an error code if sqlite3_create_module() fails.
|
||||
*/
|
||||
int sqlite3Fts3InitTerm(sqlite3 *db){
|
||||
static const sqlite3_module fts3term_module = {
|
||||
0, /* iVersion */
|
||||
fts3termConnectMethod, /* xCreate */
|
||||
fts3termConnectMethod, /* xConnect */
|
||||
fts3termBestIndexMethod, /* xBestIndex */
|
||||
fts3termDisconnectMethod, /* xDisconnect */
|
||||
fts3termDisconnectMethod, /* xDestroy */
|
||||
fts3termOpenMethod, /* xOpen */
|
||||
fts3termCloseMethod, /* xClose */
|
||||
fts3termFilterMethod, /* xFilter */
|
||||
fts3termNextMethod, /* xNext */
|
||||
fts3termEofMethod, /* xEof */
|
||||
fts3termColumnMethod, /* xColumn */
|
||||
fts3termRowidMethod, /* xRowid */
|
||||
0, /* xUpdate */
|
||||
0, /* xBegin */
|
||||
0, /* xSync */
|
||||
0, /* xCommit */
|
||||
0, /* xRollback */
|
||||
0, /* xFindFunction */
|
||||
0, /* xRename */
|
||||
0, /* xSavepoint */
|
||||
0, /* xRelease */
|
||||
0 /* xRollbackTo */
|
||||
};
|
||||
int rc; /* Return code */
|
||||
|
||||
rc = sqlite3_create_module(db, "fts4term", &fts3term_module, 0);
|
||||
return rc;
|
||||
}
|
||||
|
||||
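/* Editor's note: a hypothetical usage sketch, not part of the original file.
** Once sqlite3Fts3InitTerm() has been run against "db", the full-text index
** of an existing FTS4 table (assumed here to be named "t1") can be inspected
** with SQL along these lines.
*/
static int fts4termUsageSketch(sqlite3 *db){
  return sqlite3_exec(db,
      "CREATE VIRTUAL TABLE terms USING fts4term(t1);"
      "SELECT term, docid, col, pos FROM terms;",
      0, 0, 0);
}
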
#endif
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|
||||
@ -1,584 +0,0 @@
|
||||
/*
|
||||
** 2011 Jun 13
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This file is not part of the production FTS code. It is only used for
|
||||
** testing. It contains a Tcl command that can be used to test if a document
|
||||
** matches an FTS NEAR expression.
|
||||
**
|
||||
** As of March 2012, it also contains a version 1 tokenizer used for testing
|
||||
** that the sqlite3_tokenizer_module.xLanguageid() method is invoked correctly.
|
||||
*/
|
||||
|
||||
#include <tcl.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#if defined(SQLITE_TEST)
|
||||
#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
|
||||
|
||||
/* Required so that the "ifdef SQLITE_ENABLE_FTS3" below works */
|
||||
#include "fts3Int.h"
|
||||
|
||||
#define NM_MAX_TOKEN 12
|
||||
|
||||
typedef struct NearPhrase NearPhrase;
|
||||
typedef struct NearDocument NearDocument;
|
||||
typedef struct NearToken NearToken;
|
||||
|
||||
struct NearDocument {
  int nToken;                     /* Number of tokens in aToken[] */
  NearToken *aToken;              /* Token array */
};
|
||||
|
||||
struct NearToken {
|
||||
int n; /* Length of token in bytes */
|
||||
const char *z; /* Pointer to token string */
|
||||
};
|
||||
|
||||
struct NearPhrase {
|
||||
int nNear; /* Preceding NEAR value */
|
||||
int nToken; /* Number of tokens in this phrase */
|
||||
NearToken aToken[NM_MAX_TOKEN]; /* Array of tokens in this phrase */
|
||||
};
|
||||
|
||||
static int nm_phrase_match(
|
||||
NearPhrase *p,
|
||||
NearToken *aToken
|
||||
){
|
||||
int ii;
|
||||
|
||||
for(ii=0; ii<p->nToken; ii++){
|
||||
NearToken *pToken = &p->aToken[ii];
|
||||
if( pToken->n>0 && pToken->z[pToken->n-1]=='*' ){
|
||||
if( aToken[ii].n<(pToken->n-1) ) return 0;
|
||||
if( memcmp(aToken[ii].z, pToken->z, pToken->n-1) ) return 0;
|
||||
}else{
|
||||
if( aToken[ii].n!=pToken->n ) return 0;
|
||||
if( memcmp(aToken[ii].z, pToken->z, pToken->n) ) return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int nm_near_chain(
|
||||
int iDir, /* Direction to iterate through aPhrase[] */
|
||||
NearDocument *pDoc, /* Document to match against */
|
||||
int iPos, /* Position at which iPhrase was found */
|
||||
int nPhrase, /* Size of phrase array */
|
||||
NearPhrase *aPhrase, /* Phrase array */
|
||||
int iPhrase /* Index of phrase found */
|
||||
){
|
||||
int iStart;
|
||||
int iStop;
|
||||
int ii;
|
||||
int nNear;
|
||||
int iPhrase2;
|
||||
NearPhrase *p;
|
||||
NearPhrase *pPrev;
|
||||
|
||||
assert( iDir==1 || iDir==-1 );
|
||||
|
||||
if( iDir==1 ){
|
||||
if( (iPhrase+1)==nPhrase ) return 1;
|
||||
nNear = aPhrase[iPhrase+1].nNear;
|
||||
}else{
|
||||
if( iPhrase==0 ) return 1;
|
||||
nNear = aPhrase[iPhrase].nNear;
|
||||
}
|
||||
pPrev = &aPhrase[iPhrase];
|
||||
iPhrase2 = iPhrase+iDir;
|
||||
p = &aPhrase[iPhrase2];
|
||||
|
||||
iStart = iPos - nNear - p->nToken;
|
||||
iStop = iPos + nNear + pPrev->nToken;
|
||||
|
||||
if( iStart<0 ) iStart = 0;
|
||||
if( iStop > pDoc->nToken - p->nToken ) iStop = pDoc->nToken - p->nToken;
|
||||
|
||||
for(ii=iStart; ii<=iStop; ii++){
|
||||
if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
|
||||
if( nm_near_chain(iDir, pDoc, ii, nPhrase, aPhrase, iPhrase2) ) return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nm_match_count(
|
||||
NearDocument *pDoc, /* Document to match against */
|
||||
int nPhrase, /* Size of phrase array */
|
||||
NearPhrase *aPhrase, /* Phrase array */
|
||||
int iPhrase /* Index of phrase to count matches for */
|
||||
){
|
||||
int nOcc = 0;
|
||||
int ii;
|
||||
NearPhrase *p = &aPhrase[iPhrase];
|
||||
|
||||
for(ii=0; ii<(pDoc->nToken + 1 - p->nToken); ii++){
|
||||
if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
|
||||
/* Test forward NEAR chain (i>iPhrase) */
|
||||
if( 0==nm_near_chain(1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
|
||||
|
||||
/* Test reverse NEAR chain (i<iPhrase) */
|
||||
if( 0==nm_near_chain(-1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
|
||||
|
||||
/* This is a real match. Increment the counter. */
|
||||
nOcc++;
|
||||
}
|
||||
}
|
||||
|
||||
return nOcc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Tclcmd: fts3_near_match DOCUMENT EXPR ?OPTIONS?
|
||||
*/
|
||||
static int fts3_near_match_cmd(
|
||||
ClientData clientData,
|
||||
Tcl_Interp *interp,
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
int nTotal = 0;
|
||||
int rc;
|
||||
int ii;
|
||||
int nPhrase;
|
||||
NearPhrase *aPhrase = 0;
|
||||
NearDocument doc = {0, 0};
|
||||
Tcl_Obj **apDocToken;
|
||||
Tcl_Obj *pRet;
|
||||
Tcl_Obj *pPhrasecount = 0;
|
||||
|
||||
Tcl_Obj **apExprToken;
|
||||
int nExprToken;
|
||||
|
||||
UNUSED_PARAMETER(clientData);
|
||||
|
||||
/* Must have 3 or more arguments. */
|
||||
if( objc<3 || (objc%2)==0 ){
|
||||
Tcl_WrongNumArgs(interp, 1, objv, "DOCUMENT EXPR ?OPTION VALUE?...");
|
||||
rc = TCL_ERROR;
|
||||
goto near_match_out;
|
||||
}
|
||||
|
||||
for(ii=3; ii<objc; ii+=2){
|
||||
enum NM_enum { NM_PHRASECOUNTS };
|
||||
struct TestnmSubcmd {
|
||||
char *zName;
|
||||
enum NM_enum eOpt;
|
||||
} aOpt[] = {
|
||||
{ "-phrasecountvar", NM_PHRASECOUNTS },
|
||||
{ 0, 0 }
|
||||
};
|
||||
int iOpt;
|
||||
if( Tcl_GetIndexFromObjStruct(
|
||||
interp, objv[ii], aOpt, sizeof(aOpt[0]), "option", 0, &iOpt)
|
||||
){
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
switch( aOpt[iOpt].eOpt ){
|
||||
case NM_PHRASECOUNTS:
|
||||
pPhrasecount = objv[ii+1];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
rc = Tcl_ListObjGetElements(interp, objv[1], &doc.nToken, &apDocToken);
|
||||
if( rc!=TCL_OK ) goto near_match_out;
|
||||
doc.aToken = (NearToken *)ckalloc(doc.nToken*sizeof(NearToken));
|
||||
for(ii=0; ii<doc.nToken; ii++){
|
||||
doc.aToken[ii].z = Tcl_GetStringFromObj(apDocToken[ii], &doc.aToken[ii].n);
|
||||
}
|
||||
|
||||
rc = Tcl_ListObjGetElements(interp, objv[2], &nExprToken, &apExprToken);
|
||||
if( rc!=TCL_OK ) goto near_match_out;
|
||||
|
||||
nPhrase = (nExprToken + 1) / 2;
|
||||
aPhrase = (NearPhrase *)ckalloc(nPhrase * sizeof(NearPhrase));
|
||||
memset(aPhrase, 0, nPhrase * sizeof(NearPhrase));
|
||||
for(ii=0; ii<nPhrase; ii++){
|
||||
Tcl_Obj *pPhrase = apExprToken[ii*2];
|
||||
Tcl_Obj **apToken;
|
||||
int nToken;
|
||||
int jj;
|
||||
|
||||
rc = Tcl_ListObjGetElements(interp, pPhrase, &nToken, &apToken);
|
||||
if( rc!=TCL_OK ) goto near_match_out;
|
||||
if( nToken>NM_MAX_TOKEN ){
|
||||
Tcl_AppendResult(interp, "Too many tokens in phrase", 0);
|
||||
rc = TCL_ERROR;
|
||||
goto near_match_out;
|
||||
}
|
||||
for(jj=0; jj<nToken; jj++){
|
||||
NearToken *pT = &aPhrase[ii].aToken[jj];
|
||||
pT->z = Tcl_GetStringFromObj(apToken[jj], &pT->n);
|
||||
}
|
||||
aPhrase[ii].nToken = nToken;
|
||||
}
|
||||
for(ii=1; ii<nPhrase; ii++){
|
||||
Tcl_Obj *pNear = apExprToken[2*ii-1];
|
||||
int nNear;
|
||||
rc = Tcl_GetIntFromObj(interp, pNear, &nNear);
|
||||
if( rc!=TCL_OK ) goto near_match_out;
|
||||
aPhrase[ii].nNear = nNear;
|
||||
}
|
||||
|
||||
pRet = Tcl_NewObj();
|
||||
Tcl_IncrRefCount(pRet);
|
||||
for(ii=0; ii<nPhrase; ii++){
|
||||
int nOcc = nm_match_count(&doc, nPhrase, aPhrase, ii);
|
||||
Tcl_ListObjAppendElement(interp, pRet, Tcl_NewIntObj(nOcc));
|
||||
nTotal += nOcc;
|
||||
}
|
||||
if( pPhrasecount ){
|
||||
Tcl_ObjSetVar2(interp, pPhrasecount, 0, pRet, 0);
|
||||
}
|
||||
Tcl_DecrRefCount(pRet);
|
||||
Tcl_SetObjResult(interp, Tcl_NewBooleanObj(nTotal>0));
|
||||
|
||||
near_match_out:
|
||||
ckfree((char *)aPhrase);
|
||||
ckfree((char *)doc.aToken);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Tclcmd: fts3_configure_incr_load ?CHUNKSIZE THRESHOLD?
|
||||
**
|
||||
** Normally, FTS uses hard-coded values to determine the minimum doclist
|
||||
** size eligible for incremental loading, and the size of the chunks loaded
|
||||
** when a doclist is incrementally loaded. This command allows the built-in
|
||||
** values to be overridden for testing purposes.
|
||||
**
|
||||
** If present, the first argument is the chunksize in bytes to load doclists
|
||||
** in. The second argument is the minimum doclist size in bytes to use
|
||||
** incremental loading with.
|
||||
**
|
||||
** Whether or not the arguments are present, this command returns a list of
|
||||
** two integers - the initial chunksize and threshold when the command is
|
||||
** invoked. This can be used to restore the default behavior after running
|
||||
** tests. For example:
|
||||
**
|
||||
** # Override incr-load settings for testing:
|
||||
** set cfg [fts3_configure_incr_load $new_chunksize $new_threshold]
|
||||
**
|
||||
** .... run tests ....
|
||||
**
|
||||
** # Restore initial incr-load settings:
|
||||
** eval fts3_configure_incr_load $cfg
|
||||
*/
|
||||
static int fts3_configure_incr_load_cmd(
|
||||
ClientData clientData,
|
||||
Tcl_Interp *interp,
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
#ifdef SQLITE_ENABLE_FTS3
|
||||
extern int test_fts3_node_chunksize;
|
||||
extern int test_fts3_node_chunk_threshold;
|
||||
Tcl_Obj *pRet;
|
||||
|
||||
if( objc!=1 && objc!=3 ){
|
||||
Tcl_WrongNumArgs(interp, 1, objv, "?CHUNKSIZE THRESHOLD?");
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
pRet = Tcl_NewObj();
|
||||
Tcl_IncrRefCount(pRet);
|
||||
Tcl_ListObjAppendElement(
|
||||
interp, pRet, Tcl_NewIntObj(test_fts3_node_chunksize));
|
||||
Tcl_ListObjAppendElement(
|
||||
interp, pRet, Tcl_NewIntObj(test_fts3_node_chunk_threshold));
|
||||
|
||||
if( objc==3 ){
|
||||
int iArg1;
|
||||
int iArg2;
|
||||
if( Tcl_GetIntFromObj(interp, objv[1], &iArg1)
|
||||
|| Tcl_GetIntFromObj(interp, objv[2], &iArg2)
|
||||
){
|
||||
Tcl_DecrRefCount(pRet);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
test_fts3_node_chunksize = iArg1;
|
||||
test_fts3_node_chunk_threshold = iArg2;
|
||||
}
|
||||
|
||||
Tcl_SetObjResult(interp, pRet);
|
||||
Tcl_DecrRefCount(pRet);
|
||||
#endif
|
||||
UNUSED_PARAMETER(clientData);
|
||||
return TCL_OK;
|
||||
}
|
||||
|
||||
#ifdef SQLITE_ENABLE_FTS3
|
||||
/**************************************************************************
|
||||
** Beginning of test tokenizer code.
|
||||
**
|
||||
** For language 0, this tokenizer is similar to the default 'simple'
|
||||
** tokenizer. For other languages L, the following:
|
||||
**
|
||||
** * Odd numbered languages are case-sensitive. Even numbered
|
||||
** languages are not.
|
||||
**
|
||||
** * Language ids 100 or greater are considered an error.
|
||||
**
|
||||
** The implementation assumes that the input contains only ASCII characters
|
||||
** (i.e. those that may be encoded in UTF-8 using a single byte).
|
||||
*/
|
||||
typedef struct test_tokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
} test_tokenizer;
|
||||
|
||||
typedef struct test_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *aInput; /* Input being tokenized */
|
||||
int nInput; /* Size of the input in bytes */
|
||||
int iInput; /* Current offset in aInput */
|
||||
int iToken; /* Index of next token to be returned */
|
||||
char *aBuffer; /* Buffer containing current token */
|
||||
int nBuffer; /* Number of bytes allocated at pToken */
|
||||
int iLangid; /* Configured language id */
|
||||
} test_tokenizer_cursor;
|
||||
|
||||
static int testTokenizerCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
test_tokenizer *pNew;
|
||||
UNUSED_PARAMETER(argc);
|
||||
UNUSED_PARAMETER(argv);
|
||||
|
||||
pNew = sqlite3_malloc(sizeof(test_tokenizer));
|
||||
if( !pNew ) return SQLITE_NOMEM;
|
||||
memset(pNew, 0, sizeof(test_tokenizer));
|
||||
|
||||
*ppTokenizer = (sqlite3_tokenizer *)pNew;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int testTokenizerDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
test_tokenizer *p = (test_tokenizer *)pTokenizer;
|
||||
sqlite3_free(p);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int testTokenizerOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *pInput, int nBytes, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
int rc = SQLITE_OK; /* Return code */
|
||||
test_tokenizer_cursor *pCsr; /* New cursor object */
|
||||
|
||||
UNUSED_PARAMETER(pTokenizer);
|
||||
|
||||
pCsr = (test_tokenizer_cursor *)sqlite3_malloc(sizeof(test_tokenizer_cursor));
|
||||
if( pCsr==0 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}else{
|
||||
memset(pCsr, 0, sizeof(test_tokenizer_cursor));
|
||||
pCsr->aInput = pInput;
|
||||
if( nBytes<0 ){
|
||||
pCsr->nInput = (int)strlen(pInput);
|
||||
}else{
|
||||
pCsr->nInput = nBytes;
|
||||
}
|
||||
}
|
||||
|
||||
*ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int testTokenizerClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
|
||||
sqlite3_free(pCsr->aBuffer);
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int testIsTokenChar(char c){
|
||||
return (c>='a' && c<='z') || (c>='A' && c<='Z');
|
||||
}
|
||||
static int testTolower(char c){
|
||||
char ret = c;
|
||||
if( ret>='A' && ret<='Z') ret = ret - ('A'-'a');
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int testTokenizerNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by testTokenizerOpen */
|
||||
const char **ppToken, /* OUT: *ppToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
|
||||
int rc = SQLITE_OK;
|
||||
const char *p;
|
||||
const char *pEnd;
|
||||
|
||||
p = &pCsr->aInput[pCsr->iInput];
|
||||
pEnd = &pCsr->aInput[pCsr->nInput];
|
||||
|
||||
/* Skip past any white-space */
|
||||
assert( p<=pEnd );
|
||||
while( p<pEnd && testIsTokenChar(*p)==0 ) p++;
|
||||
|
||||
if( p==pEnd ){
|
||||
rc = SQLITE_DONE;
|
||||
}else{
|
||||
/* Advance to the end of the token */
|
||||
const char *pToken = p;
|
||||
int nToken;
|
||||
while( p<pEnd && testIsTokenChar(*p) ) p++;
|
||||
nToken = (int)(p-pToken);
|
||||
|
||||
/* Copy the token into the buffer */
|
||||
if( nToken>pCsr->nBuffer ){
|
||||
sqlite3_free(pCsr->aBuffer);
|
||||
pCsr->aBuffer = sqlite3_malloc(nToken);
|
||||
}
|
||||
if( pCsr->aBuffer==0 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}else{
|
||||
int i;
|
||||
|
||||
if( pCsr->iLangid & 0x00000001 ){
|
||||
for(i=0; i<nToken; i++) pCsr->aBuffer[i] = pToken[i];
|
||||
}else{
|
||||
for(i=0; i<nToken; i++) pCsr->aBuffer[i] = testTolower(pToken[i]);
|
||||
}
|
||||
pCsr->iToken++;
|
||||
pCsr->iInput = (int)(p - pCsr->aInput);
|
||||
|
||||
*ppToken = pCsr->aBuffer;
|
||||
*pnBytes = nToken;
|
||||
*piStartOffset = (int)(pToken - pCsr->aInput);
|
||||
*piEndOffset = (int)(p - pCsr->aInput);
|
||||
*piPosition = pCsr->iToken;
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int testTokenizerLanguage(
|
||||
sqlite3_tokenizer_cursor *pCursor,
|
||||
int iLangid
|
||||
){
|
||||
int rc = SQLITE_OK;
|
||||
test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
|
||||
pCsr->iLangid = iLangid;
|
||||
if( pCsr->iLangid>=100 ){
|
||||
rc = SQLITE_ERROR;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int fts3_test_tokenizer_cmd(
|
||||
ClientData clientData,
|
||||
Tcl_Interp *interp,
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
#ifdef SQLITE_ENABLE_FTS3
|
||||
static const sqlite3_tokenizer_module testTokenizerModule = {
|
||||
1,
|
||||
testTokenizerCreate,
|
||||
testTokenizerDestroy,
|
||||
testTokenizerOpen,
|
||||
testTokenizerClose,
|
||||
testTokenizerNext,
|
||||
testTokenizerLanguage
|
||||
};
|
||||
const sqlite3_tokenizer_module *pPtr = &testTokenizerModule;
|
||||
if( objc!=1 ){
|
||||
Tcl_WrongNumArgs(interp, 1, objv, "");
|
||||
return TCL_ERROR;
|
||||
}
|
||||
Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(
|
||||
(const unsigned char *)&pPtr, sizeof(sqlite3_tokenizer_module *)
|
||||
));
|
||||
#endif
|
||||
UNUSED_PARAMETER(clientData);
|
||||
return TCL_OK;
|
||||
}
|
||||
|
||||
static int fts3_test_varint_cmd(
|
||||
ClientData clientData,
|
||||
Tcl_Interp *interp,
|
||||
int objc,
|
||||
Tcl_Obj *CONST objv[]
|
||||
){
|
||||
#ifdef SQLITE_ENABLE_FTS3
|
||||
char aBuf[24];
|
||||
int rc;
|
||||
Tcl_WideInt w, w2;
|
||||
int nByte, nByte2;
|
||||
|
||||
if( objc!=2 ){
|
||||
Tcl_WrongNumArgs(interp, 1, objv, "INTEGER");
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
rc = Tcl_GetWideIntFromObj(interp, objv[1], &w);
|
||||
if( rc!=TCL_OK ) return rc;
|
||||
|
||||
nByte = sqlite3Fts3PutVarint(aBuf, w);
|
||||
nByte2 = sqlite3Fts3GetVarint(aBuf, &w2);
|
||||
if( w!=w2 || nByte!=nByte2 ){
|
||||
char *zErr = sqlite3_mprintf("error testing %lld", w);
|
||||
Tcl_ResetResult(interp);
|
||||
Tcl_AppendResult(interp, zErr, 0);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
|
||||
if( w<=2147483647 && w>=0 ){
|
||||
int i;
|
||||
nByte2 = fts3GetVarint32(aBuf, &i);
|
||||
if( (int)w!=i || nByte!=nByte2 ){
|
||||
char *zErr = sqlite3_mprintf("error testing %lld (32-bit)", w);
|
||||
Tcl_ResetResult(interp);
|
||||
Tcl_AppendResult(interp, zErr, 0);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
UNUSED_PARAMETER(clientData);
|
||||
return TCL_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** End of tokenizer code.
|
||||
**************************************************************************/
|
||||
|
||||
int Sqlitetestfts3_Init(Tcl_Interp *interp){
|
||||
Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
|
||||
Tcl_CreateObjCommand(interp,
|
||||
"fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
|
||||
);
|
||||
Tcl_CreateObjCommand(
|
||||
interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0
|
||||
);
|
||||
|
||||
Tcl_CreateObjCommand(
|
||||
interp, "fts3_test_varint", fts3_test_varint_cmd, 0, 0
|
||||
);
|
||||
return TCL_OK;
|
||||
}
|
||||
#endif /* SQLITE_ENABLE_FTS3 || SQLITE_ENABLE_FTS4 */
|
||||
#endif /* ifdef SQLITE_TEST */
|
||||
@ -1,454 +0,0 @@
|
||||
/*
|
||||
** 2013 Apr 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This file contains code for the "fts3tokenize" virtual table module.
|
||||
** An fts3tokenize virtual table is created as follows:
|
||||
**
|
||||
** CREATE VIRTUAL TABLE <tbl> USING fts3tokenize(
|
||||
** <tokenizer-name>, <arg-1>, ...
|
||||
** );
|
||||
**
|
||||
** The table created has the following schema:
|
||||
**
|
||||
** CREATE TABLE <tbl>(input, token, start, end, position)
|
||||
**
|
||||
** When queried, the query must include a WHERE clause of type:
|
||||
**
|
||||
** input = <string>
|
||||
**
|
||||
** The virtual table module tokenizes this <string>, using the FTS3
|
||||
** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE
|
||||
** statement and returns one row for each token in the result. With
|
||||
** fields set as follows:
|
||||
**
|
||||
** input: Always set to a copy of <string>
|
||||
** token: A token from the input.
|
||||
** start: Byte offset of the token within the input <string>.
|
||||
** end: Byte offset of the byte immediately following the end of the
|
||||
** token within the input string.
|
||||
** pos: Token offset of token within input.
|
||||
**
|
||||
*/
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
typedef struct Fts3tokTable Fts3tokTable;
|
||||
typedef struct Fts3tokCursor Fts3tokCursor;
|
||||
|
||||
/*
|
||||
** Virtual table structure.
|
||||
*/
|
||||
struct Fts3tokTable {
|
||||
sqlite3_vtab base; /* Base class used by SQLite core */
|
||||
const sqlite3_tokenizer_module *pMod;
|
||||
sqlite3_tokenizer *pTok;
|
||||
};
|
||||
|
||||
/*
|
||||
** Virtual table cursor structure.
|
||||
*/
|
||||
struct Fts3tokCursor {
|
||||
sqlite3_vtab_cursor base; /* Base class used by SQLite core */
|
||||
char *zInput; /* Input string */
|
||||
sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */
|
||||
int iRowid; /* Current 'rowid' value */
|
||||
const char *zToken; /* Current 'token' value */
|
||||
int nToken; /* Size of zToken in bytes */
|
||||
int iStart; /* Current 'start' value */
|
||||
int iEnd; /* Current 'end' value */
|
||||
int iPos; /* Current 'pos' value */
|
||||
};
|
||||
|
||||
/*
|
||||
** Query FTS for the tokenizer implementation named zName.
|
||||
*/
|
||||
static int fts3tokQueryTokenizer(
|
||||
Fts3Hash *pHash,
|
||||
const char *zName,
|
||||
const sqlite3_tokenizer_module **pp,
|
||||
char **pzErr
|
||||
){
|
||||
sqlite3_tokenizer_module *p;
|
||||
int nName = (int)strlen(zName);
|
||||
|
||||
p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
|
||||
if( !p ){
|
||||
sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", zName);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
|
||||
*pp = p;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** The second argument, argv[], is an array of pointers to nul-terminated
|
||||
** strings. This function makes a copy of the array and strings into a
|
||||
** single block of memory. It then dequotes any of the strings that appear
|
||||
** to be quoted.
|
||||
**
|
||||
** If successful, output parameter *pazDequote is set to point at the
|
||||
** array of dequoted strings and SQLITE_OK is returned. The caller is
|
||||
** responsible for eventually calling sqlite3_free() to free the array
|
||||
** in this case. Or, if an error occurs, an SQLite error code is returned.
|
||||
** The final value of *pazDequote is undefined in this case.
|
||||
*/
|
||||
static int fts3tokDequoteArray(
|
||||
int argc, /* Number of elements in argv[] */
|
||||
const char * const *argv, /* Input array */
|
||||
char ***pazDequote /* Output array */
|
||||
){
|
||||
int rc = SQLITE_OK; /* Return code */
|
||||
if( argc==0 ){
|
||||
*pazDequote = 0;
|
||||
}else{
|
||||
int i;
|
||||
int nByte = 0;
|
||||
char **azDequote;
|
||||
|
||||
for(i=0; i<argc; i++){
|
||||
nByte += (int)(strlen(argv[i]) + 1);
|
||||
}
|
||||
|
||||
*pazDequote = azDequote = sqlite3_malloc(sizeof(char *)*argc + nByte);
|
||||
if( azDequote==0 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}else{
|
||||
char *pSpace = (char *)&azDequote[argc];
|
||||
for(i=0; i<argc; i++){
|
||||
int n = (int)strlen(argv[i]);
|
||||
azDequote[i] = pSpace;
|
||||
memcpy(pSpace, argv[i], n+1);
|
||||
sqlite3Fts3Dequote(pSpace);
|
||||
pSpace += (n+1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Schema of the tokenizer table.
|
||||
*/
|
||||
#define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)"
|
||||
|
||||
/*
|
||||
** This function does all the work for both the xConnect and xCreate methods.
|
||||
** These tables have no persistent representation of their own, so xConnect
|
||||
** and xCreate are identical operations.
|
||||
**
|
||||
** argv[0]: module name
|
||||
** argv[1]: database name
|
||||
** argv[2]: table name
|
||||
** argv[3]: first argument (tokenizer name)
|
||||
*/
|
||||
static int fts3tokConnectMethod(
|
||||
sqlite3 *db, /* Database connection */
|
||||
void *pHash, /* Hash table of tokenizers */
|
||||
int argc, /* Number of elements in argv array */
|
||||
const char * const *argv, /* xCreate/xConnect argument array */
|
||||
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
|
||||
char **pzErr /* OUT: sqlite3_malloc'd error message */
|
||||
){
|
||||
Fts3tokTable *pTab = 0;
|
||||
const sqlite3_tokenizer_module *pMod = 0;
|
||||
sqlite3_tokenizer *pTok = 0;
|
||||
int rc;
|
||||
char **azDequote = 0;
|
||||
int nDequote;
|
||||
|
||||
rc = sqlite3_declare_vtab(db, FTS3_TOK_SCHEMA);
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
|
||||
nDequote = argc-3;
|
||||
rc = fts3tokDequoteArray(nDequote, &argv[3], &azDequote);
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
const char *zModule;
|
||||
if( nDequote<1 ){
|
||||
zModule = "simple";
|
||||
}else{
|
||||
zModule = azDequote[0];
|
||||
}
|
||||
rc = fts3tokQueryTokenizer((Fts3Hash*)pHash, zModule, &pMod, pzErr);
|
||||
}
|
||||
|
||||
assert( (rc==SQLITE_OK)==(pMod!=0) );
|
||||
if( rc==SQLITE_OK ){
|
||||
const char * const *azArg = (const char * const *)&azDequote[1];
|
||||
rc = pMod->xCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok);
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable));
|
||||
if( pTab==0 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
memset(pTab, 0, sizeof(Fts3tokTable));
|
||||
pTab->pMod = pMod;
|
||||
pTab->pTok = pTok;
|
||||
*ppVtab = &pTab->base;
|
||||
}else{
|
||||
if( pTok ){
|
||||
pMod->xDestroy(pTok);
|
||||
}
|
||||
}
|
||||
|
||||
sqlite3_free(azDequote);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** This function does the work for both the xDisconnect and xDestroy methods.
|
||||
** These tables have no persistent representation of their own, so xDisconnect
|
||||
** and xDestroy are identical operations.
|
||||
*/
|
||||
static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){
|
||||
Fts3tokTable *pTab = (Fts3tokTable *)pVtab;
|
||||
|
||||
pTab->pMod->xDestroy(pTab->pTok);
|
||||
sqlite3_free(pTab);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xBestIndex - Analyze a WHERE and ORDER BY clause.
|
||||
*/
|
||||
static int fts3tokBestIndexMethod(
|
||||
sqlite3_vtab *pVTab,
|
||||
sqlite3_index_info *pInfo
|
||||
){
|
||||
int i;
|
||||
UNUSED_PARAMETER(pVTab);
|
||||
|
||||
for(i=0; i<pInfo->nConstraint; i++){
|
||||
if( pInfo->aConstraint[i].usable
|
||||
&& pInfo->aConstraint[i].iColumn==0
|
||||
&& pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ
|
||||
){
|
||||
pInfo->idxNum = 1;
|
||||
pInfo->aConstraintUsage[i].argvIndex = 1;
|
||||
pInfo->aConstraintUsage[i].omit = 1;
|
||||
pInfo->estimatedCost = 1;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
|
||||
pInfo->idxNum = 0;
|
||||
assert( pInfo->estimatedCost>1000000.0 );
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xOpen - Open a cursor.
|
||||
*/
|
||||
static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
|
||||
Fts3tokCursor *pCsr;
|
||||
UNUSED_PARAMETER(pVTab);
|
||||
|
||||
pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor));
|
||||
if( pCsr==0 ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(pCsr, 0, sizeof(Fts3tokCursor));
|
||||
|
||||
*ppCsr = (sqlite3_vtab_cursor *)pCsr;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Reset the tokenizer cursor passed as the only argument. As if it had
|
||||
** just been returned by fts3tokOpenMethod().
|
||||
*/
|
||||
static void fts3tokResetCursor(Fts3tokCursor *pCsr){
|
||||
if( pCsr->pCsr ){
|
||||
Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab);
|
||||
pTab->pMod->xClose(pCsr->pCsr);
|
||||
pCsr->pCsr = 0;
|
||||
}
|
||||
sqlite3_free(pCsr->zInput);
|
||||
pCsr->zInput = 0;
|
||||
pCsr->zToken = 0;
|
||||
pCsr->nToken = 0;
|
||||
pCsr->iStart = 0;
|
||||
pCsr->iEnd = 0;
|
||||
pCsr->iPos = 0;
|
||||
pCsr->iRowid = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** xClose - Close a cursor.
|
||||
*/
|
||||
static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
|
||||
|
||||
fts3tokResetCursor(pCsr);
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xNext - Advance the cursor to the next row, if any.
|
||||
*/
|
||||
static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
|
||||
Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab);
|
||||
int rc; /* Return code */
|
||||
|
||||
pCsr->iRowid++;
|
||||
rc = pTab->pMod->xNext(pCsr->pCsr,
|
||||
&pCsr->zToken, &pCsr->nToken,
|
||||
&pCsr->iStart, &pCsr->iEnd, &pCsr->iPos
|
||||
);
|
||||
|
||||
if( rc!=SQLITE_OK ){
|
||||
fts3tokResetCursor(pCsr);
|
||||
if( rc==SQLITE_DONE ) rc = SQLITE_OK;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** xFilter - Initialize a cursor to point at the start of its data.
|
||||
*/
|
||||
static int fts3tokFilterMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
|
||||
int idxNum, /* Strategy index */
|
||||
const char *idxStr, /* Unused */
|
||||
int nVal, /* Number of elements in apVal */
|
||||
sqlite3_value **apVal /* Arguments for the indexing scheme */
|
||||
){
|
||||
int rc = SQLITE_ERROR;
|
||||
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
|
||||
Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab);
|
||||
UNUSED_PARAMETER(idxStr);
|
||||
UNUSED_PARAMETER(nVal);
|
||||
|
||||
fts3tokResetCursor(pCsr);
|
||||
if( idxNum==1 ){
|
||||
const char *zByte = (const char *)sqlite3_value_text(apVal[0]);
|
||||
int nByte = sqlite3_value_bytes(apVal[0]);
|
||||
pCsr->zInput = sqlite3_malloc(nByte+1);
|
||||
if( pCsr->zInput==0 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}else{
|
||||
memcpy(pCsr->zInput, zByte, nByte);
|
||||
pCsr->zInput[nByte] = 0;
|
||||
rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr);
|
||||
if( rc==SQLITE_OK ){
|
||||
pCsr->pCsr->pTokenizer = pTab->pTok;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
return fts3tokNextMethod(pCursor);
|
||||
}
|
||||
|
||||
/*
|
||||
** xEof - Return true if the cursor is at EOF, or false otherwise.
|
||||
*/
|
||||
static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
|
||||
return (pCsr->zToken==0);
|
||||
}
|
||||
|
||||
/*
|
||||
** xColumn - Return a column value.
|
||||
*/
|
||||
static int fts3tokColumnMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
|
||||
sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
|
||||
int iCol /* Index of column to read value from */
|
||||
){
|
||||
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
|
||||
|
||||
/* CREATE TABLE x(input, token, start, end, position) */
|
||||
switch( iCol ){
|
||||
case 0:
|
||||
sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT);
|
||||
break;
|
||||
case 1:
|
||||
sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT);
|
||||
break;
|
||||
case 2:
|
||||
sqlite3_result_int(pCtx, pCsr->iStart);
|
||||
break;
|
||||
case 3:
|
||||
sqlite3_result_int(pCtx, pCsr->iEnd);
|
||||
break;
|
||||
default:
|
||||
assert( iCol==4 );
|
||||
sqlite3_result_int(pCtx, pCsr->iPos);
|
||||
break;
|
||||
}
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** xRowid - Return the current rowid for the cursor.
|
||||
*/
|
||||
static int fts3tokRowidMethod(
|
||||
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
|
||||
sqlite_int64 *pRowid /* OUT: Rowid value */
|
||||
){
|
||||
Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
|
||||
*pRowid = (sqlite3_int64)pCsr->iRowid;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Register the fts3tok module with database connection db. Return SQLITE_OK
|
||||
** if successful or an error code if sqlite3_create_module() fails.
|
||||
*/
|
||||
int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){
|
||||
static const sqlite3_module fts3tok_module = {
|
||||
0, /* iVersion */
|
||||
fts3tokConnectMethod, /* xCreate */
|
||||
fts3tokConnectMethod, /* xConnect */
|
||||
fts3tokBestIndexMethod, /* xBestIndex */
|
||||
fts3tokDisconnectMethod, /* xDisconnect */
|
||||
fts3tokDisconnectMethod, /* xDestroy */
|
||||
fts3tokOpenMethod, /* xOpen */
|
||||
fts3tokCloseMethod, /* xClose */
|
||||
fts3tokFilterMethod, /* xFilter */
|
||||
fts3tokNextMethod, /* xNext */
|
||||
fts3tokEofMethod, /* xEof */
|
||||
fts3tokColumnMethod, /* xColumn */
|
||||
fts3tokRowidMethod, /* xRowid */
|
||||
0, /* xUpdate */
|
||||
0, /* xBegin */
|
||||
0, /* xSync */
|
||||
0, /* xCommit */
|
||||
0, /* xRollback */
|
||||
0, /* xFindFunction */
|
||||
0, /* xRename */
|
||||
0, /* xSavepoint */
|
||||
0, /* xRelease */
|
||||
0 /* xRollbackTo */
|
||||
};
|
||||
int rc; /* Return code */
|
||||
|
||||
rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash);
|
||||
return rc;
|
||||
}
|
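/*
** Illustrative usage sketch, not part of the original file: once the
** "fts3tokenize" module registered above is available, it is normally
** exercised from SQL. The table name, the 'simple' tokenizer argument and
** the exact CREATE VIRTUAL TABLE argument syntax are assumptions (the
** xConnect method that parses them is outside this excerpt), but the column
** names match the schema noted in fts3tokColumnMethod() above.
*/
static int fts3tokUsageExample(sqlite3 *db){
  int rc = sqlite3_exec(db,
      "CREATE VIRTUAL TABLE tok USING fts3tokenize('simple');", 0, 0, 0);
  if( rc==SQLITE_OK ){
    rc = sqlite3_exec(db,
        "SELECT token, start, end, position FROM tok"
        " WHERE input = 'Right now, how are you?';", 0, 0, 0);
  }
  return rc;
}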
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|
||||
@@ -1,507 +0,0 @@
|
||||
/*
|
||||
** 2007 June 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This is part of an SQLite module implementing full-text search.
|
||||
** This particular file implements the generic tokenizer interface.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS3 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS3 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
|
||||
*/
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
** Implementation of the SQL scalar function for accessing the underlying
** hash table. This function may be called as follows:
**
**   SELECT <function-name>(<key-name>);
**   SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer').
**
** If the <pointer> argument is specified, it must be a blob value
** containing a pointer to be stored as the hash data corresponding
** to the string <key-name>. If <pointer> is not specified, then
** the string <key-name> must already exist in the hash table. Otherwise,
** an error is returned.
**
** Whether or not the <pointer> argument is specified, the value returned
** is a blob containing the pointer stored as the hash data corresponding
** to string <key-name> (after the hash-table is updated, if applicable).
*/
||||
static void scalarFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
Fts3Hash *pHash;
|
||||
void *pPtr = 0;
|
||||
const unsigned char *zName;
|
||||
int nName;
|
||||
|
||||
assert( argc==1 || argc==2 );
|
||||
|
||||
pHash = (Fts3Hash *)sqlite3_user_data(context);
|
||||
|
||||
zName = sqlite3_value_text(argv[0]);
|
||||
nName = sqlite3_value_bytes(argv[0])+1;
|
||||
|
||||
if( argc==2 ){
|
||||
#ifdef SQLITE_ENABLE_FTS3_TOKENIZER
|
||||
void *pOld;
|
||||
int n = sqlite3_value_bytes(argv[1]);
|
||||
if( zName==0 || n!=sizeof(pPtr) ){
|
||||
sqlite3_result_error(context, "argument type mismatch", -1);
|
||||
return;
|
||||
}
|
||||
pPtr = *(void **)sqlite3_value_blob(argv[1]);
|
||||
pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr);
|
||||
if( pOld==pPtr ){
|
||||
sqlite3_result_error(context, "out of memory", -1);
|
||||
return;
|
||||
}
|
||||
#else
|
||||
sqlite3_result_error(context, "fts3tokenize: "
|
||||
"disabled - rebuild with -DSQLITE_ENABLE_FTS3_TOKENIZER", -1
|
||||
);
|
||||
return;
|
||||
#endif /* SQLITE_ENABLE_FTS3_TOKENIZER */
|
||||
}else{
|
||||
if( zName ){
|
||||
pPtr = sqlite3Fts3HashFind(pHash, zName, nName);
|
||||
}
|
||||
if( !pPtr ){
|
||||
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
|
||||
sqlite3_result_error(context, zErr, -1);
|
||||
sqlite3_free(zErr);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
|
||||
}
|
||||
|
||||
int sqlite3Fts3IsIdChar(char c){
|
||||
static const char isFtsIdChar[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
|
||||
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
|
||||
};
|
||||
return (c&0x80 || isFtsIdChar[(int)(c)]);
|
||||
}
|
||||
|
||||
const char *sqlite3Fts3NextToken(const char *zStr, int *pn){
|
||||
const char *z1;
|
||||
const char *z2 = 0;
|
||||
|
||||
/* Find the start of the next token. */
|
||||
z1 = zStr;
|
||||
while( z2==0 ){
|
||||
char c = *z1;
|
||||
switch( c ){
|
||||
case '\0': return 0; /* No more tokens here */
|
||||
case '\'':
|
||||
case '"':
|
||||
case '`': {
|
||||
z2 = z1;
|
||||
while( *++z2 && (*z2!=c || *++z2==c) );
|
||||
break;
|
||||
}
|
||||
case '[':
|
||||
z2 = &z1[1];
|
||||
while( *z2 && z2[0]!=']' ) z2++;
|
||||
if( *z2 ) z2++;
|
||||
break;
|
||||
|
||||
default:
|
||||
if( sqlite3Fts3IsIdChar(*z1) ){
|
||||
z2 = &z1[1];
|
||||
while( sqlite3Fts3IsIdChar(*z2) ) z2++;
|
||||
}else{
|
||||
z1++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*pn = (int)(z2-z1);
|
||||
return z1;
|
||||
}
|
||||
|
||||
int sqlite3Fts3InitTokenizer(
|
||||
Fts3Hash *pHash, /* Tokenizer hash table */
|
||||
const char *zArg, /* Tokenizer name */
|
||||
sqlite3_tokenizer **ppTok, /* OUT: Tokenizer (if applicable) */
|
||||
char **pzErr /* OUT: Set to malloced error message */
|
||||
){
|
||||
int rc;
|
||||
char *z = (char *)zArg;
|
||||
int n = 0;
|
||||
char *zCopy;
|
||||
char *zEnd; /* Pointer to nul-term of zCopy */
|
||||
sqlite3_tokenizer_module *m;
|
||||
|
||||
zCopy = sqlite3_mprintf("%s", zArg);
|
||||
if( !zCopy ) return SQLITE_NOMEM;
|
||||
zEnd = &zCopy[strlen(zCopy)];
|
||||
|
||||
z = (char *)sqlite3Fts3NextToken(zCopy, &n);
|
||||
if( z==0 ){
|
||||
assert( n==0 );
|
||||
z = zCopy;
|
||||
}
|
||||
z[n] = '\0';
|
||||
sqlite3Fts3Dequote(z);
|
||||
|
||||
m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1);
|
||||
if( !m ){
|
||||
sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", z);
|
||||
rc = SQLITE_ERROR;
|
||||
}else{
|
||||
char const **aArg = 0;
|
||||
int iArg = 0;
|
||||
z = &z[n+1];
|
||||
while( z<zEnd && (NULL!=(z = (char *)sqlite3Fts3NextToken(z, &n))) ){
|
||||
int nNew = sizeof(char *)*(iArg+1);
|
||||
char const **aNew = (const char **)sqlite3_realloc((void *)aArg, nNew);
|
||||
if( !aNew ){
|
||||
sqlite3_free(zCopy);
|
||||
sqlite3_free((void *)aArg);
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
aArg = aNew;
|
||||
aArg[iArg++] = z;
|
||||
z[n] = '\0';
|
||||
sqlite3Fts3Dequote(z);
|
||||
z = &z[n+1];
|
||||
}
|
||||
rc = m->xCreate(iArg, aArg, ppTok);
|
||||
assert( rc!=SQLITE_OK || *ppTok );
|
||||
if( rc!=SQLITE_OK ){
|
||||
sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer");
|
||||
}else{
|
||||
(*ppTok)->pModule = m;
|
||||
}
|
||||
sqlite3_free((void *)aArg);
|
||||
}
|
||||
|
||||
sqlite3_free(zCopy);
|
||||
return rc;
|
||||
}
|
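/*
** Hedged sketch, not part of the original file: sqlite3Fts3InitTokenizer()
** above takes a specification string such as "simple" or "porter arg",
** looks the first dequoted token up in pHash, and hands any remaining
** tokens to the module's xCreate(). The tokenizer name used here is an
** assumption; the return codes are simply those of the function itself.
*/
static int initTokenizerExample(
  Fts3Hash *pHash,                /* Populated tokenizer hash table */
  sqlite3_tokenizer **ppTok,      /* OUT: created tokenizer */
  char **pzErr                    /* OUT: error message on failure */
){
  return sqlite3Fts3InitTokenizer(pHash, "simple", ppTok, pzErr);
}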
||||
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
|
||||
#include <tcl.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
** Implementation of a special SQL scalar function for testing tokenizers
|
||||
** designed to be used in concert with the Tcl testing framework. This
|
||||
** function must be called with two or more arguments:
|
||||
**
|
||||
** SELECT <function-name>(<key-name>, ..., <input-string>);
|
||||
**
|
||||
** where <function-name> is the name passed as the second argument
|
||||
** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer')
|
||||
** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test').
|
||||
**
|
||||
** The return value is a string that may be interpreted as a Tcl
|
||||
** list. For each token in the <input-string>, three elements are
|
||||
** added to the returned list. The first is the token position, the
|
||||
** second is the token text (folded, stemmed, etc.) and the third is the
|
||||
** substring of <input-string> associated with the token. For example,
|
||||
** using the built-in "simple" tokenizer:
|
||||
**
|
||||
** SELECT fts3_tokenizer_test('simple', 'I don't see how');
|
||||
**
|
||||
** will return the string:
|
||||
**
|
||||
** "{0 i I 1 dont don't 2 see see 3 how how}"
|
||||
**
|
||||
*/
|
||||
static void testFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
Fts3Hash *pHash;
|
||||
sqlite3_tokenizer_module *p;
|
||||
sqlite3_tokenizer *pTokenizer = 0;
|
||||
sqlite3_tokenizer_cursor *pCsr = 0;
|
||||
|
||||
const char *zErr = 0;
|
||||
|
||||
const char *zName;
|
||||
int nName;
|
||||
const char *zInput;
|
||||
int nInput;
|
||||
|
||||
const char *azArg[64];
|
||||
|
||||
const char *zToken;
|
||||
int nToken = 0;
|
||||
int iStart = 0;
|
||||
int iEnd = 0;
|
||||
int iPos = 0;
|
||||
int i;
|
||||
|
||||
Tcl_Obj *pRet;
|
||||
|
||||
if( argc<2 ){
|
||||
sqlite3_result_error(context, "insufficient arguments", -1);
|
||||
return;
|
||||
}
|
||||
|
||||
nName = sqlite3_value_bytes(argv[0]);
|
||||
zName = (const char *)sqlite3_value_text(argv[0]);
|
||||
nInput = sqlite3_value_bytes(argv[argc-1]);
|
||||
zInput = (const char *)sqlite3_value_text(argv[argc-1]);
|
||||
|
||||
pHash = (Fts3Hash *)sqlite3_user_data(context);
|
||||
p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
|
||||
|
||||
if( !p ){
|
||||
char *zErr2 = sqlite3_mprintf("unknown tokenizer: %s", zName);
|
||||
sqlite3_result_error(context, zErr2, -1);
|
||||
sqlite3_free(zErr2);
|
||||
return;
|
||||
}
|
||||
|
||||
pRet = Tcl_NewObj();
|
||||
Tcl_IncrRefCount(pRet);
|
||||
|
||||
for(i=1; i<argc-1; i++){
|
||||
azArg[i-1] = (const char *)sqlite3_value_text(argv[i]);
|
||||
}
|
||||
|
||||
if( SQLITE_OK!=p->xCreate(argc-2, azArg, &pTokenizer) ){
|
||||
zErr = "error in xCreate()";
|
||||
goto finish;
|
||||
}
|
||||
pTokenizer->pModule = p;
|
||||
if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){
|
||||
zErr = "error in xOpen()";
|
||||
goto finish;
|
||||
}
|
||||
|
||||
while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
|
||||
Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
|
||||
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
|
||||
zToken = &zInput[iStart];
|
||||
nToken = iEnd-iStart;
|
||||
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
|
||||
}
|
||||
|
||||
if( SQLITE_OK!=p->xClose(pCsr) ){
|
||||
zErr = "error in xClose()";
|
||||
goto finish;
|
||||
}
|
||||
if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
|
||||
zErr = "error in xDestroy()";
|
||||
goto finish;
|
||||
}
|
||||
|
||||
finish:
|
||||
if( zErr ){
|
||||
sqlite3_result_error(context, zErr, -1);
|
||||
}else{
|
||||
sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
|
||||
}
|
||||
Tcl_DecrRefCount(pRet);
|
||||
}
|
||||
|
||||
#ifdef SQLITE_ENABLE_FTS3_TOKENIZER
|
||||
static
|
||||
int registerTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module *p
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts3_tokenizer(?, ?)";
|
||||
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
|
||||
sqlite3_step(pStmt);
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
#endif /* SQLITE_ENABLE_FTS3_TOKENIZER */
|
||||
|
||||
|
||||
static
|
||||
int queryTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module **pp
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts3_tokenizer(?)";
|
||||
|
||||
*pp = 0;
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
if( SQLITE_ROW==sqlite3_step(pStmt) ){
|
||||
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
|
||||
memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
|
||||
}
|
||||
}
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
|
||||
|
||||
/*
|
||||
** Implementation of the scalar function fts3_tokenizer_internal_test().
|
||||
** This function is used for testing only; it is not included in the
|
||||
** build unless SQLITE_TEST is defined.
|
||||
**
|
||||
** The purpose of this is to test that the fts3_tokenizer() function
|
||||
** can be used as designed by the C-code in the queryTokenizer and
|
||||
** registerTokenizer() functions above. These two functions are repeated
|
||||
** in the README.tokenizer file as an example, so it is important to
|
||||
** test them.
|
||||
**
|
||||
** To run the tests, evaluate the fts3_tokenizer_internal_test() scalar
|
||||
** function with no arguments. An assert() will fail if a problem is
|
||||
** detected. i.e.:
|
||||
**
|
||||
** SELECT fts3_tokenizer_internal_test();
|
||||
**
|
||||
*/
|
||||
static void intTestFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
int rc;
|
||||
const sqlite3_tokenizer_module *p1;
|
||||
const sqlite3_tokenizer_module *p2;
|
||||
sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
|
||||
|
||||
UNUSED_PARAMETER(argc);
|
||||
UNUSED_PARAMETER(argv);
|
||||
|
||||
/* Test the query function */
|
||||
sqlite3Fts3SimpleTokenizerModule(&p1);
|
||||
rc = queryTokenizer(db, "simple", &p2);
|
||||
assert( rc==SQLITE_OK );
|
||||
assert( p1==p2 );
|
||||
rc = queryTokenizer(db, "nosuchtokenizer", &p2);
|
||||
assert( rc==SQLITE_ERROR );
|
||||
assert( p2==0 );
|
||||
assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
|
||||
|
||||
/* Test the storage function */
|
||||
#ifdef SQLITE_ENABLE_FTS3_TOKENIZER
|
||||
rc = registerTokenizer(db, "nosuchtokenizer", p1);
|
||||
assert( rc==SQLITE_OK );
|
||||
rc = queryTokenizer(db, "nosuchtokenizer", &p2);
|
||||
assert( rc==SQLITE_OK );
|
||||
assert( p2==p1 );
|
||||
#endif
|
||||
|
||||
sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
** Set up SQL objects in database db used to access the contents of
** the hash table pointed to by argument pHash. The hash table must
** have been initialized to use string keys, and to take a private copy
** of the key when a value is inserted, i.e. by a call similar to:
**
**   sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
**
** This function adds a scalar function (see header comment above
** scalarFunc() in this file for details) and, if ENABLE_TABLE is
** defined at compilation time, a temporary virtual table (see header
** comment above struct HashTableVtab) to the database schema. Both
** provide read/write access to the contents of *pHash.
**
** The third argument to this function, zName, is used as the name
** of both the scalar and, if created, the virtual table.
*/
||||
int sqlite3Fts3InitHashTable(
|
||||
sqlite3 *db,
|
||||
Fts3Hash *pHash,
|
||||
const char *zName
|
||||
){
|
||||
int rc = SQLITE_OK;
|
||||
void *p = (void *)pHash;
|
||||
const int any = SQLITE_ANY;
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
char *zTest = 0;
|
||||
char *zTest2 = 0;
|
||||
void *pdb = (void *)db;
|
||||
zTest = sqlite3_mprintf("%s_test", zName);
|
||||
zTest2 = sqlite3_mprintf("%s_internal_test", zName);
|
||||
if( !zTest || !zTest2 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
if( SQLITE_OK==rc ){
|
||||
rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0);
|
||||
}
|
||||
if( SQLITE_OK==rc ){
|
||||
rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0);
|
||||
}
|
||||
#ifdef SQLITE_TEST
|
||||
if( SQLITE_OK==rc ){
|
||||
rc = sqlite3_create_function(db, zTest, -1, any, p, testFunc, 0, 0);
|
||||
}
|
||||
if( SQLITE_OK==rc ){
|
||||
rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
sqlite3_free(zTest);
|
||||
sqlite3_free(zTest2);
|
||||
#endif
|
||||
|
||||
return rc;
|
||||
}
|
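/*
** Hedged initialization sketch, not part of the original file: the sequence
** implied by the header comment above. The function name and the static
** hash object are illustrative only; 'fts3_tokenizer' matches the example
** name mentioned in the scalarFunc() comment.
*/
static Fts3Hash exampleTokenizerHash;
static int fts3HashSetupExample(sqlite3 *db){
  sqlite3Fts3HashInit(&exampleTokenizerHash, FTS3_HASH_STRING, 1);
  return sqlite3Fts3InitHashTable(db, &exampleTokenizerHash, "fts3_tokenizer");
}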
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|
||||
@@ -1,161 +0,0 @@
|
||||
/*
|
||||
** 2006 July 10
|
||||
**
|
||||
** The author disclaims copyright to this source code.
|
||||
**
|
||||
*************************************************************************
|
||||
** Defines the interface to tokenizers used by fulltext-search. There
|
||||
** are three basic components:
|
||||
**
|
||||
** sqlite3_tokenizer_module is a singleton defining the tokenizer
|
||||
** interface functions. This is essentially the class structure for
|
||||
** tokenizers.
|
||||
**
|
||||
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
|
||||
** including customization information defined at creation time.
|
||||
**
|
||||
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
|
||||
** tokens from a particular input.
|
||||
*/
|
||||
#ifndef _FTS3_TOKENIZER_H_
|
||||
#define _FTS3_TOKENIZER_H_
|
||||
|
||||
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
|
||||
** If tokenizers are to be allowed to call sqlite3_*() functions, then
|
||||
** we will need a way to register the API consistently.
|
||||
*/
|
||||
#include "sqlite3.h"
|
||||
|
||||
/*
|
||||
** Structures used by the tokenizer interface. When a new tokenizer
|
||||
** implementation is registered, the caller provides a pointer to
|
||||
** an sqlite3_tokenizer_module containing pointers to the callback
|
||||
** functions that make up an implementation.
|
||||
**
|
||||
** When an fts3 table is created, it passes any arguments passed to
|
||||
** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
|
||||
** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
|
||||
** implementation. The xCreate() function in turn returns an
|
||||
** sqlite3_tokenizer structure representing the specific tokenizer to
|
||||
** be used for the fts3 table (customized by the tokenizer clause arguments).
|
||||
**
|
||||
** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
|
||||
** method is called. It returns an sqlite3_tokenizer_cursor object
|
||||
** that may be used to tokenize a specific input buffer based on
|
||||
** the tokenization rules supplied by a specific sqlite3_tokenizer
|
||||
** object.
|
||||
*/
|
||||
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
|
||||
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
|
||||
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
|
||||
|
||||
struct sqlite3_tokenizer_module {
|
||||
|
||||
/*
|
||||
** Structure version. Should always be set to 0 or 1.
|
||||
*/
|
||||
int iVersion;
|
||||
|
||||
/*
|
||||
** Create a new tokenizer. The values in the argv[] array are the
|
||||
** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
|
||||
** TABLE statement that created the fts3 table. For example, if
|
||||
** the following SQL is executed:
|
||||
**
|
||||
** CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
|
||||
**
|
||||
** then argc is set to 2, and the argv[] array contains pointers
|
||||
** to the strings "arg1" and "arg2".
|
||||
**
|
||||
** This method should return either SQLITE_OK (0), or an SQLite error
|
||||
** code. If SQLITE_OK is returned, then *ppTokenizer should be set
|
||||
** to point at the newly created tokenizer structure. The generic
|
||||
** sqlite3_tokenizer.pModule variable should not be initialized by
|
||||
** this callback. The caller will do so.
|
||||
*/
|
||||
int (*xCreate)(
|
||||
int argc, /* Size of argv array */
|
||||
const char *const*argv, /* Tokenizer argument strings */
|
||||
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
|
||||
);
|
||||
|
||||
/*
|
||||
** Destroy an existing tokenizer. The fts3 module calls this method
|
||||
** exactly once for each successful call to xCreate().
|
||||
*/
|
||||
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
|
||||
|
||||
/*
|
||||
** Create a tokenizer cursor to tokenize an input buffer. The caller
|
||||
** is responsible for ensuring that the input buffer remains valid
|
||||
** until the cursor is closed (using the xClose() method).
|
||||
*/
|
||||
int (*xOpen)(
|
||||
sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
|
||||
const char *pInput, int nBytes, /* Input buffer */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
|
||||
);
|
||||
|
||||
/*
|
||||
** Destroy an existing tokenizer cursor. The fts3 module calls this
|
||||
** method exactly once for each successful call to xOpen().
|
||||
*/
|
||||
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
|
||||
|
||||
/*
|
||||
** Retrieve the next token from the tokenizer cursor pCursor. This
|
||||
** method should either return SQLITE_OK and set the values of the
|
||||
** "OUT" variables identified below, or SQLITE_DONE to indicate that
|
||||
** the end of the buffer has been reached, or an SQLite error code.
|
||||
**
|
||||
** *ppToken should be set to point at a buffer containing the
|
||||
** normalized version of the token (i.e. after any case-folding and/or
|
||||
** stemming has been performed). *pnBytes should be set to the length
|
||||
** of this buffer in bytes. The input text that generated the token is
|
||||
** identified by the byte offsets returned in *piStartOffset and
|
||||
** *piEndOffset. *piStartOffset should be set to the index of the first
|
||||
** byte of the token in the input buffer. *piEndOffset should be set
|
||||
** to the index of the first byte just past the end of the token in
|
||||
** the input buffer.
|
||||
**
|
||||
** The buffer *ppToken is set to point at is managed by the tokenizer
|
||||
** implementation. It is only required to be valid until the next call
|
||||
** to xNext() or xClose().
|
||||
*/
|
||||
/* TODO(shess) current implementation requires pInput to be
|
||||
** nul-terminated. This should either be fixed, or pInput/nBytes
|
||||
** should be converted to zInput.
|
||||
*/
|
||||
int (*xNext)(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
|
||||
const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */
|
||||
int *piStartOffset, /* OUT: Byte offset of token in input buffer */
|
||||
int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
|
||||
int *piPosition /* OUT: Number of tokens returned before this one */
|
||||
);
|
||||
|
||||
/***********************************************************************
|
||||
** Methods below this point are only available if iVersion>=1.
|
||||
*/
|
||||
|
||||
/*
|
||||
** Configure the language id of a tokenizer cursor.
|
||||
*/
|
||||
int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer {
|
||||
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer_cursor {
|
||||
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
int fts3_global_term_cnt(int iTerm, int iCol);
|
||||
int fts3_term_cnt(int iTerm, int iCol);
|
||||
|
||||
|
||||
#endif /* _FTS3_TOKENIZER_H_ */
|
||||
@@ -1,234 +0,0 @@
|
||||
/*
|
||||
** 2006 Oct 10
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** Implementation of the "simple" full-text-search tokenizer.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS3 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS3 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
|
||||
*/
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "fts3_tokenizer.h"
|
||||
|
||||
typedef struct simple_tokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
char delim[128]; /* flag ASCII delimiters */
|
||||
} simple_tokenizer;
|
||||
|
||||
typedef struct simple_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *pInput; /* input we are tokenizing */
|
||||
int nBytes; /* size of the input */
|
||||
int iOffset; /* current position in pInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *pToken; /* storage for current token */
|
||||
int nTokenAllocated; /* space allocated to zToken buffer */
|
||||
} simple_tokenizer_cursor;
|
||||
|
||||
|
||||
static int simpleDelim(simple_tokenizer *t, unsigned char c){
|
||||
return c<0x80 && t->delim[c];
|
||||
}
|
||||
static int fts3_isalnum(int x){
|
||||
return (x>='0' && x<='9') || (x>='A' && x<='Z') || (x>='a' && x<='z');
|
||||
}
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int simpleCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
simple_tokenizer *t;
|
||||
|
||||
t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t));
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
memset(t, 0, sizeof(*t));
|
||||
|
||||
/* TODO(shess) Delimiters need to remain the same from run to run,
|
||||
** else we need to reindex. One solution would be a meta-table to
|
||||
** track such information in the database, then we'd only want this
|
||||
** information on the initial create.
|
||||
*/
|
||||
if( argc>1 ){
|
||||
int i, n = (int)strlen(argv[1]);
|
||||
for(i=0; i<n; i++){
|
||||
unsigned char ch = argv[1][i];
|
||||
/* We explicitly don't support UTF-8 delimiters for now. */
|
||||
if( ch>=0x80 ){
|
||||
sqlite3_free(t);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
t->delim[ch] = 1;
|
||||
}
|
||||
} else {
|
||||
/* Mark non-alphanumeric ASCII characters as delimiters */
|
||||
int i;
|
||||
for(i=1; i<0x80; i++){
|
||||
t->delim[i] = !fts3_isalnum(i) ? -1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
sqlite3_free(pTokenizer);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is pInput[0..nBytes-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int simpleOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *pInput, int nBytes, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
simple_tokenizer_cursor *c;
|
||||
|
||||
UNUSED_PARAMETER(pTokenizer);
|
||||
|
||||
c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
|
||||
if( c==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
c->pInput = pInput;
|
||||
if( pInput==0 ){
|
||||
c->nBytes = 0;
|
||||
}else if( nBytes<0 ){
|
||||
c->nBytes = (int)strlen(pInput);
|
||||
}else{
|
||||
c->nBytes = nBytes;
|
||||
}
|
||||
c->iOffset = 0; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->pToken = NULL; /* no space allocated, yet. */
|
||||
c->nTokenAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** simpleOpen() above.
|
||||
*/
|
||||
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
sqlite3_free(c->pToken);
|
||||
sqlite3_free(c);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to simpleOpen().
|
||||
*/
|
||||
static int simpleNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
|
||||
const char **ppToken, /* OUT: *ppToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
|
||||
unsigned char *p = (unsigned char *)c->pInput;
|
||||
|
||||
while( c->iOffset<c->nBytes ){
|
||||
int iStartOffset;
|
||||
|
||||
/* Scan past delimiter characters */
|
||||
while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
/* Count non-delimiter characters. */
|
||||
iStartOffset = c->iOffset;
|
||||
while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
if( c->iOffset>iStartOffset ){
|
||||
int i, n = c->iOffset-iStartOffset;
|
||||
if( n>c->nTokenAllocated ){
|
||||
char *pNew;
|
||||
c->nTokenAllocated = n+20;
|
||||
pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated);
|
||||
if( !pNew ) return SQLITE_NOMEM;
|
||||
c->pToken = pNew;
|
||||
}
|
||||
for(i=0; i<n; i++){
|
||||
/* TODO(shess) This needs expansion to handle UTF-8
|
||||
** case-insensitivity.
|
||||
*/
|
||||
unsigned char ch = p[iStartOffset+i];
|
||||
c->pToken[i] = (char)((ch>='A' && ch<='Z') ? ch-'A'+'a' : ch);
|
||||
}
|
||||
*ppToken = c->pToken;
|
||||
*pnBytes = n;
|
||||
*piStartOffset = iStartOffset;
|
||||
*piEndOffset = c->iOffset;
|
||||
*piPosition = c->iToken++;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the simple tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module simpleTokenizerModule = {
|
||||
0,
|
||||
simpleCreate,
|
||||
simpleDestroy,
|
||||
simpleOpen,
|
||||
simpleClose,
|
||||
simpleNext,
|
||||
0,
|
||||
};
|
||||
|
||||
/*
|
||||
** Allocate a new simple tokenizer. Return a pointer to the new
|
||||
** tokenizer in *ppModule
|
||||
*/
|
||||
void sqlite3Fts3SimpleTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &simpleTokenizerModule;
|
||||
}
|
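/*
** A minimal driver sketch, not part of the original file, showing how the
** module above is exercised through the generic interface declared in
** fts3_tokenizer.h: xCreate, xOpen, xNext until it stops returning
** SQLITE_OK, then xClose and xDestroy. Error handling is omitted; note that
** the caller, not the module, fills in pModule and pTokenizer, as the other
** callers in these files do.
*/
static void simpleTokenizerExample(const char *zText){
  sqlite3_tokenizer_module const *pMod;
  sqlite3_tokenizer *pTok = 0;
  sqlite3_tokenizer_cursor *pCsr = 0;
  const char *zToken;
  int nToken, iStart, iEnd, iPos;

  sqlite3Fts3SimpleTokenizerModule(&pMod);
  pMod->xCreate(0, 0, &pTok);           /* no custom delimiter argument */
  pTok->pModule = pMod;
  pMod->xOpen(pTok, zText, -1, &pCsr);  /* nBytes<0: simpleOpen() uses strlen */
  pCsr->pTokenizer = pTok;
  while( pMod->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos)==SQLITE_OK ){
    printf("%d: %.*s\n", iPos, nToken, zToken);
  }
  pMod->xClose(pCsr);
  pMod->xDestroy(pTok);
}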
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|
||||
@@ -1,393 +0,0 @@
|
||||
/*
|
||||
** 2012 May 24
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** Implementation of the "unicode" full-text-search tokenizer.
|
||||
*/
|
||||
|
||||
#ifndef SQLITE_DISABLE_FTS3_UNICODE
|
||||
|
||||
#include "fts3Int.h"
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "fts3_tokenizer.h"
|
||||
|
||||
/*
|
||||
** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
|
||||
** from the sqlite3 source file utf.c. If this file is compiled as part
|
||||
** of the amalgamation, they are not required.
|
||||
*/
|
||||
#ifndef SQLITE_AMALGAMATION
|
||||
|
||||
static const unsigned char sqlite3Utf8Trans1[] = {
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
|
||||
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
||||
0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
|
||||
};
|
||||
|
||||
#define READ_UTF8(zIn, zTerm, c) \
|
||||
c = *(zIn++); \
|
||||
if( c>=0xc0 ){ \
|
||||
c = sqlite3Utf8Trans1[c-0xc0]; \
|
||||
while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \
|
||||
c = (c<<6) + (0x3f & *(zIn++)); \
|
||||
} \
|
||||
if( c<0x80 \
|
||||
|| (c&0xFFFFF800)==0xD800 \
|
||||
|| (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \
|
||||
}
|
||||
|
||||
#define WRITE_UTF8(zOut, c) { \
|
||||
if( c<0x00080 ){ \
|
||||
*zOut++ = (u8)(c&0xFF); \
|
||||
} \
|
||||
else if( c<0x00800 ){ \
|
||||
*zOut++ = 0xC0 + (u8)((c>>6)&0x1F); \
|
||||
*zOut++ = 0x80 + (u8)(c & 0x3F); \
|
||||
} \
|
||||
else if( c<0x10000 ){ \
|
||||
*zOut++ = 0xE0 + (u8)((c>>12)&0x0F); \
|
||||
*zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \
|
||||
*zOut++ = 0x80 + (u8)(c & 0x3F); \
|
||||
}else{ \
|
||||
*zOut++ = 0xF0 + (u8)((c>>18) & 0x07); \
|
||||
*zOut++ = 0x80 + (u8)((c>>12) & 0x3F); \
|
||||
*zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \
|
||||
*zOut++ = 0x80 + (u8)(c & 0x3F); \
|
||||
} \
|
||||
}
|
||||
|
||||
#endif /* ifndef SQLITE_AMALGAMATION */
|
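/*
** Illustrative sketch, not part of the original file, of how the two macros
** above cooperate: decode each codepoint from an input buffer and re-encode
** it into an output buffer. The "u8" type used inside WRITE_UTF8 is assumed
** to be the unsigned-char typedef supplied by fts3Int.h in the real build.
*/
static int fts3Utf8RoundTrip(
  const unsigned char *zIn,       /* UTF-8 input */
  int nIn,                        /* Size of zIn in bytes */
  unsigned char *zOut             /* Output buffer (assumed large enough) */
){
  const unsigned char *z = zIn;
  const unsigned char *zTerm = &zIn[nIn];
  unsigned char *zCsr = zOut;
  while( z<zTerm ){
    int c;
    READ_UTF8(z, zTerm, c);       /* read one codepoint, advancing z */
    WRITE_UTF8(zCsr, c);          /* write it back, advancing zCsr */
  }
  return (int)(zCsr - zOut);      /* number of bytes written */
}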
||||
|
||||
typedef struct unicode_tokenizer unicode_tokenizer;
|
||||
typedef struct unicode_cursor unicode_cursor;
|
||||
|
||||
struct unicode_tokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
int bRemoveDiacritic;
|
||||
int nException;
|
||||
int *aiException;
|
||||
};
|
||||
|
||||
struct unicode_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const unsigned char *aInput; /* Input text being tokenized */
|
||||
int nInput; /* Size of aInput[] in bytes */
|
||||
int iOff; /* Current offset within aInput[] */
|
||||
int iToken; /* Index of next token to be returned */
|
||||
char *zToken; /* storage for current token */
|
||||
int nAlloc; /* space allocated at zToken */
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer allocated by unicodeCreate().
|
||||
*/
|
||||
static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
if( pTokenizer ){
|
||||
unicode_tokenizer *p = (unicode_tokenizer *)pTokenizer;
|
||||
sqlite3_free(p->aiException);
|
||||
sqlite3_free(p);
|
||||
}
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** As part of a tokenchars= or separators= option, the CREATE VIRTUAL TABLE
|
||||
** statement has specified that the tokenizer for this table shall consider
|
||||
** all characters in string zIn/nIn to be separators (if bAlnum==0) or
|
||||
** token characters (if bAlnum==1).
|
||||
**
|
||||
** For each codepoint in the zIn/nIn string, this function checks if the
|
||||
** sqlite3FtsUnicodeIsalnum() function already returns the desired result.
|
||||
** If so, no action is taken. Otherwise, the codepoint is added to the
|
||||
** unicode_tokenizer.aiException[] array. For the purposes of tokenization,
|
||||
** the return value of sqlite3FtsUnicodeIsalnum() is inverted for all
|
||||
** codepoints in the aiException[] array.
|
||||
**
|
||||
** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic()
|
||||
** identifies as a diacritic) occurs in the zIn/nIn string it is ignored.
|
||||
** It is not possible to change the behavior of the tokenizer with respect
|
||||
** to these codepoints.
|
||||
*/
|
||||
static int unicodeAddExceptions(
|
||||
unicode_tokenizer *p, /* Tokenizer to add exceptions to */
|
||||
int bAlnum, /* Replace Isalnum() return value with this */
|
||||
const char *zIn, /* Array of characters to make exceptions */
|
||||
int nIn /* Length of z in bytes */
|
||||
){
|
||||
const unsigned char *z = (const unsigned char *)zIn;
|
||||
const unsigned char *zTerm = &z[nIn];
|
||||
int iCode;
|
||||
int nEntry = 0;
|
||||
|
||||
assert( bAlnum==0 || bAlnum==1 );
|
||||
|
||||
while( z<zTerm ){
|
||||
READ_UTF8(z, zTerm, iCode);
|
||||
assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
|
||||
if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum
|
||||
&& sqlite3FtsUnicodeIsdiacritic(iCode)==0
|
||||
){
|
||||
nEntry++;
|
||||
}
|
||||
}
|
||||
|
||||
if( nEntry ){
|
||||
int *aNew; /* New aiException[] array */
|
||||
int nNew; /* Number of valid entries in array aNew[] */
|
||||
|
||||
aNew = sqlite3_realloc(p->aiException, (p->nException+nEntry)*sizeof(int));
|
||||
if( aNew==0 ) return SQLITE_NOMEM;
|
||||
nNew = p->nException;
|
||||
|
||||
z = (const unsigned char *)zIn;
|
||||
while( z<zTerm ){
|
||||
READ_UTF8(z, zTerm, iCode);
|
||||
if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum
|
||||
&& sqlite3FtsUnicodeIsdiacritic(iCode)==0
|
||||
){
|
||||
int i, j;
|
||||
for(i=0; i<nNew && aNew[i]<iCode; i++);
|
||||
for(j=nNew; j>i; j--) aNew[j] = aNew[j-1];
|
||||
aNew[i] = iCode;
|
||||
nNew++;
|
||||
}
|
||||
}
|
||||
p->aiException = aNew;
|
||||
p->nException = nNew;
|
||||
}
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return true if the p->aiException[] array contains the value iCode.
|
||||
*/
|
||||
static int unicodeIsException(unicode_tokenizer *p, int iCode){
|
||||
if( p->nException>0 ){
|
||||
int *a = p->aiException;
|
||||
int iLo = 0;
|
||||
int iHi = p->nException-1;
|
||||
|
||||
while( iHi>=iLo ){
|
||||
int iTest = (iHi + iLo) / 2;
|
||||
if( iCode==a[iTest] ){
|
||||
return 1;
|
||||
}else if( iCode>a[iTest] ){
|
||||
iLo = iTest+1;
|
||||
}else{
|
||||
iHi = iTest-1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return true if, for the purposes of tokenization, codepoint iCode is
|
||||
** considered a token character (not a separator).
|
||||
*/
|
||||
static int unicodeIsAlnum(unicode_tokenizer *p, int iCode){
|
||||
assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
|
||||
return sqlite3FtsUnicodeIsalnum(iCode) ^ unicodeIsException(p, iCode);
|
||||
}
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int unicodeCreate(
|
||||
int nArg, /* Size of array argv[] */
|
||||
const char * const *azArg, /* Tokenizer creation arguments */
|
||||
sqlite3_tokenizer **pp /* OUT: New tokenizer handle */
|
||||
){
|
||||
unicode_tokenizer *pNew; /* New tokenizer object */
|
||||
int i;
|
||||
int rc = SQLITE_OK;
|
||||
|
||||
pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer));
|
||||
if( pNew==NULL ) return SQLITE_NOMEM;
|
||||
memset(pNew, 0, sizeof(unicode_tokenizer));
|
||||
pNew->bRemoveDiacritic = 1;
|
||||
|
||||
for(i=0; rc==SQLITE_OK && i<nArg; i++){
|
||||
const char *z = azArg[i];
|
||||
int n = (int)strlen(z);
|
||||
|
||||
if( n==19 && memcmp("remove_diacritics=1", z, 19)==0 ){
|
||||
pNew->bRemoveDiacritic = 1;
|
||||
}
|
||||
else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){
|
||||
pNew->bRemoveDiacritic = 0;
|
||||
}
|
||||
else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){
|
||||
rc = unicodeAddExceptions(pNew, 1, &z[11], n-11);
|
||||
}
|
||||
else if( n>=11 && memcmp("separators=", z, 11)==0 ){
|
||||
rc = unicodeAddExceptions(pNew, 0, &z[11], n-11);
|
||||
}
|
||||
else{
|
||||
/* Unrecognized argument */
|
||||
rc = SQLITE_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
if( rc!=SQLITE_OK ){
|
||||
unicodeDestroy((sqlite3_tokenizer *)pNew);
|
||||
pNew = 0;
|
||||
}
|
||||
*pp = (sqlite3_tokenizer *)pNew;
|
||||
return rc;
|
||||
}
|
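/*
** Illustrative only, not part of the original file: constructing the
** tokenizer with the two argument forms the loop in unicodeCreate()
** recognizes. Any string other than remove_diacritics=0/1, tokenchars=...
** or separators=... makes xCreate return SQLITE_ERROR.
*/
static int unicodeCreateExample(sqlite3_tokenizer **ppTok){
  static const char *const azArg[] = { "remove_diacritics=0", "tokenchars=-_" };
  return unicodeCreate(2, azArg, ppTok);
}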
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is pInput[0..nBytes-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int unicodeOpen(
|
||||
sqlite3_tokenizer *p, /* The tokenizer */
|
||||
const char *aInput, /* Input string */
|
||||
int nInput, /* Size of string aInput in bytes */
|
||||
sqlite3_tokenizer_cursor **pp /* OUT: New cursor object */
|
||||
){
|
||||
unicode_cursor *pCsr;
|
||||
|
||||
pCsr = (unicode_cursor *)sqlite3_malloc(sizeof(unicode_cursor));
|
||||
if( pCsr==0 ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(pCsr, 0, sizeof(unicode_cursor));
|
||||
|
||||
pCsr->aInput = (const unsigned char *)aInput;
|
||||
if( aInput==0 ){
|
||||
pCsr->nInput = 0;
|
||||
}else if( nInput<0 ){
|
||||
pCsr->nInput = (int)strlen(aInput);
|
||||
}else{
|
||||
pCsr->nInput = nInput;
|
||||
}
|
||||
|
||||
*pp = &pCsr->base;
|
||||
UNUSED_PARAMETER(p);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** unicodeOpen() above.
|
||||
*/
|
||||
static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
unicode_cursor *pCsr = (unicode_cursor *) pCursor;
|
||||
sqlite3_free(pCsr->zToken);
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to unicodeOpen().
|
||||
*/
|
||||
static int unicodeNext(
|
||||
sqlite3_tokenizer_cursor *pC, /* Cursor returned by unicodeOpen */
|
||||
const char **paToken, /* OUT: Token text */
|
||||
int *pnToken, /* OUT: Number of bytes at *paToken */
|
||||
int *piStart, /* OUT: Starting offset of token */
|
||||
int *piEnd, /* OUT: Ending offset of token */
|
||||
int *piPos /* OUT: Position integer of token */
|
||||
){
|
||||
unicode_cursor *pCsr = (unicode_cursor *)pC;
|
||||
unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer);
|
||||
int iCode = 0;
|
||||
char *zOut;
|
||||
const unsigned char *z = &pCsr->aInput[pCsr->iOff];
|
||||
const unsigned char *zStart = z;
|
||||
const unsigned char *zEnd;
|
||||
const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput];
|
||||
|
||||
/* Scan past any delimiter characters before the start of the next token.
|
||||
** Return SQLITE_DONE early if this takes us all the way to the end of
|
||||
** the input. */
|
||||
while( z<zTerm ){
|
||||
READ_UTF8(z, zTerm, iCode);
|
||||
if( unicodeIsAlnum(p, iCode) ) break;
|
||||
zStart = z;
|
||||
}
|
||||
if( zStart>=zTerm ) return SQLITE_DONE;
|
||||
|
||||
zOut = pCsr->zToken;
|
||||
do {
|
||||
int iOut;
|
||||
|
||||
/* Grow the output buffer if required. */
|
||||
if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){
|
||||
char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64);
|
||||
if( !zNew ) return SQLITE_NOMEM;
|
||||
zOut = &zNew[zOut - pCsr->zToken];
|
||||
pCsr->zToken = zNew;
|
||||
pCsr->nAlloc += 64;
|
||||
}
|
||||
|
||||
/* Write the folded case of the last character read to the output */
|
||||
zEnd = z;
|
||||
iOut = sqlite3FtsUnicodeFold(iCode, p->bRemoveDiacritic);
|
||||
if( iOut ){
|
||||
WRITE_UTF8(zOut, iOut);
|
||||
}
|
||||
|
||||
/* If the cursor is not at EOF, read the next character */
|
||||
if( z>=zTerm ) break;
|
||||
READ_UTF8(z, zTerm, iCode);
|
||||
}while( unicodeIsAlnum(p, iCode)
|
||||
|| sqlite3FtsUnicodeIsdiacritic(iCode)
|
||||
);
|
||||
|
||||
/* Set the output variables and return. */
|
||||
pCsr->iOff = (int)(z - pCsr->aInput);
|
||||
*paToken = pCsr->zToken;
|
||||
*pnToken = (int)(zOut - pCsr->zToken);
|
||||
*piStart = (int)(zStart - pCsr->aInput);
|
||||
*piEnd = (int)(zEnd - pCsr->aInput);
|
||||
*piPos = pCsr->iToken++;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Set *ppModule to a pointer to the sqlite3_tokenizer_module
|
||||
** structure for the unicode tokenizer.
|
||||
*/
|
||||
void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){
|
||||
static const sqlite3_tokenizer_module module = {
|
||||
0,
|
||||
unicodeCreate,
|
||||
unicodeDestroy,
|
||||
unicodeOpen,
|
||||
unicodeClose,
|
||||
unicodeNext,
|
||||
0,
|
||||
};
|
||||
*ppModule = &module;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|
||||
#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */
|
||||
@@ -1,365 +0,0 @@
|
||||
/*
|
||||
** 2012 May 25
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
/*
|
||||
** DO NOT EDIT THIS MACHINE GENERATED FILE.
|
||||
*/
|
||||
|
||||
#ifndef SQLITE_DISABLE_FTS3_UNICODE
|
||||
#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
/*
|
||||
** Return true if the argument corresponds to a unicode codepoint
|
||||
** classified as either a letter or a number. Otherwise false.
|
||||
**
|
||||
** The results are undefined if the value passed to this function
|
||||
** is less than zero.
|
||||
*/
|
||||
int sqlite3FtsUnicodeIsalnum(int c){
|
||||
/* Each unsigned integer in the following array corresponds to a contiguous
|
||||
** range of unicode codepoints that are not either letters or numbers (i.e.
|
||||
** codepoints for which this function should return 0).
|
||||
**
|
||||
** The most significant 22 bits in each 32-bit value contain the first
|
||||
** codepoint in the range. The least significant 10 bits are used to store
|
||||
** the size of the range (always at least 1). In other words, the value
|
||||
** ((C<<10) + N) represents a range of N codepoints starting with codepoint
|
||||
** C. It is not possible to represent a range larger than 1023 codepoints
|
||||
** using this format.
|
||||
*/
|
||||
static const unsigned int aEntry[] = {
|
||||
0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
|
||||
0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
|
||||
0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
|
||||
0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
|
||||
0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
|
||||
0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
|
||||
0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
|
||||
0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
|
||||
0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
|
||||
0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
|
||||
0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
|
||||
0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
|
||||
0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
|
||||
0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
|
||||
0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
|
||||
0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
|
||||
0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
|
||||
0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
|
||||
0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
|
||||
0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
|
||||
0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
|
||||
0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
|
||||
0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
|
||||
0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
|
||||
0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
|
||||
0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
|
||||
0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
|
||||
0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
|
||||
0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
|
||||
0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
|
||||
0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
|
||||
0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
|
||||
0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
|
||||
0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
|
||||
0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
|
||||
0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
|
||||
0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
|
||||
0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
|
||||
0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
|
||||
0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
|
||||
0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
|
||||
0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
|
||||
0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
|
||||
0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
|
||||
0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
|
||||
0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
|
||||
0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
|
||||
0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
|
||||
0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
|
||||
0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
|
||||
0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
|
||||
0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
|
||||
0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
|
||||
0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
|
||||
0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
|
||||
0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
|
||||
0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
|
||||
0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
|
||||
0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
|
||||
0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
|
||||
0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
|
||||
0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
|
||||
0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
|
||||
0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
|
||||
0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
|
||||
0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
|
||||
0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
|
||||
0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
|
||||
0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
|
||||
0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
|
||||
0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
|
||||
0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
|
||||
0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
|
||||
0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
|
||||
0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
|
||||
0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
|
||||
0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
|
||||
0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
|
||||
0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
|
||||
0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
|
||||
0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
|
||||
0x380400F0,
|
||||
};
|
||||
static const unsigned int aAscii[4] = {
|
||||
0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
|
||||
};
|
||||
|
||||
if( c<128 ){
|
||||
return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
|
||||
}else if( c<(1<<22) ){
|
||||
unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
|
||||
int iRes = 0;
|
||||
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
|
||||
int iLo = 0;
|
||||
while( iHi>=iLo ){
|
||||
int iTest = (iHi + iLo) / 2;
|
||||
if( key >= aEntry[iTest] ){
|
||||
iRes = iTest;
|
||||
iLo = iTest+1;
|
||||
}else{
|
||||
iHi = iTest-1;
|
||||
}
|
||||
}
|
||||
assert( aEntry[0]<key );
|
||||
assert( key>=aEntry[iRes] );
|
||||
return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
|
||||
}
|
||||
return 1;
|
||||
}
|
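/*
** Illustrative helper, not part of the original file: unpack one value from
** the aEntry[] table used by sqlite3FtsUnicodeIsalnum() above. Per the
** comment and the (>>10, &0x3FF) arithmetic in that function, each entry
** packs an excluded range as ((C<<10) + N): the first codepoint C in the
** upper 22 bits and the range size N in the low 10 bits.
*/
static void fts3DecodeAlnumEntry(
  unsigned int entry,             /* One value from aEntry[] */
  unsigned int *piFirst,          /* OUT: first codepoint in the range */
  unsigned int *pnRange           /* OUT: number of codepoints in the range */
){
  *piFirst = entry >> 10;
  *pnRange = entry & 0x3FF;
}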
||||
|
||||
|
||||
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
*/
||||
static int remove_diacritic(int c){
|
||||
unsigned short aDia[] = {
|
||||
0, 1797, 1848, 1859, 1891, 1928, 1940, 1995,
|
||||
2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286,
|
||||
2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732,
|
||||
2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336,
|
||||
3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928,
|
||||
3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234,
|
||||
4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504,
|
||||
6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529,
|
||||
61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726,
|
||||
61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122,
|
||||
62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536,
|
||||
62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730,
|
||||
62924, 63050, 63082, 63274, 63390,
|
||||
};
|
||||
char aChar[] = {
|
||||
'\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c',
|
||||
'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r',
|
||||
's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o',
|
||||
'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r',
|
||||
'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0',
|
||||
'\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h',
|
||||
'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't',
|
||||
'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a',
|
||||
'e', 'i', 'o', 'u', 'y',
|
||||
};
|
||||
|
||||
unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
|
||||
int iRes = 0;
|
||||
int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
|
||||
int iLo = 0;
|
||||
while( iHi>=iLo ){
|
||||
int iTest = (iHi + iLo) / 2;
|
||||
if( key >= aDia[iTest] ){
|
||||
iRes = iTest;
|
||||
iLo = iTest+1;
|
||||
}else{
|
||||
iHi = iTest-1;
|
||||
}
|
||||
}
|
||||
assert( key>=aDia[iRes] );
|
||||
return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
|
||||
}
|
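/*
** Illustrative check, not part of the original file: with the tables above,
** remove_diacritic(235) ("LATIN SMALL LETTER E WITH DIAERESIS") returns 101
** ('e'). The key (235<<3)|7 selects aDia[3]==1859, which decodes to the
** range 232..235, and aChar[3] is 'e'.
*/
static void fts3RemoveDiacriticExample(void){
  assert( remove_diacritic(235)==(int)'e' );
}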
||||
|
||||
|
||||
/*
|
||||
** Return true if the argument interpreted as a unicode codepoint
|
||||
** is a diacritical modifier character.
|
||||
*/
|
||||
int sqlite3FtsUnicodeIsdiacritic(int c){
|
||||
unsigned int mask0 = 0x08029FDF;
|
||||
unsigned int mask1 = 0x000361F8;
|
||||
if( c<768 || c>817 ) return 0;
|
||||
return (c < 768+32) ?
|
||||
(mask0 & (1 << (c-768))) :
|
||||
(mask1 & (1 << (c-768-32)));
|
||||
}
|
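/*
** Illustrative check, not part of the original file: U+0301 COMBINING ACUTE
** ACCENT (codepoint 769) falls inside the 768..817 window and bit 1 of
** mask0 is set, so it is reported as a diacritic; 'a' (97) is outside the
** window and is not.
*/
static void fts3IsdiacriticExample(void){
  assert( sqlite3FtsUnicodeIsdiacritic(769)!=0 );
  assert( sqlite3FtsUnicodeIsdiacritic('a')==0 );
}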
||||
|
||||
|
||||
/*
|
||||
** Interpret the argument as a unicode codepoint. If the codepoint
|
||||
** is an upper case character that has a lower case equivalent,
|
||||
** return the codepoint corresponding to the lower case version.
|
||||
** Otherwise, return a copy of the argument.
|
||||
**
|
||||
** The results are undefined if the value passed to this function
|
||||
** is less than zero.
|
||||
*/
|
||||
int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){
|
||||
/* Each entry in the following array defines a rule for folding a range
|
||||
** of codepoints to lower case. The rule applies to a range of nRange
|
||||
** codepoints starting at codepoint iCode.
|
||||
**
|
||||
** If the least significant bit in flags is clear, then the rule applies
|
||||
** to all nRange codepoints (i.e. all nRange codepoints are upper case and
|
||||
** need to be folded). Or, if it is set, then the rule only applies to
|
||||
** every second codepoint in the range, starting with codepoint C.
|
||||
**
|
||||
** The 7 most significant bits in flags are an index into the aiOff[]
|
||||
** array. If a specific codepoint C does require folding, then its lower
|
||||
** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
|
||||
**
|
||||
** The contents of this array are generated by parsing the CaseFolding.txt
|
||||
** file distributed as part of the "Unicode Character Database". See
|
||||
** http://www.unicode.org for details.
|
||||
*/
|
||||
static const struct TableEntry {
|
||||
unsigned short iCode;
|
||||
unsigned char flags;
|
||||
unsigned char nRange;
|
||||
} aEntry[] = {
|
||||
{65, 14, 26}, {181, 64, 1}, {192, 14, 23},
|
||||
{216, 14, 7}, {256, 1, 48}, {306, 1, 6},
|
||||
{313, 1, 16}, {330, 1, 46}, {376, 116, 1},
|
||||
{377, 1, 6}, {383, 104, 1}, {385, 50, 1},
|
||||
{386, 1, 4}, {390, 44, 1}, {391, 0, 1},
|
||||
{393, 42, 2}, {395, 0, 1}, {398, 32, 1},
|
||||
{399, 38, 1}, {400, 40, 1}, {401, 0, 1},
|
||||
{403, 42, 1}, {404, 46, 1}, {406, 52, 1},
|
||||
{407, 48, 1}, {408, 0, 1}, {412, 52, 1},
|
||||
{413, 54, 1}, {415, 56, 1}, {416, 1, 6},
|
||||
{422, 60, 1}, {423, 0, 1}, {425, 60, 1},
|
||||
{428, 0, 1}, {430, 60, 1}, {431, 0, 1},
|
||||
{433, 58, 2}, {435, 1, 4}, {439, 62, 1},
|
||||
{440, 0, 1}, {444, 0, 1}, {452, 2, 1},
|
||||
{453, 0, 1}, {455, 2, 1}, {456, 0, 1},
|
||||
{458, 2, 1}, {459, 1, 18}, {478, 1, 18},
|
||||
{497, 2, 1}, {498, 1, 4}, {502, 122, 1},
|
||||
{503, 134, 1}, {504, 1, 40}, {544, 110, 1},
|
||||
{546, 1, 18}, {570, 70, 1}, {571, 0, 1},
|
||||
{573, 108, 1}, {574, 68, 1}, {577, 0, 1},
|
||||
{579, 106, 1}, {580, 28, 1}, {581, 30, 1},
|
||||
{582, 1, 10}, {837, 36, 1}, {880, 1, 4},
|
||||
{886, 0, 1}, {902, 18, 1}, {904, 16, 3},
|
||||
{908, 26, 1}, {910, 24, 2}, {913, 14, 17},
|
||||
{931, 14, 9}, {962, 0, 1}, {975, 4, 1},
|
||||
{976, 140, 1}, {977, 142, 1}, {981, 146, 1},
|
||||
{982, 144, 1}, {984, 1, 24}, {1008, 136, 1},
|
||||
{1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1},
|
||||
{1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1},
|
||||
{1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32},
|
||||
{1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1},
|
||||
{1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38},
|
||||
{4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1},
|
||||
{7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1},
|
||||
{7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6},
|
||||
{7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6},
|
||||
{8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8},
|
||||
{8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2},
|
||||
{8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1},
|
||||
{8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2},
|
||||
{8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2},
|
||||
{8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2},
|
||||
{8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1},
|
||||
{8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16},
|
||||
{8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47},
|
||||
{11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1},
|
||||
{11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1},
|
||||
{11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1},
|
||||
{11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2},
|
||||
{11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1},
|
||||
{42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14},
|
||||
{42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1},
|
||||
{42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1},
|
||||
{42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1},
|
||||
{65313, 14, 26},
|
||||
};
|
||||
static const unsigned short aiOff[] = {
|
||||
1, 2, 8, 15, 16, 26, 28, 32,
|
||||
37, 38, 40, 48, 63, 64, 69, 71,
|
||||
79, 80, 116, 202, 203, 205, 206, 207,
|
||||
209, 210, 211, 213, 214, 217, 218, 219,
|
||||
775, 7264, 10792, 10795, 23228, 23256, 30204, 54721,
|
||||
54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
|
||||
57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
|
||||
65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
|
||||
65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
|
||||
65514, 65521, 65527, 65528, 65529,
|
||||
};
|
||||
|
||||
int ret = c;
|
||||
|
||||
assert( c>=0 );
|
||||
assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
|
||||
|
||||
if( c<128 ){
|
||||
if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
|
||||
}else if( c<65536 ){
|
||||
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
|
||||
int iLo = 0;
|
||||
int iRes = -1;
|
||||
|
||||
while( iHi>=iLo ){
|
||||
int iTest = (iHi + iLo) / 2;
|
||||
int cmp = (c - aEntry[iTest].iCode);
|
||||
if( cmp>=0 ){
|
||||
iRes = iTest;
|
||||
iLo = iTest+1;
|
||||
}else{
|
||||
iHi = iTest-1;
|
||||
}
|
||||
}
|
||||
assert( iRes<0 || c>=aEntry[iRes].iCode );
|
||||
|
||||
if( iRes>=0 ){
|
||||
const struct TableEntry *p = &aEntry[iRes];
|
||||
if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
|
||||
ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
|
||||
assert( ret>0 );
|
||||
}
|
||||
}
|
||||
|
||||
if( bRemoveDiacritic ) ret = remove_diacritic(ret);
|
||||
}
|
||||
|
||||
else if( c>=66560 && c<66600 ){
|
||||
ret = c + 40;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
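#ifdef FTS3_UNICODE_EXAMPLE
/* Illustrative sketch only, not part of the original source (the guard
** macro and function name are hypothetical): case folding with and
** without diacritic removal.  U+00C4 falls in the aEntry[] rule
** {192, 14, 23}, so it is shifted by aiOff[14>>1]==32 to U+00E4; with
** bRemoveDiacritic set the result is further collapsed to plain 'a'.
*/
static void fold_example(void){
  assert( sqlite3FtsUnicodeFold('A', 0)=='a' );    /* ASCII fast path */
  assert( sqlite3FtsUnicodeFold(0xC4, 0)==0xE4 );  /* A-diaeresis -> a-diaeresis */
  assert( sqlite3FtsUnicodeFold(0xC4, 1)=='a' );   /* ...then strip the diaeresis */
}
#endif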
|
||||
#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
|
||||
#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,122 +0,0 @@
|
||||
|
||||
|
||||
#--------------------------------------------------------------------------
|
||||
# This script contains several sub-programs used to test FTS3/FTS4
|
||||
# performance. It does not run the queries directly, but generates SQL
|
||||
# scripts that can be run using the shell tool.
|
||||
#
|
||||
# The following cases are tested:
|
||||
#
|
||||
# 1. Inserting documents into an FTS3 table.
|
||||
# 2. Optimizing an FTS3 table (i.e. "INSERT INTO t1(t1) VALUES('optimize')").
|
||||
# 3. Deleting documents from an FTS3 table.
|
||||
# 4. Querying FTS3 tables.
|
||||
#
|
||||
|
||||
# Number of tokens in vocabulary. And number of tokens in each document.
|
||||
#
|
||||
set VOCAB_SIZE 2000
|
||||
set DOC_SIZE 100
|
||||
|
||||
set NUM_INSERTS 100000
|
||||
set NUM_SELECTS 1000
|
||||
|
||||
# Force everything in this script to be deterministic.
|
||||
#
|
||||
expr {srand(0)}
|
||||
|
||||
proc usage {} {
|
||||
puts stderr "Usage: $::argv0 <rows> <selects>"
|
||||
exit -1
|
||||
}
|
||||
|
||||
proc sql {sql} {
|
||||
puts $::fd $sql
|
||||
}
|
||||
|
||||
|
||||
# Return a list of $nWord randomly generated tokens each between 2 and 10
|
||||
# characters in length.
|
||||
#
|
||||
proc build_vocab {nWord} {
|
||||
set ret [list]
|
||||
set chars [list a b c d e f g h i j k l m n o p q r s t u v w x y z]
|
||||
for {set i 0} {$i<$nWord} {incr i} {
|
||||
set len [expr {int((rand()*9.0)+2)}]
|
||||
set term ""
|
||||
for {set j 0} {$j<$len} {incr j} {
|
||||
append term [lindex $chars [expr {int(rand()*[llength $chars])}]]
|
||||
}
|
||||
lappend ret $term
|
||||
}
|
||||
set ret
|
||||
}
|
||||
|
||||
proc select_term {} {
|
||||
set n [llength $::vocab]
|
||||
set t [expr int(rand()*$n*3)]
|
||||
if {$t>=2*$n} { set t [expr {($t-2*$n)/100}] }
|
||||
if {$t>=$n} { set t [expr {($t-$n)/10}] }
|
||||
lindex $::vocab $t
|
||||
}
|
||||
|
||||
proc select_doc {nTerm} {
|
||||
set ret [list]
|
||||
for {set i 0} {$i<$nTerm} {incr i} {
|
||||
lappend ret [select_term]
|
||||
}
|
||||
set ret
|
||||
}
|
||||
|
||||
proc test_1 {nInsert} {
|
||||
sql "PRAGMA synchronous = OFF;"
|
||||
sql "DROP TABLE IF EXISTS t1;"
|
||||
sql "CREATE VIRTUAL TABLE t1 USING fts4;"
|
||||
for {set i 0} {$i < $nInsert} {incr i} {
|
||||
set doc [select_doc $::DOC_SIZE]
|
||||
sql "INSERT INTO t1 VALUES('$doc');"
|
||||
}
|
||||
}
|
||||
|
||||
proc test_2 {} {
|
||||
sql "INSERT INTO t1(t1) VALUES('optimize');"
|
||||
}
|
||||
|
||||
proc test_3 {nSelect} {
|
||||
for {set i 0} {$i < $nSelect} {incr i} {
|
||||
sql "SELECT count(*) FROM t1 WHERE t1 MATCH '[select_term]';"
|
||||
}
|
||||
}
|
||||
|
||||
proc test_4 {nSelect} {
|
||||
for {set i 0} {$i < $nSelect} {incr i} {
|
||||
sql "SELECT count(*) FROM t1 WHERE t1 MATCH '[select_term] [select_term]';"
|
||||
}
|
||||
}
|
||||
|
||||
if {[llength $argv]!=0} usage
|
||||
|
||||
set ::vocab [build_vocab $::VOCAB_SIZE]
|
||||
|
||||
set ::fd [open fts3speed_insert.sql w]
|
||||
test_1 $NUM_INSERTS
|
||||
close $::fd
|
||||
|
||||
set ::fd [open fts3speed_select.sql w]
|
||||
test_3 $NUM_SELECTS
|
||||
close $::fd
|
||||
|
||||
set ::fd [open fts3speed_select2.sql w]
|
||||
test_4 $NUM_SELECTS
|
||||
close $::fd
|
||||
|
||||
set ::fd [open fts3speed_optimize.sql w]
|
||||
test_2
|
||||
close $::fd
|
||||
|
||||
puts "Success. Created files:"
|
||||
puts " fts3speed_insert.sql"
|
||||
puts " fts3speed_select.sql"
|
||||
puts " fts3speed_select2.sql"
|
||||
puts " fts3speed_optimize.sql"
|
||||
|
||||
@ -1,115 +0,0 @@
|
||||
#!/usr/bin/tclsh
|
||||
#
|
||||
# This script builds a single C code file holding all of FTS3 code.
|
||||
# The name of the output file is fts3amal.c. To build this file,
|
||||
# first do:
|
||||
#
|
||||
# make target_source
|
||||
#
|
||||
# The make target above moves all of the source code files into
|
||||
# a subdirectory named "tsrc". (This script expects to find the files
|
||||
# there and will not work if they are not found.)
|
||||
#
|
||||
# After the "tsrc" directory has been created and populated, run
|
||||
# this script:
|
||||
#
|
||||
# tclsh mkfts3amal.tcl
|
||||
#
|
||||
# The amalgamated FTS3 code will be written into fts3amal.c
|
||||
#
|
||||
|
||||
# Open the output file and write a header comment at the beginning
|
||||
# of the file.
|
||||
#
|
||||
set out [open fts3amal.c w]
|
||||
set today [clock format [clock seconds] -format "%Y-%m-%d %H:%M:%S UTC" -gmt 1]
|
||||
puts $out [subst \
|
||||
{/******************************************************************************
|
||||
** This file is an amalgamation of separate C source files from the SQLite
|
||||
** Full Text Search extension 2 (fts3). By combining all the individual C
|
||||
** code files into this single large file, the entire code can be compiled
|
||||
** as one translation unit. This allows many compilers to do optimizations
|
||||
** that would not be possible if the files were compiled separately. It also
|
||||
** makes the code easier to import into other projects.
|
||||
**
|
||||
** This amalgamation was generated on $today.
|
||||
*/}]
|
||||
|
||||
# These are the header files used by FTS3. The first time any of these
|
||||
# files are seen in a #include statement in the C code, include the complete
|
||||
# text of the file in-line. The file only needs to be included once.
|
||||
#
|
||||
foreach hdr {
|
||||
fts3.h
|
||||
fts3_hash.h
|
||||
fts3_tokenizer.h
|
||||
sqlite3.h
|
||||
sqlite3ext.h
|
||||
} {
|
||||
set available_hdr($hdr) 1
|
||||
}
|
||||
|
||||
# 78 stars used for comment formatting.
|
||||
set s78 \
|
||||
{*****************************************************************************}
|
||||
|
||||
# Insert a comment into the code
|
||||
#
|
||||
proc section_comment {text} {
|
||||
global out s78
|
||||
set n [string length $text]
|
||||
set nstar [expr {60 - $n}]
|
||||
set stars [string range $s78 0 $nstar]
|
||||
puts $out "/************** $text $stars/"
|
||||
}
|
||||
|
||||
# Read the source file named $filename and write it into the
|
||||
# sqlite3.c output file. If any #include statements are seen,
|
||||
# process them appropriately.
|
||||
#
|
||||
proc copy_file {filename} {
|
||||
global seen_hdr available_hdr out
|
||||
set tail [file tail $filename]
|
||||
section_comment "Begin file $tail"
|
||||
set in [open $filename r]
|
||||
while {![eof $in]} {
|
||||
set line [gets $in]
|
||||
if {[regexp {^#\s*include\s+["<]([^">]+)[">]} $line all hdr]} {
|
||||
if {[info exists available_hdr($hdr)]} {
|
||||
if {$available_hdr($hdr)} {
|
||||
section_comment "Include $hdr in the middle of $tail"
|
||||
copy_file tsrc/$hdr
|
||||
section_comment "Continuing where we left off in $tail"
|
||||
}
|
||||
} elseif {![info exists seen_hdr($hdr)]} {
|
||||
set seen_hdr($hdr) 1
|
||||
puts $out $line
|
||||
}
|
||||
} elseif {[regexp {^#ifdef __cplusplus} $line]} {
|
||||
puts $out "#if 0"
|
||||
} elseif {[regexp {^#line} $line]} {
|
||||
# Skip #line directives.
|
||||
} else {
|
||||
puts $out $line
|
||||
}
|
||||
}
|
||||
close $in
|
||||
section_comment "End of $tail"
|
||||
}
|
||||
|
||||
|
||||
# Process the source files. Process files containing commonly
|
||||
# used subroutines first in order to help the compiler find
|
||||
# inlining opportunities.
|
||||
#
|
||||
foreach file {
|
||||
fts3.c
|
||||
fts3_hash.c
|
||||
fts3_porter.c
|
||||
fts3_tokenizer.c
|
||||
fts3_tokenizer1.c
|
||||
} {
|
||||
copy_file tsrc/$file
|
||||
}
|
||||
|
||||
close $out
|
||||
@ -1,875 +0,0 @@
|
||||
/*
|
||||
** This program is a debugging and analysis utility that displays
|
||||
** information about an FTS3 or FTS4 index.
|
||||
**
|
||||
** Link this program against the SQLite3 amalgamation with the
|
||||
** SQLITE_ENABLE_FTS4 compile-time option. Then run it as:
|
||||
**
|
||||
** fts3view DATABASE
|
||||
**
|
||||
** to get a list of all FTS3/4 tables in DATABASE, or do
|
||||
**
|
||||
** fts3view DATABASE TABLE COMMAND ....
|
||||
**
|
||||
** to see various aspects of the TABLE table. Type fts3view with no
|
||||
** arguments for a list of available COMMANDs.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "sqlite3.h"
|
||||
|
||||
/*
|
||||
** Extra command-line arguments:
|
||||
*/
|
||||
int nExtra;
|
||||
char **azExtra;
|
||||
|
||||
/*
|
||||
** Look for a command-line argument.
|
||||
*/
|
||||
const char *findOption(const char *zName, int hasArg, const char *zDefault){
|
||||
int i;
|
||||
const char *zResult = zDefault;
|
||||
for(i=0; i<nExtra; i++){
|
||||
const char *z = azExtra[i];
|
||||
while( z[0]=='-' ) z++;
|
||||
if( strcmp(z, zName)==0 ){
|
||||
int j = 1;
|
||||
if( hasArg==0 || i==nExtra-1 ) j = 0;
|
||||
zResult = azExtra[i+j];
|
||||
while( i+j<nExtra ){
|
||||
azExtra[i] = azExtra[i+j+1];
|
||||
i++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return zResult;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Prepare an SQL query
|
||||
*/
|
||||
static sqlite3_stmt *prepare(sqlite3 *db, const char *zFormat, ...){
|
||||
va_list ap;
|
||||
char *zSql;
|
||||
sqlite3_stmt *pStmt;
|
||||
int rc;
|
||||
|
||||
va_start(ap, zFormat);
|
||||
zSql = sqlite3_vmprintf(zFormat, ap);
|
||||
va_end(ap);
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc ){
|
||||
fprintf(stderr, "Error: %s\nSQL: %s\n", sqlite3_errmsg(db), zSql);
|
||||
exit(1);
|
||||
}
|
||||
sqlite3_free(zSql);
|
||||
return pStmt;
|
||||
}
|
||||
|
||||
/*
|
||||
** Run an SQL statement
|
||||
*/
|
||||
static int runSql(sqlite3 *db, const char *zFormat, ...){
|
||||
va_list ap;
|
||||
char *zSql;
|
||||
int rc;
|
||||
|
||||
va_start(ap, zFormat);
|
||||
zSql = sqlite3_vmprintf(zFormat, ap);
|
||||
rc = sqlite3_exec(db, zSql, 0, 0, 0);
|
||||
va_end(ap);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Show the table schema
|
||||
*/
|
||||
static void showSchema(sqlite3 *db, const char *zTab){
|
||||
sqlite3_stmt *pStmt;
|
||||
pStmt = prepare(db,
|
||||
"SELECT sql FROM sqlite_master"
|
||||
" WHERE name LIKE '%q%%'"
|
||||
" ORDER BY 1",
|
||||
zTab);
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
printf("%s;\n", sqlite3_column_text(pStmt, 0));
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
pStmt = prepare(db, "PRAGMA page_size");
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
printf("PRAGMA page_size=%s;\n", sqlite3_column_text(pStmt, 0));
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
pStmt = prepare(db, "PRAGMA journal_mode");
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
printf("PRAGMA journal_mode=%s;\n", sqlite3_column_text(pStmt, 0));
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
pStmt = prepare(db, "PRAGMA auto_vacuum");
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
const char *zType = "???";
|
||||
switch( sqlite3_column_int(pStmt, 0) ){
|
||||
case 0: zType = "OFF"; break;
|
||||
case 1: zType = "FULL"; break;
|
||||
case 2: zType = "INCREMENTAL"; break;
|
||||
}
|
||||
printf("PRAGMA auto_vacuum=%s;\n", zType);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
pStmt = prepare(db, "PRAGMA encoding");
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
printf("PRAGMA encoding=%s;\n", sqlite3_column_text(pStmt, 0));
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
/*
|
||||
** Read a 64-bit variable-length integer from memory starting at p[0].
|
||||
** Return the number of bytes read.
|
||||
** The value is stored in *v.
|
||||
*/
|
||||
int getVarint(const unsigned char *p, sqlite_int64 *v){
|
||||
const unsigned char *q = p;
|
||||
sqlite_uint64 x = 0, y = 1;
|
||||
while( (*q&0x80)==0x80 && q-(unsigned char *)p<9 ){
|
||||
x += y * (*q++ & 0x7f);
|
||||
y <<= 7;
|
||||
}
|
||||
x += y * (*q++);
|
||||
*v = (sqlite_int64) x;
|
||||
return (int) (q - (unsigned char *)p);
|
||||
}
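#ifdef FTS3VIEW_EXAMPLE
/* Illustrative sketch only, not part of the original utility (the guard
** macro and function name are hypothetical): decode the two-byte varint
** 0x81 0x01.  As implemented by getVarint() above, each byte contributes
** its low 7 bits, least-significant group first, so this is 1 + 128 = 129.
*/
static void getVarint_example(void){
  static const unsigned char a[] = { 0x81, 0x01 };
  sqlite3_int64 v = 0;
  int n = getVarint(a, &v);
  printf("read %d bytes, value %lld\n", n, v);  /* read 2 bytes, value 129 */
}
#endif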
|
||||
|
||||
|
||||
/* Show the content of the %_stat table
|
||||
*/
|
||||
static void showStat(sqlite3 *db, const char *zTab){
|
||||
sqlite3_stmt *pStmt;
|
||||
pStmt = prepare(db, "SELECT id, value FROM '%q_stat'", zTab);
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
printf("stat[%d] =", sqlite3_column_int(pStmt, 0));
|
||||
switch( sqlite3_column_type(pStmt, 1) ){
|
||||
case SQLITE_INTEGER: {
|
||||
printf(" %d\n", sqlite3_column_int(pStmt, 1));
|
||||
break;
|
||||
}
|
||||
case SQLITE_BLOB: {
|
||||
unsigned char *x = (unsigned char*)sqlite3_column_blob(pStmt, 1);
|
||||
int len = sqlite3_column_bytes(pStmt, 1);
|
||||
int i = 0;
|
||||
sqlite3_int64 v;
|
||||
while( i<len ){
|
||||
i += getVarint(x, &v);
|
||||
printf(" %lld", v);
|
||||
}
|
||||
printf("\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
/*
|
||||
** Report on the vocabulary. This creates an fts4aux table with a random
|
||||
** name, but deletes it in the end.
|
||||
*/
|
||||
static void showVocabulary(sqlite3 *db, const char *zTab){
|
||||
char *zAux;
|
||||
sqlite3_uint64 r;
|
||||
sqlite3_stmt *pStmt;
|
||||
int nDoc = 0;
|
||||
int nToken = 0;
|
||||
int nOccurrence = 0;
|
||||
int nTop;
|
||||
int n, i;
|
||||
|
||||
sqlite3_randomness(sizeof(r), &r);
|
||||
zAux = sqlite3_mprintf("viewer_%llx", r);
|
||||
runSql(db, "BEGIN");
|
||||
pStmt = prepare(db, "SELECT count(*) FROM %Q", zTab);
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
nDoc = sqlite3_column_int(pStmt, 0);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
printf("Number of documents...................... %9d\n", nDoc);
|
||||
|
||||
runSql(db, "CREATE VIRTUAL TABLE %s USING fts4aux(%Q)", zAux, zTab);
|
||||
pStmt = prepare(db,
|
||||
"SELECT count(*), sum(occurrences) FROM %s WHERE col='*'",
|
||||
zAux);
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
nToken = sqlite3_column_int(pStmt, 0);
|
||||
nOccurrence = sqlite3_column_int(pStmt, 1);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
printf("Total tokens in all documents............ %9d\n", nOccurrence);
|
||||
printf("Total number of distinct tokens.......... %9d\n", nToken);
|
||||
if( nToken==0 ) goto end_vocab;
|
||||
|
||||
n = 0;
|
||||
pStmt = prepare(db, "SELECT count(*) FROM %s"
|
||||
" WHERE col='*' AND occurrences==1", zAux);
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
n = sqlite3_column_int(pStmt, 0);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
printf("Tokens used exactly once................. %9d %5.2f%%\n",
|
||||
n, n*100.0/nToken);
|
||||
|
||||
n = 0;
|
||||
pStmt = prepare(db, "SELECT count(*) FROM %s"
|
||||
" WHERE col='*' AND documents==1", zAux);
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
n = sqlite3_column_int(pStmt, 0);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
printf("Tokens used in only one document......... %9d %5.2f%%\n",
|
||||
n, n*100.0/nToken);
|
||||
|
||||
if( nDoc>=2000 ){
|
||||
n = 0;
|
||||
pStmt = prepare(db, "SELECT count(*) FROM %s"
|
||||
" WHERE col='*' AND occurrences<=%d", zAux, nDoc/1000);
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
n = sqlite3_column_int(pStmt, 0);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
printf("Tokens used in 0.1%% or less of docs...... %9d %5.2f%%\n",
|
||||
n, n*100.0/nToken);
|
||||
}
|
||||
|
||||
if( nDoc>=200 ){
|
||||
n = 0;
|
||||
pStmt = prepare(db, "SELECT count(*) FROM %s"
|
||||
" WHERE col='*' AND occurrences<=%d", zAux, nDoc/100);
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
n = sqlite3_column_int(pStmt, 0);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
printf("Tokens used in 1%% or less of docs........ %9d %5.2f%%\n",
|
||||
n, n*100.0/nToken);
|
||||
}
|
||||
|
||||
nTop = atoi(findOption("top", 1, "25"));
|
||||
printf("The %d most common tokens:\n", nTop);
|
||||
pStmt = prepare(db,
|
||||
"SELECT term, documents FROM %s"
|
||||
" WHERE col='*'"
|
||||
" ORDER BY documents DESC, term"
|
||||
" LIMIT %d", zAux, nTop);
|
||||
i = 0;
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
i++;
|
||||
n = sqlite3_column_int(pStmt, 1);
|
||||
printf(" %2d. %-30s %9d docs %5.2f%%\n", i,
|
||||
sqlite3_column_text(pStmt, 0), n, n*100.0/nDoc);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
|
||||
end_vocab:
|
||||
runSql(db, "ROLLBACK");
|
||||
sqlite3_free(zAux);
|
||||
}
|
||||
|
||||
/*
|
||||
** Report on the number and sizes of segments
|
||||
*/
|
||||
static void showSegmentStats(sqlite3 *db, const char *zTab){
|
||||
sqlite3_stmt *pStmt;
|
||||
int nSeg = 0;
|
||||
sqlite3_int64 szSeg = 0, mxSeg = 0;
|
||||
int nIdx = 0;
|
||||
sqlite3_int64 szIdx = 0, mxIdx = 0;
|
||||
int nRoot = 0;
|
||||
sqlite3_int64 szRoot = 0, mxRoot = 0;
|
||||
sqlite3_int64 mx;
|
||||
int nLeaf;
|
||||
int n;
|
||||
int pgsz;
|
||||
int mxLevel;
|
||||
int i;
|
||||
|
||||
pStmt = prepare(db,
|
||||
"SELECT count(*), sum(length(block)), max(length(block))"
|
||||
" FROM '%q_segments'",
|
||||
zTab);
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
nSeg = sqlite3_column_int(pStmt, 0);
|
||||
szSeg = sqlite3_column_int64(pStmt, 1);
|
||||
mxSeg = sqlite3_column_int64(pStmt, 2);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
pStmt = prepare(db,
|
||||
"SELECT count(*), sum(length(block)), max(length(block))"
|
||||
" FROM '%q_segments' a JOIN '%q_segdir' b"
|
||||
" WHERE a.blockid BETWEEN b.leaves_end_block+1 AND b.end_block",
|
||||
zTab, zTab);
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
nIdx = sqlite3_column_int(pStmt, 0);
|
||||
szIdx = sqlite3_column_int64(pStmt, 1);
|
||||
mxIdx = sqlite3_column_int64(pStmt, 2);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
pStmt = prepare(db,
|
||||
"SELECT count(*), sum(length(root)), max(length(root))"
|
||||
" FROM '%q_segdir'",
|
||||
zTab);
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
nRoot = sqlite3_column_int(pStmt, 0);
|
||||
szRoot = sqlite3_column_int64(pStmt, 1);
|
||||
mxRoot = sqlite3_column_int64(pStmt, 2);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
|
||||
printf("Number of segments....................... %9d\n", nSeg+nRoot);
|
||||
printf("Number of leaf segments.................. %9d\n", nSeg-nIdx);
|
||||
printf("Number of index segments................. %9d\n", nIdx);
|
||||
printf("Number of root segments.................. %9d\n", nRoot);
|
||||
printf("Total size of all segments............... %9lld\n", szSeg+szRoot);
|
||||
printf("Total size of all leaf segments.......... %9lld\n", szSeg-szIdx);
|
||||
printf("Total size of all index segments......... %9lld\n", szIdx);
|
||||
printf("Total size of all root segments.......... %9lld\n", szRoot);
|
||||
if( nSeg>0 ){
|
||||
printf("Average size of all segments............. %11.1f\n",
|
||||
(double)(szSeg+szRoot)/(double)(nSeg+nRoot));
|
||||
printf("Average size of leaf segments............ %11.1f\n",
|
||||
(double)(szSeg-szIdx)/(double)(nSeg-nIdx));
|
||||
}
|
||||
if( nIdx>0 ){
|
||||
printf("Average size of index segments........... %11.1f\n",
|
||||
(double)szIdx/(double)nIdx);
|
||||
}
|
||||
if( nRoot>0 ){
|
||||
printf("Average size of root segments............ %11.1f\n",
|
||||
(double)szRoot/(double)nRoot);
|
||||
}
|
||||
mx = mxSeg;
|
||||
if( mx<mxRoot ) mx = mxRoot;
|
||||
printf("Maximum segment size..................... %9lld\n", mx);
|
||||
printf("Maximum index segment size............... %9lld\n", mxIdx);
|
||||
printf("Maximum root segment size................ %9lld\n", mxRoot);
|
||||
|
||||
pStmt = prepare(db, "PRAGMA page_size");
|
||||
pgsz = 1024;
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
pgsz = sqlite3_column_int(pStmt, 0);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
printf("Database page size....................... %9d\n", pgsz);
|
||||
pStmt = prepare(db,
|
||||
"SELECT count(*)"
|
||||
" FROM '%q_segments' a JOIN '%q_segdir' b"
|
||||
" WHERE a.blockid BETWEEN b.start_block AND b.leaves_end_block"
|
||||
" AND length(a.block)>%d",
|
||||
zTab, zTab, pgsz-45);
|
||||
n = 0;
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
n = sqlite3_column_int(pStmt, 0);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
nLeaf = nSeg - nIdx;
|
||||
printf("Leaf segments larger than %5d bytes.... %9d %5.2f%%\n",
|
||||
pgsz-45, n, nLeaf>0 ? n*100.0/nLeaf : 0.0);
|
||||
|
||||
pStmt = prepare(db, "SELECT max(level%%1024) FROM '%q_segdir'", zTab);
|
||||
mxLevel = 0;
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
mxLevel = sqlite3_column_int(pStmt, 0);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
|
||||
for(i=0; i<=mxLevel; i++){
|
||||
pStmt = prepare(db,
|
||||
"SELECT count(*), sum(len), avg(len), max(len), sum(len>%d),"
|
||||
" count(distinct idx)"
|
||||
" FROM (SELECT length(a.block) AS len, idx"
|
||||
" FROM '%q_segments' a JOIN '%q_segdir' b"
|
||||
" WHERE (a.blockid BETWEEN b.start_block"
|
||||
" AND b.leaves_end_block)"
|
||||
" AND (b.level%%1024)==%d)",
|
||||
pgsz-45, zTab, zTab, i);
|
||||
if( sqlite3_step(pStmt)==SQLITE_ROW
|
||||
&& (nLeaf = sqlite3_column_int(pStmt, 0))>0
|
||||
){
|
||||
nIdx = sqlite3_column_int(pStmt, 5);
|
||||
sqlite3_int64 sz;
|
||||
printf("For level %d:\n", i);
|
||||
printf(" Number of indexes...................... %9d\n", nIdx);
|
||||
printf(" Number of leaf segments................ %9d\n", nLeaf);
|
||||
if( nIdx>1 ){
|
||||
printf(" Average leaf segments per index........ %11.1f\n",
|
||||
(double)nLeaf/(double)nIdx);
|
||||
}
|
||||
printf(" Total size of all leaf segments........ %9lld\n",
|
||||
(sz = sqlite3_column_int64(pStmt, 1)));
|
||||
printf(" Average size of leaf segments.......... %11.1f\n",
|
||||
sqlite3_column_double(pStmt, 2));
|
||||
if( nIdx>1 ){
|
||||
printf(" Average leaf segment size per index.... %11.1f\n",
|
||||
(double)sz/(double)nIdx);
|
||||
}
|
||||
printf(" Maximum leaf segment size.............. %9lld\n",
|
||||
sqlite3_column_int64(pStmt, 3));
|
||||
n = sqlite3_column_int(pStmt, 4);
|
||||
printf(" Leaf segments larger than %5d bytes.. %9d %5.2f%%\n",
|
||||
pgsz-45, n, n*100.0/nLeaf);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Print a single "tree" line of the segdir map output.
|
||||
*/
|
||||
static void printTreeLine(sqlite3_int64 iLower, sqlite3_int64 iUpper){
|
||||
printf(" tree %9lld", iLower);
|
||||
if( iUpper>iLower ){
|
||||
printf(" thru %9lld (%lld blocks)", iUpper, iUpper-iLower+1);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
/*
|
||||
** Check to see if the block of a %_segments entry is NULL.
|
||||
*/
|
||||
static int isNullSegment(sqlite3 *db, const char *zTab, sqlite3_int64 iBlockId){
|
||||
sqlite3_stmt *pStmt;
|
||||
int rc = 1;
|
||||
|
||||
pStmt = prepare(db, "SELECT block IS NULL FROM '%q_segments'"
|
||||
" WHERE blockid=%lld", zTab, iBlockId);
|
||||
if( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
rc = sqlite3_column_int(pStmt, 0);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Show a map of segments derived from the %_segdir table.
|
||||
*/
|
||||
static void showSegdirMap(sqlite3 *db, const char *zTab){
|
||||
int mxIndex, iIndex;
|
||||
sqlite3_stmt *pStmt = 0;
|
||||
sqlite3_stmt *pStmt2 = 0;
|
||||
int prevLevel;
|
||||
|
||||
pStmt = prepare(db, "SELECT max(level/1024) FROM '%q_segdir'", zTab);
|
||||
if( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
mxIndex = sqlite3_column_int(pStmt, 0);
|
||||
}else{
|
||||
mxIndex = 0;
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
|
||||
printf("Number of inverted indices............... %3d\n", mxIndex+1);
|
||||
pStmt = prepare(db,
|
||||
"SELECT level, idx, start_block, leaves_end_block, end_block, rowid"
|
||||
" FROM '%q_segdir'"
|
||||
" WHERE level/1024==?"
|
||||
" ORDER BY level DESC, idx",
|
||||
zTab);
|
||||
pStmt2 = prepare(db,
|
||||
"SELECT blockid FROM '%q_segments'"
|
||||
" WHERE blockid BETWEEN ? AND ? ORDER BY blockid",
|
||||
zTab);
|
||||
for(iIndex=0; iIndex<=mxIndex; iIndex++){
|
||||
if( mxIndex>0 ){
|
||||
printf("**************************** Index %d "
|
||||
"****************************\n", iIndex);
|
||||
}
|
||||
sqlite3_bind_int(pStmt, 1, iIndex);
|
||||
prevLevel = -1;
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
int iLevel = sqlite3_column_int(pStmt, 0)%1024;
|
||||
int iIdx = sqlite3_column_int(pStmt, 1);
|
||||
sqlite3_int64 iStart = sqlite3_column_int64(pStmt, 2);
|
||||
sqlite3_int64 iLEnd = sqlite3_column_int64(pStmt, 3);
|
||||
sqlite3_int64 iEnd = sqlite3_column_int64(pStmt, 4);
|
||||
char rtag[20];
|
||||
if( iLevel!=prevLevel ){
|
||||
printf("level %2d idx %2d", iLevel, iIdx);
|
||||
prevLevel = iLevel;
|
||||
}else{
|
||||
printf(" idx %2d", iIdx);
|
||||
}
|
||||
sqlite3_snprintf(sizeof(rtag), rtag, "r%lld",
|
||||
sqlite3_column_int64(pStmt,5));
|
||||
printf(" root %9s\n", rtag);
|
||||
if( iLEnd>iStart ){
|
||||
sqlite3_int64 iLower, iPrev = 0, iX;
|
||||
if( iLEnd+1<=iEnd ){
|
||||
sqlite3_bind_int64(pStmt2, 1, iLEnd+1);
|
||||
sqlite3_bind_int64(pStmt2, 2, iEnd);
|
||||
iLower = -1;
|
||||
while( sqlite3_step(pStmt2)==SQLITE_ROW ){
|
||||
iX = sqlite3_column_int64(pStmt2, 0);
|
||||
if( iLower<0 ){
|
||||
iLower = iPrev = iX;
|
||||
}else if( iX==iPrev+1 ){
|
||||
iPrev = iX;
|
||||
}else{
|
||||
printTreeLine(iLower, iPrev);
|
||||
iLower = iPrev = iX;
|
||||
}
|
||||
}
|
||||
sqlite3_reset(pStmt2);
|
||||
if( iLower>=0 ){
|
||||
if( iLower==iPrev && iLower==iEnd
|
||||
&& isNullSegment(db,zTab,iLower)
|
||||
){
|
||||
printf(" null %9lld\n", iLower);
|
||||
}else{
|
||||
printTreeLine(iLower, iPrev);
|
||||
}
|
||||
}
|
||||
}
|
||||
printf(" leaves %9lld thru %9lld (%lld blocks)\n",
|
||||
iStart, iLEnd, iLEnd - iStart + 1);
|
||||
}
|
||||
}
|
||||
sqlite3_reset(pStmt);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
sqlite3_finalize(pStmt2);
|
||||
}
|
||||
|
||||
/*
|
||||
** Decode a single segment block and display the results on stdout.
|
||||
*/
|
||||
static void decodeSegment(
|
||||
const unsigned char *aData, /* Content to print */
|
||||
int nData /* Number of bytes of content */
|
||||
){
|
||||
sqlite3_int64 iChild = 0;
|
||||
sqlite3_int64 iPrefix;
|
||||
sqlite3_int64 nTerm;
|
||||
sqlite3_int64 n;
|
||||
sqlite3_int64 iDocsz;
|
||||
int iHeight;
|
||||
sqlite3_int64 i = 0;
|
||||
int cnt = 0;
|
||||
char zTerm[1000];
|
||||
|
||||
i += getVarint(aData, &n);
|
||||
iHeight = (int)n;
|
||||
printf("height: %d\n", iHeight);
|
||||
if( iHeight>0 ){
|
||||
i += getVarint(aData+i, &iChild);
|
||||
printf("left-child: %lld\n", iChild);
|
||||
}
|
||||
while( i<nData ){
|
||||
if( (cnt++)>0 ){
|
||||
i += getVarint(aData+i, &iPrefix);
|
||||
}else{
|
||||
iPrefix = 0;
|
||||
}
|
||||
i += getVarint(aData+i, &nTerm);
|
||||
if( iPrefix+nTerm+1 >= sizeof(zTerm) ){
|
||||
fprintf(stderr, "term to long\n");
|
||||
exit(1);
|
||||
}
|
||||
memcpy(zTerm+iPrefix, aData+i, (size_t)nTerm);
|
||||
zTerm[iPrefix+nTerm] = 0;
|
||||
i += nTerm;
|
||||
if( iHeight==0 ){
|
||||
i += getVarint(aData+i, &iDocsz);
|
||||
printf("term: %-25s doclist %7lld bytes offset %lld\n", zTerm, iDocsz, i);
|
||||
i += iDocsz;
|
||||
}else{
|
||||
printf("term: %-25s child %lld\n", zTerm, ++iChild);
|
||||
}
|
||||
}
|
||||
}
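#ifdef FTS3VIEW_EXAMPLE
/* Illustrative sketch only, not part of the original utility (the guard
** macro and function name are hypothetical): the smallest useful leaf
** node.  A leading varint 0 marks height 0, the first term carries no
** prefix varint, and each term is followed by the size of its doclist,
** so this prints "height: 0" and one "term: abc" line.
*/
static void decodeSegment_example(void){
  static const unsigned char aLeaf[] = {
    0x00,                 /* height = 0: leaf node */
    0x03, 'a', 'b', 'c',  /* 3-byte term, no shared prefix */
    0x02, 0x01, 0x00      /* 2-byte doclist: docid 1, empty position list */
  };
  decodeSegment(aLeaf, (int)sizeof(aLeaf));
}
#endif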
|
||||
|
||||
|
||||
/*
|
||||
** Print a blob as hex and ASCII.
|
||||
*/
|
||||
static void printBlob(
|
||||
const unsigned char *aData, /* Content to print */
|
||||
int nData /* Number of bytes of content */
|
||||
){
|
||||
int i, j;
|
||||
const char *zOfstFmt;
|
||||
const int perLine = 16;
|
||||
|
||||
if( (nData&~0xfff)==0 ){
|
||||
zOfstFmt = " %03x: ";
|
||||
}else if( (nData&~0xffff)==0 ){
|
||||
zOfstFmt = " %04x: ";
|
||||
}else if( (nData&~0xfffff)==0 ){
|
||||
zOfstFmt = " %05x: ";
|
||||
}else if( (nData&~0xffffff)==0 ){
|
||||
zOfstFmt = " %06x: ";
|
||||
}else{
|
||||
zOfstFmt = " %08x: ";
|
||||
}
|
||||
|
||||
for(i=0; i<nData; i += perLine){
|
||||
fprintf(stdout, zOfstFmt, i);
|
||||
for(j=0; j<perLine; j++){
|
||||
if( i+j>=nData ){
|
||||
fprintf(stdout, " ");
|
||||
}else{
|
||||
fprintf(stdout,"%02x ", aData[i+j]);
|
||||
}
|
||||
}
|
||||
for(j=0; j<perLine; j++){
|
||||
if( i+j>=nData ){
|
||||
fprintf(stdout, " ");
|
||||
}else{
|
||||
fprintf(stdout,"%c", isprint(aData[i+j]) ? aData[i+j] : '.');
|
||||
}
|
||||
}
|
||||
fprintf(stdout,"\n");
|
||||
}
|
||||
}
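#ifdef FTS3VIEW_EXAMPLE
/* Illustrative sketch only, not part of the original utility (the guard
** macro and function name are hypothetical): printBlob() writes 16 bytes
** per line, a hex column on the left and a printable-ASCII column on the
** right, with the offset format chosen by the size of the blob.
*/
static void printBlob_example(void){
  static const unsigned char aBuf[] = "hello, fts3view";
  printBlob(aBuf, (int)sizeof(aBuf)-1);  /* exclude the trailing NUL */
}
#endif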
|
||||
|
||||
/*
|
||||
** Convert text to a 64-bit integer
|
||||
*/
|
||||
static sqlite3_int64 atoi64(const char *z){
|
||||
sqlite3_int64 v = 0;
|
||||
while( z[0]>='0' && z[0]<='9' ){
|
||||
v = v*10 + z[0] - '0';
|
||||
z++;
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a prepared statement which, when stepped, will return in its
|
||||
** first column the blob associated with segment zId. If zId begins with
|
||||
** 'r' then it is a rowid of a %_segdir entry. Otherwise it is a
|
||||
** %_segments entry.
|
||||
*/
|
||||
static sqlite3_stmt *prepareToGetSegment(
|
||||
sqlite3 *db, /* The database */
|
||||
const char *zTab, /* The FTS3/4 table name */
|
||||
const char *zId /* ID of the segment to open */
|
||||
){
|
||||
sqlite3_stmt *pStmt;
|
||||
if( zId[0]=='r' ){
|
||||
pStmt = prepare(db, "SELECT root FROM '%q_segdir' WHERE rowid=%lld",
|
||||
zTab, atoi64(zId+1));
|
||||
}else{
|
||||
pStmt = prepare(db, "SELECT block FROM '%q_segments' WHERE blockid=%lld",
|
||||
zTab, atoi64(zId));
|
||||
}
|
||||
return pStmt;
|
||||
}
|
||||
|
||||
/*
|
||||
** Print the content of a segment or of the root of a segdir. The segment
|
||||
** or root is identified by azExtra[0]. If the first character of azExtra[0]
|
||||
** is 'r' then the remainder is the integer rowid of the %_segdir entry.
|
||||
** If the first character of azExtra[0] is not 'r' then, then all of
|
||||
** azExtra[0] is an integer which is the block number.
|
||||
**
|
||||
** If the --raw option is present in azExtra, then a hex dump is provided.
|
||||
** Otherwise a decoding is shown.
|
||||
*/
|
||||
static void showSegment(sqlite3 *db, const char *zTab){
|
||||
const unsigned char *aData;
|
||||
int nData;
|
||||
sqlite3_stmt *pStmt;
|
||||
|
||||
pStmt = prepareToGetSegment(db, zTab, azExtra[0]);
|
||||
if( sqlite3_step(pStmt)!=SQLITE_ROW ){
|
||||
sqlite3_finalize(pStmt);
|
||||
return;
|
||||
}
|
||||
nData = sqlite3_column_bytes(pStmt, 0);
|
||||
aData = sqlite3_column_blob(pStmt, 0);
|
||||
printf("Segment %s of size %d bytes:\n", azExtra[0], nData);
|
||||
if( findOption("raw", 0, 0)!=0 ){
|
||||
printBlob(aData, nData);
|
||||
}else{
|
||||
decodeSegment(aData, nData);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
/*
|
||||
** Decode a single doclist and display the results on stdout.
|
||||
*/
|
||||
static void decodeDoclist(
|
||||
const unsigned char *aData, /* Content to print */
|
||||
int nData /* Number of bytes of content */
|
||||
){
|
||||
sqlite3_int64 iPrevDocid = 0;
|
||||
sqlite3_int64 iDocid;
|
||||
sqlite3_int64 iPos;
|
||||
sqlite3_int64 iPrevPos = 0;
|
||||
sqlite3_int64 iCol;
|
||||
int i = 0;
|
||||
|
||||
while( i<nData ){
|
||||
i += getVarint(aData+i, &iDocid);
|
||||
printf("docid %lld col0", iDocid+iPrevDocid);
|
||||
iPrevDocid += iDocid;
|
||||
iPrevPos = 0;
|
||||
while( 1 ){
|
||||
i += getVarint(aData+i, &iPos);
|
||||
if( iPos==1 ){
|
||||
i += getVarint(aData+i, &iCol);
|
||||
printf(" col%lld", iCol);
|
||||
iPrevPos = 0;
|
||||
}else if( iPos==0 ){
|
||||
printf("\n");
|
||||
break;
|
||||
}else{
|
||||
iPrevPos += iPos - 2;
|
||||
printf(" %lld", iPrevPos);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
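#ifdef FTS3VIEW_EXAMPLE
/* Illustrative sketch only, not part of the original utility (the guard
** macro and function name are hypothetical): a hand-built doclist for a
** single document.  Docids are delta-encoded varints; each position is
** stored as (delta + 2) and a 0 terminates the entry, so the bytes below
** decode as "docid 5 col0 0 3".
*/
static void decodeDoclist_example(void){
  static const unsigned char aDoclist[] = { 0x05, 0x02, 0x05, 0x00 };
  decodeDoclist(aDoclist, (int)sizeof(aDoclist));
}
#endif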
|
||||
|
||||
|
||||
/*
|
||||
** Print the content of a doclist. The segment or segdir-root is
|
||||
** identified by azExtra[0]. If the first character of azExtra[0]
|
||||
** is 'r' then the remainder is the integer rowid of the %_segdir entry.
|
||||
** If the first character of azExtra[0] is not 'r' then, then all of
|
||||
** azExtra[0] is an integer which is the block number. The offset
|
||||
** into the segment is identified by azExtra[1]. The size of the doclist
|
||||
** is azExtra[2].
|
||||
**
|
||||
** If the --raw option is present in azExtra, then a hex dump is provided.
|
||||
** Otherwise a decoding is shown.
|
||||
*/
|
||||
static void showDoclist(sqlite3 *db, const char *zTab){
|
||||
const unsigned char *aData;
|
||||
sqlite3_int64 offset;
|
||||
int nData;
|
||||
sqlite3_stmt *pStmt;
|
||||
|
||||
offset = atoi64(azExtra[1]);
|
||||
nData = atoi(azExtra[2]);
|
||||
pStmt = prepareToGetSegment(db, zTab, azExtra[0]);
|
||||
if( sqlite3_step(pStmt)!=SQLITE_ROW ){
|
||||
sqlite3_finalize(pStmt);
|
||||
return;
|
||||
}
|
||||
aData = sqlite3_column_blob(pStmt, 0);
|
||||
printf("Doclist at %s offset %lld of size %d bytes:\n",
|
||||
azExtra[0], offset, nData);
|
||||
if( findOption("raw", 0, 0)!=0 ){
|
||||
printBlob(aData+offset, nData);
|
||||
}else{
|
||||
decodeDoclist(aData+offset, nData);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
/*
|
||||
** Show the top N largest segments
|
||||
*/
|
||||
static void listBigSegments(sqlite3 *db, const char *zTab){
|
||||
int nTop, i;
|
||||
sqlite3_stmt *pStmt;
|
||||
sqlite3_int64 sz;
|
||||
sqlite3_int64 id;
|
||||
|
||||
nTop = atoi(findOption("top", 1, "25"));
|
||||
printf("The %d largest segments:\n", nTop);
|
||||
pStmt = prepare(db,
|
||||
"SELECT blockid, length(block) AS len FROM '%q_segments'"
|
||||
" ORDER BY 2 DESC, 1"
|
||||
" LIMIT %d", zTab, nTop);
|
||||
i = 0;
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
i++;
|
||||
id = sqlite3_column_int64(pStmt, 0);
|
||||
sz = sqlite3_column_int64(pStmt, 1);
|
||||
printf(" %2d. %9lld size %lld\n", i, id, sz);
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void usage(const char *argv0){
|
||||
fprintf(stderr, "Usage: %s DATABASE\n"
|
||||
" or: %s DATABASE FTS3TABLE ARGS...\n", argv0, argv0);
|
||||
fprintf(stderr,
|
||||
"ARGS:\n"
|
||||
" big-segments [--top N] show the largest segments\n"
|
||||
" doclist BLOCKID OFFSET SIZE [--raw] Decode a doclist\n"
|
||||
" schema FTS table schema\n"
|
||||
" segdir directory of segments\n"
|
||||
" segment BLOCKID [--raw] content of a segment\n"
|
||||
" segment-stats info on segment sizes\n"
|
||||
" stat the %%_stat table\n"
|
||||
" vocabulary [--top N] document vocabulary\n"
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv){
|
||||
sqlite3 *db;
|
||||
int rc;
|
||||
const char *zTab;
|
||||
const char *zCmd;
|
||||
|
||||
if( argc<2 ) usage(argv[0]);
|
||||
rc = sqlite3_open(argv[1], &db);
|
||||
if( rc ){
|
||||
fprintf(stderr, "Cannot open %s\n", argv[1]);
|
||||
exit(1);
|
||||
}
|
||||
if( argc==2 ){
|
||||
sqlite3_stmt *pStmt;
|
||||
int cnt = 0;
|
||||
pStmt = prepare(db, "SELECT b.sql"
|
||||
" FROM sqlite_master a, sqlite_master b"
|
||||
" WHERE a.name GLOB '*_segdir'"
|
||||
" AND b.name=substr(a.name,1,length(a.name)-7)"
|
||||
" ORDER BY 1");
|
||||
while( sqlite3_step(pStmt)==SQLITE_ROW ){
|
||||
cnt++;
|
||||
printf("%s;\n", sqlite3_column_text(pStmt, 0));
|
||||
}
|
||||
sqlite3_finalize(pStmt);
|
||||
if( cnt==0 ){
|
||||
printf("/* No FTS3/4 tables found in database %s */\n", argv[1]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
if( argc<4 ) usage(argv[0]);
|
||||
zTab = argv[2];
|
||||
zCmd = argv[3];
|
||||
nExtra = argc-4;
|
||||
azExtra = argv+4;
|
||||
if( strcmp(zCmd,"big-segments")==0 ){
|
||||
listBigSegments(db, zTab);
|
||||
}else if( strcmp(zCmd,"doclist")==0 ){
|
||||
if( argc<7 ) usage(argv[0]);
|
||||
showDoclist(db, zTab);
|
||||
}else if( strcmp(zCmd,"schema")==0 ){
|
||||
showSchema(db, zTab);
|
||||
}else if( strcmp(zCmd,"segdir")==0 ){
|
||||
showSegdirMap(db, zTab);
|
||||
}else if( strcmp(zCmd,"segment")==0 ){
|
||||
if( argc<5 ) usage(argv[0]);
|
||||
showSegment(db, zTab);
|
||||
}else if( strcmp(zCmd,"segment-stats")==0 ){
|
||||
showSegmentStats(db, zTab);
|
||||
}else if( strcmp(zCmd,"stat")==0 ){
|
||||
showStat(db, zTab);
|
||||
}else if( strcmp(zCmd,"vocabulary")==0 ){
|
||||
showVocabulary(db, zTab);
|
||||
}else{
|
||||
usage(argv[0]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,694 +0,0 @@
|
||||
|
||||
source [file join [file dirname [info script]] parseunicode.tcl]
|
||||
|
||||
proc print_rd {map} {
|
||||
global tl_lookup_table
|
||||
set aChar [list]
|
||||
set lRange [list]
|
||||
|
||||
set nRange 1
|
||||
set iFirst [lindex $map 0 0]
|
||||
set cPrev [lindex $map 0 1]
|
||||
|
||||
foreach m [lrange $map 1 end] {
|
||||
foreach {i c} $m {}
|
||||
|
||||
if {$cPrev == $c} {
|
||||
for {set j [expr $iFirst+$nRange]} {$j<$i} {incr j} {
|
||||
if {[info exists tl_lookup_table($j)]==0} break
|
||||
}
|
||||
|
||||
if {$j==$i} {
|
||||
set nNew [expr {(1 + $i - $iFirst)}]
|
||||
if {$nNew<=8} {
|
||||
set nRange $nNew
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lappend lRange [list $iFirst $nRange]
|
||||
lappend aChar $cPrev
|
||||
|
||||
set iFirst $i
|
||||
set cPrev $c
|
||||
set nRange 1
|
||||
}
|
||||
lappend lRange [list $iFirst $nRange]
|
||||
lappend aChar $cPrev
|
||||
|
||||
puts "/*"
|
||||
puts "** If the argument is a codepoint corresponding to a lowercase letter"
|
||||
puts "** in the ASCII range with a diacritic added, return the codepoint"
|
||||
puts "** of the ASCII letter only. For example, if passed 235 - \"LATIN"
|
||||
puts "** SMALL LETTER E WITH DIAERESIS\" - return 65 (\"LATIN SMALL LETTER"
|
||||
puts "** E\"). The resuls of passing a codepoint that corresponds to an"
|
||||
puts "** uppercase letter are undefined."
|
||||
puts "*/"
|
||||
puts "static int ${::remove_diacritic}(int c)\{"
|
||||
puts " unsigned short aDia\[\] = \{"
|
||||
puts -nonewline " 0, "
|
||||
set i 1
|
||||
foreach r $lRange {
|
||||
foreach {iCode nRange} $r {}
|
||||
if {($i % 8)==0} {puts "" ; puts -nonewline " " }
|
||||
incr i
|
||||
|
||||
puts -nonewline [format "%5d" [expr ($iCode<<3) + $nRange-1]]
|
||||
puts -nonewline ", "
|
||||
}
|
||||
puts ""
|
||||
puts " \};"
|
||||
puts " char aChar\[\] = \{"
|
||||
puts -nonewline " '\\0', "
|
||||
set i 1
|
||||
foreach c $aChar {
|
||||
set str "'$c', "
|
||||
if {$c == ""} { set str "'\\0', " }
|
||||
|
||||
if {($i % 12)==0} {puts "" ; puts -nonewline " " }
|
||||
incr i
|
||||
puts -nonewline "$str"
|
||||
}
|
||||
puts ""
|
||||
puts " \};"
|
||||
puts {
|
||||
unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
|
||||
int iRes = 0;
|
||||
int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
|
||||
int iLo = 0;
|
||||
while( iHi>=iLo ){
|
||||
int iTest = (iHi + iLo) / 2;
|
||||
if( key >= aDia[iTest] ){
|
||||
iRes = iTest;
|
||||
iLo = iTest+1;
|
||||
}else{
|
||||
iHi = iTest-1;
|
||||
}
|
||||
}
|
||||
assert( key>=aDia[iRes] );
|
||||
return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);}
|
||||
puts "\}"
|
||||
}
|
||||
|
||||
proc print_isdiacritic {zFunc map} {
|
||||
|
||||
set lCode [list]
|
||||
foreach m $map {
|
||||
foreach {code char} $m {}
|
||||
if {$code && $char == ""} { lappend lCode $code }
|
||||
}
|
||||
set lCode [lsort -integer $lCode]
|
||||
set iFirst [lindex $lCode 0]
|
||||
set iLast [lindex $lCode end]
|
||||
|
||||
set i1 0
|
||||
set i2 0
|
||||
|
||||
foreach c $lCode {
|
||||
set i [expr $c - $iFirst]
|
||||
if {$i < 32} {
|
||||
set i1 [expr {$i1 | (1<<$i)}]
|
||||
} else {
|
||||
set i2 [expr {$i2 | (1<<($i-32))}]
|
||||
}
|
||||
}
|
||||
|
||||
puts "/*"
|
||||
puts "** Return true if the argument interpreted as a unicode codepoint"
|
||||
puts "** is a diacritical modifier character."
|
||||
puts "*/"
|
||||
puts "int ${zFunc}\(int c)\{"
|
||||
puts " unsigned int mask0 = [format "0x%08X" $i1];"
|
||||
puts " unsigned int mask1 = [format "0x%08X" $i2];"
|
||||
|
||||
puts " if( c<$iFirst || c>$iLast ) return 0;"
|
||||
puts " return (c < $iFirst+32) ?"
|
||||
puts " (mask0 & (1 << (c-$iFirst))) :"
|
||||
puts " (mask1 & (1 << (c-$iFirst-32)));"
|
||||
puts "\}"
|
||||
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
proc an_load_separator_ranges {} {
|
||||
global unicodedata.txt
|
||||
set lSep [an_load_unicodedata_text ${unicodedata.txt}]
|
||||
unset -nocomplain iFirst
|
||||
unset -nocomplain nRange
|
||||
set lRange [list]
|
||||
foreach sep $lSep {
|
||||
if {0==[info exists iFirst]} {
|
||||
set iFirst $sep
|
||||
set nRange 1
|
||||
} elseif { $sep == ($iFirst+$nRange) } {
|
||||
incr nRange
|
||||
} else {
|
||||
lappend lRange [list $iFirst $nRange]
|
||||
set iFirst $sep
|
||||
set nRange 1
|
||||
}
|
||||
}
|
||||
lappend lRange [list $iFirst $nRange]
|
||||
set lRange
|
||||
}
|
||||
|
||||
proc an_print_range_array {lRange} {
|
||||
set iFirstMax 0
|
||||
set nRangeMax 0
|
||||
foreach range $lRange {
|
||||
foreach {iFirst nRange} $range {}
|
||||
if {$iFirst > $iFirstMax} {set iFirstMax $iFirst}
|
||||
if {$nRange > $nRangeMax} {set nRangeMax $nRange}
|
||||
}
|
||||
if {$iFirstMax >= (1<<22)} {error "first-max is too large for format"}
|
||||
if {$nRangeMax >= (1<<10)} {error "range-max is too large for format"}
|
||||
|
||||
puts -nonewline " "
|
||||
puts [string trim {
|
||||
/* Each unsigned integer in the following array corresponds to a contiguous
|
||||
** range of unicode codepoints that are not either letters or numbers (i.e.
|
||||
** codepoints for which this function should return 0).
|
||||
**
|
||||
** The most significant 22 bits in each 32-bit value contain the first
|
||||
** codepoint in the range. The least significant 10 bits are used to store
|
||||
** the size of the range (always at least 1). In other words, the value
|
||||
** ((C<<22) + N) represents a range of N codepoints starting with codepoint
|
||||
** C. It is not possible to represent a range larger than 1023 codepoints
|
||||
** using this format.
|
||||
*/
|
||||
}]
|
||||
puts -nonewline " static const unsigned int aEntry\[\] = \{"
|
||||
set i 0
|
||||
foreach range $lRange {
|
||||
foreach {iFirst nRange} $range {}
|
||||
set u32 [format "0x%08X" [expr ($iFirst<<10) + $nRange]]
|
||||
|
||||
if {($i % 5)==0} {puts "" ; puts -nonewline " "}
|
||||
puts -nonewline " $u32,"
|
||||
incr i
|
||||
}
|
||||
puts ""
|
||||
puts " \};"
|
||||
}
|
||||
|
||||
proc an_print_ascii_bitmap {lRange} {
|
||||
foreach range $lRange {
|
||||
foreach {iFirst nRange} $range {}
|
||||
for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} {
|
||||
if {$i<=127} { set a($i) 1 }
|
||||
}
|
||||
}
|
||||
|
||||
set aAscii [list 0 0 0 0]
|
||||
foreach key [array names a] {
|
||||
set idx [expr $key >> 5]
|
||||
lset aAscii $idx [expr [lindex $aAscii $idx] | (1 << ($key&0x001F))]
|
||||
}
|
||||
|
||||
puts " static const unsigned int aAscii\[4\] = \{"
|
||||
puts -nonewline " "
|
||||
foreach v $aAscii { puts -nonewline [format " 0x%08X," $v] }
|
||||
puts ""
|
||||
puts " \};"
|
||||
}
|
||||
|
||||
proc print_isalnum {zFunc lRange} {
|
||||
puts "/*"
|
||||
puts "** Return true if the argument corresponds to a unicode codepoint"
|
||||
puts "** classified as either a letter or a number. Otherwise false."
|
||||
puts "**"
|
||||
puts "** The results are undefined if the value passed to this function"
|
||||
puts "** is less than zero."
|
||||
puts "*/"
|
||||
puts "int ${zFunc}\(int c)\{"
|
||||
an_print_range_array $lRange
|
||||
an_print_ascii_bitmap $lRange
|
||||
puts {
|
||||
if( (unsigned int)c<128 ){
|
||||
return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
|
||||
}else if( (unsigned int)c<(1<<22) ){
|
||||
unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
|
||||
int iRes = 0;
|
||||
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
|
||||
int iLo = 0;
|
||||
while( iHi>=iLo ){
|
||||
int iTest = (iHi + iLo) / 2;
|
||||
if( key >= aEntry[iTest] ){
|
||||
iRes = iTest;
|
||||
iLo = iTest+1;
|
||||
}else{
|
||||
iHi = iTest-1;
|
||||
}
|
||||
}
|
||||
assert( aEntry[0]<key );
|
||||
assert( key>=aEntry[iRes] );
|
||||
return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
|
||||
}
|
||||
return 1;}
|
||||
puts "\}"
|
||||
}
|
||||
|
||||
proc print_test_isalnum {zFunc lRange} {
|
||||
foreach range $lRange {
|
||||
foreach {iFirst nRange} $range {}
|
||||
for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} { set a($i) 1 }
|
||||
}
|
||||
|
||||
puts "static int isalnum_test(int *piCode)\{"
|
||||
puts -nonewline " unsigned char aAlnum\[\] = \{"
|
||||
for {set i 0} {$i < 70000} {incr i} {
|
||||
if {($i % 32)==0} { puts "" ; puts -nonewline " " }
|
||||
set bFlag [expr ![info exists a($i)]]
|
||||
puts -nonewline "${bFlag},"
|
||||
}
|
||||
puts ""
|
||||
puts " \};"
|
||||
|
||||
puts -nonewline " int aLargeSep\[\] = \{"
|
||||
set i 0
|
||||
foreach iSep [lsort -integer [array names a]] {
|
||||
if {$iSep<70000} continue
|
||||
if {($i % 8)==0} { puts "" ; puts -nonewline " " }
|
||||
puts -nonewline " $iSep,"
|
||||
incr i
|
||||
}
|
||||
puts ""
|
||||
puts " \};"
|
||||
puts -nonewline " int aLargeOther\[\] = \{"
|
||||
set i 0
|
||||
foreach iSep [lsort -integer [array names a]] {
|
||||
if {$iSep<70000} continue
|
||||
if {[info exists a([expr $iSep-1])]==0} {
|
||||
if {($i % 8)==0} { puts "" ; puts -nonewline " " }
|
||||
puts -nonewline " [expr $iSep-1],"
|
||||
incr i
|
||||
}
|
||||
if {[info exists a([expr $iSep+1])]==0} {
|
||||
if {($i % 8)==0} { puts "" ; puts -nonewline " " }
|
||||
puts -nonewline " [expr $iSep+1],"
|
||||
incr i
|
||||
}
|
||||
}
|
||||
puts ""
|
||||
puts " \};"
|
||||
|
||||
puts [subst -nocommands {
|
||||
int i;
|
||||
for(i=0; i<sizeof(aAlnum)/sizeof(aAlnum[0]); i++){
|
||||
if( ${zFunc}(i)!=aAlnum[i] ){
|
||||
*piCode = i;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
for(i=0; i<sizeof(aLargeSep)/sizeof(aLargeSep[0]); i++){
|
||||
if( ${zFunc}(aLargeSep[i])!=0 ){
|
||||
*piCode = aLargeSep[i];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
for(i=0; i<sizeof(aLargeOther)/sizeof(aLargeOther[0]); i++){
|
||||
if( ${zFunc}(aLargeOther[i])!=1 ){
|
||||
*piCode = aLargeOther[i];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}]
|
||||
puts " return 0;"
|
||||
puts "\}"
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
|
||||
proc tl_create_records {} {
|
||||
global tl_lookup_table
|
||||
|
||||
set iFirst ""
|
||||
set nOff 0
|
||||
set nRange 0
|
||||
set nIncr 0
|
||||
|
||||
set lRecord [list]
|
||||
foreach code [lsort -integer [array names tl_lookup_table]] {
|
||||
set mapping $tl_lookup_table($code)
|
||||
if {$iFirst == ""} {
|
||||
set iFirst $code
|
||||
set nOff [expr $mapping - $code]
|
||||
set nRange 1
|
||||
set nIncr 1
|
||||
} else {
|
||||
set diff [expr $code - ($iFirst + ($nIncr * ($nRange - 1)))]
|
||||
if { $nRange==1 && ($diff==1 || $diff==2) } {
|
||||
set nIncr $diff
|
||||
}
|
||||
|
||||
if {$diff != $nIncr || ($mapping - $code)!=$nOff} {
|
||||
if { $nRange==1 } {set nIncr 1}
|
||||
lappend lRecord [list $iFirst $nIncr $nRange $nOff]
|
||||
set iFirst $code
|
||||
set nOff [expr $mapping - $code]
|
||||
set nRange 1
|
||||
set nIncr 1
|
||||
} else {
|
||||
incr nRange
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lappend lRecord [list $iFirst $nIncr $nRange $nOff]
|
||||
|
||||
set lRecord
|
||||
}
|
||||
|
||||
proc tl_print_table_header {} {
|
||||
puts -nonewline " "
|
||||
puts [string trim {
|
||||
/* Each entry in the following array defines a rule for folding a range
|
||||
** of codepoints to lower case. The rule applies to a range of nRange
|
||||
** codepoints starting at codepoint iCode.
|
||||
**
|
||||
** If the least significant bit in flags is clear, then the rule applies
|
||||
** to all nRange codepoints (i.e. all nRange codepoints are upper case and
|
||||
** need to be folded). Or, if it is set, then the rule only applies to
|
||||
** every second codepoint in the range, starting with codepoint C.
|
||||
**
|
||||
** The 7 most significant bits in flags are an index into the aiOff[]
|
||||
** array. If a specific codepoint C does require folding, then its lower
|
||||
** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
|
||||
**
|
||||
** The contents of this array are generated by parsing the CaseFolding.txt
|
||||
** file distributed as part of the "Unicode Character Database". See
|
||||
** http://www.unicode.org for details.
|
||||
*/
|
||||
}]
|
||||
puts " static const struct TableEntry \{"
|
||||
puts " unsigned short iCode;"
|
||||
puts " unsigned char flags;"
|
||||
puts " unsigned char nRange;"
|
||||
puts " \} aEntry\[\] = \{"
|
||||
}
|
||||
|
||||
proc tl_print_table_entry {togglevar entry liOff} {
|
||||
upvar $togglevar t
|
||||
foreach {iFirst nIncr nRange nOff} $entry {}
|
||||
|
||||
if {$iFirst > (1<<16)} { return 1 }
|
||||
|
||||
if {[info exists t]==0} {set t 0}
|
||||
if {$t==0} { puts -nonewline " " }
|
||||
|
||||
set flags 0
|
||||
if {$nIncr==2} { set flags 1 ; set nRange [expr $nRange * 2]}
|
||||
if {$nOff<0} { incr nOff [expr (1<<16)] }
|
||||
|
||||
set idx [lsearch $liOff $nOff]
|
||||
if {$idx<0} {error "malfunction generating aiOff"}
|
||||
set flags [expr $flags + $idx*2]
|
||||
|
||||
set txt "{$iFirst, $flags, $nRange},"
|
||||
if {$t==2} {
|
||||
puts $txt
|
||||
} else {
|
||||
puts -nonewline [format "% -23s" $txt]
|
||||
}
|
||||
set t [expr ($t+1)%3]
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
proc tl_print_table_footer {togglevar} {
|
||||
upvar $togglevar t
|
||||
if {$t!=0} {puts ""}
|
||||
puts " \};"
|
||||
}
|
||||
|
||||
proc tl_print_if_entry {entry} {
|
||||
foreach {iFirst nIncr nRange nOff} $entry {}
|
||||
if {$nIncr==2} {error "tl_print_if_entry needs improvement!"}
|
||||
|
||||
puts " else if( c>=$iFirst && c<[expr $iFirst+$nRange] )\{"
|
||||
puts " ret = c + $nOff;"
|
||||
puts " \}"
|
||||
}
|
||||
|
||||
proc tl_generate_ioff_table {lRecord} {
|
||||
foreach entry $lRecord {
|
||||
foreach {iFirst nIncr nRange iOff} $entry {}
|
||||
if {$iOff<0} { incr iOff [expr (1<<16)] }
|
||||
if {[info exists a($iOff)]} continue
|
||||
set a($iOff) 1
|
||||
}
|
||||
|
||||
set liOff [lsort -integer [array names a]]
|
||||
if {[llength $liOff]>128} { error "Too many distinct ioffs" }
|
||||
return $liOff
|
||||
}
|
||||
|
||||
proc tl_print_ioff_table {liOff} {
|
||||
puts -nonewline " static const unsigned short aiOff\[\] = \{"
|
||||
set i 0
|
||||
foreach off $liOff {
|
||||
if {($i % 8)==0} {puts "" ; puts -nonewline " "}
|
||||
puts -nonewline [format "% -7s" "$off,"]
|
||||
incr i
|
||||
}
|
||||
puts ""
|
||||
puts " \};"
|
||||
|
||||
}
|
||||
|
||||
proc print_fold {zFunc} {
|
||||
|
||||
set lRecord [tl_create_records]
|
||||
|
||||
set lHigh [list]
|
||||
puts "/*"
|
||||
puts "** Interpret the argument as a unicode codepoint. If the codepoint"
|
||||
puts "** is an upper case character that has a lower case equivalent,"
|
||||
puts "** return the codepoint corresponding to the lower case version."
|
||||
puts "** Otherwise, return a copy of the argument."
|
||||
puts "**"
|
||||
puts "** The results are undefined if the value passed to this function"
|
||||
puts "** is less than zero."
|
||||
puts "*/"
|
||||
puts "int ${zFunc}\(int c, int bRemoveDiacritic)\{"
|
||||
|
||||
set liOff [tl_generate_ioff_table $lRecord]
|
||||
tl_print_table_header
|
||||
foreach entry $lRecord {
|
||||
if {[tl_print_table_entry toggle $entry $liOff]} {
|
||||
lappend lHigh $entry
|
||||
}
|
||||
}
|
||||
tl_print_table_footer toggle
|
||||
tl_print_ioff_table $liOff
|
||||
|
||||
puts [subst -nocommands {
|
||||
int ret = c;
|
||||
|
||||
assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
|
||||
|
||||
if( c<128 ){
|
||||
if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
|
||||
}else if( c<65536 ){
|
||||
const struct TableEntry *p;
|
||||
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
|
||||
int iLo = 0;
|
||||
int iRes = -1;
|
||||
|
||||
assert( c>aEntry[0].iCode );
|
||||
while( iHi>=iLo ){
|
||||
int iTest = (iHi + iLo) / 2;
|
||||
int cmp = (c - aEntry[iTest].iCode);
|
||||
if( cmp>=0 ){
|
||||
iRes = iTest;
|
||||
iLo = iTest+1;
|
||||
}else{
|
||||
iHi = iTest-1;
|
||||
}
|
||||
}
|
||||
|
||||
assert( iRes>=0 && c>=aEntry[iRes].iCode );
|
||||
p = &aEntry[iRes];
|
||||
if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
|
||||
ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
|
||||
assert( ret>0 );
|
||||
}
|
||||
|
||||
if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret);
|
||||
}
|
||||
}]
|
||||
|
||||
foreach entry $lHigh {
|
||||
tl_print_if_entry $entry
|
||||
}
|
||||
|
||||
puts ""
|
||||
puts " return ret;"
|
||||
puts "\}"
|
||||
}
|
||||
|
||||
proc print_fold_test {zFunc mappings} {
|
||||
global tl_lookup_table
|
||||
|
||||
foreach m $mappings {
|
||||
set c [lindex $m 1]
|
||||
if {$c == ""} {
|
||||
set extra([lindex $m 0]) 0
|
||||
} else {
|
||||
scan $c %c i
|
||||
set extra([lindex $m 0]) $i
|
||||
}
|
||||
}
|
||||
|
||||
puts "static int fold_test(int *piCode)\{"
|
||||
puts -nonewline " static int aLookup\[\] = \{"
|
||||
for {set i 0} {$i < 70000} {incr i} {
|
||||
|
||||
set expected $i
|
||||
catch { set expected $tl_lookup_table($i) }
|
||||
set expected2 $expected
|
||||
catch { set expected2 $extra($expected2) }
|
||||
|
||||
if {($i % 4)==0} { puts "" ; puts -nonewline " " }
|
||||
puts -nonewline "$expected, $expected2, "
|
||||
}
|
||||
puts " \};"
|
||||
puts " int i;"
|
||||
puts " for(i=0; i<sizeof(aLookup)/sizeof(aLookup\[0\]); i++)\{"
|
||||
puts " int iCode = (i/2);"
|
||||
puts " int bFlag = i & 0x0001;"
|
||||
puts " if( ${zFunc}\(iCode, bFlag)!=aLookup\[i\] )\{"
|
||||
puts " *piCode = iCode;"
|
||||
puts " return 1;"
|
||||
puts " \}"
|
||||
puts " \}"
|
||||
puts " return 0;"
|
||||
puts "\}"
|
||||
}
|
||||
|
||||
|
||||
proc print_fileheader {} {
|
||||
puts [string trim {
|
||||
/*
|
||||
** 2012 May 25
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
/*
|
||||
** DO NOT EDIT THIS MACHINE GENERATED FILE.
|
||||
*/
|
||||
}]
|
||||
puts ""
|
||||
if {$::generate_fts5_code} {
|
||||
# no-op
|
||||
} else {
|
||||
puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"
|
||||
puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
|
||||
}
|
||||
puts ""
|
||||
puts "#include <assert.h>"
|
||||
puts ""
|
||||
}
|
||||
|
||||
proc print_test_main {} {
|
||||
puts ""
|
||||
puts "#include <stdio.h>"
|
||||
puts ""
|
||||
puts "int main(int argc, char **argv)\{"
|
||||
puts " int r1, r2;"
|
||||
puts " int code;"
|
||||
puts " r1 = isalnum_test(&code);"
|
||||
puts " if( r1 ) printf(\"isalnum(): Problem with code %d\\n\",code);"
|
||||
puts " else printf(\"isalnum(): test passed\\n\");"
|
||||
puts " r2 = fold_test(&code);"
|
||||
puts " if( r2 ) printf(\"fold(): Problem with code %d\\n\",code);"
|
||||
puts " else printf(\"fold(): test passed\\n\");"
|
||||
puts " return (r1 || r2);"
|
||||
puts "\}"
|
||||
}
|
||||
|
||||
# Process the command line arguments. Exit early if they are not to
|
||||
# our liking.
|
||||
#
|
||||
proc usage {} {
|
||||
puts -nonewline stderr "Usage: $::argv0 ?-test? ?-fts5? "
|
||||
puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>"
|
||||
exit 1
|
||||
}
|
||||
if {[llength $argv]<2} usage
|
||||
set unicodedata.txt [lindex $argv end]
|
||||
set casefolding.txt [lindex $argv end-1]
|
||||
|
||||
set remove_diacritic remove_diacritic
|
||||
set generate_test_code 0
|
||||
set generate_fts5_code 0
|
||||
set function_prefix "sqlite3Fts"
|
||||
for {set i 0} {$i < [llength $argv]-2} {incr i} {
|
||||
switch -- [lindex $argv $i] {
|
||||
-test {
|
||||
set generate_test_code 1
|
||||
}
|
||||
-fts5 {
|
||||
set function_prefix sqlite3Fts5
|
||||
set generate_fts5_code 1
|
||||
set remove_diacritic fts5_remove_diacritic
|
||||
}
|
||||
default {
|
||||
usage
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
print_fileheader
|
||||
|
||||
# Print the isalnum() function to stdout.
|
||||
#
|
||||
set lRange [an_load_separator_ranges]
|
||||
print_isalnum ${function_prefix}UnicodeIsalnum $lRange
|
||||
|
||||
# Leave a gap between the two generated C functions.
|
||||
#
|
||||
puts ""
|
||||
puts ""
|
||||
|
||||
# Load the fold data. This is used by the [rd_XXX] commands
|
||||
# as well as [print_fold].
|
||||
tl_load_casefolding_txt ${casefolding.txt}
|
||||
|
||||
set mappings [rd_load_unicodedata_text ${unicodedata.txt}]
|
||||
print_rd $mappings
|
||||
puts ""
|
||||
puts ""
|
||||
print_isdiacritic ${function_prefix}UnicodeIsdiacritic $mappings
|
||||
puts ""
|
||||
puts ""
|
||||
|
||||
# Print the fold() function to stdout.
|
||||
#
|
||||
print_fold ${function_prefix}UnicodeFold
|
||||
|
||||
# Print the test routines and main() function to stdout, if -test
|
||||
# was specified.
|
||||
#
|
||||
if {$::generate_test_code} {
|
||||
print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange
|
||||
print_fold_test ${function_prefix}UnicodeFold $mappings
|
||||
print_test_main
|
||||
}
|
||||
|
||||
if {$generate_fts5_code} {
|
||||
# no-op
|
||||
} else {
|
||||
puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
|
||||
puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"
|
||||
}
|
||||
@ -1,146 +0,0 @@
|
||||
|
||||
#--------------------------------------------------------------------------
|
||||
# Parameter $zName must be a path to the file UnicodeData.txt. This command
|
||||
# reads the file and returns a list of mappings required to remove all
|
||||
# diacritical marks from a unicode string. Each mapping is itself a list
|
||||
# consisting of two elements - the unicode codepoint and the single ASCII
|
||||
# character that it should be replaced with, or an empty string if the
|
||||
# codepoint should simply be removed from the input. Examples:
|
||||
#
|
||||
#   { 224 a }  (replace codepoint 224 with "a")
|
||||
# { 769 "" } (remove codepoint 769 from input)
|
||||
#
|
||||
# Mappings are only returned for non-upper case codepoints. It is assumed
|
||||
# that the input has already been folded to lower case.
|
||||
#
|
||||
proc rd_load_unicodedata_text {zName} {
|
||||
global tl_lookup_table
|
||||
|
||||
set fd [open $zName]
|
||||
set lField {
|
||||
code
|
||||
character_name
|
||||
general_category
|
||||
canonical_combining_classes
|
||||
bidirectional_category
|
||||
character_decomposition_mapping
|
||||
decimal_digit_value
|
||||
digit_value
|
||||
numeric_value
|
||||
mirrored
|
||||
unicode_1_name
|
||||
iso10646_comment_field
|
||||
uppercase_mapping
|
||||
lowercase_mapping
|
||||
titlecase_mapping
|
||||
}
|
||||
set lRet [list]
|
||||
|
||||
while { ![eof $fd] } {
|
||||
set line [gets $fd]
|
||||
if {$line == ""} continue
|
||||
|
||||
set fields [split $line ";"]
|
||||
if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
|
||||
foreach $lField $fields {}
|
||||
if { [llength $character_decomposition_mapping]!=2
|
||||
|| [string is xdigit [lindex $character_decomposition_mapping 0]]==0
|
||||
} {
|
||||
continue
|
||||
}
|
||||
|
||||
set iCode [expr "0x$code"]
|
||||
set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]
|
||||
set iDia [expr "0x[lindex $character_decomposition_mapping 1]"]
|
||||
|
||||
if {[info exists tl_lookup_table($iCode)]} continue
|
||||
|
||||
if { ($iAscii >= 97 && $iAscii <= 122)
|
||||
|| ($iAscii >= 65 && $iAscii <= 90)
|
||||
} {
|
||||
lappend lRet [list $iCode [string tolower [format %c $iAscii]]]
|
||||
set dia($iDia) 1
|
||||
}
|
||||
}
|
||||
|
||||
foreach d [array names dia] {
|
||||
lappend lRet [list $d ""]
|
||||
}
|
||||
set lRet [lsort -integer -index 0 $lRet]
|
||||
|
||||
close $fd
|
||||
set lRet
|
||||
}
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# Parameter $zName must be a path to the file UnicodeData.txt. This command
|
||||
# reads the file and returns a list of codepoints (integers). The list
|
||||
# contains all codepoints in the UnicodeData.txt assigned to any "General
|
||||
# Category" that is not a "Letter" or "Number".
|
||||
#
|
||||
proc an_load_unicodedata_text {zName} {
|
||||
set fd [open $zName]
|
||||
set lField {
|
||||
code
|
||||
character_name
|
||||
general_category
|
||||
canonical_combining_classes
|
||||
bidirectional_category
|
||||
character_decomposition_mapping
|
||||
decimal_digit_value
|
||||
digit_value
|
||||
numeric_value
|
||||
mirrored
|
||||
unicode_1_name
|
||||
iso10646_comment_field
|
||||
uppercase_mapping
|
||||
lowercase_mapping
|
||||
titlecase_mapping
|
||||
}
|
||||
set lRet [list]
|
||||
|
||||
while { ![eof $fd] } {
|
||||
set line [gets $fd]
|
||||
if {$line == ""} continue
|
||||
|
||||
set fields [split $line ";"]
|
||||
if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
|
||||
foreach $lField $fields {}
|
||||
|
||||
set iCode [expr "0x$code"]
|
||||
set bAlnum [expr {
|
||||
[lsearch {L N} [string range $general_category 0 0]] >= 0
|
||||
|| $general_category=="Co"
|
||||
}]
|
||||
|
||||
if { !$bAlnum } { lappend lRet $iCode }
|
||||
}
|
||||
|
||||
close $fd
|
||||
set lRet
|
||||
}
|
||||
|
||||
proc tl_load_casefolding_txt {zName} {
|
||||
global tl_lookup_table
|
||||
|
||||
set fd [open $zName]
|
||||
while { ![eof $fd] } {
|
||||
set line [gets $fd]
|
||||
if {[string range $line 0 0] == "#"} continue
|
||||
if {$line == ""} continue
|
||||
|
||||
foreach x {a b c d} {unset -nocomplain $x}
|
||||
foreach {a b c d} [split $line ";"] {}
|
||||
|
||||
set a2 [list]
|
||||
set c2 [list]
|
||||
foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
|
||||
foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
|
||||
set b [string trim $b]
|
||||
set d [string trim $d]
|
||||
|
||||
if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,252 +0,0 @@
|
||||
#
|
||||
# 2014 August 24
|
||||
#
|
||||
# The author disclaims copyright to this source code. In place of
|
||||
# a legal notice, here is a blessing:
|
||||
#
|
||||
# May you do good and not evil.
|
||||
# May you find forgiveness for yourself and forgive others.
|
||||
# May you share freely, never taking more than you give.
|
||||
#
|
||||
#--------------------------------------------------------------------------
|
||||
#
|
||||
# This script extracts the documentation for the API used by fts5 auxiliary
|
||||
# functions from header file fts5.h. It outputs html text on stdout that
|
||||
# is included in the documentation on the web.
|
||||
#
|
||||
|
||||
set ::fts5_docs_output ""
|
||||
if {[info commands hd_putsnl]==""} {
|
||||
if {[llength $argv]>0} { set ::extract_api_docs_mode [lindex $argv 0] }
|
||||
proc output {text} {
|
||||
puts $text
|
||||
}
|
||||
} else {
|
||||
proc output {text} {
|
||||
append ::fts5_docs_output "$text\n"
|
||||
}
|
||||
}
|
||||
if {[info exists ::extract_api_docs_mode]==0} {set ::extract_api_docs_mode api}
|
||||
|
||||
|
||||
set input_file [file join [file dir [info script]] fts5.h]
|
||||
set fd [open $input_file]
|
||||
set data [read $fd]
|
||||
close $fd
|
||||
|
||||
|
||||
# Argument $data is the entire text of the fts5.h file. This function
|
||||
# extracts the definition of the Fts5ExtensionApi structure from it and
|
||||
# returns a key/value list of structure member names and definitions. i.e.
|
||||
#
|
||||
# iVersion {int iVersion} xUserData {void *(*xUserData)(Fts5Context*)} ...
|
||||
#
|
||||
proc get_struct_members {data} {
|
||||
|
||||
# Extract the structure definition from the fts5.h file.
|
||||
regexp "struct Fts5ExtensionApi {(.*?)};" $data -> defn
|
||||
|
||||
# Remove all comments from the structure definition
|
||||
regsub -all {/[*].*?[*]/} $defn {} defn2
|
||||
|
||||
set res [list]
|
||||
foreach member [split $defn2 {;}] {
|
||||
|
||||
set member [string trim $member]
|
||||
if {$member!=""} {
|
||||
catch { set name [lindex $member end] }
|
||||
regexp {.*?[(][*]([^)]*)[)]} $member -> name
|
||||
lappend res $name $member
|
||||
}
|
||||
}
|
||||
|
||||
set res
|
||||
}
|
||||
|
||||
proc get_struct_docs {data names} {
|
||||
# Extract the structure definition from the fts5.h file.
|
||||
regexp {EXTENSION API FUNCTIONS(.*?)[*]/} $data -> docs
|
||||
|
||||
set current_doc ""
|
||||
set current_header ""
|
||||
|
||||
foreach line [split $docs "\n"] {
|
||||
regsub {[*]*} $line {} line
|
||||
if {[regexp {^ } $line]} {
|
||||
append current_doc "$line\n"
|
||||
} elseif {[string trim $line]==""} {
|
||||
if {$current_header!=""} { append current_doc "\n" }
|
||||
} else {
|
||||
if {$current_doc != ""} {
|
||||
lappend res $current_header $current_doc
|
||||
set current_doc ""
|
||||
}
|
||||
set subject n/a
|
||||
regexp {^ *([[:alpha:]]*)} $line -> subject
|
||||
if {[lsearch $names $subject]>=0} {
|
||||
set current_header $subject
|
||||
} else {
|
||||
set current_header [string trim $line]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if {$current_doc != ""} {
|
||||
lappend res $current_header $current_doc
|
||||
}
|
||||
|
||||
set res
|
||||
}
|
||||
|
||||
proc get_tokenizer_docs {data} {
|
||||
regexp {(xCreate:.*?)[*]/} $data -> docs
|
||||
|
||||
set res "<dl>\n"
|
||||
foreach line [split [string trim $docs] "\n"] {
|
||||
regexp {[*][*](.*)} $line -> line
|
||||
if {[regexp {^ ?x.*:} $line]} {
|
||||
append res "<dt><b>$line</b></dt><dd><p style=margin-top:0>\n"
|
||||
continue
|
||||
}
|
||||
if {[regexp {SYNONYM SUPPORT} $line]} {
|
||||
set line "</dl><h3>Synonym Support</h3>"
|
||||
}
|
||||
if {[string trim $line] == ""} {
|
||||
append res "<p>\n"
|
||||
} else {
|
||||
append res "$line\n"
|
||||
}
|
||||
}
|
||||
|
||||
set res
|
||||
}
|
||||
|
||||
proc get_api_docs {data} {
|
||||
# Initialize global array M as a map from Fts5ExtensionApi member name
|
||||
# to member definition. i.e.
|
||||
#
|
||||
# iVersion -> {int iVersion}
|
||||
# xUserData -> {void *(*xUserData)(Fts5Context*)}
|
||||
# ...
|
||||
#
|
||||
array set M [get_struct_members $data]
|
||||
|
||||
# Initialize global list D as a map from section name to documentation
|
||||
# text. Most (all?) section names are structure member names.
|
||||
#
|
||||
set D [get_struct_docs $data [array names M]]
|
||||
|
||||
output "<dl>"
|
||||
foreach {sub docs} $D {
|
||||
if {[info exists M($sub)]} {
|
||||
set hdr $M($sub)
|
||||
set link " id=$sub"
|
||||
} else {
|
||||
set link ""
|
||||
}
|
||||
|
||||
#output "<hr color=#eeeee style=\"margin:1em 8.4ex 0 8.4ex;\"$link>"
|
||||
#set style "padding-left:6ex;font-size:1.4em;display:block"
|
||||
#output "<h style=\"$style\"><pre>$hdr</pre></h>"
|
||||
|
||||
regsub -line {^ *[)]} $hdr ")" hdr
|
||||
output "<dt style=\"white-space:pre;font-family:monospace;font-size:120%\""
|
||||
output "$link>"
|
||||
output "<b>$hdr</b></dt><dd>"
|
||||
|
||||
set mode ""
|
||||
set margin " style=margin-top:0.1em"
|
||||
foreach line [split [string trim $docs] "\n"] {
|
||||
if {[string trim $line]==""} {
|
||||
if {$mode != ""} {output "</$mode>"}
|
||||
set mode ""
|
||||
} elseif {$mode == ""} {
|
||||
if {[regexp {^ } $line]} {
|
||||
set mode codeblock
|
||||
} else {
|
||||
set mode p
|
||||
}
|
||||
output "<$mode$margin>"
|
||||
set margin ""
|
||||
}
|
||||
output $line
|
||||
}
|
||||
if {$mode != ""} {output "</$mode>"}
|
||||
output "</dd>"
|
||||
}
|
||||
output "</dl>"
|
||||
}
|
||||
|
||||
proc get_fts5_struct {data start end} {
|
||||
set res ""
|
||||
set bOut 0
|
||||
foreach line [split $data "\n"] {
|
||||
if {$bOut==0} {
|
||||
if {[regexp $start $line]} {
|
||||
set bOut 1
|
||||
}
|
||||
}
|
||||
|
||||
if {$bOut} {
|
||||
append res "$line\n"
|
||||
}
|
||||
|
||||
if {$bOut} {
|
||||
if {[regexp $end $line]} {
|
||||
set bOut 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
set map [list /* <i>/* */ */</i>]
|
||||
string map $map $res
|
||||
}
|
||||
|
||||
proc main {data} {
|
||||
switch $::extract_api_docs_mode {
|
||||
fts5_api {
|
||||
output [get_fts5_struct $data "typedef struct fts5_api" "^\};"]
|
||||
}
|
||||
|
||||
fts5_tokenizer {
|
||||
output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"]
|
||||
output [get_fts5_struct $data \
|
||||
"Flags that may be passed as the third argument to xTokenize()" \
|
||||
"#define FTS5_TOKEN_COLOCATED"
|
||||
]
|
||||
}
|
||||
|
||||
fts5_extension {
|
||||
output [get_fts5_struct $data "typedef.*Fts5ExtensionApi" "^.;"]
|
||||
}
|
||||
|
||||
Fts5ExtensionApi {
|
||||
set struct [get_fts5_struct $data "^struct Fts5ExtensionApi" "^.;"]
|
||||
set map [list]
|
||||
foreach {k v} [get_struct_members $data] {
|
||||
if {[string match x* $k]==0} continue
|
||||
lappend map $k "<a href=#$k>$k</a>"
|
||||
}
|
||||
output [string map $map $struct]
|
||||
}
|
||||
|
||||
api {
|
||||
get_api_docs $data
|
||||
}
|
||||
|
||||
tokenizer_api {
|
||||
output [get_tokenizer_docs $data]
|
||||
}
|
||||
|
||||
default {
|
||||
}
|
||||
}
|
||||
}
|
||||
main $data
|
||||
|
||||
set ::fts5_docs_output
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -1,578 +0,0 @@
|
||||
/*
|
||||
** 2014 May 31
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** Interfaces to extend FTS5. Using the interfaces defined in this file,
|
||||
** FTS5 may be extended with:
|
||||
**
|
||||
** * custom tokenizers, and
|
||||
** * custom auxiliary functions.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _FTS5_H
|
||||
#define _FTS5_H
|
||||
|
||||
#include "sqlite3.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*************************************************************************
|
||||
** CUSTOM AUXILIARY FUNCTIONS
|
||||
**
|
||||
** Virtual table implementations may overload SQL functions by implementing
|
||||
** the sqlite3_module.xFindFunction() method.
|
||||
*/
|
||||
|
||||
typedef struct Fts5ExtensionApi Fts5ExtensionApi;
|
||||
typedef struct Fts5Context Fts5Context;
|
||||
typedef struct Fts5PhraseIter Fts5PhraseIter;
|
||||
|
||||
typedef void (*fts5_extension_function)(
|
||||
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
|
||||
Fts5Context *pFts, /* First arg to pass to pApi functions */
|
||||
sqlite3_context *pCtx, /* Context for returning result/error */
|
||||
int nVal, /* Number of values in apVal[] array */
|
||||
sqlite3_value **apVal /* Array of trailing arguments */
|
||||
);
|
||||
|
||||
struct Fts5PhraseIter {
|
||||
const unsigned char *a;
|
||||
const unsigned char *b;
|
||||
};
|
||||
|
||||
/*
|
||||
** EXTENSION API FUNCTIONS
|
||||
**
|
||||
** xUserData(pFts):
|
||||
** Return a copy of the context pointer the extension function was
|
||||
** registered with.
|
||||
**
|
||||
** xColumnTotalSize(pFts, iCol, pnToken):
|
||||
** If parameter iCol is less than zero, set output variable *pnToken
|
||||
** to the total number of tokens in the FTS5 table. Or, if iCol is
|
||||
** non-negative but less than the number of columns in the table, return
|
||||
** the total number of tokens in column iCol, considering all rows in
|
||||
** the FTS5 table.
|
||||
**
|
||||
** If parameter iCol is greater than or equal to the number of columns
|
||||
** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
|
||||
** an OOM condition or IO error), an appropriate SQLite error code is
|
||||
** returned.
|
||||
**
|
||||
** xColumnCount(pFts):
|
||||
** Return the number of columns in the table.
|
||||
**
|
||||
** xColumnSize(pFts, iCol, pnToken):
|
||||
** If parameter iCol is less than zero, set output variable *pnToken
|
||||
** to the total number of tokens in the current row. Or, if iCol is
|
||||
** non-negative but less than the number of columns in the table, set
|
||||
** *pnToken to the number of tokens in column iCol of the current row.
|
||||
**
|
||||
** If parameter iCol is greater than or equal to the number of columns
|
||||
** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
|
||||
** an OOM condition or IO error), an appropriate SQLite error code is
|
||||
** returned.
|
||||
**
|
||||
** This function may be quite inefficient if used with an FTS5 table
|
||||
** created with the "columnsize=0" option.
|
||||
**
|
||||
** xColumnText:
|
||||
** This function attempts to retrieve the text of column iCol of the
|
||||
** current document. If successful, (*pz) is set to point to a buffer
|
||||
** containing the text in utf-8 encoding, (*pn) is set to the size in bytes
|
||||
** (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
|
||||
** if an error occurs, an SQLite error code is returned and the final values
|
||||
** of (*pz) and (*pn) are undefined.
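**
**   For example, a sketch of retrieving the text of column 0 of the current
**   row (pApi and pFts are the usual first two arguments passed to an
**   auxiliary function implementation):
**
**     const char *z = 0; int n = 0;
**     int rc = pApi->xColumnText(pFts, 0, &z, &n);
**     if( rc==SQLITE_OK ){
**       /* z now points to n bytes of utf-8 text from column 0 */
**     }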
|
||||
**
|
||||
** xPhraseCount:
|
||||
** Returns the number of phrases in the current query expression.
|
||||
**
|
||||
** xPhraseSize:
|
||||
** Returns the number of tokens in phrase iPhrase of the query. Phrases
|
||||
** are numbered starting from zero.
|
||||
**
|
||||
** xInstCount:
|
||||
** Set *pnInst to the total number of occurrences of all phrases within
|
||||
** the query within the current row. Return SQLITE_OK if successful, or
|
||||
** an error code (i.e. SQLITE_NOMEM) if an error occurs.
|
||||
**
|
||||
** This API can be quite slow if used with an FTS5 table created with the
|
||||
** "detail=none" or "detail=column" option. If the FTS5 table is created
|
||||
** with either "detail=none" or "detail=column" and "content=" option
|
||||
** (i.e. if it is a contentless table), then this API always returns 0.
|
||||
**
|
||||
** xInst:
|
||||
** Query for the details of phrase match iIdx within the current row.
|
||||
** Phrase matches are numbered starting from zero, so the iIdx argument
|
||||
** should be greater than or equal to zero and smaller than the value
|
||||
** output by xInstCount().
|
||||
**
|
||||
** Usually, output parameter *piPhrase is set to the phrase number, *piCol
|
||||
** to the column in which it occurs and *piOff the token offset of the
|
||||
** first token of the phrase. The exception is if the table was created
|
||||
** with the offsets=0 option specified. In this case *piOff is always
|
||||
** set to -1.
|
||||
**
|
||||
** Returns SQLITE_OK if successful, or an error code (i.e. SQLITE_NOMEM)
|
||||
** if an error occurs.
|
||||
**
|
||||
** This API can be quite slow if used with an FTS5 table created with the
|
||||
** "detail=none" or "detail=column" option.
|
||||
**
|
||||
** xRowid:
|
||||
** Returns the rowid of the current row.
|
||||
**
|
||||
** xTokenize:
|
||||
** Tokenize text using the tokenizer belonging to the FTS5 table.
|
||||
**
|
||||
** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
|
||||
** This API function is used to query the FTS table for phrase iPhrase
|
||||
** of the current query. Specifically, a query equivalent to:
|
||||
**
|
||||
** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
|
||||
**
|
||||
** with $p set to a phrase equivalent to the phrase iPhrase of the
|
||||
** current query is executed. For each row visited, the callback function
|
||||
** passed as the fourth argument is invoked. The context and API objects
|
||||
** passed to the callback function may be used to access the properties of
|
||||
** each matched row. Invoking Api.xUserData() returns a copy of the pointer
|
||||
** passed as the third argument (pUserData) to xQueryPhrase().
|
||||
**
|
||||
** If the callback function returns any value other than SQLITE_OK, the
|
||||
** query is abandoned and the xQueryPhrase function returns immediately.
|
||||
** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
|
||||
** Otherwise, the error code is propagated upwards.
|
||||
**
|
||||
** If the query runs to completion without incident, SQLITE_OK is returned.
|
||||
** Or, if some error occurs before the query completes or is aborted by
|
||||
** the callback, an SQLite error code is returned.
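**
**   A sketch of a conforming callback and its use (the names xRowCounter
**   and nMatch are illustrative only, they are not part of the API):
**
**     static int xRowCounter(
**       const Fts5ExtensionApi *pApi, Fts5Context *pFts, void *pCtx
**     ){
**       int *pnMatch = (int*)pCtx;      /* pCtx is the pUserData pointer */
**       (*pnMatch)++;                   /* count one row matching iPhrase */
**       return SQLITE_OK;               /* SQLITE_OK means keep iterating */
**     }
**
**     /* ... then, from within an auxiliary function: */
**     int nMatch = 0;
**     rc = pApi->xQueryPhrase(pFts, iPhrase, (void*)&nMatch, xRowCounter);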
|
||||
**
|
||||
**
|
||||
** xSetAuxdata(pFts5, pAux, xDelete)
|
||||
**
|
||||
** Save the pointer passed as the second argument as the extension function's
|
||||
** "auxiliary data". The pointer may then be retrieved by the current or any
|
||||
** future invocation of the same fts5 extension function made as part of
|
||||
** the same MATCH query using the xGetAuxdata() API.
|
||||
**
|
||||
** Each extension function is allocated a single auxiliary data slot for
|
||||
** each FTS query (MATCH expression). If the extension function is invoked
|
||||
** more than once for a single FTS query, then all invocations share a
|
||||
** single auxiliary data context.
|
||||
**
|
||||
** If there is already an auxiliary data pointer when this function is
|
||||
** invoked, then it is replaced by the new pointer. If an xDelete callback
|
||||
** was specified along with the original pointer, it is invoked at this
|
||||
** point.
|
||||
**
|
||||
** The xDelete callback, if one is specified, is also invoked on the
|
||||
** auxiliary data pointer after the FTS5 query has finished.
|
||||
**
|
||||
** If an error (e.g. an OOM condition) occurs within this function, then
|
||||
** the auxiliary data is set to NULL and an error code returned. If the
|
||||
** xDelete parameter was not NULL, it is invoked on the auxiliary data
|
||||
** pointer before returning.
|
||||
**
|
||||
**
|
||||
** xGetAuxdata(pFts5, bClear)
|
||||
**
|
||||
** Returns the current auxiliary data pointer for the fts5 extension
|
||||
** function. See the xSetAuxdata() method for details.
|
||||
**
|
||||
** If the bClear argument is non-zero, then the auxiliary data is cleared
|
||||
** (set to NULL) before this function returns. In this case the xDelete,
|
||||
** if any, is not invoked.
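**
**   For example, an auxiliary function might compute a value once per MATCH
**   query and cache it between invocations. A sketch only - MyData,
**   myDataAlloc() and myDataFree() are hypothetical, not part of FTS5:
**
**     MyData *p = (MyData*)pApi->xGetAuxdata(pFts, 0);
**     if( p==0 ){
**       p = myDataAlloc();                   /* build the per-query data */
**       rc = pApi->xSetAuxdata(pFts, (void*)p, myDataFree);
**     }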
|
||||
**
|
||||
**
|
||||
** xRowCount(pFts5, pnRow)
|
||||
**
|
||||
** This function is used to retrieve the total number of rows in the table.
|
||||
** In other words, the same value that would be returned by:
|
||||
**
|
||||
** SELECT count(*) FROM ftstable;
|
||||
**
|
||||
** xPhraseFirst()
|
||||
** This function is used, along with type Fts5PhraseIter and the xPhraseNext
|
||||
** method, to iterate through all instances of a single query phrase within
|
||||
** the current row. This is the same information as is accessible via the
|
||||
** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
|
||||
** to use, this API may be faster under some circumstances. To iterate
|
||||
** through instances of phrase iPhrase, use the following code:
|
||||
**
|
||||
** Fts5PhraseIter iter;
|
||||
** int iCol, iOff;
|
||||
** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
|
||||
** iCol>=0;
|
||||
** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
|
||||
** ){
|
||||
** // An instance of phrase iPhrase at offset iOff of column iCol
|
||||
** }
|
||||
**
|
||||
** The Fts5PhraseIter structure is defined above. Applications should not
|
||||
** modify this structure directly - it should only be used as shown above
|
||||
** with the xPhraseFirst() and xPhraseNext() API methods (and by
|
||||
** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below).
|
||||
**
|
||||
** This API can be quite slow if used with an FTS5 table created with the
|
||||
** "detail=none" or "detail=column" option. If the FTS5 table is created
|
||||
** with either "detail=none" or "detail=column" and "content=" option
|
||||
** (i.e. if it is a contentless table), then this API always iterates
|
||||
** through an empty set (all calls to xPhraseFirst() set iCol to -1).
|
||||
**
|
||||
** xPhraseNext()
|
||||
** See xPhraseFirst above.
|
||||
**
|
||||
** xPhraseFirstColumn()
|
||||
** This function and xPhraseNextColumn() are similar to the xPhraseFirst()
|
||||
** and xPhraseNext() APIs described above. The difference is that instead
|
||||
** of iterating through all instances of a phrase in the current row, these
|
||||
** APIs are used to iterate through the set of columns in the current row
|
||||
** that contain one or more instances of a specified phrase. For example:
|
||||
**
|
||||
** Fts5PhraseIter iter;
|
||||
** int iCol;
|
||||
** for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
|
||||
** iCol>=0;
|
||||
** pApi->xPhraseNextColumn(pFts, &iter, &iCol)
|
||||
** ){
|
||||
** // Column iCol contains at least one instance of phrase iPhrase
|
||||
** }
|
||||
**
|
||||
** This API can be quite slow if used with an FTS5 table created with the
|
||||
** "detail=none" option. If the FTS5 table is created with either
|
||||
** "detail=none" "content=" option (i.e. if it is a contentless table),
|
||||
** then this API always iterates through an empty set (all calls to
|
||||
** xPhraseFirstColumn() set iCol to -1).
|
||||
**
|
||||
** The information accessed using this API and its companion
|
||||
** xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext
|
||||
** (or xInst/xInstCount). The chief advantage of this API is that it is
|
||||
** significantly more efficient than those alternatives when used with
|
||||
** "detail=column" tables.
|
||||
**
|
||||
** xPhraseNextColumn()
|
||||
** See xPhraseFirstColumn above.
|
||||
*/
|
||||
struct Fts5ExtensionApi {
|
||||
int iVersion; /* Currently always set to 3 */
|
||||
|
||||
void *(*xUserData)(Fts5Context*);
|
||||
|
||||
int (*xColumnCount)(Fts5Context*);
|
||||
int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
|
||||
int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);
|
||||
|
||||
int (*xTokenize)(Fts5Context*,
|
||||
const char *pText, int nText, /* Text to tokenize */
|
||||
void *pCtx, /* Context passed to xToken() */
|
||||
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
|
||||
);
|
||||
|
||||
int (*xPhraseCount)(Fts5Context*);
|
||||
int (*xPhraseSize)(Fts5Context*, int iPhrase);
|
||||
|
||||
int (*xInstCount)(Fts5Context*, int *pnInst);
|
||||
int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
|
||||
|
||||
sqlite3_int64 (*xRowid)(Fts5Context*);
|
||||
int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
|
||||
int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
|
||||
|
||||
int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
|
||||
int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
|
||||
);
|
||||
int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
|
||||
void *(*xGetAuxdata)(Fts5Context*, int bClear);
|
||||
|
||||
int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*);
|
||||
void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff);
|
||||
|
||||
int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*);
|
||||
void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol);
|
||||
};
|
||||
|
||||
/*
|
||||
** CUSTOM AUXILIARY FUNCTIONS
|
||||
*************************************************************************/
|
||||
|
||||
/*************************************************************************
|
||||
** CUSTOM TOKENIZERS
|
||||
**
|
||||
** Applications may also register custom tokenizer types. A tokenizer
|
||||
** is registered by providing fts5 with a populated instance of the
|
||||
** following structure. All structure methods must be defined; setting
|
||||
** any member of the fts5_tokenizer struct to NULL leads to undefined
|
||||
** behaviour. The structure methods are expected to function as follows:
|
||||
**
|
||||
** xCreate:
|
||||
** This function is used to allocate and initialize a tokenizer instance.
|
||||
** A tokenizer instance is required to actually tokenize text.
|
||||
**
|
||||
** The first argument passed to this function is a copy of the (void*)
|
||||
** pointer provided by the application when the fts5_tokenizer object
|
||||
** was registered with FTS5 (the third argument to xCreateTokenizer()).
|
||||
** The second and third arguments are an array of nul-terminated strings
|
||||
** containing the tokenizer arguments, if any, specified following the
|
||||
** tokenizer name as part of the CREATE VIRTUAL TABLE statement used
|
||||
** to create the FTS5 table.
|
||||
**
|
||||
** The final argument is an output variable. If successful, (*ppOut)
|
||||
** should be set to point to the new tokenizer handle and SQLITE_OK
|
||||
** returned. If an error occurs, some value other than SQLITE_OK should
|
||||
** be returned. In this case, fts5 assumes that the final value of *ppOut
|
||||
** is undefined.
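**
**   For example, assuming a tokenizer has been registered under the name
**   "mytok", the statement:
**
**     CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize = 'mytok arg1 arg2');
**
**   causes xCreate() to be invoked with nArg set to 2 and azArg[] containing
**   "arg1" and "arg2".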
|
||||
**
|
||||
** xDelete:
|
||||
** This function is invoked to delete a tokenizer handle previously
|
||||
** allocated using xCreate(). Fts5 guarantees that this function will
|
||||
** be invoked exactly once for each successful call to xCreate().
|
||||
**
|
||||
** xTokenize:
|
||||
** This function is expected to tokenize the nText byte string indicated
|
||||
** by argument pText. pText may or may not be nul-terminated. The first
|
||||
** argument passed to this function is a pointer to an Fts5Tokenizer object
|
||||
** returned by an earlier call to xCreate().
|
||||
**
|
||||
** The second argument indicates the reason that FTS5 is requesting
|
||||
** tokenization of the supplied text. This is always one of the following
|
||||
** four values:
|
||||
**
|
||||
** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
|
||||
** or removed from the FTS table. The tokenizer is being invoked to
|
||||
** determine the set of tokens to add to (or delete from) the
|
||||
** FTS index.
|
||||
**
|
||||
** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed
|
||||
** against the FTS index. The tokenizer is being called to tokenize
|
||||
** a bareword or quoted string specified as part of the query.
|
||||
**
|
||||
** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
|
||||
** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
|
||||
** followed by a "*" character, indicating that the last token
|
||||
** returned by the tokenizer will be treated as a token prefix.
|
||||
**
|
||||
** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
|
||||
** satisfy an fts5_api.xTokenize() request made by an auxiliary
|
||||
** function. Or an fts5_api.xColumnSize() request made by the same
|
||||
** on a columnsize=0 database.
|
||||
** </ul>
|
||||
**
|
||||
** For each token in the input string, the supplied callback xToken() must
|
||||
** be invoked. The first argument to it should be a copy of the pointer
|
||||
** passed as the second argument to xTokenize(). The third and fourth
|
||||
** arguments are a pointer to a buffer containing the token text, and the
|
||||
** size of the token in bytes. The fifth and sixth arguments are the byte offsets
|
||||
** of the first byte of and first byte immediately following the text from
|
||||
** which the token is derived within the input.
|
||||
**
|
||||
** The second argument passed to the xToken() callback ("tflags") should
|
||||
** normally be set to 0. The exception is if the tokenizer supports
|
||||
** synonyms. In this case see the discussion below for details.
|
||||
**
|
||||
** FTS5 assumes the xToken() callback is invoked for each token in the
|
||||
** order that they occur within the input text.
|
||||
**
|
||||
** If an xToken() callback returns any value other than SQLITE_OK, then
|
||||
** the tokenization should be abandoned and the xTokenize() method should
|
||||
** immediately return a copy of the xToken() return value. Or, if the
|
||||
** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
|
||||
** if an error occurs with the xTokenize() implementation itself, it
|
||||
** may abandon the tokenization and return any error code other than
|
||||
** SQLITE_OK or SQLITE_DONE.
|
||||
**
|
||||
** SYNONYM SUPPORT
|
||||
**
|
||||
** Custom tokenizers may also support synonyms. Consider a case in which a
|
||||
** user wishes to query for a phrase such as "first place". Using the
|
||||
** built-in tokenizers, the FTS5 query 'first + place' will match instances
|
||||
** of "first place" within the document set, but not alternative forms
|
||||
** such as "1st place". In some applications, it would be better to match
|
||||
** all instances of "first place" or "1st place" regardless of which form
|
||||
** the user specified in the MATCH query text.
|
||||
**
|
||||
** There are several ways to approach this in FTS5:
|
||||
**
|
||||
** <ol><li> By mapping all synonyms to a single token. In the above
|
||||
**          example, this means that the tokenizer returns the
|
||||
** same token for inputs "first" and "1st". Say that token is in
|
||||
** fact "first", so that when the user inserts the document "I won
|
||||
** 1st place" entries are added to the index for tokens "i", "won",
|
||||
** "first" and "place". If the user then queries for '1st + place',
|
||||
** the tokenizer substitutes "first" for "1st" and the query works
|
||||
** as expected.
|
||||
**
|
||||
**       <li> By querying the index for all synonyms of each query term.
|
||||
** In this case, when tokenizing query text, the tokenizer may
|
||||
**            provide multiple synonyms for a single term within the query.
|
||||
** FTS5 then queries the index for each synonym individually. For
|
||||
** example, faced with the query:
|
||||
**
|
||||
** <codeblock>
|
||||
** ... MATCH 'first place'</codeblock>
|
||||
**
|
||||
** the tokenizer offers both "1st" and "first" as synonyms for the
|
||||
** first token in the MATCH query and FTS5 effectively runs a query
|
||||
** similar to:
|
||||
**
|
||||
** <codeblock>
|
||||
** ... MATCH '(first OR 1st) place'</codeblock>
|
||||
**
|
||||
** except that, for the purposes of auxiliary functions, the query
|
||||
** still appears to contain just two phrases - "(first OR 1st)"
|
||||
** being treated as a single phrase.
|
||||
**
|
||||
** <li> By adding multiple synonyms for a single term to the FTS index.
|
||||
** Using this method, when tokenizing document text, the tokenizer
|
||||
** provides multiple synonyms for each token. So that when a
|
||||
** document such as "I won first place" is tokenized, entries are
|
||||
** added to the FTS index for "i", "won", "first", "1st" and
|
||||
** "place".
|
||||
**
|
||||
** This way, even if the tokenizer does not provide synonyms
|
||||
**            when tokenizing query text (it should not - to do so would be
|
||||
** inefficient), it doesn't matter if the user queries for
|
||||
**            'first + place' or '1st + place', as there are entries in the
|
||||
** FTS index corresponding to both forms of the first token.
|
||||
** </ol>
|
||||
**
|
||||
** Whether it is parsing document or query text, any call to xToken that
|
||||
** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
|
||||
** is considered to supply a synonym for the previous token. For example,
|
||||
** when parsing the document "I won first place", a tokenizer that supports
|
||||
** synonyms would call xToken() 5 times, as follows:
|
||||
**
|
||||
** <codeblock>
|
||||
** xToken(pCtx, 0, "i", 1, 0, 1);
|
||||
** xToken(pCtx, 0, "won", 3, 2, 5);
|
||||
** xToken(pCtx, 0, "first", 5, 6, 11);
|
||||
** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11);
|
||||
** xToken(pCtx, 0, "place", 5, 12, 17);
|
||||
**</codeblock>
|
||||
**
|
||||
** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
|
||||
** xToken() is called. Multiple synonyms may be specified for a single token
|
||||
** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.
|
||||
** There is no limit to the number of synonyms that may be provided for a
|
||||
** single token.
|
||||
**
|
||||
** In many cases, method (1) above is the best approach. It does not add
|
||||
** extra data to the FTS index or require FTS5 to query for multiple terms,
|
||||
** so it is efficient in terms of disk space and query speed. However, it
|
||||
** does not support prefix queries very well. If, as suggested above, the
|
||||
** token "first" is subsituted for "1st" by the tokenizer, then the query:
|
||||
**
|
||||
** <codeblock>
|
||||
** ... MATCH '1s*'</codeblock>
|
||||
**
|
||||
** will not match documents that contain the token "1st" (as the tokenizer
|
||||
** will probably not map "1s" to any prefix of "first").
|
||||
**
|
||||
** For full prefix support, method (3) may be preferred. In this case,
|
||||
** because the index contains entries for both "first" and "1st", prefix
|
||||
** queries such as 'fi*' or '1s*' will match correctly. However, because
|
||||
** extra entries are added to the FTS index, this method uses more space
|
||||
** within the database.
|
||||
**
|
||||
** Method (2) offers a midpoint between (1) and (3). Using this method,
|
||||
** a query such as '1s*' will match documents that contain the literal
|
||||
** token "1st", but not "first" (assuming the tokenizer is not able to
|
||||
** provide synonyms for prefixes). However, a non-prefix query like '1st'
|
||||
** will match against "1st" and "first". This method does not require
|
||||
** extra disk space, as no extra entries are added to the FTS index.
|
||||
** On the other hand, it may require more CPU cycles to run MATCH queries,
|
||||
** as separate queries of the FTS index are required for each synonym.
|
||||
**
|
||||
** When using methods (2) or (3), it is important that the tokenizer only
|
||||
** provide synonyms when tokenizing document text (method (2)) or query
|
||||
** text (method (3)), not both. Doing so will not cause any errors, but is
|
||||
** inefficient.
|
||||
*/
|
||||
typedef struct Fts5Tokenizer Fts5Tokenizer;
|
||||
typedef struct fts5_tokenizer fts5_tokenizer;
|
||||
struct fts5_tokenizer {
|
||||
int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
|
||||
void (*xDelete)(Fts5Tokenizer*);
|
||||
int (*xTokenize)(Fts5Tokenizer*,
|
||||
void *pCtx,
|
||||
int flags, /* Mask of FTS5_TOKENIZE_* flags */
|
||||
const char *pText, int nText,
|
||||
int (*xToken)(
|
||||
void *pCtx, /* Copy of 2nd argument to xTokenize() */
|
||||
int tflags, /* Mask of FTS5_TOKEN_* flags */
|
||||
const char *pToken, /* Pointer to buffer containing token */
|
||||
int nToken, /* Size of token in bytes */
|
||||
int iStart, /* Byte offset of token within input text */
|
||||
int iEnd /* Byte offset of end of token within input text */
|
||||
)
|
||||
);
|
||||
};
|
||||
|
||||
/* Flags that may be passed as the third argument to xTokenize() */
|
||||
#define FTS5_TOKENIZE_QUERY 0x0001
|
||||
#define FTS5_TOKENIZE_PREFIX 0x0002
|
||||
#define FTS5_TOKENIZE_DOCUMENT 0x0004
|
||||
#define FTS5_TOKENIZE_AUX 0x0008
|
||||
|
||||
/* Flags that may be passed by the tokenizer implementation back to FTS5
|
||||
** as the third argument to the supplied xToken callback. */
|
||||
#define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */
|
||||
|
||||
/*
|
||||
** END OF CUSTOM TOKENIZERS
|
||||
*************************************************************************/
|
||||
|
||||
/*************************************************************************
|
||||
** FTS5 EXTENSION REGISTRATION API
|
||||
*/
|
||||
typedef struct fts5_api fts5_api;
|
||||
struct fts5_api {
|
||||
int iVersion; /* Currently always set to 2 */
|
||||
|
||||
/* Create a new tokenizer */
|
||||
int (*xCreateTokenizer)(
|
||||
fts5_api *pApi,
|
||||
const char *zName,
|
||||
void *pContext,
|
||||
fts5_tokenizer *pTokenizer,
|
||||
void (*xDestroy)(void*)
|
||||
);
|
||||
|
||||
/* Find an existing tokenizer */
|
||||
int (*xFindTokenizer)(
|
||||
fts5_api *pApi,
|
||||
const char *zName,
|
||||
void **ppContext,
|
||||
fts5_tokenizer *pTokenizer
|
||||
);
|
||||
|
||||
/* Create a new auxiliary function */
|
||||
int (*xCreateFunction)(
|
||||
fts5_api *pApi,
|
||||
const char *zName,
|
||||
void *pContext,
|
||||
fts5_extension_function xFunction,
|
||||
void (*xDestroy)(void*)
|
||||
);
|
||||
};
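
/* For example, given an fts5_api pointer obtained from a database connection
** (obtaining the pointer is not shown here), a populated fts5_tokenizer
** object and an auxiliary function might be registered as follows. This is
** a sketch only - my_tokenizer, pMyTokCtx, myRankFunc, "mytok" and "myrank"
** are illustrative names, not part of the API:
**
**   rc = pApi->xCreateTokenizer(pApi, "mytok", pMyTokCtx, &my_tokenizer, 0);
**   if( rc==SQLITE_OK ){
**     rc = pApi->xCreateFunction(pApi, "myrank", 0, myRankFunc, 0);
**   }
*/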
|
||||
|
||||
/*
|
||||
** END OF REGISTRATION API
|
||||
*************************************************************************/
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* end of the 'extern "C"' block */
|
||||
#endif
|
||||
|
||||
#endif /* _FTS5_H */
|
||||
|
||||
@ -1,776 +0,0 @@
|
||||
/*
|
||||
** 2014 May 31
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
*/
|
||||
#ifndef _FTS5INT_H
|
||||
#define _FTS5INT_H
|
||||
|
||||
#include "fts5.h"
|
||||
#include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT1
|
||||
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifndef SQLITE_AMALGAMATION
|
||||
|
||||
typedef unsigned char u8;
|
||||
typedef unsigned int u32;
|
||||
typedef unsigned short u16;
|
||||
typedef short i16;
|
||||
typedef sqlite3_int64 i64;
|
||||
typedef sqlite3_uint64 u64;
|
||||
|
||||
#define ArraySize(x) ((int)(sizeof(x) / sizeof(x[0])))
|
||||
|
||||
#define testcase(x)
|
||||
#define ALWAYS(x) 1
|
||||
#define NEVER(x) 0
|
||||
|
||||
#define MIN(x,y) (((x) < (y)) ? (x) : (y))
|
||||
#define MAX(x,y) (((x) > (y)) ? (x) : (y))
|
||||
|
||||
/*
|
||||
** Constants for the largest and smallest possible 64-bit signed integers.
|
||||
*/
|
||||
# define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32))
|
||||
# define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
** Maximum number of prefix indexes on single FTS5 table. This must be
|
||||
** less than 32. If it is set to anything larger than that, an #error
|
||||
** directive in fts5_index.c will cause the build to fail.
|
||||
*/
|
||||
#define FTS5_MAX_PREFIX_INDEXES 31
|
||||
|
||||
#define FTS5_DEFAULT_NEARDIST 10
|
||||
#define FTS5_DEFAULT_RANK "bm25"
|
||||
|
||||
/* Name of rank and rowid columns */
|
||||
#define FTS5_RANK_NAME "rank"
|
||||
#define FTS5_ROWID_NAME "rowid"
|
||||
|
||||
#ifdef SQLITE_DEBUG
|
||||
# define FTS5_CORRUPT sqlite3Fts5Corrupt()
|
||||
int sqlite3Fts5Corrupt(void);
|
||||
#else
|
||||
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
|
||||
#endif
|
||||
|
||||
/*
|
||||
** The assert_nc() macro is similar to the assert() macro, except that it
|
||||
** is used for assert() conditions that are true only if it can be
|
||||
** guaranteed that the database is not corrupt.
|
||||
*/
|
||||
#ifdef SQLITE_DEBUG
|
||||
extern int sqlite3_fts5_may_be_corrupt;
|
||||
# define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
|
||||
#else
|
||||
# define assert_nc(x) assert(x)
|
||||
#endif
|
||||
|
||||
/* Mark a function parameter as unused, to suppress nuisance compiler
|
||||
** warnings. */
|
||||
#ifndef UNUSED_PARAM
|
||||
# define UNUSED_PARAM(X) (void)(X)
|
||||
#endif
|
||||
|
||||
#ifndef UNUSED_PARAM2
|
||||
# define UNUSED_PARAM2(X, Y) (void)(X), (void)(Y)
|
||||
#endif
|
||||
|
||||
typedef struct Fts5Global Fts5Global;
|
||||
typedef struct Fts5Colset Fts5Colset;
|
||||
|
||||
/* If a NEAR() clump or phrase may only match a specific set of columns,
|
||||
** then an object of the following type is used to record the set of columns.
|
||||
** Each entry in the aiCol[] array is a column that may be matched.
|
||||
**
|
||||
** This object is used by fts5_expr.c and fts5_index.c.
|
||||
*/
|
||||
struct Fts5Colset {
|
||||
int nCol;
|
||||
int aiCol[1];
|
||||
};
|
||||
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
** Interface to code in fts5_config.c. fts5_config.c contains code
|
||||
** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
|
||||
*/
|
||||
|
||||
typedef struct Fts5Config Fts5Config;
|
||||
|
||||
/*
|
||||
** An instance of the following structure encodes all information that can
|
||||
** be gleaned from the CREATE VIRTUAL TABLE statement.
|
||||
**
|
||||
** And all information loaded from the %_config table.
|
||||
**
|
||||
** nAutomerge:
|
||||
** The minimum number of segments that an auto-merge operation should
|
||||
** attempt to merge together. A value of 1 sets the object to use the
|
||||
** compile time default. Zero disables auto-merge altogether.
|
||||
**
|
||||
** zContent:
|
||||
**
|
||||
** zContentRowid:
|
||||
** The value of the content_rowid= option, if one was specified. Or
|
||||
** the string "rowid" otherwise. This text is not quoted - if it is
|
||||
** used as part of an SQL statement it needs to be quoted appropriately.
|
||||
**
|
||||
** zContentExprlist:
|
||||
**
|
||||
** pzErrmsg:
|
||||
** This exists in order to allow the fts5_index.c module to return a
|
||||
** decent error message if it encounters a file-format version it does
|
||||
** not understand.
|
||||
**
|
||||
** bColumnsize:
|
||||
** True if the %_docsize table is created.
|
||||
**
|
||||
** bPrefixIndex:
|
||||
** This is only used for debugging. If set to false, any prefix indexes
|
||||
** are ignored. This value is configured using:
|
||||
**
|
||||
** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex);
|
||||
**
|
||||
*/
|
||||
struct Fts5Config {
|
||||
sqlite3 *db; /* Database handle */
|
||||
char *zDb; /* Database holding FTS index (e.g. "main") */
|
||||
char *zName; /* Name of FTS index */
|
||||
int nCol; /* Number of columns */
|
||||
char **azCol; /* Column names */
|
||||
u8 *abUnindexed; /* True for unindexed columns */
|
||||
int nPrefix; /* Number of prefix indexes */
|
||||
int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */
|
||||
int eContent; /* An FTS5_CONTENT value */
|
||||
char *zContent; /* content table */
|
||||
char *zContentRowid; /* "content_rowid=" option value */
|
||||
int bColumnsize; /* "columnsize=" option value (dflt==1) */
|
||||
int eDetail; /* FTS5_DETAIL_XXX value */
|
||||
char *zContentExprlist;
|
||||
Fts5Tokenizer *pTok;
|
||||
fts5_tokenizer *pTokApi;
|
||||
|
||||
/* Values loaded from the %_config table */
|
||||
int iCookie; /* Incremented when %_config is modified */
|
||||
int pgsz; /* Approximate page size used in %_data */
|
||||
int nAutomerge; /* 'automerge' setting */
|
||||
int nCrisisMerge; /* Maximum allowed segments per level */
|
||||
int nHashSize; /* Bytes of memory for in-memory hash */
|
||||
char *zRank; /* Name of rank function */
|
||||
char *zRankArgs; /* Arguments to rank function */
|
||||
|
||||
/* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
|
||||
char **pzErrmsg;
|
||||
|
||||
#ifdef SQLITE_DEBUG
|
||||
int bPrefixIndex; /* True to use prefix-indexes */
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Current expected value of %_config table 'version' field */
|
||||
#define FTS5_CURRENT_VERSION 4
|
||||
|
||||
#define FTS5_CONTENT_NORMAL 0
|
||||
#define FTS5_CONTENT_NONE 1
|
||||
#define FTS5_CONTENT_EXTERNAL 2
|
||||
|
||||
#define FTS5_DETAIL_FULL 0
|
||||
#define FTS5_DETAIL_NONE 1
|
||||
#define FTS5_DETAIL_COLUMNS 2
|
||||
|
||||
|
||||
|
||||
int sqlite3Fts5ConfigParse(
|
||||
Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char**
|
||||
);
|
||||
void sqlite3Fts5ConfigFree(Fts5Config*);
|
||||
|
||||
int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);
|
||||
|
||||
int sqlite3Fts5Tokenize(
|
||||
Fts5Config *pConfig, /* FTS5 Configuration object */
|
||||
int flags, /* FTS5_TOKENIZE_* flags */
|
||||
const char *pText, int nText, /* Text to tokenize */
|
||||
void *pCtx, /* Context passed to xToken() */
|
||||
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
|
||||
);
|
||||
|
||||
void sqlite3Fts5Dequote(char *z);
|
||||
|
||||
/* Load the contents of the %_config table */
|
||||
int sqlite3Fts5ConfigLoad(Fts5Config*, int);
|
||||
|
||||
/* Set the value of a single config attribute */
|
||||
int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*);
|
||||
|
||||
int sqlite3Fts5ConfigParseRank(const char*, char**, char**);
|
||||
|
||||
/*
|
||||
** End of interface to code in fts5_config.c.
|
||||
**************************************************************************/
|
||||
|
||||
/**************************************************************************
|
||||
** Interface to code in fts5_buffer.c.
|
||||
*/
|
||||
|
||||
/*
|
||||
** Buffer object for the incremental building of string data.
|
||||
*/
|
||||
typedef struct Fts5Buffer Fts5Buffer;
|
||||
struct Fts5Buffer {
|
||||
u8 *p;
|
||||
int n;
|
||||
int nSpace;
|
||||
};
|
||||
|
||||
int sqlite3Fts5BufferSize(int*, Fts5Buffer*, u32);
|
||||
void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64);
|
||||
void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, u32, const u8*);
|
||||
void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*);
|
||||
void sqlite3Fts5BufferFree(Fts5Buffer*);
|
||||
void sqlite3Fts5BufferZero(Fts5Buffer*);
|
||||
void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*);
|
||||
void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...);
|
||||
|
||||
char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...);
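
/* A minimal usage sketch based only on the declarations above (the int*
** passed as the first argument to the append routines is assumed to be an
** in/out error code in the usual FTS5 style):
**
**   int rc = SQLITE_OK;
**   Fts5Buffer buf = {0, 0, 0};
**   sqlite3Fts5BufferAppendString(&rc, &buf, "hello");
**   sqlite3Fts5BufferAppendVarint(&rc, &buf, 42);
**   if( rc==SQLITE_OK ){
**     /* buf.p now points to buf.n bytes of accumulated data */
**   }
**   sqlite3Fts5BufferFree(&buf);
*/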
|
||||
|
||||
#define fts5BufferZero(x) sqlite3Fts5BufferZero(x)
|
||||
#define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,c)
|
||||
#define fts5BufferFree(a) sqlite3Fts5BufferFree(a)
|
||||
#define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d)
|
||||
#define fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d)
|
||||
|
||||
#define fts5BufferGrow(pRc,pBuf,nn) ( \
|
||||
(u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace) ? 0 : \
|
||||
sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n) \
|
||||
)
|
||||
|
||||
/* Write and decode big-endian 32-bit integer values */
|
||||
void sqlite3Fts5Put32(u8*, int);
|
||||
int sqlite3Fts5Get32(const u8*);
|
||||
|
||||
#define FTS5_POS2COLUMN(iPos) (int)(iPos >> 32)
|
||||
#define FTS5_POS2OFFSET(iPos) (int)(iPos & 0xFFFFFFFF)
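
/* For example, the position value describing token offset 17 of column 2 is
** the 64-bit quantity (((i64)2 << 32) + 17); FTS5_POS2COLUMN() recovers the
** 2 and FTS5_POS2OFFSET() the 17 from such a value. */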
|
||||
|
||||
typedef struct Fts5PoslistReader Fts5PoslistReader;
|
||||
struct Fts5PoslistReader {
|
||||
/* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
|
||||
const u8 *a; /* Position list to iterate through */
|
||||
int n; /* Size of buffer at a[] in bytes */
|
||||
int i; /* Current offset in a[] */
|
||||
|
||||
u8 bFlag; /* For client use (any custom purpose) */
|
||||
|
||||
/* Output variables */
|
||||
u8 bEof; /* Set to true at EOF */
|
||||
i64 iPos; /* (iCol<<32) + iPos */
|
||||
};
|
||||
int sqlite3Fts5PoslistReaderInit(
|
||||
const u8 *a, int n, /* Poslist buffer to iterate through */
|
||||
Fts5PoslistReader *pIter /* Iterator object to initialize */
|
||||
);
|
||||
int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*);

typedef struct Fts5PoslistWriter Fts5PoslistWriter;
struct Fts5PoslistWriter {
  i64 iPrev;
};
int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64);
void sqlite3Fts5PoslistSafeAppend(Fts5Buffer*, i64*, i64);

int sqlite3Fts5PoslistNext64(
  const u8 *a, int n,             /* Buffer containing poslist */
  int *pi,                        /* IN/OUT: Offset within a[] */
  i64 *piOff                      /* IN/OUT: Current offset */
);

/* Malloc utility */
void *sqlite3Fts5MallocZero(int *pRc, int nByte);
char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn);

/* Character set tests (like isspace(), isalpha() etc.) */
int sqlite3Fts5IsBareword(char t);


/* Bucket of terms object used by the integrity-check in offsets=0 mode. */
typedef struct Fts5Termset Fts5Termset;
int sqlite3Fts5TermsetNew(Fts5Termset**);
int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent);
void sqlite3Fts5TermsetFree(Fts5Termset*);

/*
** End of interface to code in fts5_buffer.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_index.c. fts5_index.c contains code to access
** the data stored in the %_data table.
*/

typedef struct Fts5Index Fts5Index;
typedef struct Fts5IndexIter Fts5IndexIter;

struct Fts5IndexIter {
  i64 iRowid;
  const u8 *pData;
  int nData;
  u8 bEof;
};

#define sqlite3Fts5IterEof(x) ((x)->bEof)

/*
** Values used as part of the flags argument passed to IndexQuery().
*/
#define FTS5INDEX_QUERY_PREFIX     0x0001   /* Prefix query */
#define FTS5INDEX_QUERY_DESC       0x0002   /* Docs in descending rowid order */
#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004   /* Do not use prefix index */
#define FTS5INDEX_QUERY_SCAN       0x0008   /* Scan query (fts5vocab) */

/* The following are used internally by the fts5_index.c module. They are
** defined here only to make it easier to avoid clashes with the flags
** above. */
#define FTS5INDEX_QUERY_SKIPEMPTY  0x0010
#define FTS5INDEX_QUERY_NOOUTPUT   0x0020

/*
** Create/destroy an Fts5Index object.
*/
int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**);
int sqlite3Fts5IndexClose(Fts5Index *p);

/*
** Return a simple checksum value based on the arguments.
*/
u64 sqlite3Fts5IndexEntryCksum(
  i64 iRowid,
  int iCol,
  int iPos,
  int iIdx,
  const char *pTerm,
  int nTerm
);

/*
** Argument p points to a buffer containing utf-8 text that is n bytes in
** size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are fewer than nChar characters in total.
*/
int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
);

/*
** Open a new iterator to iterate through all rowids that match the
** specified token or token prefix.
*/
int sqlite3Fts5IndexQuery(
  Fts5Index *p,                   /* FTS index to query */
  const char *pToken, int nToken, /* Token (or prefix) to query for */
  int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */
  Fts5Colset *pColset,            /* Match these columns only */
  Fts5IndexIter **ppIter          /* OUT: New iterator object */
);

/*
** The various operations on open token or token prefix iterators opened
** using sqlite3Fts5IndexQuery().
*/
int sqlite3Fts5IterNext(Fts5IndexIter*);
int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);

/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter*);
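/* [Editor's note: illustrative sketch, not part of the original file.]
** Querying the index for a single token and walking the matching rowids
** with the iterator API declared above. A minimal sketch, assuming an open
** Fts5Index* and that passing 0 for the flags and column-set arguments
** means "plain ascending query over all columns"; the function name
** fts5CountRowidsForToken is hypothetical.
*/
static int fts5CountRowidsForToken(Fts5Index *pIdx, const char *zToken, i64 *pnRow){
  Fts5IndexIter *pIter = 0;
  i64 nRow = 0;
  int rc = sqlite3Fts5IndexQuery(pIdx, zToken, (int)strlen(zToken), 0, 0, &pIter);

  while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter) ){
    nRow++;                        /* pIter->iRowid is the current match */
    rc = sqlite3Fts5IterNext(pIter);
  }
  if( pIter ) sqlite3Fts5IterClose(pIter);

  *pnRow = nRow;
  return rc;
}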

/*
** This interface is used by the fts5vocab module.
*/
const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*);
int sqlite3Fts5IterNextScan(Fts5IndexIter*);


/*
** Insert data into, or remove data from, the index. Each time a document is
** added to or removed from the index, this function is called one or more
** times.
**
** For an insert, it must be called once for each token in the new document.
** If the operation is a delete, it must be called (at least) once for each
** unique token in the document with an iCol value less than zero. The iPos
** argument is ignored for a delete.
*/
int sqlite3Fts5IndexWrite(
  Fts5Index *p,                   /* Index to write to */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  const char *pToken, int nToken  /* Token to add or remove to or from index */
);

/*
** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to
** document iDocid.
*/
int sqlite3Fts5IndexBeginWrite(
  Fts5Index *p,                   /* Index to write to */
  int bDelete,                    /* True if current operation is a delete */
  i64 iDocid                      /* Docid to add or remove data from */
);
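/* [Editor's note: illustrative sketch, not part of the original file.]
** Writing one document to the index: a single sqlite3Fts5IndexBeginWrite()
** call followed by one sqlite3Fts5IndexWrite() per token, as described in
** the comments above. A delete would pass bDelete=1 and negative iCol
** values. The tokens, positions and the name fts5IndexWriteTwoTokens are
** made up for illustration; flushing via sqlite3Fts5IndexSync() (declared
** below) is left to the caller.
*/
static int fts5IndexWriteTwoTokens(Fts5Index *p, i64 iDocid){
  int rc = sqlite3Fts5IndexBeginWrite(p, 0, iDocid);   /* 0 == not a delete */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexWrite(p, 0, 0, "hello", 5);   /* column 0, position 0 */
  }
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexWrite(p, 0, 1, "world", 5);   /* column 0, position 1 */
  }
  return rc;
}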

/*
** Flush any data stored in the in-memory hash tables to the database.
** If the bCommit flag is true, also close any open blob handles.
*/
int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit);

/*
** Discard any data stored in the in-memory hash tables. Do not write it
** to the database. Additionally, assume that the contents of the %_data
** table may have changed on disk. So any in-memory caches of %_data
** records must be invalidated.
*/
int sqlite3Fts5IndexRollback(Fts5Index *p);

/*
** Get or set the "averages" values.
*/
int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize);
int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);

/*
** Functions called by the storage module as part of integrity-check.
*/
int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum);

/*
** Called during virtual module initialization to register UDF
** fts5_decode() with SQLite
*/
int sqlite3Fts5IndexInit(sqlite3*);

int sqlite3Fts5IndexSetCookie(Fts5Index*, int);

/*
** Return the total number of entries read from the %_data table by
** this connection since it was created.
*/
int sqlite3Fts5IndexReads(Fts5Index *p);

int sqlite3Fts5IndexReinit(Fts5Index *p);
int sqlite3Fts5IndexOptimize(Fts5Index *p);
int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge);

int sqlite3Fts5IndexLoadConfig(Fts5Index *p);

/*
** End of interface to code in fts5_index.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_varint.c.
*/
int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v);
int sqlite3Fts5GetVarintLen(u32 iVal);
u8 sqlite3Fts5GetVarint(const unsigned char*, u64*);
int sqlite3Fts5PutVarint(unsigned char *p, u64 v);

#define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b)
#define fts5GetVarint        sqlite3Fts5GetVarint

#define fts5FastGetVarint32(a, iOff, nVal) {      \
  nVal = (a)[iOff++];                             \
  if( nVal & 0x80 ){                              \
    iOff--;                                       \
    iOff += fts5GetVarint32(&(a)[iOff], nVal);    \
  }                                               \
}
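/* [Editor's note: illustrative sketch, not part of the original file.]
** SQLite-style varints encode a u64 in between one and nine bytes. The
** round trip below uses only the functions declared above; the name
** fts5VarintRoundTrip and the size of the stack buffer are the editor's
** assumptions.
*/
static int fts5VarintRoundTrip(u64 iVal){
  unsigned char aBuf[10];                         /* 9 bytes is the maximum varint size */
  u64 iOut = 0;
  int nPut = sqlite3Fts5PutVarint(aBuf, iVal);    /* Encode: returns bytes written */
  int nGet = sqlite3Fts5GetVarint(aBuf, &iOut);   /* Decode: returns bytes consumed */
  return (nPut==nGet && iOut==iVal) ? SQLITE_OK : SQLITE_ERROR;
}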


/*
** End of interface to code in fts5_varint.c.
**************************************************************************/


/**************************************************************************
** Interface to code in fts5.c.
*/

int sqlite3Fts5GetTokenizer(
  Fts5Global*,
  const char **azArg,
  int nArg,
  Fts5Tokenizer**,
  fts5_tokenizer**,
  char **pzErr
);

Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global*, i64, Fts5Config **);

/*
** End of interface to code in fts5.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_hash.c.
*/
typedef struct Fts5Hash Fts5Hash;

/*
** Create a hash table, free a hash table.
*/
int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize);
void sqlite3Fts5HashFree(Fts5Hash*);

int sqlite3Fts5HashWrite(
  Fts5Hash*,
  i64 iRowid,                     /* Rowid for this entry */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  char bByte,
  const char *pToken, int nToken  /* Token to add or remove to or from index */
);

/*
** Empty (but do not delete) a hash table.
*/
void sqlite3Fts5HashClear(Fts5Hash*);

int sqlite3Fts5HashQuery(
  Fts5Hash*,                      /* Hash table to query */
  const char *pTerm, int nTerm,   /* Query term */
  const u8 **ppDoclist,           /* OUT: Pointer to doclist for pTerm */
  int *pnDoclist                  /* OUT: Size of doclist in bytes */
);

int sqlite3Fts5HashScanInit(
  Fts5Hash*,                      /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
);
void sqlite3Fts5HashScanNext(Fts5Hash*);
int sqlite3Fts5HashScanEof(Fts5Hash*);
void sqlite3Fts5HashScanEntry(Fts5Hash *,
  const char **pzTerm,            /* OUT: term (nul-terminated) */
  const u8 **ppDoclist,           /* OUT: pointer to doclist */
  int *pnDoclist                  /* OUT: size of doclist in bytes */
);
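/* [Editor's note: illustrative sketch, not part of the original file.]
** Scanning the in-memory hash for every term matching a prefix, using the
** scan API declared above. Assumes an already populated Fts5Hash*; the
** function name fts5HashCountPrefix is hypothetical.
*/
static int fts5HashCountPrefix(Fts5Hash *pHash, const char *zPrefix, int nPrefix){
  int nTerm = 0;
  (void)sqlite3Fts5HashScanInit(pHash, zPrefix, nPrefix);  /* Position at first match */
  while( 0==sqlite3Fts5HashScanEof(pHash) ){
    const char *zTerm = 0;        /* Nul-terminated term */
    const u8 *aDoclist = 0;       /* Doclist for zTerm */
    int nDoclist = 0;             /* Size of aDoclist in bytes */
    sqlite3Fts5HashScanEntry(pHash, &zTerm, &aDoclist, &nDoclist);
    (void)zTerm; (void)aDoclist; (void)nDoclist;
    nTerm++;
    sqlite3Fts5HashScanNext(pHash);
  }
  return nTerm;
}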


/*
** End of interface to code in fts5_hash.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_storage.c. fts5_storage.c contains code to
** access the data stored in the %_content and %_docsize tables.
*/

#define FTS5_STMT_SCAN_ASC  0     /* SELECT rowid, * FROM ... ORDER BY 1 ASC */
#define FTS5_STMT_SCAN_DESC 1     /* SELECT rowid, * FROM ... ORDER BY 1 DESC */
#define FTS5_STMT_LOOKUP    2     /* SELECT rowid, * FROM ... WHERE rowid=? */

typedef struct Fts5Storage Fts5Storage;

int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**);
int sqlite3Fts5StorageClose(Fts5Storage *p);
int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName);

int sqlite3Fts5DropAll(Fts5Config*);
int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **);

int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**);
int sqlite3Fts5StorageContentInsert(Fts5Storage *p, sqlite3_value**, i64*);
int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64);

int sqlite3Fts5StorageIntegrity(Fts5Storage *p);

int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**);
void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*);

int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol);
int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg);
int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow);

int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit);
int sqlite3Fts5StorageRollback(Fts5Storage *p);

int sqlite3Fts5StorageConfigValue(
  Fts5Storage *p, const char*, sqlite3_value*, int
);

int sqlite3Fts5StorageDeleteAll(Fts5Storage *p);
int sqlite3Fts5StorageRebuild(Fts5Storage *p);
int sqlite3Fts5StorageOptimize(Fts5Storage *p);
int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge);

/*
** End of interface to code in fts5_storage.c.
**************************************************************************/


/**************************************************************************
** Interface to code in fts5_expr.c.
*/
typedef struct Fts5Expr Fts5Expr;
typedef struct Fts5ExprNode Fts5ExprNode;
typedef struct Fts5Parse Fts5Parse;
typedef struct Fts5Token Fts5Token;
typedef struct Fts5ExprPhrase Fts5ExprPhrase;
typedef struct Fts5ExprNearset Fts5ExprNearset;

struct Fts5Token {
  const char *p;                  /* Token text (not NULL terminated) */
  int n;                          /* Size of buffer p in bytes */
};

/* Parse a MATCH expression. */
int sqlite3Fts5ExprNew(
  Fts5Config *pConfig,
  const char *zExpr,
  Fts5Expr **ppNew,
  char **pzErr
);

/*
** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, iMin, bDesc);
**     rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr);
**     rc = sqlite3Fts5ExprNext(pExpr, iMax)
** ){
**   // The document with rowid iRowid matches the expression!
**   i64 iRowid = sqlite3Fts5ExprRowid(pExpr);
** }
*/
int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc);
int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax);
int sqlite3Fts5ExprEof(Fts5Expr*);
i64 sqlite3Fts5ExprRowid(Fts5Expr*);

void sqlite3Fts5ExprFree(Fts5Expr*);
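/* [Editor's note: illustrative sketch, not part of the original file.]
** Putting the pieces above together: parse a MATCH expression, iterate the
** matching rowids in ascending order, then free the expression. The name
** fts5ExprCountMatches is hypothetical, and the interpretation of iFirst
** and iLast as the inclusive rowid range (passed through to the iMin/iMax
** parameters above) is the editor's assumption.
*/
static int fts5ExprCountMatches(
  Fts5Config *pConfig,            /* Configuration of the FTS5 table */
  Fts5Index *pIdx,                /* Open index to run the query against */
  const char *zMatch,             /* MATCH expression text */
  i64 iFirst, i64 iLast,          /* Range of rowids to visit */
  i64 *pnMatch                    /* OUT: Number of matching rowids */
){
  Fts5Expr *pExpr = 0;
  char *zErr = 0;
  i64 nMatch = 0;
  int rc = sqlite3Fts5ExprNew(pConfig, zMatch, &pExpr, &zErr);

  if( rc==SQLITE_OK ){
    for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, iFirst, 0);
        rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr);
        rc = sqlite3Fts5ExprNext(pExpr, iLast)
    ){
      nMatch++;                   /* sqlite3Fts5ExprRowid(pExpr) is the match */
    }
  }
  sqlite3Fts5ExprFree(pExpr);
  sqlite3_free(zErr);             /* Error message, if any, from ExprNew() */

  *pnMatch = nMatch;
  return rc;
}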

/* Called during startup to register a UDF with SQLite */
int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);

int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);

typedef struct Fts5PoslistPopulator Fts5PoslistPopulator;
Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr*, int);
int sqlite3Fts5ExprPopulatePoslists(
  Fts5Config*, Fts5Expr*, Fts5PoslistPopulator*, int, const char*, int
);
void sqlite3Fts5ExprCheckPoslists(Fts5Expr*, i64);
void sqlite3Fts5ExprClearEof(Fts5Expr*);

int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**);

int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *);

/*******************************************
** The fts5_expr.c API above this point is used by the other hand-written
** C code in this module. The interfaces below this point are called by
** the parser code in fts5parse.y. */

void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);

Fts5ExprNode *sqlite3Fts5ParseNode(
  Fts5Parse *pParse,
  int eType,
  Fts5ExprNode *pLeft,
  Fts5ExprNode *pRight,
  Fts5ExprNearset *pNear
);

Fts5ExprPhrase *sqlite3Fts5ParseTerm(
  Fts5Parse *pParse,
  Fts5ExprPhrase *pPhrase,
  Fts5Token *pToken,
  int bPrefix
);

Fts5ExprNearset *sqlite3Fts5ParseNearset(
  Fts5Parse*,
  Fts5ExprNearset*,
  Fts5ExprPhrase*
);

Fts5Colset *sqlite3Fts5ParseColset(
  Fts5Parse*,
  Fts5Colset*,
  Fts5Token *
);

void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*);
void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*);
void sqlite3Fts5ParseNodeFree(Fts5ExprNode*);

void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*);
void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNearset*, Fts5Colset*);
void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p);
void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*);

/*
** End of interface to code in fts5_expr.c.
**************************************************************************/



/**************************************************************************
** Interface to code in fts5_aux.c.
*/

int sqlite3Fts5AuxInit(fts5_api*);
/*
** End of interface to code in fts5_aux.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_tokenizer.c.
*/

int sqlite3Fts5TokenizerInit(fts5_api*);
/*
** End of interface to code in fts5_tokenizer.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_vocab.c.
*/

int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*);

/*
** End of interface to code in fts5_vocab.c.
**************************************************************************/


/**************************************************************************
** Interface to automatically generated code in fts5_unicode2.c.
*/
int sqlite3Fts5UnicodeIsalnum(int c);
int sqlite3Fts5UnicodeIsdiacritic(int c);
int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);
/*
** End of interface to code in fts5_unicode2.c.
**************************************************************************/

#endif
@ -1,562 +0,0 @@
/*
** 2014 May 31
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
*/


#include "fts5Int.h"
#include <math.h>                 /* amalgamator: keep */

/*
** Object used to iterate through all "coalesced phrase instances" in
** a single column of the current row. If the phrase instances in the
** column being considered do not overlap, this object simply iterates
** through them. Or, if they do overlap (share one or more tokens in
** common), each set of overlapping instances is treated as a single
** match. See documentation for the highlight() auxiliary function for
** details.
**
** Usage is:
**
**   for(rc = fts5CInstIterInit(pApi, pFts, iCol, &iter);
**       rc==SQLITE_OK && iter.iStart>=0;
**       rc = fts5CInstIterNext(&iter)
**   ){
**     printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd);
**   }
**
*/
typedef struct CInstIter CInstIter;
struct CInstIter {
  const Fts5ExtensionApi *pApi;   /* API offered by current FTS version */
  Fts5Context *pFts;              /* First arg to pass to pApi functions */
  int iCol;                       /* Column to search */
  int iInst;                      /* Next phrase instance index */
  int nInst;                      /* Total number of phrase instances */

  /* Output variables */
  int iStart;                     /* First token in coalesced phrase instance */
  int iEnd;                       /* Last token in coalesced phrase instance */
};

/*
** Advance the iterator to the next coalesced phrase instance. Return
** an SQLite error code if an error occurs, or SQLITE_OK otherwise.
*/
static int fts5CInstIterNext(CInstIter *pIter){
  int rc = SQLITE_OK;
  pIter->iStart = -1;
  pIter->iEnd = -1;

  while( rc==SQLITE_OK && pIter->iInst<pIter->nInst ){
    int ip; int ic; int io;
    rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io);
    if( rc==SQLITE_OK ){
      if( ic==pIter->iCol ){
        int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip);
        if( pIter->iStart<0 ){
          pIter->iStart = io;
          pIter->iEnd = iEnd;
        }else if( io<=pIter->iEnd ){
          if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd;
        }else{
          break;
        }
      }
      pIter->iInst++;
    }
  }

  return rc;
}

/*
** Initialize the iterator object indicated by the final parameter to
** iterate through coalesced phrase instances in column iCol.
*/
static int fts5CInstIterInit(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  int iCol,
  CInstIter *pIter
){
  int rc;

  memset(pIter, 0, sizeof(CInstIter));
  pIter->pApi = pApi;
  pIter->pFts = pFts;
  pIter->iCol = iCol;
  rc = pApi->xInstCount(pFts, &pIter->nInst);

  if( rc==SQLITE_OK ){
    rc = fts5CInstIterNext(pIter);
  }

  return rc;
}



/*************************************************************************
** Start of highlight() implementation.
*/
typedef struct HighlightContext HighlightContext;
struct HighlightContext {
  CInstIter iter;                 /* Coalesced Instance Iterator */
  int iPos;                       /* Current token offset in zIn[] */
  int iRangeStart;                /* First token to include */
  int iRangeEnd;                  /* If non-zero, last token to include */
  const char *zOpen;              /* Opening highlight */
  const char *zClose;             /* Closing highlight */
  const char *zIn;                /* Input text */
  int nIn;                        /* Size of input text in bytes */
  int iOff;                       /* Current offset within zIn[] */
  char *zOut;                     /* Output value */
};

/*
** Append text to the HighlightContext output string - p->zOut. Argument
** z points to a buffer containing n bytes of text to append. If n is
** negative, everything up until the first '\0' is appended to the output.
**
** If *pRc is set to any value other than SQLITE_OK when this function is
** called, it is a no-op. If an error (i.e. an OOM condition) is encountered,
** *pRc is set to an error code before returning.
*/
static void fts5HighlightAppend(
  int *pRc,
  HighlightContext *p,
  const char *z, int n
){
  if( *pRc==SQLITE_OK ){
    if( n<0 ) n = (int)strlen(z);
    p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z);
    if( p->zOut==0 ) *pRc = SQLITE_NOMEM;
  }
}

/*
** Tokenizer callback used by implementation of highlight() function.
*/
static int fts5HighlightCb(
  void *pContext,                 /* Pointer to HighlightContext object */
  int tflags,                     /* Mask of FTS5_TOKEN_* flags */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStartOff,                  /* Start offset of token */
  int iEndOff                     /* End offset of token */
){
  HighlightContext *p = (HighlightContext*)pContext;
  int rc = SQLITE_OK;
  int iPos;

  UNUSED_PARAM2(pToken, nToken);

  if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK;
  iPos = p->iPos++;

  if( p->iRangeEnd>0 ){
    if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
    if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
  }

  if( iPos==p->iter.iStart ){
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff);
    fts5HighlightAppend(&rc, p, p->zOpen, -1);
    p->iOff = iStartOff;
  }

  if( iPos==p->iter.iEnd ){
    if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){
      fts5HighlightAppend(&rc, p, p->zOpen, -1);
    }
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    fts5HighlightAppend(&rc, p, p->zClose, -1);
    p->iOff = iEndOff;
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterNext(&p->iter);
    }
  }

  if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    p->iOff = iEndOff;
    if( iPos<p->iter.iEnd ){
      fts5HighlightAppend(&rc, p, p->zClose, -1);
    }
  }

  return rc;
}

/*
** Implementation of highlight() function.
*/
static void fts5HighlightFunction(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  HighlightContext ctx;
  int rc;
  int iCol;

  if( nVal!=3 ){
    const char *zErr = "wrong number of arguments to function highlight()";
    sqlite3_result_error(pCtx, zErr, -1);
    return;
  }

  iCol = sqlite3_value_int(apVal[0]);
  memset(&ctx, 0, sizeof(HighlightContext));
  ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
  ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
  rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn);

  if( ctx.zIn ){
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter);
    }

    if( rc==SQLITE_OK ){
      rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx, fts5HighlightCb);
    }
    fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);

    if( rc==SQLITE_OK ){
      sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
    }
    sqlite3_free(ctx.zOut);
  }
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }
}
/*
** End of highlight() implementation.
**************************************************************************/

/*
** Implementation of snippet() function.
*/
static void fts5SnippetFunction(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  HighlightContext ctx;
  int rc = SQLITE_OK;             /* Return code */
  int iCol;                       /* 1st argument to snippet() */
  const char *zEllips;            /* 4th argument to snippet() */
  int nToken;                     /* 5th argument to snippet() */
  int nInst = 0;                  /* Number of instance matches this row */
  int i;                          /* Used to iterate through instances */
  int nPhrase;                    /* Number of phrases in query */
  unsigned char *aSeen;           /* Array of "seen instance" flags */
  int iBestCol;                   /* Column containing best snippet */
  int iBestStart = 0;             /* First token of best snippet */
  int iBestLast;                  /* Last token of best snippet */
  int nBestScore = 0;             /* Score of best snippet */
  int nColSize = 0;               /* Total size of iBestCol in tokens */

  if( nVal!=5 ){
    const char *zErr = "wrong number of arguments to function snippet()";
    sqlite3_result_error(pCtx, zErr, -1);
    return;
  }

  memset(&ctx, 0, sizeof(HighlightContext));
  iCol = sqlite3_value_int(apVal[0]);
  ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
  ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
  zEllips = (const char*)sqlite3_value_text(apVal[3]);
  nToken = sqlite3_value_int(apVal[4]);
  iBestLast = nToken-1;

  iBestCol = (iCol>=0 ? iCol : 0);
  nPhrase = pApi->xPhraseCount(pFts);
  aSeen = sqlite3_malloc(nPhrase);
  if( aSeen==0 ){
    rc = SQLITE_NOMEM;
  }

  if( rc==SQLITE_OK ){
    rc = pApi->xInstCount(pFts, &nInst);
  }
  for(i=0; rc==SQLITE_OK && i<nInst; i++){
    int ip, iSnippetCol, iStart;
    memset(aSeen, 0, nPhrase);
    rc = pApi->xInst(pFts, i, &ip, &iSnippetCol, &iStart);
    if( rc==SQLITE_OK && (iCol<0 || iSnippetCol==iCol) ){
      int nScore = 1000;
      int iLast = iStart - 1 + pApi->xPhraseSize(pFts, ip);
      int j;
      aSeen[ip] = 1;

      for(j=i+1; rc==SQLITE_OK && j<nInst; j++){
        int ic; int io; int iFinal;
        rc = pApi->xInst(pFts, j, &ip, &ic, &io);
        iFinal = io + pApi->xPhraseSize(pFts, ip) - 1;
        if( rc==SQLITE_OK && ic==iSnippetCol && iLast<iStart+nToken ){
          nScore += aSeen[ip] ? 1000 : 1;
          aSeen[ip] = 1;
          if( iFinal>iLast ) iLast = iFinal;
        }
      }

      if( rc==SQLITE_OK && nScore>nBestScore ){
        iBestCol = iSnippetCol;
        iBestStart = iStart;
        iBestLast = iLast;
        nBestScore = nScore;
      }
    }
  }

  if( rc==SQLITE_OK ){
    rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
  }
  if( rc==SQLITE_OK ){
    rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
  }
  if( ctx.zIn ){
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
    }

    if( (iBestStart+nToken-1)>iBestLast ){
      iBestStart -= (iBestStart+nToken-1-iBestLast) / 2;
    }
    if( iBestStart+nToken>nColSize ){
      iBestStart = nColSize - nToken;
    }
    if( iBestStart<0 ) iBestStart = 0;

    ctx.iRangeStart = iBestStart;
    ctx.iRangeEnd = iBestStart + nToken - 1;

    if( iBestStart>0 ){
      fts5HighlightAppend(&rc, &ctx, zEllips, -1);
    }
    if( rc==SQLITE_OK ){
      rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx, fts5HighlightCb);
    }
    if( ctx.iRangeEnd>=(nColSize-1) ){
      fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
    }else{
      fts5HighlightAppend(&rc, &ctx, zEllips, -1);
    }

    if( rc==SQLITE_OK ){
      sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
    }else{
      sqlite3_result_error_code(pCtx, rc);
    }
    sqlite3_free(ctx.zOut);
  }
  sqlite3_free(aSeen);
}

/************************************************************************/

/*
** The first time the bm25() function is called for a query, an instance
** of the following structure is allocated and populated.
*/
typedef struct Fts5Bm25Data Fts5Bm25Data;
struct Fts5Bm25Data {
  int nPhrase;                    /* Number of phrases in query */
  double avgdl;                   /* Average number of tokens in each row */
  double *aIDF;                   /* IDF for each phrase */
  double *aFreq;                  /* Array used to calculate phrase freq. */
};

/*
** Callback used by fts5Bm25GetData() to count the number of rows in the
** table matched by each individual phrase within the query.
*/
static int fts5CountCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  void *pUserData                 /* Pointer to sqlite3_int64 variable */
){
  sqlite3_int64 *pn = (sqlite3_int64*)pUserData;
  UNUSED_PARAM2(pApi, pFts);
  (*pn)++;
  return SQLITE_OK;
}

/*
** Set *ppData to point to the Fts5Bm25Data object for the current query.
** If the object has not already been allocated, allocate and populate it
** now.
*/
static int fts5Bm25GetData(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  Fts5Bm25Data **ppData           /* OUT: bm25-data object for this query */
){
  int rc = SQLITE_OK;             /* Return code */
  Fts5Bm25Data *p;                /* Object to return */

  p = pApi->xGetAuxdata(pFts, 0);
  if( p==0 ){
    int nPhrase;                  /* Number of phrases in query */
    sqlite3_int64 nRow = 0;       /* Number of rows in table */
    sqlite3_int64 nToken = 0;     /* Number of tokens in table */
    int nByte;                    /* Bytes of space to allocate */
    int i;

    /* Allocate the Fts5Bm25Data object */
    nPhrase = pApi->xPhraseCount(pFts);
    nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double);
    p = (Fts5Bm25Data*)sqlite3_malloc(nByte);
    if( p==0 ){
      rc = SQLITE_NOMEM;
    }else{
      memset(p, 0, nByte);
      p->nPhrase = nPhrase;
      p->aIDF = (double*)&p[1];
      p->aFreq = &p->aIDF[nPhrase];
    }

    /* Calculate the average document length for this FTS5 table */
    if( rc==SQLITE_OK ) rc = pApi->xRowCount(pFts, &nRow);
    if( rc==SQLITE_OK ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken);
    if( rc==SQLITE_OK ) p->avgdl = (double)nToken / (double)nRow;

    /* Calculate an IDF for each phrase in the query */
    for(i=0; rc==SQLITE_OK && i<nPhrase; i++){
      sqlite3_int64 nHit = 0;
      rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb);
      if( rc==SQLITE_OK ){
        /* Calculate the IDF (Inverse Document Frequency) for phrase i.
        ** This is done using the standard BM25 formula as found on Wikipedia:
        **
        **   IDF = log( (N - nHit + 0.5) / (nHit + 0.5) )
        **
        ** where "N" is the total number of documents in the set and nHit
        ** is the number that contain at least one instance of the phrase
        ** under consideration.
        **
        ** The problem with this is that if (N < 2*nHit), the IDF is
        ** negative, which is undesirable. So the minimum allowable IDF is
        ** (1e-6) - roughly the same as a term that appears in just over
        ** half of a set of 5,000,000 documents. */
        double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) );
        if( idf<=0.0 ) idf = 1e-6;
        p->aIDF[i] = idf;
      }
    }

    if( rc!=SQLITE_OK ){
      sqlite3_free(p);
    }else{
      rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
    }
    if( rc!=SQLITE_OK ) p = 0;
  }
  *ppData = p;
  return rc;
}

/*
** Implementation of bm25() function.
*/
static void fts5Bm25Function(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  const double k1 = 1.2;          /* Constant "k1" from BM25 formula */
  const double b = 0.75;          /* Constant "b" from BM25 formula */
  int rc = SQLITE_OK;             /* Error code */
  double score = 0.0;             /* SQL function return value */
  Fts5Bm25Data *pData;            /* Values allocated/calculated once only */
  int i;                          /* Iterator variable */
  int nInst = 0;                  /* Value returned by xInstCount() */
  double D = 0.0;                 /* Total number of tokens in row */
  double *aFreq = 0;              /* Array of phrase freq. for current row */

  /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation)
  ** for each phrase in the query for the current row. */
  rc = fts5Bm25GetData(pApi, pFts, &pData);
  if( rc==SQLITE_OK ){
    aFreq = pData->aFreq;
    memset(aFreq, 0, sizeof(double) * pData->nPhrase);
    rc = pApi->xInstCount(pFts, &nInst);
  }
  for(i=0; rc==SQLITE_OK && i<nInst; i++){
    int ip; int ic; int io;
    rc = pApi->xInst(pFts, i, &ip, &ic, &io);
    if( rc==SQLITE_OK ){
      double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0;
      aFreq[ip] += w;
    }
  }

  /* Figure out the total size of the current row in tokens. */
  if( rc==SQLITE_OK ){
    int nTok;
    rc = pApi->xColumnSize(pFts, -1, &nTok);
    D = (double)nTok;
  }

  /* Determine the BM25 score for the current row. */
  for(i=0; rc==SQLITE_OK && i<pData->nPhrase; i++){
    score += pData->aIDF[i] * (
      ( aFreq[i] * (k1 + 1.0) ) /
      ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) )
    );
  }

  /* If no error has occurred, return the calculated score. Otherwise,
  ** throw an SQL exception. */
  if( rc==SQLITE_OK ){
    sqlite3_result_double(pCtx, -1.0 * score);
  }else{
    sqlite3_result_error_code(pCtx, rc);
  }
}
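/* [Editor's note: illustrative sketch, not part of the original file.]
** The per-phrase term of the loop above is the classic BM25 expression
**
**   IDF(qi) * f(qi,D)*(k1+1) / ( f(qi,D) + k1*(1 - b + b*|D|/avgdl) )
**
** and the final score is negated so that an ascending "ORDER BY bm25(tbl)"
** lists the best matches first. A standalone helper that evaluates one
** phrase's contribution with the same k1/b constants; the name and inputs
** are hypothetical.
*/
static double fts5Bm25OneTerm(double idf, double freq, double nTokenInRow, double avgdl){
  const double k1 = 1.2;
  const double b = 0.75;
  return idf * ( freq*(k1 + 1.0) ) / ( freq + k1*(1.0 - b + b*nTokenInRow/avgdl) );
}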

int sqlite3Fts5AuxInit(fts5_api *pApi){
  struct Builtin {
    const char *zFunc;            /* Function name (nul-terminated) */
    void *pUserData;              /* User-data pointer */
    fts5_extension_function xFunc;/* Callback function */
    void (*xDestroy)(void*);      /* Destructor function */
  } aBuiltin [] = {
    { "snippet",   0, fts5SnippetFunction, 0 },
    { "highlight", 0, fts5HighlightFunction, 0 },
    { "bm25",      0, fts5Bm25Function, 0 },
  };
  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* To iterate through builtin functions */

  for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){
    rc = pApi->xCreateFunction(pApi,
        aBuiltin[i].zFunc,
        aBuiltin[i].pUserData,
        aBuiltin[i].xFunc,
        aBuiltin[i].xDestroy
    );
  }

  return rc;
}
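/* [Editor's note: illustrative sketch, not part of the original file.]
** Once registered, the three built-ins above are invoked from SQL with the
** FTS5 table name as their first argument. A minimal C caller; the table
** name "docs" and its schema are hypothetical. Because fts5Bm25Function()
** negates the score, an ascending ORDER BY ranks the best match first.
*/
static int fts5AuxQueryDemo(sqlite3 *db){
  const char *zSql =
    "SELECT highlight(docs, 0, '[', ']'), "
    "       snippet(docs, 0, '[', ']', '...', 10), "
    "       bm25(docs) "
    "FROM docs WHERE docs MATCH 'sqlite' ORDER BY bm25(docs)";
  return sqlite3_exec(db, zSql, 0, 0, 0);   /* Result rows discarded in this sketch */
}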