MXS-2732 Add sqlite3 version 3110100

2019-10-30 10:43:42 +02:00
parent 81e78726eb
commit 6df8fb2497
1429 changed files with 742953 additions and 0 deletions
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/README.content
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/README.content
@ -0,0 +1,178 @@
+
+FTS4 CONTENT OPTION
+
+  Normally, in order to create a full-text index on a dataset, the FTS4 
+  module stores a copy of all indexed documents in a specially created 
+  database table.
+
+  As of SQLite version 3.7.9, FTS4 supports a new option - "content" -
+  designed to extend FTS4 to support the creation of full-text indexes where:
+
+    * The indexed documents are not stored within the SQLite database 
+      at all (a "contentless" FTS4 table), or
+
+    * The indexed documents are stored in a database table created and
+      managed by the user (an "external content" FTS4 table).
+
+  Because the indexed documents themselves are usually much larger than 
+  the full-text index, the content option can sometimes be used to achieve 
+  significant space savings.
+
+CONTENTLESS FTS4 TABLES
+
+  In order to create an FTS4 table that does not store a copy of the indexed
+  documents at all, the content option should be set to an empty string.
+  For example, the following SQL creates such an FTS4 table with three
+  columns - "a", "b", and "c":
+
+    CREATE VIRTUAL TABLE t1 USING fts4(content="", a, b, c);
+
+  Data can be inserted into such an FTS4 table using INSERT statements.
+  However, unlike ordinary FTS4 tables, the user must supply an explicit
+  integer docid value. For example:
+
+    -- This statement is Ok:
+    INSERT INTO t1(docid, a, b, c) VALUES(1, 'a b c', 'd e f', 'g h i');
+
+    -- This statement causes an error, as no docid value has been provided:
+    INSERT INTO t1(a, b, c) VALUES('j k l', 'm n o', 'p q r');
+
+  It is not possible to UPDATE or DELETE a row stored in a contentless FTS4
+  table. Attempting to do so is an error.
+
+  Contentless FTS4 tables also support SELECT statements. However, it is
+  an error to attempt to retrieve the value of any table column other than
+  the docid column. The auxiliary function matchinfo() may be used, but
+  snippet() and offsets() may not. For example:
+
+    -- The following statements are Ok:
+    SELECT docid FROM t1 WHERE t1 MATCH 'xxx';
+    SELECT docid FROM t1 WHERE a MATCH 'xxx';
+    SELECT matchinfo(t1) FROM t1 WHERE t1 MATCH 'xxx';
+
+    -- The following statements all cause errors, as the value of columns
+    -- other than docid are required to evaluate them.
+    SELECT * FROM t1;
+    SELECT a, b FROM t1 WHERE t1 MATCH 'xxx';
+    SELECT docid FROM t1 WHERE a LIKE 'xxx%';
+    SELECT snippet(t1) FROM t1 WHERE t1 MATCH 'xxx';
+
+  Errors related to attempting to retrieve column values other than docid
+  are runtime errors that occur within sqlite3_step(). In some cases, for
+  example if the MATCH expression in a SELECT query matches zero rows, there
+  may be no error at all even if a statement does refer to column values 
+  other than docid.
+
+EXTERNAL CONTENT FTS4 TABLES
+
+  An "external content" FTS4 table is similar to a contentless table, except
+  that if evaluation of a query requires the value of a column other than 
+  docid, FTS4 attempts to retrieve that value from a table (or view, or 
+  virtual table) nominated by the user (hereafter referred to as the "content
+  table"). The FTS4 module never writes to the content table, and writing
+  to the content table does not affect the full-text index. It is the
+  responsibility of the user to ensure that the content table and the 
+  full-text index are consistent.
+
+  An external content FTS4 table is created by setting the content option
+  to the name of a table (or view, or virtual table) that may be queried by
+  FTS4 to retrieve column values when required. If the nominated table does
+  not exist, then an external content table behaves in the same way as
+  a contentless table. For example:
+
+    CREATE TABLE t2(id INTEGER PRIMARY KEY, a, b, c);
+    CREATE VIRTUAL TABLE t3 USING fts4(content="t2", a, c);
+
+  Assuming the nominated table does exist, then its columns must be the same 
+  as or a superset of those defined for the FTS table.
+
+  When a user's query on the FTS table requires a column value other than
+  docid, FTS attempts to read this value from the corresponding column of
+  the row in the content table with a rowid value equal to the current FTS
+  docid. Or, if such a row cannot be found in the content table, a NULL
+  value is used instead. For example:
+
+    CREATE TABLE t2(id INTEGER PRIMARY KEY, a, b, c, d);
+    CREATE VIRTUAL TABLE t3 USING fts4(content="t2", b, c);
+  
+    INSERT INTO t2(id, a, b, c) VALUES(2, 'a b', 'c d', 'e f');
+    INSERT INTO t2(id, a, b, c) VALUES(3, 'g h', 'i j', 'k l');
+    INSERT INTO t3(docid, b, c) SELECT id, b, c FROM t2;
+
+    -- The following query returns a single row with two columns containing
+    -- the text values "i j" and "k l".
+    --
+    -- The query uses the full-text index to discover that the MATCH 
+    -- term matches the row with docid=3. It then retrieves the values
+    -- of columns b and c from the row with rowid=3 in the content table
+    -- to return.
+    --
+    SELECT * FROM t3 WHERE t3 MATCH 'k';
+
+    -- Following the UPDATE, the query still returns a single row, this
+    -- time containing the text values "xxx" and "yyy". This is because the
+    -- full-text index still indicates that the row with docid=3 matches
+    -- the FTS4 query 'k', even though the documents stored in the content
+    -- table have been modified.
+    --
+    UPDATE t2 SET b = 'xxx', c = 'yyy' WHERE rowid = 3;
+    SELECT * FROM t3 WHERE t3 MATCH 'k';
+
+    -- Following the DELETE below, the query returns one row containing two
+    -- NULL values. NULL values are returned because FTS is unable to find
+    -- a row with rowid=3 within the content table.
+    --
+    DELETE FROM t2;
+    SELECT * FROM t3 WHERE t3 MATCH 'k';
+
+  When a row is deleted from an external content FTS4 table, FTS4 needs to
+  retrieve the column values of the row being deleted from the content table.
+  This is so that FTS4 can update the full-text index entries for each token
+  that occurs within the deleted row to indicate that that row has been 
+  deleted. If the content table row cannot be found, or if it contains values
+  inconsistent with the contents of the FTS index, the results can be difficult
+  to predict. The FTS index may be left containing entries corresponding to the
+  deleted row, which can lead to seemingly nonsensical results being returned
+  by subsequent SELECT queries. The same applies when a row is updated, as
+  internally an UPDATE is the same as a DELETE followed by an INSERT.
+  
+  Instead of writing separately to the full-text index and the content table,
+  some users may wish to use database triggers to keep the full-text index
+  up to date with respect to the set of documents stored in the content table.
+  For example, using the tables from earlier examples:
+
+    CREATE TRIGGER t2_bu BEFORE UPDATE ON t2 BEGIN
+      DELETE FROM t3 WHERE docid=old.rowid;
+    END;
+    CREATE TRIGGER t2_bd BEFORE DELETE ON t2 BEGIN
+      DELETE FROM t3 WHERE docid=old.rowid;
+    END;
+
+    CREATE TRIGGER t2_au AFTER UPDATE ON t2 BEGIN
+      INSERT INTO t3(docid, b, c) VALUES(new.rowid, new.b, new.c);
+    END;
+    CREATE TRIGGER t2_ai AFTER INSERT ON t2 BEGIN
+      INSERT INTO t3(docid, b, c) VALUES(new.rowid, new.b, new.c);
+    END;
+
+  The DELETE trigger must be fired before the actual delete takes place
+  on the content table. This is so that FTS4 can still retrieve the original
+  values in order to update the full-text index. And the INSERT trigger must
+  be fired after the new row is inserted, so as to handle the case where the
+  rowid is assigned automatically within the system. The UPDATE trigger must
+  be split into two parts, one fired before and one after the update of the
+  content table, for the same reasons.
+
+  FTS4 features a special command similar to the 'optimize' command that
+  deletes the entire full-text index and rebuilds it based on the current
+  set of documents in the content table. Assuming again that "t3" is the
+  name of the external content FTS4 table, the command is:
+
+    INSERT INTO t3(t3) VALUES('rebuild');
+
+  This command may also be used with ordinary FTS4 tables, although it may
+  only be useful if the full-text index has somehow become corrupt. It is an
+  error to attempt to rebuild the full-text index maintained by a contentless
+  FTS4 table.
+
+
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/README.syntax
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/README.syntax
@ -0,0 +1,209 @@
+
+1. OVERVIEW
+
+  This README file describes the syntax of the arguments that may be passed to
+  the FTS3 MATCH operator used for full-text queries. For example, if table 
+  "t1" is an Fts3 virtual table, the following SQL query:
+
+    SELECT * FROM t1 WHERE <col> MATCH <full-text query>
+
+  may be used to retrieve all rows that match a specified full-text query.
+  The text "<col>" should be replaced by either the name of the fts3 table
+  (in this case "t1"), or by the name of one of the columns of the fts3
+  table. <full-text query> should be replaced by an SQL expression that
+  evaluates to a string containing an Fts3 query.
+
+  If the left-hand-side of the MATCH operator is set to the name of the
+  fts3 table, then by default the query may be matched against any column
+  of the table. If it is set to a column name, then by default the query
+  may only match the specified column. In both cases this may be overridden
+  as part of the query text (see sections 2 and 3 below).
+
+  As of SQLite version 3.6.8, Fts3 supports two slightly different query 
+  formats; the standard syntax, which is used by default, and the enhanced
+  query syntax which can be selected by compiling with the pre-processor
+  symbol SQLITE_ENABLE_FTS3_PARENTHESIS defined.
+
+    -DSQLITE_ENABLE_FTS3_PARENTHESIS
+
+2. STANDARD QUERY SYNTAX
+
+  When using the standard Fts3 query syntax, a query usually consists of a 
+  list of terms (words) separated by white-space characters. To match a
+  query, a row (or column) of an Fts3 table must contain each of the specified
+  terms. For example, the following query:
+
+    <col> MATCH 'hello world'
+
+  matches rows (or columns, if <col> is the name of a column) that
+  contain at least one instance of the token "hello", and at least one 
+  instance of the token "world". Tokens may be grouped into phrases using
+  quotation marks. In this case, a matching row or column must contain each
+  of the tokens in the phrase in the order specified, with no intervening
+  tokens. For example, the query:
+
+    <col> MATCH '"hello world" joe'
+
+  matches the first of the following three documents, but not the second or
+  third:
+
+    "'Hello world', said Joe."
+    "One should always greet the world with a cheery hello, thought Joe."
+    "How many hello world programs could there be?"
+
+  As well as grouping tokens together by phrase, the binary NEAR operator 
+  may be used to search for rows that contain two or more specified tokens 
+  or phrases within a specified proximity of each other. The NEAR operator
+  must always be specified in upper case. The word "near" in lower or mixed
+  case is treated as an ordinary token. For example, the following query:
+
+    <col> MATCH 'engineering NEAR consultancy'
+
+  matches rows that contain both the "engineering" and "consultancy" tokens
+  in the same column with not more than 10 other words between them. It does
+  not matter which of the two terms occurs first in the document, only that
+  they be separated by 10 tokens or fewer. The user may also specify
+  a different required proximity by adding "/N" immediately after the NEAR
+  operator, where N is an integer. For example:
+
+    <col> MATCH 'engineering NEAR/5 consultancy'
+
+  searches for a row containing an instance of each specified token separated
+  by not more than 5 other tokens. More than one NEAR operator can be used
+  in a sequence. For example this query:
+
+    <col> MATCH 'reliable NEAR/2 engineering NEAR/5 consultancy'
+
+  searches for a row that contains an instance of the token "reliable" 
+  separated by not more than two tokens from an instance of "engineering",
+  which is in turn separated by not more than 5 other tokens from an
+  instance of the term "consultancy". Phrases enclosed in quotes may
+  also be used as arguments to the NEAR operator.
+
+  Similar to the NEAR operator, one or more tokens or phrases may be 
+  separated by OR operators. In this case, only one of the specified tokens
+  or phrases must appear in the document. For example, the query:
+
+    <col> MATCH 'hello OR world'
+
+  matches rows that contain either the term "hello", or the term "world",
+  or both. Note that unlike in many programming languages, the OR operator
+  has a higher precedence than the AND operators implied between white-space
+  separated tokens. The following query matches documents that contain the
+  term 'sqlite' and at least one of the terms 'fantastic' or 'impressive',
+  not those that contain both 'sqlite' and 'fantastic' or 'impressive':
+
+    <col> MATCH 'sqlite fantastic OR impressive'
+
+  Any token that is part of an Fts3 query expression, whether or not it is
+  part of a phrase enclosed in quotes, may have a '*' character appended to
+  it. In this case, the token matches all terms that begin with the characters
+  of the token, not just those that exactly match it. For example, the 
+  following query:
+
+    <col> MATCH 'sql*'
+
+  matches all rows that contain the term "SQLite", as well as those that
+  contain "SQL".
+
+  A token that is not part of a quoted phrase may be preceded by a '-'
+  character, which indicates that matching rows must not contain the 
+  specified term. For example, the following:
+
+    <col> MATCH '"database engine" -sqlite'
+
+  matches rows that contain the phrase "database engine" but do not contain
+  the term "sqlite". If the '-' character occurs inside a quoted phrase,
+  it is ignored. It is possible to use both the '-' prefix and the '*' postfix
+  on a single term. At this time, all Fts3 queries must contain at least
+  one term or phrase that is not preceded by the '-' prefix.
+
+  Regardless of whether or not a table name or column name is used on the 
+  left hand side of the MATCH operator, a specific column of the fts3 table
+  may be associated with each token in a query by preceding a token with
+  a column name followed by a ':' character. For example, regardless of what
+  is specified for <col>, the following query requires that column "col1"
+  of the table contains the term "hello", and that column "col2" of the
+  table contains the term "world". If the table does not contain columns
+  named "col1" and "col2", then an error is returned and the query is
+  not run.
+
+    <col> MATCH 'col1:hello col2:world'
+
+  It is not possible to associate a specific table column with a quoted 
+  phrase or a term preceded by a '-' operator. A '*' character may be
+  appended to a term associated with a specific column for prefix matching.
+
+3. ENHANCED QUERY SYNTAX
+
+  The enhanced query syntax is quite similar to the standard query syntax,
+  with the following four differences:
+
+  1) Parentheses are supported. When using the enhanced query syntax,
+     parentheses may be used to overcome the built-in precedence of the
+     supplied binary operators. For example, the following query:
+
+       <col> MATCH '(hello world) OR (simple example)'
+
+     matches documents that contain both "hello" and "world", and documents
+     that contain both "simple" and "example". It is not possible to formulate
+     such a query using the standard syntax.
+
+  2) Instead of separating tokens and phrases by whitespace, an AND operator
+     may be explicitly specified. This does not change query processing at
+     all, but may be used to improve readability. For example, the following
+     query is handled identically to the one above:
+
+       <col> MATCH '(hello AND world) OR (simple AND example)'
+
+     As with the OR and NEAR operators, the AND operator must be specified
+     in upper case. The word "and" specified in lower or mixed case is 
+     handled as a regular token.
+
+  3) The '-' token prefix is not supported. Instead, a new binary operator,
+     NOT, is included. The NOT operator requires that the query specified
+     as its left-hand operand matches, but that the query specified as the
+     right-hand operand does not. For example, to query for all rows that
+     contain the term "example" but not the term "simple", the following
+     query could be used:
+
+       <col> MATCH 'example NOT simple'
+
+     As for all other operators, the NOT operator must be specified in
+     upper case. Otherwise it will be treated as a regular token.
+
+  4) Unlike in the standard syntax, where the OR operator has a higher
+     precedence than the implicit AND operator, when using the enhanced
+     syntax implicit and explicit AND operators have a higher precedence
+     than OR operators. Using the enhanced syntax, the following two
+     queries are equivalent:
+
+       <col> MATCH 'sqlite fantastic OR impressive'
+       <col> MATCH '(sqlite AND fantastic) OR impressive'
+
+     however, when using the standard syntax, the query:
+
+       <col> MATCH 'sqlite fantastic OR impressive'
+
+     is equivalent to the enhanced syntax query:
+
+       <col> MATCH 'sqlite AND (fantastic OR impressive)'
+
+     The precedence of all enhanced syntax operators, in order from highest
+     to lowest, is:
+
+       NEAR       (highest precedence, tightest grouping)
+       NOT
+       AND
+       OR         (lowest precedence, loosest grouping)
+
+  Using the enhanced syntax, it is possible to specify expressions enclosed
+  in parentheses as operands to the NOT, AND and OR operators. However both
+  the left and right hand side operands of NEAR operators must be either
+  tokens or phrases. Attempting the following query will return an error:
+
+    <col> MATCH 'sqlite NEAR (fantastic OR impressive)'
+
+  Queries of this form must be re-written as:
+
+    <col> MATCH 'sqlite NEAR fantastic OR sqlite NEAR impressive'
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/README.tokenizers
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/README.tokenizers
@ -0,0 +1,133 @@
+
+1. FTS3 Tokenizers
+
+  When creating a new full-text table, FTS3 allows the user to select
+  the text tokenizer implementation to be used when indexing text
+  by specifying a "tokenize" clause as part of the CREATE VIRTUAL TABLE
+  statement:
+
+    CREATE VIRTUAL TABLE <table-name> USING fts3(
+      <columns ...> [, tokenize <tokenizer-name> [<tokenizer-args>]]
+    );
+
+  The built-in tokenizers (valid values to pass as <tokenizer-name>) are
+  "simple", "porter" and "unicode61".
+
+  <tokenizer-args> should consist of zero or more white-space separated
+  arguments to pass to the selected tokenizer implementation. The 
+  interpretation of the arguments, if any, depends on the individual 
+  tokenizer.
+
+2. Custom Tokenizers
+
+  FTS3 allows users to provide custom tokenizer implementations. The 
+  interface used to create a new tokenizer is defined and described in 
+  the fts3_tokenizer.h source file.
+
+  Registering a new FTS3 tokenizer is similar to registering a new 
+  virtual table module with SQLite. The user passes a pointer to a
+  structure containing pointers to various callback functions that
+  make up the implementation of the new tokenizer type. For tokenizers,
+  the structure (defined in fts3_tokenizer.h) is called
+  "sqlite3_tokenizer_module".
+
+  FTS3 does not expose a C-function that users call to register new
+  tokenizer types with a database handle. Instead, the pointer must
+  be encoded as an SQL blob value and passed to FTS3 through the SQL
+  engine by evaluating a special scalar function, "fts3_tokenizer()".
+  The fts3_tokenizer() function may be called with one or two arguments,
+  as follows:
+
+    SELECT fts3_tokenizer(<tokenizer-name>);
+    SELECT fts3_tokenizer(<tokenizer-name>, <sqlite3_tokenizer_module ptr>);
+  
+  Where <tokenizer-name> is a string identifying the tokenizer and
+  <sqlite3_tokenizer_module ptr> is a pointer to an sqlite3_tokenizer_module
+  structure encoded as an SQL blob. If the second argument is present,
+  it is registered as tokenizer <tokenizer-name> and a copy of it
+  returned. If only one argument is passed, a pointer to the tokenizer
+  implementation currently registered as <tokenizer-name> is returned,
+  encoded as a blob. Or, if no such tokenizer exists, an SQL exception
+  (error) is raised.
+
+  SECURITY: If the fts3 extension is used in an environment where potentially
+    malicious users may execute arbitrary SQL (e.g. gears), they should be
+    prevented from invoking the fts3_tokenizer() function, possibly using the
+    authorisation callback.
+
+  See "Sample code" below for an example of calling the fts3_tokenizer()
+  function from C code.
+
+3. ICU Library Tokenizers
+
+  If this extension is compiled with the SQLITE_ENABLE_ICU pre-processor 
+  symbol defined, then there exists a built-in tokenizer named "icu" 
+  implemented using the ICU library. The first argument passed to the
+  xCreate() method (see fts3_tokenizer.h) of this tokenizer may be
+  an ICU locale identifier, such as "tr_TR" for Turkish as used
+  in Turkey, or "en_AU" for English as used in Australia. For example:
+
+    "CREATE VIRTUAL TABLE thai_text USING fts3(text, tokenize icu th_TH)"
+
+  The ICU tokenizer implementation is very simple. It splits the input
+  text according to the ICU rules for finding word boundaries and discards
+  any tokens that consist entirely of white-space. This may be suitable
+  for some applications in some locales, but not all. If more complex
+  processing is required, for example to implement stemming or 
+  discard punctuation, this can be done by creating a tokenizer 
+  implementation that uses the ICU tokenizer as part of its implementation.
+
+  When using the ICU tokenizer this way, it is safe to overwrite the
+  contents of the strings returned by the xNext() method (see
+  fts3_tokenizer.h).
+
+4. Sample code.
+
+  The following two code samples illustrate the way C code should invoke
+  the fts3_tokenizer() scalar function:
+
+      int registerTokenizer(
+        sqlite3 *db, 
+        char *zName, 
+        const sqlite3_tokenizer_module *p
+      ){
+        int rc;
+        sqlite3_stmt *pStmt;
+        const char zSql[] = "SELECT fts3_tokenizer(?, ?)";
+      
+        rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+        if( rc!=SQLITE_OK ){
+          return rc;
+        }
+      
+        sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
+        sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
+        sqlite3_step(pStmt);
+      
+        return sqlite3_finalize(pStmt);
+      }
+      
+      int queryTokenizer(
+        sqlite3 *db, 
+        char *zName,  
+        const sqlite3_tokenizer_module **pp
+      ){
+        int rc;
+        sqlite3_stmt *pStmt;
+        const char zSql[] = "SELECT fts3_tokenizer(?)";
+      
+        *pp = 0;
+        rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+        if( rc!=SQLITE_OK ){
+          return rc;
+        }
+      
+        sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
+        if( SQLITE_ROW==sqlite3_step(pStmt) ){
+          if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
+            memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
+          }
+        }
+      
+        return sqlite3_finalize(pStmt);
+      }
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/README.txt
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/README.txt
@ -0,0 +1,4 @@
+This folder contains source code to the second full-text search
+extension for SQLite.  While the API is the same, this version uses a
+substantially different storage schema from fts1, so tables will need
+to be rebuilt.
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3.c
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3.h
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3.h
@ -0,0 +1,26 @@
+/*
+** 2006 Oct 10
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This header file is used by programs that want to link against the
+** FTS3 library.  All it does is declare the sqlite3Fts3Init() interface.
+*/
+#include "sqlite3.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  /* __cplusplus */
+
+int sqlite3Fts3Init(sqlite3 *db);  /* FTS3 entry point; takes the database
+                                   ** connection. Presumably returns an
+                                   ** SQLite result code - confirm against
+                                   ** the definition in fts3.c. */
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif  /* __cplusplus */
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3Int.h
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3Int.h
@ -0,0 +1,617 @@
+/*
+** 2009 Nov 12
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+*/
+#ifndef _FTSINT_H
+#define _FTSINT_H
+
+#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) 
+# define NDEBUG 1   /* not a SQLITE_DEBUG build: disable standard assert() */
+#endif
+
+/* FTS3/FTS4 require virtual tables: if virtual tables are omitted from the
+** build, turn both extensions off. */
+#ifdef SQLITE_OMIT_VIRTUALTABLE
+# undef SQLITE_ENABLE_FTS3
+# undef SQLITE_ENABLE_FTS4
+#endif
+
+/*
+** FTS4 is really an extension for FTS3.  It is enabled using the
+** SQLITE_ENABLE_FTS3 macro.  But to avoid confusion we also allow
+** the SQLITE_ENABLE_FTS4 macro to serve as an alias for SQLITE_ENABLE_FTS3.
+*/
+#if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3)
+# define SQLITE_ENABLE_FTS3
+#endif
+
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+/* If not building as part of the core, include sqlite3ext.h. */
+#ifndef SQLITE_CORE
+# include "sqlite3ext.h" 
+SQLITE_EXTENSION_INIT3
+#endif
+
+#include "sqlite3.h"
+#include "fts3_tokenizer.h"
+#include "fts3_hash.h"
+
+/*
+** This constant determines the maximum depth of an FTS expression tree
+** that the library will create and use. FTS uses recursion to perform 
+** various operations on the query tree, so the disadvantage of a large
+** limit is that it may allow very large queries to use large amounts
+** of stack space (perhaps causing a stack overflow).
+**
+** The default of 12 is applied only when SQLITE_FTS3_MAX_EXPR_DEPTH has
+** not already been defined, so a build may override it (for example with
+** a -D compiler option).
+*/
+#ifndef SQLITE_FTS3_MAX_EXPR_DEPTH
+# define SQLITE_FTS3_MAX_EXPR_DEPTH 12
+#endif
+
+
+/*
+** This constant controls how often segments are merged. Once there are
+** FTS3_MERGE_COUNT segments of level N, they are merged into a single
+** segment of level N+1.
+*/
+#define FTS3_MERGE_COUNT 16
+
+/*
+** This is the maximum amount of data (in bytes) to store in the 
+** Fts3Table.pendingTerms hash table. Normally, the hash table is
+** populated as documents are inserted/updated/deleted in a transaction
+** and used to create a new segment when the transaction is committed.
+** However if this limit is reached midway through a transaction, a new 
+** segment is created and the hash table cleared immediately.
+**
+** The value below is 1*1024*1024 bytes, i.e. 1 MiB.
+*/
+#define FTS3_MAX_PENDING_DATA (1*1024*1024)
+
+/*
+** Macro to return the number of elements in an array. SQLite has a
+** similar macro called ArraySize(). Use a different name to avoid
+** a collision when building an amalgamation with built-in FTS3.
+**
+** The count is computed as sizeof(X)/sizeof(X[0]), so X must be a real
+** array object, not a pointer to one. The result is cast to int.
+*/
+#define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0])))
+
+
+#ifndef MIN   /* Two-argument minimum/maximum helpers, defined only when
+              ** no MIN/MAX macro is already in scope. Each argument
+              ** appears twice in the expansion, so arguments with side
+              ** effects may be evaluated more than once. */
+# define MIN(x,y) ((x)<(y)?(x):(y))
+#endif
+#ifndef MAX
+# define MAX(x,y) ((x)>(y)?(x):(y))
+#endif
+
+/*
+** Maximum length of a varint encoded integer. The varint format is different
+** from that used by SQLite, so the maximum length is 10, not 9.
+*/
+#define FTS3_VARINT_MAX 10
+
+/*
+** FTS4 virtual tables may maintain multiple indexes - one index of all terms
+** in the document set and zero or more prefix indexes. All indexes are stored
+** as one or more b+-trees in the %_segments and %_segdir tables. 
+**
+** It is possible to determine which index a b+-tree belongs to based on the
+** value stored in the "%_segdir.level" column. Given this value L, the index
+** that the b+-tree belongs to is (L>>10), i.e. L divided by 1024. In other
+** words, all b+-trees with level values between 0 and 1023 (inclusive)
+** belong to index 0, all levels between 1024 and 2047 to index 1, and so on.
+**
+** It is considered impossible for an index to use more than 1024 levels. In 
+** theory though this may happen, but only after at least 
+** (FTS3_MERGE_COUNT^1024) separate flushes of the pending-terms tables.
+**
+** FTS3_SEGDIR_MAXLEVEL_STR is the same value spelled as a string literal;
+** the two definitions must be kept in agreement.
+*/
+#define FTS3_SEGDIR_MAXLEVEL      1024
+#define FTS3_SEGDIR_MAXLEVEL_STR "1024"
+
+/*
+** The testcase() macro is only used by the amalgamation.  If undefined,
+** make it a no-op.
+*/
+#ifndef testcase
+# define testcase(X)
+#endif
+
+/*
+** Terminator values for position-lists and column-lists.
+*/
+#define POS_COLUMN  (1)     /* Column-list terminator */
+#define POS_END     (0)     /* Position-list terminator */ 
+
+/*
+** This section provides definitions to allow the
+** FTS3 extension to be compiled outside of the 
+** amalgamation.
+*/
+#ifndef SQLITE_AMALGAMATION
+/*
+** Macros indicating that conditional expressions are always true or
+** false. One of three variants is selected at compile time:
+**
+**   SQLITE_COVERAGE_TEST  ALWAYS() and NEVER() are replaced by the
+**                         constants 1 and 0; the argument is not evaluated.
+**   SQLITE_DEBUG          The argument, reduced to 0 or 1, is passed to
+**                         sqlite3Fts3Always() / sqlite3Fts3Never(), which
+**                         are declared here and defined elsewhere.
+**   otherwise             Both macros simply yield their argument.
+*/
+#ifdef SQLITE_COVERAGE_TEST
+# define ALWAYS(x) (1)
+# define NEVER(X)  (0)
+#elif defined(SQLITE_DEBUG)
+# define ALWAYS(x) sqlite3Fts3Always((x)!=0)
+# define NEVER(x) sqlite3Fts3Never((x)!=0)
+int sqlite3Fts3Always(int b);
+int sqlite3Fts3Never(int b);
+#else
+# define ALWAYS(x) (x)
+# define NEVER(x)  (x)
+#endif
+
+/*
+** Internal types used by SQLite.
+*/
+typedef unsigned char u8;         /* 1-byte (or larger) unsigned integer */
+typedef short int i16;            /* 2-byte (or larger) signed integer */
+typedef unsigned int u32;         /* 4-byte unsigned integer */
+typedef sqlite3_uint64 u64;       /* 8-byte unsigned integer */
+typedef sqlite3_int64 i64;        /* 8-byte signed integer */
+
+/*
+** Macro used to suppress compiler warnings for unused parameters.
+*/
+#define UNUSED_PARAMETER(x) (void)(x)
+
+/*
+** Activate assert() only if SQLITE_DEBUG is enabled: unless the build
+** defines SQLITE_DEBUG (or has already defined NDEBUG), define NDEBUG so
+** that the standard assert() macro is disabled.
+*/
+#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) 
+# define NDEBUG 1
+#endif
+
+/*
+** The TESTONLY macro is used to enclose variable declarations or
+** other bits of code that are needed to support the arguments
+** within testcase() and assert() macros.
+*/
+#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
+# define TESTONLY(X)  X
+#else
+# define TESTONLY(X)
+#endif
+
+#endif /* SQLITE_AMALGAMATION */
+
+#ifdef SQLITE_DEBUG   /* FTS_CORRUPT_VTAB is the value used to report a
+                      ** corrupt table. In SQLITE_DEBUG builds it expands
+                      ** to a call to sqlite3Fts3Corrupt() (presumably so
+                      ** a breakpoint can be set there - confirm in
+                      ** fts3.c); otherwise it is SQLITE_CORRUPT_VTAB. */
+int sqlite3Fts3Corrupt(void);
+# define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt()
+#else
+# define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB
+#endif
+
+typedef struct Fts3Table Fts3Table;   /* Forward typedefs: these let the
+                                      ** struct types be referred to by
+                                      ** name before their full
+                                      ** definitions appear. */
+typedef struct Fts3Cursor Fts3Cursor;
+typedef struct Fts3Expr Fts3Expr;
+typedef struct Fts3Phrase Fts3Phrase;
+typedef struct Fts3PhraseToken Fts3PhraseToken;
+
+typedef struct Fts3Doclist Fts3Doclist;
+typedef struct Fts3SegFilter Fts3SegFilter;
+typedef struct Fts3DeferredToken Fts3DeferredToken;
+typedef struct Fts3SegReader Fts3SegReader;
+typedef struct Fts3MultiSegReader Fts3MultiSegReader;
+
+typedef struct MatchinfoBuffer MatchinfoBuffer;
+
+/*
+** A connection to a fulltext index is an instance of the following
+** structure. The xCreate and xConnect methods create an instance
+** of this structure and xDestroy and xDisconnect free that instance.
+** All other methods receive a pointer to the structure as one of their
+** arguments.
+*/
+struct Fts3Table {
+  sqlite3_vtab base;              /* Base class used by SQLite core */
+  sqlite3 *db;                    /* The database connection */
+  const char *zDb;                /* logical database name */
+  const char *zName;              /* virtual table name */
+  int nColumn;                    /* number of named columns in virtual table */
+  char **azColumn;                /* column names.  malloced */
+  u8 *abNotindexed;               /* True for 'notindexed' columns */
+  sqlite3_tokenizer *pTokenizer;  /* tokenizer for inserts and queries */
+  char *zContentTbl;              /* content=xxx option, or NULL */
+  char *zLanguageid;              /* languageid=xxx option, or NULL */
+  int nAutoincrmerge;             /* Value configured by 'automerge' */
+  u32 nLeafAdd;                   /* Number of leaf blocks added this trans */
+
+  /* Precompiled statements used by the implementation. Each of these
+  ** statements is run and reset within a single virtual table API call.
+  */
+  sqlite3_stmt *aStmt[40];
+
+  /* NOTE(review): the two fields below are not documented in this header.
+  ** Presumably they hold SQL expression lists used when reading from and
+  ** writing to the content table - confirm against fts3.c.
+  */
+  char *zReadExprlist;
+  char *zWriteExprlist;
+
+  int nNodeSize;                  /* Soft limit for node size */
+  u8 bFts4;                       /* True for FTS4, false for FTS3 */
+  u8 bHasStat;                    /* True if %_stat table exists (2==unknown) */
+  u8 bHasDocsize;                 /* True if %_docsize table exists */
+  u8 bDescIdx;                    /* True if doclists are in reverse order */
+  u8 bIgnoreSavepoint;            /* True to ignore xSavepoint invocations */
+  int nPgsz;                      /* Page size for host database */
+  char *zSegmentsTbl;             /* Name of %_segments table */
+  sqlite3_blob *pSegments;        /* Blob handle open on %_segments table */
+
+  /*
+  ** The following array of hash tables is used to buffer pending index
+  ** updates during transactions. All pending updates buffered at any one
+  ** time must share a common language-id (see the FTS4 langid= feature).
+  ** The current language id is stored in variable iPrevLangid.
+  **
+  ** A single FTS4 table may have multiple full-text indexes. For each index
+  ** there is an entry in the aIndex[] array. Index 0 is an index of all the
+  ** terms that appear in the document set. Each subsequent index in aIndex[]
+  ** is an index of prefixes of a specific length.
+  **
+  ** Variable nPendingData contains an estimate of the memory consumed by the
+  ** pending data structures, including hash table overhead, but not including
+  ** malloc overhead.  When nPendingData exceeds nMaxPendingData, all hash
+  ** tables are flushed to disk. Variable iPrevDocid is the docid of the most
+  ** recently inserted record.
+  */
+  int nIndex;                     /* Size of aIndex[] */
+  struct Fts3Index {
+    int nPrefix;                  /* Prefix length (0 for main terms index) */
+    Fts3Hash hPending;            /* Pending terms table for this index */
+  } *aIndex;
+  int nMaxPendingData;            /* Max pending data before flush to disk */
+  int nPendingData;               /* Current bytes of pending data */
+  sqlite_int64 iPrevDocid;        /* Docid of most recently inserted document */
+  int iPrevLangid;                /* Langid of recently inserted document */
+  int bPrevDelete;                /* True if last operation was a delete */
+
+#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
+  /* State variables used for validating that the transaction control
+  ** methods of the virtual table are called at appropriate times.  These
+  ** values do not contribute to FTS functionality; they are used for
+  ** verifying the operation of the SQLite core.
+  */
+  int inTransaction;     /* True after xBegin but before xCommit/xRollback */
+  int mxSavepoint;       /* Largest valid xSavepoint integer */
+#endif
+
+#ifdef SQLITE_TEST
+  /* True to disable the incremental doclist optimization. This is controlled
+  ** by special insert command 'test-no-incr-doclist'.  */
+  int bNoIncrDoclist;
+#endif
+};
+
+/*
+** When the core wants to read from the virtual table, it creates a
+** virtual table cursor (an instance of the following structure) using
+** the xOpen method. Cursors are destroyed using the xClose method.
+*/
+struct Fts3Cursor {
+  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
+  i16 eSearch;                    /* Search strategy (see below) */
+  u8 isEof;                       /* True if at End Of Results */
+  u8 isRequireSeek;               /* True if must seek pStmt to %_content row */
+  sqlite3_stmt *pStmt;            /* Prepared statement in use by the cursor */
+  Fts3Expr *pExpr;                /* Parsed MATCH query string */
+  int iLangid;                    /* Language being queried for */
+  int nPhrase;                    /* Number of matchable phrases in query */
+  Fts3DeferredToken *pDeferred;   /* Deferred search tokens, if any */
+  sqlite3_int64 iPrevId;          /* Previous id read from aDoclist */
+  char *pNextId;                  /* Pointer into the body of aDoclist */
+  char *aDoclist;                 /* List of docids for full-text queries */
+  int nDoclist;                   /* Size of buffer at aDoclist */
+  u8 bDesc;                       /* True to sort in descending order */
+  int eEvalmode;                  /* An FTS3_EVAL_XX constant */
+  int nRowAvg;                    /* Average size of database rows, in pages */
+  sqlite3_int64 nDoc;             /* Documents in table */
+  i64 iMinDocid;                  /* Minimum docid to return */
+  i64 iMaxDocid;                  /* Maximum docid to return */
+  int isMatchinfoNeeded;          /* True when aMatchinfo[] needs filling in */
+  MatchinfoBuffer *pMIBuffer;     /* Buffer for matchinfo data */
+};
+
+#define FTS3_EVAL_FILTER    0
+#define FTS3_EVAL_NEXT      1
+#define FTS3_EVAL_MATCHINFO 2
+
+/*
+** The Fts3Cursor.eSearch member is always set to one of the following.
+** Actually, Fts3Cursor.eSearch can be greater than or equal to
+** FTS3_FULLTEXT_SEARCH.  If so, then Fts3Cursor.eSearch - 2 is the index
+** of the column to be searched.  For example, in
+**
+**     CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d);
+**     SELECT docid FROM ex1 WHERE b MATCH 'one two three';
+** 
+** Because the LHS of the MATCH operator is 2nd column "b",
+** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1.  (+0 for a,
+** +1 for b, +2 for c, +3 for d.)  If the LHS of MATCH were "ex1" 
+** indicating that all columns should be searched,
+** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4.
+*/
+#define FTS3_FULLSCAN_SEARCH 0    /* Linear scan of %_content table */
+#define FTS3_DOCID_SEARCH    1    /* Lookup by rowid on %_content table */
+#define FTS3_FULLTEXT_SEARCH 2    /* Full-text index search */
+
+/*
+** The lower 16-bits of the sqlite3_index_info.idxNum value set by
+** the xBestIndex() method contains the Fts3Cursor.eSearch value described
+** above. The upper 16-bits contain a combination of the following
+** bits, used to describe extra constraints on full-text searches.
+*/
+#define FTS3_HAVE_LANGID    0x00010000      /* languageid=? */
+#define FTS3_HAVE_DOCID_GE  0x00020000      /* docid>=? */
+#define FTS3_HAVE_DOCID_LE  0x00040000      /* docid<=? */
+
+/*
+** An in-memory doclist together with the state of an iteration over it.
+** Each Fts3Phrase object embeds one of these as its doclist cache.
+*/
+struct Fts3Doclist {
+  char *aAll;                    /* Array containing doclist (or NULL) */
+  int nAll;                      /* Size of aAll[] in bytes */
+  char *pNextDocid;              /* Pointer to next docid */
+
+  sqlite3_int64 iDocid;          /* Current docid (if pList!=0) */
+  int bFreeList;                 /* True if pList should be sqlite3_free()d */
+  char *pList;                   /* Pointer to position list following iDocid */
+  int nList;                     /* Length of position list */
+};
+
+/*
+** A "phrase" is a sequence of one or more tokens that must match in
+** sequence.  A single token is the base case and the most common case.
+** For a sequence of tokens contained in double-quotes (i.e. "one two three")
+** nToken will be the number of tokens in the string.
+*/
+struct Fts3PhraseToken {
+  char *z;                        /* Text of the token */
+  int n;                          /* Number of bytes in buffer z */
+  int isPrefix;                   /* True if token ends with a "*" character */
+  int bFirst;                     /* True if token must appear at position 0 */
+
+  /* Variables above this point are populated when the expression is
+  ** parsed (by code in fts3_expr.c). Below this point the variables are
+  ** used when evaluating the expression. */
+  Fts3DeferredToken *pDeferred;   /* Deferred token object for this token */
+  Fts3MultiSegReader *pSegcsr;    /* Segment-reader for this token */
+};
+
+struct Fts3Phrase {
+  /* Cache of doclist for this phrase. */
+  Fts3Doclist doclist;
+  int bIncr;                 /* True if doclist is loaded incrementally */
+  int iDoclistToken;
+
+  /* Used by sqlite3Fts3EvalPhrasePoslist() if this is a descendent of an
+  ** OR condition.  */
+  char *pOrPoslist;
+  i64 iOrDocid;
+
+  /* Variables below this point are populated by fts3_expr.c when parsing 
+  ** a MATCH expression. Everything above is part of the evaluation phase. 
+  */
+  int nToken;                /* Number of tokens in the phrase */
+  int iColumn;               /* Index of column this phrase must match */
+  Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */
+};
+
+/*
+** A tree of these objects forms the RHS of a MATCH operator.
+**
+** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist 
+** points to a malloced buffer, size nDoclist bytes, containing the results 
+** of this phrase query in FTS3 doclist format. As usual, the initial 
+** "Length" field found in doclists stored on disk is omitted from this 
+** buffer.
+**
+** Variable aMI is used only for FTSQUERY_NEAR nodes to store the global
+** matchinfo data. If it is not NULL, it points to an array of size nCol*3,
+** where nCol is the number of columns in the queried FTS table. The array
+** is populated as follows:
+**
+**   aMI[iCol*3 + 0] = Undefined
+**   aMI[iCol*3 + 1] = Number of occurrences
+**   aMI[iCol*3 + 2] = Number of rows containing at least one instance
+**
+** The aMI array is allocated using sqlite3_malloc(). It should be freed 
+** when the expression node is.
+*/
+struct Fts3Expr {
+  int eType;                 /* One of the FTSQUERY_XXX values defined below */
+  int nNear;                 /* Valid if eType==FTSQUERY_NEAR */
+  Fts3Expr *pParent;         /* pParent->pLeft==this or pParent->pRight==this */
+  Fts3Expr *pLeft;           /* Left operand */
+  Fts3Expr *pRight;          /* Right operand */
+  Fts3Phrase *pPhrase;       /* Valid if eType==FTSQUERY_PHRASE */
+
+  /* The following are used by the fts3_eval.c module. */
+  sqlite3_int64 iDocid;      /* Current docid */
+  u8 bEof;                   /* True this expression is at EOF already */
+  u8 bStart;                 /* True if iDocid is valid */
+  u8 bDeferred;              /* True if this expression is entirely deferred */
+
+  /* The following are used by the fts3_snippet.c module. */
+  int iPhrase;               /* Index of this phrase in matchinfo() results */
+  u32 *aMI;                  /* See above */
+};
+
+/*
+** Candidate values for Fts3Expr.eType. Note that the order of the first
+** four values is in order of precedence when parsing expressions. For 
+** example, the following:
+**
+**   "a OR b AND c NOT d NEAR e"
+**
+** is equivalent to:
+**
+**   "a OR (b AND (c NOT (d NEAR e)))"
+*/
+#define FTSQUERY_NEAR   1
+#define FTSQUERY_NOT    2
+#define FTSQUERY_AND    3
+#define FTSQUERY_OR     4
+#define FTSQUERY_PHRASE 5
+
+
+/* fts3_write.c */
+int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*);
+int sqlite3Fts3PendingTermsFlush(Fts3Table *);
+void sqlite3Fts3PendingTermsClear(Fts3Table *);
+int sqlite3Fts3Optimize(Fts3Table *);
+int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64,
+  sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
+int sqlite3Fts3SegReaderPending(
+  Fts3Table*,int,const char*,int,int,Fts3SegReader**);
+void sqlite3Fts3SegReaderFree(Fts3SegReader *);
+int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **);
+int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);
+
+int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
+int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **);
+
+#ifndef SQLITE_DISABLE_FTS4_DEFERRED
+void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);
+int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int);
+int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *);
+void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *);
+int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *);
+#else
+# define sqlite3Fts3FreeDeferredTokens(x)
+# define sqlite3Fts3DeferToken(x,y,z) SQLITE_OK
+# define sqlite3Fts3CacheDeferredDoclists(x) SQLITE_OK
+# define sqlite3Fts3FreeDeferredDoclists(x)
+# define sqlite3Fts3DeferredTokenList(x,y,z) SQLITE_OK
+#endif
+
+void sqlite3Fts3SegmentsClose(Fts3Table *);
+int sqlite3Fts3MaxLevel(Fts3Table *, int *);
+
+/* Special values interpreted by sqlite3Fts3SegReaderCursor() */
+#define FTS3_SEGCURSOR_PENDING        -1
+#define FTS3_SEGCURSOR_ALL            -2
+
+int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*);
+int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *);
+void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *);
+
+int sqlite3Fts3SegReaderCursor(Fts3Table *, 
+    int, int, int, const char *, int, int, int, Fts3MultiSegReader *);
+
+/* Flags that may be combined in the Fts3SegFilter.flags field */
+#define FTS3_SEGMENT_REQUIRE_POS   0x00000001
+#define FTS3_SEGMENT_IGNORE_EMPTY  0x00000002
+#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
+#define FTS3_SEGMENT_PREFIX        0x00000008
+#define FTS3_SEGMENT_SCAN          0x00000010
+#define FTS3_SEGMENT_FIRST         0x00000020
+
+/* Filter description passed as 3rd argument to sqlite3Fts3SegReaderStart() */
+struct Fts3SegFilter {
+  const char *zTerm;              /* Term to filter on (may be NULL) */
+  int nTerm;                      /* Size of zTerm in bytes */
+  int iCol;                       /* NOTE(review): presumably the column used
+                                  ** with FTS3_SEGMENT_COLUMN_FILTER - confirm */
+  int flags;                      /* Mask of FTS3_SEGMENT_XXX flags */
+};
+
+struct Fts3MultiSegReader {
+  /* Used internally by sqlite3Fts3SegReaderXXX() calls */
+  Fts3SegReader **apSegment;      /* Array of Fts3SegReader objects */
+  int nSegment;                   /* Size of apSegment array */
+  int nAdvance;                   /* How many seg-readers to advance */
+  Fts3SegFilter *pFilter;         /* Pointer to filter object */
+  char *aBuffer;                  /* Buffer to merge doclists in */
+  int nBuffer;                    /* Allocated size of aBuffer[] in bytes */
+
+  int iColFilter;                 /* If >=0, filter for this column */
+  int bRestart;
+
+  /* Used by fts3.c only. */
+  int nCost;                      /* Cost of running iterator */
+  int bLookup;                    /* True if a lookup of a single entry. */
+
+  /* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */
+  char *zTerm;                    /* Pointer to term buffer */
+  int nTerm;                      /* Size of zTerm in bytes */
+  char *aDoclist;                 /* Pointer to doclist buffer */
+  int nDoclist;                   /* Size of aDoclist[] in bytes */
+};
+
+int sqlite3Fts3Incrmerge(Fts3Table*,int,int);
+
+#define fts3GetVarint32(p, piVal) (                                           \
+  (*(u8*)(p)&0x80) ? sqlite3Fts3GetVarint32(p, piVal) : (*piVal=*(u8*)(p), 1) \
+)
+
+/* fts3.c */
+void sqlite3Fts3ErrMsg(char**,const char*,...);
+int sqlite3Fts3PutVarint(char *, sqlite3_int64);
+int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
+int sqlite3Fts3GetVarint32(const char *, int *);
+int sqlite3Fts3VarintLen(sqlite3_uint64);
+void sqlite3Fts3Dequote(char *);
+void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);
+int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);
+int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *);
+void sqlite3Fts3CreateStatTable(int*, Fts3Table*);
+int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc);
+
+/* fts3_tokenizer.c */
+const char *sqlite3Fts3NextToken(const char *, int *);
+int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
+int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, 
+    sqlite3_tokenizer **, char **
+);
+int sqlite3Fts3IsIdChar(char);
+
+/* fts3_snippet.c */
+void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
+void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
+  const char *, const char *, int, int
+);
+void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);
+void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p);
+
+/* fts3_expr.c */
+int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int,
+  char **, int, int, int, const char *, int, Fts3Expr **, char **
+);
+void sqlite3Fts3ExprFree(Fts3Expr *);
+#ifdef SQLITE_TEST
+int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
+int sqlite3Fts3InitTerm(sqlite3 *db);
+#endif
+
+int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int,
+  sqlite3_tokenizer_cursor **
+);
+
+/* fts3_aux.c */
+int sqlite3Fts3InitAux(sqlite3 *db);
+
+void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);
+
+int sqlite3Fts3MsrIncrStart(
+    Fts3Table*, Fts3MultiSegReader*, int, const char*, int);
+int sqlite3Fts3MsrIncrNext(
+    Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *);
+int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **); 
+int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *);
+int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
+
+/* fts3_tokenize_vtab.c */
+int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *);
+
+/* fts3_unicode2.c (functions generated by parsing unicode text files) */
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
+int sqlite3FtsUnicodeFold(int, int);
+int sqlite3FtsUnicodeIsalnum(int);
+int sqlite3FtsUnicodeIsdiacritic(int);
+#endif
+
+#endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */
+#endif /* _FTSINT_H */
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_aux.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_aux.c
@ -0,0 +1,550 @@
+/*
+** 2011 Jan 27
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+*/
+#include "fts3Int.h"
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+#include <string.h>
+#include <assert.h>
+
+typedef struct Fts3auxTable Fts3auxTable;
+typedef struct Fts3auxCursor Fts3auxCursor;
+
+/*
+** An fts4aux virtual table object. fts3auxConnectMethod() allocates this
+** structure, the Fts3Table it points to and the two name strings in a
+** single block, so freeing the Fts3auxTable releases all of them.
+*/
+struct Fts3auxTable {
+  sqlite3_vtab base;              /* Base class used by SQLite core */
+  Fts3Table *pFts3Tab;            /* Skeleton handle for the FTS table read */
+};
+
+/*
+** An fts4aux cursor. xFilter resets a cursor for reuse by zeroing every
+** byte from member "csr" to the end of the structure, so everything that
+** must survive a reset (only "base") has to be declared before "csr".
+**
+** aStat[0] accumulates totals over all columns of the current term (the
+** row reported with col='*'); aStat[N+1] holds the figures for column N.
+*/
+struct Fts3auxCursor {
+  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
+  Fts3MultiSegReader csr;        /* Must be right after "base" */
+  Fts3SegFilter filter;           /* Filter handed to the segment reader */
+  char *zStop;                    /* Upper bound on term (term<=?), or NULL */
+  int nStop;                      /* Byte-length of string zStop */
+  int iLangid;                    /* Language id to query */
+  int isEof;                      /* True if cursor is at EOF */
+  sqlite3_int64 iRowid;           /* Current rowid */
+
+  int iCol;                       /* Current value of 'col' column */
+  int nStat;                      /* Size of aStat[] array */
+  struct Fts3auxColstats {
+    sqlite3_int64 nDoc;           /* 'documents' values for current csr row */
+    sqlite3_int64 nOcc;           /* 'occurrences' values for current csr row */
+  } *aStat;
+};
+
+/*
+** Schema of the terms table.
+*/
+#define FTS3_AUX_SCHEMA \
+  "CREATE TABLE x(term, col, documents, occurrences, languageid HIDDEN)"
+
+/*
+** Shared implementation of xCreate and xConnect. An fts4aux table stores
+** nothing of its own, so creating one and connecting to one are the same
+** operation.
+**
+** Accepted forms (argv[3] and up are the user supplied arguments):
+**
+**     CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table);
+**     CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table);
+**
+** The two argument form is only accepted when the fts4aux table itself
+** lives in the "temp" database. Returns SQLITE_OK and sets *ppVtab on
+** success, SQLITE_ERROR (with *pzErr set) for bad arguments, SQLITE_NOMEM
+** on allocation failure, or the error returned by sqlite3_declare_vtab().
+*/
+static int fts3auxConnectMethod(
+  sqlite3 *db,                    /* Database connection */
+  void *pUnused,                  /* Unused */
+  int argc,                       /* Number of elements in argv array */
+  const char * const *argv,       /* xCreate/xConnect argument array */
+  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
+  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
+){
+  const char *zSchema = 0;        /* Database holding the FTS table */
+  const char *zTarget = 0;        /* Name of the FTS table */
+  int nSchema;                    /* strlen(zSchema) */
+  int nTarget;                    /* strlen(zTarget) */
+  int nAlloc;                     /* Size of the single allocation */
+  int isValid = 0;                /* True once the arguments are accepted */
+  int rc;                         /* Result of sqlite3_declare_vtab() */
+  Fts3auxTable *pAux;             /* New virtual table object */
+  Fts3Table *pTab;                /* Skeleton Fts3Table following pAux */
+  char *zBuf;                     /* Name storage following pTab */
+
+  UNUSED_PARAMETER(pUnused);
+
+  switch( argc ){
+    case 4:
+      /* One argument: the FTS table is in the same database as this table */
+      zSchema = argv[1];
+      zTarget = argv[3];
+      isValid = 1;
+      break;
+
+    case 5:
+      /* Two arguments: only permitted for an fts4aux table in "temp" */
+      if( (int)strlen(argv[1])==4
+       && 0==sqlite3_strnicmp("temp", argv[1], 4)
+      ){
+        zSchema = argv[3];
+        zTarget = argv[4];
+        isValid = 1;
+      }
+      break;
+
+    default:
+      break;
+  }
+  if( !isValid ){
+    sqlite3Fts3ErrMsg(pzErr, "invalid arguments to fts4aux constructor");
+    return SQLITE_ERROR;
+  }
+  nSchema = (int)strlen(zSchema);
+  nTarget = (int)strlen(zTarget);
+
+  rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA);
+  if( rc!=SQLITE_OK ) return rc;
+
+  /* One block: Fts3auxTable, Fts3Table, then both nul-terminated names */
+  nAlloc = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nSchema + nTarget + 2;
+  pAux = (Fts3auxTable *)sqlite3_malloc(nAlloc);
+  if( pAux==0 ) return SQLITE_NOMEM;
+  memset(pAux, 0, nAlloc);
+
+  pTab = (Fts3Table *)&pAux[1];
+  zBuf = (char *)&pTab[1];
+
+  /* The memset() above already supplied both nul terminators */
+  memcpy(zBuf, zSchema, nSchema);
+  memcpy(&zBuf[nSchema+1], zTarget, nTarget);
+  sqlite3Fts3Dequote(&zBuf[nSchema+1]);
+
+  pTab->zDb = zBuf;
+  pTab->zName = &zBuf[nSchema+1];
+  pTab->db = db;
+  pTab->nIndex = 1;
+  pAux->pFts3Tab = pTab;
+
+  *ppVtab = (sqlite3_vtab *)pAux;
+  return SQLITE_OK;
+}
+
+/*
+** Shared implementation of xDisconnect and xDestroy. With no persistent
+** state to remove, both simply release the in-memory table object.
+*/
+static int fts3auxDisconnectMethod(sqlite3_vtab *pVtab){
+  Fts3auxTable *pAux = (Fts3auxTable *)pVtab;
+  Fts3Table *pTab = pAux->pFts3Tab;
+  int iStmt = 0;
+
+  /* Finalize every cached prepared statement (unused slots are NULL) */
+  while( iStmt<SizeofArray(pTab->aStmt) ){
+    sqlite3_finalize(pTab->aStmt[iStmt]);
+    iStmt++;
+  }
+
+  sqlite3_free(pTab->zSegmentsTbl);
+
+  /* pTab and the name strings live inside the pAux allocation */
+  sqlite3_free(pAux);
+  return SQLITE_OK;
+}
+
+#define FTS4AUX_EQ_CONSTRAINT 1
+#define FTS4AUX_GE_CONSTRAINT 2
+#define FTS4AUX_LE_CONSTRAINT 4
+
+/*
+** xBestIndex - Analyze a WHERE and ORDER BY clause.
+**
+** The chosen plan is encoded in idxNum as either FTS4AUX_EQ_CONSTRAINT or
+** a mask of FTS4AUX_GE_CONSTRAINT and FTS4AUX_LE_CONSTRAINT. Values are
+** passed to xFilter in the order: term=? or term>=?, term<=?, languageid=?.
+*/
+static int fts3auxBestIndexMethod(
+  sqlite3_vtab *pVTab, 
+  sqlite3_index_info *pInfo
+){
+  int ii;                         /* Iterator over constraints */
+  int iTermEq = -1;               /* Index of usable term=? constraint */
+  int iTermLo = -1;               /* Index of usable term>? or term>=? */
+  int iTermHi = -1;               /* Index of usable term<? or term<=? */
+  int iLang = -1;                 /* Index of usable languageid=? */
+  int iArg = 1;                   /* Next free argvIndex value */
+  int ePlan = 0;                  /* Value for pInfo->idxNum */
+  double rCost;                   /* Value for pInfo->estimatedCost */
+
+  UNUSED_PARAMETER(pVTab);
+
+  /* This vtab always delivers results in "ORDER BY term ASC" order. */
+  if( pInfo->nOrderBy==1 ){
+    if( pInfo->aOrderBy[0].iColumn==0 && pInfo->aOrderBy[0].desc==0 ){
+      pInfo->orderByConsumed = 1;
+    }
+  }
+
+  /* Look for equality and range constraints on column 0 ("term") and
+  ** for equality constraints on hidden column 4 ("languageid"). When a
+  ** kind of constraint appears more than once, the last one wins. */
+  for(ii=0; ii<pInfo->nConstraint; ii++){
+    const struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[ii];
+    if( !pCons->usable ) continue;
+
+    if( pCons->iColumn==0 ){
+      switch( pCons->op ){
+        case SQLITE_INDEX_CONSTRAINT_EQ:
+          iTermEq = ii;
+          break;
+        case SQLITE_INDEX_CONSTRAINT_LT:
+        case SQLITE_INDEX_CONSTRAINT_LE:
+          iTermHi = ii;
+          break;
+        case SQLITE_INDEX_CONSTRAINT_GT:
+        case SQLITE_INDEX_CONSTRAINT_GE:
+          iTermLo = ii;
+          break;
+        default:
+          break;
+      }
+    }else if( pCons->iColumn==4 ){
+      if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ ) iLang = ii;
+    }
+  }
+
+  if( iTermEq<0 ){
+    /* Range scan. Each bound halves the estimated cost. */
+    rCost = 20000;
+    if( iTermLo>=0 ){
+      ePlan |= FTS4AUX_GE_CONSTRAINT;
+      pInfo->aConstraintUsage[iTermLo].argvIndex = iArg++;
+      rCost /= 2;
+    }
+    if( iTermHi>=0 ){
+      ePlan |= FTS4AUX_LE_CONSTRAINT;
+      pInfo->aConstraintUsage[iTermHi].argvIndex = iArg++;
+      rCost /= 2;
+    }
+  }else{
+    /* Direct lookup of a single term */
+    ePlan = FTS4AUX_EQ_CONSTRAINT;
+    pInfo->aConstraintUsage[iTermEq].argvIndex = iArg++;
+    rCost = 5;
+  }
+
+  if( iLang>=0 ){
+    pInfo->aConstraintUsage[iLang].argvIndex = iArg++;
+    rCost -= 1;
+  }
+
+  pInfo->idxNum = ePlan;
+  pInfo->estimatedCost = rCost;
+  return SQLITE_OK;
+}
+
+/*
+** xOpen - Open a cursor. The new cursor is zero-filled; it is positioned
+** by a later xFilter call.
+*/
+static int fts3auxOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
+  Fts3auxCursor *pNew;            /* Newly allocated cursor */
+
+  UNUSED_PARAMETER(pVTab);
+
+  pNew = (Fts3auxCursor *)sqlite3_malloc(sizeof(*pNew));
+  if( pNew==0 ){
+    return SQLITE_NOMEM;
+  }
+  memset(pNew, 0, sizeof(*pNew));
+  *ppCsr = (sqlite3_vtab_cursor *)pNew;
+  return SQLITE_OK;
+}
+
+/*
+** xClose - Close a cursor, releasing the segment reader and every buffer
+** owned by the cursor before freeing the cursor itself.
+*/
+static int fts3auxCloseMethod(sqlite3_vtab_cursor *pCursor){
+  Fts3auxCursor *pAuxCsr = (Fts3auxCursor *)pCursor;
+  Fts3auxTable *pAuxTab = (Fts3auxTable *)pCursor->pVtab;
+
+  sqlite3Fts3SegmentsClose(pAuxTab->pFts3Tab);
+  sqlite3Fts3SegReaderFinish(&pAuxCsr->csr);
+
+  /* Buffers owned by the cursor; sqlite3_free(NULL) is a harmless no-op */
+  sqlite3_free(pAuxCsr->aStat);
+  sqlite3_free(pAuxCsr->zStop);
+  sqlite3_free((void *)pAuxCsr->filter.zTerm);
+
+  sqlite3_free(pAuxCsr);
+  return SQLITE_OK;
+}
+
+/*
+** Make sure the pCsr->aStat[] array has room for at least nSize entries.
+** Newly added entries are zeroed. The array never shrinks.
+**
+** Returns SQLITE_OK on success or SQLITE_NOMEM if the allocation fails,
+** in which case the existing array is left untouched.
+*/
+static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){
+  if( nSize>pCsr->nStat ){
+    struct Fts3auxColstats *aNew;
+
+    /* Use the 64-bit allocator: nSize is derived from database content,
+    ** and with sqlite3_realloc() a large byte count would be truncated to
+    ** "int", producing a buffer smaller than the memset() below assumes. */
+    aNew = (struct Fts3auxColstats *)sqlite3_realloc64(pCsr->aStat, 
+        sizeof(struct Fts3auxColstats) * (sqlite3_uint64)nSize
+    );
+    if( aNew==0 ) return SQLITE_NOMEM;
+    memset(&aNew[pCsr->nStat], 0, 
+        sizeof(struct Fts3auxColstats) * (nSize - pCsr->nStat)
+    );
+    pCsr->aStat = aNew;
+    pCsr->nStat = nSize;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** xNext - Advance the cursor to the next row, if any.
+**
+** Each term produces one row per aStat[] entry with a non-zero document
+** count: aStat[0] is the all-columns total and aStat[N+1] is column N.
+** When the rows for the current term are exhausted, the segment reader
+** is stepped to the next term and its doclist is scanned to rebuild
+** aStat[].
+**
+** Returns SQLITE_OK (with isEof set once the scan is finished),
+** SQLITE_NOMEM on allocation failure, FTS_CORRUPT_VTAB if the doclist
+** contains an impossible column number, or an error code from
+** sqlite3Fts3SegReaderStep().
+*/
+static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){
+  Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
+  Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
+  int rc;
+
+  /* Increment our pretend rowid value. */
+  pCsr->iRowid++;
+
+  /* Any more non-empty per-column rows for the current term? */
+  for(pCsr->iCol++; pCsr->iCol<pCsr->nStat; pCsr->iCol++){
+    if( pCsr->aStat[pCsr->iCol].nDoc>0 ) return SQLITE_OK;
+  }
+
+  rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr);
+  if( rc==SQLITE_ROW ){
+    int i = 0;
+    int nDoclist = pCsr->csr.nDoclist;
+    char *aDoclist = pCsr->csr.aDoclist;
+    int iCol;
+
+    int eState = 0;
+
+    /* Stop once the new term sorts after the term<=? bound, if any */
+    if( pCsr->zStop ){
+      int n = (pCsr->nStop<pCsr->csr.nTerm) ? pCsr->nStop : pCsr->csr.nTerm;
+      int mc = memcmp(pCsr->zStop, pCsr->csr.zTerm, n);
+      if( mc<0 || (mc==0 && pCsr->csr.nTerm>pCsr->nStop) ){
+        pCsr->isEof = 1;
+        return SQLITE_OK;
+      }
+    }
+
+    if( fts3auxGrowStatArray(pCsr, 2) ) return SQLITE_NOMEM;
+    memset(pCsr->aStat, 0, sizeof(struct Fts3auxColstats) * pCsr->nStat);
+    iCol = 0;
+
+    while( i<nDoclist ){
+      sqlite3_int64 v = 0;
+
+      i += sqlite3Fts3GetVarint(&aDoclist[i], &v);
+      switch( eState ){
+        /* State 0. In this state the integer just read was a docid. */
+        case 0:
+          pCsr->aStat[0].nDoc++;
+          eState = 1;
+          iCol = 0;
+          break;
+
+        /* State 1. In this state we are expecting either a 1, indicating
+        ** that the following integer will be a column number, or the
+        ** start of a position list for column 0.  
+        ** 
+        ** The only difference between state 1 and state 2 is that if the
+        ** integer encountered in state 1 is not 0 or 1, then we need to
+        ** increment the column 0 "nDoc" count for this term.
+        */
+        case 1:
+          assert( iCol==0 );
+          if( v>1 ){
+            pCsr->aStat[1].nDoc++;
+          }
+          eState = 2;
+          /* fall through */
+
+        case 2:
+          if( v==0 ){       /* 0x00. Next integer will be a docid. */
+            eState = 0;
+          }else if( v==1 ){ /* 0x01. Next integer will be a column number. */
+            eState = 3;
+          }else{            /* 2 or greater. A position. */
+            pCsr->aStat[iCol+1].nOcc++;
+            pCsr->aStat[0].nOcc++;
+          }
+          break;
+
+        /* State 3. The integer just read is a column number. */
+        default: assert( eState==3 );
+          /* A column number introduced by 0x01 is never less than 1, as
+          ** column 0 position lists carry no prefix. Anything less than 1,
+          ** or so large that iCol+2 would overflow an int, can only come
+          ** from a corrupt index and would otherwise index outside of
+          ** aStat[]. */
+          if( v<1 || v>(sqlite3_int64)(0x7fffffff-2) ){
+            return FTS_CORRUPT_VTAB;
+          }
+          iCol = (int)v;
+          if( fts3auxGrowStatArray(pCsr, iCol+2) ) return SQLITE_NOMEM;
+          pCsr->aStat[iCol+1].nDoc++;
+          eState = 2;
+          break;
+      }
+    }
+
+    pCsr->iCol = 0;
+    rc = SQLITE_OK;
+  }else{
+    pCsr->isEof = 1;
+  }
+  return rc;
+}
+
+/*
+** xFilter - Initialize a cursor to point at the start of its data.
+**
+** idxNum is the plan selected by fts3auxBestIndexMethod(): either
+** FTS4AUX_EQ_CONSTRAINT, or a mask of FTS4AUX_GE_CONSTRAINT and
+** FTS4AUX_LE_CONSTRAINT (possibly 0). apVal[] holds, in this order, the
+** term=? or term>=? value, the term<=? value and the languageid=? value,
+** each present only if the corresponding constraint is in use.
+**
+** Returns SQLITE_OK, SQLITE_NOMEM, or an error code from the segment
+** reader functions or fts3auxNextMethod().
+*/
+static int fts3auxFilterMethod(
+  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
+  int idxNum,                     /* Strategy index */
+  const char *idxStr,             /* Unused */
+  int nVal,                       /* Number of elements in apVal */
+  sqlite3_value **apVal           /* Arguments for the indexing scheme */
+){
+  Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
+  Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
+  int rc;
+  int isScan = 0;
+  int iLangVal = 0;               /* Language id to query */
+
+  int iEq = -1;                   /* Index of term=? value in apVal */
+  int iGe = -1;                   /* Index of term>=? value in apVal */
+  int iLe = -1;                   /* Index of term<=? value in apVal */
+  int iLangid = -1;               /* Index of languageid=? value in apVal */
+  int iNext = 0;
+
+  UNUSED_PARAMETER(nVal);
+  UNUSED_PARAMETER(idxStr);
+
+  assert( idxStr==0 );
+  assert( idxNum==FTS4AUX_EQ_CONSTRAINT || idxNum==0
+       || idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT
+       || idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT)
+  );
+
+  if( idxNum==FTS4AUX_EQ_CONSTRAINT ){
+    iEq = iNext++;
+  }else{
+    isScan = 1;
+    if( idxNum & FTS4AUX_GE_CONSTRAINT ){
+      iGe = iNext++;
+    }
+    if( idxNum & FTS4AUX_LE_CONSTRAINT ){
+      iLe = iNext++;
+    }
+  }
+  /* Any value left over after the term constraints is languageid=? */
+  if( iNext<nVal ){
+    iLangid = iNext++;
+  }
+
+  /* In case this cursor is being reused, close and zero it. Every buffer
+  ** owned by the cursor, zStop included, must be released first because
+  ** the memset() below wipes the pointers. */
+  testcase(pCsr->filter.zTerm);
+  sqlite3Fts3SegReaderFinish(&pCsr->csr);
+  sqlite3_free((void *)pCsr->filter.zTerm);
+  sqlite3_free(pCsr->aStat);
+  sqlite3_free(pCsr->zStop);
+  memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
+
+  pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
+  if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
+
+  if( iEq>=0 || iGe>=0 ){
+    const unsigned char *zStr = sqlite3_value_text(apVal[0]);
+    assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) );
+    if( zStr ){
+      pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr);
+      if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM;
+      /* Measure the copy, not the SQL value: "%s" stops at the first nul,
+      ** so sqlite3_value_bytes() could exceed the size of the buffer. */
+      pCsr->filter.nTerm = (int)strlen(pCsr->filter.zTerm);
+    }
+  }
+
+  if( iLe>=0 ){
+    pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe]));
+    if( pCsr->zStop==0 ) return SQLITE_NOMEM;
+    /* As above, nStop must describe the buffer that memcmp() will read */
+    pCsr->nStop = (int)strlen(pCsr->zStop);
+  }
+  
+  if( iLangid>=0 ){
+    iLangVal = sqlite3_value_int(apVal[iLangid]);
+
+    /* If the user specified a negative value for the languageid, use zero
+    ** instead. This works, as the "languageid=?" constraint will also
+    ** be tested by the VDBE layer. The test will always be false (since
+    ** this module will not return a row with a negative languageid), and
+    ** so the overall query will return zero rows.  */
+    if( iLangVal<0 ) iLangVal = 0;
+  }
+  pCsr->iLangid = iLangVal;
+
+  rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL,
+      pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
+  );
+  if( rc==SQLITE_OK ){
+    rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
+  }
+
+  /* Position the cursor on the first row, if any */
+  if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor);
+  return rc;
+}
+
+/*
+** xEof - Report whether the cursor has run out of rows.
+**
+** Returns the cursor's isEof flag: non-zero once the cursor is at EOF,
+** zero while it still points at a row.
+*/
+static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){
+  return ((Fts3auxCursor *)pCursor)->isEof;
+}
+
+/*
+** xColumn - Hand the value of column iCol of the current row to pCtx.
+**
+** Column layout:
+**   0  term         text of the current term
+**   1  col          "*" when the cursor's iCol is 0, otherwise iCol-1
+**   2  documents    nDoc statistic for the current aStat[] slot
+**   3  occurrences  nOcc statistic for the current aStat[] slot
+**   4  languageid   language id the cursor was filtered with
+**
+** The cursor must not be at EOF. Always returns SQLITE_OK.
+*/
+static int fts3auxColumnMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
+  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
+  int iCol                        /* Index of column to read value from */
+){
+  Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
+
+  assert( pCsr->isEof==0 );
+
+  if( iCol==0 ){
+    /* term */
+    sqlite3_result_text(
+        pCtx, pCsr->csr.zTerm, pCsr->csr.nTerm, SQLITE_TRANSIENT
+    );
+  }else if( iCol==1 ){
+    /* col */
+    if( pCsr->iCol==0 ){
+      sqlite3_result_text(pCtx, "*", -1, SQLITE_STATIC);
+    }else{
+      sqlite3_result_int(pCtx, pCsr->iCol-1);
+    }
+  }else if( iCol==2 ){
+    /* documents */
+    sqlite3_result_int64(pCtx, pCsr->aStat[pCsr->iCol].nDoc);
+  }else if( iCol==3 ){
+    /* occurrences */
+    sqlite3_result_int64(pCtx, pCsr->aStat[pCsr->iCol].nOcc);
+  }else{
+    /* languageid */
+    assert( iCol==4 );
+    sqlite3_result_int(pCtx, pCsr->iLangid);
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** xRowid - Copy the rowid of the row the cursor currently points at
+** into *pRowid. Always returns SQLITE_OK.
+*/
+static int fts3auxRowidMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
+  sqlite_int64 *pRowid            /* OUT: Rowid value */
+){
+  *pRowid = ((Fts3auxCursor *)pCursor)->iRowid;
+  return SQLITE_OK;
+}
+
+/*
+** Register the auxiliary virtual table module under the name "fts4aux"
+** with database connection db.
+**
+** The module is read-only: it supplies no xUpdate or transaction hooks.
+** The same function is used for both xCreate and xConnect, and likewise
+** for xDisconnect and xDestroy.
+**
+** Returns whatever sqlite3_create_module() returns.
+*/
+int sqlite3Fts3InitAux(sqlite3 *db){
+  static const sqlite3_module auxModule = {
+     0,                           /* iVersion      */
+     fts3auxConnectMethod,        /* xCreate       */
+     fts3auxConnectMethod,        /* xConnect      */
+     fts3auxBestIndexMethod,      /* xBestIndex    */
+     fts3auxDisconnectMethod,     /* xDisconnect   */
+     fts3auxDisconnectMethod,     /* xDestroy      */
+     fts3auxOpenMethod,           /* xOpen         */
+     fts3auxCloseMethod,          /* xClose        */
+     fts3auxFilterMethod,         /* xFilter       */
+     fts3auxNextMethod,           /* xNext         */
+     fts3auxEofMethod,            /* xEof          */
+     fts3auxColumnMethod,         /* xColumn       */
+     fts3auxRowidMethod,          /* xRowid        */
+     0,                           /* xUpdate       */
+     0,                           /* xBegin        */
+     0,                           /* xSync         */
+     0,                           /* xCommit       */
+     0,                           /* xRollback     */
+     0,                           /* xFindFunction */
+     0,                           /* xRename       */
+     0,                           /* xSavepoint    */
+     0,                           /* xRelease      */
+     0                            /* xRollbackTo   */
+  };
+
+  return sqlite3_create_module(db, "fts4aux", &auxModule, 0);
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_expr.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_expr.c
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_hash.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_hash.c
@ -0,0 +1,383 @@
+/*
+** 2001 September 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This is the implementation of generic hash-tables used in SQLite.
+** We've modified it slightly to serve as a standalone hash table
+** implementation for the full-text indexing module.
+*/
+
+/*
+** The code in this file is only compiled if:
+**
+**     * The FTS3 module is being built as an extension
+**       (in which case SQLITE_CORE is not defined), or
+**
+**     * The FTS3 module is being built into the core of
+**       SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
+*/
+#include "fts3Int.h"
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "fts3_hash.h"
+
+/*
+** Malloc and Free functions
+**
+** fts3HashMalloc() obtains n bytes from sqlite3_malloc() and zero-fills
+** them. Returns NULL if the allocation fails.
+*/
+static void *fts3HashMalloc(int n){
+  void *pNew = sqlite3_malloc(n);
+  if( pNew==0 ) return 0;
+  memset(pNew, 0, n);
+  return pNew;
+}
+/* Release memory previously obtained from fts3HashMalloc(). */
+static void fts3HashFree(void *pMem){
+  sqlite3_free(pMem);
+}
+
+/* Initialize caller-supplied memory as an empty hash table.
+**
+** pNew      The Fts3Hash structure to initialize.
+** keyClass  FTS3_HASH_STRING or FTS3_HASH_BINARY; selects the hash and
+**           comparison functions used for keys.
+** copyKey   True if the table should keep its own private copy of each
+**           key, false if it should store the caller's pointer as-is.
+**
+** No memory is allocated here; the bucket array is created lazily by
+** the first insert.
+*/
+void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey){
+  assert( pNew!=0 );
+  assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY );
+
+  /* Start out with no buckets and no elements. */
+  pNew->ht = 0;
+  pNew->htsize = 0;
+  pNew->count = 0;
+  pNew->first = 0;
+
+  /* Record the key-handling policy. */
+  pNew->copyKey = copyKey;
+  pNew->keyClass = keyClass;
+}
+
+/* Discard every entry of hash table pH and release all memory it owns
+** (the bucket array, each element, and - when copyKey is set - each
+** private key copy).  The data pointers stored in the elements are not
+** freed.  Afterwards the table is empty and may be reused or abandoned.
+*/
+void sqlite3Fts3HashClear(Fts3Hash *pH){
+  Fts3HashElem *pElem;        /* Element currently being released */
+  Fts3HashElem *pNext;        /* Successor of pElem, saved before the free */
+
+  assert( pH!=0 );
+
+  /* Detach the element list and drop the bucket array first. */
+  pElem = pH->first;
+  fts3HashFree(pH->ht);
+  pH->first = 0;
+  pH->ht = 0;
+  pH->htsize = 0;
+  pH->count = 0;
+
+  /* Then walk the detached list, freeing each node. */
+  for(; pElem; pElem=pNext){
+    pNext = pElem->next;
+    if( pH->copyKey && pElem->pKey ){
+      fts3HashFree(pElem->pKey);
+    }
+    fts3HashFree(pElem);
+  }
+}
+
+/*
+** Hash and comparison functions when the mode is FTS3_HASH_STRING
+**
+** fts3StrHash() hashes the first nKey bytes of pKey.  If nKey is zero
+** or negative the key is treated as a nul-terminated string and its
+** strlen() is used instead.  The result is always non-negative.
+*/
+static int fts3StrHash(const void *pKey, int nKey){
+  const char *zKey = (const char *)pKey;
+  unsigned uHash = 0;
+  int i;
+
+  if( nKey<=0 ) nKey = (int) strlen(zKey);
+  for(i=0; i<nKey; i++){
+    uHash = (uHash<<3) ^ uHash ^ zKey[i];
+  }
+  return (int)(uHash & 0x7fffffff);
+}
+/* Compare two FTS3_HASH_STRING keys.  Keys of different length never
+** match (1 is returned); otherwise the result is that of strncmp() over
+** the shared length, so 0 means "equal".
+*/
+static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
+  const char *zLeft = (const char *)pKey1;
+  const char *zRight = (const char *)pKey2;
+  return (n1==n2) ? strncmp(zLeft, zRight, n1) : 1;
+}
+
+/*
+** Hash and comparison functions when the mode is FTS3_HASH_BINARY
+**
+** fts3BinHash() hashes exactly nKey bytes starting at pKey (nothing is
+** read when nKey<=0) and returns a non-negative value.
+**
+** The accumulator is unsigned, matching fts3StrHash(): left-shifting a
+** signed int past its range is undefined behaviour in C, whereas
+** unsigned arithmetic simply wraps.
+*/
+static int fts3BinHash(const void *pKey, int nKey){
+  unsigned h = 0;
+  const char *z = (const char *)pKey;
+  while( nKey-- > 0 ){
+    h = (h<<3) ^ h ^ *(z++);
+  }
+  return (int)(h & 0x7fffffff);
+}
+/* Compare two FTS3_HASH_BINARY keys.  Keys of different length never
+** match (1 is returned); otherwise the memcmp() result over the shared
+** length is returned, so 0 means "equal".
+*/
+static int fts3BinCompare(const void *pKey1, int n1, const void *pKey2, int n2){
+  return (n1==n2) ? memcmp(pKey1, pKey2, n1) : 1;
+}
+
+/*
+** Return a pointer to the hash function that goes with key class
+** keyClass (FTS3_HASH_STRING or FTS3_HASH_BINARY).
+**
+** The C syntax of this definition may be unfamiliar, so to spell it out:
+** the function is named "ftsHashFunction" and takes one parameter,
+** "keyClass".  What it returns is itself a pointer to a function - one
+** taking a "const void*" and an "int" and returning an "int".
+*/
+static int (*ftsHashFunction(int keyClass))(const void*,int){
+  if( keyClass!=FTS3_HASH_STRING ){
+    assert( keyClass==FTS3_HASH_BINARY );
+    return &fts3BinHash;
+  }
+  return &fts3StrHash;
+}
+
+/*
+** Return a pointer to the key comparison function that goes with key
+** class keyClass (FTS3_HASH_STRING or FTS3_HASH_BINARY).
+**
+** The returned function takes two (pointer,length) key pairs and
+** returns 0 when the keys are equal.  The declaration uses the same
+** "function returning a function pointer" syntax as ftsHashFunction().
+*/
+static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){
+  if( keyClass!=FTS3_HASH_STRING ){
+    assert( keyClass==FTS3_HASH_BINARY );
+    return &fts3BinCompare;
+  }
+  return &fts3StrCompare;
+}
+
+/* Link an element into the hash table
+**
+** All elements of the table live on one doubly-linked list headed by
+** pH->first.  pEntry->chain points at the first element belonging to
+** bucket pEntry.  pNew is spliced in so that it becomes the new head of
+** the bucket's chain, and the bucket's element count is incremented.
+** pH->count is not touched here; the caller maintains it.
+*/
+static void fts3HashInsertElement(
+  Fts3Hash *pH,            /* The complete hash table */
+  struct _fts3ht *pEntry,  /* The entry into which pNew is inserted */
+  Fts3HashElem *pNew       /* The element to be inserted */
+){
+  Fts3HashElem *pHead;     /* First element already in pEntry */
+  pHead = pEntry->chain;
+  if( pHead ){
+    /* Bucket already populated: insert pNew immediately before pHead in
+    ** the global list. */
+    pNew->next = pHead;
+    pNew->prev = pHead->prev;
+    if( pHead->prev ){ pHead->prev->next = pNew; }
+    else             { pH->first = pNew; }   /* pHead was the list head */
+    pHead->prev = pNew;
+  }else{
+    /* Empty bucket: push pNew onto the front of the global list. */
+    pNew->next = pH->first;
+    if( pH->first ){ pH->first->prev = pNew; }
+    pNew->prev = 0;
+    pH->first = pNew;
+  }
+  pEntry->count++;
+  pEntry->chain = pNew;
+}
+
+
+/* Resize the hash table so that it contains "new_size" buckets.
+** "new_size" must be a power of 2.  The resize fails if the new bucket
+** array cannot be allocated, in which case the table is left exactly as
+** it was.
+**
+** On success the old bucket array is freed and every existing element
+** is relinked into its new bucket.  pH->count is not modified.
+**
+** Return non-zero if a memory allocation error occurs.
+*/
+static int fts3Rehash(Fts3Hash *pH, int new_size){
+  struct _fts3ht *new_ht;          /* The new hash table */
+  Fts3HashElem *elem, *next_elem;  /* For looping over existing elements */
+  int (*xHash)(const void*,int);   /* The hash function */
+
+  assert( (new_size & (new_size-1))==0 );
+  new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) );
+  if( new_ht==0 ) return 1;
+  fts3HashFree(pH->ht);
+  pH->ht = new_ht;
+  pH->htsize = new_size;
+  xHash = ftsHashFunction(pH->keyClass);
+  /* Detach the element list (pH->first=0) and re-insert each element;
+  ** fts3HashInsertElement() rebuilds pH->first as it goes.  The successor
+  ** is saved before the insert because the insert rewrites elem->next. */
+  for(elem=pH->first, pH->first=0; elem; elem = next_elem){
+    int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
+    next_elem = elem->next;
+    fts3HashInsertElement(pH, &new_ht[h], elem);
+  }
+  return 0;
+}
+
+/* Internal helper: search bucket h of hash table pH for an element
+** whose key equals (pKey,nKey).  h must already be reduced to a valid
+** bucket index.  Returns the matching element, or NULL if there is no
+** match or the table has no bucket array yet.
+*/
+static Fts3HashElem *fts3FindElementByHash(
+  const Fts3Hash *pH, /* The pH to be searched */
+  const void *pKey,   /* The key we are searching for */
+  int nKey,
+  int h               /* The hash for this key. */
+){
+  struct _fts3ht *pBucket;       /* Bucket selected by h */
+  Fts3HashElem *pElem;           /* Current candidate element */
+  int nLeft;                     /* Elements of the bucket still to test */
+  int (*xCmp)(const void*,int,const void*,int);  /* Key comparison */
+
+  if( pH->ht==0 ) return 0;
+
+  pBucket = &pH->ht[h];
+  xCmp = ftsCompareFunction(pH->keyClass);
+  pElem = pBucket->chain;
+  for(nLeft=pBucket->count; nLeft && pElem; nLeft--, pElem=pElem->next){
+    if( (*xCmp)(pElem->pKey, pElem->nKey, pKey, nKey)==0 ){
+      return pElem;
+    }
+  }
+  return 0;
+}
+
+/* Remove a single entry from the hash table given a pointer to that
+** element and a hash on the element's key.
+**
+** "h" is used directly as an index into pH->ht[], so it must already be
+** reduced to a bucket index.  The element (and its key, if the table
+** owns key copies) is freed; the element's data pointer is not.  When
+** the last element is removed the whole table is cleared, which also
+** releases the bucket array.
+*/
+static void fts3RemoveElementByHash(
+  Fts3Hash *pH,         /* The pH containing "elem" */
+  Fts3HashElem* elem,   /* The element to be removed from the pH */
+  int h                 /* Hash value for the element */
+){
+  struct _fts3ht *pEntry;
+  /* Unlink elem from the global doubly-linked list. */
+  if( elem->prev ){
+    elem->prev->next = elem->next; 
+  }else{
+    pH->first = elem->next;
+  }
+  if( elem->next ){
+    elem->next->prev = elem->prev;
+  }
+  /* Fix up the bucket: advance its head if elem was the head, and drop
+  ** the head pointer entirely once the bucket is empty. */
+  pEntry = &pH->ht[h];
+  if( pEntry->chain==elem ){
+    pEntry->chain = elem->next;
+  }
+  pEntry->count--;
+  if( pEntry->count<=0 ){
+    pEntry->chain = 0;
+  }
+  if( pH->copyKey && elem->pKey ){
+    fts3HashFree(elem->pKey);
+  }
+  fts3HashFree( elem );
+  pH->count--;
+  if( pH->count<=0 ){
+    assert( pH->first==0 );
+    assert( pH->count==0 );
+    fts3HashClear(pH);
+  }
+}
+
+/*
+** Locate the element of hash table pH whose key matches (pKey,nKey).
+** Returns the element itself (not its data), or NULL if pH is NULL, the
+** table has no buckets yet, or no element matches.
+*/
+Fts3HashElem *sqlite3Fts3HashFindElem(
+  const Fts3Hash *pH, 
+  const void *pKey, 
+  int nKey
+){
+  int (*xHash)(const void*,int);  /* The hash function */
+  int iBucket;                    /* Bucket index derived from the key */
+
+  if( pH==0 || pH->ht==0 ) return 0;
+
+  xHash = ftsHashFunction(pH->keyClass);
+  assert( xHash!=0 );
+  assert( (pH->htsize & (pH->htsize-1))==0 );
+  iBucket = (*xHash)(pKey,nKey) & (pH->htsize-1);
+  return fts3FindElementByHash(pH, pKey, nKey, iBucket);
+}
+
+/* 
+** Look up key (pKey,nKey) in hash table pH and return the data pointer
+** stored with it, or NULL when the key is not present.
+*/
+void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, int nKey){
+  Fts3HashElem *pHit = sqlite3Fts3HashFindElem(pH, pKey, nKey);
+  if( pHit==0 ) return 0;
+  return pHit->data;
+}
+
+/* Insert an element into the hash table pH.  The key is pKey,nKey
+** and the data is "data".
+**
+** If no element exists with a matching key, then a new
+** element is created.  A copy of the key is made if the copyKey
+** flag is set.  NULL is returned.
+**
+** If another element already exists with the same key, then the
+** new data replaces the old data and the old data is returned.
+** The key is not copied in this instance.  If a malloc fails, then
+** the new data is returned and the hash table is unchanged.
+**
+** If the "data" parameter to this function is NULL, then the
+** element corresponding to "key" is removed from the hash table.
+** The old data is returned, or NULL if there was no such element.
+*/
+void *sqlite3Fts3HashInsert(
+  Fts3Hash *pH,        /* The hash table to insert into */
+  const void *pKey,    /* The key */
+  int nKey,            /* Number of bytes in the key */
+  void *data           /* The data */
+){
+  int hraw;                 /* Raw hash value of the key */
+  int h;                    /* the hash of the key modulo hash table size */
+  Fts3HashElem *elem;       /* Used to loop thru the element list */
+  Fts3HashElem *new_elem;   /* New element added to the pH */
+  int (*xHash)(const void*,int);  /* The hash function */
+
+  assert( pH!=0 );
+  xHash = ftsHashFunction(pH->keyClass);
+  assert( xHash!=0 );
+  hraw = (*xHash)(pKey, nKey);
+  assert( (pH->htsize & (pH->htsize-1))==0 );
+  h = hraw & (pH->htsize-1);
+
+  /* Existing key: either replace its data or (data==NULL) delete it. */
+  elem = fts3FindElementByHash(pH,pKey,nKey,h);
+  if( elem ){
+    void *old_data = elem->data;
+    if( data==0 ){
+      fts3RemoveElementByHash(pH,elem,h);
+    }else{
+      elem->data = data;
+    }
+    return old_data;
+  }
+  if( data==0 ) return 0;
+
+  /* Create the bucket array on first use, and double it whenever the
+  ** element count reaches the bucket count. */
+  if( (pH->htsize==0 && fts3Rehash(pH,8))
+   || (pH->count>=pH->htsize && fts3Rehash(pH, pH->htsize*2))
+  ){
+    /* fts3Rehash() failed to allocate and left the table untouched.
+    ** pH->count is deliberately left alone as well: every existing
+    ** element is still linked in, and zeroing the count here would make
+    ** a later delete believe the table had become empty and clear it. */
+    return data;
+  }
+  assert( pH->htsize>0 );
+  new_elem = (Fts3HashElem*)fts3HashMalloc( sizeof(Fts3HashElem) );
+  if( new_elem==0 ) return data;
+  if( pH->copyKey && pKey!=0 ){
+    new_elem->pKey = fts3HashMalloc( nKey );
+    if( new_elem->pKey==0 ){
+      fts3HashFree(new_elem);
+      return data;
+    }
+    memcpy((void*)new_elem->pKey, pKey, nKey);
+  }else{
+    new_elem->pKey = (void*)pKey;
+  }
+  new_elem->nKey = nKey;
+  pH->count++;
+  assert( pH->htsize>0 );
+  assert( (pH->htsize & (pH->htsize-1))==0 );
+  /* The bucket index must be recomputed: a rehash may have changed
+  ** htsize since h was first derived. */
+  h = hraw & (pH->htsize-1);
+  fts3HashInsertElement(pH, &pH->ht[h], new_elem);
+  new_elem->data = data;
+  return 0;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_hash.h
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_hash.h
@ -0,0 +1,112 @@
+/*
+** 2001 September 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This is the header file for the generic hash-table implementation
+** used in SQLite.  We've modified it slightly to serve as a standalone
+** hash table implementation for the full-text indexing module.
+**
+*/
+#ifndef _FTS3_HASH_H_
+#define _FTS3_HASH_H_
+
+/* Forward declarations of structures. */
+typedef struct Fts3Hash Fts3Hash;
+typedef struct Fts3HashElem Fts3HashElem;
+
+/* A complete hash table is an instance of the following structure.
+** The internals of this structure are intended to be opaque -- client
+** code should not attempt to access or modify the fields of this structure
+** directly.  Change this structure only by using the routines below.
+** However, many of the "procedures" and "functions" for modifying and
+** accessing this structure are really macros, so we can't really make
+** this structure opaque.
+**
+** Every element is on a single doubly-linked list starting at "first".
+** Each bucket in ht[] records how many elements hash to it and points
+** at the first of them on that list.
+*/
+struct Fts3Hash {
+  char keyClass;          /* FTS3_HASH_STRING or FTS3_HASH_BINARY */
+  char copyKey;           /* True if copy of key made on insert */
+  int count;              /* Number of entries in this table */
+  Fts3HashElem *first;    /* First element of the list of all entries */
+  int htsize;             /* Number of buckets in the hash table */
+  struct _fts3ht {        /* the hash table */
+    int count;               /* Number of entries with this hash */
+    Fts3HashElem *chain;     /* Pointer to first entry with this hash */
+  } *ht;
+};
+
+/* Each element in the hash table is an instance of the following 
+** structure.  All elements are stored on a single doubly-linked list.
+**
+** Again, this structure is intended to be opaque, but it can't really
+** be opaque because it is used by macros.
+**
+** pKey is either a private copy owned by the table or the caller's own
+** pointer, depending on the table's copyKey setting.
+*/
+struct Fts3HashElem {
+  Fts3HashElem *next, *prev; /* Next and previous elements in the table */
+  void *data;                /* Data associated with this element */
+  void *pKey; int nKey;      /* Key associated with this element */
+};
+
+/*
+** There are 2 different modes of operation for a hash table:
+**
+**   FTS3_HASH_STRING        pKey points to a string that is nKey bytes long
+**                           (including the null-terminator, if any).  Case
+**                           is respected in comparisons.
+**
+**   FTS3_HASH_BINARY        pKey points to binary data nKey bytes long. 
+**                           memcmp() is used to compare keys.
+**
+** A copy of the key is made if the copyKey parameter to fts3HashInit is 1.  
+*/
+#define FTS3_HASH_STRING    1
+#define FTS3_HASH_BINARY    2
+
+/*
+** Access routines.  To delete, insert a NULL pointer.
+**
+**   sqlite3Fts3HashInit      Initialize an empty table.
+**   sqlite3Fts3HashInsert    Insert, replace or (pData==NULL) delete an
+**                            entry.  Returns the previous data for the key,
+**                            NULL if there was none, or pData itself if a
+**                            memory allocation failed.
+**   sqlite3Fts3HashFind      Return the data stored for a key, or NULL.
+**   sqlite3Fts3HashClear     Remove all entries and free table memory.
+**   sqlite3Fts3HashFindElem  Return the element for a key, or NULL.
+*/
+void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey);
+void *sqlite3Fts3HashInsert(Fts3Hash*, const void *pKey, int nKey, void *pData);
+void *sqlite3Fts3HashFind(const Fts3Hash*, const void *pKey, int nKey);
+void sqlite3Fts3HashClear(Fts3Hash*);
+Fts3HashElem *sqlite3Fts3HashFindElem(const Fts3Hash *, const void *, int);
+
+/*
+** Shorthand for the functions above
+*/
+#define fts3HashInit     sqlite3Fts3HashInit
+#define fts3HashInsert   sqlite3Fts3HashInsert
+#define fts3HashFind     sqlite3Fts3HashFind
+#define fts3HashClear    sqlite3Fts3HashClear
+#define fts3HashFindElem sqlite3Fts3HashFindElem
+
+/*
+** Macros for looping over all elements of a hash table.  The idiom is
+** like this:
+**
+**   Fts3Hash h;
+**   Fts3HashElem *p;
+**   ...
+**   for(p=fts3HashFirst(&h); p; p=fts3HashNext(p)){
+**     SomeStructure *pData = fts3HashData(p);
+**     // do something with pData
+**   }
+*/
+#define fts3HashFirst(H)  ((H)->first)
+#define fts3HashNext(E)   ((E)->next)
+#define fts3HashData(E)   ((E)->data)
+#define fts3HashKey(E)    ((E)->pKey)
+#define fts3HashKeysize(E) ((E)->nKey)
+
+/*
+** Number of entries in a hash table
+*/
+#define fts3HashCount(H)  ((H)->count)
+
+#endif /* _FTS3_HASH_H_ */
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_icu.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_icu.c
@ -0,0 +1,262 @@
+/*
+** 2007 June 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file implements a tokenizer for fts3 based on the ICU library.
+*/
+#include "fts3Int.h"
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+#ifdef SQLITE_ENABLE_ICU
+
+#include <assert.h>
+#include <string.h>
+#include "fts3_tokenizer.h"
+
+#include <unicode/ubrk.h>
+#include <unicode/ucol.h>
+#include <unicode/ustring.h>
+#include <unicode/utf16.h>
+
+typedef struct IcuTokenizer IcuTokenizer;
+typedef struct IcuCursor IcuCursor;
+
+/*
+** An ICU tokenizer instance: the generic tokenizer base plus the locale
+** name handed to ubrk_open() when a cursor is opened.
+*/
+struct IcuTokenizer {
+  sqlite3_tokenizer base;     /* Base class. Must be first */
+  char *zLocale;              /* Locale name (copy of argv[0]), or NULL */
+};
+
+/*
+** Tokenization cursor for the ICU tokenizer.  aChar[] and aOffset[] are
+** carved out of the same allocation as the cursor itself; zBuffer is a
+** separate allocation that is grown on demand.
+*/
+struct IcuCursor {
+  sqlite3_tokenizer_cursor base;
+
+  UBreakIterator *pIter;      /* ICU break-iterator object */
+  int nChar;                  /* Number of UChar elements in pInput */
+  UChar *aChar;               /* Copy of input using utf-16 encoding */
+  int *aOffset;               /* Offsets of each character in utf-8 input */
+
+  int nBuffer;                /* Bytes allocated at zBuffer */
+  char *zBuffer;              /* Buffer holding the current token as utf-8 */
+
+  int iToken;                 /* Position to report for the next token */
+};
+
+/*
+** Create a new ICU tokenizer instance.
+**
+** If at least one argument is supplied, argv[0] is taken to be the
+** locale name; a copy of it is stored immediately after the tokenizer
+** structure in the same allocation.  With no arguments zLocale stays
+** NULL.
+**
+** Returns SQLITE_OK and sets *ppTokenizer on success, or SQLITE_NOMEM
+** if the allocation fails.
+*/
+static int icuCreate(
+  int argc,                            /* Number of entries in argv[] */
+  const char * const *argv,            /* Tokenizer creation arguments */
+  sqlite3_tokenizer **ppTokenizer      /* OUT: Created tokenizer */
+){
+  IcuTokenizer *pNew;
+  int nLocale = (argc>0) ? (strlen(argv[0])+1) : 0;  /* Includes the nul */
+
+  pNew = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+nLocale);
+  if( pNew==0 ) return SQLITE_NOMEM;
+  memset(pNew, 0, sizeof(IcuTokenizer));
+
+  if( nLocale ){
+    pNew->zLocale = (char *)&pNew[1];
+    memcpy(pNew->zLocale, argv[0], nLocale);
+  }
+
+  *ppTokenizer = (sqlite3_tokenizer *)pNew;
+  return SQLITE_OK;
+}
+
+/*
+** Destroy a tokenizer created by icuCreate().  The locale string lives
+** inside the same allocation, so a single free releases everything.
+*/
+static int icuDestroy(sqlite3_tokenizer *pTokenizer){
+  sqlite3_free((IcuTokenizer *)pTokenizer);
+  return SQLITE_OK;
+}
+
+/*
+** Prepare to begin tokenizing a particular string.  The input
+** string to be tokenized is zInput[0..nInput-1].  A cursor
+** used to incrementally tokenize this string is returned in 
+** *ppCursor.
+**
+** A NULL zInput is treated as the empty string; a negative nInput means
+** "use strlen(zInput)".  The input is decoded from utf-8, case-folded
+** and stored as utf-16 in the cursor, together with a table mapping
+** utf-16 indexes back to byte offsets in zInput.
+**
+** Returns SQLITE_OK on success, SQLITE_NOMEM if the cursor cannot be
+** allocated, or SQLITE_ERROR if the utf-16 conversion or ubrk_open()
+** fails.  *ppCursor is left NULL on any failure.
+*/
+static int icuOpen(
+  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
+  const char *zInput,                    /* Input string */
+  int nInput,                            /* Length of zInput in bytes */
+  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
+){
+  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
+  IcuCursor *pCsr;
+
+  const int32_t opt = U_FOLD_CASE_DEFAULT;
+  UErrorCode status = U_ZERO_ERROR;
+  int nChar;
+
+  UChar32 c;
+  int iInput = 0;                 /* Read position in zInput, in bytes */
+  int iOut = 0;                   /* Write position in aChar[], in UChars */
+
+  *ppCursor = 0;
+
+  if( zInput==0 ){
+    nInput = 0;
+    zInput = "";
+  }else if( nInput<0 ){
+    nInput = strlen(zInput);
+  }
+  nChar = nInput+1;
+  /* One allocation holds the cursor, then aChar[] (its length rounded up
+  ** to a multiple of 4 UChars), then aOffset[]. */
+  pCsr = (IcuCursor *)sqlite3_malloc(
+      sizeof(IcuCursor) +                /* IcuCursor */
+      ((nChar+3)&~3) * sizeof(UChar) +   /* IcuCursor.aChar[] */
+      (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */
+  );
+  if( !pCsr ){
+    return SQLITE_NOMEM;
+  }
+  memset(pCsr, 0, sizeof(IcuCursor));
+  pCsr->aChar = (UChar *)&pCsr[1];
+  pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
+
+  /* Decode one code point at a time, fold its case, append it to aChar[]
+  ** as utf-16 and record the input byte offset reached so far.  The loop
+  ** ends at the end of input or as soon as a decoded value is <= 0. */
+  pCsr->aOffset[iOut] = iInput;
+  U8_NEXT(zInput, iInput, nInput, c); 
+  while( c>0 ){
+    int isError = 0;
+    c = u_foldCase(c, opt);
+    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
+    if( isError ){
+      sqlite3_free(pCsr);
+      return SQLITE_ERROR;
+    }
+    pCsr->aOffset[iOut] = iInput;
+
+    if( iInput<nInput ){
+      U8_NEXT(zInput, iInput, nInput, c);
+    }else{
+      c = 0;
+    }
+  }
+
+  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
+  if( !U_SUCCESS(status) ){
+    sqlite3_free(pCsr);
+    return SQLITE_ERROR;
+  }
+  pCsr->nChar = iOut;
+
+  ubrk_first(pCsr->pIter);
+  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
+  return SQLITE_OK;
+}
+
+/*
+** Close a tokenization cursor previously opened by icuOpen(): release
+** the ICU break iterator, the token output buffer and finally the
+** cursor allocation itself.
+*/
+static int icuClose(sqlite3_tokenizer_cursor *pCursor){
+  IcuCursor *pIcu = (IcuCursor *)pCursor;
+
+  ubrk_close(pIcu->pIter);
+  sqlite3_free(pIcu->zBuffer);
+  sqlite3_free(pIcu);
+
+  return SQLITE_OK;
+}
+
+/*
+** Extract the next token from a tokenization cursor.
+**
+** Returns SQLITE_OK with the OUT parameters filled in, SQLITE_DONE when
+** the break iterator is exhausted, or SQLITE_NOMEM if the token buffer
+** cannot be grown.  *ppToken points into the cursor's own buffer, which
+** is reused by the next call.
+*/
+static int icuNext(
+  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by icuOpen */
+  const char **ppToken,               /* OUT: *ppToken is the token text */
+  int *pnBytes,                       /* OUT: Number of bytes in token */
+  int *piStartOffset,                 /* OUT: Starting offset of token */
+  int *piEndOffset,                   /* OUT: Ending offset of token */
+  int *piPosition                     /* OUT: Position integer of token */
+){
+  IcuCursor *pCsr = (IcuCursor *)pCursor;
+
+  int iStart = 0;
+  int iEnd = 0;
+  int nByte = 0;
+
+  /* Advance through break-iterator segments until one is found that is
+  ** non-empty after its leading whitespace has been skipped. */
+  while( iStart==iEnd ){
+    UChar32 c;
+
+    iStart = ubrk_current(pCsr->pIter);
+    iEnd = ubrk_next(pCsr->pIter);
+    if( iEnd==UBRK_DONE ){
+      return SQLITE_DONE;
+    }
+
+    while( iStart<iEnd ){
+      int iWhite = iStart;
+      U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
+      if( u_isspace(c) ){
+        iStart = iWhite;
+      }else{
+        break;
+      }
+    }
+    assert(iStart<=iEnd);
+  }
+
+  /* Convert the token to utf-8.  The first pass (nByte==0) tries the
+  ** buffer as it currently is; if the length reported in nByte exceeds
+  ** the buffer, the buffer is grown to that size and the conversion is
+  ** repeated.
+  ** NOTE(review): "status" is never inspected, so a conversion failure
+  ** other than a too-small buffer goes unreported - confirm intended. */
+  do {
+    UErrorCode status = U_ZERO_ERROR;
+    if( nByte ){
+      char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
+      if( !zNew ){
+        return SQLITE_NOMEM;
+      }
+      pCsr->zBuffer = zNew;
+      pCsr->nBuffer = nByte;
+    }
+
+    u_strToUTF8(
+        pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
+        &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
+        &status                                  /* Output success/failure */
+    );
+  } while( nByte>pCsr->nBuffer );
+
+  *ppToken = pCsr->zBuffer;
+  *pnBytes = nByte;
+  /* aOffset[] translates utf-16 indexes back to input byte offsets. */
+  *piStartOffset = pCsr->aOffset[iStart];
+  *piEndOffset = pCsr->aOffset[iEnd];
+  *piPosition = pCsr->iToken++;
+
+  return SQLITE_OK;
+}
+
+/*
+** The set of routines that implement the ICU tokenizer
+*/
+static const sqlite3_tokenizer_module icuTokenizerModule = {
+  0,                           /* iVersion    */
+  icuCreate,                   /* xCreate     */
+  icuDestroy,                  /* xDestroy    */
+  icuOpen,                     /* xOpen       */
+  icuClose,                    /* xClose      */
+  icuNext,                     /* xNext       */
+  0,                           /* xLanguageid */
+};
+
+/*
+** Publish the ICU tokenizer: store the address of its static method
+** table in *ppModule.
+*/
+void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule){
+  *ppModule = &icuTokenizerModule;
+}
+
+#endif /* defined(SQLITE_ENABLE_ICU) */
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_porter.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_porter.c
@ -0,0 +1,662 @@
+/*
+** 2006 September 30
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** Implementation of the full-text-search tokenizer that implements
+** a Porter stemmer.
+*/
+
+/*
+** The code in this file is only compiled if:
+**
+**     * The FTS3 module is being built as an extension
+**       (in which case SQLITE_CORE is not defined), or
+**
+**     * The FTS3 module is being built into the core of
+**       SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
+*/
+#include "fts3Int.h"
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "fts3_tokenizer.h"
+
+/*
+** Class derived from sqlite3_tokenizer.  The porter tokenizer keeps no
+** state of its own beyond the base class; instances are created by
+** porterCreate() and released by porterDestroy().
+*/
+typedef struct porter_tokenizer {
+  sqlite3_tokenizer base;      /* Base class */
+} porter_tokenizer;
+
+/*
+** Class derived from sqlite3_tokenizer_cursor.  One of these is allocated
+** by porterOpen() for each string to be tokenized, advanced by
+** porterNext() and released by porterClose().
+*/
+typedef struct porter_tokenizer_cursor {
+  sqlite3_tokenizer_cursor base;  /* Base class */
+  const char *zInput;          /* input we are tokenizing (not copied) */
+  int nInput;                  /* size of the input in bytes */
+  int iOffset;                 /* current position in zInput */
+  int iToken;                  /* index of next token to be returned */
+  char *zToken;                /* storage for current token (sqlite3_realloc'd) */
+  int nAllocated;              /* space allocated to zToken buffer */
+} porter_tokenizer_cursor;
+
+
+/*
+** Create a new tokenizer instance.
+**
+** The argc/argv arguments are ignored.  On success the new, zeroed
+** tokenizer is returned through *ppTokenizer and SQLITE_OK is returned.
+** SQLITE_NOMEM is returned if the allocation fails, in which case
+** *ppTokenizer is left untouched.
+*/
+static int porterCreate(
+  int argc, const char * const *argv,
+  sqlite3_tokenizer **ppTokenizer
+){
+  porter_tokenizer *pNew;
+
+  UNUSED_PARAMETER(argc);
+  UNUSED_PARAMETER(argv);
+
+  pNew = (porter_tokenizer *)sqlite3_malloc(sizeof(porter_tokenizer));
+  if( pNew==NULL ){
+    return SQLITE_NOMEM;
+  }
+  memset(pNew, 0, sizeof(porter_tokenizer));
+
+  *ppTokenizer = &pNew->base;
+  return SQLITE_OK;
+}
+
+/*
+** Destroy a tokenizer previously created by porterCreate().  The
+** tokenizer owns no resources other than its own allocation, so a
+** single sqlite3_free() releases it.  Always returns SQLITE_OK.
+*/
+static int porterDestroy(sqlite3_tokenizer *pTokenizer){
+  sqlite3_free(pTokenizer);
+  return SQLITE_OK;
+}
+
+/*
+** Prepare to begin tokenizing a particular string.  The input
+** string to be tokenized is zInput[0..nInput-1].  If nInput is negative
+** the length is measured with strlen(); a NULL zInput is treated as an
+** empty string.  The input is referenced, not copied.
+**
+** A cursor used to incrementally tokenize this string is returned in
+** *ppCursor.  Returns SQLITE_OK, or SQLITE_NOMEM if the cursor cannot
+** be allocated.
+*/
+static int porterOpen(
+  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
+  const char *zInput, int nInput,        /* String to be tokenized */
+  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
+){
+  porter_tokenizer_cursor *pCsr;
+  int nByte;                             /* Effective size of the input */
+
+  UNUSED_PARAMETER(pTokenizer);
+
+  pCsr = (porter_tokenizer_cursor *)sqlite3_malloc(sizeof(porter_tokenizer_cursor));
+  if( pCsr==NULL ){
+    return SQLITE_NOMEM;
+  }
+
+  if( zInput==0 ){
+    nByte = 0;
+  }else{
+    nByte = (nInput<0) ? (int)strlen(zInput) : nInput;
+  }
+
+  /* Start at the beginning; the token buffer is allocated lazily. */
+  pCsr->zInput = zInput;
+  pCsr->nInput = nByte;
+  pCsr->iOffset = 0;
+  pCsr->iToken = 0;
+  pCsr->zToken = NULL;
+  pCsr->nAllocated = 0;
+
+  *ppCursor = &pCsr->base;
+  return SQLITE_OK;
+}
+
+/*
+** Close a tokenization cursor previously opened by a call to
+** porterOpen() above.  Both the token buffer (if one was ever
+** allocated) and the cursor itself are released.
+*/
+static int porterClose(sqlite3_tokenizer_cursor *pCursor){
+  porter_tokenizer_cursor *pCsr = (porter_tokenizer_cursor *)pCursor;
+  char *zBuf = pCsr->zToken;
+
+  sqlite3_free(zBuf);
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}
+/*
+** Letter classification, indexed by (letter - 'a'):
+**
+**     0  ->  always a vowel      (a e i o u)
+**     1  ->  always a consonant
+**     2  ->  depends on context  (y)
+*/
+static const char cType[] = {
+   0, 1, 1, 1,        /* a b c d */
+   0, 1, 1, 1,        /* e f g h */
+   0, 1, 1, 1, 1, 1,  /* i j k l m n */
+   0, 1, 1, 1, 1, 1,  /* o p q r s t */
+   0, 1, 1, 1,        /* u v w x */
+   2, 1               /* y z */
+};
+
+/*
+** isConsonant() and isVowel() determine if the first character in
+** the string they point to is a consonant or a vowel, according
+** to Porter rules.  Both return 0 for an empty string.
+**
+** A consonant is any letter other than 'a', 'e', 'i', 'o', or 'u'.
+** 'Y' is a consonant unless it follows another consonant,
+** in which case it is a vowel.
+**
+** In these routines, the letters are in reverse order.  So the 'y' rule
+** is that 'y' is a consonant unless it is followed by another
+** consonant.
+*/
+static int isVowel(const char*);
+static int isConsonant(const char *z){
+  char ch = z[0];
+  if( ch==0 ) return 0;
+  assert( ch>='a' && ch<='z' );
+  switch( cType[ch-'a'] ){
+    case 0:  return 0;
+    case 1:  return 1;
+    default: return z[1]==0 || isVowel(&z[1]);   /* 'y' */
+  }
+}
+static int isVowel(const char *z){
+  char ch = z[0];
+  if( ch==0 ) return 0;
+  assert( ch>='a' && ch<='z' );
+  switch( cType[ch-'a'] ){
+    case 0:  return 1;
+    case 1:  return 0;
+    default: return isConsonant(&z[1]);          /* 'y' */
+  }
+}
+
+/*
+** Let any sequence of one or more vowels be represented by V and let
+** C be a sequence of one or more consonants.  Then every word can be
+** represented as:
+**
+**           [C] (VC){m} [V]
+**
+** In prose:  A word is an optional consonant run followed by zero or
+** more vowel-consonant pairs followed by an optional vowel run.  "m" is
+** the number of vowel-consonant pairs.
+**
+** Return true if the m-value for z is 1 or more.  In other words,
+** return true if z contains at least one vowel that is followed
+** by a consonant.
+**
+** In this routine z[] is in reverse order.  So we are really looking
+** for an instance of a consonant followed by a vowel.
+*/
+static int m_gt_0(const char *z){
+  int i = 0;
+  while( isVowel(&z[i]) ) i++;
+  if( z[i]=='\0' ) return 0;
+  while( isConsonant(&z[i]) ) i++;
+  return z[i]!='\0';
+}
+
+/* Like m_gt_0() above except we are looking for a value of m which is
+** exactly 1.  z[] is in reverse order.
+*/
+static int m_eq_1(const char *z){
+  int i = 0;
+  while( isVowel(&z[i]) ) i++;
+  if( z[i]=='\0' ) return 0;
+  while( isConsonant(&z[i]) ) i++;
+  if( z[i]=='\0' ) return 0;
+  /* One consonant-vowel transition seen so far: m is at least 1. */
+  while( isVowel(&z[i]) ) i++;
+  if( z[i]=='\0' ) return 1;
+  while( isConsonant(&z[i]) ) i++;
+  return z[i]=='\0';
+}
+
+/* Like m_gt_0() above except we are looking for a value of m>1 instead
+** of m>0.  z[] is in reverse order.
+*/
+static int m_gt_1(const char *z){
+  int i = 0;
+  while( isVowel(&z[i]) ) i++;
+  if( z[i]=='\0' ) return 0;
+  while( isConsonant(&z[i]) ) i++;
+  if( z[i]=='\0' ) return 0;
+  while( isVowel(&z[i]) ) i++;
+  if( z[i]=='\0' ) return 0;
+  while( isConsonant(&z[i]) ) i++;
+  return z[i]!='\0';
+}
+
+/*
+** Return TRUE if there is a vowel anywhere within the nul-terminated
+** string z.
+*/
+static int hasVowel(const char *z){
+  int i = 0;
+  while( isConsonant(&z[i]) ) i++;
+  return z[i]!='\0';
+}
+
+/*
+** Return TRUE if the word ends in a double consonant.
+**
+** The text is reversed here. So we are really looking at
+** the first two characters of z[].
+*/
+static int doubleConsonant(const char *z){
+  if( !isConsonant(z) ) return 0;
+  return z[0]==z[1];
+}
+
+/*
+** Return TRUE if the word ends with three letters which
+** are consonant-vowel-consonant and where the final consonant
+** is not 'w', 'x', or 'y'.
+**
+** The word is reversed here.  So we are really checking the
+** first three letters and the first one cannot be in [wxy].
+*/
+static int star_oh(const char *z){
+  if( !isConsonant(z) ) return 0;
+  if( z[0]=='w' || z[0]=='x' || z[0]=='y' ) return 0;
+  if( !isVowel(z+1) ) return 0;
+  return isConsonant(z+2);
+}
+
+/*
+** If the word ends with zFrom and xCond() is true for the stem
+** of the word that precedes the zFrom ending, then change the
+** ending to zTo.  A NULL xCond means the change is unconditional.
+**
+** The input word *pz and zFrom are both in reverse order.  zTo
+** is in normal order.  The replacement is written in front of the
+** stem (at lower addresses) and *pz is moved to the new start of the
+** reversed word.
+**
+** Return TRUE if zFrom matches.  Return FALSE if zFrom does not
+** match.  Note that TRUE is returned even if xCond() fails and
+** no substitution occurs.
+*/
+static int stem(
+  char **pz,             /* The word being stemmed (Reversed) */
+  const char *zFrom,     /* If the ending matches this... (Reversed) */
+  const char *zTo,       /* ... change the ending to this (not reversed) */
+  int (*xCond)(const char*)   /* Condition that must be true */
+){
+  char *zWord = *pz;
+  int k;
+
+  /* The whole of zFrom must be a prefix of the reversed word. */
+  for(k=0; zFrom[k]!=0; k++){
+    if( zWord[k]!=zFrom[k] ) return 0;
+  }
+  zWord += k;
+
+  if( xCond!=0 && xCond(zWord)==0 ) return 1;
+
+  /* Push each character of zTo in front of the stem. */
+  for(k=0; zTo[k]!=0; k++){
+    *(--zWord) = zTo[k];
+  }
+  *pz = zWord;
+  return 1;
+}
+
+/*
+** This is the fallback stemmer used when the porter stemmer is
+** inappropriate.  The input word is copied into the output with
+** US-ASCII case folding.  If the input word is too long (more
+** than 20 bytes if it contains no digits or more than 6 bytes if
+** it contains digits) then the word is truncated to 20 or 6 bytes
+** by taking 10 or 3 bytes from the beginning and end.
+**
+** zOut must have room for nIn bytes plus a nul terminator.  The
+** length of the result, not counting the terminator, is written
+** to *pnOut.
+*/
+static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
+  int nOut = 0;          /* Number of bytes currently in zOut */
+  int sawDigit = 0;      /* True once a digit has been copied */
+  int nKeep;             /* Bytes kept from each end of a long word */
+  int k;
+
+  while( nOut<nIn ){
+    char ch = zIn[nOut];
+    if( ch>='A' && ch<='Z' ){
+      ch = ch - 'A' + 'a';
+    }else if( ch>='0' && ch<='9' ){
+      sawDigit = 1;
+    }
+    zOut[nOut++] = ch;
+  }
+
+  nKeep = sawDigit ? 3 : 10;
+  if( nIn>2*nKeep ){
+    /* Slide the last nKeep bytes down so they follow the first nKeep. */
+    for(k=0; k<nKeep; k++){
+      zOut[nKeep+k] = zOut[nIn-nKeep+k];
+    }
+    nOut = 2*nKeep;
+  }
+  zOut[nOut] = 0;
+  *pnOut = nOut;
+}
+
+
+/*
+** Stem the input word zIn[0..nIn-1].  Store the output in zOut.
+** zOut must be big enough to hold nIn bytes plus a '\0' terminator.
+** Write the actual size of the output word (exclusive of the '\0'
+** terminator) into *pnOut.
+**
+** Any upper-case characters in the US-ASCII character set ([A-Z])
+** are converted to lower case.  Upper-case UTF characters are
+** unchanged.
+**
+** Words that are longer than about 20 bytes are stemmed by retaining
+** a few bytes from the beginning and the end of the word.  If the
+** word contains digits, 3 bytes are taken from the beginning and
+** 3 bytes from the end.  For long words without digits, 10 bytes
+** are taken from each end.  US-ASCII case folding still applies.
+** 
+** If the input word is shorter than 3 bytes, or contains any character
+** that is not in [a-zA-Z], then no stemming is attempted and this
+** routine just copies the input into the output with US-ASCII
+** case folding (see copy_stemmer()).
+**
+** Stemming never increases the length of the word.  So there is
+** no chance of overflowing the zOut buffer.
+**
+** Implementation note: the word is lower-cased and stored backwards in
+** zReverse[] so that its last letter is z[0].  The final five bytes of
+** zReverse[] are zeroed so the rules below can look a few characters
+** past the end of a short word, and the unused bytes in front of the
+** word give stem() room to write a replacement ending that is longer
+** than the ending it replaces.
+*/
+static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
+  int i, j;
+  char zReverse[28];   /* Reversed, lower-cased copy of the word */
+  char *z, *z2;
+  if( nIn<3 || nIn>=(int)sizeof(zReverse)-7 ){
+    /* The word is too big or too small for the porter stemmer.
+    ** Fallback to the copy stemmer */
+    copy_stemmer(zIn, nIn, zOut, pnOut);
+    return;
+  }
+  for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
+    char c = zIn[i];
+    if( c>='A' && c<='Z' ){
+      zReverse[j] = c + 'a' - 'A';
+    }else if( c>='a' && c<='z' ){
+      zReverse[j] = c;
+    }else{
+      /* The use of a character not in [a-zA-Z] means that we fallback
+      ** to the copy stemmer */
+      copy_stemmer(zIn, nIn, zOut, pnOut);
+      return;
+    }
+  }
+  memset(&zReverse[sizeof(zReverse)-5], 0, 5);   /* terminator + lookahead padding */
+  z = &zReverse[j+1];   /* z[0] is the last letter of the input word */
+
+
+  /* Step 1a: plural endings (sses->ss, ies->i, ss->ss, otherwise drop s) */
+  if( z[0]=='s' ){
+    if(
+     !stem(&z, "sess", "ss", 0) &&
+     !stem(&z, "sei", "i", 0)  &&
+     !stem(&z, "ss", "ss", 0)
+    ){
+      z++;
+    }
+  }
+
+  /* Step 1b: -eed, -ed and -ing endings.  z2 remembers the starting
+  ** position so that "matched but condition failed" can be detected. */  
+  z2 = z;
+  if( stem(&z, "dee", "ee", m_gt_0) ){
+    /* Do nothing.  The work was all in the test */
+  }else if( 
+     (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
+      && z!=z2
+  ){
+     if( stem(&z, "ta", "ate", 0) ||
+         stem(&z, "lb", "ble", 0) ||
+         stem(&z, "zi", "ize", 0) ){
+       /* Do nothing.  The work was all in the test */
+     }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
+       z++;
+     }else if( m_eq_1(z) && star_oh(z) ){
+       *(--z) = 'e';
+     }
+  }
+
+  /* Step 1c: a final y becomes i when the rest of the word has a vowel */
+  if( z[0]=='y' && hasVowel(z+1) ){
+    z[0] = 'i';
+  }
+
+  /* Step 2: suffix mapping, dispatched on the penultimate letter z[1] */
+  switch( z[1] ){
+   case 'a':
+     if( !stem(&z, "lanoita", "ate", m_gt_0) ){
+       stem(&z, "lanoit", "tion", m_gt_0);
+     }
+     break;
+   case 'c':
+     if( !stem(&z, "icne", "ence", m_gt_0) ){
+       stem(&z, "icna", "ance", m_gt_0);
+     }
+     break;
+   case 'e':
+     stem(&z, "rezi", "ize", m_gt_0);
+     break;
+   case 'g':
+     stem(&z, "igol", "log", m_gt_0);
+     break;
+   case 'l':
+     if( !stem(&z, "ilb", "ble", m_gt_0) 
+      && !stem(&z, "illa", "al", m_gt_0)
+      && !stem(&z, "iltne", "ent", m_gt_0)
+      && !stem(&z, "ile", "e", m_gt_0)
+     ){
+       stem(&z, "ilsuo", "ous", m_gt_0);
+     }
+     break;
+   case 'o':
+     if( !stem(&z, "noitazi", "ize", m_gt_0)
+      && !stem(&z, "noita", "ate", m_gt_0)
+     ){
+       stem(&z, "rota", "ate", m_gt_0);
+     }
+     break;
+   case 's':
+     if( !stem(&z, "msila", "al", m_gt_0)
+      && !stem(&z, "ssenevi", "ive", m_gt_0)
+      && !stem(&z, "ssenluf", "ful", m_gt_0)
+     ){
+       stem(&z, "ssensuo", "ous", m_gt_0);
+     }
+     break;
+   case 't':
+     if( !stem(&z, "itila", "al", m_gt_0)
+      && !stem(&z, "itivi", "ive", m_gt_0)
+     ){
+       stem(&z, "itilib", "ble", m_gt_0);
+     }
+     break;
+  }
+
+  /* Step 3: further suffix mapping, dispatched on the last letter z[0] */
+  switch( z[0] ){
+   case 'e':
+     if( !stem(&z, "etaci", "ic", m_gt_0)
+      && !stem(&z, "evita", "", m_gt_0)
+     ){
+       stem(&z, "ezila", "al", m_gt_0);
+     }
+     break;
+   case 'i':
+     stem(&z, "itici", "ic", m_gt_0);
+     break;
+   case 'l':
+     if( !stem(&z, "laci", "ic", m_gt_0) ){
+       stem(&z, "luf", "", m_gt_0);
+     }
+     break;
+   case 's':
+     stem(&z, "ssen", "", m_gt_0);
+     break;
+  }
+
+  /* Step 4: drop a suffix outright when the remaining stem has m>1.
+  ** Dispatched on the penultimate letter z[1]. */
+  switch( z[1] ){
+   case 'a':
+     if( z[0]=='l' && m_gt_1(z+2) ){
+       z += 2;
+     }
+     break;
+   case 'c':
+     if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e')  && m_gt_1(z+4)  ){
+       z += 4;
+     }
+     break;
+   case 'e':
+     if( z[0]=='r' && m_gt_1(z+2) ){
+       z += 2;
+     }
+     break;
+   case 'i':
+     if( z[0]=='c' && m_gt_1(z+2) ){
+       z += 2;
+     }
+     break;
+   case 'l':
+     if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
+       z += 4;
+     }
+     break;
+   case 'n':
+     if( z[0]=='t' ){
+       if( z[2]=='a' ){
+         if( m_gt_1(z+3) ){
+           z += 3;
+         }
+       }else if( z[2]=='e' ){
+         if( !stem(&z, "tneme", "", m_gt_1)
+          && !stem(&z, "tnem", "", m_gt_1)
+         ){
+           stem(&z, "tne", "", m_gt_1);
+         }
+       }
+     }
+     break;
+   case 'o':
+     if( z[0]=='u' ){
+       if( m_gt_1(z+2) ){
+         z += 2;
+       }
+     }else if( z[3]=='s' || z[3]=='t' ){
+       stem(&z, "noi", "", m_gt_1);
+     }
+     break;
+   case 's':
+     if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
+       z += 3;
+     }
+     break;
+   case 't':
+     if( !stem(&z, "eta", "", m_gt_1) ){
+       stem(&z, "iti", "", m_gt_1);
+     }
+     break;
+   case 'u':
+     if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
+       z += 3;
+     }
+     break;
+   case 'v':
+   case 'z':
+     if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
+       z += 3;
+     }
+     break;
+  }
+
+  /* Step 5a: drop a final e if m>1, or if m==1 and the stem does not
+  ** end consonant-vowel-consonant */
+  if( z[0]=='e' ){
+    if( m_gt_1(z+1) ){
+      z++;
+    }else if( m_eq_1(z+1) && !star_oh(z+1) ){
+      z++;
+    }
+  }
+
+  /* Step 5b: a final "ll" becomes "l" when m>1 */
+  if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
+    z++;
+  }
+
+  /* z[] is now the stemmed word in reverse order.  Flip it back
+  ** around into forward order and return.
+  */
+  *pnOut = i = (int)strlen(z);
+  zOut[i] = 0;
+  while( *z ){
+    zOut[--i] = *(z++);
+  }
+}
+
+/*
+** Characters that can be part of a token.  We assume any character
+** whose value is 0x80 or greater (any byte of a multi-byte UTF
+** character) can be part of a token.  In other words, delimiters all
+** must have values of 0x7f or lower.
+**
+** porterIdChar[] is indexed by (character - 0x30) and covers 0x30
+** through 0x7f.  A 1 marks a token character: the digits, the upper and
+** lower case ASCII letters, and '_'.
+**
+** isDelim(C) is true if C is a delimiter.  The macro assigns C to a
+** variable named "ch", so an integer variable of that name must be in
+** scope wherever the macro is used.  C itself is evaluated only once.
+*/
+static const char porterIdChar[] = {
+/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
+};
+#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30]))
+
+/*
+** Extract the next token from a tokenization cursor.  The cursor must
+** have been opened by a prior call to porterOpen().
+**
+** Delimiter characters are skipped, the following run of non-delimiter
+** characters is passed through porter_stemmer(), and the result is
+** returned in the cursor's own buffer.  *pzToken therefore remains valid
+** only until the next call on this cursor.  The start and end offsets
+** refer to the unstemmed text in the input.
+**
+** Returns SQLITE_OK if a token was produced, SQLITE_DONE when the input
+** is exhausted, or SQLITE_NOMEM if the token buffer cannot be enlarged.
+** On SQLITE_NOMEM the existing buffer and its recorded size are left
+** as they were.
+*/
+static int porterNext(
+  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by porterOpen */
+  const char **pzToken,               /* OUT: *pzToken is the token text */
+  int *pnBytes,                       /* OUT: Number of bytes in token */
+  int *piStartOffset,                 /* OUT: Starting offset of token */
+  int *piEndOffset,                   /* OUT: Ending offset of token */
+  int *piPosition                     /* OUT: Position integer of token */
+){
+  porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
+  const char *z = c->zInput;
+
+  while( c->iOffset<c->nInput ){
+    int iStartOffset, ch;             /* "ch" is scratch for isDelim() */
+
+    /* Scan past delimiter characters */
+    while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
+      c->iOffset++;
+    }
+
+    /* Count non-delimiter characters. */
+    iStartOffset = c->iOffset;
+    while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
+      c->iOffset++;
+    }
+
+    if( c->iOffset>iStartOffset ){
+      int n = c->iOffset-iStartOffset;
+      if( n>c->nAllocated ){
+        /* Grow with some slack.  The new size is recorded only after
+        ** the reallocation succeeds, so a failed attempt cannot leave
+        ** nAllocated larger than the buffer zToken really points to. */
+        int nNew = n+20;
+        char *pNew = sqlite3_realloc(c->zToken, nNew);
+        if( !pNew ) return SQLITE_NOMEM;
+        c->zToken = pNew;
+        c->nAllocated = nNew;
+      }
+      porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
+      *pzToken = c->zToken;
+      *piStartOffset = iStartOffset;
+      *piEndOffset = c->iOffset;
+      *piPosition = c->iToken++;
+      return SQLITE_OK;
+    }
+  }
+  return SQLITE_DONE;
+}
+
+/*
+** The set of routines that implement the porter-stemmer tokenizer
+*/
+static const sqlite3_tokenizer_module porterTokenizerModule = {
+  0,                           /* iVersion    */
+  porterCreate,                /* xCreate     */
+  porterDestroy,               /* xDestroy    */
+  porterOpen,                  /* xOpen       */
+  porterClose,                 /* xClose      */
+  porterNext,                  /* xNext       */
+  0                            /* xLanguageid */
+};
+
+/*
+** Set *ppModule to point at the implementation of the porter
+** tokenizer.  The object handed back is the file-scope constant
+** porterTokenizerModule; nothing is allocated by this call.
+*/
+void sqlite3Fts3PorterTokenizerModule(
+  sqlite3_tokenizer_module const**ppModule
+){
+  *ppModule = &porterTokenizerModule;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_snippet.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_snippet.c
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_term.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_term.c
@ -0,0 +1,373 @@
+/*
+** 2011 Jan 27
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file is not part of the production FTS code. It is only used for
+** testing. It contains a virtual table implementation that provides direct 
+** access to the full-text index of an FTS table. 
+*/
+
+#include "fts3Int.h"
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+#ifdef SQLITE_TEST
+
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+
+typedef struct Fts3termTable Fts3termTable;
+typedef struct Fts3termCursor Fts3termCursor;
+
+struct Fts3termTable {
+  sqlite3_vtab base;              /* Base class used by SQLite core */
+  int iIndex;                     /* Index for Fts3Table.aIndex[] */
+  Fts3Table *pFts3Tab;            /* Stand-in Fts3Table; stored in the same allocation as this object */
+};
+
+struct Fts3termCursor {
+  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
+  Fts3MultiSegReader csr;        /* Must be right after "base": xFilter zeroes everything from here to the end of the struct */
+  Fts3SegFilter filter;           /* Filter passed to sqlite3Fts3SegReaderStart() */
+
+  int isEof;                      /* True if cursor is at EOF */
+  char *pNext;                    /* Next unread byte of csr.aDoclist */
+
+  sqlite3_int64 iRowid;           /* Current 'rowid' value */
+  sqlite3_int64 iDocid;           /* Current 'docid' value */
+  int iCol;                       /* Current 'col' value */
+  int iPos;                       /* Current 'pos' value */
+};
+
+/*
+** Schema of the terms table.  The column order here is the order in
+** which fts3termColumnMethod() numbers the columns: 0 is the term text,
+** 1 the docid, 2 the column number and 3 the token position.
+*/
+#define FTS3_TERMS_SCHEMA "CREATE TABLE x(term, docid, col, pos)"
+
+/*
+** This function does all the work for both the xConnect and xCreate methods.
+** These tables have no persistent representation of their own, so xConnect
+** and xCreate are identical operations.
+**
+** argv[1] is the database name and argv[3] is the name of the FTS table
+** to read.  An optional argv[4] is converted with atoi() and used as the
+** index number; it defaults to 0.
+**
+** The Fts3termTable, a stand-in Fts3Table, and copies of the two names
+** are all carved out of a single zeroed allocation, in that order.  The
+** zeroing also supplies the nul terminators for the copied names.
+**
+** Returns SQLITE_OK on success, SQLITE_ERROR (with *pzErr set) if the
+** argument count is wrong, SQLITE_NOMEM if the allocation fails, or the
+** error code from sqlite3_declare_vtab().
+*/
+static int fts3termConnectMethod(
+  sqlite3 *db,                    /* Database connection */
+  void *pCtx,                     /* Unused */
+  int argc,                       /* Number of elements in argv array */
+  const char * const *argv,       /* xCreate/xConnect argument array */
+  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
+  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
+){
+  char const *zDb;                /* Name of database (e.g. "main") */
+  char const *zFts3;              /* Name of fts3 table */
+  int nDb;                        /* Result of strlen(zDb) */
+  int nFts3;                      /* Result of strlen(zFts3) */
+  int nByte;                      /* Bytes of space to allocate here */
+  int rc;                         /* value returned by declare_vtab() */
+  Fts3termTable *p;                /* Virtual table object to return */
+  int iIndex = 0;                 /* Value of optional second argument */
+
+  UNUSED_PARAMETER(pCtx);
+  if( argc==5 ){
+    iIndex = atoi(argv[4]);
+    argc--;
+  }
+
+  /* The user should specify the name of an fts3 table, optionally
+  ** followed by an index number (already consumed above). */
+  if( argc!=4 ){
+    sqlite3Fts3ErrMsg(pzErr,
+        "wrong number of arguments to fts4term constructor"
+    );
+    return SQLITE_ERROR;
+  }
+
+  zDb = argv[1]; 
+  nDb = (int)strlen(zDb);
+  zFts3 = argv[3];
+  nFts3 = (int)strlen(zFts3);
+
+  rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA);
+  if( rc!=SQLITE_OK ) return rc;
+
+  nByte = sizeof(Fts3termTable) + sizeof(Fts3Table) + nDb + nFts3 + 2;   /* +2 for two nul terminators */
+  p = (Fts3termTable *)sqlite3_malloc(nByte);
+  if( !p ) return SQLITE_NOMEM;
+  memset(p, 0, nByte);
+
+  p->pFts3Tab = (Fts3Table *)&p[1];
+  p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1];
+  p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1];
+  p->pFts3Tab->db = db;
+  p->pFts3Tab->nIndex = iIndex+1;
+  p->iIndex = iIndex;
+
+  memcpy((char *)p->pFts3Tab->zDb, zDb, nDb);
+  memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3);
+  sqlite3Fts3Dequote((char *)p->pFts3Tab->zName);
+
+  *ppVtab = (sqlite3_vtab *)p;
+  return SQLITE_OK;
+}
+
+/*
+** This function does the work for both the xDisconnect and xDestroy methods.
+** These tables have no persistent representation of their own, so xDisconnect
+** and xDestroy are identical operations.
+**
+** The embedded Fts3Table lives inside the same allocation as the
+** Fts3termTable (see fts3termConnectMethod()), so freeing the table
+** object releases both.
+*/
+static int fts3termDisconnectMethod(sqlite3_vtab *pVtab){
+  Fts3termTable *pTab = (Fts3termTable *)pVtab;
+  Fts3Table *pFts3 = pTab->pFts3Tab;
+  int iStmt = 0;
+
+  /* Finalize every statement slot. */
+  while( iStmt<SizeofArray(pFts3->aStmt) ){
+    sqlite3_finalize(pFts3->aStmt[iStmt]);
+    iStmt++;
+  }
+
+  sqlite3_free(pFts3->zSegmentsTbl);
+  sqlite3_free(pTab);
+  return SQLITE_OK;
+}
+
+#define FTS4AUX_EQ_CONSTRAINT 1   /* NOTE(review): these three macros are not referenced in this file */
+#define FTS4AUX_GE_CONSTRAINT 2
+#define FTS4AUX_LE_CONSTRAINT 4
+
+/*
+** xBestIndex - Analyze a WHERE and ORDER BY clause.
+**
+** No WHERE constraints are used.  The only optimization is to tell
+** SQLite that an ORDER BY which is an ascending prefix of
+** "term, docid, col, pos" needs no separate sort, since that is the
+** order in which this table emits its rows.
+*/
+static int fts3termBestIndexMethod(
+  sqlite3_vtab *pVTab, 
+  sqlite3_index_info *pInfo
+){
+  int iTerm;                      /* Index of the ORDER BY term checked */
+
+  UNUSED_PARAMETER(pVTab);
+
+  if( pInfo->nOrderBy==0 ){
+    return SQLITE_OK;
+  }
+
+  /* Term i must be column i, ascending. */
+  iTerm = 0;
+  while( iTerm<pInfo->nOrderBy
+      && pInfo->aOrderBy[iTerm].iColumn==iTerm
+      && !pInfo->aOrderBy[iTerm].desc
+  ){
+    iTerm++;
+  }
+  if( iTerm==pInfo->nOrderBy ){
+    pInfo->orderByConsumed = 1;
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** xOpen - Open a cursor.
+**
+** The new cursor is zero-filled.  Returns SQLITE_NOMEM if it cannot
+** be allocated.
+*/
+static int fts3termOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
+  Fts3termCursor *pNew;            /* The cursor being created */
+
+  UNUSED_PARAMETER(pVTab);
+
+  pNew = (Fts3termCursor *)sqlite3_malloc(sizeof(*pNew));
+  if( pNew==0 ){
+    return SQLITE_NOMEM;
+  }
+  memset(pNew, 0, sizeof(*pNew));
+
+  *ppCsr = (sqlite3_vtab_cursor *)pNew;
+  return SQLITE_OK;
+}
+
+/*
+** xClose - Close a cursor.
+**
+** Closes the segments handle of the underlying Fts3Table, finishes the
+** cursor's segment reader, then frees the cursor itself.
+*/
+static int fts3termCloseMethod(sqlite3_vtab_cursor *pCursor){
+  Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
+  Fts3termTable *pTab = (Fts3termTable *)pCursor->pVtab;
+
+  sqlite3Fts3SegmentsClose(pTab->pFts3Tab);
+  sqlite3Fts3SegReaderFinish(&pCsr->csr);
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}
+
+/*
+** xNext - Advance the cursor to the next row, if any.
+**
+** Each row corresponds to one position entry decoded from the doclist
+** of the current term.  When the current doclist is used up (or none
+** has been loaded yet) the segment reader is stepped to the next term.
+** If that step returns anything other than SQLITE_ROW the cursor is
+** flagged as EOF and the step's return code is handed back unchanged.
+**
+** NOTE(review): the v==1 branch adds the decoded value to iCol rather
+** than assigning it.  Confirm against the doclist format whether the
+** column number stored after a 0x01 marker is absolute or relative.
+*/
+static int fts3termNextMethod(sqlite3_vtab_cursor *pCursor){
+  Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
+  Fts3Table *pFts3 = ((Fts3termTable *)pCursor->pVtab)->pFts3Tab;
+  int rc;
+  sqlite3_int64 v;
+
+  /* Increment our pretend rowid value. */
+  pCsr->iRowid++;
+
+  /* Advance to the next term in the full-text index.  This is needed if
+  ** no doclist is loaded or pNext has reached the doclist's last byte. */
+  if( pCsr->csr.aDoclist==0 
+   || pCsr->pNext>=&pCsr->csr.aDoclist[pCsr->csr.nDoclist-1]
+  ){
+    rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr);
+    if( rc!=SQLITE_ROW ){
+      pCsr->isEof = 1;
+      return rc;
+    }
+
+    pCsr->iCol = 0;
+    pCsr->iPos = 0;
+    pCsr->iDocid = 0;
+    pCsr->pNext = pCsr->csr.aDoclist;
+
+    /* Read docid */
+    pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &pCsr->iDocid);
+  }
+
+  pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
+  if( v==0 ){   /* 0: end of this docid's entries; a docid delta follows */
+    pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
+    pCsr->iDocid += v;
+    pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
+    pCsr->iCol = 0;
+    pCsr->iPos = 0;
+  }
+
+  if( v==1 ){   /* 1: a column value follows, then the next position */
+    pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
+    pCsr->iCol += (int)v;
+    pCsr->iPos = 0;
+    pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
+  }
+
+  pCsr->iPos += (int)(v - 2);   /* positions are stored as delta+2 */
+
+  return SQLITE_OK;
+}
+
+/*
+** xFilter - Initialize a cursor to point at the start of its data.
+**
+** No query plan arguments are used: every scan is a full scan of the
+** index chosen when the table was created.  Any previous state of the
+** cursor is discarded, then the cursor is moved to its first row.
+*/
+static int fts3termFilterMethod(
+  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
+  int idxNum,                     /* Strategy index */
+  const char *idxStr,             /* Unused */
+  int nVal,                       /* Number of elements in apVal */
+  sqlite3_value **apVal           /* Arguments for the indexing scheme */
+){
+  Fts3termTable *pTab = (Fts3termTable *)pCursor->pVtab;
+  Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
+  Fts3Table *pFts3 = pTab->pFts3Tab;
+  int rc;
+
+  UNUSED_PARAMETER(nVal);
+  UNUSED_PARAMETER(idxNum);
+  UNUSED_PARAMETER(idxStr);
+  UNUSED_PARAMETER(apVal);
+
+  assert( idxStr==0 && idxNum==0 );
+
+  /* The cursor may be in use already.  Finish its reader and wipe
+  ** everything from the "csr" member to the end of the object. */
+  testcase(pCsr->filter.zTerm);
+  sqlite3Fts3SegReaderFinish(&pCsr->csr);
+  memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
+
+  pCsr->filter.flags =
+      FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY|FTS3_SEGMENT_SCAN;
+
+  rc = sqlite3Fts3SegReaderCursor(pFts3, 0, pTab->iIndex, FTS3_SEGCURSOR_ALL,
+      pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr
+  );
+  if( rc!=SQLITE_OK ) return rc;
+
+  rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
+  if( rc!=SQLITE_OK ) return rc;
+
+  return fts3termNextMethod(pCursor);
+}
+
+/*
+** xEof - Return true if the cursor is at EOF, or false otherwise.
+** The flag is set by fts3termNextMethod().
+*/
+static int fts3termEofMethod(sqlite3_vtab_cursor *pCursor){
+  return ((Fts3termCursor *)pCursor)->isEof;
+}
+
+/*
+** xColumn - Return a column value.
+**
+** Column 0 is the term text, 1 the docid, 2 the column number and
+** 3 the token position of the current row.
+*/
+static int fts3termColumnMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
+  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
+  int iCol                        /* Index of column to read value from */
+){
+  Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
+
+  assert( iCol>=0 && iCol<=3 );
+  if( iCol==0 ){
+    sqlite3_result_text(pCtx, pCsr->csr.zTerm, pCsr->csr.nTerm, SQLITE_TRANSIENT);
+  }else if( iCol==1 ){
+    sqlite3_result_int64(pCtx, pCsr->iDocid);
+  }else if( iCol==2 ){
+    sqlite3_result_int64(pCtx, pCsr->iCol);
+  }else{
+    sqlite3_result_int64(pCtx, pCsr->iPos);
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** xRowid - Return the current rowid for the cursor.  The rowid is a
+** simple counter maintained by fts3termNextMethod().
+*/
+static int fts3termRowidMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
+  sqlite_int64 *pRowid            /* OUT: Rowid value */
+){
+  *pRowid = ((Fts3termCursor *)pCursor)->iRowid;
+  return SQLITE_OK;
+}
+
+/*
+** Register the "fts4term" module with database connection db. Return
+** SQLITE_OK if successful or an error code if sqlite3_create_module()
+** fails.
+*/
+int sqlite3Fts3InitTerm(sqlite3 *db){
+  /* The table is read-only and has no persistent state, so the create
+  ** and connect slots share one routine, as do disconnect and destroy,
+  ** and every write or transaction slot is left empty. */
+  static const sqlite3_module fts3term_module = {
+     0,                           /* iVersion      */
+     fts3termConnectMethod,       /* xCreate       */
+     fts3termConnectMethod,       /* xConnect      */
+     fts3termBestIndexMethod,     /* xBestIndex    */
+     fts3termDisconnectMethod,    /* xDisconnect   */
+     fts3termDisconnectMethod,    /* xDestroy      */
+     fts3termOpenMethod,          /* xOpen         */
+     fts3termCloseMethod,         /* xClose        */
+     fts3termFilterMethod,        /* xFilter       */
+     fts3termNextMethod,          /* xNext         */
+     fts3termEofMethod,           /* xEof          */
+     fts3termColumnMethod,        /* xColumn       */
+     fts3termRowidMethod,         /* xRowid        */
+     0,                           /* xUpdate       */
+     0,                           /* xBegin        */
+     0,                           /* xSync         */
+     0,                           /* xCommit       */
+     0,                           /* xRollback     */
+     0,                           /* xFindFunction */
+     0,                           /* xRename       */
+     0,                           /* xSavepoint    */
+     0,                           /* xRelease      */
+     0                            /* xRollbackTo   */
+  };
+
+  return sqlite3_create_module(db, "fts4term", &fts3term_module, 0);
+}
+
+#endif
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_test.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_test.c
@ -0,0 +1,584 @@
+/*
+** 2011 Jun 13
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file is not part of the production FTS code. It is only used for
+** testing. It contains a Tcl command that can be used to test if a document
+** matches an FTS NEAR expression.
+**
+** As of March 2012, it also contains a version 1 tokenizer used for testing
+** that the sqlite3_tokenizer_module.xLanguage() method is invoked correctly.
+*/
+
+#include <tcl.h>
+#include <string.h>
+#include <assert.h>
+
+#if defined(SQLITE_TEST)
+#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
+
+/* Required so that the "ifdef SQLITE_ENABLE_FTS3" below works */
+#include "fts3Int.h"
+
+/* Capacity of NearPhrase.aToken[]; longer phrases are rejected by the
+** fts3_near_match command with "Too many tokens in phrase". */
+#define NM_MAX_TOKEN 12
+
+typedef struct NearPhrase NearPhrase;
+typedef struct NearDocument NearDocument;
+typedef struct NearToken NearToken;
+
+/* A document, represented as an array of tokens. */
+struct NearDocument {
+  int nToken;                     /* Number of entries in aToken[] */
+  NearToken *aToken;              /* Token array */
+};
+
+/* A single token. The text is addressed by pointer and length; the code
+** in this file always compares it using n, never as a C string. */
+struct NearToken {
+  int n;                          /* Length of token in bytes */
+  const char *z;                  /* Pointer to token string */
+};
+
+/* One phrase of a NEAR expression. nNear is the NEAR distance separating
+** this phrase from the one before it in the phrase array; it stays 0 for
+** the first phrase. */
+struct NearPhrase {
+  int nNear;                      /* Preceding NEAR value */
+  int nToken;                     /* Number of tokens in this phrase */
+  NearToken aToken[NM_MAX_TOKEN]; /* Array of tokens in this phrase */
+};
+
+/*
+** Return 1 if the p->nToken document tokens starting at aToken[0] match
+** phrase p, or 0 otherwise. A phrase token that ends in '*' is a prefix
+** pattern: the document token only has to begin with the text preceding
+** the '*'. Any other phrase token must match exactly (same length, same
+** bytes).
+**
+** The caller must ensure that aToken[] has at least p->nToken entries.
+*/
+static int nm_phrase_match(
+  NearPhrase *p,
+  NearToken *aToken
+){
+  int iTok = 0;
+
+  while( iTok<p->nToken ){
+    const NearToken *pPat = &p->aToken[iTok];   /* Token from the phrase */
+    const NearToken *pDoc = &aToken[iTok];      /* Token from the document */
+    int nCmp = pPat->n;                         /* Bytes to compare */
+
+    if( nCmp>0 && pPat->z[nCmp-1]=='*' ){
+      /* Prefix pattern: drop the '*' and require at least that many bytes */
+      nCmp--;
+      if( pDoc->n<nCmp ) return 0;
+    }else if( pDoc->n!=nCmp ){
+      return 0;
+    }
+    if( memcmp(pDoc->z, pPat->z, nCmp)!=0 ) return 0;
+    iTok++;
+  }
+
+  return 1;
+}
+
+/*
+** Phrase aPhrase[iPhrase] is known to match the document at token offset
+** iPos. Return 1 if the remaining phrases in direction iDir (+1 walks
+** towards the end of aPhrase[], -1 towards the start) can each be matched
+** inside the window derived from the applicable nNear value, or 0 if not.
+** Reaching either end of the phrase array counts as success.
+*/
+static int nm_near_chain(
+  int iDir,                       /* Direction to iterate through aPhrase[] */
+  NearDocument *pDoc,             /* Document to match against */
+  int iPos,                       /* Position at which iPhrase was found */
+  int nPhrase,                    /* Size of phrase array */
+  NearPhrase *aPhrase,            /* Phrase array */
+  int iPhrase                     /* Index of phrase found */
+){
+  NearPhrase *pFrom = &aPhrase[iPhrase];   /* Phrase already matched */
+  NearPhrase *pTo;                         /* Phrase to look for next */
+  int iNext = iPhrase + iDir;              /* Index of pTo in aPhrase[] */
+  int nNear;                               /* NEAR distance between the two */
+  int iFirst;                              /* First candidate offset */
+  int iLast;                               /* Last candidate offset */
+  int iTry;                                /* Candidate offset being tested */
+
+  assert( iDir==1 || iDir==-1 );
+
+  /* Stop successfully at either end of the chain. The NEAR value that
+  ** separates two phrases is stored on the later of the two. */
+  if( iDir==1 ){
+    if( iNext==nPhrase ) return 1;
+    nNear = aPhrase[iNext].nNear;
+  }else{
+    if( iPhrase==0 ) return 1;
+    nNear = pFrom->nNear;
+  }
+  pTo = &aPhrase[iNext];
+
+  /* Candidate window, clamped so that pTo fits inside the document. */
+  iFirst = iPos - nNear - pTo->nToken;
+  if( iFirst<0 ) iFirst = 0;
+  iLast = iPos + nNear + pFrom->nToken;
+  if( iLast > pDoc->nToken - pTo->nToken ){
+    iLast = pDoc->nToken - pTo->nToken;
+  }
+
+  for(iTry=iFirst; iTry<=iLast; iTry++){
+    if( nm_phrase_match(pTo, &pDoc->aToken[iTry])==0 ) continue;
+    if( nm_near_chain(iDir, pDoc, iTry, nPhrase, aPhrase, iNext) ) return 1;
+  }
+
+  return 0;
+}
+
+/*
+** Count the offsets in document pDoc at which phrase aPhrase[iPhrase]
+** occurs as part of a match for the complete NEAR expression. An
+** occurrence is counted only if the NEAR chain can be satisfied both
+** forwards and backwards from it.
+*/
+static int nm_match_count(
+  NearDocument *pDoc,             /* Document to match against */
+  int nPhrase,                    /* Size of phrase array */
+  NearPhrase *aPhrase,            /* Phrase array */
+  int iPhrase                     /* Index of phrase to count matches for */
+){
+  NearPhrase *pPhrase = &aPhrase[iPhrase];
+  int nLimit = pDoc->nToken + 1 - pPhrase->nToken;  /* One past last offset */
+  int nHit = 0;
+  int iOff;
+
+  for(iOff=0; iOff<nLimit; iOff++){
+    if( nm_phrase_match(pPhrase, &pDoc->aToken[iOff])
+     && nm_near_chain(1, pDoc, iOff, nPhrase, aPhrase, iPhrase)   /* i>iPhrase */
+     && nm_near_chain(-1, pDoc, iOff, nPhrase, aPhrase, iPhrase)  /* i<iPhrase */
+    ){
+      nHit++;
+    }
+  }
+
+  return nHit;
+}
+
+/*
+** Tclcmd: fts3_near_match DOCUMENT EXPR ?OPTIONS?
+**
+** DOCUMENT is a Tcl list of tokens. EXPR is a Tcl list whose even-indexed
+** elements (0, 2, 4, ...) are phrases, each itself a list of at most
+** NM_MAX_TOKEN tokens, and whose odd-indexed elements are the integer
+** NEAR distances separating adjacent phrases.
+**
+** The only option recognized is "-phrasecountvar VARNAME". If it is given,
+** the named variable is set to a list containing one match count per
+** phrase.
+**
+** The command result is a boolean: true if the sum of the per-phrase match
+** counts is greater than zero.
+*/
+static int fts3_near_match_cmd(
+  ClientData clientData,
+  Tcl_Interp *interp,
+  int objc,
+  Tcl_Obj *CONST objv[]
+){
+  int nTotal = 0;
+  int rc;
+  int ii;
+  int nPhrase;
+  NearPhrase *aPhrase = 0;
+  NearDocument doc = {0, 0};
+  Tcl_Obj **apDocToken;
+  Tcl_Obj *pRet;
+  Tcl_Obj *pPhrasecount = 0;
+  
+  Tcl_Obj **apExprToken;
+  int nExprToken;
+
+  UNUSED_PARAMETER(clientData);
+
+  /* Must have an odd number of words, and at least 3: the command name,
+  ** DOCUMENT and EXPR, followed by zero or more OPTION VALUE pairs. */
+  if( objc<3 || (objc%2)==0 ){
+    Tcl_WrongNumArgs(interp, 1, objv, "DOCUMENT EXPR ?OPTION VALUE?...");
+    rc = TCL_ERROR;
+    goto near_match_out;
+  }
+
+  /* Parse the OPTION VALUE pairs. Nothing has been allocated at this
+  ** point, so the direct return on an unknown option skips no cleanup. */
+  for(ii=3; ii<objc; ii+=2){
+    enum NM_enum { NM_PHRASECOUNTS };
+    struct TestnmSubcmd {
+      char *zName;
+      enum NM_enum eOpt;
+    } aOpt[] = {
+      { "-phrasecountvar", NM_PHRASECOUNTS },
+      { 0, 0 }
+    };
+    int iOpt;
+    if( Tcl_GetIndexFromObjStruct(
+        interp, objv[ii], aOpt, sizeof(aOpt[0]), "option", 0, &iOpt) 
+    ){
+      return TCL_ERROR;
+    }
+
+    switch( aOpt[iOpt].eOpt ){
+      case NM_PHRASECOUNTS:
+        pPhrasecount = objv[ii+1];
+        break;
+    }
+  }
+
+  /* Build the document token array. Each entry records the pointer and
+  ** length handed back by Tcl_GetStringFromObj(); no text is copied. */
+  rc = Tcl_ListObjGetElements(interp, objv[1], &doc.nToken, &apDocToken);
+  if( rc!=TCL_OK ) goto near_match_out;
+  doc.aToken = (NearToken *)ckalloc(doc.nToken*sizeof(NearToken));
+  for(ii=0; ii<doc.nToken; ii++){
+    doc.aToken[ii].z = Tcl_GetStringFromObj(apDocToken[ii], &doc.aToken[ii].n);
+  }
+
+  rc = Tcl_ListObjGetElements(interp, objv[2], &nExprToken, &apExprToken);
+  if( rc!=TCL_OK ) goto near_match_out;
+
+  /* Phrases sit at the even indexes of EXPR, so N list elements describe
+  ** (N+1)/2 phrases. The array is zeroed so that nNear of the first
+  ** phrase is 0. */
+  nPhrase = (nExprToken + 1) / 2;
+  aPhrase = (NearPhrase *)ckalloc(nPhrase * sizeof(NearPhrase));
+  memset(aPhrase, 0, nPhrase * sizeof(NearPhrase));
+  for(ii=0; ii<nPhrase; ii++){
+    Tcl_Obj *pPhrase = apExprToken[ii*2];
+    Tcl_Obj **apToken;
+    int nToken;
+    int jj;
+
+    rc = Tcl_ListObjGetElements(interp, pPhrase, &nToken, &apToken);
+    if( rc!=TCL_OK ) goto near_match_out;
+    if( nToken>NM_MAX_TOKEN ){
+      Tcl_AppendResult(interp, "Too many tokens in phrase", 0);
+      rc = TCL_ERROR;
+      goto near_match_out;
+    }
+    for(jj=0; jj<nToken; jj++){
+      NearToken *pT = &aPhrase[ii].aToken[jj];
+      pT->z = Tcl_GetStringFromObj(apToken[jj], &pT->n);
+    }
+    aPhrase[ii].nToken = nToken;
+  }
+  /* The NEAR distance preceding phrase ii is EXPR element 2*ii-1. */
+  for(ii=1; ii<nPhrase; ii++){
+    Tcl_Obj *pNear = apExprToken[2*ii-1];
+    int nNear;
+    rc = Tcl_GetIntFromObj(interp, pNear, &nNear);
+    if( rc!=TCL_OK ) goto near_match_out;
+    aPhrase[ii].nNear = nNear;
+  }
+
+  /* Count the matches for each phrase. pRet collects the per-phrase
+  ** counts and is only published if -phrasecountvar was specified. */
+  pRet = Tcl_NewObj();
+  Tcl_IncrRefCount(pRet);
+  for(ii=0; ii<nPhrase; ii++){
+    int nOcc = nm_match_count(&doc, nPhrase, aPhrase, ii);
+    Tcl_ListObjAppendElement(interp, pRet, Tcl_NewIntObj(nOcc));
+    nTotal += nOcc;
+  }
+  if( pPhrasecount ){
+    Tcl_ObjSetVar2(interp, pPhrasecount, 0, pRet, 0);
+  }
+  Tcl_DecrRefCount(pRet);
+  Tcl_SetObjResult(interp, Tcl_NewBooleanObj(nTotal>0));
+
+  /* Shared exit path. aPhrase and doc.aToken are still 0 here if the jump
+  ** was taken before the corresponding allocation was made. */
+ near_match_out: 
+  ckfree((char *)aPhrase);
+  ckfree((char *)doc.aToken);
+  return rc;
+}
+
+/*
+**   Tclcmd: fts3_configure_incr_load ?CHUNKSIZE THRESHOLD?
+**
+** Normally, FTS uses hard-coded values to determine the minimum doclist
+** size eligible for incremental loading, and the size of the chunks loaded
+** when a doclist is incrementally loaded. This command allows the built-in
+** values to be overridden for testing purposes.
+**
+** If present, the first argument is the chunksize in bytes to load doclists
+** in. The second argument is the minimum doclist size in bytes to use
+** incremental loading with.
+**
+** Whether or not the arguments are present, this command returns a list of
+** two integers - the initial chunksize and threshold when the command is
+** invoked. This can be used to restore the default behavior after running
+** tests. For example:
+**
+**    # Override incr-load settings for testing:
+**    set cfg [fts3_configure_incr_load $new_chunksize $new_threshold]
+**
+**    .... run tests ....
+**
+**    # Restore initial incr-load settings:
+**    eval fts3_configure_incr_load $cfg
+*/
+static int fts3_configure_incr_load_cmd(
+  ClientData clientData,
+  Tcl_Interp *interp,
+  int objc,
+  Tcl_Obj *CONST objv[]
+){
+#ifdef SQLITE_ENABLE_FTS3
+  extern int test_fts3_node_chunksize;
+  extern int test_fts3_node_chunk_threshold;
+  Tcl_Obj *pPrev;                 /* List of the two settings on entry */
+
+  if( objc!=1 && objc!=3 ){
+    Tcl_WrongNumArgs(interp, 1, objv, "?CHUNKSIZE THRESHOLD?");
+    return TCL_ERROR;
+  }
+
+  /* Snapshot the current values first - they form the command result. */
+  pPrev = Tcl_NewObj();
+  Tcl_IncrRefCount(pPrev);
+  Tcl_ListObjAppendElement(
+      interp, pPrev, Tcl_NewIntObj(test_fts3_node_chunksize));
+  Tcl_ListObjAppendElement(
+      interp, pPrev, Tcl_NewIntObj(test_fts3_node_chunk_threshold));
+
+  if( objc==3 ){
+    int aNew[2];                  /* New chunksize and threshold */
+    int i;
+
+    /* Neither global is modified unless both arguments are integers. */
+    for(i=0; i<2; i++){
+      if( Tcl_GetIntFromObj(interp, objv[i+1], &aNew[i])!=TCL_OK ){
+        Tcl_DecrRefCount(pPrev);
+        return TCL_ERROR;
+      }
+    }
+    test_fts3_node_chunksize = aNew[0];
+    test_fts3_node_chunk_threshold = aNew[1];
+  }
+
+  Tcl_SetObjResult(interp, pPrev);
+  Tcl_DecrRefCount(pPrev);
+#endif
+  UNUSED_PARAMETER(clientData);
+  return TCL_OK;
+}
+
+#ifdef SQLITE_ENABLE_FTS3
+/**************************************************************************
+** Beginning of test tokenizer code.
+**
+** For language 0, this tokenizer is similar to the default 'simple' 
+** tokenizer. For other languages L, the following:
+**
+**   * Odd numbered languages are case-sensitive. Even numbered 
+**     languages are not.
+**
+**   * Language ids 100 or greater are considered an error.
+**
+** The implementation assumes that the input contains only ASCII characters
+** (i.e. those that may be encoded in UTF-8 using a single byte).
+*/
+/* Tokenizer object. It carries no state beyond the base class. */
+typedef struct test_tokenizer {
+  sqlite3_tokenizer base;
+} test_tokenizer;
+
+/* Cursor used to iterate through the tokens of a single input string.
+** aInput is the caller's pointer, stored as-is (not copied). aBuffer is
+** allocated with sqlite3_malloc() and released when the cursor is closed. */
+typedef struct test_tokenizer_cursor {
+  sqlite3_tokenizer_cursor base;
+  const char *aInput;          /* Input being tokenized */
+  int nInput;                  /* Size of the input in bytes */
+  int iInput;                  /* Current offset in aInput */
+  int iToken;                  /* Number of tokens returned so far */
+  char *aBuffer;               /* Buffer containing current token */
+  int nBuffer;                 /* Number of bytes allocated at aBuffer */
+  int iLangid;                 /* Configured language id */
+} test_tokenizer_cursor;
+
+/*
+** xCreate for the test tokenizer. The argc/argv arguments are ignored.
+** On success a zeroed test_tokenizer is stored in *ppTokenizer and
+** SQLITE_OK is returned. If the allocation fails, SQLITE_NOMEM is
+** returned and *ppTokenizer is not written.
+*/
+static int testTokenizerCreate(
+  int argc, const char * const *argv,
+  sqlite3_tokenizer **ppTokenizer
+){
+  test_tokenizer *pTok = sqlite3_malloc(sizeof(test_tokenizer));
+  UNUSED_PARAMETER(argc);
+  UNUSED_PARAMETER(argv);
+
+  if( pTok==0 ){
+    return SQLITE_NOMEM;
+  }
+  memset(pTok, 0, sizeof(*pTok));
+
+  *ppTokenizer = (sqlite3_tokenizer *)pTok;
+  return SQLITE_OK;
+}
+
+/*
+** xDestroy for the test tokenizer: release the object allocated by
+** testTokenizerCreate(). Always returns SQLITE_OK.
+*/
+static int testTokenizerDestroy(sqlite3_tokenizer *pTokenizer){
+  sqlite3_free((test_tokenizer *)pTokenizer);
+  return SQLITE_OK;
+}
+
+/*
+** xOpen for the test tokenizer: create a cursor over the nBytes bytes at
+** pInput. A negative nBytes means pInput is nul-terminated and its length
+** is measured with strlen(). The input is not copied.
+**
+** *ppCursor is always written: the new cursor on success (SQLITE_OK), or
+** NULL if the allocation fails (SQLITE_NOMEM).
+*/
+static int testTokenizerOpen(
+  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
+  const char *pInput, int nBytes,        /* String to be tokenized */
+  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
+){
+  test_tokenizer_cursor *pNew;           /* New cursor object */
+
+  UNUSED_PARAMETER(pTokenizer);
+
+  pNew = (test_tokenizer_cursor *)sqlite3_malloc(sizeof(test_tokenizer_cursor));
+  if( pNew ){
+    memset(pNew, 0, sizeof(*pNew));
+    pNew->aInput = pInput;
+    pNew->nInput = (nBytes<0) ? (int)strlen(pInput) : nBytes;
+  }
+
+  *ppCursor = (sqlite3_tokenizer_cursor *)pNew;
+  return pNew ? SQLITE_OK : SQLITE_NOMEM;
+}
+
+/*
+** xClose for the test tokenizer: free the token buffer, then the cursor
+** itself. Always returns SQLITE_OK.
+*/
+static int testTokenizerClose(sqlite3_tokenizer_cursor *pCursor){
+  test_tokenizer_cursor *p = (test_tokenizer_cursor *)pCursor;
+  char *aBuf = p->aBuffer;
+
+  sqlite3_free(aBuf);
+  sqlite3_free(p);
+  return SQLITE_OK;
+}
+
+/*
+** Return 1 if c is an ASCII letter ('a'..'z' or 'A'..'Z'), else 0.
+*/
+static int testIsTokenChar(char c){
+  if( c>='a' && c<='z' ) return 1;
+  if( c>='A' && c<='Z' ) return 1;
+  return 0;
+}
+/*
+** Map 'A'..'Z' to 'a'..'z'. Every other value is returned unchanged.
+*/
+static int testTolower(char c){
+  char lower = (c>='A' && c<='Z') ? (char)(c - ('A'-'a')) : c;
+  return lower;
+}
+
+/*
+** xNext for the test tokenizer: return the next token from the input.
+**
+** A token is a maximal run of ASCII letters (see testIsTokenChar()); every
+** other byte acts as a separator. The token text is copied into the
+** cursor's aBuffer, folded to lower case unless the configured language id
+** is odd. The position reported for the first token is 1.
+**
+** Returns SQLITE_OK with all five output parameters set, SQLITE_DONE once
+** the input is exhausted, or SQLITE_NOMEM if the token buffer cannot be
+** allocated. No output parameter is written in the latter two cases.
+**
+** *ppToken points into aBuffer, which is reused or reallocated by the next
+** call, so the token text is only valid until then.
+*/
+static int testTokenizerNext(
+  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by testTokenizerOpen */
+  const char **ppToken,               /* OUT: *ppToken is the token text */
+  int *pnBytes,                       /* OUT: Number of bytes in token */
+  int *piStartOffset,                 /* OUT: Starting offset of token */
+  int *piEndOffset,                   /* OUT: Ending offset of token */
+  int *piPosition                     /* OUT: Position integer of token */
+){
+  test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
+  int rc = SQLITE_OK;
+  const char *p;
+  const char *pEnd;
+
+  p = &pCsr->aInput[pCsr->iInput];
+  pEnd = &pCsr->aInput[pCsr->nInput];
+
+  /* Skip past any separator (non-letter) bytes */
+  assert( p<=pEnd );
+  while( p<pEnd && testIsTokenChar(*p)==0 ) p++;
+
+  if( p==pEnd ){
+    rc = SQLITE_DONE;
+  }else{
+    /* Advance to the end of the token */
+    const char *pToken = p;
+    int nToken;
+    while( p<pEnd && testIsTokenChar(*p) ) p++;
+    nToken = (int)(p-pToken);
+
+    /* Grow the buffer if the token does not fit. The new capacity is
+    ** recorded in nBuffer so that later tokens of the same or smaller
+    ** size reuse the allocation. If the allocation fails, nBuffer is
+    ** reset to 0 to match the NULL aBuffer. */
+    if( nToken>pCsr->nBuffer ){
+      sqlite3_free(pCsr->aBuffer);
+      pCsr->aBuffer = sqlite3_malloc(nToken);
+      pCsr->nBuffer = pCsr->aBuffer ? nToken : 0;
+    }
+    if( pCsr->aBuffer==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      int i;
+
+      /* Odd language ids are case-sensitive: copy the bytes verbatim. */
+      if( pCsr->iLangid & 0x00000001 ){
+        for(i=0; i<nToken; i++) pCsr->aBuffer[i] = pToken[i];
+      }else{
+        for(i=0; i<nToken; i++) pCsr->aBuffer[i] = testTolower(pToken[i]);
+      }
+      pCsr->iToken++;
+      pCsr->iInput = (int)(p - pCsr->aInput);
+
+      *ppToken = pCsr->aBuffer;
+      *pnBytes = nToken;
+      *piStartOffset = (int)(pToken - pCsr->aInput);
+      *piEndOffset = (int)(p - pCsr->aInput);
+      *piPosition = pCsr->iToken;
+    }
+  }
+
+  return rc;
+}
+
+/*
+** xLanguage for the test tokenizer: record iLangid on the cursor. The
+** value is stored even when it is rejected. Language ids of 100 or more
+** cause SQLITE_ERROR to be returned; anything else yields SQLITE_OK.
+*/
+static int testTokenizerLanguage(
+  sqlite3_tokenizer_cursor *pCursor,
+  int iLangid
+){
+  ((test_tokenizer_cursor *)pCursor)->iLangid = iLangid;
+  return (iLangid>=100) ? SQLITE_ERROR : SQLITE_OK;
+}
+#endif
+
+/*
+** Tclcmd: fts3_test_tokenizer
+**
+** Takes no arguments. When SQLITE_ENABLE_FTS3 is defined, the result is a
+** byte array holding the raw bytes of a pointer to the test tokenizer
+** module (a version 1 module, so xLanguage is populated). Otherwise the
+** command does nothing and returns TCL_OK.
+*/
+static int fts3_test_tokenizer_cmd(
+  ClientData clientData,
+  Tcl_Interp *interp,
+  int objc,
+  Tcl_Obj *CONST objv[]
+){
+#ifdef SQLITE_ENABLE_FTS3
+  static const sqlite3_tokenizer_module testTokenizerModule = {
+    1,                            /* iVersion  */
+    testTokenizerCreate,          /* xCreate   */
+    testTokenizerDestroy,         /* xDestroy  */
+    testTokenizerOpen,            /* xOpen     */
+    testTokenizerClose,           /* xClose    */
+    testTokenizerNext,            /* xNext     */
+    testTokenizerLanguage         /* xLanguage */
+  };
+  const sqlite3_tokenizer_module *pModule = &testTokenizerModule;
+  Tcl_Obj *pBlob;
+
+  if( objc!=1 ){
+    Tcl_WrongNumArgs(interp, 1, objv, "");
+    return TCL_ERROR;
+  }
+
+  /* The blob contains the pointer value itself, not the module. */
+  pBlob = Tcl_NewByteArrayObj(
+      (const unsigned char *)&pModule, sizeof(sqlite3_tokenizer_module *)
+  );
+  Tcl_SetObjResult(interp, pBlob);
+#endif
+  UNUSED_PARAMETER(clientData);
+  return TCL_OK;
+}
+
+/*
+** Tclcmd: fts3_test_varint INTEGER
+**
+** Round-trip INTEGER through the FTS varint encoder and decoder. It is an
+** error if the decoded value, or the number of bytes consumed, differs
+** from what was written. Values in the range 0..2147483647 are also
+** decoded with fts3GetVarint32() and checked in the same way.
+**
+** On a mismatch the interpreter result is set to "error testing <value>"
+** and TCL_ERROR is returned. Tcl_AppendResult() copies its arguments, so
+** the sqlite3_mprintf() buffer is released straight afterwards.
+*/
+static int fts3_test_varint_cmd(
+  ClientData clientData,
+  Tcl_Interp *interp,
+  int objc,
+  Tcl_Obj *CONST objv[]
+){
+#ifdef SQLITE_ENABLE_FTS3
+  char aBuf[24];
+  int rc;
+  Tcl_WideInt w, w2;
+  int nByte, nByte2;
+
+  if( objc!=2 ){
+    Tcl_WrongNumArgs(interp, 1, objv, "INTEGER");
+    return TCL_ERROR;
+  }
+
+  rc = Tcl_GetWideIntFromObj(interp, objv[1], &w);
+  if( rc!=TCL_OK ) return rc;
+
+  nByte = sqlite3Fts3PutVarint(aBuf, w);
+  nByte2 = sqlite3Fts3GetVarint(aBuf, &w2);
+  if( w!=w2 || nByte!=nByte2 ){
+    char *zErr = sqlite3_mprintf("error testing %lld", w);
+    Tcl_ResetResult(interp);
+    /* The argument list must end with a null pointer, not an int 0. */
+    Tcl_AppendResult(interp, zErr, (char *)0);
+    sqlite3_free(zErr);
+    return TCL_ERROR;
+  }
+
+  if( w<=2147483647 && w>=0 ){
+    int i;
+    nByte2 = fts3GetVarint32(aBuf, &i);
+    if( (int)w!=i || nByte!=nByte2 ){
+      char *zErr = sqlite3_mprintf("error testing %lld (32-bit)", w);
+      Tcl_ResetResult(interp);
+      Tcl_AppendResult(interp, zErr, (char *)0);
+      sqlite3_free(zErr);
+      return TCL_ERROR;
+    }
+  }
+
+#endif
+  UNUSED_PARAMETER(clientData);
+  return TCL_OK;
+}
+
+/* 
+** End of tokenizer code.
+**************************************************************************/ 
+
+/*
+** Register the four Tcl test commands implemented in this file with
+** interpreter interp. Always returns TCL_OK.
+*/
+int Sqlitetestfts3_Init(Tcl_Interp *interp){
+  static struct TestFts3Cmd {
+    char *zName;                  /* Name of the Tcl command */
+    Tcl_ObjCmdProc *xProc;        /* Function implementing it */
+  } aCmd[] = {
+    { "fts3_near_match",          fts3_near_match_cmd },
+    { "fts3_configure_incr_load", fts3_configure_incr_load_cmd },
+    { "fts3_test_tokenizer",      fts3_test_tokenizer_cmd },
+    { "fts3_test_varint",         fts3_test_varint_cmd }
+  };
+  int i;
+
+  for(i=0; i<(int)(sizeof(aCmd)/sizeof(aCmd[0])); i++){
+    Tcl_CreateObjCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
+  }
+  return TCL_OK;
+}
+#endif                  /* SQLITE_ENABLE_FTS3 || SQLITE_ENABLE_FTS4 */
+#endif                  /* ifdef SQLITE_TEST */
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_tokenize_vtab.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_tokenize_vtab.c
@ -0,0 +1,454 @@
+/*
+** 2013 Apr 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This file contains code for the "fts3tokenize" virtual table module.
+** An fts3tokenize virtual table is created as follows:
+**
+**   CREATE VIRTUAL TABLE <tbl> USING fts3tokenize(
+**       <tokenizer-name>, <arg-1>, ...
+**   );
+**
+** The table created has the following schema:
+**
+**   CREATE TABLE <tbl>(input, token, start, end, position)
+**
+** When queried, the query must include a WHERE clause of type:
+**
+**   input = <string>
+**
+** The virtual table module tokenizes this <string>, using the FTS3 
+** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE 
+** statement and returns one row for each token in the result. With
+** fields set as follows:
+**
+**   input:   Always set to a copy of <string>
+**   token:   A token from the input.
+**   start:   Byte offset of the token within the input <string>.
+**   end:     Byte offset of the byte immediately following the end of the
+**            token within the input string.
+**   position: Token offset of token within input.
+**
+*/
+#include "fts3Int.h"
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+#include <string.h>
+#include <assert.h>
+
+typedef struct Fts3tokTable Fts3tokTable;
+typedef struct Fts3tokCursor Fts3tokCursor;
+
+/*
+** Virtual table structure. One of these is allocated by each call to
+** xCreate/xConnect. The table owns pTok, which is destroyed through
+** pMod->xDestroy() when the table is disconnected.
+*/
+struct Fts3tokTable {
+  sqlite3_vtab base;              /* Base class used by SQLite core */
+  const sqlite3_tokenizer_module *pMod;  /* Tokenizer module methods */
+  sqlite3_tokenizer *pTok;        /* Tokenizer instance created from pMod */
+};
+
+/*
+** Virtual table cursor structure.
+**
+** zInput is a sqlite3_malloc'd, nul-terminated copy of the string being
+** tokenized and is owned by the cursor. zToken is filled in by the
+** tokenizer's xNext method; a NULL zToken is how the cursor signals EOF.
+*/
+struct Fts3tokCursor {
+  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
+  char *zInput;                   /* Input string */
+  sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */
+  int iRowid;                     /* Current 'rowid' value */
+  const char *zToken;             /* Current 'token' value */
+  int nToken;                     /* Size of zToken in bytes */
+  int iStart;                     /* Current 'start' value */
+  int iEnd;                       /* Current 'end' value */
+  int iPos;                       /* Current 'pos' value */
+};
+
+/*
+** Look up the tokenizer module registered under zName in hash table pHash.
+** The lookup key is the name including its nul-terminator.
+**
+** If an entry is found, *pp is set to point to it and SQLITE_OK is
+** returned. Otherwise an "unknown tokenizer" message is stored via pzErr,
+** SQLITE_ERROR is returned and *pp is not written.
+*/
+static int fts3tokQueryTokenizer(
+  Fts3Hash *pHash,
+  const char *zName,
+  const sqlite3_tokenizer_module **pp,
+  char **pzErr
+){
+  int nKey = (int)strlen(zName) + 1;
+  sqlite3_tokenizer_module *pFound;
+
+  pFound = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nKey);
+  if( pFound==0 ){
+    sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", zName);
+    return SQLITE_ERROR;
+  }
+
+  *pp = pFound;
+  return SQLITE_OK;
+}
+
+/*
+** Copy the argc nul-terminated strings in argv[] into one block of memory
+** and pass each copy to sqlite3Fts3Dequote().
+**
+** The block obtained from sqlite3_malloc() is laid out as an array of argc
+** string pointers followed immediately by the string data, so a single
+** sqlite3_free() of *pazDequote releases everything.
+**
+** On success SQLITE_OK is returned and *pazDequote points to the pointer
+** array, or is set to 0 if argc is 0. If the allocation fails,
+** SQLITE_NOMEM is returned and *pazDequote is set to 0.
+*/
+static int fts3tokDequoteArray(
+  int argc,                       /* Number of elements in argv[] */
+  const char * const *argv,       /* Input array */
+  char ***pazDequote              /* Output array */
+){
+  char **azOut;                   /* The new array of string pointers */
+  char *zDest;                    /* Where the next string copy goes */
+  int nStr = 0;                   /* Bytes needed for all strings, with nuls */
+  int i;
+
+  if( argc==0 ){
+    *pazDequote = 0;
+    return SQLITE_OK;
+  }
+
+  for(i=0; i<argc; i++){
+    nStr += (int)(strlen(argv[i]) + 1);
+  }
+
+  azOut = sqlite3_malloc(sizeof(char *)*argc + nStr);
+  *pazDequote = azOut;
+  if( azOut==0 ){
+    return SQLITE_NOMEM;
+  }
+
+  /* String storage begins directly after the last pointer slot. */
+  zDest = (char *)&azOut[argc];
+  for(i=0; i<argc; i++){
+    int nCopy = (int)strlen(argv[i]) + 1;
+    azOut[i] = zDest;
+    memcpy(zDest, argv[i], nCopy);
+    sqlite3Fts3Dequote(zDest);
+    zDest += nCopy;
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** Schema of the tokenizer table.
+*/
+#define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)"
+
+/*
+** This function does all the work for both the xConnect and xCreate methods.
+** These tables have no persistent representation of their own, so xConnect
+** and xCreate are identical operations.
+**
+**   argv[0]: module name
+**   argv[1]: database name 
+**   argv[2]: table name
+**   argv[3]: first argument (tokenizer name)
+**
+** If no tokenizer name is supplied, "simple" is used. Any arguments that
+** follow the tokenizer name are dequoted and passed to the tokenizer's
+** xCreate method. If there are none, xCreate is invoked with an argument
+** count of 0 and a NULL argument array.
+**
+** On success *ppVtab is set to the new table and SQLITE_OK is returned.
+** On failure an SQLite error code is returned and any tokenizer instance
+** already created is destroyed again.
+*/
+static int fts3tokConnectMethod(
+  sqlite3 *db,                    /* Database connection */
+  void *pHash,                    /* Hash table of tokenizers */
+  int argc,                       /* Number of elements in argv array */
+  const char * const *argv,       /* xCreate/xConnect argument array */
+  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
+  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
+){
+  Fts3tokTable *pTab = 0;
+  const sqlite3_tokenizer_module *pMod = 0;
+  sqlite3_tokenizer *pTok = 0;
+  int rc;
+  char **azDequote = 0;
+  int nDequote;
+
+  rc = sqlite3_declare_vtab(db, FTS3_TOK_SCHEMA);
+  if( rc!=SQLITE_OK ) return rc;
+
+  nDequote = argc-3;
+  rc = fts3tokDequoteArray(nDequote, &argv[3], &azDequote);
+
+  if( rc==SQLITE_OK ){
+    const char *zModule;
+    if( nDequote<1 ){
+      zModule = "simple";
+    }else{
+      zModule = azDequote[0];
+    }
+    rc = fts3tokQueryTokenizer((Fts3Hash*)pHash, zModule, &pMod, pzErr);
+  }
+
+  assert( (rc==SQLITE_OK)==(pMod!=0) );
+  if( rc==SQLITE_OK ){
+    /* azDequote is NULL when nDequote is 0, so only form a pointer into
+    ** the array when there really are arguments after the tokenizer name. */
+    int nArg = (nDequote>1 ? nDequote-1 : 0);
+    const char * const *azArg = 0;
+    if( nArg>0 ){
+      azArg = (const char * const *)&azDequote[1];
+    }
+    rc = pMod->xCreate(nArg, azArg, &pTok);
+  }
+
+  if( rc==SQLITE_OK ){
+    pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable));
+    if( pTab==0 ){
+      rc = SQLITE_NOMEM;
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    memset(pTab, 0, sizeof(Fts3tokTable));
+    pTab->pMod = pMod;
+    pTab->pTok = pTok;
+    *ppVtab = &pTab->base;
+  }else{
+    if( pTok ){
+      pMod->xDestroy(pTok);
+    }
+  }
+
+  /* The dequoted strings are no longer needed once xCreate has returned. */
+  sqlite3_free(azDequote);
+  return rc;
+}
+
+/*
+** Implementation of both xDisconnect and xDestroy. As the table has no
+** persistent state, all there is to do is destroy the tokenizer instance
+** and free the table object. Always returns SQLITE_OK.
+*/
+static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){
+  Fts3tokTable *p = (Fts3tokTable *)pVtab;
+  const sqlite3_tokenizer_module *pModule = p->pMod;
+
+  pModule->xDestroy(p->pTok);
+  sqlite3_free(p);
+  return SQLITE_OK;
+}
+
+/*
+** xBestIndex - Analyze a WHERE and ORDER BY clause.
+**
+** The first usable equality constraint on column 0 ("input") selects plan
+** idxNum=1: its value is passed to xFilter as the only argument, SQLite is
+** told it need not re-check the constraint, and the cost is set to 1.
+** With no such constraint idxNum is 0 and the cost is left untouched.
+*/
+static int fts3tokBestIndexMethod(
+  sqlite3_vtab *pVTab, 
+  sqlite3_index_info *pInfo
+){
+  int iCons;
+  UNUSED_PARAMETER(pVTab);
+
+  for(iCons=0; iCons<pInfo->nConstraint; iCons++){
+    if( !pInfo->aConstraint[iCons].usable ) continue;
+    if( pInfo->aConstraint[iCons].iColumn!=0 ) continue;
+    if( pInfo->aConstraint[iCons].op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue;
+
+    pInfo->idxNum = 1;
+    pInfo->aConstraintUsage[iCons].argvIndex = 1;
+    pInfo->aConstraintUsage[iCons].omit = 1;
+    pInfo->estimatedCost = 1;
+    return SQLITE_OK;
+  }
+
+  /* No usable "input = ?" term was found. */
+  pInfo->idxNum = 0;
+  assert( pInfo->estimatedCost>1000000.0 );
+
+  return SQLITE_OK;
+}
+
+/*
+** xOpen - Open a cursor.
+**
+** Allocates a zeroed Fts3tokCursor and stores it in *ppCsr. Returns
+** SQLITE_OK, or SQLITE_NOMEM (leaving *ppCsr unwritten) if the allocation
+** fails.
+*/
+static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
+  Fts3tokCursor *pNew;
+  UNUSED_PARAMETER(pVTab);
+
+  pNew = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor));
+  if( pNew ){
+    memset(pNew, 0, sizeof(*pNew));
+    *ppCsr = (sqlite3_vtab_cursor *)pNew;
+    return SQLITE_OK;
+  }
+  return SQLITE_NOMEM;
+}
+
+/*
+** Return the cursor passed as the only argument to the state it was in
+** when fts3tokOpenMethod() created it: close the tokenizer cursor if one
+** is open, free the copy of the input string and zero the current-row
+** fields. The base class member is left alone.
+*/
+static void fts3tokResetCursor(Fts3tokCursor *pCsr){
+  sqlite3_tokenizer_cursor *pTokCsr = pCsr->pCsr;
+
+  if( pTokCsr!=0 ){
+    Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab);
+    pTab->pMod->xClose(pTokCsr);
+    pCsr->pCsr = 0;
+  }
+
+  sqlite3_free(pCsr->zInput);
+  pCsr->zInput = 0;
+
+  /* A NULL zToken is what fts3tokEofMethod() reports as EOF. */
+  pCsr->zToken = 0;
+  pCsr->nToken = pCsr->iStart = pCsr->iEnd = pCsr->iPos = pCsr->iRowid = 0;
+}
+
+/*
+** xClose - Close a cursor. Everything the cursor holds is released first,
+** then the cursor object itself. Always returns SQLITE_OK.
+*/
+static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){
+  Fts3tokCursor *pDoomed = (Fts3tokCursor *)pCursor;
+
+  fts3tokResetCursor(pDoomed);
+  sqlite3_free(pDoomed);
+  return SQLITE_OK;
+}
+
+/*
+** xNext - Advance the cursor to the next row, if any.
+**
+** The rowid is incremented and the tokenizer asked for its next token. If
+** the tokenizer returns anything other than SQLITE_OK the cursor is reset,
+** which also puts it at EOF. SQLITE_DONE is reported to the caller as
+** SQLITE_OK; any other error code is passed through.
+*/
+static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){
+  Fts3tokCursor *pCur = (Fts3tokCursor *)pCursor;
+  const sqlite3_tokenizer_module *pModule;
+  int rc;                         /* Return code */
+
+  pModule = ((Fts3tokTable *)(pCursor->pVtab))->pMod;
+  pCur->iRowid++;
+  rc = pModule->xNext(
+      pCur->pCsr, &pCur->zToken, &pCur->nToken,
+      &pCur->iStart, &pCur->iEnd, &pCur->iPos
+  );
+  if( rc==SQLITE_OK ) return SQLITE_OK;
+
+  fts3tokResetCursor(pCur);
+  return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
+}
+
+/*
+** xFilter - Initialize a cursor to point at the start of its data.
+**
+** Only plan idxNum==1 (chosen by xBestIndex for "input = ?") is supported.
+** For it, apVal[0] is converted to text, a nul-terminated private copy is
+** stored in the cursor, a tokenizer cursor is opened on that copy and the
+** cursor is advanced to the first token. For any other idxNum the cursor
+** is reset and SQLITE_ERROR is returned.
+**
+** Returns SQLITE_NOMEM if the copy cannot be allocated, or whatever the
+** tokenizer's xOpen/xNext methods return.
+*/
+static int fts3tokFilterMethod(
+  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
+  int idxNum,                     /* Strategy index */
+  const char *idxStr,             /* Unused */
+  int nVal,                       /* Number of elements in apVal */
+  sqlite3_value **apVal           /* Arguments for the indexing scheme */
+){
+  int rc = SQLITE_ERROR;
+  Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
+  Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab);
+  UNUSED_PARAMETER(idxStr);
+  UNUSED_PARAMETER(nVal);
+
+  fts3tokResetCursor(pCsr);
+  if( idxNum==1 ){
+    const char *zByte = (const char *)sqlite3_value_text(apVal[0]);
+    int nByte = sqlite3_value_bytes(apVal[0]);
+    pCsr->zInput = sqlite3_malloc(nByte+1);
+    if( pCsr->zInput==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      /* sqlite3_value_text() returns NULL for an SQL NULL value, in which
+      ** case nByte is 0. memcpy() must not be handed a NULL source even
+      ** for a zero-length copy. */
+      if( nByte>0 ) memcpy(pCsr->zInput, zByte, nByte);
+      pCsr->zInput[nByte] = 0;
+      rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr);
+      if( rc==SQLITE_OK ){
+        /* xOpen is not relied upon to set the back-pointer itself. */
+        pCsr->pCsr->pTokenizer = pTab->pTok;
+      }
+    }
+  }
+
+  if( rc!=SQLITE_OK ) return rc;
+  return fts3tokNextMethod(pCursor);
+}
+
+/*
+** xEof - Return true if the cursor is at EOF, or false otherwise. The
+** cursor is at EOF whenever it has no current token.
+*/
+static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){
+  const Fts3tokCursor *pCur = (const Fts3tokCursor *)pCursor;
+  return pCur->zToken ? 0 : 1;
+}
+
+/*
+** xColumn - Return a column value.
+**
+** Column numbers follow the declared schema:
+**
+**   CREATE TABLE x(input, token, start, end, position)
+**
+** Text values are handed over with SQLITE_TRANSIENT, so SQLite takes its
+** own copy of them. Always returns SQLITE_OK.
+*/
+static int fts3tokColumnMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
+  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
+  int iCol                        /* Index of column to read value from */
+){
+  Fts3tokCursor *pCur = (Fts3tokCursor *)pCursor;
+
+  if( iCol==0 ){
+    sqlite3_result_text(pCtx, pCur->zInput, -1, SQLITE_TRANSIENT);
+  }else if( iCol==1 ){
+    sqlite3_result_text(pCtx, pCur->zToken, pCur->nToken, SQLITE_TRANSIENT);
+  }else if( iCol==2 ){
+    sqlite3_result_int(pCtx, pCur->iStart);
+  }else if( iCol==3 ){
+    sqlite3_result_int(pCtx, pCur->iEnd);
+  }else{
+    assert( iCol==4 );
+    sqlite3_result_int(pCtx, pCur->iPos);
+  }
+  return SQLITE_OK;
+}
+
+/*
+** xRowid - Return the current rowid for the cursor. The rowid is the
+** counter that fts3tokNextMethod() increments on every call. Always
+** returns SQLITE_OK.
+*/
+static int fts3tokRowidMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
+  sqlite_int64 *pRowid            /* OUT: Rowid value */
+){
+  *pRowid = (sqlite3_int64)(((Fts3tokCursor *)pCursor)->iRowid);
+  return SQLITE_OK;
+}
+
+/*
+** Register the "fts3tokenize" module with database connection db. The
+** tokenizer hash table pHash is attached as the module's client data,
+** which is how it reaches fts3tokConnectMethod(). Returns SQLITE_OK if
+** successful, or the error code from sqlite3_create_module() otherwise.
+*/
+int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){
+  static const sqlite3_module fts3tok_module = {
+     0,                           /* iVersion      */
+     fts3tokConnectMethod,        /* xCreate       */
+     fts3tokConnectMethod,        /* xConnect      */
+     fts3tokBestIndexMethod,      /* xBestIndex    */
+     fts3tokDisconnectMethod,     /* xDisconnect   */
+     fts3tokDisconnectMethod,     /* xDestroy      */
+     fts3tokOpenMethod,           /* xOpen         */
+     fts3tokCloseMethod,          /* xClose        */
+     fts3tokFilterMethod,         /* xFilter       */
+     fts3tokNextMethod,           /* xNext         */
+     fts3tokEofMethod,            /* xEof          */
+     fts3tokColumnMethod,         /* xColumn       */
+     fts3tokRowidMethod,          /* xRowid        */
+     0,                           /* xUpdate       */
+     0,                           /* xBegin        */
+     0,                           /* xSync         */
+     0,                           /* xCommit       */
+     0,                           /* xRollback     */
+     0,                           /* xFindFunction */
+     0,                           /* xRename       */
+     0,                           /* xSavepoint    */
+     0,                           /* xRelease      */
+     0                            /* xRollbackTo   */
+  };
+
+  return sqlite3_create_module(
+      db, "fts3tokenize", &fts3tok_module, (void*)pHash
+  );
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_tokenizer.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_tokenizer.c
@ -0,0 +1,507 @@
+/*
+** 2007 June 22
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This is part of an SQLite module implementing full-text search.
+** This particular file implements the generic tokenizer interface.
+*/
+
+/*
+** The code in this file is only compiled if:
+**
+**     * The FTS3 module is being built as an extension
+**       (in which case SQLITE_CORE is not defined), or
+**
+**     * The FTS3 module is being built into the core of
+**       SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
+*/
+#include "fts3Int.h"
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+#include <assert.h>
+#include <string.h>
+
+/*
+** Implementation of the SQL scalar function for accessing the underlying 
+** hash table. This function may be called as follows:
+**
+**   SELECT <function-name>(<key-name>);
+**   SELECT <function-name>(<key-name>, <pointer>);
+**
+** where <function-name> is the name passed as the second argument
+** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer').
+**
+** If the <pointer> argument is specified, it must be a blob value
+** containing a pointer to be stored as the hash data corresponding
+** to the string <key-name>. If <pointer> is not specified, then
+** the string <key-name> must already exist in the hash table. Otherwise,
+** an error is returned.
+**
+** Whether or not the <pointer> argument is specified, the value returned
+** is a blob containing the pointer stored as the hash data corresponding
+** to string <key-name> (after the hash-table is updated, if applicable).
+*/
+/*
+** SQL function body: look up (argc==1) or register (argc==2) a tokenizer
+** module pointer in the hash table supplied as the function's user data.
+** The result is either a blob holding the raw pointer value or an SQL
+** error.
+*/
+static void scalarFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  Fts3Hash *pHash;
+  void *pPtr = 0;
+  const unsigned char *zName;
+  int nName;
+
+  assert( argc==1 || argc==2 );
+
+  pHash = (Fts3Hash *)sqlite3_user_data(context);
+
+  /* The hash key is the name including its nul terminator, hence the +1. */
+  zName = sqlite3_value_text(argv[0]);
+  nName = sqlite3_value_bytes(argv[0])+1;
+
+  if( argc==2 ){
+#ifdef SQLITE_ENABLE_FTS3_TOKENIZER
+    void *pOld;
+    int n = sqlite3_value_bytes(argv[1]);
+    /* The second argument must be exactly pointer-sized; its bytes are
+    ** reinterpreted as a pointer below. */
+    if( zName==0 || n!=sizeof(pPtr) ){
+      sqlite3_result_error(context, "argument type mismatch", -1);
+      return;
+    }
+    pPtr = *(void **)sqlite3_value_blob(argv[1]);
+    pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr);
+    /* NOTE(review): presumably sqlite3Fts3HashInsert() hands back the new
+    ** data pointer when it fails to allocate - confirm in fts3_hash.c. If
+    ** it also returns the previous value on replacement, re-registering
+    ** the same pointer under the same name would report this error too. */
+    if( pOld==pPtr ){
+      sqlite3_result_error(context, "out of memory", -1);
+      return;
+    }
+#else
+    /* Registration through SQL is compiled out in this configuration. */
+    sqlite3_result_error(context, "fts3tokenize: " 
+        "disabled - rebuild with -DSQLITE_ENABLE_FTS3_TOKENIZER", -1
+    );
+    return;
+#endif /* SQLITE_ENABLE_FTS3_TOKENIZER */
+  }else
+  {
+    /* Lookup only. A NULL name is never looked up, so pPtr stays 0 and
+    ** the "unknown tokenizer" error below is raised. */
+    if( zName ){
+      pPtr = sqlite3Fts3HashFind(pHash, zName, nName);
+    }
+    if( !pPtr ){
+      char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
+      sqlite3_result_error(context, zErr, -1);
+      sqlite3_free(zErr);
+      return;
+    }
+  }
+
+  /* Return the pointer value itself as a blob of sizeof(void*) bytes;
+  ** SQLITE_TRANSIENT is used because pPtr is a local variable. */
+  sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
+}
+
+/*
+** Return 1 if byte c may be part of an unquoted FTS identifier, or 0 if
+** it may not. Every byte with the high bit set qualifies; for 7-bit
+** values the answer comes from a lookup table that accepts '$', the
+** decimal digits, ASCII letters and '_'.
+*/
+int sqlite3Fts3IsIdChar(char c){
+  static const char isFtsIdChar[] = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x */
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 1x */
+      0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
+      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
+      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
+  };
+
+  /* High-bit bytes are accepted without consulting the table, so the
+  ** table index below is always in the range 0..127. */
+  if( c&0x80 ){
+    return 1;
+  }
+  return isFtsIdChar[(int)(c)]!=0;
+}
+
+/*
+** Locate the next token in the nul-terminated string zStr.
+**
+** On success the return value points at the first byte of the token
+** (inside zStr) and *pn is set to the token length in bytes. If the end
+** of the string is reached before a token starts, 0 is returned and *pn
+** is left unmodified.
+**
+** Three token forms are recognised:
+**
+**   * A quoted string opened by ', " or `. The token includes the quote
+**     characters. A doubled quote character inside the string does not
+**     terminate it. An unterminated string runs to the end of zStr.
+**
+**   * A bracketed string [...]. The token includes the brackets; if the
+**     closing ']' is missing the token runs to the end of zStr.
+**
+**   * A run of bytes for which sqlite3Fts3IsIdChar() returns true.
+**
+** Any other byte is skipped.
+*/
+const char *sqlite3Fts3NextToken(const char *zStr, int *pn){
+  const char *z1;                 /* Start of the candidate token */
+  const char *z2 = 0;             /* One past its end; 0 until found */
+
+  /* Find the start of the next token. */
+  z1 = zStr;
+  while( z2==0 ){
+    char c = *z1;
+    switch( c ){
+      case '\0': return 0;        /* No more tokens here */
+      case '\'':
+      case '"':
+      case '`': {
+        /* Advance z2 until it sits on the nul terminator or just past a
+        ** closing quote. When a quote is seen the following byte is
+        ** consumed as well; if that byte is another quote the pair is an
+        ** escape and scanning continues. */
+        z2 = z1;
+        while( *++z2 && (*z2!=c || *++z2==c) );
+        break;
+      }
+      case '[':
+        /* Scan to the closing ']' and step over it if present. */
+        z2 = &z1[1];
+        while( *z2 && z2[0]!=']' ) z2++;
+        if( *z2 ) z2++;
+        break;
+
+      default:
+        if( sqlite3Fts3IsIdChar(*z1) ){
+          z2 = &z1[1];
+          while( sqlite3Fts3IsIdChar(*z2) ) z2++;
+        }else{
+          /* Not part of any token: skip it and look again. */
+          z1++;
+        }
+    }
+  }
+
+  *pn = (int)(z2-z1);
+  return z1;
+}
+
+/*
+** Create a tokenizer instance from the specification string zArg.
+**
+** The first token of zArg (as split by sqlite3Fts3NextToken() and then
+** dequoted) is the tokenizer name, which is looked up in pHash. Each
+** remaining token is dequoted and passed to the module's xCreate()
+** method as an argument string.
+**
+** Returns SQLITE_OK with *ppTok set on success, SQLITE_NOMEM if an
+** allocation fails, SQLITE_ERROR if the name is not in pHash, or the
+** error code returned by xCreate(). For the last two cases an error
+** message is stored through pzErr using sqlite3Fts3ErrMsg().
+*/
+int sqlite3Fts3InitTokenizer(
+  Fts3Hash *pHash,                /* Tokenizer hash table */
+  const char *zArg,               /* Tokenizer name */
+  sqlite3_tokenizer **ppTok,      /* OUT: Tokenizer (if applicable) */
+  char **pzErr                    /* OUT: Set to malloced error message */
+){
+  int rc;
+  char *z = (char *)zArg;
+  int n = 0;
+  char *zCopy;
+  char *zEnd;                     /* Pointer to nul-term of zCopy */
+  sqlite3_tokenizer_module *m;
+
+  /* Work on a private copy: tokens are nul-terminated and dequoted in
+  ** place below. */
+  zCopy = sqlite3_mprintf("%s", zArg);
+  if( !zCopy ) return SQLITE_NOMEM;
+  zEnd = &zCopy[strlen(zCopy)];
+
+  z = (char *)sqlite3Fts3NextToken(zCopy, &n);
+  if( z==0 ){
+    /* No token at all: n still holds its initial 0, so the lookup below
+    ** is performed with an empty name. */
+    assert( n==0 );
+    z = zCopy;
+  }
+  z[n] = '\0';
+  sqlite3Fts3Dequote(z);
+
+  /* Hash keys include the nul terminator, hence strlen()+1. */
+  m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1);
+  if( !m ){
+    sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", z);
+    rc = SQLITE_ERROR;
+  }else{
+    char const **aArg = 0;        /* Argument strings, pointing into zCopy */
+    int iArg = 0;                 /* Number of entries in aArg[] */
+    z = &z[n+1];
+    /* The z<zEnd test stops the scan once the byte after the previous
+    ** token's terminator lies beyond the copied string. */
+    while( z<zEnd && (NULL!=(z = (char *)sqlite3Fts3NextToken(z, &n))) ){
+      int nNew = sizeof(char *)*(iArg+1);
+      char const **aNew = (const char **)sqlite3_realloc((void *)aArg, nNew);
+      if( !aNew ){
+        sqlite3_free(zCopy);
+        sqlite3_free((void *)aArg);
+        return SQLITE_NOMEM;
+      }
+      aArg = aNew;
+      aArg[iArg++] = z;
+      z[n] = '\0';
+      sqlite3Fts3Dequote(z);
+      z = &z[n+1];
+    }
+    rc = m->xCreate(iArg, aArg, ppTok);
+    assert( rc!=SQLITE_OK || *ppTok );
+    if( rc!=SQLITE_OK ){
+      /* NOTE(review): the module was found, so this message describes an
+      ** xCreate() failure rather than an unknown name. */
+      sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer");
+    }else{
+      /* xCreate() does not set pModule; the caller does it here. */
+      (*ppTok)->pModule = m; 
+    }
+    sqlite3_free((void *)aArg);
+  }
+
+  sqlite3_free(zCopy);
+  return rc;
+}
+
+
+#ifdef SQLITE_TEST
+
+#include <tcl.h>
+#include <string.h>
+
+/*
+** Implementation of a special SQL scalar function for testing tokenizers 
+** designed to be used in concert with the Tcl testing framework. This
+** function must be called with two or more arguments:
+**
+**   SELECT <function-name>(<key-name>, ..., <input-string>);
+**
+** where <function-name> is the name passed as the second argument
+** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer')
+** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test').
+**
+** The return value is a string that may be interpreted as a Tcl
+** list. For each token in the <input-string>, three elements are
+** added to the returned list. The first is the token position, the 
+** second is the token text (folded, stemmed, etc.) and the third is the
+** substring of <input-string> associated with the token. For example, 
+** using the built-in "simple" tokenizer:
+**
+**   SELECT fts3_tokenizer_test('simple', 'I don''t see how');
+**
+** will return the string:
+**
+**   "{0 i I 1 dont don't 2 see see 3 how how}"
+**   
+*/
+/*
+** SQL function body for <name>_test(<tokenizer>, <args>..., <input>).
+**
+** Looks up the named tokenizer module in the hash table supplied as user
+** data, instantiates it with the middle arguments, tokenizes the final
+** argument and returns a Tcl list holding three elements per token:
+** position, normalized token text, and the matching slice of the input.
+**
+** An SQL error is raised if fewer than two arguments are supplied, if
+** more tokenizer arguments are supplied than azArg[] can hold, if the
+** tokenizer name is NULL or unknown, or if any tokenizer method reports
+** failure. The tokenizer and cursor are released on every path that
+** created them.
+*/
+static void testFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  Fts3Hash *pHash;
+  sqlite3_tokenizer_module *p = 0;
+  sqlite3_tokenizer *pTokenizer = 0;
+  sqlite3_tokenizer_cursor *pCsr = 0;
+
+  const char *zErr = 0;
+
+  const char *zName;
+  int nName;
+  const char *zInput;
+  int nInput;
+
+  const char *azArg[64];
+
+  const char *zToken;
+  int nToken = 0;
+  int iStart = 0;
+  int iEnd = 0;
+  int iPos = 0;
+  int i;
+  int rc;
+
+  Tcl_Obj *pRet;
+
+  if( argc<2 ){
+    sqlite3_result_error(context, "insufficient arguments", -1);
+    return;
+  }
+  /* argv[1..argc-2] are copied into azArg[]; refuse calls that would
+  ** write past the end of that fixed-size array. */
+  if( argc-2>(int)(sizeof(azArg)/sizeof(azArg[0])) ){
+    sqlite3_result_error(context, "too many arguments", -1);
+    return;
+  }
+
+  nName = sqlite3_value_bytes(argv[0]);
+  zName = (const char *)sqlite3_value_text(argv[0]);
+  nInput = sqlite3_value_bytes(argv[argc-1]);
+  zInput = (const char *)sqlite3_value_text(argv[argc-1]);
+
+  /* A NULL name is never handed to the hash lookup; it falls through to
+  ** the "unknown tokenizer" error, as scalarFunc() does. */
+  pHash = (Fts3Hash *)sqlite3_user_data(context);
+  if( zName ){
+    p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
+  }
+
+  if( !p ){
+    char *zErr2 = sqlite3_mprintf("unknown tokenizer: %s", zName);
+    sqlite3_result_error(context, zErr2, -1);
+    sqlite3_free(zErr2);
+    return;
+  }
+
+  pRet = Tcl_NewObj();
+  Tcl_IncrRefCount(pRet);
+
+  for(i=1; i<argc-1; i++){
+    azArg[i-1] = (const char *)sqlite3_value_text(argv[i]);
+  }
+
+  if( SQLITE_OK!=p->xCreate(argc-2, azArg, &pTokenizer) ){
+    pTokenizer = 0;               /* Nothing to destroy after a failure */
+    zErr = "error in xCreate()";
+    goto finish;
+  }
+  pTokenizer->pModule = p;
+  if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){
+    pCsr = 0;                     /* A failed open owns no cursor */
+    zErr = "error in xOpen()";
+    goto finish;
+  }
+
+  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
+    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
+    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
+    zToken = &zInput[iStart];
+    nToken = iEnd-iStart;
+    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
+  }
+
+  /* xClose() and xDestroy() are each invoked exactly once per object, so
+  ** the handle is cleared whether or not the call reports success. */
+  rc = p->xClose(pCsr);
+  pCsr = 0;
+  if( rc!=SQLITE_OK ){
+    zErr = "error in xClose()";
+    goto finish;
+  }
+  rc = p->xDestroy(pTokenizer);
+  pTokenizer = 0;
+  if( rc!=SQLITE_OK ){
+    zErr = "error in xDestroy()";
+    goto finish;
+  }
+
+finish:
+  /* Release whatever an early exit left behind. */
+  if( pCsr ) p->xClose(pCsr);
+  if( pTokenizer ) p->xDestroy(pTokenizer);
+  if( zErr ){
+    sqlite3_result_error(context, zErr, -1);
+  }else{
+    sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
+  }
+  Tcl_DecrRefCount(pRet);
+}
+
+#ifdef SQLITE_ENABLE_FTS3_TOKENIZER
+/*
+** Register tokenizer module p under the name zName by running
+** "SELECT fts3_tokenizer(?, ?)" on connection db, with the pointer value
+** bound as a blob. Returns the sqlite3_prepare_v2() error code if the
+** statement cannot be prepared, otherwise the result of
+** sqlite3_finalize().
+*/
+static
+int registerTokenizer(
+  sqlite3 *db, 
+  char *zName, 
+  const sqlite3_tokenizer_module *p
+){
+  const char zSql[] = "SELECT fts3_tokenizer(?, ?)";
+  sqlite3_stmt *pStmt;
+  int rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+
+  if( rc==SQLITE_OK ){
+    /* The blob is the pointer value itself, not the module it points to. */
+    sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
+    sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
+    sqlite3_step(pStmt);
+    rc = sqlite3_finalize(pStmt);
+  }
+  return rc;
+}
+#endif /* SQLITE_ENABLE_FTS3_TOKENIZER */
+
+
+/*
+** Look up the tokenizer module registered as zName by running
+** "SELECT fts3_tokenizer(?)" on connection db.
+**
+** *pp is set to 0 first. It is overwritten with the module pointer only
+** if the statement yields a row whose first column is a blob of exactly
+** sizeof(*pp) bytes; a blob of any other size is ignored rather than
+** copied, so a short value can never cause a read past its end.
+**
+** Returns the sqlite3_prepare_v2() error code if the statement cannot be
+** prepared, otherwise the result of sqlite3_finalize().
+*/
+static
+int queryTokenizer(
+  sqlite3 *db, 
+  char *zName,  
+  const sqlite3_tokenizer_module **pp
+){
+  int rc;
+  sqlite3_stmt *pStmt;
+  const char zSql[] = "SELECT fts3_tokenizer(?)";
+
+  *pp = 0;
+  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
+  if( SQLITE_ROW==sqlite3_step(pStmt) ){
+    if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB
+     && sqlite3_column_bytes(pStmt, 0)==(int)sizeof(*pp)
+    ){
+      memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
+    }
+  }
+
+  return sqlite3_finalize(pStmt);
+}
+
+void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
+
+/*
+** Implementation of the scalar function fts3_tokenizer_internal_test().
+** This function is used for testing only, it is not included in the
+** build unless SQLITE_TEST is defined.
+**
+** The purpose of this is to test that the fts3_tokenizer() function
+** can be used as designed by the C-code in the queryTokenizer and
+** registerTokenizer() functions above. These two functions are repeated
+** in the README.tokenizer file as an example, so it is important to
+** test them.
+**
+** To run the tests, evaluate the fts3_tokenizer_internal_test() scalar
+** function with no arguments. An assert() will fail if a problem is
+** detected. i.e.:
+**
+**     SELECT fts3_tokenizer_internal_test();
+**
+*/
+/*
+** SQL function body for <name>_internal_test(). Exercises
+** queryTokenizer() and, when SQLITE_ENABLE_FTS3_TOKENIZER is defined,
+** registerTokenizer() against the connection passed as user data.
+** Problems are reported through assert(); on completion the text "ok"
+** is returned.
+*/
+static void intTestFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
+  const sqlite3_tokenizer_module *pSimple;  /* The built-in "simple" module */
+  const sqlite3_tokenizer_module *pFound;   /* Result of each lookup */
+  int rc;
+
+  UNUSED_PARAMETER(argc);
+  UNUSED_PARAMETER(argv);
+
+  /* Lookup of a registered name must return the same module pointer
+  ** that the C-level accessor hands out. */
+  sqlite3Fts3SimpleTokenizerModule(&pSimple);
+  rc = queryTokenizer(db, "simple", &pFound);
+  assert( rc==SQLITE_OK );
+  assert( pSimple==pFound );
+
+  /* Lookup of an unregistered name must fail and leave the output 0. */
+  rc = queryTokenizer(db, "nosuchtokenizer", &pFound);
+  assert( rc==SQLITE_ERROR );
+  assert( pFound==0 );
+  assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
+
+#ifdef SQLITE_ENABLE_FTS3_TOKENIZER
+  /* After registration the previously unknown name resolves. */
+  rc = registerTokenizer(db, "nosuchtokenizer", pSimple);
+  assert( rc==SQLITE_OK );
+  rc = queryTokenizer(db, "nosuchtokenizer", &pFound);
+  assert( rc==SQLITE_OK );
+  assert( pFound==pSimple );
+#endif
+
+  sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
+}
+
+#endif
+
+/*
+** Set up SQL objects in database db used to access the contents of
+** the hash table pointed to by argument pHash. The hash table must
+** have been initialized to use string keys, and to take a private copy
+** of the key when a value is inserted. i.e. by a call similar to:
+**
+**    sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
+**
+** This function adds a scalar function (see header comment above
+** scalarFunc() in this file for details) and, if ENABLE_TABLE is
+** defined at compilation time, a temporary virtual table (see header 
+** comment above struct HashTableVtab) to the database schema. Both 
+** provide read/write access to the contents of *pHash.
+**
+** The third argument to this function, zName, is used as the name
+** of both the scalar and, if created, the virtual table.
+*/
+int sqlite3Fts3InitHashTable(
+  sqlite3 *db, 
+  Fts3Hash *pHash, 
+  const char *zName
+){
+  const int eTextRep = SQLITE_ANY;
+  void *pUser = (void *)pHash;    /* User data for the hash-backed functions */
+  int rc = SQLITE_OK;
+#ifdef SQLITE_TEST
+  /* Names of the two test-only functions, derived from zName. */
+  char *zTest = sqlite3_mprintf("%s_test", zName);
+  char *zTest2 = sqlite3_mprintf("%s_internal_test", zName);
+  if( zTest==0 || zTest2==0 ){
+    rc = SQLITE_NOMEM;
+  }
+#endif
+
+  /* zName(X) and zName(X,Y) both dispatch to scalarFunc(). Each
+  ** registration is attempted only if everything before it succeeded. */
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_create_function(
+        db, zName, 1, eTextRep, pUser, scalarFunc, 0, 0
+    );
+  }
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_create_function(
+        db, zName, 2, eTextRep, pUser, scalarFunc, 0, 0
+    );
+  }
+
+#ifdef SQLITE_TEST
+  /* The variadic tokenizer test function receives the hash table; the
+  ** internal self-test receives the database handle instead. */
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_create_function(
+        db, zTest, -1, eTextRep, pUser, testFunc, 0, 0
+    );
+  }
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_create_function(
+        db, zTest2, 0, eTextRep, (void *)db, intTestFunc, 0, 0
+    );
+  }
+  sqlite3_free(zTest);
+  sqlite3_free(zTest2);
+#endif
+
+  return rc;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_tokenizer.h
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_tokenizer.h
@ -0,0 +1,161 @@
+/*
+** 2006 July 10
+**
+** The author disclaims copyright to this source code.
+**
+*************************************************************************
+** Defines the interface to tokenizers used by fulltext-search.  There
+** are three basic components:
+**
+** sqlite3_tokenizer_module is a singleton defining the tokenizer
+** interface functions.  This is essentially the class structure for
+** tokenizers.
+**
+** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
+** including customization information defined at creation time.
+**
+** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
+** tokens from a particular input.
+*/
+#ifndef _FTS3_TOKENIZER_H_
+#define _FTS3_TOKENIZER_H_
+
+/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
+** If tokenizers are to be allowed to call sqlite3_*() functions, then
+** we will need a way to register the API consistently.
+*/
+#include "sqlite3.h"
+
+/*
+** Structures used by the tokenizer interface. When a new tokenizer
+** implementation is registered, the caller provides a pointer to
+** an sqlite3_tokenizer_module containing pointers to the callback
+** functions that make up an implementation.
+**
+** When an fts3 table is created, it passes any arguments passed to
+** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
+** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
+** implementation. The xCreate() function in turn returns an 
+** sqlite3_tokenizer structure representing the specific tokenizer to
+** be used for the fts3 table (customized by the tokenizer clause arguments).
+**
+** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
+** method is called. It returns an sqlite3_tokenizer_cursor object
+** that may be used to tokenize a specific input buffer based on
+** the tokenization rules supplied by a specific sqlite3_tokenizer
+** object.
+*/
+typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
+typedef struct sqlite3_tokenizer sqlite3_tokenizer;
+typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
+
+/*
+** Table of callbacks that together implement one kind of tokenizer.
+** All methods return an int: SQLITE_OK on success or an SQLite error
+** code, with xNext() additionally using SQLITE_DONE for end of input.
+*/
+struct sqlite3_tokenizer_module {
+
+  /*
+  ** Structure version. Should always be set to 0 or 1. The xLanguageid
+  ** member at the end of this structure is only available when the
+  ** version is 1 or greater.
+  */
+  int iVersion;
+
+  /*
+  ** Create a new tokenizer. The values in the argv[] array are the
+  ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
+  ** TABLE statement that created the fts3 table. For example, if
+  ** the following SQL is executed:
+  **
+  **   CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
+  **
+  ** then argc is set to 2, and the argv[] array contains pointers
+  ** to the strings "arg1" and "arg2".
+  **
+  ** This method should return either SQLITE_OK (0), or an SQLite error 
+  ** code. If SQLITE_OK is returned, then *ppTokenizer should be set
+  ** to point at the newly created tokenizer structure. The generic
+  ** sqlite3_tokenizer.pModule variable should not be initialized by
+  ** this callback. The caller will do so.
+  */
+  int (*xCreate)(
+    int argc,                           /* Size of argv array */
+    const char *const*argv,             /* Tokenizer argument strings */
+    sqlite3_tokenizer **ppTokenizer     /* OUT: Created tokenizer */
+  );
+
+  /*
+  ** Destroy an existing tokenizer. The fts3 module calls this method
+  ** exactly once for each successful call to xCreate().
+  */
+  int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
+
+  /*
+  ** Create a tokenizer cursor to tokenize an input buffer. The caller
+  ** is responsible for ensuring that the input buffer remains valid
+  ** until the cursor is closed (using the xClose() method). 
+  */
+  int (*xOpen)(
+    sqlite3_tokenizer *pTokenizer,       /* Tokenizer object */
+    const char *pInput, int nBytes,      /* Input buffer */
+    sqlite3_tokenizer_cursor **ppCursor  /* OUT: Created tokenizer cursor */
+  );
+
+  /*
+  ** Destroy an existing tokenizer cursor. The fts3 module calls this 
+  ** method exactly once for each successful call to xOpen().
+  */
+  int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
+
+  /*
+  ** Retrieve the next token from the tokenizer cursor pCursor. This
+  ** method should either return SQLITE_OK and set the values of the
+  ** "OUT" variables identified below, or SQLITE_DONE to indicate that
+  ** the end of the buffer has been reached, or an SQLite error code.
+  **
+  ** *ppToken should be set to point at a buffer containing the 
+  ** normalized version of the token (i.e. after any case-folding and/or
+  ** stemming has been performed). *pnBytes should be set to the length
+  ** of this buffer in bytes. The input text that generated the token is
+  ** identified by the byte offsets returned in *piStartOffset and
+  ** *piEndOffset. *piStartOffset should be set to the index of the first
+  ** byte of the token in the input buffer. *piEndOffset should be set
+  ** to the index of the first byte just past the end of the token in
+  ** the input buffer.
+  **
+  ** The buffer *ppToken is set to point at is managed by the tokenizer
+  ** implementation. It is only required to be valid until the next call
+  ** to xNext() or xClose(). 
+  */
+  /* TODO(shess) current implementation requires pInput to be
+  ** nul-terminated.  This should either be fixed, or pInput/nBytes
+  ** should be converted to zInput.
+  */
+  int (*xNext)(
+    sqlite3_tokenizer_cursor *pCursor,   /* Tokenizer cursor */
+    const char **ppToken, int *pnBytes,  /* OUT: Normalized text for token */
+    int *piStartOffset,  /* OUT: Byte offset of token in input buffer */
+    int *piEndOffset,    /* OUT: Byte offset of end of token in input buffer */
+    int *piPosition      /* OUT: Number of tokens returned before this one */
+  );
+
+  /***********************************************************************
+  ** Methods below this point are only available if iVersion>=1.
+  */
+
+  /* 
+  ** Configure the language id of a tokenizer cursor. A module that sets
+  ** iVersion to 0 may leave this member as 0.
+  */
+  int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
+};
+
+/*
+** Generic part of a tokenizer instance. Implementations embed this as
+** the first member of their own structure (simple_tokenizer does so) and
+** return a pointer to it from xCreate(). pModule is filled in by the
+** code that calls xCreate(), not by xCreate() itself.
+*/
+struct sqlite3_tokenizer {
+  const sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer */
+  /* Tokenizer implementations will typically add additional fields */
+};
+
+/*
+** Generic part of a tokenizer cursor, created by xOpen() and released
+** by xClose(). Implementations embed this as the first member of their
+** own cursor structure.
+** NOTE(review): pTokenizer is presumably filled in by the code that
+** calls xOpen() - confirm against sqlite3Fts3OpenTokenizer().
+*/
+struct sqlite3_tokenizer_cursor {
+  sqlite3_tokenizer *pTokenizer;       /* Tokenizer for this cursor. */
+  /* Tokenizer implementations will typically add additional fields */
+};
+
+int fts3_global_term_cnt(int iTerm, int iCol);
+int fts3_term_cnt(int iTerm, int iCol);
+
+
+#endif /* _FTS3_TOKENIZER_H_ */
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_tokenizer1.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_tokenizer1.c
@ -0,0 +1,234 @@
+/*
+** 2006 Oct 10
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** Implementation of the "simple" full-text-search tokenizer.
+*/
+
+/*
+** The code in this file is only compiled if:
+**
+**     * The FTS3 module is being built as an extension
+**       (in which case SQLITE_CORE is not defined), or
+**
+**     * The FTS3 module is being built into the core of
+**       SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
+*/
+#include "fts3Int.h"
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "fts3_tokenizer.h"
+
+/*
+** Instance data for the "simple" tokenizer. The generic base object comes
+** first so that a pointer to it can be cast back to simple_tokenizer.
+*/
+typedef struct simple_tokenizer {
+  sqlite3_tokenizer base;      /* generic part; must be the first member */
+  char delim[128];             /* flag ASCII delimiters: indexed by byte
+                               ** value below 0x80, non-zero means the
+                               ** byte separates tokens */
+} simple_tokenizer;
+
+/*
+** Cursor state for one pass of the "simple" tokenizer over an input
+** buffer. The generic base object comes first so that a pointer to it
+** can be cast back to simple_tokenizer_cursor.
+*/
+typedef struct simple_tokenizer_cursor {
+  sqlite3_tokenizer_cursor base;
+  const char *pInput;          /* input we are tokenizing */
+  int nBytes;                  /* size of the input */
+  int iOffset;                 /* current position in pInput */
+  int iToken;                  /* index of next token to be returned */
+  char *pToken;                /* storage for current token */
+  int nTokenAllocated;         /* space allocated to pToken buffer */
+} simple_tokenizer_cursor;
+
+
+/*
+** Return 1 if byte c is flagged as a delimiter in tokenizer t, else 0.
+** Bytes of 0x80 and above are never delimiters.
+*/
+static int simpleDelim(simple_tokenizer *t, unsigned char c){
+  if( c>=0x80 ){
+    return 0;
+  }
+  return t->delim[c]!=0;
+}
+/*
+** Return 1 if x is an ASCII decimal digit or an ASCII letter of either
+** case, else 0. Independent of the C locale.
+*/
+static int fts3_isalnum(int x){
+  if( x>='0' && x<='9' ) return 1;
+  if( x>='A' && x<='Z' ) return 1;
+  return x>='a' && x<='z';
+}
+
+/*
+** Create a new tokenizer instance.
+*/
+static int simpleCreate(
+  int argc, const char * const *argv,
+  sqlite3_tokenizer **ppTokenizer
+){
+  simple_tokenizer *pNew;
+
+  pNew = (simple_tokenizer *) sqlite3_malloc(sizeof(*pNew));
+  if( pNew==NULL ) return SQLITE_NOMEM;
+  memset(pNew, 0, sizeof(*pNew));
+
+  /* TODO(shess) Delimiters need to remain the same from run to run,
+  ** else we need to reindex.  One solution would be a meta-table to
+  ** track such information in the database, then we'd only want this
+  ** information on the initial create.
+  */
+  if( argc>1 ){
+    /* Every byte of argv[1] becomes a delimiter. Bytes of 0x80 and above
+    ** are rejected: UTF-8 delimiters are explicitly not supported. */
+    const unsigned char *zDelim = (const unsigned char *)argv[1];
+    int nDelim = (int)strlen(argv[1]);
+    int j;
+    for(j=0; j<nDelim; j++){
+      if( zDelim[j]>=0x80 ){
+        sqlite3_free(pNew);
+        return SQLITE_ERROR;
+      }
+      pNew->delim[zDelim[j]] = 1;
+    }
+  }else{
+    /* Default: every ASCII byte from 1 to 0x7f that is not alphanumeric
+    ** is a delimiter. Entry 0 keeps the value written by memset(). */
+    int ch;
+    for(ch=1; ch<0x80; ch++){
+      pNew->delim[ch] = fts3_isalnum(ch) ? 0 : -1;
+    }
+  }
+
+  *ppTokenizer = &pNew->base;
+  return SQLITE_OK;
+}
+
+/*
+** Destroy a tokenizer
+*/
+static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
+  /* The base object is the first member of simple_tokenizer, so this
+  ** pointer is also the start of the block obtained in simpleCreate(). */
+  sqlite3_free(pTokenizer);
+  return SQLITE_OK;
+}
+
+/*
+** Prepare to begin tokenizing a particular string.  The input
+** string to be tokenized is pInput[0..nBytes-1].  A cursor
+** used to incrementally tokenize this string is returned in 
+** *ppCursor.
+*/
+static int simpleOpen(
+  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
+  const char *pInput, int nBytes,        /* String to be tokenized */
+  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
+){
+  simple_tokenizer_cursor *pCsr;
+  int nInput;                     /* Effective length of pInput in bytes */
+
+  UNUSED_PARAMETER(pTokenizer);
+
+  pCsr = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*pCsr));
+  if( pCsr==NULL ) return SQLITE_NOMEM;
+
+  /* A NULL input is treated as empty; a negative nBytes means the input
+  ** is nul-terminated and its length must be measured. */
+  if( pInput==0 ){
+    nInput = 0;
+  }else if( nBytes<0 ){
+    nInput = (int)strlen(pInput);
+  }else{
+    nInput = nBytes;
+  }
+
+  pCsr->pInput = pInput;
+  pCsr->nBytes = nInput;
+  pCsr->iOffset = 0;              /* tokenizing starts at the first byte */
+  pCsr->iToken = 0;
+  pCsr->pToken = NULL;            /* token buffer is allocated on demand */
+  pCsr->nTokenAllocated = 0;
+
+  *ppCursor = &pCsr->base;
+  return SQLITE_OK;
+}
+
+/*
+** Close a tokenization cursor previously opened by a call to
+** simpleOpen() above.
+*/
+static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
+  simple_tokenizer_cursor *pCsr = (simple_tokenizer_cursor *) pCursor;
+
+  /* Release the token buffer first, then the cursor that owns it. */
+  sqlite3_free(pCsr->pToken);
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}
+
+/*
+** Extract the next token from a tokenization cursor.  The cursor must
+** have been opened by a prior call to simpleOpen().
+*/
+/*
+** Scan forward from the cursor's current offset, skip delimiter bytes and
+** collect the following run of non-delimiter bytes as one token. The
+** token is copied into the cursor's private buffer with ASCII upper-case
+** letters folded to lower case.
+**
+** Returns SQLITE_OK with all OUT parameters set, SQLITE_DONE once the
+** input is exhausted, or SQLITE_NOMEM if the token buffer cannot be
+** grown. After SQLITE_NOMEM the recorded buffer size still matches the
+** buffer actually held by the cursor.
+*/
+static int simpleNext(
+  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by simpleOpen */
+  const char **ppToken,               /* OUT: *ppToken is the token text */
+  int *pnBytes,                       /* OUT: Number of bytes in token */
+  int *piStartOffset,                 /* OUT: Starting offset of token */
+  int *piEndOffset,                   /* OUT: Ending offset of token */
+  int *piPosition                     /* OUT: Position integer of token */
+){
+  simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
+  simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
+  unsigned char *p = (unsigned char *)c->pInput;
+
+  while( c->iOffset<c->nBytes ){
+    int iStartOffset;
+
+    /* Scan past delimiter characters */
+    while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){
+      c->iOffset++;
+    }
+
+    /* Count non-delimiter characters. */
+    iStartOffset = c->iOffset;
+    while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){
+      c->iOffset++;
+    }
+
+    if( c->iOffset>iStartOffset ){
+      int i, n = c->iOffset-iStartOffset;
+      if( n>c->nTokenAllocated ){
+        /* Grow with some slack. The new size is recorded only after the
+        ** allocation succeeds; sqlite3_realloc() leaves the old buffer
+        ** untouched on failure, so the old size must stay in force. */
+        int nAlloc = n+20;
+        char *pNew = sqlite3_realloc(c->pToken, nAlloc);
+        if( !pNew ) return SQLITE_NOMEM;
+        c->pToken = pNew;
+        c->nTokenAllocated = nAlloc;
+      }
+      for(i=0; i<n; i++){
+        /* TODO(shess) This needs expansion to handle UTF-8
+        ** case-insensitivity.
+        */
+        unsigned char ch = p[iStartOffset+i];
+        c->pToken[i] = (char)((ch>='A' && ch<='Z') ? ch-'A'+'a' : ch);
+      }
+      *ppToken = c->pToken;
+      *pnBytes = n;
+      *piStartOffset = iStartOffset;
+      *piEndOffset = c->iOffset;
+      *piPosition = c->iToken++;
+
+      return SQLITE_OK;
+    }
+  }
+  return SQLITE_DONE;
+}
+
+/*
+** The set of routines that implement the simple tokenizer
+*/
+static const sqlite3_tokenizer_module simpleTokenizerModule = {
+  0,                           /* iVersion    */
+  simpleCreate,                /* xCreate     */
+  simpleDestroy,               /* xDestroy    */
+  simpleOpen,                  /* xOpen       */
+  simpleClose,                 /* xClose      */
+  simpleNext,                  /* xNext       */
+  0,                           /* xLanguageid */
+};
+
+/*
+** Return a pointer to the statically allocated "simple" tokenizer
+** module in *ppModule.  No memory is allocated by this call.
+*/
+void sqlite3Fts3SimpleTokenizerModule(
+  sqlite3_tokenizer_module const**ppModule
+){
+  /* Hand out the address of the file-scope module table; every caller
+  ** receives the same pointer. */
+  *ppModule = &simpleTokenizerModule;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_unicode.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_unicode.c
@ -0,0 +1,393 @@
+/*
+** 2012 May 24
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** Implementation of the "unicode" full-text-search tokenizer.
+*/
+
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
+
+#include "fts3Int.h"
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "fts3_tokenizer.h"
+
+/*
+** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
+** from the sqlite3 source file utf.c. If this file is compiled as part
+** of the amalgamation, they are not required.
+*/
+#ifndef SQLITE_AMALGAMATION
+
+/*
+** Payload bits carried by the first byte of a multi-byte UTF-8 sequence.
+** READ_UTF8 indexes this table with (lead byte - 0xC0), so the 64 entries
+** cover lead bytes 0xC0..0xFF:
+**
+**   0xC0..0xDF  ->  low 5 bits of the byte
+**   0xE0..0xEF  ->  low 4 bits of the byte
+**   0xF0..0xF7  ->  low 3 bits of the byte
+**   0xF8..0xFF  ->  0,1,2,3,0,1,0,0 (bytes that are not valid lead bytes)
+*/
+static const unsigned char sqlite3Utf8Trans1[] = {
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
+};
+
+/*
+** READ_UTF8(zIn, zTerm, c): decode one codepoint starting at zIn into c
+** and advance zIn past the bytes consumed.
+**
+**   * The first byte is read unconditionally; the caller must guarantee
+**     zIn is not already at zTerm.
+**   * A first byte below 0xC0 is returned unchanged.
+**   * Otherwise the lead-byte payload is taken from sqlite3Utf8Trans1[]
+**     and every following continuation byte (10xxxxxx), up to zTerm,
+**     contributes six more bits. The number of continuation bytes is not
+**     limited by the lead byte.
+**   * A decoded value below 0x80, in the surrogate range 0xD800..0xDFFF,
+**     or equal to 0xFFFE/0xFFFF is replaced by 0xFFFD.
+**
+** The macro expands to two statements without an enclosing block, so it
+** must be used as a complete statement inside braces. Arguments are
+** evaluated more than once.
+*/
+#define READ_UTF8(zIn, zTerm, c)                           \
+  c = *(zIn++);                                            \
+  if( c>=0xc0 ){                                           \
+    c = sqlite3Utf8Trans1[c-0xc0];                         \
+    while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){            \
+      c = (c<<6) + (0x3f & *(zIn++));                      \
+    }                                                      \
+    if( c<0x80                                             \
+        || (c&0xFFFFF800)==0xD800                          \
+        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
+  }
+
+/*
+** WRITE_UTF8(zOut, c): append codepoint c to the buffer at zOut as UTF-8
+** and advance zOut past the bytes written.
+**
+**   c <  0x80     -> 1 byte
+**   c <  0x800    -> 2 bytes
+**   c <  0x10000  -> 3 bytes
+**   otherwise     -> 4 bytes (only the low 21 bits of c are encoded)
+**
+** The caller must ensure at least 4 bytes are available at zOut. Both
+** arguments are evaluated several times. The u8 type is not declared in
+** this file - presumably it comes from fts3Int.h; confirm.
+*/
+#define WRITE_UTF8(zOut, c) {                          \
+  if( c<0x00080 ){                                     \
+    *zOut++ = (u8)(c&0xFF);                            \
+  }                                                    \
+  else if( c<0x00800 ){                                \
+    *zOut++ = 0xC0 + (u8)((c>>6)&0x1F);                \
+    *zOut++ = 0x80 + (u8)(c & 0x3F);                   \
+  }                                                    \
+  else if( c<0x10000 ){                                \
+    *zOut++ = 0xE0 + (u8)((c>>12)&0x0F);               \
+    *zOut++ = 0x80 + (u8)((c>>6) & 0x3F);              \
+    *zOut++ = 0x80 + (u8)(c & 0x3F);                   \
+  }else{                                               \
+    *zOut++ = 0xF0 + (u8)((c>>18) & 0x07);             \
+    *zOut++ = 0x80 + (u8)((c>>12) & 0x3F);             \
+    *zOut++ = 0x80 + (u8)((c>>6) & 0x3F);              \
+    *zOut++ = 0x80 + (u8)(c & 0x3F);                   \
+  }                                                    \
+}
+
+#endif /* ifndef SQLITE_AMALGAMATION */
+
+typedef struct unicode_tokenizer unicode_tokenizer;
+typedef struct unicode_cursor unicode_cursor;
+
+/*
+** A "unicode" tokenizer instance, created by unicodeCreate() and released
+** by unicodeDestroy().
+*/
+struct unicode_tokenizer {
+  sqlite3_tokenizer base;         /* Base class. Must be first */
+  int bRemoveDiacritic;           /* Passed to sqlite3FtsUnicodeFold() */
+  int nException;                 /* Number of entries in aiException[] */
+  int *aiException;               /* Codepoints whose Isalnum() result is
+                                  ** inverted; kept in ascending order so
+                                  ** that it can be binary searched */
+};
+
+/*
+** Cursor over one input string, created by unicodeOpen(). The zToken
+** buffer is owned by the cursor, reused for every token returned by
+** unicodeNext(), and freed by unicodeClose().
+*/
+struct unicode_cursor {
+  sqlite3_tokenizer_cursor base;  /* Base class. Must be first */
+  const unsigned char *aInput;    /* Input text being tokenized */
+  int nInput;                     /* Size of aInput[] in bytes */
+  int iOff;                       /* Current offset within aInput[] */
+  int iToken;                     /* Index of next token to be returned */
+  char *zToken;                   /* storage for current token */
+  int nAlloc;                     /* space allocated at zToken */
+};
+
+
+/*
+** Release a tokenizer object obtained from unicodeCreate(), including
+** its exception array. A NULL handle is accepted and ignored. Always
+** returns SQLITE_OK.
+*/
+static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){
+  unicode_tokenizer *pTok = (unicode_tokenizer *)pTokenizer;
+
+  if( pTok==0 ) return SQLITE_OK;
+  sqlite3_free(pTok->aiException);
+  sqlite3_free(pTok);
+  return SQLITE_OK;
+}
+
+/*
+** As part of a tokenchars= or separators= option, the CREATE VIRTUAL TABLE
+** statement has specified that the tokenizer for this table shall consider
+** all characters in string zIn/nIn to be separators (if bAlnum==0) or
+** token characters (if bAlnum==1).
+**
+** For each codepoint in the zIn/nIn string, this function checks if the
+** sqlite3FtsUnicodeIsalnum() function already returns the desired result.
+** If so, no action is taken. Otherwise, the codepoint is added to the 
+** unicode_tokenizer.aiException[] array. For the purposes of tokenization,
+** the return value of sqlite3FtsUnicodeIsalnum() is inverted for all
+** codepoints in the aiException[] array.
+**
+** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic()
+** identifies as a diacritic) occurs in the zIn/nIn string it is ignored.
+** It is not possible to change the behavior of the tokenizer with respect
+** to these codepoints.
+*/
+static int unicodeAddExceptions(
+  unicode_tokenizer *p,           /* Tokenizer whose exception list grows */
+  int bAlnum,                     /* Wanted class: 1 token char, 0 separator */
+  const char *zIn,                /* UTF-8 text listing the codepoints */
+  int nIn                         /* Length of zIn in bytes */
+){
+  const unsigned char *zBegin = (const unsigned char *)zIn;
+  const unsigned char *zTerm = &zBegin[nIn];
+  const unsigned char *zCur;      /* Read position within zIn */
+  int iCode;                      /* Codepoint most recently decoded */
+  int nAdd = 0;                   /* Codepoints that need an entry */
+  int *aList;                     /* Enlarged aiException[] array */
+  int nList;                      /* Valid entries in aList[] */
+
+  assert( bAlnum==0 || bAlnum==1 );
+
+  /* Pass 1: count the codepoints that are neither already classified as
+  ** requested nor diacritics, so the array can be grown exactly once. */
+  zCur = zBegin;
+  while( zCur<zTerm ){
+    READ_UTF8(zCur, zTerm, iCode);
+    assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
+    if( sqlite3FtsUnicodeIsalnum(iCode)==bAlnum ) continue;
+    if( sqlite3FtsUnicodeIsdiacritic(iCode) ) continue;
+    nAdd++;
+  }
+  if( nAdd==0 ) return SQLITE_OK;
+
+  /* On allocation failure p->aiException is left untouched and is still
+  ** owned by the tokenizer. */
+  aList = sqlite3_realloc(p->aiException, (p->nException+nAdd)*sizeof(int));
+  if( aList==0 ) return SQLITE_NOMEM;
+  nList = p->nException;
+
+  /* Pass 2: insert each qualifying codepoint at its sorted position so
+  ** that unicodeIsException() can binary search the array. */
+  zCur = zBegin;
+  while( zCur<zTerm ){
+    int iSlot;                    /* Insertion point for iCode */
+    int k;                        /* Index used to shift the tail up */
+
+    READ_UTF8(zCur, zTerm, iCode);
+    if( sqlite3FtsUnicodeIsalnum(iCode)==bAlnum ) continue;
+    if( sqlite3FtsUnicodeIsdiacritic(iCode) ) continue;
+
+    iSlot = 0;
+    while( iSlot<nList && aList[iSlot]<iCode ){
+      iSlot++;
+    }
+    for(k=nList; k>iSlot; k--){
+      aList[k] = aList[k-1];
+    }
+    aList[iSlot] = iCode;
+    nList++;
+  }
+
+  p->aiException = aList;
+  p->nException = nList;
+  return SQLITE_OK;
+}
+
+/*
+** Binary search the sorted p->aiException[] array for codepoint iCode.
+** Return 1 if it is present and 0 otherwise (including when the array
+** is empty).
+*/
+static int unicodeIsException(unicode_tokenizer *p, int iCode){
+  const int *aSorted = p->aiException;
+  int iFirst = 0;
+  int iLast = p->nException-1;
+
+  /* With no exceptions iLast starts below iFirst and the loop is skipped */
+  while( iFirst<=iLast ){
+    int iMid = (iLast + iFirst) / 2;
+    int iCand = aSorted[iMid];
+    if( iCand<iCode ){
+      iFirst = iMid+1;
+    }else if( iCand>iCode ){
+      iLast = iMid-1;
+    }else{
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/*
+** Return non-zero if tokenizer p treats codepoint iCode as a token
+** character rather than a separator. The built-in classification from
+** sqlite3FtsUnicodeIsalnum() is inverted for every codepoint listed in
+** the tokenizer's exception array.
+*/
+static int unicodeIsAlnum(unicode_tokenizer *p, int iCode){
+  int bBuiltin = sqlite3FtsUnicodeIsalnum(iCode);
+  int bInvert = unicodeIsException(p, iCode);
+
+  /* The XOR below relies on the built-in result being exactly 0 or 1 */
+  assert( (bBuiltin & 0xFFFFFFFE)==0 );
+  return bBuiltin ^ bInvert;
+}
+
+/*
+** Create a new unicode tokenizer and return it through *pp.
+**
+** Recognized arguments are "remove_diacritics=0", "remove_diacritics=1"
+** (the default), "tokenchars=<chars>" and "separators=<chars>". Any
+** other argument fails with SQLITE_ERROR. On any error after the initial
+** allocation the partly built tokenizer is destroyed and *pp is set to
+** NULL; if the initial allocation itself fails SQLITE_NOMEM is returned
+** and *pp is not written.
+*/
+static int unicodeCreate(
+  int nArg,                       /* Number of entries in azArg[] */
+  const char * const *azArg,      /* Tokenizer creation arguments */
+  sqlite3_tokenizer **pp          /* OUT: New tokenizer handle */
+){
+  unicode_tokenizer *pTok;        /* Tokenizer under construction */
+  int rc = SQLITE_OK;
+  int iArg = 0;
+
+  pTok = (unicode_tokenizer *)sqlite3_malloc(sizeof(*pTok));
+  if( pTok==0 ) return SQLITE_NOMEM;
+  memset(pTok, 0, sizeof(*pTok));
+  pTok->bRemoveDiacritic = 1;
+
+  /* Stop at the first argument that fails */
+  while( rc==SQLITE_OK && iArg<nArg ){
+    const char *zArg = azArg[iArg++];
+    int nByte = (int)strlen(zArg);
+
+    if( nByte==19 && memcmp("remove_diacritics=1", zArg, 19)==0 ){
+      pTok->bRemoveDiacritic = 1;
+    }else if( nByte==19 && memcmp("remove_diacritics=0", zArg, 19)==0 ){
+      pTok->bRemoveDiacritic = 0;
+    }else if( nByte>=11 && memcmp("tokenchars=", zArg, 11)==0 ){
+      rc = unicodeAddExceptions(pTok, 1, zArg+11, nByte-11);
+    }else if( nByte>=11 && memcmp("separators=", zArg, 11)==0 ){
+      rc = unicodeAddExceptions(pTok, 0, zArg+11, nByte-11);
+    }else{
+      /* Unrecognized argument */
+      rc = SQLITE_ERROR;
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    *pp = (sqlite3_tokenizer *)pTok;
+  }else{
+    unicodeDestroy((sqlite3_tokenizer *)pTok);
+    *pp = 0;
+  }
+  return rc;
+}
+
+/*
+** Allocate a cursor for tokenizing the string aInput[0..nInput-1] and
+** return it through *ppCursor (parameter pp). A NULL aInput is treated
+** as an empty string; a negative nInput means aInput is nul-terminated
+** and its length is measured with strlen(). Returns SQLITE_OK, or
+** SQLITE_NOMEM if the cursor cannot be allocated.
+*/
+static int unicodeOpen(
+  sqlite3_tokenizer *p,           /* The tokenizer (not used) */
+  const char *aInput,             /* Input string, may be NULL */
+  int nInput,                     /* Size of aInput in bytes, or negative */
+  sqlite3_tokenizer_cursor **pp   /* OUT: New cursor object */
+){
+  unicode_cursor *pNew;
+  int nByte;                      /* Effective length of the input */
+
+  pNew = (unicode_cursor *)sqlite3_malloc(sizeof(*pNew));
+  if( pNew==0 ) return SQLITE_NOMEM;
+  memset(pNew, 0, sizeof(*pNew));
+
+  if( aInput==0 ){
+    nByte = 0;
+  }else if( nInput<0 ){
+    nByte = (int)strlen(aInput);
+  }else{
+    nByte = nInput;
+  }
+  pNew->aInput = (const unsigned char *)aInput;
+  pNew->nInput = nByte;
+
+  *pp = &pNew->base;
+  UNUSED_PARAMETER(p);
+  return SQLITE_OK;
+}
+
+/*
+** Close a tokenization cursor previously opened by a call to
+** unicodeOpen() above. Frees the token buffer and then the cursor
+** itself. Always returns SQLITE_OK.
+*/
+static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){
+  unicode_cursor *pCsr = (unicode_cursor *) pCursor;
+  sqlite3_free(pCsr->zToken);
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}
+
+/*
+** Extract the next token from a tokenization cursor.  The cursor must
+** have been opened by a prior call to unicodeOpen().
+**
+** Returns SQLITE_OK with the output parameters set when a token is found,
+** SQLITE_DONE when only separators remain in the input, or SQLITE_NOMEM
+** if the token buffer cannot be grown. The token text is case-folded (and
+** optionally stripped of diacritics) and is stored in a buffer owned by
+** the cursor, so it is overwritten by the next call. The start and end
+** offsets refer to the original, unfolded input.
+*/
+static int unicodeNext(
+  sqlite3_tokenizer_cursor *pC,   /* Cursor returned by unicodeOpen */
+  const char **paToken,           /* OUT: Token text */
+  int *pnToken,                   /* OUT: Number of bytes at *paToken */
+  int *piStart,                   /* OUT: Starting offset of token */
+  int *piEnd,                     /* OUT: Ending offset of token */
+  int *piPos                      /* OUT: Position integer of token */
+){
+  unicode_cursor *pCsr = (unicode_cursor *)pC;
+  unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer);
+  int iCode = 0;
+  char *zOut;
+  const unsigned char *z = &pCsr->aInput[pCsr->iOff];
+  const unsigned char *zStart = z;
+  const unsigned char *zEnd;
+  const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput];
+
+  /* Scan past any delimiter characters before the start of the next token.
+  ** Return SQLITE_DONE early if this takes us all the way to the end of 
+  ** the input.  zStart trails z by one character, so when the loop breaks
+  ** it points at the first byte of the token character held in iCode. */
+  while( z<zTerm ){
+    READ_UTF8(z, zTerm, iCode);
+    if( unicodeIsAlnum(p, iCode) ) break;
+    zStart = z;
+  }
+  if( zStart>=zTerm ) return SQLITE_DONE;
+
+  zOut = pCsr->zToken;
+  do {
+    int iOut;
+
+    /* Grow the output buffer if required. WRITE_UTF8 emits at most 4
+    ** bytes, so keep at least that much headroom. On the first call
+    ** zToken is NULL and nAlloc is 0, which also takes this branch. */
+    if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){
+      char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64);
+      if( !zNew ) return SQLITE_NOMEM;
+      zOut = &zNew[zOut - pCsr->zToken];
+      pCsr->zToken = zNew;
+      pCsr->nAlloc += 64;
+    }
+
+    /* Write the folded case of the last character read to the output.
+    ** zEnd is recorded first so that it ends up just past the last
+    ** character that belongs to the token. A fold result of zero is
+    ** not written to the output. */
+    zEnd = z;
+    iOut = sqlite3FtsUnicodeFold(iCode, p->bRemoveDiacritic);
+    if( iOut ){
+      WRITE_UTF8(zOut, iOut);
+    }
+
+    /* If the cursor is not at EOF, read the next character */
+    if( z>=zTerm ) break;
+    READ_UTF8(z, zTerm, iCode);
+  }while( unicodeIsAlnum(p, iCode) 
+       || sqlite3FtsUnicodeIsdiacritic(iCode)
+  );
+
+  /* Set the output variables and return. iOff is set from z, which has
+  ** already moved past the separator that terminated the token (if any),
+  ** whereas *piEnd uses zEnd and so excludes that separator. */
+  pCsr->iOff = (int)(z - pCsr->aInput);
+  *paToken = pCsr->zToken;
+  *pnToken = (int)(zOut - pCsr->zToken);
+  *piStart = (int)(zStart - pCsr->aInput);
+  *piEnd = (int)(zEnd - pCsr->aInput);
+  *piPos = pCsr->iToken++;
+  return SQLITE_OK;
+}
+
+/*
+** Set *ppModule to a pointer to the sqlite3_tokenizer_module 
+** structure for the unicode tokenizer. The structure is a function-local
+** static constant, so the pointer stays valid after this call returns
+** and must not be freed.
+*/
+void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){
+  /* Positional initializer; slot meanings are fixed by the declaration of
+  ** sqlite3_tokenizer_module in fts3_tokenizer.h (not visible here). */
+  static const sqlite3_tokenizer_module module = {
+    0,                /* NOTE(review): presumably the interface version */
+    unicodeCreate,
+    unicodeDestroy,
+    unicodeOpen,
+    unicodeClose,
+    unicodeNext,
+    0,                /* NOTE(review): optional trailing method left unset */
+  };
+  *ppModule = &module;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
+#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_unicode2.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_unicode2.c
@ -0,0 +1,365 @@
+/*
+** 2012 May 25
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+*/
+
+/*
+** DO NOT EDIT THIS MACHINE GENERATED FILE.
+*/
+
+#ifndef SQLITE_DISABLE_FTS3_UNICODE
+#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
+
+#include <assert.h>
+
+/*
+** Return true if the argument corresponds to a unicode codepoint
+** classified as either a letter or a number. Otherwise false.
+**
+** The results are undefined if the value passed to this function
+** is less than zero.
+*/
+int sqlite3FtsUnicodeIsalnum(int c){
+  /* Each unsigned integer in the following array corresponds to a contiguous
+  ** range of unicode codepoints that are not either letters or numbers (i.e.
+  ** codepoints for which this function should return 0).
+  **
+  ** The most significant 22 bits in each 32-bit value contain the first 
+  ** codepoint in the range. The least significant 10 bits are used to store
+  ** the size of the range (always at least 1). In other words, the value 
+  ** ((C<<10) + N) represents a range of N codepoints starting with codepoint 
+  ** C. It is not possible to represent a range larger than 1023 codepoints 
+  ** using this format.
+  */
+  static const unsigned int aEntry[] = {
+    0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
+    0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
+    0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
+    0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
+    0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
+    0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
+    0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
+    0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
+    0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
+    0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
+    0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
+    0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
+    0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
+    0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
+    0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
+    0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
+    0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
+    0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
+    0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
+    0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
+    0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
+    0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
+    0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
+    0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
+    0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
+    0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
+    0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
+    0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
+    0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
+    0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
+    0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
+    0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
+    0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
+    0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
+    0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
+    0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
+    0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
+    0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
+    0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
+    0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
+    0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
+    0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
+    0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
+    0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
+    0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
+    0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
+    0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
+    0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
+    0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
+    0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
+    0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
+    0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
+    0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
+    0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
+    0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
+    0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
+    0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
+    0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
+    0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
+    0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
+    0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
+    0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
+    0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
+    0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
+    0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
+    0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
+    0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
+    0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
+    0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
+    0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
+    0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
+    0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
+    0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
+    0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
+    0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
+    0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
+    0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
+    0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
+    0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
+    0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
+    0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
+    0x380400F0,
+  };
+  /* One bit per ASCII codepoint, 32 codepoints per word. A set bit marks
+  ** a codepoint that is NOT a letter or a digit. */
+  static const unsigned int aAscii[4] = {
+    0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
+  };
+
+  if( c<128 ){
+    /* The shift must be done on an unsigned value: for c = 31, 63, 95 and
+    ** 127 the bit index is 31, and (1 << 31) on a signed int is undefined
+    ** behaviour. */
+    return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
+  }else if( c<(1<<22) ){
+    /* Build a key that compares greater than or equal to every table entry
+    ** whose first codepoint is <= c, then binary search for the last such
+    ** entry. c is alphanumeric iff it lies beyond the end of that range. */
+    unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
+    int iRes = 0;
+    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
+    int iLo = 0;
+    while( iHi>=iLo ){
+      int iTest = (iHi + iLo) / 2;
+      if( key >= aEntry[iTest] ){
+        iRes = iTest;
+        iLo = iTest+1;
+      }else{
+        iHi = iTest-1;
+      }
+    }
+    assert( aEntry[0]<key );
+    assert( key>=aEntry[iRes] );
+    return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
+  }
+  /* Codepoints that do not fit in 22 bits are treated as alphanumeric */
+  return 1;
+}
+
+
+/*
+** If the argument is a codepoint corresponding to a lowercase letter
+** in the ASCII range with a diacritic added, return the codepoint
+** of the ASCII letter only. For example, if passed 235 - "LATIN
+** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
+** E"). The results of passing a codepoint that corresponds to an
+** uppercase letter are undefined.
+**
+** Any codepoint not covered by the tables below is returned unchanged.
+** Some ranges map to '\0', in which case 0 is returned.
+*/
+static int remove_diacritic(int c){
+  /* Each aDia[] entry encodes a range of codepoints as
+  ** ((first codepoint)<<3 | (number of further codepoints in the range)),
+  ** in ascending order. aChar[i] is the replacement for range aDia[i].
+  ** Both tables are read-only, so they are declared static const instead
+  ** of being rebuilt on the stack on every call. */
+  static const unsigned short aDia[] = {
+        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995, 
+     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286, 
+     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732, 
+     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336, 
+     3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928, 
+     3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234, 
+     4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504, 
+     6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529, 
+    61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, 
+    61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, 
+    62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, 
+    62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, 
+    62924, 63050, 63082, 63274, 63390, 
+  };
+  static const char aChar[] = {
+    '\0', 'a',  'c',  'e',  'i',  'n',  'o',  'u',  'y',  'y',  'a',  'c',  
+    'd',  'e',  'e',  'g',  'h',  'i',  'j',  'k',  'l',  'n',  'o',  'r',  
+    's',  't',  'u',  'u',  'w',  'y',  'z',  'o',  'u',  'a',  'i',  'o',  
+    'u',  'g',  'k',  'o',  'j',  'g',  'n',  'a',  'e',  'i',  'o',  'r',  
+    'u',  's',  't',  'h',  'a',  'e',  'o',  'y',  '\0', '\0', '\0', '\0', 
+    '\0', '\0', '\0', '\0', 'a',  'b',  'd',  'd',  'e',  'f',  'g',  'h',  
+    'h',  'i',  'k',  'l',  'l',  'm',  'n',  'p',  'r',  'r',  's',  't',  
+    'u',  'v',  'w',  'w',  'x',  'y',  'z',  'h',  't',  'w',  'y',  'a',  
+    'e',  'i',  'o',  'u',  'y',  
+  };
+
+  /* The key has all three length bits set, so it compares >= every entry
+  ** whose first codepoint is <= c. The search finds the last such entry. */
+  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
+  int iRes = 0;
+  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
+  int iLo = 0;
+  while( iHi>=iLo ){
+    int iTest = (iHi + iLo) / 2;
+    if( key >= aDia[iTest] ){
+      iRes = iTest;
+      iLo = iTest+1;
+    }else{
+      iHi = iTest-1;
+    }
+  }
+  assert( key>=aDia[iRes] );
+  /* c is replaced only if it falls inside the range that was found */
+  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
+}
+
+
+/*
+** Return non-zero if the argument interpreted as a unicode codepoint
+** is a diacritical modifier character, or zero otherwise.
+**
+** Only codepoints 768..817 can qualify. Bit (c-768) of mask0 covers
+** 768..799 and bit (c-800) of mask1 covers 800..817. The value returned
+** for a match is the selected mask bit, not necessarily 1.
+*/
+int sqlite3FtsUnicodeIsdiacritic(int c){
+  unsigned int mask0 = 0x08029FDF;
+  unsigned int mask1 = 0x000361F8;
+  if( c<768 || c>817 ) return 0;
+  /* Shift an unsigned 1: for c==799 the bit index is 31, and (1 << 31)
+  ** on a signed int is undefined behaviour. */
+  return (c < 768+32) ?
+      (mask0 & ((unsigned int)1 << (c-768))) :
+      (mask1 & ((unsigned int)1 << (c-768-32)));
+}
+
+
+/*
+** Interpret the argument as a unicode codepoint. If the codepoint
+** is an upper case character that has a lower case equivalent,
+** return the codepoint corresponding to the lower case version.
+** Otherwise, return a copy of the argument.
+**
+** The results are undefined if the value passed to this function
+** is less than zero.
+*/
+int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){
+  /* Each entry in the following array defines a rule for folding a range
+  ** of codepoints to lower case. The rule applies to a range of nRange
+  ** codepoints starting at codepoint iCode.
+  **
+  ** If the least significant bit in flags is clear, then the rule applies
+  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
+  ** need to be folded). Or, if it is set, then the rule only applies to
+  ** every second codepoint in the range, starting with codepoint C.
+  **
+  ** The 7 most significant bits in flags are an index into the aiOff[]
+  ** array. If a specific codepoint C does require folding, then its lower
+  ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
+  **
+  ** The contents of this array are generated by parsing the CaseFolding.txt
+  ** file distributed as part of the "Unicode Character Database". See
+  ** http://www.unicode.org for details.
+  */
+  static const struct TableEntry {
+    unsigned short iCode;
+    unsigned char flags;
+    unsigned char nRange;
+  } aEntry[] = {
+    {65, 14, 26},          {181, 64, 1},          {192, 14, 23},
+    {216, 14, 7},          {256, 1, 48},          {306, 1, 6},
+    {313, 1, 16},          {330, 1, 46},          {376, 116, 1},
+    {377, 1, 6},           {383, 104, 1},         {385, 50, 1},
+    {386, 1, 4},           {390, 44, 1},          {391, 0, 1},
+    {393, 42, 2},          {395, 0, 1},           {398, 32, 1},
+    {399, 38, 1},          {400, 40, 1},          {401, 0, 1},
+    {403, 42, 1},          {404, 46, 1},          {406, 52, 1},
+    {407, 48, 1},          {408, 0, 1},           {412, 52, 1},
+    {413, 54, 1},          {415, 56, 1},          {416, 1, 6},
+    {422, 60, 1},          {423, 0, 1},           {425, 60, 1},
+    {428, 0, 1},           {430, 60, 1},          {431, 0, 1},
+    {433, 58, 2},          {435, 1, 4},           {439, 62, 1},
+    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
+    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
+    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
+    {497, 2, 1},           {498, 1, 4},           {502, 122, 1},
+    {503, 134, 1},         {504, 1, 40},          {544, 110, 1},
+    {546, 1, 18},          {570, 70, 1},          {571, 0, 1},
+    {573, 108, 1},         {574, 68, 1},          {577, 0, 1},
+    {579, 106, 1},         {580, 28, 1},          {581, 30, 1},
+    {582, 1, 10},          {837, 36, 1},          {880, 1, 4},
+    {886, 0, 1},           {902, 18, 1},          {904, 16, 3},
+    {908, 26, 1},          {910, 24, 2},          {913, 14, 17},
+    {931, 14, 9},          {962, 0, 1},           {975, 4, 1},
+    {976, 140, 1},         {977, 142, 1},         {981, 146, 1},
+    {982, 144, 1},         {984, 1, 24},          {1008, 136, 1},
+    {1009, 138, 1},        {1012, 130, 1},        {1013, 128, 1},
+    {1015, 0, 1},          {1017, 152, 1},        {1018, 0, 1},
+    {1021, 110, 3},        {1024, 34, 16},        {1040, 14, 32},
+    {1120, 1, 34},         {1162, 1, 54},         {1216, 6, 1},
+    {1217, 1, 14},         {1232, 1, 88},         {1329, 22, 38},
+    {4256, 66, 38},        {4295, 66, 1},         {4301, 66, 1},
+    {7680, 1, 150},        {7835, 132, 1},        {7838, 96, 1},
+    {7840, 1, 96},         {7944, 150, 8},        {7960, 150, 6},
+    {7976, 150, 8},        {7992, 150, 8},        {8008, 150, 6},
+    {8025, 151, 8},        {8040, 150, 8},        {8072, 150, 8},
+    {8088, 150, 8},        {8104, 150, 8},        {8120, 150, 2},
+    {8122, 126, 2},        {8124, 148, 1},        {8126, 100, 1},
+    {8136, 124, 4},        {8140, 148, 1},        {8152, 150, 2},
+    {8154, 120, 2},        {8168, 150, 2},        {8170, 118, 2},
+    {8172, 152, 1},        {8184, 112, 2},        {8186, 114, 2},
+    {8188, 148, 1},        {8486, 98, 1},         {8490, 92, 1},
+    {8491, 94, 1},         {8498, 12, 1},         {8544, 8, 16},
+    {8579, 0, 1},          {9398, 10, 26},        {11264, 22, 47},
+    {11360, 0, 1},         {11362, 88, 1},        {11363, 102, 1},
+    {11364, 90, 1},        {11367, 1, 6},         {11373, 84, 1},
+    {11374, 86, 1},        {11375, 80, 1},        {11376, 82, 1},
+    {11378, 0, 1},         {11381, 0, 1},         {11390, 78, 2},
+    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
+    {42560, 1, 46},        {42624, 1, 24},        {42786, 1, 14},
+    {42802, 1, 62},        {42873, 1, 4},         {42877, 76, 1},
+    {42878, 1, 10},        {42891, 0, 1},         {42893, 74, 1},
+    {42896, 1, 4},         {42912, 1, 10},        {42922, 72, 1},
+    {65313, 14, 26},       
+  };
+  /* Offsets selected by (flags>>1). Because the sum is masked to 16 bits,
+  ** the large values near 65536 act as negative adjustments. */
+  static const unsigned short aiOff[] = {
+   1,     2,     8,     15,    16,    26,    28,    32,    
+   37,    38,    40,    48,    63,    64,    69,    71,    
+   79,    80,    116,   202,   203,   205,   206,   207,   
+   209,   210,   211,   213,   214,   217,   218,   219,   
+   775,   7264,  10792, 10795, 23228, 23256, 30204, 54721, 
+   54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, 
+   57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, 
+   65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, 
+   65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, 
+   65514, 65521, 65527, 65528, 65529, 
+  };
+
+  int ret = c;
+
+  assert( c>=0 );
+  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
+
+  if( c<128 ){
+    /* ASCII fast path: only 'A'..'Z' change. Diacritic removal is not
+    ** applied in this branch. */
+    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
+  }else if( c<65536 ){
+    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
+    int iLo = 0;
+    int iRes = -1;
+
+    /* Binary search for the last rule whose first codepoint is <= c.
+    ** iRes stays -1 if c precedes every rule. */
+    while( iHi>=iLo ){
+      int iTest = (iHi + iLo) / 2;
+      int cmp = (c - aEntry[iTest].iCode);
+      if( cmp>=0 ){
+        iRes = iTest;
+        iLo = iTest+1;
+      }else{
+        iHi = iTest-1;
+      }
+    }
+    assert( iRes<0 || c>=aEntry[iRes].iCode );
+
+    if( iRes>=0 ){
+      const struct TableEntry *p = &aEntry[iRes];
+      /* Fold only if c is inside the rule's range and, for rules with the
+      ** low flag bit set, has the same parity as the first codepoint. */
+      if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
+        ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
+        assert( ret>0 );
+      }
+    }
+
+    /* Diacritics are stripped from the folded value, and only for
+    ** codepoints in the range 128..65535. */
+    if( bRemoveDiacritic ) ret = remove_diacritic(ret);
+  }
+  
+  /* The only rule above 65535: codepoints 66560..66599 fold to c+40 */
+  else if( c>=66560 && c<66600 ){
+    ret = c + 40;
+  }
+
+  return ret;
+}
+#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
+#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_write.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3_write.c
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3speed.tcl
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/fts3speed.tcl
@ -0,0 +1,122 @@
+
+
+#--------------------------------------------------------------------------
+# This script contains several sub-programs used to test FTS3/FTS4 
+# performance. It does not run the queries directly, but generates SQL
+# scripts that can be run using the shell tool.
+#
+# The following cases are tested:
+#
+#   1. Inserting documents into an FTS3 table.
+#   2. Optimizing an FTS3 table (i.e. "INSERT INTO t1(t1) VALUES('optimize')").
+#   3. Deleting documents from an FTS3 table (no script is currently
+#      generated for this case).
+#   4. Querying FTS3 tables.
+#
+
+# Number of tokens in vocabulary. And number of tokens in each document.
+#
+set VOCAB_SIZE  2000
+set DOC_SIZE     100
+
+# Number of INSERT statements written to the insert script, and number of
+# SELECT statements written to each of the two query scripts.
+#
+set NUM_INSERTS 100000
+set NUM_SELECTS 1000
+
+# Force everything in this script to be deterministic by seeding the
+# random number generator behind rand() with a fixed value.
+#
+expr {srand(0)}
+
+# Print a usage message on stderr and terminate the script with exit
+# code -1.
+#
+# NOTE(review): the message advertises <rows> and <selects> arguments, but
+# the only call in this script is made whenever any argument at all is
+# supplied - confirm which behaviour is intended.
+#
+proc usage {} {
+  puts stderr "Usage: $::argv0 <rows> <selects>"
+  exit -1
+}
+
+# Append one line of SQL text to the script file that is currently open
+# on the global channel "fd".
+#
+proc sql {sql} {
+  global fd
+  puts $fd $sql
+}
+
+
+# Build and return the vocabulary: a list of $nWord randomly generated
+# tokens, each consisting of between 2 and 10 lowercase letters.
+#
+proc build_vocab {nWord} {
+  set alphabet [list a b c d e f g h i j k l m n o p q r s t u v w x y z]
+  set nAlpha [llength $alphabet]
+  set vocab [list]
+  for {set iWord 0} {$iWord<$nWord} {incr iWord} {
+    # Token length first, then one rand() call per character.
+    set nChar [expr {int((rand()*9.0)+2)}]
+    set token ""
+    for {set iChar 0} {$iChar<$nChar} {incr iChar} {
+      append token [lindex $alphabet [expr {int(rand()*$nAlpha)}]]
+    }
+    lappend vocab $token
+  }
+  return $vocab
+}
+
+# Return one randomly chosen token from the global vocabulary list.  A
+# random index in the range [0, 3*N) is drawn (N = vocabulary size);
+# indexes in the top third are folded down by a factor of 100 and indexes
+# in the middle third by a factor of 10, so tokens near the start of the
+# list are returned more often than the rest.
+#
+proc select_term {} {
+  global vocab
+  set nVocab [llength $vocab]
+  set idx [expr {int(rand()*$nVocab*3)}]
+  if {$idx >= 2*$nVocab} {
+    set idx [expr {($idx - 2*$nVocab)/100}]
+  }
+  if {$idx >= $nVocab} {
+    set idx [expr {($idx - $nVocab)/10}]
+  }
+  return [lindex $vocab $idx]
+}
+
+# Return a document: a list of $nTerm tokens, each one picked by a
+# separate call to select_term.
+#
+proc select_doc {nTerm} {
+  set doc [list]
+  for {set iTerm 0} {$iTerm<$nTerm} {incr iTerm} {
+    set term [select_term]
+    lappend doc $term
+  }
+  return $doc
+}
+
+# Emit the insert script: set up a fresh fts4 table named t1 and then
+# write $nInsert INSERT statements, each carrying one random document of
+# DOC_SIZE tokens.
+#
+proc test_1 {nInsert} {
+  global DOC_SIZE
+  sql "PRAGMA synchronous = OFF;"
+  sql "DROP TABLE IF EXISTS t1;"
+  sql "CREATE VIRTUAL TABLE t1 USING fts4;"
+  set iRow 0
+  while {$iRow < $nInsert} {
+    set doc [select_doc $DOC_SIZE]
+    sql "INSERT INTO t1 VALUES('$doc');"
+    incr iRow
+  }
+}
+
+# Emit the optimize script: a single 'optimize' command for table t1.
+#
+proc test_2 {} {
+  set stmt "INSERT INTO t1(t1) VALUES('optimize');"
+  sql $stmt
+}
+
+# Emit $nSelect queries, each matching t1 against one random term.
+#
+proc test_3 {nSelect} {
+  set iQuery 0
+  while {$iQuery < $nSelect} {
+    sql "SELECT count(*) FROM t1 WHERE t1 MATCH '[select_term]';"
+    incr iQuery
+  }
+}
+
+# Emit $nSelect queries, each matching t1 against two random terms.
+#
+proc test_4 {nSelect} {
+  set iQuery 0
+  while {$iQuery < $nSelect} {
+    sql "SELECT count(*) FROM t1 WHERE t1 MATCH '[select_term] [select_term]';"
+    incr iQuery
+  }
+}
+
+# The script accepts no command-line arguments; any argument at all
+# prints the usage message and exits.
+if {[llength $argv]!=0} usage
+
+# Build the vocabulary once; it is shared by every generated script.
+set ::vocab [build_vocab $::VOCAB_SIZE]
+
+# Each section below points ::fd at a new output file, runs one generator
+# (which writes through the sql proc) and closes the file again.
+set ::fd [open fts3speed_insert.sql w]
+test_1 $NUM_INSERTS
+close $::fd
+
+set ::fd [open fts3speed_select.sql w]
+test_3 $NUM_SELECTS
+close $::fd
+
+set ::fd [open fts3speed_select2.sql w]
+test_4 $NUM_SELECTS
+close $::fd
+
+set ::fd [open fts3speed_optimize.sql w]
+test_2
+close $::fd
+
+# Report the files that were written.
+puts "Success. Created files:"
+puts "  fts3speed_insert.sql"
+puts "  fts3speed_select.sql"
+puts "  fts3speed_select2.sql"
+puts "  fts3speed_optimize.sql"
+
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/mkfts3amal.tcl
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/mkfts3amal.tcl
@ -0,0 +1,115 @@
+#!/usr/bin/tclsh
+#
+# This script builds a single C code file holding all of FTS3 code.
+# The name of the output file is fts3amal.c.  To build this file,
+# first do:
+#
+#      make target_source
+#
+# The make target above moves all of the source code files into
+# a subdirectory named "tsrc".  (This script expects to find the files
+# there and will not work if they are not found.)
+#
+# After the "tsrc" directory has been created and populated, run
+# this script:
+#
+#      tclsh mkfts3amal.tcl
+#
+# The amalgamated FTS3 code will be written into fts3amal.c
+#
+
+# Open the output file and write a header comment at the beginning
+# of the file.  The header template is passed through [subst] so that
+# $today is replaced by the generation timestamp.
+#
+set out [open fts3amal.c w]
+set today [clock format [clock seconds] -format "%Y-%m-%d %H:%M:%S UTC" -gmt 1]
+puts $out [subst \
+{/******************************************************************************
+** This file is an amalgamation of separate C source files from the SQLite
+** Full Text Search extension 2 (fts3).  By combining all the individual C 
+** code  files into this single large file, the entire code can be compiled 
+** as a one translation unit.  This allows many compilers to do optimizations
+** that would not be possible if the files were compiled separately.  It also
+** makes the code easier to import into other projects.
+**
+** This amalgamation was generated on $today.
+*/}]
+
+# These are the header files used by FTS3.  The first time any of these 
+# files are seen in a #include statement in the C code, include the complete
+# text of the file in-line.  The file only needs to be included once.
+#
+# Each listed header gets an entry with value 1 in the available_hdr array.
+#
+foreach hdr {
+   fts3.h
+   fts3_hash.h
+   fts3_tokenizer.h
+   sqlite3.h
+   sqlite3ext.h
+} {
+  set available_hdr($hdr) 1
+}
+
+# 78 stars used for comment formatting.
+set s78 \
+{*****************************************************************************}
+
+# Write a one-line banner comment containing $text to the output file.
+# The banner is padded with stars taken from s78; longer text gets
+# fewer stars.
+#
+proc section_comment {text} {
+  global out s78
+  set nstar [expr {60 - [string length $text]}]
+  set padding [string range $s78 0 $nstar]
+  puts $out "/************** $text $padding/"
+}
+
+# Read the source file named $filename and write it into the
+# fts3amal.c output file.  If any #include statements are seen,
+# process them appropriately:
+#
+#   * An #include naming a header that has an available_hdr entry is
+#     replaced by the text of tsrc/<header>, copied by a recursive call.
+#   * Any other #include is written to the output the first time it is
+#     seen and dropped on later occurrences.
+#
+# Lines starting with "#ifdef __cplusplus" are replaced by "#if 0" and
+# #line directives are dropped.  All other lines are copied unchanged.
+#
+# NOTE(review): nothing visible in this script ever sets
+# available_hdr($hdr) to 0, so an available header is inlined at every
+# #include that names it - confirm this is intended.
+#
+proc copy_file {filename} {
+  global seen_hdr available_hdr out
+  set tail [file tail $filename]
+  section_comment "Begin file $tail"
+  set in [open $filename r]
+  while {![eof $in]} {
+    set line [gets $in]
+    # Match #include "hdr" or #include <hdr>; the header name lands in hdr.
+    if {[regexp {^#\s*include\s+["<]([^">]+)[">]} $line all hdr]} {
+      if {[info exists available_hdr($hdr)]} {
+        if {$available_hdr($hdr)} {
+          section_comment "Include $hdr in the middle of $tail"
+          copy_file tsrc/$hdr
+          section_comment "Continuing where we left off in $tail"
+        }
+      } elseif {![info exists seen_hdr($hdr)]} {
+        # Header that is not inlined: keep the first #include only.
+        set seen_hdr($hdr) 1
+        puts $out $line
+      }
+    } elseif {[regexp {^#ifdef __cplusplus} $line]} {
+      puts $out "#if 0"
+    } elseif {[regexp {^#line} $line]} {
+      # Skip #line directives.
+    } else {
+      puts $out $line
+    }
+  }
+  close $in
+  section_comment "End of $tail"
+}
+
+
+# Process the source files.  Process files containing commonly
+# used subroutines first in order to help the compiler find
+# inlining opportunities.
+#
+# Every file is read from the tsrc/ subdirectory.
+#
+foreach file {
+   fts3.c
+   fts3_hash.c
+   fts3_porter.c
+   fts3_tokenizer.c
+   fts3_tokenizer1.c
+} {
+  copy_file tsrc/$file
+}
+
+# All sources copied; finish the amalgamation file.
+close $out
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/tool/fts3view.c
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/tool/fts3view.c
@ -0,0 +1,875 @@
+/*
+** This program is a debugging and analysis utility that displays
+** information about an FTS3 or FTS4 index.
+**
+** Link this program against the SQLite3 amalgamation with the
+** SQLITE_ENABLE_FTS4 compile-time option.  Then run it as:
+**
+**    fts3view DATABASE
+**
+** to get a list of all FTS3/4 tables in DATABASE, or do
+**
+**    fts3view DATABASE TABLE COMMAND ....
+**
+** to see various aspects of the TABLE table.  Type fts3view with no
+** arguments for a list of available COMMANDs.
+*/
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "sqlite3.h"
+
+/*
+** Extra command-line arguments: the arguments that follow the command
+** name.  main() points azExtra at the first of them and stores their
+** count in nExtra.
+*/
+int nExtra;
+char **azExtra;
+
+/*
+** Look for a command-line argument named zName among the extra arguments
+** azExtra[0..nExtra-1].  Any number of leading '-' characters on an
+** argument are ignored when matching.
+**
+** If hasArg is zero the option is a plain flag and the return value is
+** the matching azExtra[] entry itself.  If hasArg is non-zero the return
+** value is the argument that follows the option, unless the option is the
+** very last argument, in which case the option text itself is returned.
+** If the option is not present, zDefault is returned.
+**
+** A matched option (and its value, if one was consumed) is removed from
+** azExtra[] and nExtra is reduced to match, so that later calls only ever
+** visit entries that are still live.
+*/
+const char *findOption(const char *zName, int hasArg, const char *zDefault){
+  int i;
+  const char *zResult = zDefault;
+  for(i=0; i<nExtra; i++){
+    const char *z = azExtra[i];
+    while( z[0]=='-' ) z++;
+    if( strcmp(z, zName)==0 ){
+      int nRemove;              /* Number of azExtra[] entries consumed */
+      int k;
+      int j = 1;
+      if( hasArg==0 || i==nExtra-1 ) j = 0;
+      zResult = azExtra[i+j];
+      nRemove = j+1;
+      /* Slide the remaining arguments down over the consumed entries */
+      for(k=i; k+nRemove<nExtra; k++){
+        azExtra[k] = azExtra[k+nRemove];
+      }
+      nExtra -= nRemove;
+      break;
+    }
+  }
+  return zResult;
+}
+
+
+/*
+** Format an SQL statement printf-style and compile it.  On success the
+** prepared statement is returned and the caller must finalize it.  If
+** compilation fails, the error and the SQL text are reported on stderr
+** and the program exits.
+*/
+static sqlite3_stmt *prepare(sqlite3 *db, const char *zFormat, ...){
+  sqlite3_stmt *pResult;
+  char *zText;
+  va_list args;
+
+  va_start(args, zFormat);
+  zText = sqlite3_vmprintf(zFormat, args);
+  va_end(args);
+  if( sqlite3_prepare_v2(db, zText, -1, &pResult, 0)!=0 ){
+    fprintf(stderr, "Error: %s\nSQL: %s\n", sqlite3_errmsg(db), zText);
+    exit(1);
+  }
+  sqlite3_free(zText);
+  return pResult;
+}
+
+/*
+** Format an SQL statement printf-style and run it to completion with
+** sqlite3_exec(), discarding any result rows.
+**
+** Returns the sqlite3_exec() result code, or SQLITE_NOMEM if the SQL
+** text could not be allocated.  The formatted SQL text is released
+** before returning.
+*/
+static int runSql(sqlite3 *db, const char *zFormat, ...){
+  va_list ap;
+  char *zSql;
+  int rc;
+
+  va_start(ap, zFormat);
+  zSql = sqlite3_vmprintf(zFormat, ap);
+  va_end(ap);
+  if( zSql==0 ) return SQLITE_NOMEM;
+  rc = sqlite3_exec(db, zSql, 0, 0, 0);
+  sqlite3_free(zSql);           /* Buffer came from sqlite3_vmprintf() */
+  return rc;
+}
+
+/*
+** Show the table schema.
+**
+** Prints the "sql" column of every sqlite_master row whose name matches
+** zTab as a LIKE prefix, followed by PRAGMA statements reporting the
+** page_size, journal_mode, auto_vacuum and encoding settings of the
+** database.
+*/
+static void showSchema(sqlite3 *db, const char *zTab){
+  sqlite3_stmt *pStmt;
+  /* "%%" yields a literal '%' so the pattern becomes '<zTab>%' */
+  pStmt = prepare(db,
+            "SELECT sql FROM sqlite_master"
+            " WHERE name LIKE '%q%%'"
+            " ORDER BY 1",
+            zTab);
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    printf("%s;\n", sqlite3_column_text(pStmt, 0));
+  }
+  sqlite3_finalize(pStmt);
+  pStmt = prepare(db, "PRAGMA page_size");
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    printf("PRAGMA page_size=%s;\n", sqlite3_column_text(pStmt, 0));
+  }
+  sqlite3_finalize(pStmt);
+  pStmt = prepare(db, "PRAGMA journal_mode");
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    printf("PRAGMA journal_mode=%s;\n", sqlite3_column_text(pStmt, 0));
+  }
+  sqlite3_finalize(pStmt);
+  pStmt = prepare(db, "PRAGMA auto_vacuum");
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    /* Translate the numeric setting into its keyword; "???" if unknown */
+    const char *zType = "???";
+    switch( sqlite3_column_int(pStmt, 0) ){
+      case 0:  zType = "OFF";         break;
+      case 1:  zType = "FULL";        break;
+      case 2:  zType = "INCREMENTAL"; break;
+    }
+    printf("PRAGMA auto_vacuum=%s;\n", zType);
+  }
+  sqlite3_finalize(pStmt);
+  pStmt = prepare(db, "PRAGMA encoding");
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    printf("PRAGMA encoding=%s;\n", sqlite3_column_text(pStmt, 0));
+  }
+  sqlite3_finalize(pStmt);
+}
+
+/* 
+** Read a 64-bit variable-length integer from memory starting at p[0].
+** The least significant group comes first: the low 7 bits of each byte
+** that has its 0x80 bit set are accumulated, and the first byte without
+** that bit (or the byte that follows nine such continuation bytes) is
+** added in full and ends the value.
+**
+** Return the number of bytes read, which is between 1 and 10.  The
+** routine takes no length argument and does no error detection of its
+** own, so the caller must make sure enough bytes are readable at p.
+** The value is stored in *v.
+*/
+int getVarint(const unsigned char *p, sqlite_int64 *v){
+  const unsigned char *q = p;
+  sqlite_uint64 x = 0, y = 1;     /* x: value so far.  y: weight of next byte */
+  while( (*q&0x80)==0x80 && q-(unsigned char *)p<9 ){
+    x += y * (*q++ & 0x7f);
+    y <<= 7;
+  }
+  x += y * (*q++);                /* Final byte contributes all of its bits */
+  *v = (sqlite_int64) x;
+  return (int) (q - (unsigned char *)p);
+}
+
+
+/* Show the content of the %_stat table
+**
+** One line is printed per row.  An integer value is printed as is.  A
+** blob value is treated as a sequence of varints and each of them is
+** printed in turn.  Values of any other type print nothing after the
+** "stat[N] =" prefix.
+*/
+static void showStat(sqlite3 *db, const char *zTab){
+  sqlite3_stmt *pStmt;
+  pStmt = prepare(db, "SELECT id, value FROM '%q_stat'", zTab);
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    printf("stat[%d] =", sqlite3_column_int(pStmt, 0));
+    switch( sqlite3_column_type(pStmt, 1) ){
+      case SQLITE_INTEGER: {
+        printf(" %d\n", sqlite3_column_int(pStmt, 1));
+        break;
+      }
+      case SQLITE_BLOB: {
+        unsigned char *x = (unsigned char*)sqlite3_column_blob(pStmt, 1);
+        int len = sqlite3_column_bytes(pStmt, 1);
+        int i = 0;
+        sqlite3_int64 v;
+        while( i<len ){
+          /* Decode at the current offset, not at the start of the blob */
+          i += getVarint(x+i, &v);
+          printf(" %lld", v);
+        }
+        printf("\n");
+        break;
+      }
+      default: {
+        /* Keep one row per output line even for unexpected value types */
+        printf("\n");
+        break;
+      }
+    }
+  }
+  sqlite3_finalize(pStmt);
+}
+
+/*
+** Report on the vocabulary.  This creates an fts4aux table with a random
+** name, but deletes it in the end.
+**
+** The whole report runs inside a transaction that is rolled back at the
+** end, which is what removes the temporary fts4aux table again.  The
+** number of "most common" tokens listed is taken from the --top option
+** and defaults to 25.
+*/
+static void showVocabulary(sqlite3 *db, const char *zTab){
+  char *zAux;
+  sqlite3_uint64 r;
+  sqlite3_stmt *pStmt;
+  int nDoc = 0;
+  int nToken = 0;
+  int nOccurrence = 0;
+  int nTop;
+  int n, i;
+
+  sqlite3_randomness(sizeof(r), &r);
+  /* The format has a single %llx conversion, so pass only the random value */
+  zAux = sqlite3_mprintf("viewer_%llx", r);
+  runSql(db, "BEGIN");
+  pStmt = prepare(db, "SELECT count(*) FROM %Q", zTab);
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    nDoc = sqlite3_column_int(pStmt, 0);
+  }
+  sqlite3_finalize(pStmt);
+  printf("Number of documents...................... %9d\n", nDoc);
+
+  runSql(db, "CREATE VIRTUAL TABLE %s USING fts4aux(%Q)", zAux, zTab);
+  pStmt = prepare(db, 
+             "SELECT count(*), sum(occurrences) FROM %s WHERE col='*'",
+             zAux);
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    nToken = sqlite3_column_int(pStmt, 0);
+    nOccurrence = sqlite3_column_int(pStmt, 1);
+  }
+  sqlite3_finalize(pStmt);
+  printf("Total tokens in all documents............ %9d\n", nOccurrence);
+  printf("Total number of distinct tokens.......... %9d\n", nToken);
+  /* Every percentage below divides by nToken, so stop if there are none */
+  if( nToken==0 ) goto end_vocab;
+
+  n = 0;
+  pStmt = prepare(db, "SELECT count(*) FROM %s"
+                      " WHERE col='*' AND occurrences==1", zAux);
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    n = sqlite3_column_int(pStmt, 0);
+  }
+  sqlite3_finalize(pStmt);
+  printf("Tokens used exactly once................. %9d %5.2f%%\n",
+          n, n*100.0/nToken);
+
+  n = 0;
+  pStmt = prepare(db, "SELECT count(*) FROM %s"
+                      " WHERE col='*' AND documents==1", zAux);
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    n = sqlite3_column_int(pStmt, 0);
+  }
+  sqlite3_finalize(pStmt);
+  printf("Tokens used in only one document......... %9d %5.2f%%\n",
+          n, n*100.0/nToken);
+
+  if( nDoc>=2000 ){
+    n = 0;
+    pStmt = prepare(db, "SELECT count(*) FROM %s"
+                        " WHERE col='*' AND occurrences<=%d", zAux, nDoc/1000);
+    while( sqlite3_step(pStmt)==SQLITE_ROW ){
+      n = sqlite3_column_int(pStmt, 0);
+    }
+    sqlite3_finalize(pStmt);
+    printf("Tokens used in 0.1%% or less of docs...... %9d %5.2f%%\n",
+            n, n*100.0/nToken);
+  }
+
+  if( nDoc>=200 ){
+    n = 0;
+    pStmt = prepare(db, "SELECT count(*) FROM %s"
+                        " WHERE col='*' AND occurrences<=%d", zAux, nDoc/100);
+    while( sqlite3_step(pStmt)==SQLITE_ROW ){
+      n = sqlite3_column_int(pStmt, 0);
+    }
+    sqlite3_finalize(pStmt);
+    printf("Tokens used in 1%% or less of docs........ %9d %5.2f%%\n",
+            n, n*100.0/nToken);
+  }
+
+  nTop = atoi(findOption("top", 1, "25"));
+  printf("The %d most common tokens:\n", nTop);
+  pStmt = prepare(db,
+            "SELECT term, documents FROM %s"
+            " WHERE col='*'"
+            " ORDER BY documents DESC, term"
+            " LIMIT %d", zAux, nTop);
+  i = 0;
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    i++;
+    n = sqlite3_column_int(pStmt, 1);
+    printf("  %2d. %-30s %9d docs %5.2f%%\n", i,
+      sqlite3_column_text(pStmt, 0), n, n*100.0/nDoc);
+  }
+  sqlite3_finalize(pStmt);
+
+end_vocab:
+  /* Rolling back also discards the temporary fts4aux table */
+  runSql(db, "ROLLBACK");
+  sqlite3_free(zAux);
+}
+
+/*
+** Report on the number and sizes of segments.
+**
+** Three aggregate queries collect the count, total size and maximum size
+** of (1) all rows of %_segments, (2) the %_segments rows that lie after a
+** segdir entry's leaves_end_block (reported as index segments) and (3) the
+** root blobs stored in %_segdir.  Leaf figures are derived as (1) minus
+** (2).  A per-level breakdown of the leaf blocks follows, where the level
+** is taken modulo 1024.
+*/
+static void showSegmentStats(sqlite3 *db, const char *zTab){
+  sqlite3_stmt *pStmt;
+  int nSeg = 0;
+  sqlite3_int64 szSeg = 0, mxSeg = 0;
+  int nIdx = 0;
+  sqlite3_int64 szIdx = 0, mxIdx = 0;
+  int nRoot = 0;
+  sqlite3_int64 szRoot = 0, mxRoot = 0;
+  sqlite3_int64 mx;
+  int nLeaf;
+  int n;
+  int pgsz;
+  int mxLevel;
+  int i;
+
+  /* (1) Every block in %_segments */
+  pStmt = prepare(db,
+                  "SELECT count(*), sum(length(block)), max(length(block))"
+                  " FROM '%q_segments'",
+                  zTab);
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    nSeg = sqlite3_column_int(pStmt, 0);
+    szSeg = sqlite3_column_int64(pStmt, 1);
+    mxSeg = sqlite3_column_int64(pStmt, 2);
+  }
+  sqlite3_finalize(pStmt);
+  /* (2) Blocks between leaves_end_block+1 and end_block: index blocks */
+  pStmt = prepare(db,
+            "SELECT count(*), sum(length(block)), max(length(block))"
+            "  FROM '%q_segments' a JOIN '%q_segdir' b"
+            " WHERE a.blockid BETWEEN b.leaves_end_block+1 AND b.end_block",
+            zTab, zTab);
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    nIdx = sqlite3_column_int(pStmt, 0);
+    szIdx = sqlite3_column_int64(pStmt, 1);
+    mxIdx = sqlite3_column_int64(pStmt, 2);
+  }
+  sqlite3_finalize(pStmt);
+  /* (3) Root blobs held directly in %_segdir */
+  pStmt = prepare(db,
+            "SELECT count(*), sum(length(root)), max(length(root))"
+            "  FROM '%q_segdir'",
+            zTab);
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    nRoot = sqlite3_column_int(pStmt, 0);
+    szRoot = sqlite3_column_int64(pStmt, 1);
+    mxRoot = sqlite3_column_int64(pStmt, 2);
+  }
+  sqlite3_finalize(pStmt);
+
+  printf("Number of segments....................... %9d\n", nSeg+nRoot);
+  printf("Number of leaf segments.................. %9d\n", nSeg-nIdx);
+  printf("Number of index segments................. %9d\n", nIdx);
+  printf("Number of root segments.................. %9d\n", nRoot);
+  printf("Total size of all segments............... %9lld\n", szSeg+szRoot);
+  printf("Total size of all leaf segments.......... %9lld\n", szSeg-szIdx);
+  printf("Total size of all index segments......... %9lld\n", szIdx);
+  printf("Total size of all root segments.......... %9lld\n", szRoot);
+  /* NOTE(review): the leaf average divides by nSeg-nIdx, which is zero
+  ** when every %_segments row is an index block - confirm that cannot
+  ** happen in practice. */
+  if( nSeg>0 ){
+    printf("Average size of all segments............. %11.1f\n",
+            (double)(szSeg+szRoot)/(double)(nSeg+nRoot));
+    printf("Average size of leaf segments............ %11.1f\n",
+            (double)(szSeg-szIdx)/(double)(nSeg-nIdx));
+  }
+  if( nIdx>0 ){
+    printf("Average size of index segments........... %11.1f\n",
+            (double)szIdx/(double)nIdx);
+  }
+  if( nRoot>0 ){
+    printf("Average size of root segments............ %11.1f\n",
+            (double)szRoot/(double)nRoot);
+  }
+  mx = mxSeg;
+  if( mx<mxRoot ) mx = mxRoot;
+  printf("Maximum segment size..................... %9lld\n", mx);
+  printf("Maximum index segment size............... %9lld\n", mxIdx);
+  printf("Maximum root segment size................ %9lld\n", mxRoot);
+
+  pStmt = prepare(db, "PRAGMA page_size");
+  pgsz = 1024;                  /* Used if the pragma returns no row */
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    pgsz = sqlite3_column_int(pStmt, 0);
+  }
+  sqlite3_finalize(pStmt);
+  printf("Database page size....................... %9d\n", pgsz);
+  /* Count leaf blocks longer than pgsz-45 bytes.  NOTE(review): 45 is
+  ** presumably the per-page overhead that decides whether a block fits on
+  ** a single page - confirm. */
+  pStmt = prepare(db,
+            "SELECT count(*)"
+            "  FROM '%q_segments' a JOIN '%q_segdir' b"
+            " WHERE a.blockid BETWEEN b.start_block AND b.leaves_end_block"
+            "   AND length(a.block)>%d",
+            zTab, zTab, pgsz-45);
+  n = 0;
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    n = sqlite3_column_int(pStmt, 0);
+  }
+  sqlite3_finalize(pStmt);
+  nLeaf = nSeg - nIdx;
+  printf("Leaf segments larger than %5d bytes.... %9d   %5.2f%%\n",
+         pgsz-45, n, nLeaf>0 ? n*100.0/nLeaf : 0.0);
+
+  /* "%%" yields a literal '%': the query computes max(level%1024) */
+  pStmt = prepare(db, "SELECT max(level%%1024) FROM '%q_segdir'", zTab);
+  mxLevel = 0;
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){
+    mxLevel = sqlite3_column_int(pStmt, 0);
+  }
+  sqlite3_finalize(pStmt);
+
+  /* Per-level report.  Levels without any leaf block are skipped.  From
+  ** here on nLeaf and nIdx are reused for the per-level counts. */
+  for(i=0; i<=mxLevel; i++){
+    pStmt = prepare(db,
+           "SELECT count(*), sum(len), avg(len), max(len), sum(len>%d),"
+           "       count(distinct idx)"
+           "  FROM (SELECT length(a.block) AS len, idx"
+           "          FROM '%q_segments' a JOIN '%q_segdir' b"
+           "         WHERE (a.blockid BETWEEN b.start_block"
+                                       " AND b.leaves_end_block)"
+           "           AND (b.level%%1024)==%d)",
+           pgsz-45, zTab, zTab, i);
+    if( sqlite3_step(pStmt)==SQLITE_ROW
+     && (nLeaf = sqlite3_column_int(pStmt, 0))>0
+    ){
+      nIdx = sqlite3_column_int(pStmt, 5);
+      sqlite3_int64 sz;
+      printf("For level %d:\n", i);
+      printf("  Number of indexes...................... %9d\n", nIdx);
+      printf("  Number of leaf segments................ %9d\n", nLeaf);
+      if( nIdx>1 ){
+        printf("  Average leaf segments per index........ %11.1f\n",
+               (double)nLeaf/(double)nIdx);
+      }
+      printf("  Total size of all leaf segments........ %9lld\n",
+             (sz = sqlite3_column_int64(pStmt, 1)));
+      printf("  Average size of leaf segments.......... %11.1f\n",
+             sqlite3_column_double(pStmt, 2));
+      if( nIdx>1 ){
+        printf("  Average leaf segment size per index.... %11.1f\n",
+               (double)sz/(double)nIdx);
+      }
+      printf("  Maximum leaf segment size.............. %9lld\n",
+             sqlite3_column_int64(pStmt, 3));
+      n = sqlite3_column_int(pStmt, 4);
+      printf("  Leaf segments larger than %5d bytes.. %9d   %5.2f%%\n",
+             pgsz-45, n, n*100.0/nLeaf);
+    }
+    sqlite3_finalize(pStmt);
+  }
+}
+
+/*
+** Print one "tree" line of the segdir map.  A single block is shown as
+** just its id; a run of blocks (iUpper greater than iLower) is shown as
+** "first thru last" together with the number of blocks in the run.
+*/
+static void printTreeLine(sqlite3_int64 iLower, sqlite3_int64 iUpper){
+  const int isRun = iLower<iUpper;
+
+  printf("                 tree   %9lld", iLower);
+  if( isRun ){
+    printf(" thru %9lld  (%lld blocks)", iUpper, iUpper-iLower+1);
+  }
+  printf("\n");
+}
+
+/*
+** Return the value of "block IS NULL" for the %_segments row with the
+** given blockid.  If there is no such row, 1 is returned.
+*/
+static int isNullSegment(sqlite3 *db, const char *zTab, sqlite3_int64 iBlockId){
+  int bNull = 1;
+  sqlite3_stmt *pQuery;
+
+  pQuery = prepare(db, "SELECT block IS NULL FROM '%q_segments'"
+                       " WHERE blockid=%lld", zTab, iBlockId);
+  if( sqlite3_step(pQuery)==SQLITE_ROW ) bNull = sqlite3_column_int(pQuery, 0);
+  sqlite3_finalize(pQuery);
+  return bNull;
+}
+
+/*
+** Show a map of segments derived from the %_segdir table.
+**
+** The level column is split in two: level/1024 selects the inverted
+** index and level%1024 is the level printed for each entry.  For every
+** segdir row the root is printed as "r<rowid>".  When the row has leaf
+** blocks (leaves_end_block greater than start_block), the blocks that
+** follow the leaves up to end_block are listed as runs of consecutive
+** blockids, followed by a line giving the leaf block range.
+*/
+static void showSegdirMap(sqlite3 *db, const char *zTab){
+  int mxIndex, iIndex;
+  sqlite3_stmt *pStmt = 0;
+  sqlite3_stmt *pStmt2 = 0;
+  int prevLevel;
+
+  pStmt = prepare(db, "SELECT max(level/1024) FROM '%q_segdir'", zTab);
+  if( sqlite3_step(pStmt)==SQLITE_ROW ){
+    mxIndex = sqlite3_column_int(pStmt, 0);
+  }else{
+    mxIndex = 0;
+  }
+  sqlite3_finalize(pStmt);
+
+  printf("Number of inverted indices............... %3d\n", mxIndex+1);
+  /* pStmt: segdir rows of one index.  The index number is bound to "?" */
+  pStmt = prepare(db,
+    "SELECT level, idx, start_block, leaves_end_block, end_block, rowid"
+    "  FROM '%q_segdir'"
+    " WHERE level/1024==?"
+    " ORDER BY level DESC, idx",
+    zTab);
+  /* pStmt2: blockids that exist within a bound range, in ascending order */
+  pStmt2 = prepare(db,
+    "SELECT blockid FROM '%q_segments'"
+    " WHERE blockid BETWEEN ? AND ? ORDER BY blockid",
+    zTab);
+  for(iIndex=0; iIndex<=mxIndex; iIndex++){
+    if( mxIndex>0 ){
+      printf("**************************** Index %d "
+             "****************************\n", iIndex);
+    }
+    sqlite3_bind_int(pStmt, 1, iIndex);
+    prevLevel = -1;
+    while( sqlite3_step(pStmt)==SQLITE_ROW ){
+      int iLevel = sqlite3_column_int(pStmt, 0)%1024;
+      int iIdx = sqlite3_column_int(pStmt, 1);
+      sqlite3_int64 iStart = sqlite3_column_int64(pStmt, 2);
+      sqlite3_int64 iLEnd = sqlite3_column_int64(pStmt, 3);
+      sqlite3_int64 iEnd = sqlite3_column_int64(pStmt, 4);
+      char rtag[20];
+      /* The level is printed only on the first row of each level */
+      if( iLevel!=prevLevel ){
+        printf("level %2d idx %2d", iLevel, iIdx);
+        prevLevel = iLevel;
+      }else{
+        printf("         idx %2d", iIdx);
+      }
+      sqlite3_snprintf(sizeof(rtag), rtag, "r%lld",
+                       sqlite3_column_int64(pStmt,5));
+      printf("  root   %9s\n", rtag);
+      if( iLEnd>iStart ){
+        sqlite3_int64 iLower, iPrev = 0, iX;
+        if( iLEnd+1<=iEnd ){
+          sqlite3_bind_int64(pStmt2, 1, iLEnd+1);
+          sqlite3_bind_int64(pStmt2, 2, iEnd);
+          /* iLower..iPrev is the current run of consecutive blockids;
+          ** iLower<0 means no run has been started yet. */
+          iLower = -1;        
+          while( sqlite3_step(pStmt2)==SQLITE_ROW ){
+            iX = sqlite3_column_int64(pStmt2, 0);
+            if( iLower<0 ){
+              iLower = iPrev = iX;
+            }else if( iX==iPrev+1 ){
+              iPrev = iX;
+            }else{
+              /* Gap in the blockids: flush the run and start a new one */
+              printTreeLine(iLower, iPrev);
+              iLower = iPrev = iX;
+            }
+          }
+          sqlite3_reset(pStmt2);
+          if( iLower>=0 ){
+            /* A final run consisting only of end_block is reported as
+            ** "null" when that block's content is NULL. */
+            if( iLower==iPrev && iLower==iEnd
+             && isNullSegment(db,zTab,iLower)
+            ){
+              printf("                 null   %9lld\n", iLower);
+            }else{
+              printTreeLine(iLower, iPrev);
+            }
+          }
+        }
+        printf("                 leaves %9lld thru %9lld  (%lld blocks)\n",
+               iStart, iLEnd, iLEnd - iStart + 1);
+      }
+    }
+    sqlite3_reset(pStmt);
+  }
+  sqlite3_finalize(pStmt);
+  sqlite3_finalize(pStmt2);
+}
+
+/*
+** Decode a single segment block and display the results on stdout.
+**
+** The block starts with a varint height.  If the height is greater than
+** zero a varint left-child block number follows.  After that comes a
+** sequence of terms: each is a varint prefix length (omitted for the
+** first term), a varint suffix length and the suffix bytes.  When the
+** height is zero every term is followed by a varint doclist size and
+** that many bytes of doclist, which are skipped.
+**
+** A NULL or empty block prints nothing.  Lengths that are negative, that
+** do not fit the term buffer or that run past the end of the block are
+** reported on stderr and terminate the program.
+*/
+static void decodeSegment(
+  const unsigned char *aData,   /* Content to print */
+  int nData                     /* Number of bytes of content */
+){
+  sqlite3_int64 iChild = 0;
+  sqlite3_int64 iPrefix;
+  sqlite3_int64 nTerm;
+  sqlite3_int64 n;
+  sqlite3_int64 iDocsz;
+  int iHeight;
+  sqlite3_int64 i = 0;
+  int cnt = 0;
+  char zTerm[1000];
+  const sqlite3_int64 szTerm = (sqlite3_int64)sizeof(zTerm);
+
+  /* Nothing to decode; do not read from a NULL or zero-length block */
+  if( aData==0 || nData<=0 ) return;
+
+  i += getVarint(aData, &n);
+  iHeight = (int)n;
+  printf("height: %d\n", iHeight);
+  if( iHeight>0 ){
+    i += getVarint(aData+i, &iChild);
+    printf("left-child: %lld\n", iChild);
+  }
+  while( i<nData ){
+    if( (cnt++)>0 ){
+      i += getVarint(aData+i, &iPrefix);
+    }else{
+      iPrefix = 0;
+    }
+    i += getVarint(aData+i, &nTerm);
+    /* Same limit as iPrefix+nTerm+1>=sizeof(zTerm), but written so that
+    ** negative values are rejected and the sum cannot overflow. */
+    if( iPrefix<0 || nTerm<0 || iPrefix>=szTerm || nTerm>=szTerm-1-iPrefix ){
+      fprintf(stderr, "term to long\n");
+      exit(1);
+    }
+    /* The suffix bytes must lie inside the block */
+    if( nTerm>nData-i ){
+      fprintf(stderr, "term extends past end of segment\n");
+      exit(1);
+    }
+    memcpy(zTerm+iPrefix, aData+i, (size_t)nTerm);
+    zTerm[iPrefix+nTerm] = 0;
+    i += nTerm;
+    if( iHeight==0 ){
+      i += getVarint(aData+i, &iDocsz);
+      if( iDocsz<0 ){
+        /* A negative size would move the cursor backwards */
+        fprintf(stderr, "negative doclist size\n");
+        exit(1);
+      }
+      printf("term: %-25s doclist %7lld bytes offset %lld\n", zTerm, iDocsz, i);
+      i += iDocsz;
+    }else{
+      printf("term: %-25s child %lld\n", zTerm, ++iChild);
+    }
+  }
+}
+  
+  
+/*
+** Print a blob as hex and ascii.
+**
+** Output is 16 bytes per line: the offset of the first byte (its width
+** depends on the size of the blob), the bytes in hex, then the same bytes
+** as characters with '.' standing in for anything unprintable.  The final
+** line is padded with spaces past the end of the data.
+*/
+static void printBlob(
+  const unsigned char *aData,   /* Content to print */
+  int nData                     /* Number of bytes of content */
+){
+  int i, j;
+  const char *zOfstFmt;
+  const int perLine = 16;
+
+  /* Choose an offset width wide enough for the largest offset */
+  if( (nData&~0xfff)==0 ){
+    zOfstFmt = " %03x: ";
+  }else if( (nData&~0xffff)==0 ){
+    zOfstFmt = " %04x: ";
+  }else if( (nData&~0xfffff)==0 ){
+    zOfstFmt = " %05x: ";
+  }else if( (nData&~0xffffff)==0 ){
+    zOfstFmt = " %06x: ";
+  }else{
+    zOfstFmt = " %08x: ";
+  }
+
+  for(i=0; i<nData; i += perLine){
+    fprintf(stdout, zOfstFmt, i);
+    for(j=0; j<perLine; j++){
+      /* Valid indexes are 0..nData-1, so index nData is already padding */
+      if( i+j>=nData ){
+        fprintf(stdout, "   ");
+      }else{
+        fprintf(stdout,"%02x ", aData[i+j]);
+      }
+    }
+    for(j=0; j<perLine; j++){
+      if( i+j>=nData ){
+        fprintf(stdout, " ");
+      }else{
+        fprintf(stdout,"%c", isprint(aData[i+j]) ? aData[i+j] : '.');
+      }
+    }
+    fprintf(stdout,"\n");
+  }
+}
+
+/*
+** Convert the leading run of decimal digits in z[] into a 64-bit
+** integer.  Conversion stops at the first character that is not a
+** digit; if z[] does not start with a digit the result is 0.
+*/
+static sqlite3_int64 atoi64(const char *z){
+  sqlite3_int64 total = 0;
+  const char *p;
+  for(p=z; p[0]>='0' && p[0]<='9'; p++){
+    total = total*10 + p[0] - '0';
+  }
+  return total;
+}
+
+/*
+** Return a prepared statement which, when stepped, will return in its
+** first column the blob associated with segment zId.  If zId begins with
+** 'r' then the rest of it is the rowid of a %_segdir entry and the root
+** blob of that entry is selected.  Otherwise zId is the blockid of a
+** %_segments entry and its block is selected.
+*/
+static sqlite3_stmt *prepareToGetSegment(
+  sqlite3 *db,         /* The database */
+  const char *zTab,    /* The FTS3/4 table name */
+  const char *zId      /* ID of the segment to open */
+){
+  if( zId[0]!='r' ){
+    return prepare(db, "SELECT block FROM '%q_segments' WHERE blockid=%lld",
+                   zTab, atoi64(zId));
+  }
+  return prepare(db, "SELECT root FROM '%q_segdir' WHERE rowid=%lld",
+                 zTab, atoi64(zId+1));
+}
+
+/*
+** Print the content of a segment or of the root of a segdir.  The segment
+** or root is identified by azExtra[0].  If the first character of
+** azExtra[0] is 'r' then the remainder is the integer rowid of the
+** %_segdir entry.  Otherwise all of azExtra[0] is an integer which is
+** the block number.
+**
+** If the --raw option is present in azExtra, then a hex dump is provided.
+** Otherwise a decoding is shown.  Nothing is printed if the segment
+** cannot be found.
+*/
+static void showSegment(sqlite3 *db, const char *zTab){
+  sqlite3_stmt *pSeg = prepareToGetSegment(db, zTab, azExtra[0]);
+
+  if( sqlite3_step(pSeg)==SQLITE_ROW ){
+    int nByte = sqlite3_column_bytes(pSeg, 0);
+    const unsigned char *aByte = sqlite3_column_blob(pSeg, 0);
+    printf("Segment %s of size %d bytes:\n", azExtra[0], nByte);
+    if( findOption("raw", 0, 0)==0 ){
+      decodeSegment(aByte, nByte);
+    }else{
+      printBlob(aByte, nByte);
+    }
+  }
+  sqlite3_finalize(pSeg);
+}
+
+/*
+** Decode a single doclist and display the results on stdout.
+**
+** The doclist is read as a sequence of varints.  Each entry begins with
+** a docid stored as a delta from the previous docid, followed by a list
+** of position values: 0 ends the entry, 1 announces that a column number
+** follows, and any other value V advances the current position by V-2.
+**
+** NOTE(review): the inner loop only stops at a 0 terminator and does not
+** compare against nData, so it relies on the doclist being well formed.
+*/
+static void decodeDoclist(
+  const unsigned char *aData,   /* Content to print */
+  int nData                     /* Number of bytes of content */
+){
+  sqlite3_int64 iPrevDocid = 0;
+  sqlite3_int64 iDocid;
+  sqlite3_int64 iPos;
+  sqlite3_int64 iPrevPos = 0;
+  sqlite3_int64 iCol;
+  int i = 0;
+
+  while( i<nData ){
+    i += getVarint(aData+i, &iDocid);
+    printf("docid %lld col0", iDocid+iPrevDocid);
+    iPrevDocid += iDocid;
+    iPrevPos = 0;
+    while( 1 ){
+      i += getVarint(aData+i, &iPos);
+      if( iPos==1 ){
+        /* Column switch: positions restart from zero in the new column */
+        i += getVarint(aData+i, &iCol);
+        printf(" col%lld", iCol);
+        iPrevPos = 0;
+      }else if( iPos==0 ){
+        /* End of the position list for this docid */
+        printf("\n");
+        break;
+      }else{
+        iPrevPos += iPos - 2;
+        printf(" %lld", iPrevPos);
+      }
+    }
+  }
+}
+  
+
+/*
+** Print the content of a doclist.  The segment or segdir-root is
+** identified by azExtra[0].  If the first character of azExtra[0]
+** is 'r' then the remainder is the integer rowid of the %_segdir entry.
+** Otherwise all of azExtra[0] is an integer which is the block number.
+** The offset into the segment is identified by azExtra[1].  The size of
+** the doclist is azExtra[2].
+**
+** If the --raw option is present in azExtra, then a hex dump is provided.
+** Otherwise a decoding is shown.
+**
+** Nothing is printed if the segment cannot be found.  If the requested
+** offset and size do not lie inside the segment, an error is reported on
+** stderr and nothing is decoded.
+*/
+static void showDoclist(sqlite3 *db, const char *zTab){
+  const unsigned char *aData;
+  sqlite3_int64 offset;
+  int nData;
+  int nBlob;                    /* Size of the whole segment in bytes */
+  sqlite3_stmt *pStmt;
+
+  offset = atoi64(azExtra[1]);
+  nData = atoi(azExtra[2]);
+  pStmt = prepareToGetSegment(db, zTab, azExtra[0]);
+  if( sqlite3_step(pStmt)!=SQLITE_ROW ){
+    sqlite3_finalize(pStmt);
+    return;
+  }
+  aData = sqlite3_column_blob(pStmt, 0);
+  nBlob = sqlite3_column_bytes(pStmt, 0);
+  /* The offset and size come from the command line: never read outside
+  ** of the blob that was actually fetched. */
+  if( offset<0 || nData<0 || offset>nBlob || nData>nBlob-offset ){
+    fprintf(stderr, "doclist range (offset %lld, size %d) is outside of "
+                    "the %d-byte segment %s\n",
+                    offset, nData, nBlob, azExtra[0]);
+    sqlite3_finalize(pStmt);
+    return;
+  }
+  printf("Doclist at %s offset %lld of size %d bytes:\n",
+         azExtra[0], offset, nData);
+  if( findOption("raw", 0, 0)!=0 ){
+    printBlob(aData+offset, nData);
+  }else{
+    decodeDoclist(aData+offset, nData);
+  }
+  sqlite3_finalize(pStmt);
+}
+
+/*
+** Show the top N largest segments, where N comes from the --top option
+** and defaults to 25.  Blocks are listed largest first; blocks of equal
+** size are ordered by blockid.
+*/
+static void listBigSegments(sqlite3 *db, const char *zTab){
+  sqlite3_stmt *pQuery;
+  int iRank;
+  int nLimit = atoi(findOption("top", 1, "25"));
+
+  printf("The %d largest segments:\n", nLimit);
+  pQuery = prepare(db,
+            "SELECT blockid, length(block) AS len FROM '%q_segments'"
+            " ORDER BY 2 DESC, 1"
+            " LIMIT %d", zTab, nLimit);
+  for(iRank=1; sqlite3_step(pQuery)==SQLITE_ROW; iRank++){
+    sqlite3_int64 iBlock = sqlite3_column_int64(pQuery, 0);
+    sqlite3_int64 nByte = sqlite3_column_int64(pQuery, 1);
+    printf("  %2d. %9lld size %lld\n", iRank, iBlock, nByte);
+  }
+  sqlite3_finalize(pQuery);
+}
+
+
+
+/*
+** Print the command-line synopsis and the list of available commands on
+** stderr, then exit with status 1.  argv0 is the program name shown in
+** the synopsis.
+*/
+static void usage(const char *argv0){
+  fprintf(stderr, "Usage: %s DATABASE\n"
+                  "   or: %s DATABASE FTS3TABLE ARGS...\n", argv0, argv0);
+  /* "%%" below yields a literal '%' because the text is a format string */
+  fprintf(stderr,
+    "ARGS:\n"
+    "  big-segments [--top N]                    show the largest segments\n"
+    "  doclist BLOCKID OFFSET SIZE [--raw]       Decode a doclist\n"
+    "  schema                                    FTS table schema\n"
+    "  segdir                                    directory of segments\n"
+    "  segment BLOCKID [--raw]                   content of a segment\n"
+    "  segment-stats                             info on segment sizes\n"
+    "  stat                                      the %%_stat table\n"
+    "  vocabulary [--top N]                      document vocabulary\n"
+  );
+  exit(1);
+}
+
+/*
+** Program entry point.
+**
+**    fts3view DATABASE                        list the FTS3/4 tables
+**    fts3view DATABASE TABLE COMMAND ....     run COMMAND against TABLE
+**
+** Arguments after COMMAND are made available to the command handlers
+** through azExtra/nExtra.  Wrong usage prints the help text and exits
+** with status 1.  The database connection is closed before a normal
+** return.
+*/
+int main(int argc, char **argv){
+  sqlite3 *db;
+  int rc;
+  const char *zTab;
+  const char *zCmd;
+
+  if( argc<2 ) usage(argv[0]);
+  rc = sqlite3_open(argv[1], &db);
+  if( rc ){
+    fprintf(stderr, "Cannot open %s\n", argv[1]);
+    /* A handle is normally returned even on failure and must be released */
+    sqlite3_close(db);
+    exit(1);
+  }
+  if( argc==2 ){
+    /* No table given: list every table that has a matching *_segdir table */
+    sqlite3_stmt *pStmt;
+    int cnt = 0;
+    pStmt = prepare(db, "SELECT b.sql"
+                        "  FROM sqlite_master a, sqlite_master b"
+                        " WHERE a.name GLOB '*_segdir'"
+                        "   AND b.name=substr(a.name,1,length(a.name)-7)"
+                        " ORDER BY 1");
+    while( sqlite3_step(pStmt)==SQLITE_ROW ){
+      cnt++;
+      printf("%s;\n", sqlite3_column_text(pStmt, 0));
+    }
+    sqlite3_finalize(pStmt);
+    if( cnt==0 ){
+      printf("/* No FTS3/4 tables found in database %s */\n", argv[1]);
+    }
+    sqlite3_close(db);
+    return 0;
+  }
+  if( argc<4 ) usage(argv[0]);
+  zTab = argv[2];
+  zCmd = argv[3];
+  nExtra = argc-4;
+  azExtra = argv+4;
+  if( strcmp(zCmd,"big-segments")==0 ){
+    listBigSegments(db, zTab);
+  }else if( strcmp(zCmd,"doclist")==0 ){
+    /* doclist needs BLOCKID, OFFSET and SIZE after the command */
+    if( argc<7 ) usage(argv[0]);
+    showDoclist(db, zTab);
+  }else if( strcmp(zCmd,"schema")==0 ){
+    showSchema(db, zTab);
+  }else if( strcmp(zCmd,"segdir")==0 ){
+    showSegdirMap(db, zTab);
+  }else if( strcmp(zCmd,"segment")==0 ){
+    /* segment needs a BLOCKID after the command */
+    if( argc<5 ) usage(argv[0]);
+    showSegment(db, zTab);
+  }else if( strcmp(zCmd,"segment-stats")==0 ){
+    showSegmentStats(db, zTab);
+  }else if( strcmp(zCmd,"stat")==0 ){
+    showStat(db, zTab);
+  }else if( strcmp(zCmd,"vocabulary")==0 ){
+    showVocabulary(db, zTab);
+  }else{
+    usage(argv[0]);
+  }
+  sqlite3_close(db);
+  return 0;
+}
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/unicode/CaseFolding.txt
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/unicode/CaseFolding.txt
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/unicode/UnicodeData.txt
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/unicode/UnicodeData.txt
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/unicode/mkunicode.tcl
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/unicode/mkunicode.tcl
@ -0,0 +1,694 @@
+
+source [file join [file dirname [info script]] parseunicode.tcl]
+
+# Print the C function named by $::remove_diacritic to stdout. The generated
+# function maps a codepoint that carries a diacritic to its base character.
+#
+# $map is a list of two-element lists: a codepoint followed by either the
+# single character that replaces it or an empty string. The generated C
+# code binary-searches the emitted table, so $map must be ordered by
+# codepoint.
+#
+# Consecutive entries that map to the same character are merged into one
+# range of at most 8 codepoints, provided every codepoint skipped over has
+# an entry in the global tl_lookup_table array. Each range is written to
+# aDia[] as (first codepoint << 3) + (range size - 1), and its replacement
+# character is written to the same index of aChar[].
+#
+proc print_rd {map} {
+  global tl_lookup_table
+  set aChar [list]
+  set lRange [list]
+
+  set nRange 1
+  set iFirst  [lindex $map 0 0]
+  set cPrev   [lindex $map 0 1]
+
+  foreach m [lrange $map 1 end] {
+    foreach {i c} $m {}
+
+    if {$cPrev == $c} {
+      # Same replacement as the open range. Check that every codepoint in
+      # the gap between the range and $i is present in tl_lookup_table.
+      for {set j [expr $iFirst+$nRange]} {$j<$i} {incr j} {
+        if {[info exists tl_lookup_table($j)]==0} break
+      }
+
+      # The loop ran to completion: extend the range, unless that would
+      # make it larger than the 3-bit size field allows.
+      if {$j==$i} {
+        set nNew [expr {(1 + $i - $iFirst)}]
+        if {$nNew<=8} {
+          set nRange $nNew
+          continue
+        }
+      }
+    }
+
+    # Close the open range and start a new one at codepoint $i.
+    lappend lRange [list $iFirst $nRange]
+    lappend aChar  $cPrev
+
+    set iFirst $i
+    set cPrev  $c
+    set nRange 1
+  }
+  lappend lRange [list $iFirst $nRange]
+  lappend aChar $cPrev
+
+  puts "/*"
+  puts "** If the argument is a codepoint corresponding to a lowercase letter"
+  puts "** in the ASCII range with a diacritic added, return the codepoint"
+  puts "** of the ASCII letter only. For example, if passed 235 - \"LATIN"
+  puts "** SMALL LETTER E WITH DIAERESIS\" - return 101 (\"LATIN SMALL LETTER"
+  puts "** E\"). The results of passing a codepoint that corresponds to an"
+  puts "** uppercase letter are undefined."
+  puts "*/"
+  puts "static int ${::remove_diacritic}(int c)\{"
+  puts "  unsigned short aDia\[\] = \{"
+  puts -nonewline "        0, "
+  set i 1
+  foreach r $lRange {
+    foreach {iCode nRange} $r {}
+    if {($i % 8)==0} {puts "" ; puts -nonewline "    " }
+    incr i
+
+    puts -nonewline [format "%5d" [expr ($iCode<<3) + $nRange-1]]
+    puts -nonewline ", "
+  }
+  puts ""
+  puts "  \};"
+  puts "  char aChar\[\] = \{"
+  puts -nonewline "    '\\0', "
+  set i 1
+  foreach c $aChar {
+    # An empty replacement is stored as a nul character.
+    set str "'$c',  "
+    if {$c == ""} { set str "'\\0', " }
+
+    if {($i % 12)==0} {puts "" ; puts -nonewline "    " }
+    incr i
+    puts -nonewline "$str"
+  }
+  puts ""
+  puts "  \};"
+  # The lookup code itself is emitted verbatim.
+  puts {
+  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
+  int iRes = 0;
+  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
+  int iLo = 0;
+  while( iHi>=iLo ){
+    int iTest = (iHi + iLo) / 2;
+    if( key >= aDia[iTest] ){
+      iRes = iTest;
+      iLo = iTest+1;
+    }else{
+      iHi = iTest-1;
+    }
+  }
+  assert( key>=aDia[iRes] );
+  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);}
+  puts "\}"
+}
+
+# Print the C function $zFunc to stdout. The generated function returns
+# non-zero if its argument is one of the diacritic codepoints found in $map.
+#
+# $map is a list of two-element lists: a codepoint and a replacement
+# character. Entries with a non-zero codepoint and an empty replacement are
+# treated as diacritics. Their membership is encoded in two 32-bit masks
+# relative to the smallest such codepoint, so the diacritics must span
+# fewer than 64 codepoints; an error is raised otherwise, or if $map
+# contains no diacritics at all.
+#
+proc print_isdiacritic {zFunc map} {
+
+  set lCode [list]
+  foreach m $map {
+    foreach {code char} $m {}
+    if {$code && $char == ""} { lappend lCode $code }
+  }
+  if {[llength $lCode]==0} {
+    error "no diacritic codepoints found in map"
+  }
+  set lCode [lsort -integer $lCode]
+  set iFirst [lindex $lCode 0]
+  set iLast [lindex $lCode end]
+
+  # Only two 32-bit masks are generated. A wider span would yield mask
+  # constants and shift counts that the generated C code cannot represent.
+  if {($iLast - $iFirst) >= 64} {
+    error "diacritic range is too large for two 32-bit masks"
+  }
+
+  set i1 0
+  set i2 0
+
+  foreach c $lCode {
+    set i [expr $c - $iFirst]
+    if {$i < 32} {
+      set i1 [expr {$i1 | (1<<$i)}]
+    } else {
+      set i2 [expr {$i2 | (1<<($i-32))}]
+    }
+  }
+
+  puts "/*"
+  puts "** Return true if the argument interpreted as a unicode codepoint" 
+  puts "** is a diacritical modifier character."
+  puts "*/"
+  puts "int ${zFunc}\(int c)\{"
+  puts "  unsigned int mask0 = [format "0x%08X" $i1];"
+  puts "  unsigned int mask1 = [format "0x%08X" $i2];"
+
+  puts "  if( c<$iFirst || c>$iLast ) return 0;"
+  puts "  return (c < $iFirst+32) ?"
+  puts "      (mask0 & (1 << (c-$iFirst))) :"
+  puts "      (mask1 & (1 << (c-$iFirst-32)));"
+  puts "\}"
+}
+
+
+#-------------------------------------------------------------------------
+
+# Load the codepoints returned by an_load_unicodedata_text for the file
+# named by the global variable unicodedata.txt, and return them as a list
+# of runs. Each run is a two-element list: the first codepoint and the
+# number of consecutive codepoints in the run.
+#
+proc an_load_separator_ranges {} {
+  global unicodedata.txt
+
+  set lRange [list]
+  set bOpen 0
+  foreach cp [an_load_unicodedata_text ${unicodedata.txt}] {
+    # Codepoint directly follows the open run: just grow the run.
+    if {$bOpen && $cp == ($iStart+$nLen)} {
+      incr nLen
+      continue
+    }
+
+    # Otherwise flush the open run (if any) and start a new one here.
+    if {$bOpen} { lappend lRange [list $iStart $nLen] }
+    set iStart $cp
+    set nLen 1
+    set bOpen 1
+  }
+  lappend lRange [list $iStart $nLen]
+  return $lRange
+}
+
+# Print the C array aEntry[] to stdout, preceded by a comment describing
+# its encoding.
+#
+# $lRange is a list of two-element lists: a first codepoint and a range
+# size. Each range is emitted as one 32-bit value, (first codepoint << 10)
+# + range size. An error is raised if any first codepoint needs more than
+# 22 bits or any range size needs more than 10 bits.
+#
+proc an_print_range_array {lRange} {
+  set iFirstMax 0
+  set nRangeMax 0
+  foreach range $lRange {
+    foreach {iFirst nRange} $range {}
+    if {$iFirst > $iFirstMax} {set iFirstMax $iFirst}
+    if {$nRange > $nRangeMax} {set nRangeMax $nRange}
+  }
+  if {$iFirstMax >= (1<<22)} {error "first-max is too large for format"}
+  if {$nRangeMax >= (1<<10)} {error "range-max is too large for format"}
+
+  puts -nonewline "  "
+  puts [string trim {
+  /* Each unsigned integer in the following array corresponds to a contiguous
+  ** range of unicode codepoints that are not either letters or numbers (i.e.
+  ** codepoints for which this function should return 0).
+  **
+  ** The most significant 22 bits in each 32-bit value contain the first 
+  ** codepoint in the range. The least significant 10 bits are used to store
+  ** the size of the range (always at least 1). In other words, the value 
+  ** ((C<<10) + N) represents a range of N codepoints starting with codepoint 
+  ** C. It is not possible to represent a range larger than 1023 codepoints 
+  ** using this format.
+  */
+  }]
+  puts -nonewline "  static const unsigned int aEntry\[\] = \{"
+  set i 0
+  foreach range $lRange {
+    foreach {iFirst nRange} $range {}
+    set u32 [format "0x%08X" [expr ($iFirst<<10) + $nRange]]
+
+    # Five values per output line.
+    if {($i % 5)==0} {puts "" ; puts -nonewline "   "}
+    puts -nonewline " $u32,"
+    incr i
+  }
+  puts ""
+  puts "  \};"
+}
+
+# Print the C array aAscii[4] to stdout. It is a 128-bit bitmap, stored as
+# four 32-bit words, with one bit set for every codepoint of 127 or less
+# that is covered by a range in $lRange. Each element of $lRange is a
+# two-element list: a first codepoint and a range size.
+#
+proc an_print_ascii_bitmap {lRange} {
+  set aWord [list 0 0 0 0]
+  foreach range $lRange {
+    foreach {iStart nLen} $range {}
+    for {set cp $iStart} {$cp < ($iStart+$nLen)} {incr cp} {
+      # Codepoints only increase from here, so nothing further in this
+      # range can land in the bitmap.
+      if {$cp > 127} break
+      set iWord [expr {$cp >> 5}]
+      set bit   [expr {1 << ($cp & 0x001F)}]
+      lset aWord $iWord [expr {[lindex $aWord $iWord] | $bit}]
+    }
+  }
+
+  puts "  static const unsigned int aAscii\[4\] = \{"
+  puts -nonewline "   "
+  foreach word $aWord { puts -nonewline [format " 0x%08X," $word] }
+  puts ""
+  puts "  \};"
+}
+
+# Print the C function $zFunc to stdout.
+#
+# $lRange is passed unchanged to an_print_range_array and
+# an_print_ascii_bitmap, which emit the aEntry[] and aAscii[] tables used by
+# the function body. The generated body answers codepoints below 128 from
+# the aAscii[] bitmap and codepoints below (1<<22) with a binary search of
+# aEntry[]; for any other value it returns 1.
+#
+proc print_isalnum {zFunc lRange} {
+  puts "/*"
+  puts "** Return true if the argument corresponds to a unicode codepoint"
+  puts "** classified as either a letter or a number. Otherwise false."
+  puts "**"
+  puts "** The results are undefined if the value passed to this function"
+  puts "** is less than zero."
+  puts "*/"
+  puts "int ${zFunc}\(int c)\{"
+  an_print_range_array $lRange
+  an_print_ascii_bitmap $lRange
+  # The rest of the function body is emitted verbatim, with no substitution.
+  puts {
+  if( (unsigned int)c<128 ){
+    return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
+  }else if( (unsigned int)c<(1<<22) ){
+    unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
+    int iRes = 0;
+    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
+    int iLo = 0;
+    while( iHi>=iLo ){
+      int iTest = (iHi + iLo) / 2;
+      if( key >= aEntry[iTest] ){
+        iRes = iTest;
+        iLo = iTest+1;
+      }else{
+        iHi = iTest-1;
+      }
+    }
+    assert( aEntry[0]<key );
+    assert( key>=aEntry[iRes] );
+    return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
+  }
+  return 1;}
+  puts "\}"
+}
+
+# Print the C function isalnum_test() to stdout. The generated function
+# compares the results of $zFunc against tables built here from $lRange.
+# On the first mismatch it stores the offending codepoint in *piCode and
+# returns 1; it returns 0 if every comparison succeeds.
+#
+# Three tables are generated:
+#
+#   aAlnum      - one flag for each codepoint below 70000: 0 if the
+#                 codepoint is covered by $lRange, 1 otherwise.
+#   aLargeSep   - codepoints of 70000 or more that are covered by $lRange.
+#                 $zFunc is expected to return 0 for these.
+#   aLargeOther - the immediate neighbours of the aLargeSep codepoints that
+#                 are not themselves covered by $lRange. $zFunc is expected
+#                 to return 1 for these.
+#
+proc print_test_isalnum {zFunc lRange} {
+  # Expand the ranges into array a(), one element per covered codepoint.
+  foreach range $lRange {
+    foreach {iFirst nRange} $range {}
+    for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} { set a($i) 1 }
+  }
+
+  puts "static int isalnum_test(int *piCode)\{"
+  puts -nonewline "  unsigned char aAlnum\[\] = \{"
+  for {set i 0} {$i < 70000} {incr i} {
+    if {($i % 32)==0} { puts "" ; puts -nonewline "    " }
+    set bFlag [expr ![info exists a($i)]]
+    puts -nonewline "${bFlag},"
+  }
+  puts ""
+  puts "  \};"
+
+  puts -nonewline "  int aLargeSep\[\] = \{"
+  set i 0
+  foreach iSep [lsort -integer [array names a]] {
+    if {$iSep<70000} continue
+    if {($i % 8)==0} { puts "" ; puts -nonewline "   " }
+    puts -nonewline " $iSep,"
+    incr i
+  }
+  puts ""
+  puts "  \};"
+  puts -nonewline "  int aLargeOther\[\] = \{"
+  set i 0
+  foreach iSep [lsort -integer [array names a]] {
+    if {$iSep<70000} continue
+    # Emit the codepoint just below and just above $iSep, whichever of
+    # them is not itself covered by a range.
+    if {[info exists a([expr $iSep-1])]==0} {
+      if {($i % 8)==0} { puts "" ; puts -nonewline "   " }
+      puts -nonewline " [expr $iSep-1],"
+      incr i
+    }
+    if {[info exists a([expr $iSep+1])]==0} {
+      if {($i % 8)==0} { puts "" ; puts -nonewline "   " }
+      puts -nonewline " [expr $iSep+1],"
+      incr i
+    }
+  }
+  puts ""
+  puts "  \};"
+
+  # Variable substitution inserts the function name; -nocommands leaves the
+  # square brackets of the C array subscripts alone.
+  puts [subst -nocommands {
+  int i;
+  for(i=0; i<sizeof(aAlnum)/sizeof(aAlnum[0]); i++){
+    if( ${zFunc}(i)!=aAlnum[i] ){
+      *piCode = i;
+      return 1;
+    }
+  }
+  for(i=0; i<sizeof(aLargeSep)/sizeof(aLargeSep[0]); i++){
+    if( ${zFunc}(aLargeSep[i])!=0 ){
+      *piCode = aLargeSep[i];
+      return 1;
+    }
+  }
+  for(i=0; i<sizeof(aLargeOther)/sizeof(aLargeOther[0]); i++){
+    if( ${zFunc}(aLargeOther[i])!=1 ){
+      *piCode = aLargeOther[i];
+      return 1;
+    }
+  }
+  }]
+  puts "  return 0;"
+  puts "\}"
+}
+
+#-------------------------------------------------------------------------
+
+# Convert the global tl_lookup_table array into a list of range records and
+# return that list.
+#
+# Each record is a four-element list: the first codepoint, the increment
+# between codepoints (1 or 2), the number of codepoints, and the offset
+# (mapping minus codepoint) shared by all of them. Codepoints are visited in
+# ascending order and added to the open record for as long as they continue
+# its increment and have the same offset; otherwise the open record is
+# closed and a new one is started.
+#
+# NOTE(review): if tl_lookup_table has no elements, the returned list still
+# holds one record, whose first codepoint is an empty string.
+#
+proc tl_create_records {} {
+  global tl_lookup_table
+
+  set iFirst ""
+  set nOff 0
+  set nRange 0
+  set nIncr 0
+
+  set lRecord [list]
+  foreach code [lsort -integer [array names tl_lookup_table]] {
+    set mapping $tl_lookup_table($code)
+    if {$iFirst == ""} {
+      # Very first codepoint: open the first record.
+      set iFirst $code
+      set nOff   [expr $mapping - $code]
+      set nRange 1
+      set nIncr 1
+    } else {
+      # Distance from the last codepoint of the open record to this one.
+      set diff [expr $code - ($iFirst + ($nIncr * ($nRange - 1)))]
+      # The second codepoint of a record decides its increment.
+      if { $nRange==1 && ($diff==1 || $diff==2) } {
+        set nIncr $diff
+      }
+
+      if {$diff != $nIncr || ($mapping - $code)!=$nOff} {
+        # A record that never grew past one codepoint is stored with an
+        # increment of 1.
+        if { $nRange==1 } {set nIncr 1}
+        lappend lRecord [list $iFirst $nIncr $nRange $nOff]
+        set iFirst $code
+        set nOff   [expr $mapping - $code]
+        set nRange 1
+        set nIncr 1
+      } else {
+        incr nRange
+      }
+    }
+  }
+
+  # Flush the record that is still open.
+  lappend lRecord [list $iFirst $nIncr $nRange $nOff]
+
+  set lRecord
+}
+
+# Print to stdout the explanatory C comment for the case-folding table,
+# followed by the declaration of struct TableEntry and the opening of the
+# aEntry[] initializer. The comment text is emitted verbatim after
+# surrounding whitespace has been trimmed from it.
+#
+proc tl_print_table_header {} {
+  puts -nonewline "  "
+  puts [string trim {
+  /* Each entry in the following array defines a rule for folding a range
+  ** of codepoints to lower case. The rule applies to a range of nRange
+  ** codepoints starting at codepoint iCode.
+  **
+  ** If the least significant bit in flags is clear, then the rule applies
+  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
+  ** need to be folded). Or, if it is set, then the rule only applies to
+  ** every second codepoint in the range, starting with codepoint C.
+  **
+  ** The 7 most significant bits in flags are an index into the aiOff[]
+  ** array. If a specific codepoint C does require folding, then its lower
+  ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
+  **
+  ** The contents of this array are generated by parsing the CaseFolding.txt
+  ** file distributed as part of the "Unicode Character Database". See
+  ** http://www.unicode.org for details.
+  */
+  }]
+  puts "  static const struct TableEntry \{"
+  puts "    unsigned short iCode;"
+  puts "    unsigned char flags;"
+  puts "    unsigned char nRange;"
+  puts "  \} aEntry\[\] = \{"
+}
+
+# Print one initializer of the aEntry[] table to stdout, three to a line.
+#
+#   togglevar - name of a variable in the caller holding the current output
+#               column (0, 1 or 2). It is created on first use.
+#   entry     - a record of four elements: first codepoint, increment,
+#               number of codepoints and offset.
+#   liOff     - the list of distinct offsets. The position of this entry's
+#               offset within it is stored in the upper bits of the flags
+#               field.
+#
+# Returns 1, printing nothing, if the first codepoint does not fit in the
+# 16-bit iCode field, in which case the caller must handle the entry some
+# other way. Returns 0 once the entry has been printed.
+#
+proc tl_print_table_entry {togglevar entry liOff} {
+  upvar $togglevar t
+  foreach {iFirst nIncr nRange nOff} $entry {}
+
+  # iCode is an unsigned short, so 65535 is the largest codepoint it can
+  # hold. Codepoint 65536 itself must already be rejected here.
+  if {$iFirst >= (1<<16)} { return 1 }
+
+  if {[info exists t]==0} {set t 0}
+  if {$t==0} { puts -nonewline "    " }
+
+  # Bit 0 of flags marks a rule that applies to every second codepoint; the
+  # stored range size then spans the skipped codepoints as well.
+  set flags 0
+  if {$nIncr==2} { set flags 1 ; set nRange [expr $nRange * 2]}
+  # nRange is emitted into an unsigned char field.
+  if {$nRange > 255} {error "range too large for unsigned char nRange"}
+  # Negative offsets are stored modulo 65536.
+  if {$nOff<0}   { incr nOff [expr (1<<16)] }
+
+  set idx [lsearch $liOff $nOff]
+  if {$idx<0} {error "malfunction generating aiOff"}
+  set flags [expr $flags + $idx*2]
+
+  set txt "{$iFirst, $flags, $nRange},"
+  if {$t==2} {
+    puts $txt
+  } else {
+    puts -nonewline [format "% -23s" $txt]
+  }
+  set t [expr ($t+1)%3]
+
+  return 0
+}
+
+# Finish the aEntry[] initializer: end the current output line if it is
+# only partly filled, then print the closing brace. $togglevar names the
+# caller's output-column variable.
+#
+proc tl_print_table_footer {togglevar} {
+  upvar 1 $togglevar iColumn
+  if {$iColumn != 0} {
+    puts ""
+  }
+  puts "  \};"
+}
+
+# Print a C "else if" clause to stdout that applies the offset of $entry to
+# every codepoint in its range. $entry is a record of four elements: first
+# codepoint, increment, number of codepoints and offset. Records with an
+# increment of 2 are not supported and raise an error.
+#
+proc tl_print_if_entry {entry} {
+  foreach {iStart nStep nCount nDelta} $entry {}
+  if {$nStep==2} {error "tl_print_if_entry needs improvement!"}
+
+  set iEnd [expr {$iStart+$nCount}]
+  puts "  else if( c>=$iStart && c<$iEnd )\{"
+  puts "    ret = c + $nDelta;"
+  puts "  \}"
+}
+
+# Return the sorted list of distinct offsets used by the records in
+# $lRecord. The offset is the fourth element of each record; negative
+# offsets are first converted by adding 65536. An error is raised if there
+# are more than 128 distinct offsets.
+#
+proc tl_generate_ioff_table {lRecord} {
+  set lAll [list]
+  foreach entry $lRecord {
+    set nDelta [lindex $entry 3]
+    if {$nDelta<0} { set nDelta [expr {$nDelta + (1<<16)}] }
+    lappend lAll $nDelta
+  }
+
+  set liOff [lsort -integer -unique $lAll]
+  if {[llength $liOff]>128} { error "Too many distinct ioffs" }
+  return $liOff
+}
+
+# Print the C array aiOff[] to stdout, eight values to a line. $liOff is
+# the list of offsets to emit, in order.
+#
+proc tl_print_ioff_table {liOff} {
+  puts -nonewline "  static const unsigned short aiOff\[\] = \{"
+  set nDone 0
+  foreach nDelta $liOff {
+    if {$nDone % 8 == 0} {
+      puts ""
+      puts -nonewline "   "
+    }
+    puts -nonewline [format "% -7s" "${nDelta},"]
+    incr nDone
+  }
+  puts ""
+  puts "  \};"
+
+}
+
+# Print the C function $zFunc, the case-folding function, to stdout.
+#
+# The range records returned by tl_create_records are written to the
+# aEntry[] table, and their distinct offsets to aiOff[]. Records that
+# tl_print_table_entry declines to print (it returns 1 for them) are
+# collected in lHigh and emitted afterwards as individual "else if" clauses
+# by tl_print_if_entry. The name of the diacritic-removal function called by
+# the generated code is taken from the global variable remove_diacritic.
+#
+proc print_fold {zFunc} {
+
+  set lRecord [tl_create_records]
+
+  set lHigh [list]
+  puts "/*"
+  puts "** Interpret the argument as a unicode codepoint. If the codepoint"
+  puts "** is an upper case character that has a lower case equivalent,"
+  puts "** return the codepoint corresponding to the lower case version."
+  puts "** Otherwise, return a copy of the argument."
+  puts "**"
+  puts "** The results are undefined if the value passed to this function"
+  puts "** is less than zero."
+  puts "*/"
+  puts "int ${zFunc}\(int c, int bRemoveDiacritic)\{"
+
+  set liOff [tl_generate_ioff_table $lRecord]
+  tl_print_table_header
+  # "toggle" is the name of the output-column variable shared between
+  # tl_print_table_entry and tl_print_table_footer.
+  foreach entry $lRecord { 
+    if {[tl_print_table_entry toggle $entry $liOff]} { 
+      lappend lHigh $entry 
+    } 
+  }
+  tl_print_table_footer toggle
+  tl_print_ioff_table $liOff
+
+  # Variable substitution inserts the name of the diacritic-removal
+  # function; -nocommands leaves the square brackets of the C code alone.
+  puts [subst -nocommands {
+  int ret = c;
+
+  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
+
+  if( c<128 ){
+    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
+  }else if( c<65536 ){
+    const struct TableEntry *p;
+    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
+    int iLo = 0;
+    int iRes = -1;
+
+    assert( c>aEntry[0].iCode );
+    while( iHi>=iLo ){
+      int iTest = (iHi + iLo) / 2;
+      int cmp = (c - aEntry[iTest].iCode);
+      if( cmp>=0 ){
+        iRes = iTest;
+        iLo = iTest+1;
+      }else{
+        iHi = iTest-1;
+      }
+    }
+
+    assert( iRes>=0 && c>=aEntry[iRes].iCode );
+    p = &aEntry[iRes];
+    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
+      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
+      assert( ret>0 );
+    }
+
+    if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret);
+  }
+  }]
+
+  # Records that were not placed in aEntry[] continue the if/else chain
+  # that the template above leaves open.
+  foreach entry $lHigh {
+    tl_print_if_entry $entry
+  }
+
+  puts ""
+  puts "  return ret;"
+  puts "\}"
+}
+
+# Print the C function fold_test() to stdout. The generated function checks
+# $zFunc for every codepoint below 70000, both with and without diacritic
+# removal. On the first mismatch it stores the codepoint in *piCode and
+# returns 1; it returns 0 if every check passes.
+#
+# $mappings is a list of two-element lists: a codepoint and either its
+# replacement character or an empty string. The expected folding of each
+# codepoint is read from the global tl_lookup_table array.
+#
+proc print_fold_test {zFunc mappings} {
+  global tl_lookup_table
+
+  # extra() maps a codepoint to the numeric code of its replacement
+  # character, or to 0 where the replacement is an empty string.
+  foreach m $mappings {
+    set c [lindex $m 1]
+    if {$c == ""} {
+      set extra([lindex $m 0]) 0
+    } else {
+      scan $c %c i
+      set extra([lindex $m 0]) $i
+    }
+  }
+
+  puts "static int fold_test(int *piCode)\{"
+  puts -nonewline "  static int aLookup\[\] = \{"
+  for {set i 0} {$i < 70000} {incr i} {
+
+    # A codepoint with no table entry is expected to come back unchanged.
+    # Each catch leaves the previous value in place if the lookup fails.
+    set expected $i
+    catch { set expected $tl_lookup_table($i) }
+    set expected2 $expected
+    catch { set expected2 $extra($expected2) }
+
+    if {($i % 4)==0}  { puts "" ; puts -nonewline "    " }
+    puts -nonewline "$expected, $expected2, "
+  }
+  puts "  \};"
+  # aLookup[] holds two values per codepoint, so the generated loop
+  # recovers the codepoint as i/2 and the diacritic flag as the low bit.
+  puts "  int i;"
+  puts "  for(i=0; i<sizeof(aLookup)/sizeof(aLookup\[0\]); i++)\{"
+  puts "    int iCode = (i/2);"
+  puts "    int bFlag = i & 0x0001;"
+  puts "    if( ${zFunc}\(iCode, bFlag)!=aLookup\[i\] )\{"
+  puts "      *piCode = iCode;"
+  puts "      return 1;"
+  puts "    \}"
+  puts "  \}"
+  puts "  return 0;"
+  puts "\}"
+}
+
+
+# Print the header of the generated C file to stdout: the licence blessing,
+# a "do not edit" notice and the include of assert.h. Unless the global
+# variable generate_fts5_code is true, two preprocessor guards are also
+# opened between the notice and the include.
+#
+proc print_fileheader {} {
+  puts [string trim {
+/*
+** 2012 May 25
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+******************************************************************************
+*/
+
+/*
+** DO NOT EDIT THIS MACHINE GENERATED FILE.
+*/
+  }]
+  puts ""
+  if {$::generate_fts5_code} {
+    # no-op
+  } else {
+    puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"
+    puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
+  }
+  puts ""
+  puts "#include <assert.h>"
+  puts ""
+}
+
+# Print a C main() function to stdout. The generated program runs
+# isalnum_test() and fold_test(), reports the outcome of each with printf(),
+# and exits with a non-zero status if either of them failed.
+#
+proc print_test_main {} {
+  puts ""
+  puts "#include <stdio.h>"
+  puts ""
+  puts "int main(int argc, char **argv)\{"
+  puts "  int r1, r2;"
+  puts "  int code;"
+  puts "  r1 = isalnum_test(&code);"
+  puts "  if( r1 ) printf(\"isalnum(): Problem with code %d\\n\",code);"
+  puts "  else printf(\"isalnum(): test passed\\n\");"
+  puts "  r2 = fold_test(&code);"
+  puts "  if( r2 ) printf(\"fold(): Problem with code %d\\n\",code);"
+  puts "  else printf(\"fold(): test passed\\n\");"
+  puts "  return (r1 || r2);"
+  puts "\}"
+}
+
+# Process the command line arguments. Exit early if they are not to
+# our liking.
+#
+# Write the command line synopsis to stderr and exit with status 1.
+proc usage {} {
+  set zArgs "<CaseFolding.txt file> <UnicodeData.txt file>"
+  puts stderr "Usage: $::argv0 ?-test? ?-fts5? $zArgs"
+  exit 1
+}
+# The last two arguments are always the two input files. Everything before
+# them must be a recognized option.
+if {[llength $argv]<2} usage
+set casefolding.txt [lindex $argv end-1]
+set unicodedata.txt [lindex $argv end]
+
+# Defaults, used when no option is given.
+set function_prefix "sqlite3Fts"
+set remove_diacritic remove_diacritic
+set generate_test_code 0
+set generate_fts5_code 0
+
+foreach zOpt [lrange $argv 0 end-2] {
+  switch -- $zOpt {
+    -test {
+      set generate_test_code 1
+    }
+    -fts5 {
+      set function_prefix sqlite3Fts5
+      set generate_fts5_code 1
+      set remove_diacritic fts5_remove_diacritic
+    }
+    default {
+      usage
+    }
+  }
+}
+
+# Everything from here on writes the generated C file to stdout, starting
+# with the file header.
+print_fileheader
+
+# Print the isalnum() function to stdout.
+#
+set lRange [an_load_separator_ranges]
+print_isalnum ${function_prefix}UnicodeIsalnum $lRange
+
+# Leave a gap between the two generated C functions.
+#
+puts ""
+puts ""
+
+# Load the fold data. This is used by the [rd_XXX] commands
+# as well as [print_fold].
+tl_load_casefolding_txt ${casefolding.txt}
+
+# Print the diacritic-removal function and the is-diacritic test, each
+# followed by a gap.
+set mappings [rd_load_unicodedata_text ${unicodedata.txt}]
+print_rd $mappings
+puts ""
+puts ""
+print_isdiacritic ${function_prefix}UnicodeIsdiacritic $mappings
+puts ""
+puts ""
+
+# Print the fold() function to stdout.
+#
+print_fold ${function_prefix}UnicodeFold
+
+# Print the test routines and main() function to stdout, if -test 
+# was specified.
+#
+if {$::generate_test_code} {
+  print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange
+  print_fold_test ${function_prefix}UnicodeFold $mappings
+  print_test_main 
+}
+
+# Close the two preprocessor guards that print_fileheader opens when FTS5
+# code is not being generated.
+if {$generate_fts5_code} {
+  # no-op
+} else {
+  puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
+  puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"
+}
--- a/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/unicode/parseunicode.tcl
+++ b/query_classifier/qc_sqlite/sqlite-src-3110100/ext/fts3/unicode/parseunicode.tcl
@ -0,0 +1,146 @@
+
+#--------------------------------------------------------------------------
+# Parameter $zName must be a path to the file UnicodeData.txt. This command
+# reads the file and returns a list of mappings required to remove all
+# diacritical marks from a unicode string. Each mapping is itself a list
+# consisting of two elements - the unicode codepoint and the single ASCII
+# character that it should be replaced with, or an empty string if the 
+# codepoint should simply be removed from the input. Examples:
+#
+#   { 224 a  }     (replace codepoint 224 with "a")
+#   { 769 "" }     (remove codepoint 769 from input)
+#
+# Mappings are only returned for non-upper case codepoints. It is assumed
+# that the input has already been folded to lower case.
+#
+proc rd_load_unicodedata_text {zName} {
+  global tl_lookup_table
+
+  set fd [open $zName]
+  # Names of the semicolon-separated fields of each line, in file order.
+  # Each becomes a local variable of the same name while a line is parsed.
+  set lField {
+    code
+    character_name
+    general_category
+    canonical_combining_classes
+    bidirectional_category
+    character_decomposition_mapping
+    decimal_digit_value
+    digit_value
+    numeric_value
+    mirrored
+    unicode_1_name
+    iso10646_comment_field
+    uppercase_mapping
+    lowercase_mapping
+    titlecase_mapping
+  }
+  set lRet [list]
+
+  while { ![eof $fd] } {
+    set line [gets $fd]
+    if {$line == ""} continue
+
+    set fields [split $line ";"]
+    if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
+    foreach $lField $fields {}
+    # Only decompositions made of exactly two elements, the first of which
+    # is a hexadecimal number, are of interest.
+    if { [llength $character_decomposition_mapping]!=2
+      || [string is xdigit [lindex $character_decomposition_mapping 0]]==0
+    } {
+      continue
+    }
+
+    set iCode  [expr "0x$code"]
+    set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]
+    set iDia   [expr "0x[lindex $character_decomposition_mapping 1]"]
+
+    # Skip codepoints that have a case-folding entry.
+    if {[info exists tl_lookup_table($iCode)]} continue
+
+    # Keep the mapping only if the first element is an ASCII letter (a-z or
+    # A-Z). The letter is stored in lower case, and the second element is
+    # remembered as a diacritic.
+    if { ($iAscii >= 97 && $iAscii <= 122)
+      || ($iAscii >= 65 && $iAscii <= 90)
+    } {
+      lappend lRet [list $iCode [string tolower [format %c $iAscii]]]
+      set dia($iDia) 1
+    }
+  }
+
+  # Each diacritic seen above is mapped to an empty string.
+  foreach d [array names dia] {
+    lappend lRet [list $d ""]
+  }
+  set lRet [lsort -integer -index 0 $lRet]
+
+  close $fd
+  set lRet
+}
+
+#-------------------------------------------------------------------------
+# Parameter $zName must be a path to the file UnicodeData.txt. This command
+# reads the file and returns a list of codepoints (integers). The list
+# contains all codepoints in the UnicodeData.txt assigned to any "General
+# Category" that is not a "Letter" or "Number". Codepoints in category "Co"
+# are likewise left out of the list.
+#
+proc an_load_unicodedata_text {zName} {
+  set fd [open $zName]
+  # Names of the semicolon-separated fields of each line, in file order.
+  # Each becomes a local variable of the same name while a line is parsed.
+  set lField {
+    code
+    character_name
+    general_category
+    canonical_combining_classes
+    bidirectional_category
+    character_decomposition_mapping
+    decimal_digit_value
+    digit_value
+    numeric_value
+    mirrored
+    unicode_1_name
+    iso10646_comment_field
+    uppercase_mapping
+    lowercase_mapping
+    titlecase_mapping
+  }
+  set lRet [list]
+
+  while { ![eof $fd] } {
+    set line [gets $fd]
+    if {$line == ""} continue
+
+    set fields [split $line ";"]
+    if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
+    foreach $lField $fields {}
+
+    set iCode [expr "0x$code"]
+    # A codepoint is left out of the result if its general category starts
+    # with "L" or "N", or if the category is exactly "Co".
+    set bAlnum [expr {
+         [lsearch {L N} [string range $general_category 0 0]] >= 0
+      || $general_category=="Co"
+    }]
+
+    if { !$bAlnum } { lappend lRet $iCode }
+  }
+
+  close $fd
+  set lRet
+}
+
+# Read the case-folding data file $zName and record its mappings in the
+# global tl_lookup_table array, indexed by source codepoint.
+#
+# Blank lines and lines that start with "#" are skipped. Every other line
+# is split on ";" into four fields: the first and third are read as
+# hexadecimal codepoints, the second selects the kind of mapping. Only
+# lines whose second field is "C" or "S" are stored. The file is closed
+# before returning.
+#
+proc tl_load_casefolding_txt {zName} {
+  global tl_lookup_table
+
+  set fd [open $zName]
+  while { ![eof $fd] } {
+    set line [gets $fd]
+    if {[string range $line 0 0] == "#"} continue
+    if {$line == ""} continue
+
+    # Clear the fields of the previous line, then split this one.
+    foreach x {a b c d} {unset -nocomplain $x}
+    foreach {a b c d} [split $line ";"] {}
+
+    # Convert the codepoint fields from hexadecimal to integers.
+    set a2 [list]
+    set c2 [list]
+    foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
+    foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
+    set b [string trim $b]
+    set d [string trim $d]
+
+    if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
+  }
+  # Release the channel; it was previously left open.
+  close $fd
+}
+
+