#ifdef HAVE_STDINT_H #include <stdint.h> #endif #ifdef HAVE_INTTYPES_H #include <inttypes.h> #endif #line 1 "fts5.h" /* ** 2014 May 31 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** Interfaces to extend FTS5. Using the interfaces defined in this file, ** FTS5 may be extended with: ** ** * custom tokenizers, and ** * custom auxiliary functions.
*/
#ifndef _FTS5_H #define _FTS5_H
#include"sqlite3.h"
#ifdef __cplusplus extern"C" { #endif
/************************************************************************* ** CUSTOM AUXILIARY FUNCTIONS ** ** Virtual table implementations may overload SQL functions by implementing ** the sqlite3_module.xFindFunction() method.
*/
typedefvoid (*fts5_extension_function)( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
Fts5Context *pFts, /* First arg to pass to pApi functions */
sqlite3_context *pCtx, /* Context for returning result/error */ int nVal, /* Number of values in apVal[] array */
sqlite3_value **apVal /* Array of trailing arguments */
);
/*
** EXTENSION API FUNCTIONS
**
** xUserData(pFts):
**   Return a copy of the pUserData pointer passed to the xCreateFunction()
**   API when the extension function was registered.
**
** xColumnTotalSize(pFts, iCol, pnToken):
**   If parameter iCol is less than zero, set output variable *pnToken
**   to the total number of tokens in the FTS5 table. Or, if iCol is
**   non-negative but less than the number of columns in the table, set
**   *pnToken to the total number of tokens in column iCol, considering
**   all rows in the FTS5 table.
**
**   If parameter iCol is greater than or equal to the number of columns
**   in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
**   an OOM condition or IO error), an appropriate SQLite error code is
**   returned.
**
** xColumnCount(pFts):
**   Return the number of columns in the table.
**
** xColumnSize(pFts, iCol, pnToken):
**   If parameter iCol is less than zero, set output variable *pnToken
**   to the total number of tokens in the current row. Or, if iCol is
**   non-negative but less than the number of columns in the table, set
**   *pnToken to the number of tokens in column iCol of the current row.
**
**   If parameter iCol is greater than or equal to the number of columns
**   in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
**   an OOM condition or IO error), an appropriate SQLite error code is
**   returned.
**
**   This function may be quite inefficient if used with an FTS5 table
**   created with the "columnsize=0" option.
**
** xColumnText:
**   If parameter iCol is less than zero, or greater than or equal to the
**   number of columns in the table, SQLITE_RANGE is returned.
**
**   Otherwise, this function attempts to retrieve the text of column iCol of
**   the current document. If successful, (*pz) is set to point to a buffer
**   containing the text in utf-8 encoding, (*pn) is set to the size in bytes
**   (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
**   if an error occurs, an SQLite error code is returned and the final values
**   of (*pz) and (*pn) are undefined.
**
** xPhraseCount:
**   Returns the number of phrases in the current query expression.
**
** xPhraseSize:
**   If parameter iPhrase is less than zero, or greater than or equal to the
**   number of phrases in the current query, as returned by xPhraseCount,
**   0 is returned. Otherwise, this function returns the number of tokens in
**   phrase iPhrase of the query. Phrases are numbered starting from zero.
**
** xInstCount:
**   Set *pnInst to the total number of occurrences of all phrases within
**   the query within the current row. Return SQLITE_OK if successful, or
**   an error code (e.g. SQLITE_NOMEM) if an error occurs.
**
**   This API can be quite slow if used with an FTS5 table created with the
**   "detail=none" or "detail=column" option. If the FTS5 table is created
**   with either "detail=none" or "detail=column" and "content=" option
**   (i.e. if it is a contentless table), then this API always returns 0.
**
** xInst:
**   Query for the details of phrase match iIdx within the current row.
**   Phrase matches are numbered starting from zero, so the iIdx argument
**   should be greater than or equal to zero and smaller than the value
**   output by xInstCount(). If iIdx is less than zero or greater than
**   or equal to the value returned by xInstCount(), SQLITE_RANGE is returned.
**
**   Otherwise, output parameter *piPhrase is set to the phrase number, *piCol
**   to the column in which it occurs and *piOff the token offset of the
**   first token of the phrase. SQLITE_OK is returned if successful, or an
**   error code (e.g. SQLITE_NOMEM) if an error occurs.
**
**   This API can be quite slow if used with an FTS5 table created with the
**   "detail=none" or "detail=column" option.
**
** xRowid:
**   Returns the rowid of the current row.
**
** xTokenize:
**   Tokenize text using the tokenizer belonging to the FTS5 table.
**
** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
**   This API function is used to query the FTS table for phrase iPhrase
**   of the current query. Specifically, a query equivalent to:
**
**       ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
**
**   with $p set to a phrase equivalent to the phrase iPhrase of the
**   current query is executed. Any column filter that applies to
**   phrase iPhrase of the current query is included in $p. For each
**   row visited, the callback function passed as the fourth argument
**   is invoked. The context and API objects passed to the callback
**   function may be used to access the properties of each matched row.
**   Invoking Api.xUserData() returns a copy of the pointer passed as
**   the third argument to pUserData.
**
**   If parameter iPhrase is less than zero, or greater than or equal to
**   the number of phrases in the query, as returned by xPhraseCount(),
**   this function returns SQLITE_RANGE.
**
**   If the callback function returns any value other than SQLITE_OK, the
**   query is abandoned and the xQueryPhrase function returns immediately.
**   If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
**   Otherwise, the error code is propagated upwards.
**
**   If the query runs to completion without incident, SQLITE_OK is returned.
**   Or, if some error occurs before the query completes or is aborted by
**   the callback, an SQLite error code is returned.
**
**
** xSetAuxdata(pFts5, pAux, xDelete)
**
**   Save the pointer passed as the second argument as the extension function's
**   "auxiliary data". The pointer may then be retrieved by the current or any
**   future invocation of the same fts5 extension function made as part of
**   the same MATCH query using the xGetAuxdata() API.
**
**   Each extension function is allocated a single auxiliary data slot for
**   each FTS query (MATCH expression). If the extension function is invoked
**   more than once for a single FTS query, then all invocations share a
**   single auxiliary data context.
**
**   If there is already an auxiliary data pointer when this function is
**   invoked, then it is replaced by the new pointer. If an xDelete callback
**   was specified along with the original pointer, it is invoked at this
**   point.
**
**   The xDelete callback, if one is specified, is also invoked on the
**   auxiliary data pointer after the FTS5 query has finished.
**
**   If an error (e.g. an OOM condition) occurs within this function,
**   the auxiliary data is set to NULL and an error code returned. If the
**   xDelete parameter was not NULL, it is invoked on the auxiliary data
**   pointer before returning.
**
**
** xGetAuxdata(pFts5, bClear)
**
**   Returns the current auxiliary data pointer for the fts5 extension
**   function. See the xSetAuxdata() method for details.
**
**   If the bClear argument is non-zero, then the auxiliary data is cleared
**   (set to NULL) before this function returns. In this case the xDelete,
**   if any, is not invoked.
**
**
** xRowCount(pFts5, pnRow)
**
**   This function is used to retrieve the total number of rows in the table.
**   In other words, the same value that would be returned by:
**
**        SELECT count(*) FROM ftstable;
**
** xPhraseFirst()
**   This function is used, along with type Fts5PhraseIter and the xPhraseNext
**   method, to iterate through all instances of a single query phrase within
**   the current row. This is the same information as is accessible via the
**   xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
**   to use, this API may be faster under some circumstances. To iterate
**   through instances of phrase iPhrase, use the following code:
**
**       Fts5PhraseIter iter;
**       int iCol, iOff;
**       for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
**           iCol>=0;
**           pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
**       ){
**         // An instance of phrase iPhrase at offset iOff of column iCol
**       }
**
**   The Fts5PhraseIter structure is defined above. Applications should not
**   modify this structure directly - it should only be used as shown above
**   with the xPhraseFirst() and xPhraseNext() API methods (and by
**   xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below).
**
**   This API can be quite slow if used with an FTS5 table created with the
**   "detail=none" or "detail=column" option. If the FTS5 table is created
**   with either "detail=none" or "detail=column" and "content=" option
**   (i.e. if it is a contentless table), then this API always iterates
**   through an empty set (all calls to xPhraseFirst() set iCol to -1).
**
**   In all cases, matches are visited in (column ASC, offset ASC) order.
**   i.e. all those in column 0, sorted by offset, followed by those in
**   column 1, etc.
**
** xPhraseNext()
**   See xPhraseFirst above.
**
** xPhraseFirstColumn()
**   This function and xPhraseNextColumn() are similar to the xPhraseFirst()
**   and xPhraseNext() APIs described above. The difference is that instead
**   of iterating through all instances of a phrase in the current row, these
**   APIs are used to iterate through the set of columns in the current row
**   that contain one or more instances of a specified phrase. For example:
**
**       Fts5PhraseIter iter;
**       int iCol;
**       for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
**           iCol>=0;
**           pApi->xPhraseNextColumn(pFts, &iter, &iCol)
**       ){
**         // Column iCol contains at least one instance of phrase iPhrase
**       }
**
**   This API can be quite slow if used with an FTS5 table created with the
**   "detail=none" option. If the FTS5 table is created with both the
**   "detail=none" and "content=" options (i.e. if it is a contentless table),
**   then this API always iterates through an empty set (all calls to
**   xPhraseFirstColumn() set iCol to -1).
**
**   The information accessed using this API and its companion
**   xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext
**   (or xInst/xInstCount). The chief advantage of this API is that it is
**   significantly more efficient than those alternatives when used with
**   "detail=column" tables.
**
** xPhraseNextColumn()
**   See xPhraseFirstColumn above.
**
** xQueryToken(pFts5, iPhrase, iToken, ppToken, pnToken)
**   This is used to access token iToken of phrase iPhrase of the current
**   query. Before returning, output parameter *ppToken is set to point
**   to a buffer containing the requested token, and *pnToken to the
**   size of this buffer in bytes.
**
**   If iPhrase or iToken are less than zero, or if iPhrase is greater than
**   or equal to the number of phrases in the query as reported by
**   xPhraseCount(), or if iToken is equal to or greater than the number of
**   tokens in the phrase, SQLITE_RANGE is returned and *ppToken and *pnToken
**   are both zeroed.
**
**   The output text is not a copy of the query text that specified the
**   token. It is the output of the tokenizer module. For tokendata=1
**   tables, this includes any embedded 0x00 and trailing data.
**
** xInstToken(pFts5, iIdx, iToken, ppToken, pnToken)
**   This is used to access token iToken of phrase hit iIdx within the
**   current row. If iIdx is less than zero or greater than or equal to the
**   value returned by xInstCount(), SQLITE_RANGE is returned.  Otherwise,
**   output variable (*ppToken) is set to point to a buffer containing the
**   matching document token, and (*pnToken) to the size of that buffer in
**   bytes. This API is not available if the specified token matches a
**   prefix query term. In that case both output variables are always set
**   to 0.
**
**   The output text is not a copy of the document text that was tokenized.
**   It is the output of the tokenizer module. For tokendata=1 tables, this
**   includes any embedded 0x00 and trailing data.
**
**   This API can be quite slow if used with an FTS5 table created with the
**   "detail=none" or "detail=column" option.
**
** xColumnLocale(pFts5, iCol, pzLocale, pnLocale)
**   If parameter iCol is less than zero, or greater than or equal to the
**   number of columns in the table, SQLITE_RANGE is returned.
**
**   Otherwise, this function attempts to retrieve the locale associated
**   with column iCol of the current row. Usually, there is no associated
**   locale, and output parameters (*pzLocale) and (*pnLocale) are set
**   to NULL and 0, respectively. However, if the fts5_locale() function
**   was used to associate a locale with the value when it was inserted
**   into the fts5 table, then (*pzLocale) is set to point to a nul-terminated
**   buffer containing the name of the locale in utf-8 encoding. (*pnLocale)
**   is set to the size in bytes of the buffer, not including the
**   nul-terminator.
**
**   If successful, SQLITE_OK is returned. Or, if an error occurs, an
**   SQLite error code is returned. The final value of the output parameters
**   is undefined in this case.
**
** xTokenize_v2:
**   Tokenize text using the tokenizer belonging to the FTS5 table. This
**   API is the same as the xTokenize() API, except that it allows a tokenizer
**   locale to be specified.
*/
struct Fts5ExtensionApi {
  int iVersion;                   /* Currently always set to 4 */

  void *(*xUserData)(Fts5Context*);

  int (*xColumnCount)(Fts5Context*);
  int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
  int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);

  int (*xTokenize)(Fts5Context*,
    const char *pText, int nText, /* Text to tokenize */
    void *pCtx,                   /* Context passed to xToken() */
    int (*xToken)(void*, int, const char*, int, int, int)       /* Callback */
  );

  int (*xPhraseCount)(Fts5Context*);
  int (*xPhraseSize)(Fts5Context*, int iPhrase);

  int (*xInstCount)(Fts5Context*, int *pnInst);
  int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);

  sqlite3_int64 (*xRowid)(Fts5Context*);
  int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
  int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);

  int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
    int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
  );
  int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
  void *(*xGetAuxdata)(Fts5Context*, int bClear);

  int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*,
    int *piCol, int *piOff
  );
  void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff);

  int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*,
    int *piCol
  );
  void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol);

  /* Below this point are iVersion>=3 only */
  int (*xQueryToken)(Fts5Context*,
    int iPhrase, int iToken,
    const char **ppToken, int *pnToken
  );
  int (*xInstToken)(Fts5Context*, int iIdx, int iToken, const char**, int*);

  /* Below this point are iVersion>=4 only */
  int (*xColumnLocale)(Fts5Context*, int iCol, const char **pz, int *pn);
  int (*xTokenize_v2)(Fts5Context*,
    const char *pText, int nText,      /* Text to tokenize */
    const char *pLocale, int nLocale,  /* Locale to pass to tokenizer */
    void *pCtx,                        /* Context passed to xToken() */
    int (*xToken)(void*, int, const char*, int, int, int)       /* Callback */
  );
};
/*************************************************************************
** CUSTOM TOKENIZERS
**
** Applications may also register custom tokenizer types. A tokenizer
** is registered by providing fts5 with a populated instance of the
** following structure. All structure methods must be defined; setting
** any member of the fts5_tokenizer struct to NULL leads to undefined
** behaviour. The structure methods are expected to function as follows:
**
** xCreate:
**   This function is used to allocate and initialize a tokenizer instance.
**   A tokenizer instance is required to actually tokenize text.
**
**   The first argument passed to this function is a copy of the (void*)
**   pointer provided by the application when the fts5_tokenizer_v2 object
**   was registered with FTS5 (the third argument to xCreateTokenizer_v2()).
**   The second and third arguments are an array of nul-terminated strings
**   containing the tokenizer arguments, if any, specified following the
**   tokenizer name as part of the CREATE VIRTUAL TABLE statement used
**   to create the FTS5 table.
**
**   The final argument is an output variable. If successful, (*ppOut)
**   should be set to point to the new tokenizer handle and SQLITE_OK
**   returned. If an error occurs, some value other than SQLITE_OK should
**   be returned. In this case, fts5 assumes that the final value of *ppOut
**   is undefined.
**
** xDelete:
**   This function is invoked to delete a tokenizer handle previously
**   allocated using xCreate(). Fts5 guarantees that this function will
**   be invoked exactly once for each successful call to xCreate().
**
** xTokenize:
**   This function is expected to tokenize the nText byte string indicated
**   by argument pText. pText may or may not be nul-terminated. The first
**   argument passed to this function is a pointer to an Fts5Tokenizer object
**   returned by an earlier call to xCreate().
**
**   The third argument indicates the reason that FTS5 is requesting
**   tokenization of the supplied text. This is always one of the following
**   four values:
**
**   <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
**            or removed from the FTS table. The tokenizer is being invoked to
**            determine the set of tokens to add to (or delete from) the
**            FTS index.
**
**       <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed
**            against the FTS index. The tokenizer is being called to tokenize
**            a bareword or quoted string specified as part of the query.
**
**       <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
**            FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
**            followed by a "*" character, indicating that the last token
**            returned by the tokenizer will be treated as a token prefix.
**
**       <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
**            satisfy an Fts5ExtensionApi.xTokenize() request made by an
**            auxiliary function. Or an Fts5ExtensionApi.xColumnSize() request
**            made by the same on a columnsize=0 database.
**   </ul>
**
**   The sixth and seventh arguments passed to xTokenize() - pLocale and
**   nLocale - are a pointer to a buffer containing the locale to use for
**   tokenization (e.g. "en_US") and its size in bytes, respectively. The
**   pLocale buffer is not nul-terminated. pLocale may be passed NULL (in
**   which case nLocale is always 0) to indicate that the tokenizer should
**   use its default locale.
**
**   For each token in the input string, the supplied callback xToken() must
**   be invoked. The first argument to it should be a copy of the pointer
**   passed as the second argument to xTokenize(). The third and fourth
**   arguments are a pointer to a buffer containing the token text, and the
**   size of the token in bytes. The fifth and sixth arguments are the byte
**   offsets of the first byte of and first byte immediately following the
**   text from which the token is derived within the input.
**
**   The second argument passed to the xToken() callback ("tflags") should
**   normally be set to 0. The exception is if the tokenizer supports
**   synonyms. In this case see the discussion below for details.
**
**   FTS5 assumes the xToken() callback is invoked for each token in the
**   order that they occur within the input text.
**
**   If an xToken() callback returns any value other than SQLITE_OK, then
**   the tokenization should be abandoned and the xTokenize() method should
**   immediately return a copy of the xToken() return value. Or, if the
**   input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
**   if an error occurs with the xTokenize() implementation itself, it
**   may abandon the tokenization and return any error code other than
**   SQLITE_OK or SQLITE_DONE.
**
**   If the tokenizer is registered using an fts5_tokenizer_v2 object,
**   then the xTokenize() method has two additional arguments - pLocale
**   and nLocale. These specify the locale that the tokenizer should use
**   for the current request. If pLocale and nLocale are both 0, then the
**   tokenizer should use its default locale. Otherwise, pLocale points to
**   an nLocale byte buffer containing the name of the locale to use as utf-8
**   text. pLocale is not nul-terminated.
**
** FTS5_TOKENIZER
**
** There is also an fts5_tokenizer object. This is an older, deprecated,
** version of fts5_tokenizer_v2. It is similar except that:
**
**  <ul>
**    <li> There is no "iVersion" field, and
**    <li> The xTokenize() method does not take a locale argument.
**  </ul>
**
** Legacy fts5_tokenizer tokenizers must be registered using the
** legacy xCreateTokenizer() function, instead of xCreateTokenizer_v2().
**
** Tokenizer implementations registered using either API may be retrieved
** using both xFindTokenizer() and xFindTokenizer_v2().
**
** SYNONYM SUPPORT
**
**   Custom tokenizers may also support synonyms. Consider a case in which a
**   user wishes to query for a phrase such as "first place". Using the
**   built-in tokenizers, the FTS5 query 'first + place' will match instances
**   of "first place" within the document set, but not alternative forms
**   such as "1st place". In some applications, it would be better to match
**   all instances of "first place" or "1st place" regardless of which form
**   the user specified in the MATCH query text.
**
**   There are several ways to approach this in FTS5:
**
**   <ol><li> By mapping all synonyms to a single token. In this case, using
**            the above example, this means that the tokenizer returns the
**            same token for inputs "first" and "1st". Say that token is in
**            fact "first", so that when the user inserts the document "I won
**            1st place" entries are added to the index for tokens "i", "won",
**            "first" and "place". If the user then queries for '1st + place',
**            the tokenizer substitutes "first" for "1st" and the query works
**            as expected.
**
**       <li> By querying the index for all synonyms of each query term
**            separately. In this case, when tokenizing query text, the
**            tokenizer may provide multiple synonyms for a single term
**            within the document. FTS5 then queries the index for each
**            synonym individually. For example, faced with the query:
**
**   <codeblock>
**     ... MATCH 'first place'</codeblock>
**
**            the tokenizer offers both "1st" and "first" as synonyms for the
**            first token in the MATCH query and FTS5 effectively runs a query
**            similar to:
**
**   <codeblock>
**     ... MATCH '(first OR 1st) place'</codeblock>
**
**            except that, for the purposes of auxiliary functions, the query
**            still appears to contain just two phrases - "(first OR 1st)"
**            being treated as a single phrase.
**
**       <li> By adding multiple synonyms for a single term to the FTS index.
**            Using this method, when tokenizing document text, the tokenizer
**            provides multiple synonyms for each token. So that when a
**            document such as "I won first place" is tokenized, entries are
**            added to the FTS index for "i", "won", "first", "1st" and
**            "place".
**
**            This way, even if the tokenizer does not provide synonyms
**            when tokenizing query text (it should not - to do so would be
**            inefficient), it doesn't matter if the user queries for
**            'first + place' or '1st + place', as there are entries in the
**            FTS index corresponding to both forms of the first token.
**   </ol>
**
**   Whether it is parsing document or query text, any call to xToken that
**   specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
**   is considered to supply a synonym for the previous token. For example,
**   when parsing the document "I won first place", a tokenizer that supports
**   synonyms would call xToken() 5 times, as follows:
**
**   <codeblock>
**       xToken(pCtx, 0, "i",                      1,  0,  1);
**       xToken(pCtx, 0, "won",                    3,  2,  5);
**       xToken(pCtx, 0, "first",                  5,  6, 11);
**       xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3,  6, 11);
**       xToken(pCtx, 0, "place",                  5, 12, 17);
**</codeblock>
**
**   It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
**   xToken() is called. Multiple synonyms may be specified for a single token
**   by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.
**   There is no limit to the number of synonyms that may be provided for a
**   single token.
**
**   In many cases, method (1) above is the best approach. It does not add
**   extra data to the FTS index or require FTS5 to query for multiple terms,
**   so it is efficient in terms of disk space and query speed. However, it
**   does not support prefix queries very well. If, as suggested above, the
**   token "first" is substituted for "1st" by the tokenizer, then the query:
**
**   <codeblock>
**     ... MATCH '1s*'</codeblock>
**
**   will not match documents that contain the token "1st" (as the tokenizer
**   will probably not map "1s" to any prefix of "first").
**
**   For full prefix support, method (3) may be preferred. In this case,
**   because the index contains entries for both "first" and "1st", prefix
**   queries such as 'fi*' or '1s*' will match correctly. However, because
**   extra entries are added to the FTS index, this method uses more space
**   within the database.
**
**   Method (2) offers a midpoint between (1) and (3). Using this method,
**   a query such as '1s*' will match documents that contain the literal
**   token "1st", but not "first" (assuming the tokenizer is not able to
**   provide synonyms for prefixes). However, a non-prefix query like '1st'
**   will match against "1st" and "first". This method does not require
**   extra disk space, as no extra entries are added to the FTS index.
**   On the other hand, it may require more CPU cycles to run MATCH queries,
**   as separate queries of the FTS index are required for each synonym.
**
**   When using methods (2) or (3), it is important that the tokenizer only
**   provide synonyms when tokenizing document text (method (3)) or query
**   text (method (2)), not both. Doing so will not cause any errors, but is
**   inefficient.
*/
typedef struct Fts5Tokenizer Fts5Tokenizer;
typedef struct fts5_tokenizer_v2 fts5_tokenizer_v2;
struct fts5_tokenizer_v2 {
  int iVersion;             /* Currently always 2 */

  int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
  void (*xDelete)(Fts5Tokenizer*);
  int (*xTokenize)(Fts5Tokenizer*,
      void *pCtx,
      int flags,            /* Mask of FTS5_TOKENIZE_* flags */
      const char *pText, int nText,
      const char *pLocale, int nLocale,
      int (*xToken)(
        void *pCtx,         /* Copy of 2nd argument to xTokenize() */
        int tflags,         /* Mask of FTS5_TOKEN_* flags */
        const char *pToken, /* Pointer to buffer containing token */
        int nToken,         /* Size of token in bytes */
        int iStart,         /* Byte offset of token within input text */
        int iEnd            /* Byte offset of end of token within input text */
      )
  );
};
/* ** New code should use the fts5_tokenizer_v2 type to define tokenizer ** implementations. The following type is included for legacy applications ** that still use it.
*/ typedefstruct fts5_tokenizer fts5_tokenizer; struct fts5_tokenizer { int (*xCreate)(void*, constchar **azArg, int nArg, Fts5Tokenizer **ppOut); void (*xDelete)(Fts5Tokenizer*); int (*xTokenize)(Fts5Tokenizer*, void *pCtx, int flags, /* Mask of FTS5_TOKENIZE_* flags */ constchar *pText, int nText, int (*xToken)( void *pCtx, /* Copy of 2nd argument to xTokenize() */ int tflags, /* Mask of FTS5_TOKEN_* flags */ constchar *pToken, /* Pointer to buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Byte offset of token within input text */ int iEnd /* Byte offset of end of token within input text */
)
);
};
/* Flags that may be passed as the third argument to xTokenize() */
#define FTS5_TOKENIZE_QUERY     0x0001
#define FTS5_TOKENIZE_PREFIX    0x0002
#define FTS5_TOKENIZE_DOCUMENT  0x0004
#define FTS5_TOKENIZE_AUX       0x0008

/* Flags that may be passed by the tokenizer implementation back to FTS5
** as the second argument ("tflags") to the supplied xToken callback. */
#define FTS5_TOKEN_COLOCATED    0x0001  /* Same position as prev. token */
/* ** END OF CUSTOM TOKENIZERS
*************************************************************************/
/*************************************************************************
** FTS5 EXTENSION REGISTRATION API
**
** Method table through which tokenizers and auxiliary functions are
** registered and looked up. The first three methods accept the legacy
** fts5_tokenizer type; the *_v2 methods accept fts5_tokenizer_v2 and
** are only present when iVersion>=3.
*/
typedef struct fts5_api fts5_api;
struct fts5_api {
  int iVersion;                   /* Currently always set to 3 */

  /* Create a new tokenizer */
  int (*xCreateTokenizer)(
    fts5_api *pApi,
    const char *zName,
    void *pUserData,
    fts5_tokenizer *pTokenizer,
    void (*xDestroy)(void*)
  );

  /* Find an existing tokenizer */
  int (*xFindTokenizer)(
    fts5_api *pApi,
    const char *zName,
    void **ppUserData,
    fts5_tokenizer *pTokenizer
  );

  /* Create a new auxiliary function */
  int (*xCreateFunction)(
    fts5_api *pApi,
    const char *zName,
    void *pUserData,
    fts5_extension_function xFunction,
    void (*xDestroy)(void*)
  );

  /* APIs below this point are only available if iVersion>=3 */

  /* Create a new tokenizer */
  int (*xCreateTokenizer_v2)(
    fts5_api *pApi,
    const char *zName,
    void *pUserData,
    fts5_tokenizer_v2 *pTokenizer,
    void (*xDestroy)(void*)
  );

  /* Find an existing tokenizer */
  int (*xFindTokenizer_v2)(
    fts5_api *pApi,
    const char *zName,
    void **ppUserData,
    fts5_tokenizer_v2 **ppTokenizer
  );
};
/* ** END OF REGISTRATION API
*************************************************************************/
#ifdef __cplusplus
} /* end of the 'extern "C"' block */ #endif
#endif/* _FTS5_H */
#line 1 "fts5Int.h" /* ** 2014 May 31 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** **
*/ #ifndef _FTS5INT_H #define _FTS5INT_H
/*
** Constants for the largest and smallest possible 64-bit signed integers.
**
** LARGEST_INT64 is assembled from two 32-bit halves rather than written as
** a single 64-bit literal; SMALLEST_INT64 is derived from it so that no
** literal outside the positive range is needed.
*/
#define LARGEST_INT64  (0xffffffff|(((i64)0x7fffffff)<<32))
#define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64)
/* The uptr type is an unsigned integer large enough to hold a pointer
*/ #ifdefined(HAVE_STDINT_H) typedef uintptr_t uptr; #elif SQLITE_PTRSIZE==4 typedef u32 uptr; #else typedef u64 uptr; #endif
/*
** Tokens longer than this many bytes are truncated.
**
** The hard ceiling is (65536-1-1-4-9)==65521 bytes, imposed by the 16-bit
** offset field found at the start of every leaf page (see fts5_index.c).
*/
#define FTS5_MAX_TOKEN_SIZE 32768
/*
** Upper bound on the number of prefix indexes a single FTS5 table may have.
** The value must stay below 32: an #error directive in fts5_index.c fails
** the build if it is set to anything larger than that.
*/
#define FTS5_MAX_PREFIX_INDEXES 31
/*
** Upper bound on the number of segments allowed in a single index.
*/
#define FTS5_MAX_SEGMENT 2000
/*
** The assert_nc() macro is similar to the assert() macro, except that it
** is used for assert() conditions that are true only if it can be
** guaranteed that the database is not corrupt.
**
** In SQLITE_DEBUG builds the condition is skipped whenever the global
** sqlite3_fts5_may_be_corrupt is non-zero. In all other builds
** assert_nc(x) is exactly assert(x).
*/
#ifdef SQLITE_DEBUG
extern int sqlite3_fts5_may_be_corrupt;
# define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
#else
# define assert_nc(x) assert(x)
#endif
/*
** memcmp() wrapper that yields 0 without touching either pointer when the
** byte count is zero or negative. This avoids asan errors in the case
** where one of the pointer parameters is NULL and there is nothing to
** compare. Note that argument n is evaluated twice.
*/
#define fts5Memcmp(s1, s2, n) ((n)>0 ? memcmp((s1), (s2), (n)) : 0)
/* Mark a function parameter as unused, to suppress nuisance compiler
** warnings. The definition is skipped if UNUSED_PARAM already exists. */
#ifndef UNUSED_PARAM
# define UNUSED_PARAM(X)  (void)(X)
#endif
/*
** Column set for a NEAR() clump or phrase.
**
** When a NEAR() clump or phrase is only permitted to match a particular
** set of columns, that set is recorded in an object of this type. Every
** entry of the aiCol[] array identifies one column that may be matched.
**
** Both fts5_expr.c and fts5_index.c make use of this object.
*/
struct Fts5Colset {
  int nCol;       /* presumably the count of entries in aiCol[] - confirm */
  int aiCol[1];   /* Matchable columns; declared here with one element */
};
/**************************************************************************
** Interface to code in fts5_config.c. fts5_config.c contains code
** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
*/
/*
** Tokenizer-related configuration. An instance is embedded in Fts5Config
** as member "t".
*/
struct Fts5TokenizerConfig {
  Fts5Tokenizer *pTok;
  fts5_tokenizer_v2 *pApi2;
  fts5_tokenizer *pApi1;
  const char **azArg;
  int nArg;
  int ePattern;                   /* FTS_PATTERN_XXX constant */
  const char *pLocale;            /* Current locale to use */
  int nLocale;                    /* Size of pLocale in bytes */
};
/*
** An instance of the following structure encodes all information that can
** be gleaned from the CREATE VIRTUAL TABLE statement.
**
** And all information loaded from the %_config table.
**
** nAutomerge:
**   The minimum number of segments that an auto-merge operation should
**   attempt to merge together. A value of 1 sets the object to use the
**   compile time default. Zero disables auto-merge altogether.
**
** bContentlessDelete:
**   True if the contentless_delete option was present in the CREATE
**   VIRTUAL TABLE statement.
**
** zContent:
**
** zContentRowid:
**   The value of the content_rowid= option, if one was specified. Or
**   the string "rowid" otherwise. This text is not quoted - if it is
**   used as part of an SQL statement it needs to be quoted appropriately.
**
** zContentExprlist:
**
** pzErrmsg:
**   This exists in order to allow the fts5_index.c module to return a
**   decent error message if it encounters a file-format version it does
**   not understand.
**
** bColumnsize:
**   True if the %_docsize table is created.
**
** bPrefixIndex:
**   This is only used for debugging. If set to false, any prefix indexes
**   are ignored. This value is configured using:
**
**       INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex);
**
** bLocale:
**   Set to true if locale=1 was specified when the table was created.
*/
struct Fts5Config {
  sqlite3 *db;                    /* Database handle */
  Fts5Global *pGlobal;            /* Global fts5 object for handle db */
  char *zDb;                      /* Database holding FTS index (e.g. "main") */
  char *zName;                    /* Name of FTS index */
  int nCol;                       /* Number of columns */
  char **azCol;                   /* Column names */
  u8 *abUnindexed;                /* True for unindexed columns */
  int nPrefix;                    /* Number of prefix indexes */
  int *aPrefix;                   /* Sizes in bytes of nPrefix prefix indexes */
  int eContent;                   /* An FTS5_CONTENT value */
  int bContentlessDelete;         /* "contentless_delete=" option (dflt==0) */
  int bContentlessUnindexed;      /* "contentless_unindexed=" option (dflt=0) */
  char *zContent;                 /* content table */
  char *zContentRowid;            /* "content_rowid=" option value */
  int bColumnsize;                /* "columnsize=" option value (dflt==1) */
  int bTokendata;                 /* "tokendata=" option value (dflt==0) */
  int bLocale;                    /* "locale=" option value (dflt==0) */
  int eDetail;                    /* FTS5_DETAIL_XXX value */
  char *zContentExprlist;
  Fts5TokenizerConfig t;
  int bLock;                      /* True when table is preparing statement */

  /* Values loaded from the %_config table */
  int iVersion;                   /* fts5 file format 'version' */
  int iCookie;                    /* Incremented when %_config is modified */
  int pgsz;                       /* Approximate page size used in %_data */
  int nAutomerge;                 /* 'automerge' setting */
  int nCrisisMerge;               /* Maximum allowed segments per level */
  int nUsermerge;                 /* 'usermerge' setting */
  int nHashSize;                  /* Bytes of memory for in-memory hash */
  char *zRank;                    /* Name of rank function */
  char *zRankArgs;                /* Arguments to rank function */
  int bSecureDelete;              /* 'secure-delete' */
  int nDeleteMerge;               /* 'deletemerge' */

  /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
  char **pzErrmsg;

#ifdef SQLITE_DEBUG
  int bPrefixIndex;               /* True to use prefix-indexes */
#endif
};
/*
** Current expected value of the %_config table 'version' field, and the
** expected version if the 'secure-delete' option has ever been set on
** the table.
*/
#define FTS5_CURRENT_VERSION               4
#define FTS5_CURRENT_VERSION_SECUREDELETE  5
/* ** End of interface to code in fts5_config.c.
**************************************************************************/
/************************************************************************** ** Interface to code in fts5_buffer.c.
*/
/*
** Buffer object for the incremental building of string data.
*/
typedef struct Fts5Buffer Fts5Buffer;
struct Fts5Buffer {
  u8 *p;
  int n;
  int nSpace;
};
/*
** Iterator state for walking a position list buffer, together with the
** functions that initialize and advance it.
*/
typedef struct Fts5PoslistReader Fts5PoslistReader;
struct Fts5PoslistReader {
  /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
  const u8 *a;                    /* Position list to iterate through */
  int n;                          /* Size of buffer at a[] in bytes */
  int i;                          /* Current offset in a[] */

  u8 bFlag;                       /* For client use (any custom purpose) */

  /* Output variables */
  u8 bEof;                        /* Set to true at EOF */
  i64 iPos;                       /* (iCol<<32) + iPos */
};
static int sqlite3Fts5PoslistReaderInit(
  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
);
static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*);
/* Character set tests (like isspace(), isalpha() etc.) */
static int sqlite3Fts5IsBareword(char t);

/* Bucket of terms object used by the integrity-check in offsets=0 mode. */
typedef struct Fts5Termset Fts5Termset;
static int sqlite3Fts5TermsetNew(Fts5Termset**);
static int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent);
static void sqlite3Fts5TermsetFree(Fts5Termset*);
/* ** End of interface to code in fts5_buffer.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_index.c. fts5_index.c contains code
** to access the data stored in the %_data table.
*/
/*
** Values used as part of the flags argument passed to IndexQuery().
*/
#define FTS5INDEX_QUERY_PREFIX      0x0001  /* Prefix query */
#define FTS5INDEX_QUERY_DESC        0x0002  /* Docs in descending rowid order */
#define FTS5INDEX_QUERY_TEST_NOIDX  0x0004  /* Do not use prefix index */
#define FTS5INDEX_QUERY_SCAN        0x0008  /* Scan query (fts5vocab) */

/*
** The following are used internally by the fts5_index.c module. They are
** defined here only to make it easier to avoid clashes with the flags
** above.
*/
#define FTS5INDEX_QUERY_SKIPEMPTY   0x0010
#define FTS5INDEX_QUERY_NOOUTPUT    0x0020
#define FTS5INDEX_QUERY_SKIPHASH    0x0040
#define FTS5INDEX_QUERY_NOTOKENDATA 0x0080
#define FTS5INDEX_QUERY_SCANONETERM 0x0100
/*
** Create/destroy an Fts5Index object.
*/
static int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**);
static int sqlite3Fts5IndexClose(Fts5Index *p);

/*
** Return a simple checksum value based on the arguments.
*/
static u64 sqlite3Fts5IndexEntryCksum(
  i64 iRowid,
  int iCol,
  int iPos,
  int iIdx,
  const char *pTerm,
  int nTerm
);

/*
** Argument p points to a buffer containing utf-8 text that is nByte bytes
** in size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are fewer than nChar characters in total.
*/
static int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
);
/*
** Open a new iterator to iterate through all rowids that match the
** specified token or token prefix.
*/
static int sqlite3Fts5IndexQuery(
  Fts5Index *p,                   /* FTS index to query */
  const char *pToken, int nToken, /* Token (or prefix) to query for */
  int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */
  Fts5Colset *pColset,            /* Match these columns only */
  Fts5IndexIter **ppIter          /* OUT: New iterator object */
);

/*
** The various operations on open token or token prefix iterators opened
** using sqlite3Fts5IndexQuery().
*/
static int sqlite3Fts5IterNext(Fts5IndexIter*);
static int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);

/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
static void sqlite3Fts5IterClose(Fts5IndexIter*);

/*
** Close the reader blob handle, if it is open.
*/
static void sqlite3Fts5IndexCloseReader(Fts5Index*);

/*
** This interface is used by the fts5vocab module.
*/
static const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*);
static int sqlite3Fts5IterNextScan(Fts5IndexIter*);
static void *sqlite3Fts5StructureRef(Fts5Index*);
static void sqlite3Fts5StructureRelease(void*);
static int sqlite3Fts5StructureTest(Fts5Index*, void*);

/*
** Used by xInstToken():
*/
static int sqlite3Fts5IterToken(Fts5IndexIter*, i64, int, int, const char**, int*);
/*
** Insert or remove data to or from the index. Each time a document is
** added to or removed from the index, this function is called one or more
** times.
**
** For an insert, it must be called once for each token in the new document.
** If the operation is a delete, it must be called (at least) once for each
** unique token in the document with an iCol value less than zero. The iPos
** argument is ignored for a delete.
*/
static int sqlite3Fts5IndexWrite(
  Fts5Index *p,                   /* Index to write to */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  const char *pToken, int nToken  /* Token to add or remove to or from index */
);

/*
** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to
** document iDocid.
*/
static int sqlite3Fts5IndexBeginWrite(
  Fts5Index *p,                   /* Index to write to */
  int bDelete,                    /* True if current operation is a delete */
  i64 iDocid                      /* Docid to add or remove data from */
);

/*
** Flush any data stored in the in-memory hash tables to the database.
** Also close any open blob handles.
*/
static int sqlite3Fts5IndexSync(Fts5Index *p);

/*
** Discard any data stored in the in-memory hash tables. Do not write it
** to the database. Additionally, assume that the contents of the %_data
** table may have changed on disk. So any in-memory caches of %_data
** records must be invalidated.
*/
static int sqlite3Fts5IndexRollback(Fts5Index *p);

/*
** Get or set the "averages" values.
*/
static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize);
static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);

/*
** Functions called by the storage module as part of integrity-check.
*/
static int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum, int bUseCksum);

/*
** Called during virtual module initialization to register UDF
** fts5_decode() with SQLite.
*/
static int sqlite3Fts5IndexInit(sqlite3*);

/*
** Return the total number of entries read from the %_data table by
** this connection since it was created.
*/
static int sqlite3Fts5IndexReads(Fts5Index *p);

/* Used to populate hash tables for xInstToken in detail=none/column mode. */
static int sqlite3Fts5IndexIterWriteTokendata(
    Fts5IndexIter*, const char*, int, i64 iRowid, int iCol, int iOff
);
/* ** End of interface to code in fts5_index.c.
**************************************************************************/
/*
** Locale helpers declared for code in fts5.c.
** NOTE(review): only the prototypes are visible here; the exact semantics
** of each routine should be confirmed against its definition.
*/
static void sqlite3Fts5ClearLocale(Fts5Config *pConfig);
static void sqlite3Fts5SetLocale(Fts5Config *pConfig, const char *pLoc, int nLoc);

static int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal);
static int sqlite3Fts5DecodeLocaleValue(sqlite3_value *pVal,
    const char **ppText, int *pnText, const char **ppLoc, int *pnLoc
);
/* ** End of interface to code in fts5.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_hash.c.
*/
typedef struct Fts5Hash Fts5Hash;

/*
** Create a hash table, free a hash table.
*/
static int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize);
static void sqlite3Fts5HashFree(Fts5Hash*);

static int sqlite3Fts5HashWrite(
  Fts5Hash*,
  i64 iRowid,                     /* Rowid for this entry */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  char bByte,
  const char *pToken, int nToken  /* Token to add or remove to or from index */
);

/*
** Empty (but do not delete) a hash table.
*/
static void sqlite3Fts5HashClear(Fts5Hash*);

/*
** Return true if the hash is empty, false otherwise.
*/
static int sqlite3Fts5HashIsEmpty(Fts5Hash*);

static int sqlite3Fts5HashQuery(
  Fts5Hash*,                      /* Hash table to query */
  int nPre,
  const char *pTerm, int nTerm,   /* Query term */
  void **ppObj,                   /* OUT: Pointer to doclist for pTerm */
  int *pnDoclist                  /* OUT: Size of doclist in bytes */
);

static int sqlite3Fts5HashScanInit(
  Fts5Hash*,                      /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
);
static void sqlite3Fts5HashScanNext(Fts5Hash*);
static int sqlite3Fts5HashScanEof(Fts5Hash*);
static void sqlite3Fts5HashScanEntry(Fts5Hash *,
  const char **pzTerm,            /* OUT: term (nul-terminated) */
  int *pnTerm,                    /* OUT: Size of term in bytes */
  const u8 **ppDoclist,           /* OUT: pointer to doclist */
  int *pnDoclist                  /* OUT: size of doclist in bytes */
);
/* ** End of interface to code in fts5_hash.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_storage.c. fts5_storage.c contains
** code to access the data stored in the %_content and %_docsize tables.
*/
/* Identifiers for the statements used to read rows of the table. */
#define FTS5_STMT_SCAN_ASC  0     /* SELECT rowid, * FROM ... ORDER BY 1 ASC */
#define FTS5_STMT_SCAN_DESC 1     /* SELECT rowid, * FROM ... ORDER BY 1 DESC */
#define FTS5_STMT_LOOKUP    2     /* SELECT rowid, * FROM ... WHERE rowid=? */
/*
** A token is a (pointer, length) pair; the text is not nul-terminated, so
** n must always accompany p.
*/
struct Fts5Token {
  const char *p;                  /* Token text (not NULL terminated) */
  int n;                          /* Size of buffer p in bytes */
};
/* Parse a MATCH expression. */
static int sqlite3Fts5ExprNew(
  Fts5Config *pConfig,
  int bPhraseToAnd,
  int iCol,                       /* Column on LHS of MATCH operator */
  const char *zExpr,
  Fts5Expr **ppNew,
  char **pzErr
);

/*
** Build an expression from pattern text zText.
** NOTE(review): bGlob presumably selects GLOB rather than LIKE semantics -
** confirm against the definition in fts5_expr.c.
*/
static int sqlite3Fts5ExprPattern(
  Fts5Config *pConfig,
  int bGlob,
  int iCol,
  const char *zText,
  Fts5Expr **pp
);
/******************************************* ** The fts5_expr.c API above this point is used by the other hand-written ** C code in this module. The interfaces below this point are called by
** the parser code in fts5parse.y. */
/* ** End of interface to code in fts5_expr.c.
**************************************************************************/
/************************************************************************** ** Interface to code in fts5_aux.c.
*/
static int sqlite3Fts5AuxInit(fts5_api*);
/*
** End of interface to code in fts5_aux.c.
**************************************************************************/
/************************************************************************** ** Interface to code in fts5_tokenizer.c.
*/
static int sqlite3Fts5TokenizerInit(fts5_api*);
static int sqlite3Fts5TokenizerPattern(
    int (*xCreate)(void*, const char**, int, Fts5Tokenizer**),
    Fts5Tokenizer *pTok
);
static int sqlite3Fts5TokenizerPreload(Fts5TokenizerConfig*);
/*
** End of interface to code in fts5_tokenizer.c.
**************************************************************************/
/************************************************************************** ** Interface to code in fts5_vocab.c.
*/
/* ** End of interface to code in fts5_vocab.c.
**************************************************************************/
/**************************************************************************
** Interface to automatically generated code in fts5_unicode2.c.
*/
static int sqlite3Fts5UnicodeIsdiacritic(int c);
static int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);

static int sqlite3Fts5UnicodeCatParse(const char*, u8*);
static int sqlite3Fts5UnicodeCategory(u32 iCode);
static void sqlite3Fts5UnicodeAscii(u8*, u8*);
/*
** End of interface to code in fts5_unicode2.c.
**************************************************************************/
#line 1 "fts5parse.c" /* This file is automatically generated by Lemon from input grammar ** source file "fts5parse.y".
*/
/*
** 2000-05-29
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Driver template for the LEMON parser generator.
**
** The "lemon" program processes an LALR(1) input grammar file, then uses
** this template to construct a parser.  The "lemon" program inserts text
** at each "%%" line.  Also, any "P-a-r-s-e" identifier prefix (without the
** interstitial "-" characters) contained in this template is changed into
** the value of the %name directive from the grammar.  Otherwise, the content
** of this template is copied straight through into the generated parser
** source file.
**
** The following is the concatenation of all %include directives from the
** input grammar file:
*/ /************ Begin %include sections from the grammar ************************/ #line 47 "fts5parse.y"
/*
** Turn off all error recovery processing in the parser push-down
** automaton.
*/
#define fts5YYNOERRORRECOVERY 1

/*
** fts5yytestcase() is simply another name for testcase().
*/
#define fts5yytestcase(X) testcase(X)

/*
** Promise that sqlite3ParserFree() is never invoked with a null
** pointer.
*/
#define fts5YYPARSEFREENOTNULL 1

/*
** Datatype of the argument to the malloc() routine handed to
** sqlite3ParserAlloc(), replacing the default of size_t.
*/
#define fts5YYMALLOCARGTYPE  u64
#line 58 "fts5parse.sql"
/**************** End of %include directives **********************************/
/* These constants specify the various numeric values for terminal symbols.
***************** Begin token definitions *************************************/
#ifndef FTS5_OR
#define FTS5_OR                              1
#define FTS5_AND                             2
#define FTS5_NOT                             3
#define FTS5_TERM                            4
#define FTS5_COLON                           5
#define FTS5_MINUS                           6
#define FTS5_LCP                             7
#define FTS5_RCP                             8
#define FTS5_STRING                          9
#define FTS5_LP                             10
#define FTS5_RP                             11
#define FTS5_CARET                          12
#define FTS5_COMMA                          13
#define FTS5_PLUS                           14
#define FTS5_STAR                           15
#endif
/**************** End token definitions ***************************************/
/* The next sections is a series of control #defines. ** various aspects of the generated parser. ** fts5YYCODETYPE is the data type used to store the integer codes ** that represent terminal and non-terminal symbols. ** "unsigned char" is used if there are fewer than ** 256 symbols. Larger types otherwise. ** fts5YYNOCODE is a number of type fts5YYCODETYPE that is not used for ** any terminal or nonterminal symbol. ** fts5YYFALLBACK If defined, this indicates that one or more tokens ** (also known as: "terminal symbols") have fall-back ** values which should be used if the original symbol ** would not parse. This permits keywords to sometimes ** be used as identifiers, for example. ** fts5YYACTIONTYPE is the data type used for "action codes" - numbers ** that indicate what to do in response to the next ** token. ** sqlite3Fts5ParserFTS5TOKENTYPE is the data type used for minor type for terminal ** symbols. Background: A "minor type" is a semantic ** value associated with a terminal or non-terminal ** symbols. For example, for an "ID" terminal symbol, ** the minor type might be the name of the identifier. ** Each non-terminal can have a different minor type. ** Terminal symbols all have the same minor type, though. ** This macros defines the minor type for terminal ** symbols. ** fts5YYMINORTYPE is the data type used for all minor types. ** This is typically a union of many types, one of ** which is sqlite3Fts5ParserFTS5TOKENTYPE. The entry in the union ** for terminal symbols is called "fts5yy0". ** fts5YYSTACKDEPTH is the maximum depth of the parser's stack. 
If ** zero the stack is dynamically sized using realloc() ** sqlite3Fts5ParserARG_SDECL A static variable declaration for the %extra_argument ** sqlite3Fts5ParserARG_PDECL A parameter declaration for the %extra_argument ** sqlite3Fts5ParserARG_PARAM Code to pass %extra_argument as a subroutine parameter ** sqlite3Fts5ParserARG_STORE Code to store %extra_argument into fts5yypParser ** sqlite3Fts5ParserARG_FETCH Code to extract %extra_argument from fts5yypParser ** sqlite3Fts5ParserCTX_* As sqlite3Fts5ParserARG_ except for %extra_context ** fts5YYREALLOC Name of the realloc() function to use ** fts5YYFREE Name of the free() function to use ** fts5YYDYNSTACK True if stack space should be extended on heap ** fts5YYERRORSYMBOL is the code number of the error symbol. If not ** defined, then do no error processing. ** fts5YYNSTATE the combined number of states. ** fts5YYNRULE the number of rules in the grammar ** fts5YYNFTS5TOKEN Number of terminal symbols ** fts5YY_MAX_SHIFT Maximum value for shift actions ** fts5YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions ** fts5YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions ** fts5YY_ERROR_ACTION The fts5yy_action[] code for syntax error ** fts5YY_ACCEPT_ACTION The fts5yy_action[] code for accept ** fts5YY_NO_ACTION The fts5yy_action[] code for no-op ** fts5YY_MIN_REDUCE Minimum value for reduce actions ** fts5YY_MAX_REDUCE Maximum value for reduce actions ** fts5YY_MIN_DSTRCTR Minimum symbol value that has a destructor ** fts5YY_MAX_DSTRCTR Maximum symbol value that has a destructor
*/ #ifndef INTERFACE # define INTERFACE 1 #endif /************* Begin control #defines *****************************************/ #define fts5YYCODETYPE unsignedchar #define fts5YYNOCODE 27 #define fts5YYACTIONTYPE unsignedchar #define sqlite3Fts5ParserFTS5TOKENTYPE Fts5Token typedefunion { int fts5yyinit;
sqlite3Fts5ParserFTS5TOKENTYPE fts5yy0; int fts5yy4;
Fts5Colset* fts5yy11;
Fts5ExprNode* fts5yy24;
Fts5ExprNearset* fts5yy46;
Fts5ExprPhrase* fts5yy53;
} fts5YYMINORTYPE; #ifndef fts5YYSTACKDEPTH #define fts5YYSTACKDEPTH 100 #endif #define sqlite3Fts5ParserARG_SDECL Fts5Parse *pParse; #define sqlite3Fts5ParserARG_PDECL ,Fts5Parse *pParse #define sqlite3Fts5ParserARG_PARAM ,pParse #define sqlite3Fts5ParserARG_FETCH Fts5Parse *pParse=fts5yypParser->pParse; #define sqlite3Fts5ParserARG_STORE fts5yypParser->pParse=pParse; #define fts5YYREALLOC realloc #define fts5YYFREE free #define fts5YYDYNSTACK 0 #define sqlite3Fts5ParserCTX_SDECL #define sqlite3Fts5ParserCTX_PDECL #define sqlite3Fts5ParserCTX_PARAM #define sqlite3Fts5ParserCTX_FETCH #define sqlite3Fts5ParserCTX_STORE #define fts5YYNSTATE 35 #define fts5YYNRULE 28 #define fts5YYNRULE_WITH_ACTION 28 #define fts5YYNFTS5TOKEN 16 #define fts5YY_MAX_SHIFT 34 #define fts5YY_MIN_SHIFTREDUCE 52 #define fts5YY_MAX_SHIFTREDUCE 79 #define fts5YY_ERROR_ACTION 80 #define fts5YY_ACCEPT_ACTION 81 #define fts5YY_NO_ACTION 82 #define fts5YY_MIN_REDUCE 83 #define fts5YY_MAX_REDUCE 110 #define fts5YY_MIN_DSTRCTR 16 #define fts5YY_MAX_DSTRCTR 24 /************* End control #defines *******************************************/ #define fts5YY_NLOOKAHEAD ((int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0])))
/* Define the fts5yytestcase() macro to be a no-op if it is not already
** defined otherwise.
**
** Applications can choose to define fts5yytestcase() in the %include section
** to a macro that can assist in verifying code coverage.  For production
** code the fts5yytestcase() macro should be turned off.  But it is useful
** for testing.
*/
#ifndef fts5yytestcase
# define fts5yytestcase(X)
#endif
/* Macro to determine if stack space has the ability to grow using
** heap memory. The stack is growable when fts5YYSTACKDEPTH is zero or
** negative, or when fts5YYDYNSTACK is true.
*/
#if fts5YYSTACKDEPTH<=0 || fts5YYDYNSTACK
# define fts5YYGROWABLESTACK 1
#else
# define fts5YYGROWABLESTACK 0
#endif

/* Guarantee a minimum number of initial stack slots.
*/
#if fts5YYSTACKDEPTH<=0
# undef fts5YYSTACKDEPTH
# define fts5YYSTACKDEPTH 2  /* Need a minimum stack size */
#endif
/* Next are the tables used to determine what action to take based on the ** current state and lookahead token. These tables are used to implement ** functions that take a state number and lookahead value and return an ** action integer. ** ** Suppose the action integer is N. Then the action is determined as ** follows ** ** 0 <= N <= fts5YY_MAX_SHIFT Shift N. That is, push the lookahead ** token onto the stack and goto state N. ** ** N between fts5YY_MIN_SHIFTREDUCE Shift to an arbitrary state then ** and fts5YY_MAX_SHIFTREDUCE reduce by rule N-fts5YY_MIN_SHIFTREDUCE. ** ** N == fts5YY_ERROR_ACTION A syntax error has occurred. ** ** N == fts5YY_ACCEPT_ACTION The parser accepts its input. ** ** N == fts5YY_NO_ACTION No such action. Denotes unused ** slots in the fts5yy_action[] table. ** ** N between fts5YY_MIN_REDUCE Reduce by rule N-fts5YY_MIN_REDUCE ** and fts5YY_MAX_REDUCE ** ** The action table is constructed as a single large table named fts5yy_action[]. ** Given state S and lookahead X, the action is computed as either: ** ** (A) N = fts5yy_action[ fts5yy_shift_ofst[S] + X ] ** (B) N = fts5yy_default[S] ** ** The (A) formula is preferred. The B formula is used instead if ** fts5yy_lookahead[fts5yy_shift_ofst[S]+X] is not equal to X. ** ** The formulas above are for computing the action when the lookahead is ** a terminal symbol. If the lookahead is a non-terminal (as occurs after ** a reduce action) then the fts5yy_reduce_ofst[] array is used in place of ** the fts5yy_shift_ofst[] array. ** ** The following are the tables generated in this section: ** ** fts5yy_action[] A single table containing all actions. ** fts5yy_lookahead[] A table containing the lookahead for each entry in ** fts5yy_action. Used to detect hash collisions. ** fts5yy_shift_ofst[] For each state, the offset into fts5yy_action for ** shifting terminals. ** fts5yy_reduce_ofst[] For each state, the offset into fts5yy_action for ** shifting non-terminals after a reduce. 
** fts5yy_default[] Default action for each state. **
*********** Begin parsing tables **********************************************/ #define fts5YY_ACTTAB_COUNT (105) staticconst fts5YYACTIONTYPE fts5yy_action[] = { /* 0 */ 81, 20, 96, 6, 28, 99, 98, 26, 26, 18, /* 10 */ 96, 6, 28, 17, 98, 56, 26, 19, 96, 6, /* 20 */ 28, 14, 98, 14, 26, 31, 92, 96, 6, 28, /* 30 */ 108, 98, 25, 26, 21, 96, 6, 28, 78, 98, /* 40 */ 58, 26, 29, 96, 6, 28, 107, 98, 22, 26, /* 50 */ 24, 16, 12, 11, 1, 13, 13, 24, 16, 23, /* 60 */ 11, 33, 34, 13, 97, 8, 27, 32, 98, 7, /* 70 */ 26, 3, 4, 5, 3, 4, 5, 3, 83, 4, /* 80 */ 5, 3, 63, 5, 3, 62, 12, 2, 86, 13, /* 90 */ 9, 30, 10, 10, 54, 57, 75, 78, 78, 53, /* 100 */ 57, 15, 82, 82, 71,
}; staticconst fts5YYCODETYPE fts5yy_lookahead[] = { /* 0 */ 16, 17, 18, 19, 20, 22, 22, 24, 24, 17, /* 10 */ 18, 19, 20, 7, 22, 9, 24, 17, 18, 19, /* 20 */ 20, 9, 22, 9, 24, 13, 17, 18, 19, 20, /* 30 */ 26, 22, 24, 24, 17, 18, 19, 20, 15, 22, /* 40 */ 9, 24, 17, 18, 19, 20, 26, 22, 21, 24, /* 50 */ 6, 7, 9, 9, 10, 12, 12, 6, 7, 21, /* 60 */ 9, 24, 25, 12, 18, 5, 20, 14, 22, 5, /* 70 */ 24, 3, 1, 2, 3, 1, 2, 3, 0, 1, /* 80 */ 2, 3, 11, 2, 3, 11, 9, 10, 5, 12, /* 90 */ 23, 24, 10, 10, 8, 9, 9, 15, 15, 8, /* 100 */ 9, 9, 27, 27, 11, 27, 27, 27, 27, 27, /* 110 */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, /* 120 */ 27,
}; #define fts5YY_SHIFT_COUNT (34) #define fts5YY_SHIFT_MIN (0) #define fts5YY_SHIFT_MAX (93) staticconstunsignedchar fts5yy_shift_ofst[] = { /* 0 */ 44, 44, 44, 44, 44, 44, 51, 77, 43, 12, /* 10 */ 14, 83, 82, 14, 23, 23, 31, 31, 71, 74, /* 20 */ 78, 81, 86, 91, 6, 53, 53, 60, 64, 68, /* 30 */ 53, 87, 92, 53, 93,
}; #define fts5YY_REDUCE_COUNT (17) #define fts5YY_REDUCE_MIN (-17) #define fts5YY_REDUCE_MAX (67) staticconstsignedchar fts5yy_reduce_ofst[] = { /* 0 */ -16, -8, 0, 9, 17, 25, 46, -17, -17, 37, /* 10 */ 67, 4, 4, 8, 4, 20, 27, 38,
}; staticconst fts5YYACTIONTYPE fts5yy_default[] = { /* 0 */ 80, 80, 80, 80, 80, 80, 95, 80, 80, 105, /* 10 */ 80, 110, 110, 80, 110, 110, 80, 80, 80, 80, /* 20 */ 80, 91, 80, 80, 80, 101, 100, 80, 80, 90, /* 30 */ 103, 80, 80, 104, 80,
}; /********** End of lemon-generated parsing tables *****************************/
/* The next table maps tokens (terminal symbols) into fallback tokens.
** If a construct like the following:
**
**      %fallback ID X Y Z.
**
** appears in the grammar, then ID becomes a fallback token for X, Y,
** and Z.  Whenever one of the tokens X, Y, or Z is input to the parser
** but it does not parse, the type of the token is changed to ID and
** the parse is retried before an error is thrown.
**
** This feature can be used, for example, to cause some keywords in a language
** to revert to identifiers if the keyword does not apply in the context where
** it appears.
*/
#ifdef fts5YYFALLBACK
static const fts5YYCODETYPE fts5yyFallback[] = {
};
#endif /* fts5YYFALLBACK */
/* The following structure represents a single element of the
** parser's stack.  Information stored includes:
**
**   +  The state number for the parser at this level of the stack.
**
**   +  The value of the token stored at this level of the stack.
**      (In other words, the "major" token.)
**
**   +  The semantic value stored at this level of the stack.  This is
**      the information used by the action routines in the grammar.
**      It is sometimes called the "minor" token.
**
** After the "shift" half of a SHIFTREDUCE action, the stateno field
** actually contains the reduce action for the second half of the
** SHIFTREDUCE.
*/
struct fts5yyStackEntry {
  fts5YYACTIONTYPE stateno;  /* The state-number, or reduce action in SHIFTREDUCE */
  fts5YYCODETYPE major;      /* The major token value.  This is the code
                         ** number for the token at this stack level */
  fts5YYMINORTYPE minor;     /* The user-supplied minor token value.  This
                         ** is the value of the token  */
};
typedef struct fts5yyStackEntry fts5yyStackEntry;
/* The state of the parser is completely contained in an instance of
** the following structure */ struct fts5yyParser {
fts5yyStackEntry *fts5yytos; /* Pointer to top element of the stack */ #ifdef fts5YYTRACKMAXSTACKDEPTH int fts5yyhwm; /* High-water mark of the stack */ #endif #ifndef fts5YYNOERRORRECOVERY int fts5yyerrcnt; /* Shifts left before out of the error */ #endif
sqlite3Fts5ParserARG_SDECL /* A place to hold %extra_argument */
sqlite3Fts5ParserCTX_SDECL /* A place to hold %extra_context */
fts5yyStackEntry *fts5yystackEnd; /* Last entry in the stack */
fts5yyStackEntry *fts5yystack; /* The parser stack */
fts5yyStackEntry fts5yystk0[fts5YYSTACKDEPTH]; /* Initial stack space */
}; typedefstruct fts5yyParser fts5yyParser;
#ifndef NDEBUG /* ** Turn parser tracing on by giving a stream to which to write the trace ** and a prompt to preface each trace message. Tracing is turned off ** by making either argument NULL ** ** Inputs: ** <ul> ** <li> A FILE* to which trace output should be written. ** If NULL, then tracing is turned off. ** <li> A prefix string written at the beginning of every ** line of trace output. If NULL, then tracing is ** turned off. ** </ul> ** ** Outputs: ** None.
*/ staticvoid sqlite3Fts5ParserTrace(FILE *TraceFILE, char *zTracePrompt){
fts5yyTraceFILE = TraceFILE;
fts5yyTracePrompt = zTracePrompt; if( fts5yyTraceFILE==0 ) fts5yyTracePrompt = 0; elseif( fts5yyTracePrompt==0 ) fts5yyTraceFILE = 0;
} #endif/* NDEBUG */
#if fts5YYGROWABLESTACK
/*
** Try to increase the size of the parser stack.  Return the number
** of errors.  Return 0 on success.
**
** NOTE(review): the body after the local declarations was missing from
** this file (unbalanced brace, unterminated #if). It has been restored
** following the Lemon driver template; confirm against the generated
** fts5parse.c.
*/
static int fts5yyGrowStack(fts5yyParser *p){
  int oldSize = 1 + (int)(p->fts5yystackEnd - p->fts5yystack);
  int newSize;
  int idx;
  fts5yyStackEntry *pNew;

  newSize = oldSize*2 + 100;
  idx = (int)(p->fts5yytos - p->fts5yystack);
  if( p->fts5yystack==p->fts5yystk0 ){
    /* The stack still lives in the parser object itself, so it cannot be
    ** passed to realloc(). Allocate fresh space and copy the entries. */
    int i;
    pNew = fts5YYREALLOC(0, newSize*sizeof(pNew[0]));
    if( pNew==0 ) return 1;
    for(i=0; i<oldSize; i++){
      pNew[i] = p->fts5yystack[i];
    }
  }else{
    pNew = fts5YYREALLOC(p->fts5yystack, newSize*sizeof(pNew[0]));
    if( pNew==0 ) return 1;
  }
  p->fts5yystack = pNew;
  p->fts5yytos = &p->fts5yystack[idx];
  p->fts5yystackEnd = &p->fts5yystack[newSize-1];
  return 0;
}
#endif /* fts5YYGROWABLESTACK */

#if !fts5YYGROWABLESTACK
/* For builds that do not have a growable stack, fts5yyGrowStack always
** returns an error.
*/
# define fts5yyGrowStack(X) 1
#endif
/* Datatype of the argument to the memory allocator passed as the
** second argument to sqlite3Fts5ParserAlloc() below.  This can be changed by
** putting an appropriate #define in the %include section of the input
** grammar.
*/
#ifndef fts5YYMALLOCARGTYPE
# define fts5YYMALLOCARGTYPE size_t
#endif
/* Initialize a new parser that has already been allocated.
**
** The stack is pointed at the built-in fts5yystk0[] array, the top of
** stack is set to its first slot, and that slot is given state 0 and
** major token 0.
*/
static void sqlite3Fts5ParserInit(void *fts5yypRawParser sqlite3Fts5ParserCTX_PDECL){
  fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yypRawParser;
  sqlite3Fts5ParserCTX_STORE
#ifdef fts5YYTRACKMAXSTACKDEPTH
  fts5yypParser->fts5yyhwm = 0;
#endif
  fts5yypParser->fts5yystack = fts5yypParser->fts5yystk0;
  fts5yypParser->fts5yystackEnd = &fts5yypParser->fts5yystack[fts5YYSTACKDEPTH-1];
#ifndef fts5YYNOERRORRECOVERY
  fts5yypParser->fts5yyerrcnt = -1;
#endif
  fts5yypParser->fts5yytos = fts5yypParser->fts5yystack;
  fts5yypParser->fts5yystack[0].stateno = 0;
  fts5yypParser->fts5yystack[0].major = 0;
}
#ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK /* ** This function allocates a new parser. ** The only argument is a pointer to a function which works like ** malloc. ** ** Inputs: ** A pointer to the function used to allocate memory. ** ** Outputs: ** A pointer to a parser. This pointer is used in subsequent calls ** to sqlite3Fts5Parser and sqlite3Fts5ParserFree.
*/ staticvoid *sqlite3Fts5ParserAlloc(void *(*mallocProc)(fts5YYMALLOCARGTYPE) sqlite3Fts5ParserCTX_PDECL){
fts5yyParser *fts5yypParser;
fts5yypParser = (fts5yyParser*)(*mallocProc)( (fts5YYMALLOCARGTYPE)sizeof(fts5yyParser) ); if( fts5yypParser ){
sqlite3Fts5ParserCTX_STORE
sqlite3Fts5ParserInit(fts5yypParser sqlite3Fts5ParserCTX_PARAM);
} return (void*)fts5yypParser;
} #endif/* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */
/* Release the "minor type" (semantic value) attached to a grammar symbol.
** The symbol may be a terminal or a nonterminal.  "fts5yymajor" is the
** symbol code and "fts5yypminor" points at the value to be released.  The
** per-symbol cleanup code comes from the %destructor and/or
** %token_destructor directives of the input grammar.
*/
static void fts5yy_destructor(
  fts5yyParser *fts5yypParser,    /* The parser */
  fts5YYCODETYPE fts5yymajor,     /* Type code for object to destroy */
  fts5YYMINORTYPE *fts5yypminor   /* The object to be destroyed */
){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
  switch( fts5yymajor ){
    /* The cases below run whenever a terminal or non-terminal is
    ** destroyed: when it is popped from the stack during a reduce,
    ** during error processing, or when a parser is torn down before it
    ** has finished parsing.
    **
    ** Note: during a reduce, the only symbols destroyed are those
    ** which appear on the RHS of the rule, but which are *not* used
    ** inside the C code.
    */
/********* Begin destructor definitions ***************************************/
    case 16: /* input */
    {
#line 83 "fts5parse.y"
      (void)pParse;
#line 606 "fts5parse.sql"
    }
      break;

    case 17: /* expr */
    case 18: /* cnearset */
    case 19: /* exprlist */
    {
#line 89 "fts5parse.y"
      sqlite3Fts5ParseNodeFree((fts5yypminor->fts5yy24));
#line 615 "fts5parse.sql"
    }
      break;

    case 20: /* colset */
    case 21: /* colsetlist */
    {
#line 93 "fts5parse.y"
      sqlite3_free((fts5yypminor->fts5yy11));
#line 623 "fts5parse.sql"
    }
      break;

    case 22: /* nearset */
    case 23: /* nearphrases */
    {
#line 148 "fts5parse.y"
      sqlite3Fts5ParseNearsetFree((fts5yypminor->fts5yy46));
#line 631 "fts5parse.sql"
    }
      break;

    case 24: /* phrase */
    {
#line 183 "fts5parse.y"
      sqlite3Fts5ParsePhraseFree((fts5yypminor->fts5yy53));
#line 638 "fts5parse.sql"
    }
      break;
/********* End destructor definitions *****************************************/

    default:
      break;   /* If no destructor action specified: do nothing */
  }
}
/*
** Remove the top entry from the parser's stack.
**
** The popped entry is handed to fts5yy_destructor(), which runs the
** destructor associated with its symbol, if there is one.
*/
static void fts5yy_pop_parser_stack(fts5yyParser *pParser){
  fts5yyStackEntry *pPopped;
  assert( pParser->fts5yytos!=0 );
  assert( pParser->fts5yytos > pParser->fts5yystack );
  pPopped = pParser->fts5yytos;
  pParser->fts5yytos = pPopped - 1;
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    fprintf(fts5yyTraceFILE,"%sPopping %s\n",
      fts5yyTracePrompt,
      fts5yyTokenName[pPopped->major]);
  }
#endif
  fts5yy_destructor(pParser, pPopped->major, &pPopped->minor);
}
/* ** Clear all secondary memory allocations from the parser
*/ staticvoid sqlite3Fts5ParserFinalize(void *p){
fts5yyParser *pParser = (fts5yyParser*)p;
/* In-lined version of calling fts5yy_pop_parser_stack() for each
** element left in the stack */
fts5yyStackEntry *fts5yytos = pParser->fts5yytos; while( fts5yytos>pParser->fts5yystack ){ #ifndef NDEBUG if( fts5yyTraceFILE ){
fprintf(fts5yyTraceFILE,"%sPopping %s\n",
fts5yyTracePrompt,
fts5yyTokenName[fts5yytos->major]);
} #endif if( fts5yytos->major>=fts5YY_MIN_DSTRCTR ){
fts5yy_destructor(pParser, fts5yytos->major, &fts5yytos->minor);
}
fts5yytos--;
}
#ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK
/*
** Destroy a parser and release its memory.  sqlite3Fts5ParserFinalize()
** is invoked first, then the parser itself is handed to freeProc.
**
** Unless the fts5YYPARSEFREENEVERNULL macro is defined (for example in a
** %include section of the input grammar), a NULL parser pointer is
** accepted and ignored.  When the macro is defined the input pointer is
** assumed never to be NULL.
*/
static void sqlite3Fts5ParserFree(
  void *p,                    /* The parser to be deleted */
  void (*freeProc)(void*)     /* Function used to reclaim memory */
){
#ifndef fts5YYPARSEFREENEVERNULL
  if( !p ){
    return;
  }
#endif
  sqlite3Fts5ParserFinalize(p);
  freeProc(p);
}
#endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */
/*
** Report the high-water mark (fts5yyhwm) recorded for a parser's stack.
** Only available when fts5YYTRACKMAXSTACKDEPTH is defined.
*/
#ifdef fts5YYTRACKMAXSTACKDEPTH
static int sqlite3Fts5ParserStackPeak(void *p){
  return ((fts5yyParser*)p)->fts5yyhwm;
}
#endif
/* Parser statement-coverage matrix.  Element fts5yycoverage[X][Y] is set
** when the parser is in state X and has lookahead token Y.  In a
** well-tested system, every element of this matrix should end up being
** set.
*/
#if defined(fts5YYCOVERAGE)
static unsigned char fts5yycoverage[fts5YYNSTATE][fts5YYNFTS5TOKEN];
#endif
/*
** Report on parser coverage.  For every state/lookahead combination that
** is not a syntax error, write one line to "out" (when out is not NULL)
** saying whether the parser has used it ("ok") or not ("missed").
**
** Return the number of such combinations that have not been used.
*/
#if defined(fts5YYCOVERAGE)
static int sqlite3Fts5ParserCoverage(FILE *out){
  int nMissed = 0;
  int iState;
  for(iState=0; iState<fts5YYNSTATE; iState++){
    int iBase = fts5yy_shift_ofst[iState];
    int iTok;
    for(iTok=0; iTok<fts5YYNFTS5TOKEN; iTok++){
      int bSeen;
      /* Skip lookaheads that are not valid in this state */
      if( fts5yy_lookahead[iBase+iTok]!=iTok ) continue;
      bSeen = (fts5yycoverage[iState][iTok]!=0);
      if( !bSeen ) nMissed++;
      if( out ){
        fprintf(out,"State %d lookahead %s %s\n", iState,
                fts5yyTokenName[iTok],
                bSeen ? "ok" : "missed");
      }
    }
  }
  return nMissed;
}
#endif
/* ** Find the appropriate action for a parser given the terminal ** look-ahead token iLookAhead.
*/ static fts5YYACTIONTYPE fts5yy_find_shift_action(
fts5YYCODETYPE iLookAhead, /* The look-ahead token */
fts5YYACTIONTYPE stateno /* Current state number */
){ int i;
/*
** Look up the parser action for state "stateno" given the non-terminal
** look-ahead symbol iLookAhead.
**
** When fts5YYERRORSYMBOL is defined, out-of-range states and lookups that
** miss the action table fall back to fts5yy_default[stateno].  Otherwise
** those conditions are asserted never to occur.
*/
static fts5YYACTIONTYPE fts5yy_find_reduce_action(
  fts5YYACTIONTYPE stateno,     /* Current state number */
  fts5YYCODETYPE iLookAhead     /* The look-ahead token */
){
  int iAct;                     /* Index into fts5yy_action[] */
#ifdef fts5YYERRORSYMBOL
  if( stateno>fts5YY_REDUCE_COUNT ){
    return fts5yy_default[stateno];
  }
#else
  assert( stateno<=fts5YY_REDUCE_COUNT );
#endif
  assert( iLookAhead!=fts5YYNOCODE );
  iAct = fts5yy_reduce_ofst[stateno] + iLookAhead;
#ifdef fts5YYERRORSYMBOL
  if( iAct<0 || iAct>=fts5YY_ACTTAB_COUNT || fts5yy_lookahead[iAct]!=iLookAhead ){
    return fts5yy_default[stateno];
  }
#else
  assert( iAct>=0 && iAct<fts5YY_ACTTAB_COUNT );
  assert( fts5yy_lookahead[iAct]==iLookAhead );
#endif
  return fts5yy_action[iAct];
}
/*
** Handle a parser stack overflow: pop (and destroy) every entry above
** the bottom of the stack, then run the grammar's %stack_overflow code,
** which reports the error through sqlite3Fts5ParseError().
*/
static void fts5yyStackOverflow(fts5yyParser *fts5yypParser){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    fprintf(fts5yyTraceFILE,"%sStack Overflow!\n",fts5yyTracePrompt);
  }
#endif
  while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ){
    fts5yy_pop_parser_stack(fts5yypParser);
  }
  /* Here code is inserted which will execute if the parser
  ** stack ever overflows */
/******** Begin %stack_overflow code ******************************************/
#line 36 "fts5parse.y"
  sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow");
#line 876 "fts5parse.sql"
/******** End %stack_overflow code ********************************************/
  sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument var */
  sqlite3Fts5ParserCTX_STORE
}
/* ** Print tracing information for a SHIFT action
*/ #ifndef NDEBUG staticvoid fts5yyTraceShift(fts5yyParser *fts5yypParser, int fts5yyNewState, constchar *zTag){ if( fts5yyTraceFILE ){ if( fts5yyNewState<fts5YYNSTATE ){
fprintf(fts5yyTraceFILE,"%s%s '%s', go to state %d\n",
fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major],
fts5yyNewState);
}else{
fprintf(fts5yyTraceFILE,"%s%s '%s', pending reduce %d\n",
fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major],
fts5yyNewState - fts5YY_MIN_REDUCE);
}
}
} #else # define fts5yyTraceShift(X,Y,Z) #endif
/*
** Push a new entry onto the parser stack (a shift action).
**
** If the new entry would fall beyond the end of the stack and
** fts5yyGrowStack() reports failure, the push is undone and
** fts5yyStackOverflow() is invoked instead.
*/
static void fts5yy_shift(
  fts5yyParser *fts5yypParser,                  /* The parser to be shifted */
  fts5YYACTIONTYPE fts5yyNewState,              /* The new state to shift in */
  fts5YYCODETYPE fts5yyMajor,                   /* The major token to shift in */
  sqlite3Fts5ParserFTS5TOKENTYPE fts5yyMinor    /* The minor token to shift in */
){
  fts5yyStackEntry *pTop;
  fts5yypParser->fts5yytos++;
#ifdef fts5YYTRACKMAXSTACKDEPTH
  if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){
    fts5yypParser->fts5yyhwm++;
    assert( fts5yypParser->fts5yyhwm == (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack) );
  }
#endif
  pTop = fts5yypParser->fts5yytos;
  if( pTop>fts5yypParser->fts5yystackEnd ){
    if( fts5yyGrowStack(fts5yypParser) ){
      fts5yypParser->fts5yytos--;
      fts5yyStackOverflow(fts5yypParser);
      return;
    }
    /* Re-read the top-of-stack pointer after the grow attempt */
    pTop = fts5yypParser->fts5yytos;
    assert( pTop <= fts5yypParser->fts5yystackEnd );
  }
  /* States above fts5YY_MAX_SHIFT are shifted into the REDUCE range */
  if( fts5yyNewState > fts5YY_MAX_SHIFT ){
    fts5yyNewState += fts5YY_MIN_REDUCE - fts5YY_MIN_SHIFTREDUCE;
  }
  pTop->stateno = fts5yyNewState;
  pTop->major = fts5yyMajor;
  pTop->minor.fts5yy0 = fts5yyMinor;
  fts5yyTraceShift(fts5yypParser, fts5yyNewState, "Shift");
}
/* ** Perform a reduce action and the shift that must immediately ** follow the reduce. ** ** The fts5yyLookahead and fts5yyLookaheadToken parameters provide reduce actions ** access to the lookahead token (if any). The fts5yyLookahead will be fts5YYNOCODE ** if the lookahead token has already been consumed. As this procedure is ** only called from one place, optimizing compilers will in-line it, which ** means that the extra parameters have no performance impact.
*/ static fts5YYACTIONTYPE fts5yy_reduce(
fts5yyParser *fts5yypParser, /* The parser */ unsignedint fts5yyruleno, /* Number of the rule by which to reduce */ int fts5yyLookahead, /* Lookahead token, or fts5YYNOCODE if none */
sqlite3Fts5ParserFTS5TOKENTYPE fts5yyLookaheadToken /* Value of the lookahead token */
sqlite3Fts5ParserCTX_PDECL /* %extra_context */
){ int fts5yygoto; /* The next state */
fts5YYACTIONTYPE fts5yyact; /* The next action */
fts5yyStackEntry *fts5yymsp; /* The top of the parser's stack */ int fts5yysize; /* Amount to pop the stack */
sqlite3Fts5ParserARG_FETCH
(void)fts5yyLookahead;
(void)fts5yyLookaheadToken;
fts5yymsp = fts5yypParser->fts5yytos;
/* There are no SHIFTREDUCE actions on nonterminals because the table
** generator has simplified them to pure REDUCE actions. */
assert( !(fts5yyact>fts5YY_MAX_SHIFT && fts5yyact<=fts5YY_MAX_SHIFTREDUCE) );
/* It is not possible for a REDUCE to be followed by an error */
assert( fts5yyact!=fts5YY_ERROR_ACTION );
/*
** Run when the parse fails: every entry above the bottom of the stack
** is popped (and destroyed), then the grammar's %parse_failure code, if
** any, executes.
*/
#ifndef fts5YYNOERRORRECOVERY
static void fts5yy_parse_failed(
  fts5yyParser *fts5yypParser           /* The parser */
){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    fprintf(fts5yyTraceFILE,"%sFail!\n",fts5yyTracePrompt);
  }
#endif
  while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ){
    fts5yy_pop_parser_stack(fts5yypParser);
  }
  /* Here code is inserted which will be executed whenever the
  ** parser fails */
/************ Begin %parse_failure code ***************************************/
/************ End %parse_failure code *****************************************/
  sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument variable */
  sqlite3Fts5ParserCTX_STORE
}
#endif /* fts5YYNOERRORRECOVERY */
/*
** Run when a syntax error first occurs.  The grammar's %syntax_error
** code reports the offending token text through sqlite3Fts5ParseError().
*/
static void fts5yy_syntax_error(
  fts5yyParser *fts5yypParser,                  /* The parser */
  int fts5yymajor,                              /* The major type of the error token */
  sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor    /* The minor type of the error token */
){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
#define FTS5TOKEN fts5yyminor
/************ Begin %syntax_error code ****************************************/
#line 30 "fts5parse.y"
  UNUSED_PARAM(fts5yymajor); /* Silence a compiler warning */
  sqlite3Fts5ParseError(
    pParse, "fts5: syntax error near \"%.*s\"",FTS5TOKEN.n,FTS5TOKEN.p
  );
#line 1304 "fts5parse.sql"
/************ End %syntax_error code ******************************************/
  sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument variable */
  sqlite3Fts5ParserCTX_STORE
}
/*
** Run when the parser accepts its input.  The error counter is reset
** (unless error recovery is compiled out), the stack is asserted to be
** back at its bottom entry, and the grammar's %parse_accept code, if
** any, executes.
*/
static void fts5yy_accept(
  fts5yyParser *fts5yypParser           /* The parser */
){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    fprintf(fts5yyTraceFILE,"%sAccept!\n",fts5yyTracePrompt);
  }
#endif
#ifndef fts5YYNOERRORRECOVERY
  fts5yypParser->fts5yyerrcnt = -1;
#endif
  assert( fts5yypParser->fts5yytos==fts5yypParser->fts5yystack );
  /* Here code is inserted which will be executed whenever the
  ** parser accepts */
/*********** Begin %parse_accept code *****************************************/
/*********** End %parse_accept code *******************************************/
  sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument variable */
  sqlite3Fts5ParserCTX_STORE
}
/* The main parser program.
** The first argument is a pointer to a structure obtained from
** "sqlite3Fts5ParserAlloc" which describes the current state of the parser.
** The second argument is the major token number.  The third is
** the minor token.  The fourth optional argument is whatever the
** user wants (and specified in the grammar) and is available for
** use by the action routines.
**
** Inputs:
** <ul>
** <li> A pointer to the parser (an opaque structure.)
** <li> The major token number.  A value of 0 marks the end of input.
** <li> The minor token number.
** <li> An option argument of a grammar-specified type.
** </ul>
**
** Outputs:
** None.
*/
static void sqlite3Fts5Parser(
  void *fts5yyp,                                /* The parser */
  int fts5yymajor,                              /* The major token code number */
  sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor    /* The value for the token */
  sqlite3Fts5ParserARG_PDECL                    /* Optional %extra_argument parameter */
){
  fts5YYMINORTYPE fts5yyminorunion;
  fts5YYACTIONTYPE fts5yyact;   /* The parser action. */
#if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY)
  int fts5yyendofinput;     /* True if we are at the end of input */
#endif
#ifdef fts5YYERRORSYMBOL
  int fts5yyerrorhit = 0;   /* True if fts5yymajor has invoked an error */
#endif
  fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yyp;  /* The parser */
  sqlite3Fts5ParserCTX_FETCH
  sqlite3Fts5ParserARG_STORE

  assert( fts5yypParser->fts5yytos!=0 );
#if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY)
  /* Record end-of-input now.  The error branch below reads this flag to
  ** decide whether the parse must be failed, so it must never be left
  ** uninitialized. */
  fts5yyendofinput = (fts5yymajor==0);
#endif

  fts5yyact = fts5yypParser->fts5yytos->stateno;
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    if( fts5yyact < fts5YY_MIN_REDUCE ){
      fprintf(fts5yyTraceFILE,"%sInput '%s' in state %d\n",
              fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact);
    }else{
      fprintf(fts5yyTraceFILE,"%sInput '%s' with pending reduce %d\n",
              fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact-fts5YY_MIN_REDUCE);
    }
  }
#endif

  while(1){ /* Exit by "break" */
    assert( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystack );
    assert( fts5yyact==fts5yypParser->fts5yytos->stateno );
    fts5yyact = fts5yy_find_shift_action((fts5YYCODETYPE)fts5yymajor,fts5yyact);
    if( fts5yyact >= fts5YY_MIN_REDUCE ){
      unsigned int fts5yyruleno = fts5yyact - fts5YY_MIN_REDUCE; /* Reduce by this rule */
#ifndef NDEBUG
      assert( fts5yyruleno<(int)(sizeof(fts5yyRuleName)/sizeof(fts5yyRuleName[0])) );
      if( fts5yyTraceFILE ){
        int fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno];
        if( fts5yysize ){
          fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s, pop back to state %d.\n",
            fts5yyTracePrompt,
            fts5yyruleno, fts5yyRuleName[fts5yyruleno],
            fts5yyruleno<fts5YYNRULE_WITH_ACTION ? "" : " without external action",
            fts5yypParser->fts5yytos[fts5yysize].stateno);
        }else{
          fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s.\n",
            fts5yyTracePrompt, fts5yyruleno, fts5yyRuleName[fts5yyruleno],
            fts5yyruleno<fts5YYNRULE_WITH_ACTION ? "" : " without external action");
        }
      }
#endif /* NDEBUG */

      /* Check that the stack is large enough to grow by a single entry
      ** if the RHS of the rule is empty.  This ensures that there is room
      ** enough on the stack to push the LHS value */
      if( fts5yyRuleInfoNRhs[fts5yyruleno]==0 ){
#ifdef fts5YYTRACKMAXSTACKDEPTH
        if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){
          fts5yypParser->fts5yyhwm++;
          assert( fts5yypParser->fts5yyhwm ==
                  (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack));
        }
#endif
        if( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystackEnd ){
          if( fts5yyGrowStack(fts5yypParser) ){
            fts5yyStackOverflow(fts5yypParser);
            break;
          }
        }
      }
      fts5yyact = fts5yy_reduce(fts5yypParser,fts5yyruleno,fts5yymajor,fts5yyminor sqlite3Fts5ParserCTX_PARAM);
    }else if( fts5yyact <= fts5YY_MAX_SHIFTREDUCE ){
      fts5yy_shift(fts5yypParser,fts5yyact,(fts5YYCODETYPE)fts5yymajor,fts5yyminor);
#ifndef fts5YYNOERRORRECOVERY
      fts5yypParser->fts5yyerrcnt--;
#endif
      break;
    }else if( fts5yyact==fts5YY_ACCEPT_ACTION ){
      fts5yypParser->fts5yytos--;
      fts5yy_accept(fts5yypParser);
      return;
    }else{
      assert( fts5yyact == fts5YY_ERROR_ACTION );
      fts5yyminorunion.fts5yy0 = fts5yyminor;
#ifdef fts5YYERRORSYMBOL
      int fts5yymx;
#endif
#ifndef NDEBUG
      if( fts5yyTraceFILE ){
        fprintf(fts5yyTraceFILE,"%sSyntax Error!\n",fts5yyTracePrompt);
      }
#endif
#ifdef fts5YYERRORSYMBOL
      /* A syntax error has occurred.
      ** The response to an error depends upon whether or not the
      ** grammar defines an error token "ERROR".
      **
      ** This is what we do if the grammar does define ERROR:
      **
      **  * Call the %syntax_error function.
      **
      **  * Begin popping the stack until we enter a state where
      **    it is legal to shift the error symbol, then shift
      **    the error symbol.
      **
      **  * Set the error count to three.
      **
      **  * Begin accepting and shifting new tokens.  No new error
      **    processing will occur until three tokens have been
      **    shifted successfully.
      **
      */
      if( fts5yypParser->fts5yyerrcnt<0 ){
        fts5yy_syntax_error(fts5yypParser,fts5yymajor,fts5yyminor);
      }
      fts5yymx = fts5yypParser->fts5yytos->major;
      if( fts5yymx==fts5YYERRORSYMBOL || fts5yyerrorhit ){
#ifndef NDEBUG
        if( fts5yyTraceFILE ){
          fprintf(fts5yyTraceFILE,"%sDiscard input token %s\n",
             fts5yyTracePrompt,fts5yyTokenName[fts5yymajor]);
        }
#endif
        fts5yy_destructor(fts5yypParser, (fts5YYCODETYPE)fts5yymajor, &fts5yyminorunion);
        fts5yymajor = fts5YYNOCODE;
      }else{
        while( fts5yypParser->fts5yytos > fts5yypParser->fts5yystack ){
          fts5yyact = fts5yy_find_reduce_action(fts5yypParser->fts5yytos->stateno,
                                        fts5YYERRORSYMBOL);
          if( fts5yyact<=fts5YY_MAX_SHIFTREDUCE ) break;
          fts5yy_pop_parser_stack(fts5yypParser);
        }
        if( fts5yypParser->fts5yytos <= fts5yypParser->fts5yystack || fts5yymajor==0 ){
          fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
          fts5yy_parse_failed(fts5yypParser);
#ifndef fts5YYNOERRORRECOVERY
          fts5yypParser->fts5yyerrcnt = -1;
#endif
          fts5yymajor = fts5YYNOCODE;
        }else if( fts5yymx!=fts5YYERRORSYMBOL ){
          fts5yy_shift(fts5yypParser,fts5yyact,fts5YYERRORSYMBOL,fts5yyminor);
        }
      }
      fts5yypParser->fts5yyerrcnt = 3;
      fts5yyerrorhit = 1;
      if( fts5yymajor==fts5YYNOCODE ) break;
      fts5yyact = fts5yypParser->fts5yytos->stateno;
#elif defined(fts5YYNOERRORRECOVERY)
      /* If the fts5YYNOERRORRECOVERY macro is defined, then do not attempt to
      ** do any kind of error recovery.  Instead, simply invoke the syntax
      ** error routine and continue going as if nothing had happened.
      **
      ** Applications can set this macro (for example inside %include) if
      ** they intend to abandon the parse upon the first syntax error seen.
      */
      fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
      fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
      break;
#else  /* fts5YYERRORSYMBOL is not defined */
      /* This is what we do if the grammar does not define ERROR:
      **
      **  * Report an error message, and throw away the input token.
      **
      **  * If the input token is $, then fail the parse.
      **
      ** As before, subsequent error messages are suppressed until
      ** three input tokens have been successfully shifted.
      */
      if( fts5yypParser->fts5yyerrcnt<=0 ){
        fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
      }
      fts5yypParser->fts5yyerrcnt = 3;
      fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
      if( fts5yyendofinput ){
        fts5yy_parse_failed(fts5yypParser);
#ifndef fts5YYNOERRORRECOVERY
        fts5yypParser->fts5yyerrcnt = -1;
#endif
      }
      break;
#endif
    }
  }
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    fts5yyStackEntry *i;
    char cDiv = '[';
    fprintf(fts5yyTraceFILE,"%sReturn. Stack=",fts5yyTracePrompt);
    for(i=&fts5yypParser->fts5yystack[1]; i<=fts5yypParser->fts5yytos; i++){
      fprintf(fts5yyTraceFILE,"%c%s", cDiv, fts5yyTokenName[i->major]);
      cDiv = ' ';
    }
    fprintf(fts5yyTraceFILE,"]\n");
  }
#endif
  return;
}
/*
** Look up the fallback token for canonical token iToken.  Returns 0 if
** iToken has no fallback, or if the grammar defines no fallback table
** (fts5YYFALLBACK not defined).
*/
static int sqlite3Fts5ParserFallback(int iToken){
#ifdef fts5YYFALLBACK
  assert( iToken<(int)(sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0])) );
  return fts5yyFallback[iToken];
#else
  (void)iToken;
  return 0;
#endif
}
#line 1 "fts5_aux.c" /* ** 2014 May 31 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ******************************************************************************
*/
/*
** Iterator over the "coalesced phrase instances" in a single column of
** the current row. When the phrase instances in the column do not
** overlap, the iterator simply visits each of them. When they do overlap
** (share one or more tokens), each set of overlapping instances is
** reported as a single match. See documentation for the highlight()
** auxiliary function for details.
**
** Usage is:
**
**   for(rc = fts5CInstIterInit(pApi, pFts, iCol, &iter);
**      (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter));
**      rc = fts5CInstIterNext(&iter)
**   ){
**     printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd);
**   }
**
*/
typedef struct CInstIter CInstIter;
struct CInstIter {
  const Fts5ExtensionApi *pApi;   /* API offered by current FTS version */
  Fts5Context *pFts;              /* First arg to pass to pApi functions */
  int iCol;                       /* Column to search */
  int iInst;                      /* Next phrase instance index */
  int nInst;                      /* Total number of phrase instances */

  /* Output variables */
  int iStart;                     /* First token in coalesced phrase instance */
  int iEnd;                       /* Last token in coalesced phrase instance */
};
/* ** Advance the iterator to the next coalesced phrase instance. Return ** an SQLite error code if an error occurs, or SQLITE_OK otherwise.
*/ staticint fts5CInstIterNext(CInstIter *pIter){ int rc = SQLITE_OK;
pIter->iStart = -1;
pIter->iEnd = -1;
/* ** Initialize the iterator object indicated by the final parameter to ** iterate through coalesced phrase instances in column iCol.
*/ staticint fts5CInstIterInit( const Fts5ExtensionApi *pApi,
Fts5Context *pFts, int iCol,
CInstIter *pIter
){ int rc;
/*************************************************************************
** Start of highlight() implementation.
**
** State shared between the highlight code and its tokenizer callback,
** fts5HighlightCb().
*/
typedef struct HighlightContext HighlightContext;
struct HighlightContext {
  /* Constant parameters to fts5HighlightCb() */
  int iRangeStart;                /* First token to include */
  int iRangeEnd;                  /* If non-zero, last token to include */
  const char *zOpen;              /* Opening highlight */
  const char *zClose;             /* Closing highlight */
  const char *zIn;                /* Input text */
  int nIn;                        /* Size of input text in bytes */

  /* Variables modified by fts5HighlightCb() */
  CInstIter iter;                 /* Coalesced Instance Iterator */
  int iPos;                       /* Current token offset in zIn[] */
  int iOff;                       /* Have copied up to this offset in zIn[] */
  int bOpen;                      /* True if highlight is open */
  char *zOut;                     /* Output value */
};
/*
** Append n bytes of text from buffer z to the output string p->zOut.
** A negative n means "append everything up to the first '\0'".
**
** This is a no-op if *pRc is not SQLITE_OK on entry, or if z is NULL.
** If the new string cannot be built (sqlite3_mprintf() returns NULL),
** *pRc is set to SQLITE_NOMEM.
*/
static void fts5HighlightAppend(
  int *pRc,
  HighlightContext *p,
  const char *z, int n
){
  if( *pRc!=SQLITE_OK || z==0 ) return;
  if( n<0 ){
    n = (int)strlen(z);
  }
  /* "%z" consumes the previous p->zOut while building the new string */
  p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z);
  if( p->zOut==0 ){
    *pRc = SQLITE_NOMEM;
  }
}
/* ** Tokenizer callback used by implementation of highlight() function.
*/ staticint fts5HighlightCb( void *pContext, /* Pointer to HighlightContext object */ int tflags, /* Mask of FTS5_TOKEN_* flags */ constchar *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStartOff, /* Start byte offset of token */ int iEndOff /* End byte offset of token */
){
HighlightContext *p = (HighlightContext*)pContext; int rc = SQLITE_OK; int iPos;
/* If the parenthesis is open, and this token is not part of the current ** phrase, and the starting byte offset of this token is past the point ** that has currently been copied into the output buffer, close the
** parenthesis. */ if( p->bOpen
&& (iPos<=p->iter.iStart || p->iter.iStart<0)
&& iStartOff>p->iOff
){
fts5HighlightAppend(&rc, p, p->zClose, -1);
p->bOpen = 0;
}
/* If this is the start of a new phrase, and the highlight is not open: ** ** * copy text from the input up to the start of the phrase, and ** * open the highlight.
*/ if( iPos==p->iter.iStart && p->bOpen==0 ){
fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff);
fts5HighlightAppend(&rc, p, p->zOpen, -1);
p->iOff = iStartOff;
p->bOpen = 1;
}
/* ** Implementation of highlight() function.
*/ staticvoid fts5HighlightFunction( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
Fts5Context *pFts, /* First arg to pass to pApi functions */
sqlite3_context *pCtx, /* Context for returning result/error */ int nVal, /* Number of values in apVal[] array */
sqlite3_value **apVal /* Array of trailing arguments */
){
HighlightContext ctx; int rc; int iCol;
if( nVal!=3 ){ constchar *zErr = "wrong number of arguments to function highlight()";
sqlite3_result_error(pCtx, zErr, -1); return;
}
/*
** Context object handed to the fts5SentenceFinderCb() tokenizer callback.
** It accumulates, in aFirst[], the first token of each sentence.
*/
typedef struct Fts5SFinder Fts5SFinder;
struct Fts5SFinder {
  int iPos;                       /* Current token position */
  int nFirstAlloc;                /* Allocated size of aFirst[] */
  int nFirst;                     /* Number of entries in aFirst[] */
  int *aFirst;                    /* Array of first token in each sentence */
  const char *zDoc;               /* Document being tokenized */
};
/* ** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if ** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an ** error occurs.
*/ staticint fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){ if( p->nFirstAlloc==p->nFirst ){ int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64; int *aNew;
/* ** This function is an xTokenize() callback used by the auxiliary snippet() ** function. Its job is to identify tokens that are the first in a sentence. ** For each such token, an entry is added to the SFinder.aFirst[] array.
*/ staticint fts5SentenceFinderCb( void *pContext, /* Pointer to HighlightContext object */ int tflags, /* Mask of FTS5_TOKEN_* flags */ constchar *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStartOff, /* Start offset of token */ int iEndOff /* End offset of token */
){ int rc = SQLITE_OK;
staticint fts5SnippetScore( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
Fts5Context *pFts, /* First arg to pass to pApi functions */ int nDocsize, /* Size of column in tokens */ unsignedchar *aSeen, /* Array with one element per query phrase */ int iCol, /* Column to score */ int iPos, /* Starting offset to score */ int nToken, /* Max tokens per snippet */ int *pnScore, /* OUT: Score */ int *piPos /* OUT: Adjusted offset */
){ int rc; int i; int ip = 0; int ic = 0; int iOff = 0; int iFirst = -1; int nInst; int nScore = 0; int iLast = 0;
sqlite3_int64 iEnd = (sqlite3_int64)iPos + nToken;
/*
** Return pVal interpreted as utf-8 text.  If sqlite3_value_text()
** returns NULL, a pointer to a static zero-length string is returned
** instead, so callers never receive a NULL pointer.
*/
static const char *fts5ValueToText(sqlite3_value *pVal){
  const char *zText = (const char*)sqlite3_value_text(pVal);
  if( zText==0 ){
    zText = "";
  }
  return zText;
}
/* ** Implementation of snippet() function.
*/ staticvoid fts5SnippetFunction( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
Fts5Context *pFts, /* First arg to pass to pApi functions */
sqlite3_context *pCtx, /* Context for returning result/error */ int nVal, /* Number of values in apVal[] array */
sqlite3_value **apVal /* Array of trailing arguments */
){
HighlightContext ctx; int rc = SQLITE_OK; /* Return code */ int iCol; /* 1st argument to snippet() */ constchar *zEllips; /* 4th argument to snippet() */ int nToken; /* 5th argument to snippet() */ int nInst = 0; /* Number of instance matches this row */ int i; /* Used to iterate through instances */ int nPhrase; /* Number of phrases in query */ unsignedchar *aSeen; /* Array of "seen instance" flags */ int iBestCol; /* Column containing best snippet */ int iBestStart = 0; /* First token of best snippet */ int nBestScore = 0; /* Score of best snippet */ int nColSize = 0; /* Total size of iBestCol in tokens */
Fts5SFinder sFinder; /* Used to find the beginnings of sentences */ int nCol;
if( nVal!=5 ){ constchar *zErr = "wrong number of arguments to function snippet()";
sqlite3_result_error(pCtx, zErr, -1); return;
}
/* Advance iterator ctx.iter so that it points to the first coalesced
** phrase instance at or following position iBestStart. */ while( ctx.iter.iStart>=0 && ctx.iter.iStart<iBestStart && rc==SQLITE_OK ){
rc = fts5CInstIterNext(&ctx.iter);
}
/*
** Per-query data for the bm25() function.  An instance of this
** structure is allocated and populated the first time bm25() is called
** for a query.
*/
typedef struct Fts5Bm25Data Fts5Bm25Data;
struct Fts5Bm25Data {
  int nPhrase;                    /* Number of phrases in query */
  double avgdl;                   /* Average number of tokens in each row */
  double *aIDF;                   /* IDF for each phrase */
  double *aFreq;                  /* Array used to calculate phrase freq. */
};
/*
** Callback used by fts5Bm25GetData() to count the rows matched by an
** individual phrase of the query.  Each invocation adds one to the
** sqlite3_int64 counter that pUserData points to.
*/
static int fts5CountCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  void *pUserData                 /* Pointer to sqlite3_int64 variable */
){
  sqlite3_int64 *pnRow = (sqlite3_int64*)pUserData;
  UNUSED_PARAM2(pApi, pFts);
  *pnRow += 1;
  return SQLITE_OK;
}
/* ** Set *ppData to point to the Fts5Bm25Data object for the current query. ** If the object has not already been allocated, allocate and populate it ** now.
*/ staticint fts5Bm25GetData( const Fts5ExtensionApi *pApi,
Fts5Context *pFts,
Fts5Bm25Data **ppData /* OUT: bm25-data object for this query */
){ int rc = SQLITE_OK; /* Return code */
Fts5Bm25Data *p; /* Object to return */
p = (Fts5Bm25Data*)pApi->xGetAuxdata(pFts, 0); if( p==0 ){ int nPhrase; /* Number of phrases in query */
sqlite3_int64 nRow = 0; /* Number of rows in table */
sqlite3_int64 nToken = 0; /* Number of tokens in table */
sqlite3_int64 nByte; /* Bytes of space to allocate */ int i;
/* Calculate the average document length for this FTS5 table */ if( rc==SQLITE_OK ) rc = pApi->xRowCount(pFts, &nRow);
assert( rc!=SQLITE_OK || nRow>0 ); if( rc==SQLITE_OK ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken); if( rc==SQLITE_OK ) p->avgdl = (double)nToken / (double)nRow;
/* Calculate an IDF for each phrase in the query */ for(i=0; rc==SQLITE_OK && i<nPhrase; i++){
sqlite3_int64 nHit = 0;
rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb); if( rc==SQLITE_OK ){ /* Calculate the IDF (Inverse Document Frequency) for phrase i. ** This is done using the standard BM25 formula as found on wikipedia: ** ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) ) ** ** where "N" is the total number of documents in the set and nHit ** is the number that contain at least one instance of the phrase ** under consideration. ** ** The problem with this is that if (N < 2*nHit), the IDF is ** negative. Which is undesirable. So the mimimum allowable IDF is ** (1e-6) - roughly the same as a term that appears in just over
** half of set of 5,000,000 documents. */ double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) ); if( idf<=0.0 ) idf = 1e-6;
p->aIDF[i] = idf;
}
}
/* ** Implementation of bm25() function.
*/ staticvoid fts5Bm25Function( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
Fts5Context *pFts, /* First arg to pass to pApi functions */
sqlite3_context *pCtx, /* Context for returning result/error */ int nVal, /* Number of values in apVal[] array */
sqlite3_value **apVal /* Array of trailing arguments */
){ constdouble k1 = 1.2; /* Constant "k1" from BM25 formula */ constdouble b = 0.75; /* Constant "b" from BM25 formula */ int rc; /* Error code */ double score = 0.0; /* SQL function return value */
Fts5Bm25Data *pData; /* Values allocated/calculated once only */ int i; /* Iterator variable */ int nInst = 0; /* Value returned by xInstCount() */ double D = 0.0; /* Total number of tokens in row */ double *aFreq = 0; /* Array of phrase freq. for current row */
/* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation)
** for each phrase in the query for the current row. */
rc = fts5Bm25GetData(pApi, pFts, &pData); if( rc==SQLITE_OK ){
aFreq = pData->aFreq;
memset(aFreq, 0, sizeof(double) * pData->nPhrase);
rc = pApi->xInstCount(pFts, &nInst);
} for(i=0; rc==SQLITE_OK && i<nInst; i++){ int ip; int ic; int io;
rc = pApi->xInst(pFts, i, &ip, &ic, &io); if( rc==SQLITE_OK ){ double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0;
aFreq[ip] += w;
}
}
/* Figure out the total size of the current row in tokens. */ if( rc==SQLITE_OK ){ int nTok;
rc = pApi->xColumnSize(pFts, -1, &nTok);
D = (double)nTok;
}
/* Determine and return the BM25 score for the current row. Or, if an
** error has occurred, throw an exception. */ if( rc==SQLITE_OK ){ for(i=0; i<pData->nPhrase; i++){
score += pData->aIDF[i] * (
( aFreq[i] * (k1 + 1.0) ) /
( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) )
);
}
sqlite3_result_double(pCtx, -1.0 * score);
}else{
sqlite3_result_error_code(pCtx, rc);
}
}
/* ** Implementation of fts5_get_locale() function.
*/ staticvoid fts5GetLocaleFunction( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
Fts5Context *pFts, /* First arg to pass to pApi functions */
sqlite3_context *pCtx, /* Context for returning result/error */ int nVal, /* Number of values in apVal[] array */
sqlite3_value **apVal /* Array of trailing arguments */
){ int iCol = 0; int eType = 0; int rc = SQLITE_OK; constchar *zLocale = 0; int nLocale = 0;
/* xColumnLocale() must be available */
assert( pApi->iVersion>=4 );
if( nVal!=1 ){ constchar *z = "wrong number of arguments to function fts5_get_locale()";
sqlite3_result_error(pCtx, z, -1); return;
}
eType = sqlite3_value_numeric_type(apVal[0]); if( eType!=SQLITE_INTEGER ){ constchar *z = "non-integer argument passed to function fts5_get_locale()";
sqlite3_result_error(pCtx, z, -1); return;
}
#line 1 "fts5_buffer.c" /* ** 2014 May 31 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ******************************************************************************
*/
/*
** Append the SQLite varint encoding of iVal to buffer pBuf.
**
** Room for 9 more bytes is requested from fts5BufferGrow() before anything
** is written. If that call returns non-zero, this function returns without
** touching the buffer; pRc is the error slot handed to fts5BufferGrow().
*/
static void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){
  if( fts5BufferGrow(pRc, pBuf, 9)==0 ){
    int nByte = sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal);
    pBuf->n += nByte;
  }
}
/*
** Append the nData bytes at pData to the end of buffer pBuf.
**
** A zero-length blob is a no-op. Otherwise space is requested from
** fts5BufferGrow(); if that call returns non-zero the buffer is left
** unmodified by this function (pRc is the error slot passed to
** fts5BufferGrow()). On success pBuf->n grows by nData.
*/
static void sqlite3Fts5BufferAppendBlob(
  int *pRc,
  Fts5Buffer *pBuf,
  u32 nData,
  const u8 *pData
){
  if( nData==0 ) return;
  if( fts5BufferGrow(pRc, pBuf, nData)==0 ){
    assert( pBuf->p!=0 );
    memcpy(&pBuf->p[pBuf->n], pData, nData);
    pBuf->n += nData;
  }
}
/*
** Append the nul-terminated string zStr to the buffer pBuf.
**
** The string is copied together with its nul-terminator, and pBuf->n is
** then stepped back by one. The result is that the byte following the
** buffer data is 0x00 even though that byte is not included in the
** pBuf->n count.
**
** The step back is only applied if sqlite3Fts5BufferAppendBlob() actually
** extended the buffer. If the append did not happen (for example because
** the buffer could not be grown), pBuf->n is left exactly as it was
** instead of being decremented past data that was never written.
*/
static void sqlite3Fts5BufferAppendString(
  int *pRc,
  Fts5Buffer *pBuf,
  const char *zStr
){
  int nStr = (int)strlen(zStr);
  int nBefore = pBuf->n;            /* Size of buffer before the append */

  sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr);

  /* nStr+1 is always at least 1, so an unchanged size means nothing was
  ** appended and there is no terminator byte to exclude from the count. */
  if( pBuf->n!=nBefore ){
    pBuf->n--;
  }
}
/* ** Argument zFmt is a printf() style format string. This function performs ** the printf() style processing, then appends the results to buffer pBuf. ** ** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte ** following the buffer data is set to 0x00, even though this byte is not ** included in the pBuf->n count.
*/ staticvoid sqlite3Fts5BufferAppendPrintf( int *pRc,
Fts5Buffer *pBuf, char *zFmt, ...
){ if( *pRc==SQLITE_OK ){ char *zTmp;
va_list ap;
va_start(ap, zFmt);
zTmp = sqlite3_vmprintf(zFmt, ap);
va_end(ap);
/*
** Release the allocation owned by pBuf (pBuf->p, via sqlite3_free()) and
** then clear every field of the structure to zero. The Fts5Buffer object
** itself is not freed.
*/
static void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){
  sqlite3_free(pBuf->p);
  memset(pBuf, 0, sizeof(*pBuf));
}
/*
** Reset buffer pBuf so that it holds zero bytes of data. Only the size
** field (pBuf->n) is cleared; the memory allocation, if any, is kept.
*/
static void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){
  pBuf->n = 0;
}
/*
** Replace the contents of buffer pBuf with the nData bytes at pData.
**
** The buffer is first truncated to zero length and the new data is then
** appended with sqlite3Fts5BufferAppendBlob(), which receives pRc as its
** error slot. The truncation happens unconditionally, before the append
** is attempted.
*/
static void sqlite3Fts5BufferSet(
  int *pRc,
  Fts5Buffer *pBuf,
  int nData,
  const u8 *pData
){
  pBuf->n = 0;
  sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData);
}
/*
** Read the next entry from the position list stored in the n byte buffer
** a[]. (*pi) is the current read offset within a[] and (*piOff) is the
** most recently decoded position; both are updated in place.
**
** Return 1, with (*piOff) set to -1, if the end of the buffer has been
** reached or the record is found to be corrupt. Otherwise return 0.
**
** The first varint read selects what happens:
**
**   0:   Only (*pi) is advanced; (*piOff) is left unchanged.
**   1:   The next varint supplies the new upper 32 bits of the position
**        and the one after it the lower 31 bits, stored with 2 added.
**        A stored value less than 2 is treated as corruption.
**   >=2: The value, less 2, is added to the lower 31 bits of the
**        previous position. The upper bits are carried over.
*/
static int sqlite3Fts5PoslistNext64(
  const u8 *a, int n,             /* Buffer containing poslist */
  int *pi,                        /* IN/OUT: Offset within a[] */
  i64 *piOff                      /* IN/OUT: Current offset */
){
  int iCur = *pi;                 /* Working copy of the read offset */
  i64 iBase = 0;                  /* Position the new value is relative to */
  u32 iVal = 0;                   /* Most recently decoded varint */

  assert( a!=0 || iCur==0 );
  if( iCur>=n ){
    /* EOF */
    *piOff = -1;
    return 1;
  }

  iBase = *piOff;
  assert( a!=0 );
  fts5FastGetVarint32(a, iCur, iVal);

  if( iVal==0 ){
    *pi = iCur;
    return 0;
  }

  if( iVal==1 ){
    fts5FastGetVarint32(a, iCur, iVal);
    iBase = ((i64)iVal) << 32;
    assert( iBase>=0 );
    fts5FastGetVarint32(a, iCur, iVal);
    if( iVal<2 ){
      /* This is a corrupt record. So stop parsing it here. */
      *piOff = -1;
      return 1;
    }
    *piOff = iBase + ((iVal-2) & 0x7FFFFFFF);
  }else{
    *piOff = (iBase & (((i64)0x7FFFFFFF)<<32))
           + ((iBase + (iVal-2)) & 0x7FFFFFFF);
  }

  *pi = iCur;
  assert_nc( *piOff>=iBase );
  return 0;
}
/*
** Step the position-list reader pIter forward by one entry using
** sqlite3Fts5PoslistNext64(). If that call returns non-zero, the bEof
** flag of the reader is set. The return value is the (possibly updated)
** pIter->bEof flag: non-zero at EOF, zero otherwise.
*/
static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
  int bDone = sqlite3Fts5PoslistNext64(
      pIter->a, pIter->n, &pIter->i, &pIter->iPos
  );
  if( bDone ){
    pIter->bEof = 1;
  }
  return pIter->bEof;
}
/*
** Initialize reader pIter to iterate through the n byte position list
** in buffer a[], then advance it to the first entry. All fields of
** pIter are zeroed before the buffer pointer and size are stored.
**
** Returns the bEof flag of the reader after the first advance, i.e.
** non-zero if the position list contains no entries.
*/
static int sqlite3Fts5PoslistReaderInit(
  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
){
  memset(pIter, 0, sizeof(*pIter));
  pIter->a = a;
  pIter->n = n;

  /* sqlite3Fts5PoslistReaderNext() returns pIter->bEof */
  return sqlite3Fts5PoslistReaderNext(pIter);
}
/*
** Append position iPos to the position list being accumulated in buffer
** pBuf. No space is allocated here: the buffer must already be large
** enough to hold the new data. (*piPrev) is the previous position written
** to the list, and is set to iPos before returning.
**
** If iPos is less than (*piPrev) the call is a no-op.
**
** When the bits selected by colmask (bits 32 and up, sign bit excluded)
** differ between iPos and (*piPrev), a 0x01 byte followed by the varint
** (iPos>>32) is written first and the delta is then taken relative to
** the masked value of iPos. The delta itself is stored with 2 added.
*/
static void sqlite3Fts5PoslistSafeAppend(
  Fts5Buffer *pBuf,
  i64 *piPrev,
  i64 iPos
){
  static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32;
  i64 iBase = *piPrev;            /* Value the delta is computed against */

  if( iPos<iBase ) return;

  if( (iPos & colmask)!=(iBase & colmask) ){
    pBuf->p[pBuf->n++] = 1;
    pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32));
    iBase = (iPos & colmask);
  }
  pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-iBase)+2);
  *piPrev = iPos;
}
/*
** Append position iPos to the position list in pBuf, growing the buffer
** first. Space for 5+5+5 bytes is requested from fts5BufferGrow() and the
** entry is then written by sqlite3Fts5PoslistSafeAppend(), using
** pWriter->iPrev as the previous position.
**
** Returns SQLITE_OK if the entry was handed to the append routine, or the
** error code stored by fts5BufferGrow() if the buffer could not be grown.
*/
static int sqlite3Fts5PoslistWriterAppend(
  Fts5Buffer *pBuf,
  Fts5PoslistWriter *pWriter,
  i64 iPos
){
  int rc = 0;   /* Initialized only to suppress erroneous warning from Clang */
  if( fts5BufferGrow(&rc, pBuf, 5+5+5)==0 ){
    sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos);
    rc = SQLITE_OK;
  }
  return rc;
}
/* ** Return a nul-terminated copy of the string indicated by pIn. If nIn ** is non-negative, then it is the length of the string in bytes. Otherwise, ** the length of the string is determined using strlen(). ** ** It is the responsibility of the caller to eventually free the returned ** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned.
*/ staticchar *sqlite3Fts5Strndup(int *pRc, constchar *pIn, int nIn){ char *zRet = 0; if( *pRc==SQLITE_OK ){ if( nIn<0 ){
nIn = (int)strlen(pIn);
}
zRet = (char*)sqlite3_malloc(nIn+1); if( zRet ){
memcpy(zRet, pIn, nIn);
zRet[nIn] = '\0';
}else{
*pRc = SQLITE_NOMEM;
}
} return zRet;
}
staticint sqlite3Fts5TermsetAdd(
Fts5Termset *p, int iIdx, constchar *pTerm, int nTerm, int *pbPresent
){ int rc = SQLITE_OK;
*pbPresent = 0; if( p ){ int i;
u32 hash = 13;
Fts5TermsetEntry *pEntry;
/* Calculate a hash value for this term. This is the same hash checksum ** used by the fts5_hash.c module. This is not important for correct ** operation of the module, but is necessary to ensure that some tests
** designed to produce hash table collisions really do work. */ for(i=nTerm-1; i>=0; i--){
hash = (hash << 3) ^ hash ^ pTerm[i];
}
hash = (hash << 3) ^ hash ^ iIdx;
hash = hash % ArraySize(p->apHash);
#line 1 "fts5_config.c" /* ** 2014 Jun 09 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This is an SQLite module implementing full-text search.
*/
/*
** Argument pIn points into a nul-terminated string. Return a pointer to
** the first character at or after pIn for which fts5_iswhitespace() is
** false. If pIn is NULL, NULL is returned.
*/
static const char *fts5ConfigSkipWhitespace(const char *pIn){
  if( pIn==0 ) return 0;
  while( fts5_iswhitespace(*pIn) ){
    pIn++;
  }
  return pIn;
}
/*
** Argument pIn points into a nul-terminated string. Skip over the run of
** characters starting at pIn that sqlite3Fts5IsBareword() accepts, and
** return a pointer to the first character that follows the run. If the
** run is empty (the character at pIn is not a bareword character), NULL
** is returned instead.
*/
static const char *fts5ConfigSkipBareword(const char *pIn){
  const char *pEnd;
  for(pEnd=pIn; sqlite3Fts5IsBareword(*pEnd); pEnd++){
    /* no-op: the loop header does the scanning */
  }
  return (pEnd==pIn) ? 0 : pEnd;
}
default: /* maybe a number */ if( *p=='+' || *p=='-' ) p++; while( fts5_isdigit(*p) ) p++;
/* At this point, if the literal was an integer, the parse is ** finished. Or, if it is a floating point value, it may continue
** with either a decimal point or an 'E' character. */ if( *p=='.' && fts5_isdigit(p[1]) ){
p += 2; while( fts5_isdigit(*p) ) p++;
} if( p==pIn ) p = 0;
break;
}
return p;
}
/*
** The first character of the string pointed to by argument z must be an
** open-quote character: one of  [  '  "  or  `  (this is asserted). For
** '[' the matching close-quote is ']'; for the others the close-quote is
** the same character as the open-quote.
**
** The string is dequoted in place. Characters are copied down to the
** start of the buffer until the close-quote is reached, with each pair of
** adjacent close-quote characters collapsed to a single literal one. A
** new nul-terminator is then written after the dequoted text.
**
** The value returned is the byte offset of the character immediately
** following the close-quote. If the end of the string is reached without
** finding a close-quote, the offset of the original nul-terminator is
** returned and the buffer holds everything that followed the open-quote.
*/
static int fts5Dequote(char *z){
  char cClose = z[0];             /* Close-quote character */
  int iRead = 1;                  /* Next byte of input to examine */
  int iWrite = 0;                 /* Next byte of output to write */

  assert( cClose=='[' || cClose=='\'' || cClose=='"' || cClose=='`' );
  if( cClose=='[' ){
    cClose = ']';
  }

  for(;;){
    char c = z[iRead];
    if( c=='\0' ) break;

    if( c!=cClose ){
      /* Ordinary character. Copy it through. */
      z[iWrite++] = c;
      iRead++;
      continue;
    }

    if( z[iRead+1]==cClose ){
      /* Two close-quote characters in a row are an escaped quote. Emit
      ** one quote character and skip past both. */
      z[iWrite++] = cClose;
      iRead += 2;
      continue;
    }

    /* Character iRead was the close quote. */
    iRead++;
    break;
  }

  z[iWrite] = '\0';
  return iRead;
}
/* ** Convert an SQL-style quoted string into a normal string by removing ** the quote characters. The conversion is done in-place. If the ** input does not begin with a quote character, then this routine ** is a no-op. ** ** Examples: ** ** "abc" becomes abc ** 'xyz' becomes xyz ** [pqr] becomes pqr ** `mno` becomes mno
*/ staticvoid sqlite3Fts5Dequote(char *z){ char quote; /* Quote character (if any ) */
/* ** Parse a "special" CREATE VIRTUAL TABLE directive and update ** configuration object pConfig as appropriate. ** ** If successful, object pConfig is updated and SQLITE_OK returned. If ** an error occurs, an SQLite error code is returned and an error message ** may be left in *pzErr. It is the responsibility of the caller to ** eventually free any such error message using sqlite3_free().
*/ staticint fts5ConfigParseSpecial(
Fts5Config *pConfig, /* Configuration object to update */ constchar *zCmd, /* Special command to parse */ constchar *zArg, /* Argument to parse */ char **pzErr /* OUT: Error message */
){ int rc = SQLITE_OK; int nCmd = (int)strlen(zCmd);
/* ** Gobble up the first bareword or quoted word from the input buffer zIn. ** Return a pointer to the character immediately following the last in ** the gobbled word if successful, or a NULL pointer otherwise (failed ** to find close-quote character). ** ** Before returning, set pzOut to point to a new buffer containing a ** nul-terminated, dequoted copy of the gobbled word. If the word was ** quoted, *pbQuoted is also set to 1 before returning. ** ** If *pRc is other than SQLITE_OK when this function is called, it is ** a no-op (NULL is returned). Otherwise, if an OOM occurs within this ** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not* ** set if a parse error (failed to find close quote) occurs.
*/ staticconstchar *fts5ConfigGobbleWord( int *pRc, /* IN/OUT: Error code */ constchar *zIn, /* Buffer to gobble string/bareword from */ char **pzOut, /* OUT: malloc'd buffer containing str/bw */ int *pbQuoted /* OUT: Set to true if dequoting required */
){ constchar *zRet = 0;
sqlite3_int64 nIn = strlen(zIn); char *zOut = sqlite3_malloc64(nIn+1);
/* ** Arguments nArg/azArg contain the string arguments passed to the xCreate ** or xConnect method of the virtual table. This function attempts to ** allocate an instance of Fts5Config containing the results of parsing ** those arguments. ** ** If successful, SQLITE_OK is returned and *ppOut is set to point to the ** new Fts5Config object. If an error occurs, an SQLite error code is ** returned, *ppOut is set to NULL and an error message may be left in ** *pzErr. It is the responsibility of the caller to eventually free any ** such error message using sqlite3_free().
*/ staticint sqlite3Fts5ConfigParse(
Fts5Global *pGlobal,
sqlite3 *db, int nArg, /* Number of arguments */ constchar **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */
Fts5Config **ppOut, /* OUT: Results of parse */ char **pzErr /* OUT: Error message */
){ int rc = SQLITE_OK; /* Return code */
Fts5Config *pRet; /* New object to return */ int i;
sqlite3_int64 nByte; int bUnindexed = 0; /* True if there are one or more UNINDEXED */
/* We only allow contentless_delete=1 if the table is indeed contentless. */ if( rc==SQLITE_OK
&& pRet->bContentlessDelete
&& pRet->eContent!=FTS5_CONTENT_NONE
){
*pzErr = sqlite3_mprintf( "contentless_delete=1 requires a contentless table"
);
rc = SQLITE_ERROR;
}
/* We only allow contentless_delete=1 if columnsize=0 is not present. ** ** This restriction may be removed at some point.
*/ if( rc==SQLITE_OK && pRet->bContentlessDelete && pRet->bColumnsize==0 ){
*pzErr = sqlite3_mprintf( "contentless_delete=1 is incompatible with columnsize=0"
);
rc = SQLITE_ERROR;
}
/* We only allow contentless_unindexed=1 if the table is actually a ** contentless one.
*/ if( rc==SQLITE_OK
&& pRet->bContentlessUnindexed
&& pRet->eContent!=FTS5_CONTENT_NONE
){
*pzErr = sqlite3_mprintf( "contentless_unindexed=1 requires a contentless table"
);
rc = SQLITE_ERROR;
}
/* If no zContent option was specified, fill in the default values. */ if( rc==SQLITE_OK && pRet->zContent==0 ){ constchar *zTail = 0;
assert( pRet->eContent==FTS5_CONTENT_NORMAL
|| pRet->eContent==FTS5_CONTENT_NONE
); if( pRet->eContent==FTS5_CONTENT_NORMAL ){
zTail = "content";
}elseif( bUnindexed && pRet->bContentlessUnindexed ){
pRet->eContent = FTS5_CONTENT_UNINDEXED;
zTail = "content";
}elseif( pRet->bColumnsize ){
zTail = "docsize";
}
/*
** Free the configuration object passed as the only argument. Passing a
** NULL pointer is a harmless no-op.
**
** If a tokenizer instance (pConfig->t.pTok) exists it is destroyed with
** the xDelete method of pApi1 when that pointer is set, or of pApi2
** otherwise. All strings and arrays owned by the object, and finally the
** object itself, are then released with sqlite3_free().
*/
static void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
  int iCol;

  if( pConfig==0 ) return;

  /* Destroy the tokenizer instance, if one was created. */
  if( pConfig->t.pTok ){
    if( pConfig->t.pApi1 ){
      pConfig->t.pApi1->xDelete(pConfig->t.pTok);
    }else{
      pConfig->t.pApi2->xDelete(pConfig->t.pTok);
    }
  }

  sqlite3_free((char*)pConfig->t.azArg);
  sqlite3_free(pConfig->zDb);
  sqlite3_free(pConfig->zName);

  /* Each column name is a separate allocation, as is the array itself. */
  for(iCol=0; iCol<pConfig->nCol; iCol++){
    sqlite3_free(pConfig->azCol[iCol]);
  }
  sqlite3_free(pConfig->azCol);

  sqlite3_free(pConfig->aPrefix);
  sqlite3_free(pConfig->zRank);
  sqlite3_free(pConfig->zRankArgs);
  sqlite3_free(pConfig->zContent);
  sqlite3_free(pConfig->zContentRowid);
  sqlite3_free(pConfig->zContentExprlist);
  sqlite3_free(pConfig);
}
/* ** Call sqlite3_declare_vtab() based on the contents of the configuration ** object passed as the only argument. Return SQLITE_OK if successful, or ** an SQLite error code if an error occurs.
*/ staticint sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){ int i; int rc = SQLITE_OK; char *zSql;
/*
** Tokenize the nText bytes of text at pText using the tokenizer
** configured for pConfig.
**
** If pText is NULL, SQLITE_OK is returned immediately and the tokenizer
** is not invoked. Otherwise, if no tokenizer instance has been created
** yet (pConfig->t.pTok is NULL), one is loaded with
** sqlite3Fts5LoadTokenizer(); if that fails its error code is returned.
**
** The text is then passed to the xTokenize method of pApi1 when that
** pointer is set, or to the xTokenize method of pApi2 otherwise. The
** pApi2 method additionally receives the locale stored in the
** configuration (pConfig->t.pLocale / pConfig->t.nLocale). In both cases
** pCtx, flags and the xToken callback are forwarded unchanged, and the
** value returned by xTokenize is returned to the caller.
**
** The callback has the signature:
**
**     int xToken(void*, int, const char*, int, int, int)
**
** NOTE(review): per the public FTS5 tokenizer API these arguments are
** presumably (pCtx, tflags, pToken, nToken, iStart, iEnd) - confirm
** against fts5.h.
*/
static int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  int flags,                      /* FTS5_TOKENIZE_* flags */
  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
){
  if( pText==0 ) return SQLITE_OK;

  if( pConfig->t.pTok==0 ){
    int rcLoad = sqlite3Fts5LoadTokenizer(pConfig);
    if( rcLoad!=SQLITE_OK ) return rcLoad;
  }

  if( pConfig->t.pApi1 ){
    return pConfig->t.pApi1->xTokenize(
        pConfig->t.pTok, pCtx, flags, pText, nText, xToken
    );
  }
  return pConfig->t.pApi2->xTokenize(
      pConfig->t.pTok, pCtx, flags,
      pText, nText, pConfig->t.pLocale, pConfig->t.nLocale, xToken
  );
}
/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of SQL literals followed by a ')' character.
** Each list element may be surrounded by whitespace.
**
** If the input has this form, return a pointer to the ')'. Otherwise
** (a literal fails to parse, or a literal is followed by something other
** than ',' or ')'), return NULL to indicate a parse error.
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *pCsr = pIn;

  for(;;){
    pCsr = fts5ConfigSkipWhitespace(pCsr);
    pCsr = fts5ConfigSkipLiteral(pCsr);
    pCsr = fts5ConfigSkipWhitespace(pCsr);

    if( pCsr==0 ) return 0;           /* Literal failed to parse */
    if( *pCsr==')' ) return pCsr;     /* End of the argument list */
    if( *pCsr!=',' ) return 0;        /* Unexpected character */
    pCsr++;                           /* Step over the ',' */
  }
}
/* ** Parameter zIn contains a rank() function specification. The format of ** this is: ** ** + Bareword (function name) ** + Open parenthesis - "(" ** + Zero or more SQL literals in a comma separated list ** + Close parenthesis - ")"
*/ staticint sqlite3Fts5ConfigParseRank( constchar *zIn, /* Input string */ char **pzRank, /* OUT: Rank function name */ char **pzRankArgs /* OUT: Rank function arguments */
){ constchar *p = zIn; constchar *pRank; char *zRank = 0; char *zRankArgs = 0; int rc = SQLITE_OK;
*pzRank = 0;
*pzRankArgs = 0;
if( p==0 ){
rc = SQLITE_ERROR;
}else{
p = fts5ConfigSkipWhitespace(p);
pRank = p;
p = fts5ConfigSkipBareword(p);
/* ** Load the contents of the %_config table into memory.
*/ staticint sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ constchar *zSelect = "SELECT k, v FROM %Q.'%q_config'"; char *zSql;
sqlite3_stmt *p = 0; int rc = SQLITE_OK; int iVersion = 0;
/* ** Set (*pConfig->pzErrmsg) to point to an sqlite3_malloc()ed buffer ** containing the error message created using printf() style formatting ** string zFmt and its trailing arguments.
*/ staticvoid sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, constchar *zFmt, ...){
va_list ap; /* ... printf arguments */ char *zMsg = 0;
#line 1 "fts5_expr.c" /* ** 2014 May 31 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** **
*/
/*
** A complete FTS5 expression: the root of the expression tree, the index
** and configuration objects associated with it, and a flat array of
** pointers to the phrase objects the expression contains.
*/
struct Fts5Expr {
  Fts5Index *pIndex;
  Fts5Config *pConfig;
  Fts5ExprNode *pRoot;
  int bDesc;                      /* Iterate in descending rowid order */
  int nPhrase;                    /* Number of phrases in expression */
  Fts5ExprPhrase **apExprPhrase;  /* Pointers to phrase objects */
};
/*
** A single node in an expression tree.
**
** eType:
**   Expression node type. Usually one of:
**
**       FTS5_AND                 (nChild, apChild valid)
**       FTS5_OR                  (nChild, apChild valid)
**       FTS5_NOT                 (nChild, apChild valid)
**       FTS5_STRING              (pNear valid)
**       FTS5_TERM                (pNear valid)
**
**   A node with eType==0 may also exist. Such a node always matches zero
**   rows. It is created when a phrase that contains no tokens (e.g. "")
**   is parsed.
**
** iHeight:
**   Distance from this node to its furthest leaf. Always 0 for nodes of
**   type FTS5_STRING and FTS5_TERM. For every other node it is one more
**   than the largest iHeight value found among its children.
*/
struct Fts5ExprNode {
  int eType;                      /* Node type */
  int bEof;                       /* True at EOF */
  int bNomatch;                   /* True if entry is not a match */
  int iHeight;                    /* Distance to tree leaf nodes */

  /* Next method for this node. */
  int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64);

  i64 iRowid;                     /* Current rowid */
  Fts5ExprNearset *pNear;         /* For FTS5_STRING - cluster of phrases */

  /* Child nodes. For a NOT node, this array always contains 2 entries. For
  ** AND or OR nodes, it contains 2 or more entries. */
  int nChild;                     /* Number of child nodes */
  Fts5ExprNode *apChild[1];       /* Array of child nodes */
};
/*
** Call the xNext method stored in the Fts5ExprNode object pNode. The
** macro takes the same four arguments as the xNext() methods themselves
** (Fts5Expr*, Fts5ExprNode*, int, i64) and forwards them unchanged, so it
** should be used as if it were a function with that signature.
**
** Note that the node argument appears twice in the expansion, so it must
** not be an expression with side effects.
*/
#define fts5ExprNodeNext(pExpr,pNode,iArg3,iArg4) \
    (pNode)->xNext((pExpr), (pNode), (iArg3), (iArg4))
/*
** Each instance of this structure describes one search term, or one term
** prefix, within a phrase. Synonyms of a term are chained together via
** the pSynonym pointer.
*/
struct Fts5ExprTerm {
  u8 bPrefix;                     /* True for a prefix term */
  u8 bFirst;                      /* True if token must be first in column */
  char *pTerm;                    /* Term data */
  int nQueryTerm;                 /* Effective size of term in bytes */
  int nFullTerm;                  /* Size of term in bytes incl. tokendata */
  Fts5IndexIter *pIter;           /* Iterator for this term */
  Fts5ExprTerm *pSynonym;         /* Pointer to first in list of synonyms */
};
/*
** A phrase: a sequence of one or more terms. For a document to match the
** phrase, the terms must appear in it as a contiguous sequence.
*/
struct Fts5ExprPhrase {
  Fts5ExprNode *pNode;            /* FTS5_STRING node this phrase is part of */
  Fts5Buffer poslist;             /* Current position list */
  int nTerm;                      /* Number of entries in aTerm[] */
  Fts5ExprTerm aTerm[1];          /* Terms that make up this phrase */
};
/*
** A set of one or more phrases. Within each matching document, the
** phrases must all appear within a certain token distance (nNear) of
** each other.
*/
struct Fts5ExprNearset {
  int nNear;                      /* NEAR parameter */
  Fts5Colset *pColset;            /* Columns to search (NULL -> all columns) */
  int nPhrase;                    /* Number of entries in aPhrase[] array */
  Fts5ExprPhrase *apPhrase[1];    /* Array of phrase pointers */
};
/*
** Parse context. Carries the state of an expression parse: the
** configuration in use, the error code and error message (if any), the
** phrases collected so far and the resulting expression tree.
*/
struct Fts5Parse {
  Fts5Config *pConfig;
  char *zErr;
  int rc;
  int nPhrase;                    /* Size of apPhrase array */
  Fts5ExprPhrase **apPhrase;      /* Array of all phrases */
  Fts5ExprNode *pExpr;            /* Result of a successful parse */
  int bPhraseToAnd;               /* Convert "a+b" to "a AND b" */
};
/* ** Check that the Fts5ExprNode.iHeight variables are set correctly in ** the expression tree passed as the only argument.
*/ #ifndef NDEBUG staticvoid assert_expr_depth_ok(int rc, Fts5ExprNode *p){ if( rc==SQLITE_OK ){ if( p->eType==FTS5_TERM || p->eType==FTS5_STRING || p->eType==0 ){
assert( p->iHeight==0 );
}else{ int ii; int iMaxChild = 0; for(ii=0; ii<p->nChild; ii++){
Fts5ExprNode *pChild = p->apChild[ii];
iMaxChild = MAX(iMaxChild, pChild->iHeight);
assert_expr_depth_ok(SQLITE_OK, pChild);
}
assert( p->iHeight==iMaxChild+1 );
}
}
} #else # define assert_expr_depth_ok(rc, p) #endif
/* ** Read the first token from the nul-terminated string at *pz.
*/ staticint fts5ExprGetToken(
Fts5Parse *pParse, constchar **pz, /* IN/OUT: Pointer into buffer */
Fts5Token *pToken
){ constchar *z = *pz; int tok;
/* Skip past any whitespace */ while( fts5ExprIsspace(*z) ) z++;
pToken->p = z;
pToken->n = 1; switch( *z ){ case'(': tok = FTS5_LP; break; case')': tok = FTS5_RP; break; case'{': tok = FTS5_LCP; break; case'}': tok = FTS5_RCP; break; case':': tok = FTS5_COLON; break; case',': tok = FTS5_COMMA; break; case'+': tok = FTS5_PLUS; break; case'*': tok = FTS5_STAR; break; case'-': tok = FTS5_MINUS; break; case'^': tok = FTS5_CARET; break; case'\0': tok = FTS5_EOF; break;
/* If the LHS of the MATCH expression was a user column, apply the
** implicit column-filter. */ if( sParse.rc==SQLITE_OK && iCol<pConfig->nCol ){ int n = sizeof(Fts5Colset);
Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&sParse.rc, n); if( pColset ){
pColset->nCol = 1;
pColset->aiCol[0] = iCol;
sqlite3Fts5ParseSetColset(&sParse, sParse.pExpr, pColset);
}
}
/*
** Buffer z is assumed to be at least nByte bytes in size and to contain
** valid utf-8. Return the number of characters in the first nByte bytes.
**
** The count is made by counting every byte that is not a utf-8
** continuation byte, i.e. every byte whose top two bits are not 10.
** If nByte is zero or negative, 0 is returned.
*/
static int fts5ExprCountChar(const char *z, int nByte){
  int nChar = 0;
  int iByte = 0;
  while( iByte<nByte ){
    if( (z[iByte] & 0xC0)!=0x80 ){
      nChar++;
    }
    iByte++;
  }
  return nChar;
}
/* ** This function is only called when using the special 'trigram' tokenizer. ** Argument zText contains the text of a LIKE or GLOB pattern matched ** against column iCol. This function creates and compiles an FTS5 MATCH ** expression that will match a superset of the rows matched by the LIKE or ** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error ** code.
*/ staticint sqlite3Fts5ExprPattern(
Fts5Config *pConfig, int bGlob, int iCol, constchar *zText, Fts5Expr **pp
){
i64 nText = strlen(zText); char *zExpr = (char*)sqlite3_malloc64(nText*4 + 1); int rc = SQLITE_OK;
if( zExpr==0 ){
rc = SQLITE_NOMEM;
}else{ char aSpec[3]; int iOut = 0; int i = 0; int iFirst = 0;
/*
** Free the expression node passed as the only argument, together with
** everything below it: each child node is freed recursively, then the
** near-set attached to the node (p->pNear), then the node itself.
** Passing a NULL pointer is a no-op.
*/
static void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
  int iChild;
  if( p==0 ) return;
  for(iChild=0; iChild<p->nChild; iChild++){
    sqlite3Fts5ParseNodeFree(p->apChild[iChild]);
  }
  sqlite3Fts5ParseNearsetFree(p->pNear);
  sqlite3_free(p);
}
/*
** Free the expression object passed as the only argument: the node tree
** rooted at p->pRoot, the p->apExprPhrase array, and the object itself.
** Passing a NULL pointer is a no-op.
*/
static void sqlite3Fts5ExprFree(Fts5Expr *p){
  if( p==0 ) return;
  sqlite3Fts5ParseNodeFree(p->pRoot);
  sqlite3_free(p->apExprPhrase);
  sqlite3_free(p);
}
/* ** Argument pTerm must be a synonym iterator. Return the current rowid ** that it points to.
*/ static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){
i64 iRet = 0; int bRetValid = 0;
Fts5ExprTerm *p;
/* ** Argument pTerm must be a synonym iterator.
*/ staticint fts5ExprSynonymList(
Fts5ExprTerm *pTerm,
i64 iRowid,
Fts5Buffer *pBuf, /* Use this buffer for space if required */
u8 **pa, int *pn
){
Fts5PoslistReader aStatic[4];
Fts5PoslistReader *aIter = aStatic; int nIter = 0; int nAlloc = 4; int rc = SQLITE_OK;
Fts5ExprTerm *p;
/* ** All individual term iterators in pPhrase are guaranteed to be valid and ** pointing to the same rowid when this function is called. This function ** checks if the current rowid really is a match, and if so populates ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch ** is set to true if this is really a match, or false otherwise. ** ** SQLITE_OK is returned if successful, or an SQLite error code if an ** error occurs. It is not considered an error if the current rowid is ** not a match.
*/ staticint fts5ExprPhraseIsMatch(
Fts5ExprNode *pNode, /* Node pPhrase belongs to */
Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbMatch /* OUT: Set to true if really a match */
){
Fts5PoslistWriter writer = {0};
Fts5PoslistReader aStatic[4];
Fts5PoslistReader *aIter = aStatic; int i; int rc = SQLITE_OK; int bFirst = pPhrase->aTerm[0].bFirst;
fts5BufferZero(&pPhrase->poslist);
/* If the aStatic[] array is not large enough, allocate a large array
** using sqlite3_malloc(). This approach could be improved upon. */ if( pPhrase->nTerm>ArraySize(aStatic) ){
sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
aIter = (Fts5PoslistReader*)sqlite3_malloc64(nByte); if( !aIter ) return SQLITE_NOMEM;
}
memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);
/* Initialize a term iterator for each term in the phrase */ for(i=0; i<pPhrase->nTerm; i++){
Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; int n = 0; int bFlag = 0;
u8 *a = 0; if( pTerm->pSynonym ){
Fts5Buffer buf = {0, 0, 0};
rc = fts5ExprSynonymList(pTerm, pNode->iRowid, &buf, &a, &n); if( rc ){
sqlite3_free(a); goto ismatch_out;
} if( a==buf.p ) bFlag = 1;
}else{
a = (u8*)pTerm->pIter->pData;
n = pTerm->pIter->nData;
}
sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
aIter[i].bFlag = (u8)bFlag; if( aIter[i].bEof ) goto ismatch_out;
}
/*
** A reader over a position list that keeps track of two values at once:
** the current position and the position that follows it.
*/
typedef struct Fts5LookaheadReader Fts5LookaheadReader;
struct Fts5LookaheadReader {
  const u8 *a;                    /* Buffer containing position list */
  int n;                          /* Size of buffer a[] in bytes */
  int i;                          /* Current offset in position list */
  i64 iPos;                       /* Current position */
  i64 iLookahead;                 /* Next position */
};
/* ** The near-set object passed as the first argument contains more than ** one phrase. All phrases currently point to the same row. The ** Fts5ExprPhrase.poslist buffers are populated accordingly. This function ** tests if the current row contains instances of each phrase sufficiently ** close together to meet the NEAR constraint. Non-zero is returned if it ** does, or zero otherwise. ** ** If in/out parameter (*pRc) is set to other than SQLITE_OK when this ** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM) ** occurs within this function (*pRc) is set accordingly before returning. ** The return value is undefined in both these cases. ** ** If no error occurs and non-zero (a match) is returned, the position-list ** of each phrase object is edited to contain only those entries that ** meet the constraint before returning.
*/ staticint fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){
Fts5NearTrimmer aStatic[4];
Fts5NearTrimmer *a = aStatic;
Fts5ExprPhrase **apPhrase = pNear->apPhrase;
int i; int rc = *pRc; int bMatch;
assert( pNear->nPhrase>1 );
/* If the aStatic[] array is not large enough, allocate a large array
** using sqlite3_malloc(). This approach could be improved upon. */ if( pNear->nPhrase>ArraySize(aStatic) ){
sqlite3_int64 nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase;
a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte);
}else{
memset(aStatic, 0, sizeof(aStatic));
} if( rc!=SQLITE_OK ){
*pRc = rc; return 0;
}
/* Initialize a lookahead iterator for each phrase. After passing the ** buffer and buffer size to the lookahead-reader init function, zero ** the phrase poslist buffer. The new poslist for the phrase (containing ** the same entries as the original with some entries removed on account ** of the NEAR constraint) is written over the original even as it is ** being read. This is safe as the entries for the new poslist are a ** subset of the old, so it is not possible for data yet to be read to
** be overwritten. */ for(i=0; i<pNear->nPhrase; i++){
Fts5Buffer *pPoslist = &apPhrase[i]->poslist;
fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader);
pPoslist->n = 0;
a[i].pOut = pPoslist;
}
while( 1 ){ int iAdv;
i64 iMin;
i64 iMax;
/* This block advances the phrase iterators until they point to a set of
** entries that together comprise a match. */
iMax = a[0].reader.iPos; do {
bMatch = 1; for(i=0; i<pNear->nPhrase; i++){
Fts5LookaheadReader *pPos = &a[i].reader;
iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear; if( pPos->iPos<iMin || pPos->iPos>iMax ){
bMatch = 0; while( pPos->iPos<iMin ){ if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out;
} if( pPos->iPos>iMax ) iMax = pPos->iPos;
}
}
}while( bMatch==0 );
/* Add an entry to each output position list */ for(i=0; i<pNear->nPhrase; i++){
i64 iPos = a[i].reader.iPos;
Fts5PoslistWriter *pWriter = &a[i].writer; if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){
sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos);
}
}
/* ** Advance iterator pIter until it points to a value equal to or laster ** than the initial value of *piLast. If this means the iterator points ** to a value laster than *piLast, update *piLast to the new lastest value. ** ** If the iterator reaches EOF, set *pbEof to true before returning. If ** an error occurs, set *pRc to an error code. If either *pbEof or *pRc ** are set, return a non-zero value. Otherwise, return zero.
*/ staticint fts5ExprAdvanceto(
Fts5IndexIter *pIter, /* Iterator to advance */ int bDesc, /* True if iterator is "rowid DESC" */
i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ int *pRc, /* OUT: Error code */ int *pbEof /* OUT: Set to true if EOF */
){
i64 iLast = *piLast;
i64 iRowid;
staticint fts5ExprSynonymAdvanceto(
Fts5ExprTerm *pTerm, /* Term iterator to advance */ int bDesc, /* True if iterator is "rowid DESC" */
i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ int *pRc /* OUT: Error code */
){ int rc = SQLITE_OK;
i64 iLast = *piLast;
Fts5ExprTerm *p; int bEof = 0;
staticint fts5ExprNearTest( int *pRc,
Fts5Expr *pExpr, /* Expression that pNear is a part of */
Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */
){
Fts5ExprNearset *pNear = pNode->pNear; int rc = *pRc;
/* Check that each phrase in the nearset matches the current row. ** Populate the pPhrase->poslist buffers at the same time. If any
** phrase is not a match, break out of the loop early. */ for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym
|| pNear->pColset || pPhrase->aTerm[0].bFirst
){ int bMatch = 0;
rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch); if( bMatch==0 ) break;
}else{
Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData);
}
}
/* ** Initialize all term iterators in the pNear object. If any term is found ** to match no documents at all, return immediately without initializing any ** further iterators. ** ** If an error occurs, return an SQLite error code. Otherwise, return ** SQLITE_OK. It is not considered an error if some term matches zero ** documents.
*/ staticint fts5ExprNearInitAll(
Fts5Expr *pExpr,
Fts5ExprNode *pNode
){
Fts5ExprNearset *pNear = pNode->pNear; int i;
/*
** Compare the values currently indicated by the two nodes as follows:
**
**   res = (*p1) - (*p2)
**
** Nodes that point to values that come later in the iteration order are
** considered to be larger. Nodes at EOF are the largest of all. If both
** nodes are at EOF the p2 test wins and -1 is returned.
**
** This means that if the iteration order is ASC, then numerically larger
** rowids are considered larger. Or if it is DESC, numerically smaller
** rowids are larger. The ordering of two live rowids is delegated to
** fts5RowidCmp().
*/
static int fts5NodeCompare(
  Fts5Expr *pExpr,
  Fts5ExprNode *p1,
  Fts5ExprNode *p2
){
  if( p2->bEof ) return -1;
  if( p1->bEof ) return +1;
  return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid);
}
/* ** All individual term iterators in pNear are guaranteed to be valid when ** this function is called. This function checks if all term iterators ** point to the same rowid, and if not, advances them until they do. ** If an EOF is reached before this happens, *pbEof is set to true before ** returning. ** ** SQLITE_OK is returned if no error occurs, or an SQLite error code ** otherwise. It is not considered an error if an iterator reaches ** EOF.
*/ staticint fts5ExprNodeTest_STRING(
Fts5Expr *pExpr, /* Expression pPhrase belongs to */
Fts5ExprNode *pNode
){
Fts5ExprNearset *pNear = pNode->pNear;
Fts5ExprPhrase *pLeft = pNear->apPhrase[0]; int rc = SQLITE_OK;
i64 iLast; /* Lastest rowid any iterator points to */ int i, j; /* Phrase and token index, respectively */ int bMatch; /* True if all terms are at the same rowid */ constint bDesc = pExpr->bDesc;
/* Check that this node should not be FTS5_TERM */
assert( pNear->nPhrase>1
|| pNear->apPhrase[0]->nTerm>1
|| pNear->apPhrase[0]->aTerm[0].pSynonym
|| pNear->apPhrase[0]->aTerm[0].bFirst
);
/* Initialize iLast, the "lastest" rowid any iterator points to. If the ** iterator skips through rowids in the default ascending order, this means ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
** means the minimum rowid. */ if( pLeft->aTerm[0].pSynonym ){
iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0);
}else{
iLast = pLeft->aTerm[0].pIter->iRowid;
}
/* ** Advance the first term iterator in the first phrase of pNear. Set output ** variable *pbEof to true if it reaches EOF or if an error occurs. ** ** Return SQLITE_OK if successful, or an SQLite error code if an error ** occurs.
*/ staticint fts5ExprNodeNext_STRING(
Fts5Expr *pExpr, /* Expression pPhrase belongs to */
Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ int bFromValid,
i64 iFrom
){
Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0]; int rc = SQLITE_OK;
/* Find the firstest rowid any synonym points to. */
i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0);
/* Advance each iterator that currently points to iRowid. Or, if iFrom
** is valid - each iterator that points to a rowid before iFrom. */ for(p=pTerm; p; p=p->pSynonym){ if( sqlite3Fts5IterEof(p->pIter)==0 ){
i64 ii = p->pIter->iRowid; if( ii==iRowid
|| (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc)
){ if( bFromValid ){
rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom);
}else{
rc = sqlite3Fts5IterNext(p->pIter);
} if( rc!=SQLITE_OK ) break; if( sqlite3Fts5IterEof(p->pIter)==0 ){
bEof = 0;
}
}else{
bEof = 0;
}
}
}
/* Set the EOF flag if either all synonym iterators are at EOF or an
** error has occurred. */
pNode->bEof = (rc || bEof);
}else{
Fts5IndexIter *pIter = pTerm->pIter;
staticint fts5ExprNodeTest_TERM(
Fts5Expr *pExpr, /* Expression that pNear is a part of */
Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */
){ /* As this "NEAR" object is actually a single phrase that consists ** of a single term only, grab pointers into the poslist managed by the ** fts5_index.c iterator object. This is much faster than synthesizing ** a new poslist the way we have to for more complicated phrase or NEAR
** expressions. */
Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0];
Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
/* ** xNext() method for a node of type FTS5_TERM.
*/ staticint fts5ExprNodeNext_TERM(
Fts5Expr *pExpr,
Fts5ExprNode *pNode, int bFromValid,
i64 iFrom
){ int rc;
Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
staticvoid fts5ExprNodeTest_OR(
Fts5Expr *pExpr, /* Expression of which pNode is a part */
Fts5ExprNode *pNode /* Expression node to test */
){
Fts5ExprNode *pNext = pNode->apChild[0]; int i;
/* ** Argument pNode is an FTS5_AND node.
*/ staticint fts5ExprNodeTest_AND(
Fts5Expr *pExpr, /* Expression pPhrase belongs to */
Fts5ExprNode *pAnd /* FTS5_AND node to advance */
){ int iChild;
i64 iLast = pAnd->iRowid; int rc = SQLITE_OK; int bMatch;
assert( pAnd->bEof==0 ); do {
pAnd->bNomatch = 0;
bMatch = 1; for(iChild=0; iChild<pAnd->nChild; iChild++){
Fts5ExprNode *pChild = pAnd->apChild[iChild]; int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid); if( cmp>0 ){ /* Advance pChild until it points to iLast or laster */
rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast); if( rc!=SQLITE_OK ){
pAnd->bNomatch = 0; return rc;
}
}
/* If the child node is now at EOF, so is the parent AND node. Otherwise, ** the child node is guaranteed to have advanced at least as far as ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the
** new lastest rowid seen so far. */
assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 ); if( pChild->bEof ){
fts5ExprSetEof(pAnd);
bMatch = 1; break;
}elseif( iLast!=pChild->iRowid ){
bMatch = 0;
iLast = pChild->iRowid;
}
/* ** If pNode currently points to a match, this function returns SQLITE_OK ** without modifying it. Otherwise, pNode is advanced until it does point ** to a match or EOF is reached.
*/ staticint fts5ExprNodeTest(
Fts5Expr *pExpr, /* Expression of which pNode is a part */
Fts5ExprNode *pNode /* Expression node to test */
){ int rc = SQLITE_OK; if( pNode->bEof==0 ){ switch( pNode->eType ){
case FTS5_STRING: {
rc = fts5ExprNodeTest_STRING(pExpr, pNode); break;
}
case FTS5_TERM: {
rc = fts5ExprNodeTest_TERM(pExpr, pNode); break;
}
case FTS5_AND: {
rc = fts5ExprNodeTest_AND(pExpr, pNode); break;
}
case FTS5_OR: {
fts5ExprNodeTest_OR(pExpr, pNode); break;
}
/* ** Set node pNode, which is part of expression pExpr, to point to the first ** match. If there are no matches, set the Node.bEof flag to indicate EOF. ** ** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise. ** It is not an error if there are no matches.
*/ staticint fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ int rc = SQLITE_OK;
pNode->bEof = 0;
pNode->bNomatch = 0;
if( Fts5NodeIsString(pNode) ){ /* Initialize all term iterators in the NEAR object. */
rc = fts5ExprNearInitAll(pExpr, pNode);
}elseif( pNode->xNext==0 ){
pNode->bEof = 1;
}else{ int i; int nEof = 0; for(i=0; i<pNode->nChild && rc==SQLITE_OK; i++){
Fts5ExprNode *pChild = pNode->apChild[i];
rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]);
assert( pChild->bEof==0 || pChild->bEof==1 );
nEof += pChild->bEof;
}
pNode->iRowid = pNode->apChild[0]->iRowid;
switch( pNode->eType ){ case FTS5_AND: if( nEof>0 ) fts5ExprSetEof(pNode); break;
case FTS5_OR: if( pNode->nChild==nEof ) fts5ExprSetEof(pNode); break;
/*
** Begin iterating through the set of documents in index pIdx matched by
** the MATCH expression passed as the first argument. If the "bDesc"
** parameter is passed a non-zero value, iteration is in descending rowid
** order. Or, if it is zero, in ascending order.
**
** If iterating in ascending rowid order (bDesc==0), the first document
** visited is that with the smallest rowid that is larger than or equal
** to parameter iFirst. Or, if iterating in descending order (bDesc==1),
** then the first document visited must have a rowid smaller than or
** equal to iFirst.
**
** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
** is not considered an error if the query does not match any documents.
*/
static int sqlite3Fts5ExprFirst(
  Fts5Expr *p,
  Fts5Index *pIdx,
  i64 iFirst,
  int bDesc
){
  Fts5ExprNode *pRoot = p->pRoot;
  int rc;                         /* Return code */

  /* Record the index and direction on the expression, then position the
  ** root node on its first candidate. rc must be assigned here: every
  ** test below reads it. */
  p->pIndex = pIdx;
  p->bDesc = bDesc;
  rc = fts5ExprNodeFirst(p, pRoot);

  /* If not at EOF but the current rowid occurs earlier than iFirst in
  ** the iteration order, move to document iFirst or later. */
  if( rc==SQLITE_OK
   && 0==pRoot->bEof
   && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0
  ){
    rc = fts5ExprNodeNext(p, pRoot, 1, iFirst);
  }

  /* If the iterator is not at a real match, skip forward until it is. */
  while( pRoot->bNomatch && rc==SQLITE_OK ){
    assert( pRoot->bEof==0 );
    rc = fts5ExprNodeNext(p, pRoot, 0, 0);
  }
  return rc;
}
/*
** Move to the next document.
**
** The root node is advanced until it no longer carries the bNomatch
** flag. If the rowid it then points to lies beyond iLast in the
** iteration order (fts5RowidCmp() returns a positive value), the root
** node is marked as being at EOF.
**
** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
** is not considered an error if no further document matches.
*/
static int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){
  int rc;
  Fts5ExprNode *pRoot = p->pRoot;

  assert( pRoot->bEof==0 && pRoot->bNomatch==0 );
  do {
    rc = fts5ExprNodeNext(p, pRoot, 0, 0);
    assert( pRoot->bNomatch==0 || (rc==SQLITE_OK && pRoot->bEof==0) );
  }while( pRoot->bNomatch );

  if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){
    pRoot->bEof = 1;
  }
  return rc;
}
/*
** Set the "bFirst" flag on the first token of the phrase passed as the
** only argument. This is a no-op if pPhrase is NULL or holds no terms.
*/
static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase *pPhrase){
  if( pPhrase && pPhrase->nTerm ){
    pPhrase->aTerm[0].bFirst = 1;
  }
}
/* ** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated ** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is ** appended to it and the results returned. ** ** If an OOM error occurs, both the pNear and pPhrase objects are freed and ** NULL returned.
*/ static Fts5ExprNearset *sqlite3Fts5ParseNearset(
Fts5Parse *pParse, /* Parse context */
Fts5ExprNearset *pNear, /* Existing nearset, or NULL */
Fts5ExprPhrase *pPhrase /* Recently parsed phrase */
){ constint SZALLOC = 8;
Fts5ExprNearset *pRet = 0;
/* ** Callback for tokenizing terms used by ParseTerm().
*/ staticint fts5ParseTokenize( void *pContext, /* Pointer to Fts5InsertCtx object */ int tflags, /* Mask of FTS5_TOKEN_* flags */ constchar *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iUnused1, /* Start offset of token */ int iUnused2 /* End offset of token */
){ int rc = SQLITE_OK; constint SZALLOC = 8;
TokenCtx *pCtx = (TokenCtx*)pContext;
Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
UNUSED_PARAM2(iUnused1, iUnused2);
/* If an error has already occurred, this is a no-op */ if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
/*
** Free the phrase object passed as the only argument. This is a thin
** wrapper that forwards to fts5ExprPhraseFree().
*/
static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){
  fts5ExprPhraseFree(pPhrase);
}
/*
** Free the nearset object passed as the only argument, together with
** every phrase it holds and its column set. Passing NULL is a no-op.
*/
static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){
  if( pNear ){
    int i;
    for(i=0; i<pNear->nPhrase; i++){
      fts5ExprPhraseFree(pNear->apPhrase[i]);
    }
    sqlite3_free(pNear->pColset);
    sqlite3_free(pNear);
  }
}
/* ** This function is called by the parser to process a string token. The ** string may or may not be quoted. In any case it is tokenized and a ** phrase object consisting of all tokens returned.
*/ static Fts5ExprPhrase *sqlite3Fts5ParseTerm(
Fts5Parse *pParse, /* Parse context */
Fts5ExprPhrase *pAppend, /* Phrase to append to */
Fts5Token *pToken, /* String to tokenize */ int bPrefix /* True if there is a trailing "*" */
){
Fts5Config *pConfig = pParse->pConfig;
TokenCtx sCtx; /* Context object passed to callback */ int rc; /* Tokenize return code */ char *z = 0;
if( sCtx.pPhrase==0 ){ /* This happens when parsing a token or quoted phrase that contains
** no token characters at all. (e.g ... MATCH '""'). */
sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, sizeof(Fts5ExprPhrase));
}elseif( sCtx.pPhrase->nTerm ){
sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = (u8)bPrefix;
}
assert( pParse->apPhrase!=0 );
pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
}
/*
** Token pTok has appeared in a MATCH expression where the NEAR operator
** is expected. If token pTok is not exactly the four bytes "NEAR"
** (the comparison is case-sensitive), store a syntax error in the
** pParse object.
*/
static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){
  if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){
    sqlite3Fts5ParseError(
        pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p
    );
  }
}
/* ** The second argument passed to this function may be NULL, or it may be ** an existing Fts5Colset object. This function returns a pointer to ** a new colset object containing the contents of (p) with new value column ** number iCol appended. ** ** If an OOM error occurs, store an error code in pParse and return NULL. ** The old colset object (if any) is not freed in this case.
*/ static Fts5Colset *fts5ParseColset(
Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */
Fts5Colset *p, /* Existing colset object */ int iCol /* New column to add to colset object */
){ int nCol = p ? p->nCol : 0; /* Num. columns already in colset object */
Fts5Colset *pNew; /* New colset object to return */
#ifndef NDEBUG /* Check that the array is in order and contains no duplicate entries. */ for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] ); #endif
}
return pNew;
}
/*
** Build and return the complement of colset p: a new Fts5Colset listing,
** in ascending order, every column index in [0, nCol) that does not
** appear in p. The walk assumes p->aiCol[] is itself sorted ascending.
**
** Colset p is always freed before returning. If the allocation fails
** (or pParse->rc was already set), NULL is returned.
*/
static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse *pParse, Fts5Colset *p){
  const int nTotal = pParse->pConfig->nCol;
  Fts5Colset *pInv = (Fts5Colset*)sqlite3Fts5MallocZero(
      &pParse->rc, sizeof(Fts5Colset) + sizeof(int)*nTotal
  );

  if( pInv!=0 ){
    int iCol;                     /* Candidate column index */
    int iSkip = 0;                /* Next unconsumed entry of p->aiCol[] */
    for(iCol=0; iCol<nTotal; iCol++){
      if( iSkip<p->nCol && p->aiCol[iSkip]==iCol ){
        /* Column is present in the input, so leave it out of the result */
        iSkip++;
        continue;
      }
      pInv->aiCol[pInv->nCol] = iCol;
      pInv->nCol++;
    }
  }

  sqlite3_free(p);
  return pInv;
}
static Fts5Colset *sqlite3Fts5ParseColset(
Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */
Fts5Colset *pColset, /* Existing colset object */
Fts5Token *p
){
Fts5Colset *pRet = 0; int iCol; char *z; /* Dequoted copy of token p */
z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n); if( pParse->rc==SQLITE_OK ){
Fts5Config *pConfig = pParse->pConfig;
sqlite3Fts5Dequote(z); for(iCol=0; iCol<pConfig->nCol; iCol++){ if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ) break;
} if( iCol==pConfig->nCol ){
sqlite3Fts5ParseError(pParse, "no such column: %s", z);
}else{
pRet = fts5ParseColset(pParse, pColset, iCol);
}
sqlite3_free(z);
}
/*
** Duplicate a column set.
**
** NULL is returned when pOrig is NULL, or when sqlite3Fts5MallocZero()
** hands back NULL (it receives pRc and is responsible for reporting the
** failure through it). Otherwise the return value is a byte-for-byte
** copy of (*pOrig) held in memory obtained from sqlite3Fts5MallocZero().
*/
static Fts5Colset *fts5CloneColset(int *pRc, Fts5Colset *pOrig){
  Fts5Colset *pCopy = 0;

  if( pOrig!=0 ){
    /* One aiCol[] slot is already counted by sizeof(Fts5Colset) */
    sqlite3_int64 nByte = sizeof(Fts5Colset) + (pOrig->nCol-1) * sizeof(int);
    pCopy = (Fts5Colset*)sqlite3Fts5MallocZero(pRc, nByte);
    if( pCopy!=0 ){
      memcpy(pCopy, pOrig, (size_t)nByte);
    }
  }
  return pCopy;
}
/* ** Remove from colset pColset any columns that are not also in colset pMerge.
*/ staticvoid fts5MergeColset(Fts5Colset *pColset, Fts5Colset *pMerge){ int iIn = 0; /* Next input in pColset */ int iMerge = 0; /* Next input in pMerge */ int iOut = 0; /* Next output slot in pColset */
/*
** Add pSub as a child of p.
**
** If p is not an FTS5_NOT node and pSub has the same type as p, the
** children of pSub are spliced directly into p->apChild[] and the pSub
** node itself is freed. Otherwise pSub is appended as a single child.
** In both cases p->iHeight is raised so that it exceeds the height of
** every newly attached child by at least one.
**
** The caller must have sized p->apChild[] to hold the new entries; no
** allocation happens here.
*/
static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){
  int ii = p->nChild;             /* Index of first newly added child */

  if( p->eType!=FTS5_NOT && pSub->eType==p->eType ){
    int nByte = sizeof(Fts5ExprNode*) * pSub->nChild;
    memcpy(&p->apChild[p->nChild], pSub->apChild, nByte);
    p->nChild += pSub->nChild;
    sqlite3_free(pSub);
  }else{
    p->apChild[p->nChild++] = pSub;
  }

  for( ; ii<p->nChild; ii++){
    p->iHeight = MAX(p->iHeight, p->apChild[ii]->iHeight + 1);
  }
}
/* ** This function is used when parsing LIKE or GLOB patterns against ** trigram indexes that specify either detail=column or detail=none. ** It converts a phrase: ** ** abc + def + ghi ** ** into an AND tree: ** ** abc AND def AND ghi
*/ static Fts5ExprNode *fts5ParsePhraseToAnd(
Fts5Parse *pParse,
Fts5ExprNearset *pNear
){ int nTerm = pNear->apPhrase[0]->nTerm; int ii; int nByte;
Fts5ExprNode *pRet;
/* ** Allocate and return a new expression object. If anything goes wrong (i.e. ** OOM error), leave an error code in pParse and return NULL.
*/ static Fts5ExprNode *sqlite3Fts5ParseNode(
Fts5Parse *pParse, /* Parse context */ int eType, /* FTS5_STRING, AND, OR or NOT */
Fts5ExprNode *pLeft, /* Left hand child expression */
Fts5ExprNode *pRight, /* Right hand child expression */
Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */
){
Fts5ExprNode *pRet = 0;
if( pParse->rc==SQLITE_OK ){ int nChild = 0; /* Number of children of returned node */
sqlite3_int64 nByte; /* Bytes of space to allocate for this node */
/* ** Compose a tcl-readable representation of expression pExpr. Return a ** pointer to a buffer containing that representation. It is the ** responsibility of the caller to at some point free the buffer using ** sqlite3_free().
*/ staticchar *fts5ExprPrintTcl(
Fts5Config *pConfig, constchar *zNearsetCmd,
Fts5ExprNode *pExpr
){ char *zRet = 0; if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
Fts5ExprNearset *pNear = pExpr->pNear; int i; int iTerm;
/*
** SQL function entry point that forwards to fts5ExprFunction() with its
** final argument set to 0.
*/
static void fts5ExprFunctionHr(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){
  fts5ExprFunction(pCtx, nArg, apVal, 0);
}

/*
** SQL function entry point that forwards to fts5ExprFunction() with its
** final argument set to 1.
*/
static void fts5ExprFunctionTcl(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){
  fts5ExprFunction(pCtx, nArg, apVal, 1);
}
/*
** The implementation of an SQLite user-defined-function that accepts a
** single integer as an argument. If the integer is an alpha-numeric
** unicode code point, 1 is returned. Otherwise 0.
**
** "Alpha-numeric" here means the code point's category is flagged in a
** table built from the category specifications "L*", "N*" and "Co".
** Any argument count other than one is reported as an error result.
*/
static void fts5ExprIsAlnum(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){
  int iCode;
  u8 aArr[32];                    /* Flag table indexed by category */

  if( nArg!=1 ){
    sqlite3_result_error(pCtx,
        "wrong number of arguments to function fts5_isalnum", -1
    );
    return;
  }

  memset(aArr, 0, sizeof(aArr));
  sqlite3Fts5UnicodeCatParse("L*", aArr);
  sqlite3Fts5UnicodeCatParse("N*", aArr);
  sqlite3Fts5UnicodeCatParse("Co", aArr);
  iCode = sqlite3_value_int(apVal[0]);
  sqlite3_result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory((u32)iCode)]);
}
staticvoid fts5ExprFold(
sqlite3_context *pCtx, /* Function call context */ int nArg, /* Number of args */
sqlite3_value **apVal /* Function arguments */
){ if( nArg!=1 && nArg!=2 ){
sqlite3_result_error(pCtx, "wrong number of arguments to function fts5_fold", -1
);
}else{ int iCode; int bRemoveDiacritics = 0;
iCode = sqlite3_value_int(apVal[0]); if( nArg==2 ) bRemoveDiacritics = sqlite3_value_int(apVal[1]);
sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics));
}
} #endif/* if SQLITE_TEST || SQLITE_FTS5_DEBUG */
/*
** This is called during initialization to register the debugging scalar
** UDFs (fts5_expr(), fts5_expr_tcl(), fts5_isalnum() and fts5_fold())
** with database handle db. pGlobal is supplied to each function as its
** user-data pointer.
**
** The functions are only registered in SQLITE_TEST or SQLITE_FTS5_DEBUG
** builds; in other builds this routine does nothing beyond returning
** SQLITE_OK. Otherwise the return value is SQLITE_OK, or the first error
** code returned by sqlite3_create_function().
*/
static int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){
#if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
  struct Fts5ExprFunc {
    const char *z;
    void (*x)(sqlite3_context*,int,sqlite3_value**);
  } aFunc[] = {
    { "fts5_expr",     fts5ExprFunctionHr },
    { "fts5_expr_tcl", fts5ExprFunctionTcl },
    { "fts5_isalnum",  fts5ExprIsAlnum },
    { "fts5_fold",     fts5ExprFold },
  };
  int i;
  int rc = SQLITE_OK;
  void *pCtx = (void*)pGlobal;

  /* Register each entry, stopping at the first failure */
  for(i=0; rc==SQLITE_OK && i<ArraySize(aFunc); i++){
    struct Fts5ExprFunc *p = &aFunc[i];
    rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0);
  }
#else
  int rc = SQLITE_OK;
  UNUSED_PARAM2(pGlobal,db);
#endif

  /* Avoid warnings indicating that sqlite3Fts5ParserTrace() and
  ** sqlite3Fts5ParserFallback() are unused */
#ifndef NDEBUG
  (void)sqlite3Fts5ParserTrace;
#endif
  (void)sqlite3Fts5ParserFallback;

  return rc;
}
/*
** Return the number of phrases in expression pExpr, or 0 if pExpr is
** NULL.
*/
static int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){
  return (pExpr ? pExpr->nPhrase : 0);
}
/*
** Return the number of terms in the iPhrase'th phrase in pExpr. If
** iPhrase is out of range (negative, or not less than the number of
** phrases) 0 is returned. pExpr itself must not be NULL.
*/
static int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){
  if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0;
  return pExpr->apExprPhrase[iPhrase]->nTerm;
}
/*
** This function is used to access the current position list for phrase
** iPhrase.
**
** If the node that owns the phrase is not at EOF and points at the same
** rowid as the expression root, (*pa) is set to the phrase's poslist
** buffer and its size in bytes is returned. Otherwise (*pa) is set to
** NULL and 0 is returned. iPhrase is not range-checked here.
*/
static int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){
  int nRet;
  Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
  Fts5ExprNode *pNode = pPhrase->pNode;

  if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){
    *pa = pPhrase->poslist.p;
    nRet = pPhrase->poslist.n;
  }else{
    *pa = 0;
    nRet = 0;
  }
  return nRet;
}
/*
** Per-phrase state returned as an array by sqlite3Fts5ExprClearPoslists().
*/
struct Fts5PoslistPopulator {
  Fts5PoslistWriter writer;
  int bOk;                        /* True if ok to populate */
  int bMiss;                      /* Set by ClearPoslists() for phrases whose
                                  ** poslist was left untouched */
};
/*
** Reset the position lists of all phrases in expression pExpr and return
** a freshly allocated, zeroed array holding one Fts5PoslistPopulator per
** phrase. NULL is returned if the allocation fails.
**
** Argument bLive is true if the expression might be pointing to a real
** entry, otherwise it has just been reset. When bLive is set, a phrase
** whose poslist is already empty, whose node is at EOF, or whose node is
** not on the same rowid as the root is flagged bMiss and its buffer is
** left untouched. Every other phrase has its poslist truncated to zero
** bytes.
**
** At present this function is only used for detail=col and detail=none
** fts5 tables. This implies that all phrases must be at most 1 token
** in size, as phrase matches are not supported without detail=full.
*/
static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int bLive){
  Fts5PoslistPopulator *aPop;
  int iPhrase;

  aPop = sqlite3_malloc64(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);
  if( aPop==0 ) return 0;
  memset(aPop, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);

  for(iPhrase=0; iPhrase<pExpr->nPhrase; iPhrase++){
    Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
    Fts5ExprNode *pOwner = pPhrase->pNode;
    int bSkip = 0;

    assert( pPhrase->nTerm<=1 );
    if( bLive ){
      bSkip = ( pPhrase->poslist.n==0
             || pOwner->iRowid!=pExpr->pRoot->iRowid
             || pOwner->bEof );
    }

    if( bSkip ){
      aPop[iPhrase].bMiss = 1;
    }else{
      pPhrase->poslist.n = 0;
    }
  }
  return aPop;
}
/*
** Return 1 if column iCol is a member of colset pColset, or 0 otherwise.
** Membership is decided by a linear scan of pColset->aiCol[].
**
** TODO: Make this more efficient!
*/
static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){
  int i;
  for(i=0; i<pColset->nCol; i++){
    if( pColset->aiCol[i]==iCol ) return 1;
  }
  return 0;
}
/*
** pToken is a buffer nToken bytes in size that may or may not contain
** an embedded 0x00 byte. If it does, return the number of bytes in
** the buffer before the first 0x00. If it does not, return nToken.
** At most nToken bytes are examined.
*/
static int fts5QueryTerm(const char *pToken, int nToken){
  int ii;
  for(ii=0; ii<nToken && pToken[ii]; ii++){}
  return ii;
}
staticint fts5ExprPopulatePoslistsCb( void *pCtx, /* Copy of 2nd argument to xTokenize() */ int tflags, /* Mask of FTS5_TOKEN_* flags */ constchar *pToken, /* Pointer to buffer containing token */ int nToken, /* Size of token in bytes */ int iUnused1, /* Byte offset of token within input text */ int iUnused2 /* Byte offset of end of token within input text */
){
Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx;
Fts5Expr *pExpr = p->pExpr; int i; int nQuery = nToken;
i64 iRowid = pExpr->pRoot->iRowid;
/* ** This function is only called for detail=column tables.
*/ staticint sqlite3Fts5ExprPhraseCollist(
Fts5Expr *pExpr, int iPhrase, const u8 **ppCollist, int *pnCollist
){
Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
Fts5ExprNode *pNode = pPhrase->pNode; int rc = SQLITE_OK;
/* ** Does the work of the fts5_api.xQueryToken() API method.
*/ staticint sqlite3Fts5ExprQueryToken(
Fts5Expr *pExpr, int iPhrase, int iToken, constchar **ppOut, int *pnOut
){
Fts5ExprPhrase *pPhrase = 0;
/* ** Does the work of the fts5_api.xInstToken() API method.
*/ staticint sqlite3Fts5ExprInstToken(
Fts5Expr *pExpr,
i64 iRowid, int iPhrase, int iCol, int iOff, int iToken, constchar **ppOut, int *pnOut
){
Fts5ExprPhrase *pPhrase = 0;
Fts5ExprTerm *pTerm = 0; int rc = SQLITE_OK;
/*
** Clear the token mappings for all Fts5IndexIter objects managed by
** the expression passed as the only argument.
**
** For each phrase, the first term and every synonym chained from it via
** pSynonym has its iterator passed to
** sqlite3Fts5IndexIterClearTokendata().
*/
static void sqlite3Fts5ExprClearTokens(Fts5Expr *pExpr){
  int ii;
  for(ii=0; ii<pExpr->nPhrase; ii++){
    Fts5ExprTerm *pT;
    for(pT=&pExpr->apExprPhrase[ii]->aTerm[0]; pT; pT=pT->pSynonym){
      sqlite3Fts5IndexIterClearTokendata(pT->pIter);
    }
  }
}
#line 1 "fts5_hash.c" /* ** 2014 August 11 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** **
*/
/* #include "fts5Int.h" */
typedef struct Fts5HashEntry Fts5HashEntry;

/*
** This file contains the implementation of an in-memory hash table used
** to accumulate "term -> doclist" content before it is flushed to a
** level-0 segment.
*/

struct Fts5Hash {
  int eDetail;                    /* Copy of Fts5Config.eDetail */
  int *pnByte;                    /* Pointer to bytes counter */
  int nEntry;                     /* Number of entries currently in hash */
  int nSlot;                      /* Size of aSlot[] array */
  Fts5HashEntry *pScan;           /* Current ordered scan item */
  Fts5HashEntry **aSlot;          /* Array of hash slots */
};
/*
** One object of this type represents a single entry in the hash table.
** The object, its key and its current data all live in one memory
** allocation: the key immediately follows the object in memory, and the
** position list data immediately follows the key.
**
** The key is Fts5HashEntry.nKey bytes in size. It consists of a single
** byte identifying the index (either the main term index or a
** prefix-index), followed by the term data. For example: "0token".
** There is no nul-terminator - in this case nKey=6.
**
** The data that follows the key is in a similar, but not identical
** format to the doclist data stored in the database. It is:
**
**   * Rowid, as a varint
**   * Position list, without 0x00 terminator.
**   * Size of previous position list and rowid, as a 4 byte
**     big-endian integer.
**
** iRowidOff:
**   Offset of last rowid written to data area. Relative to first byte of
**   structure.
**
** nData:
**   Bytes of data written since iRowidOff.
**
** NOTE(review): no iRowidOff member is declared below, and the nData
** member is annotated as a total including the structure - confirm
** which description is current.
*/
struct Fts5HashEntry {
  Fts5HashEntry *pHashNext;       /* Next hash entry with same hash-key */
  Fts5HashEntry *pScanNext;       /* Next entry in sorted order */

  int nAlloc;                     /* Total size of allocation */
  int iSzPoslist;                 /* Offset of space for 4-byte poslist size */
  int nData;                      /* Total bytes of data (incl. structure) */
  int nKey;                       /* Length of key in bytes */
  u8 bDel;                        /* Set delete-flag @ iSzPoslist */
  u8 bContent;                    /* Set content-flag (detail=none mode) */
  i16 iCol;                       /* Column of last value written */
  int iPos;                       /* Position of last value written */
  i64 iRowid;                     /* Rowid of last value written */
};
/*
** Compute the hash-slot index for the n byte key at p.
**
** The bytes are folded into the hash from the last byte to the first,
** starting from the seed value 13. The result is reduced modulo nSlot, so
** it is always a valid index into an array of nSlot slots. nSlot must be
** non-zero.
*/
static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){
  unsigned int h = 13;
  int i;
  for(i=n-1; i>=0; i--){
    h = (h << 3) ^ h ^ p[i];
  }
  return (h % nSlot);
}
/*
** Compute the hash-slot index for the key formed by byte b followed by the
** n bytes at p, without building the concatenated key.
**
** The n bytes at p are folded in from last to first, then b is folded in
** as the final step. This is the same order fts5HashKey() would use on the
** (n+1) byte buffer (b || p), so both functions agree on the slot. nSlot
** must be non-zero.
*/
static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){
  unsigned int h = 13;
  int i;
  for(i=n-1; i>=0; i--){
    h = (h << 3) ^ h ^ p[i];
  }
  h = (h << 3) ^ h ^ b;
  return (h % nSlot);
}
/* ** Resize the hash table by doubling the number of slots.
*/ staticint fts5HashResize(Fts5Hash *pHash){ int nNew = pHash->nSlot*2; int i;
Fts5HashEntry **apNew;
Fts5HashEntry **apOld = pHash->aSlot;
/* ** Add an entry to the in-memory hash table. The key is the concatenation ** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos). ** ** (bByte || pToken) -> (iRowid,iCol,iPos) ** ** Or, if iCol is negative, then the value is a delete marker.
*/ staticint sqlite3Fts5HashWrite(
Fts5Hash *pHash,
i64 iRowid, /* Rowid for this entry */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ char bByte, /* First byte of token */ constchar *pToken, int nToken /* Token to add or remove to or from index */
){ unsignedint iHash;
Fts5HashEntry *p;
u8 *pPtr; int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ int bNew; /* If non-delete entry should be written */
/* If an existing hash entry cannot be found, create a new one. */ if( p==0 ){ /* Figure out how much space to allocate */ char *zKey;
sqlite3_int64 nByte = sizeof(Fts5HashEntry) + (nToken+1) + 1 + 64; if( nByte<128 ) nByte = 128;
/* Grow the Fts5Hash.aSlot[] array if necessary. */ if( (pHash->nEntry*2)>=pHash->nSlot ){ int rc = fts5HashResize(pHash); if( rc!=SQLITE_OK ) return rc;
iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
}
/* Allocate new Fts5HashEntry and add it to the hash table. */
p = (Fts5HashEntry*)sqlite3_malloc64(nByte); if( !p ) return SQLITE_NOMEM;
memset(p, 0, sizeof(Fts5HashEntry));
p->nAlloc = (int)nByte;
zKey = fts5EntryKey(p);
zKey[0] = bByte;
memcpy(&zKey[1], pToken, nToken);
assert( iHash==fts5HashKey(pHash->nSlot, (u8*)zKey, nToken+1) );
p->nKey = nToken+1;
zKey[nToken+1] = '\0';
p->nData = nToken+1 + sizeof(Fts5HashEntry);
p->pHashNext = pHash->aSlot[iHash];
pHash->aSlot[iHash] = p;
pHash->nEntry++;
/* Add the first rowid field to the hash-entry */
p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid);
p->iRowid = iRowid;
/* Appending to an existing hash-entry. Check that there is enough ** space to append the largest possible new entry. Worst case scenario ** is: ** ** + 9 bytes for a new rowid, ** + 4 byte reserved for the "poslist size" varint. ** + 1 byte for a "new column" byte, ** + 3 bytes for a new column number (16-bit max) as a varint, ** + 5 bytes for the new position offset (32-bit max).
*/ if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
sqlite3_int64 nNew = p->nAlloc * 2;
Fts5HashEntry *pNew;
Fts5HashEntry **pp;
pNew = (Fts5HashEntry*)sqlite3_realloc64(p, nNew); if( pNew==0 ) return SQLITE_NOMEM;
pNew->nAlloc = (int)nNew; for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext);
*pp = pNew;
p = pNew;
}
nIncr -= p->nData;
}
assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) );
pPtr = (u8*)p;
/* If this is a new rowid, append the 4-byte size field for the previous
** entry, and the new rowid for this entry. */ if( iRowid!=p->iRowid ){
u64 iDiff = (u64)iRowid - (u64)p->iRowid;
fts5HashAddPoslistSize(pHash, p, 0);
p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iDiff);
p->iRowid = iRowid;
bNew = 1;
p->iSzPoslist = p->nData; if( pHash->eDetail!=FTS5_DETAIL_NONE ){
p->nData += 1;
p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1);
p->iPos = 0;
}
}
/* Append the new position offset, if necessary */ if( bNew ){
p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
p->iPos = iPos;
}
}
}else{ /* This is a delete. Set the delete flag. */
p->bDel = 1;
}
/* ** Arguments pLeft and pRight point to linked-lists of hash-entry objects, ** each sorted in key order. This function merges the two lists into a ** single list and returns a pointer to its first element.
*/ static Fts5HashEntry *fts5HashEntryMerge(
Fts5HashEntry *pLeft,
Fts5HashEntry *pRight
){
Fts5HashEntry *p1 = pLeft;
Fts5HashEntry *p2 = pRight;
Fts5HashEntry *pRet = 0;
Fts5HashEntry **ppOut = &pRet;
/* ** Link all tokens from hash table iHash into a list in sorted order. The ** tokens are not removed from the hash table.
*/ staticint fts5HashEntrySort(
Fts5Hash *pHash, constchar *pTerm, int nTerm, /* Query prefix, if any */
Fts5HashEntry **ppSorted
){ constint nMergeSlot = 32;
Fts5HashEntry **ap;
Fts5HashEntry *pList; int iSlot; int i;
/* ** Query the hash table for a doclist associated with term pTerm/nTerm.
*/ staticint sqlite3Fts5HashQuery(
Fts5Hash *pHash, /* Hash table to query */ int nPre, constchar *pTerm, int nTerm, /* Query term */ void **ppOut, /* OUT: Pointer to new object */ int *pnDoclist /* OUT: Size of doclist in bytes */
){ unsignedint iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm); char *zKey = 0;
Fts5HashEntry *p;
/*
** Report the key and doclist of the entry that the current scan
** (pHash->pScan) points at.
**
** If there is a current entry, fts5HashAddPoslistSize() is invoked on it
** first (presumably to fill in the pending poslist-size field - confirm
** against that function), then:
**
**   *pzTerm     is set to the pointer returned by fts5EntryKey(),
**   *pnTerm     to the entry's nKey,
**   *ppDoclist  to the byte immediately following the nKey key bytes,
**   *pnDoclist  to nData less the size of the structure and the key.
**
** Note that the doclist begins at the very next byte after the key, so
** callers must rely on *pnTerm rather than on a terminator byte.
**
** If the scan has no current entry, all four outputs are set to zero.
*/
static void sqlite3Fts5HashScanEntry(
  Fts5Hash *pHash,
  const char **pzTerm,            /* OUT: pointer to key bytes */
  int *pnTerm,                    /* OUT: Size of key in bytes */
  const u8 **ppDoclist,           /* OUT: pointer to doclist */
  int *pnDoclist                  /* OUT: size of doclist in bytes */
){
  Fts5HashEntry *pEntry = pHash->pScan;
  if( pEntry==0 ){
    *pzTerm = 0;
    *pnTerm = 0;
    *ppDoclist = 0;
    *pnDoclist = 0;
  }else{
    char *zKey = fts5EntryKey(pEntry);
    int nTerm = pEntry->nKey;
    fts5HashAddPoslistSize(pHash, pEntry, 0);
    *pzTerm = zKey;
    *pnTerm = nTerm;
    *ppDoclist = (const u8*)&zKey[nTerm];
    *pnDoclist = pEntry->nData - (sizeof(Fts5HashEntry) + nTerm);
  }
}
#line 1 "fts5_index.c" /* ** 2014 May 31 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** Low level access to the FTS index stored in the database file. The ** routines in this file implement all read and write access to the ** %_data table. Other parts of the system access this functionality via ** the interface defined in fts5Int.h.
*/
/* #include "fts5Int.h" */
/* ** Overview: ** ** The %_data table contains all the FTS indexes for an FTS5 virtual table. ** As well as the main term index, there may be up to 31 prefix indexes. ** The format is similar to FTS3/4, except that: ** ** * all segment b-tree leaf data is stored in fixed size page records ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is ** taken to ensure it is possible to iterate in either direction through ** the entries in a doclist, or to seek to a specific entry within a ** doclist, without loading it into memory. ** ** * large doclists that span many pages have associated "doclist index" ** records that contain a copy of the first rowid on each page spanned by ** the doclist. This is used to speed up seek operations, and merges of ** large doclists with very small doclists. ** ** * extra fields in the "structure record" record the state of ongoing ** incremental merge operations. **
*/
/* Each preprocessor directive must sit on its own line to take effect. */
#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

#define FTS5_MAIN_PREFIX '0'

#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif

#define FTS5_MAX_LEVEL 64
/*
** The structure record comes in two formats:
**
**   1. the legacy format, which every fts5 version is able to read, and
**
**   2. the V2 format, used by contentless_delete=1 databases.
**
** Each starts with a 4-byte "configuration cookie". In the legacy format
** the cookie is followed by a varint holding the number of levels in the
** structure. In the V2 format it is followed by the 4 constant bytes
** [0xff 0x00 0x00 0x01]. The two cannot be confused: a varint only begins
** with 0xFF if its value is at least 16256, and the default maximum number
** of levels is 64.
**
** More detail on structure record formats is given further below.
*/
#define FTS5_STRUCTURE_V2 "\xFF\x00\x00\x01"
/* ** Details: ** ** The %_data table managed by this module, ** ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB); ** ** , contains the following 6 types of records. See the comments surrounding ** the FTS5_*_ROWID macros below for a description of how %_data rowids are ** assigned to each of them. ** ** 1. Structure Records: ** ** The set of segments that make up an index - the index structure - are ** recorded in a single record within the %_data table. The record consists ** of a single 32-bit configuration cookie value followed by a list of ** SQLite varints. ** ** If the structure record is a V2 record, the configuration cookie is ** followed by the following 4 bytes: [0xFF 0x00 0x00 0x01]. ** ** Next, the record continues with three varints: ** ** + number of levels, ** + total number of segments on all levels, ** + value of write counter. ** ** Then, for each level from 0 to nMax: ** ** + number of input segments in ongoing merge. ** + total number of segments in level. ** + for each segment from oldest to newest: ** + segment id (always > 0) ** + first leaf page number (often 1, always greater than 0) ** + final leaf page number ** ** Then, for V2 structures only: ** ** + lower origin counter value, ** + upper origin counter value, ** + the number of tombstone hash pages. ** ** 2. The Averages Record: ** ** A single record within the %_data table. The data is a list of varints. ** The first value is the number of rows in the index. Then, for each column ** from left to right, the total number of tokens in the column for all ** rows of the table. ** ** 3. Segment leaves: ** ** TERM/DOCLIST FORMAT: ** ** Most of each segment leaf is taken up by term/doclist data. 
The ** general format of term/doclist, starting with the first term ** on the leaf page, is: ** ** varint : size of first term ** blob: first term data ** doclist: first doclist ** zero-or-more { ** varint: number of bytes in common with previous term ** varint: number of bytes of new term data (nNew) ** blob: nNew bytes of new term data ** doclist: next doclist ** } ** ** doclist format: ** ** varint: first rowid ** poslist: first poslist ** zero-or-more { ** varint: rowid delta (always > 0) ** poslist: next poslist ** } ** ** poslist format: ** ** varint: size of poslist in bytes multiplied by 2, not including ** this field. Plus 1 if this entry carries the "delete" flag. ** collist: collist for column 0 ** zero-or-more { ** 0x01 byte ** varint: column number (I) ** collist: collist for column I ** } ** ** collist format: ** ** varint: first offset + 2 ** zero-or-more { ** varint: offset delta + 2 ** } ** ** PAGE FORMAT ** ** Each leaf page begins with a 4-byte header containing 2 16-bit ** unsigned integer fields in big-endian format. They are: ** ** * The byte offset of the first rowid on the page, if it exists ** and occurs before the first term (otherwise 0). ** ** * The byte offset of the start of the page footer. If the page ** footer is 0 bytes in size, then this field is the same as the ** size of the leaf page in bytes. ** ** The page footer consists of a single varint for each term located ** on the page. Each varint is the byte offset of the current term ** within the page, delta-compressed against the previous value. In ** other words, the first varint in the footer is the byte offset of ** the first term, the second is the byte offset of the second less that ** of the first, and so on. ** ** The term/doclist format described above is accurate if the entire ** term/doclist data fits on a single leaf page. 
If this is not the case, ** the format is changed in two ways: ** ** + if the first rowid on a page occurs before the first term, it ** is stored as a literal value: ** ** varint: first rowid ** ** + the first term on each page is stored in the same way as the ** very first term of the segment: ** ** varint : size of first term ** blob: first term data ** ** 5. Segment doclist indexes: ** ** Doclist indexes are themselves b-trees, however they usually consist of ** a single leaf record only. The format of each doclist index leaf page ** is: ** ** * Flags byte. Bits are: ** 0x01: Clear if leaf is also the root page, otherwise set. ** ** * Page number of fts index leaf page. As a varint. ** ** * First rowid on page indicated by previous field. As a varint. ** ** * A list of varints, one for each subsequent termless page. A ** positive delta if the termless page contains at least one rowid, ** or an 0x00 byte otherwise. ** ** Internal doclist index nodes are: ** ** * Flags byte. Bits are: ** 0x01: Clear for root page, otherwise set. ** ** * Page number of first child page. As a varint. ** ** * Copy of first rowid on page indicated by previous field. As a varint. ** ** * A list of delta-encoded varints - the first rowid on each subsequent ** child page. ** ** 6. Tombstone Hash Page ** ** These records are only ever present in contentless_delete=1 tables. ** There are zero or more of these associated with each segment. They ** are used to store the tombstone rowids for rows contained in the ** associated segments. ** ** The set of nHashPg tombstone hash pages associated with a single ** segment together form a single hash table containing tombstone rowids. ** To find the page of the hash on which a key might be stored: ** ** iPg = (rowid % nHashPg) ** ** Then, within page iPg, which has nSlot slots: ** ** iSlot = (rowid / nHashPg) % nSlot ** ** Each tombstone hash page begins with an 8 byte header: ** ** 1-byte: Key-size (the size in bytes of each slot). Either 4 or 8. 
** 1-byte: rowid-0-tombstone flag. This flag is only valid on the ** first tombstone hash page for each segment (iPg=0). If set, ** the hash table contains rowid 0. If clear, it does not. ** Rowid 0 is handled specially. ** 2-bytes: unused. ** 4-bytes: Big-endian integer containing number of entries on page. ** ** Following this are nSlot 4 or 8 byte slots (depending on the key-size ** in the first byte of the page header). The number of slots may be ** determined based on the size of the page record and the key-size: ** ** nSlot = (nByte - 8) / key-size
*/
/*
** Fixed rowids, within the %_data table, of the averages record and the
** structure record.
*/
#define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID   10    /* The structure record */
/*
** Macros determining the rowids used by segment leaves and dlidx leaves
** and nodes. All nodes and leaves are stored in the %_data table with large
** positive rowids.
**
** Each segment has a unique non-zero 16-bit id.
**
** The rowid for each segment leaf is found by passing the segment id and
** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
** sequentially starting from 1.
**
** Each macro below gives the width, in bits, of one rowid field.
*/
#define FTS5_DATA_ID_B     16     /* Max seg id number 65535 */
#define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B  5     /* Max dlidx tree height of 32 */
#define FTS5_DATA_PAGE_B   31     /* Max page number of 2147483648 */
/*
** Each time a blob is read from the %_data table, it is padded with
** FTS5_DATA_ZERO_PADDING zero bytes. This makes it easier to decode the
** various record formats without overreading if the records are corrupt.
**
** FTS5_DATA_PADDING is the number of extra bytes added to the allocation
** that holds a record read from the %_data table.
*/
#define FTS5_DATA_ZERO_PADDING 8
#define FTS5_DATA_PADDING 20
/* A single record read from the %_data table. */
struct Fts5Data {
  u8 *p;                          /* Buffer holding the record */
  int nn;                         /* Record size, in bytes */
  int szLeaf;                     /* Leaf size excluding the page-index */
};
/*
** There is one of these objects for each %_data table.
**
** nContentlessDelete:
**   Counts the contentless delete operations performed since the last
**   call to fts5IndexFlush() or fts5IndexDiscardData(). It is tracked so
**   that fts5IndexFlush() can do extra auto-merge work to account for
**   those delete operations.
*/
struct Fts5Index {
  Fts5Config *pConfig;            /* Virtual table configuration */
  char *zDataTbl;                 /* Name of %_data table */
  int nWorkUnit;                  /* Leaf pages in a "unit" of work */

  /*
  ** State for accumulating tokens and doclists in the in-memory hash
  ** table until they are flushed to disk.
  */
  Fts5Hash *pHash;                /* Hash table for in-memory data */
  int nPendingData;               /* Current bytes of pending data */
  i64 iWriteRowid;                /* Rowid for current doc being written */
  int bDelete;                    /* Current write is a delete */
  int nContentlessDelete;         /* Number of contentless delete ops */
  int nPendingRow;                /* Number of INSERT in hash table */

  /* Error state. */
  int rc;                         /* Current error code */
  int flushRc;

  /* State used by the fts5DataXXX() functions. */
  sqlite3_blob *pReader;          /* RO incr-blob open on %_data table */
  sqlite3_stmt *pWriter;          /* "INSERT ... %_data VALUES(?,?)" */
  sqlite3_stmt *pDeleter;         /* "DELETE FROM %_data ... id>=? AND id<=?" */
  sqlite3_stmt *pIdxWriter;       /* "INSERT ... %_idx VALUES(?,?,?,?)" */
  sqlite3_stmt *pIdxDeleter;      /* "DELETE FROM %_idx WHERE segid=?" */
  sqlite3_stmt *pIdxSelect;
  sqlite3_stmt *pIdxNextSelect;
  int nRead;                      /* Total number of blocks read */

  sqlite3_stmt *pDeleteFromIdx;

  sqlite3_stmt *pDataVersion;
  i64 iStructVersion;             /* data_version when pStruct read */
  Fts5Structure *pStruct;         /* Current db structure (or NULL) */
};
/* Cursor over the entries of a doclist held in memory. */
struct Fts5DoclistIter {
  u8 *aEof;                       /* One byte past the end of the doclist */

  /* Outputs. At EOF, aPoslist is set to 0. */
  i64 iRowid;
  u8 *aPoslist;
  int nPoslist;
  int nSize;
};
/*
** In memory, the "structure" record of an index is held as an
** Fts5Structure object, which is built out of the other Fts5StructureXXX
** types defined here.
**
** nOriginCntr:
**   Non-zero only for structure records belonging to contentless_delete=1
**   tables. For those it is the origin value to assign to the next
**   top-level segment that gets created.
*/
struct Fts5StructureSegment {
  int iSegid;                     /* Segment id */
  int pgnoFirst;                  /* First leaf page number in segment */
  int pgnoLast;                   /* Last leaf page number in segment */

  /* Used by contentless_delete=1 tables only: */
  u64 iOrigin1;
  u64 iOrigin2;
  int nPgTombstone;               /* Number of tombstone hash table pages */
  u64 nEntryTombstone;            /* Number of tombstone entries that "count" */
  u64 nEntry;                     /* Number of rows in this segment */
};

struct Fts5StructureLevel {
  int nMerge;                     /* Number of segments in incr-merge */
  int nSeg;                       /* Total number of segments on level */
  Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
};

struct Fts5Structure {
  int nRef;                       /* Object reference count */
  u64 nWriteCounter;              /* Total leaves written to level 0 */
  u64 nOriginCntr;                /* Origin value for next top-level segment */
  int nSegment;                   /* Total segments in this structure */
  int nLevel;                     /* Number of levels in this index */
  Fts5StructureLevel aLevel[1];   /* Array of nLevel level objects */
};
/*
** Segments are written using an object of type Fts5SegWriter, which in
** turn uses the Fts5PageWriter and Fts5DlidxWriter helper objects.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  int iPrevPgidx;                 /* Previous value written into pgidx */
  Fts5Buffer buf;                 /* Buffer containing leaf data */
  Fts5Buffer pgidx;               /* Buffer containing page-index */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};

struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
  i64 iPrev;                      /* Previous rowid value written to page */
  Fts5Buffer buf;                 /* Buffer containing page data */
};

struct Fts5SegWriter {
  int iSegid;                     /* Segid to write to */
  Fts5PageWriter writer;          /* PageWriter object */
  i64 iPrevRowid;                 /* Previous rowid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */

  int nDlidx;                     /* Allocated size of aDlidx[] array */
  Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */

  /* Values to insert into the %_idx table */
  Fts5Buffer btterm;              /* Next term to insert into %_idx table */
  int iBtPage;                    /* Page number corresponding to btterm */
};
/*
** Result of comparing a pair of segment iterators; one element of the
** Fts5Iter.aFirst[] array.
*/
typedef struct Fts5CResult Fts5CResult;
struct Fts5CResult {
  u16 iFirst;                     /* aSeg[] index of firstest iterator */
  u8 bTermEq;                     /* True if the terms are equal */
};
/*
** Iterator over one segment. It visits every term/rowid pair that the
** segment contains.
**
** pSeg:
**   Segment being iterated.
**
** iLeafPgno:
**   Page number, within the segment, of the current leaf.
**
** iLeafOffset:
**   Offset within the current leaf of the first byte of position list
**   data (i.e. one byte past the position-list size field).
**
** pLeaf:
**   Buffer holding the data of the current leaf page. NULL at EOF.
**
** iTermLeafPgno, iTermLeafOffset:
**   Page number of the leaf from which the most recent term was read, and
**   the offset just after that term's data.
**
** flags:
**   Mask of FTS5_SEGITER_XXX values, with these meanings:
**
**   FTS5_SEGITER_ONETERM:
**     When set, the iterator moves to EOF once the current doclist is
**     exhausted instead of advancing to the segment's next term.
**
**   FTS5_SEGITER_REVERSE:
**     Only ever set together with FTS5_SEGITER_ONETERM. When set, rowids
**     are visited in descending rather than the default ascending order.
**
** iRowidOffset/nRowidOffset/aRowidOffset:
**   Used only when FTS5_SEGITER_REVERSE is set.
**
**   For every rowid on the page that belongs to the current term, the
**   matching aRowidOffset[] entry holds the byte offset, within the page,
**   of the start of the "position-list-size" field.
**
** iTermIdx:
**   Index of current term on iTermLeafPgno.
**   NOTE(review): no member of this name is declared below.
**
** pTombArray:
**   Used for contentless_delete=1 tables only. Refers to a
**   Fts5TombstoneArray, whose apTombstone[] array is allocated large
**   enough for all tombstone hash pages associated with the segment when
**   the cursor is first allocated. The pages themselves are loaded lazily
**   from the database as they are needed.
*/
struct Fts5SegIter {
  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
  int flags;                      /* Mask of configuration flags */
  int iLeafPgno;                  /* Current leaf page number */
  Fts5Data *pLeaf;                /* Current leaf data */
  Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
  i64 iLeafOffset;                /* Byte offset within current leaf */
  Fts5TombstoneArray *pTombArray; /* Array of tombstone pages */

  /* Next method */
  void (*xNext)(Fts5Index*, Fts5SegIter*, int*);

  /* Page and offset the current term was read from. The offset is that
  ** of the first rowid in the current doclist. */
  int iTermLeafPgno;
  int iTermLeafOffset;

  int iPgidxOff;                  /* Next offset in pgidx */
  int iEndofDoclist;

  /* Only used when the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */

  Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */

  /* Populated from the current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
  int nPos;                       /* Number of bytes in current position list */
  u8 bDel;                        /* True if the delete flag is set */
};
/*
** A reference-counted array of tombstone pages.
*/
struct Fts5TombstoneArray {
  int nRef;                       /* Number of pointers to this object */
  int nTombstone;
  Fts5Data *apTombstone[1];       /* Array of tombstone pages */
};
/*
** Argument is a pointer to an Fts5Data structure that contains a
** leaf page.
**
** assert()-based sanity check on the szLeaf member: it must either equal
** the record size (nn), or equal the 16-bit value that fts5GetU16() reads
** from byte offset 2 of the page buffer. The first comparison is tested
** first, so fts5GetU16() is only evaluated when szLeaf differs from nn.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
(x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)
/*
** The argument points to an Fts5Data structure holding a leaf page.
** Evaluates to true when the leaf has no terms at all, and to false when
** it has one or more. A leaf counts as term-less when its szLeaf value is
** not smaller than the record size nn.
*/
#define fts5LeafIsTermless(x) ((x)->nn <= (x)->szLeaf)
/*
** Iterator over the merged contents of one or more segments. It visits
** every term/rowid pair in the merged data.
**
** nSeg is always a power of two, and is at least the number of segments
** being merged. The aSeg[] and aFirst[] arrays both have nSeg entries.
** Unused trailing aSeg[] entries are zeroed objects, which are treated
** exactly like iterators opened on empty segments.
**
** For even N, the outcome of comparing aSeg[N] with aSeg[N+1] is kept in
** aFirst[(nSeg+N)/2]. Here the "outcome" is the index of whichever
** iterator currently points at the smaller term/rowid combination. An
** iterator at EOF compares greater than every other iterator.
**
** aFirst[1] holds the aSeg[] index of the iterator pointing at the
** smallest key overall. aFirst[0] is unused.
**
** poslist:
**   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
**   There is no way to tell if this is populated or not.
**
** pColset:
**   When not NULL, an object holding a set of column indices. Only matches
**   in one of those columns are returned. The Fts5Colset is owned by the
**   upper layer rather than by the Fts5Iter, so it is not freed when the
**   iterator is closed.
*/
struct Fts5Iter {
  Fts5IndexIter base;             /* Base class containing output vars */
  Fts5TokenDataIter *pTokenDataIter;

  Fts5Index *pIndex;              /* Index that owns this iterator */
  Fts5Buffer poslist;             /* Buffer containing current poslist */
  Fts5Colset *pColset;            /* Restrict matches to these columns */

  /* Invoked to set output variables. */
  void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);

  int nSeg;                       /* Size of aSeg[] array */
  int bRev;                       /* True to iterate in reverse order */
  u8 bSkipEmpty;                  /* True to skip deleted entries */

  i64 iSwitchRowid;               /* Firstest rowid of other than aFirst[1] */
  Fts5CResult *aFirst;            /* Current merge state (see above) */
  Fts5SegIter aSeg[1];            /* Array of segment iterators */
};
/*
** Objects of the following types are used to walk the contents of a
** doclist-index record.
**
** pData:
**   The record that holds the doclist-index data.
**
** bEof:
**   Becomes true once the iterator has reached EOF.
**
** iOff:
**   Current offset within record pData.
*/
struct Fts5DlidxLvl {
  Fts5Data *pData;                /* Data for current page of this level */
  int iOff;                       /* Current offset into pData */
  int bEof;                       /* At EOF already */
  int iFirstOff;                  /* Used by reverse iterators */

  /* Output variables */
  int iLeafPgno;                  /* Page number of current leaf page */
  i64 iRowid;                     /* First rowid on leaf iLeafPgno */
};

struct Fts5DlidxIter {
  int nLvl;
  int iSegid;
  Fts5DlidxLvl aLvl[1];
};
/*
** Decode a 64-bit big-endian unsigned integer.
**
** Argument a must point to a buffer of at least 8 bytes. The first 8
** bytes are read, most significant byte first, and the resulting value is
** returned.
*/
static u64 fts5GetU64(u8 *a){
  u64 iVal = 0;
  int ii;
  for(ii=0; ii<8; ii++){
    iVal = (iVal << 8) | (u64)a[ii];
  }
  return iVal;
}
/* ** The only argument points to a buffer at least 4 bytes in size. This ** function interprets the first 4 bytes of the buffer as a 32-bit big-endian ** unsigned integer and returns the result.
*/ static u32 fts5GetU32(const u8 *a){ return ((u32)a[0] << 24)
+ ((u32)a[1] << 16)
+ ((u32)a[2] << 8)
+ ((u32)a[3] << 0);
}
/*
** Write iVal, formatted as a 32-bit big-endian unsigned integer, to the
** buffer indicated by the first argument. Exactly 4 bytes are written,
** most significant byte first, so the buffer must be at least 4 bytes in
** size.
*/
static void fts5PutU32(u8 *a, u32 iVal){
  a[0] = (u8)((iVal >> 24) & 0xFF);
  a[1] = (u8)((iVal >> 16) & 0xFF);
  a[2] = (u8)((iVal >>  8) & 0xFF);
  a[3] = (u8)((iVal >>  0) & 0xFF);
}
/*
** Allocate and return a buffer at least nByte bytes in size.
**
** The request is forwarded to sqlite3Fts5MallocZero() together with the
** address of the error code stored in the Fts5Index handle (p->rc), so if
** an OOM error is encountered NULL is returned and the error code is left
** in the Fts5Index handle passed as the first argument.
*/
static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){
  return sqlite3Fts5MallocZero(&p->rc, nByte);
}
/*
** Compare the contents of the pLeft buffer with the pRight/nRight blob.
** Only compiled when SQLITE_DEBUG is defined.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
**
** The first MIN(pLeft->n, nRight) bytes are compared with memcmp(). If
** they are equal, the shorter of the two is considered the lesser.
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  /* Skip memcmp() when there is nothing to compare, so that it is never
  ** handed the NULL pointer of an empty buffer. */
  int res = (nCmp<=0 ? 0 : memcmp(pLeft->p, pRight, nCmp));
  return (res==0 ? (pLeft->n - nRight) : res);
}
#endif
/*
** Compare the contents of the two buffers using memcmp(). If one buffer
** is a prefix of the other, it is considered the lesser.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
*/
static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
  int nCmp = MIN(pLeft->n, pRight->n);
  int res;

  /* A buffer pointer may only be NULL when there is nothing to compare. */
  assert( nCmp<=0 || pLeft->p!=0 );
  assert( nCmp<=0 || pRight->p!=0 );

  res = fts5Memcmp(pLeft->p, pRight->p, nCmp);
  if( res==0 ){
    /* Common prefix is identical: the difference in size decides. */
    res = pLeft->n - pRight->n;
  }
  return res;
}
/*
** Decode, using fts5GetVarint32(), the varint stored at byte offset
** pLeaf->szLeaf of the leaf page and return its value. Offset szLeaf is
** where the page-index (page footer) begins, and the first varint of the
** footer is the byte offset of the first term on the page.
*/
static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
  int ret;
  fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
  return ret;
}
/*
** Close the read-only blob handle, if it is open.
**
** p->pReader is cleared before sqlite3_blob_close() is called, so the
** Fts5Index never holds a pointer to a handle that is being closed. This
** is a no-op if no handle is open.
*/
static void sqlite3Fts5IndexCloseReader(Fts5Index *p){
  sqlite3_blob *pReader = p->pReader;
  if( pReader ){
    p->pReader = 0;
    sqlite3_blob_close(pReader);
  }
}
/* ** Retrieve a record from the %_data table. ** ** If an error occurs, NULL is returned and an error left in the ** Fts5Index object.
*/ static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
Fts5Data *pRet = 0; if( p->rc==SQLITE_OK ){ int rc = SQLITE_OK;
if( p->pReader ){ /* This call may return SQLITE_ABORT if there has been a savepoint ** rollback since it was last used. In this case a new blob handle
** is required. */
sqlite3_blob *pBlob = p->pReader;
p->pReader = 0;
rc = sqlite3_blob_reopen(pBlob, iRowid);
assert( p->pReader==0 );
p->pReader = pBlob; if( rc!=SQLITE_OK ){
sqlite3Fts5IndexCloseReader(p);
} if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
}
/* If the blob handle is not open at this point, open it and seek
** to the requested entry. */ if( p->pReader==0 && rc==SQLITE_OK ){
Fts5Config *pConfig = p->pConfig;
rc = sqlite3_blob_open(pConfig->db,
pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
);
}
/* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead. ** All the reasons those functions might return SQLITE_ERROR - missing ** table, missing row, non-blob/text in block column - indicate
** backing store corruption. */ if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
if( rc==SQLITE_OK ){
u8 *aOut = 0; /* Read blob data into this buffer */ int nByte = sqlite3_blob_bytes(p->pReader); int szData = (sizeof(Fts5Data) + 7) & ~7;
sqlite3_int64 nAlloc = szData + nByte + FTS5_DATA_PADDING;
pRet = (Fts5Data*)sqlite3_malloc64(nAlloc); if( pRet ){
pRet->nn = nByte;
aOut = pRet->p = (u8*)pRet + szData;
}else{
rc = SQLITE_NOMEM;
}
/*
** Release a reference to data record returned by an earlier call to
** fts5DataRead(). The record is freed with sqlite3_free().
*/
static void fts5DataRelease(Fts5Data *pData){
  sqlite3_free(pData);
}
/* ** INSERT OR REPLACE a record into the %_data table.
*/ staticvoid fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ if( p->rc!=SQLITE_OK ) return;
/* ** Execute the following SQL: ** ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
*/ staticvoid fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ if( p->rc!=SQLITE_OK ) return;
if( p->pDeleter==0 ){
Fts5Config *pConfig = p->pConfig; char *zSql = sqlite3_mprintf( "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
pConfig->zDb, pConfig->zName
); if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return;
}
/*
** Drop one reference to an Fts5Structure object obtained from
** fts5StructureRead() or fts5StructureDecode().
**
** A NULL argument is ignored. Otherwise the reference count is
** decremented and, once no references remain, the aSeg[] array of each
** level and then the structure itself are freed.
*/
static void fts5StructureRelease(Fts5Structure *pStruct){
  int iLvl;

  if( pStruct==0 ) return;
  pStruct->nRef--;
  if( pStruct->nRef>0 ) return;

  assert( pStruct->nRef==0 );
  iLvl = 0;
  while( iLvl<pStruct->nLevel ){
    sqlite3_free(pStruct->aLevel[iLvl].aSeg);
    iLvl++;
  }
  sqlite3_free(pStruct);
}
/*
** Make sure the structure object (*pp) may be modified by the caller.
**
** Nothing happens if (*pRc) is not SQLITE_OK on entry, or if the
** object has at most one reference. Otherwise a private copy with a
** reference count of 1 is created - the header, the aLevel[] array and
** every per-level aSeg[] array - one reference is dropped from the
** original, and (*pp) is pointed at the copy.
**
** Allocations go through sqlite3Fts5MallocZero(pRc, ...), which is how
** any error reaches (*pRc). If the top-level allocation yields NULL,
** (*pp) is set to NULL. If a per-level allocation yields NULL, the
** partial copy is discarded and the function returns with (*pp) and
** the original reference count untouched.
*/
static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){
  Fts5Structure *pOld = *pp;      /* Possibly shared original */
  Fts5Structure *pCopy;           /* Private copy under construction */
  i64 nByte;                      /* Size of header plus aLevel[] array */
  int iLvl;                       /* Used to iterate through levels */

  if( *pRc!=SQLITE_OK || pOld->nRef<=1 ) return;

  nByte = sizeof(Fts5Structure)+(pOld->nLevel-1)*sizeof(Fts5StructureLevel);
  pCopy = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte);
  if( pCopy==0 ){
    *pp = 0;
    return;
  }

  /* Copy the fixed part, then clear the borrowed aSeg pointers so that
  ** the cleanup loop below never frees memory owned by the original. */
  memcpy(pCopy, pOld, nByte);
  for(iLvl=0; iLvl<pOld->nLevel; iLvl++){
    pCopy->aLevel[iLvl].aSeg = 0;
  }

  for(iLvl=0; iLvl<pOld->nLevel; iLvl++){
    Fts5StructureLevel *pTo = &pCopy->aLevel[iLvl];
    i64 nSegByte = sizeof(Fts5StructureSegment) * pTo->nSeg;

    pTo->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nSegByte);
    if( pTo->aSeg==0 ){
      int jj;
      for(jj=0; jj<pOld->nLevel; jj++){
        sqlite3_free(pCopy->aLevel[jj].aSeg);
      }
      sqlite3_free(pCopy);
      return;
    }
    memcpy(pTo->aSeg, pOld->aLevel[iLvl].aSeg, nSegByte);
  }

  pOld->nRef--;
  pCopy->nRef = 1;
  *pp = pCopy;
}
/* ** Deserialize and return the structure record currently stored in serialized ** form within buffer pData/nData. ** ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array ** are over-allocated by one slot. This allows the structure contents ** to be more easily edited. ** ** If an error occurs, *ppOut is set to NULL and an SQLite error code ** returned. Otherwise, *ppOut is set to point to the new object and ** SQLITE_OK returned.
*/ staticint fts5StructureDecode( const u8 *pData, /* Buffer containing serialized structure */ int nData, /* Size of buffer pData in bytes */ int *piCookie, /* Configuration cookie value */
Fts5Structure **ppOut /* OUT: Deserialized object */
){ int rc = SQLITE_OK; int i = 0; int iLvl; int nLevel = 0; int nSegment = 0;
sqlite3_int64 nByte; /* Bytes of space to allocate at pRet */
Fts5Structure *pRet = 0; /* Structure object to return */ int bStructureV2 = 0; /* True for FTS5_STRUCTURE_V2 */
u64 nOriginCntr = 0; /* Largest origin value seen so far */
/* Grab the cookie value */ if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
i = 4;
/* Check if this is a V2 structure record. Set bStructureV2 if it is. */ if( 0==memcmp(&pData[i], FTS5_STRUCTURE_V2, 4) ){
i += 4;
bStructureV2 = 1;
}
/* Read the total number of levels and segments from the start of the
** structure record. */
i += fts5GetVarint32(&pData[i], nLevel);
i += fts5GetVarint32(&pData[i], nSegment); if( nLevel>FTS5_MAX_SEGMENT || nLevel<0
|| nSegment>FTS5_MAX_SEGMENT || nSegment<0
){ return FTS5_CORRUPT;
}
nByte = ( sizeof(Fts5Structure) + /* Main structure */ sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */
);
pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
if( pRet ){
pRet->nRef = 1;
pRet->nLevel = nLevel;
pRet->nSegment = nSegment;
i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl]; int nTotal = 0; int iSeg;
/* ** Extend level iLvl so that there is room for at least nExtra more ** segments.
*/ staticvoid fts5StructureExtendLevel( int *pRc,
Fts5Structure *pStruct, int iLvl, int nExtra, int bInsert
){ if( *pRc==SQLITE_OK ){
Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
Fts5StructureSegment *aNew;
sqlite3_int64 nByte;
/* ** Read, deserialize and return the structure record. ** ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array ** are over-allocated as described for function fts5StructureDecode() ** above. ** ** If an error occurs, NULL is returned and an error code left in the ** Fts5Index handle. If an error has already occurred when this function ** is called, it is a no-op.
*/ static Fts5Structure *fts5StructureRead(Fts5Index *p){
/* ** Return the total number of segments in index structure pStruct. This ** function is only ever used as part of assert() conditions.
*/ #ifdef SQLITE_DEBUG staticint fts5StructureCountSegments(Fts5Structure *pStruct){ int nSegment = 0; /* Total number of segments */ if( pStruct ){ int iLvl; /* Used to iterate through levels */ for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
nSegment += pStruct->aLevel[iLvl].nSeg;
}
}
/* ** Serialize and store the "structure" record. ** ** If an error occurs, leave an error code in the Fts5Index object. If an ** error has already occurred, this function is a no-op.
*/ staticvoid fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){ if( p->rc==SQLITE_OK ){
Fts5Buffer buf; /* Buffer to serialize record into */ int iLvl; /* Used to iterate through levels */ int iCookie; /* Cookie value to store */ int nHdr = (pStruct->nOriginCntr>0 ? (4+4+9+9+9) : (4+9+9));
/* ** Return a copy of index structure pStruct. Except, promote as many ** segments as possible to level iPromote. If an OOM occurs, NULL is ** returned.
*/ staticvoid fts5StructurePromoteTo(
Fts5Index *p, int iPromote, int szPromote,
Fts5Structure *pStruct
){ int il, is;
Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
/*
** A new segment has just been written to level iLvl of index structure
** pStruct. This function determines if any segments should be promoted
** as a result. Segments are promoted in two scenarios:
**
**   a) If the segment just written is smaller than one or more segments
**      within the previous populated level, it is promoted to the previous
**      populated level.
**
**   b) If the segment just written is larger than the newest segment on
**      the next populated level, then that segment, and any other adjacent
**      segments that are also smaller than the one just written, are
**      promoted.
**
** If one or more segments are promoted, the structure object is updated
** to reflect this.
**
** This function is a no-op if p->rc is not SQLITE_OK on entry, or if
** level iLvl contains no segments. Segment sizes are measured in pages
** (pgnoLast - pgnoFirst + 1).
*/
static void fts5StructurePromote(
  Fts5Index *p,                   /* FTS5 backend object */
  int iLvl,                       /* Index level just updated */
  Fts5Structure *pStruct          /* Index structure */
){
  if( p->rc==SQLITE_OK ){
    int iTst;
    int iPromote = -1;
    int szPromote = 0;            /* Promote anything this size or smaller */
    Fts5StructureSegment *pSeg;   /* Segment just written */
    int szSeg;                    /* Size of segment just written */
    int nSeg = pStruct->aLevel[iLvl].nSeg;

    /* The segment just written is the last entry on level iLvl. Without
    ** it there is nothing to measure, so nothing can be promoted. */
    if( nSeg==0 ) return;
    pSeg = &pStruct->aLevel[iLvl].aSeg[nSeg-1];
    szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);

    /* Check for condition (a): find the nearest populated level below
    ** iLvl and the size of its largest segment. */
    for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
    if( iTst>=0 ){
      int i;
      int szMax = 0;
      Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
      assert( pTst->nMerge==0 );
      for(i=0; i<pTst->nSeg; i++){
        int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
        if( sz>szMax ) szMax = sz;
      }
      if( szMax>=szSeg ){
        /* Condition (a) is true. Promote the newest segment on level
        ** iLvl to level iTst.  */
        iPromote = iTst;
        szPromote = szMax;
      }
    }

    /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
    ** is a no-op if it is not.  */
    if( iPromote<0 ){
      iPromote = iLvl;
      szPromote = szSeg;
    }
    fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
  }
}
/* ** Advance the iterator passed as the only argument. If the end of the ** doclist-index page is reached, return non-zero.
*/ staticint fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
Fts5Data *pData = pLvl->pData;
/* ** Advance the iterator passed as the only argument.
*/ staticint fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
/*
** Position a doclist-index iterator on its first entry.
**
** fts5DlidxLvlNext() is invoked exactly once on every level of the
** iterator, starting with level 0. The return value is the bEof flag
** of level 0 afterwards - non-zero if the iterator is already at EOF.
*/
static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
  int iLvl = 0;
  while( iLvl<pIter->nLvl ){
    fts5DlidxLvlNext(&pIter->aLvl[iLvl]);
    iLvl++;
  }
  return pIter->aLvl[0].bEof;
}
staticvoid fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){ int i;
/* Advance each level to the last entry on the last page */ for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
Fts5DlidxLvl *pLvl = &pIter->aLvl[i]; while( fts5DlidxLvlNext(pLvl)==0 );
pLvl->bEof = 0;
/*
** Destroy a doclist-index iterator allocated by fts5DlidxIterInit().
**
** The data record held by each level is released with fts5DataRelease()
** before the iterator itself is freed. A NULL argument is ignored.
*/
static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
  int iLvl;
  if( pIter==0 ) return;
  iLvl = 0;
  while( iLvl<pIter->nLvl ){
    fts5DataRelease(pIter->aLvl[iLvl].pData);
    iLvl++;
  }
  sqlite3_free(pIter);
}
static Fts5DlidxIter *fts5DlidxIterInit(
Fts5Index *p, /* Fts5 Backend to iterate within */ int bRev, /* True for ORDER BY ASC */ int iSegid, /* Segment id */ int iLeafPg /* Leaf page number to load dlidx for */
){
Fts5DlidxIter *pIter = 0; int i; int bDone = 0;
/*
** Decode the position-list size field stored as a varint at p.
**
** The low bit of the decoded value is the delete flag and is written to
** (*pbDel). The decoded value divided by two is the size of the position
** list in bytes and is written to (*pnSz). The return value is the
** number of bytes occupied by the varint itself.
*/
static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
  int nField;                     /* Decoded value of the size field */
  int nRead = 0;                  /* Bytes consumed from buffer p */

  fts5FastGetVarint32(p, nRead, nField);
  assert_nc( nField>=0 );
  *pnSz = nField/2;
  *pbDel = nField & 0x0001;
  return nRead;
}
/*
** On entry Fts5SegIter.iLeafOffset is the offset of a position-list size
** field on the current leaf. This function decodes that field into:
**
**   Fts5SegIter.nPos
**   Fts5SegIter.bDel
**
** and advances Fts5SegIter.iLeafOffset past it, so that it indicates the
** first byte of position list content (if any).
**
** For detail=none tables there is no varint size field. Instead, up to
** two 0x00 bytes located before the end of the doclist are consumed:
** none leaves (bDel=0, nPos=1), one gives (bDel=1, nPos=0) and two
** give (bDel=1, nPos=1).
**
** This is a no-op if p->rc is not SQLITE_OK when it is called.
*/
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
  int iOff;                       /* Offset to read at */

  if( p->rc!=SQLITE_OK ) return;

  iOff = pIter->iLeafOffset;
  ASSERT_SZLEAF_OK(pIter->pLeaf);
  if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
    int nSz;
    fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
    pIter->bDel = (nSz & 0x0001);
    pIter->nPos = nSz>>1;
    assert_nc( pIter->nPos>=0 );
  }else{
    const u8 *aLeaf = pIter->pLeaf->p;
    int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);

    pIter->bDel = 0;
    pIter->nPos = 1;
    if( iOff<iEod && aLeaf[iOff]==0 ){
      pIter->bDel = 1;
      iOff++;
      if( iOff<iEod && aLeaf[iOff]==0 ){
        /* Second 0x00 byte: nPos keeps the value 1 assigned above */
        iOff++;
      }else{
        pIter->nPos = 0;
      }
    }
  }
  pIter->iLeafOffset = iOff;
}
staticvoid fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
i64 iOff = pIter->iLeafOffset;
/* ** Fts5SegIter.iLeafOffset currently points to the first byte of the ** "nSuffix" field of a term. Function parameter nKeep contains the value ** of the "nPrefix" field (if there was one - it is passed 0 if this is ** the first term in the segment). ** ** This function populates: ** ** Fts5SegIter.term ** Fts5SegIter.rowid ** ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of ** the first position list. The position list belonging to document ** (Fts5SegIter.iRowid).
*/ staticvoid fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
i64 iOff = pIter->iLeafOffset; /* Offset to read at */ int nNew; /* Bytes of new data */
/*
** Allocate a tombstone hash page array object (pIter->pTombArray) for
** the iterator passed as the second argument. If an OOM error occurs,
** leave an error in the Fts5Index object.
**
** Nothing is allocated if the segment has no tombstone pages
** (pSeg->nPgTombstone<=0). On success the new array is zeroed by
** sqlite3Fts5MallocZero(), its nTombstone field is set to the number of
** tombstone pages and its reference count to 1.
*/
static void fts5SegIterAllocTombstone(Fts5Index *p, Fts5SegIter *pIter){
  const int nTomb = pIter->pSeg->nPgTombstone;
  if( nTomb>0 ){
    /* Compute the size in 64 bits. nTomb comes from the stored structure
    ** record, so the product must not be narrowed to a 32-bit int. */
    i64 nByte = (i64)nTomb * (i64)sizeof(Fts5Data*)
              + (i64)sizeof(Fts5TombstoneArray);
    Fts5TombstoneArray *pNew;
    pNew = (Fts5TombstoneArray*)sqlite3Fts5MallocZero(&p->rc, nByte);
    if( pNew ){
      pNew->nTombstone = nTomb;
      pNew->nRef = 1;
      pIter->pTombArray = pNew;
    }
  }
}
/* ** Initialize the iterator object pIter to iterate through the entries in ** segment pSeg. The iterator is left pointing to the first entry when ** this function returns. ** ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If ** an error has already occurred when this function is called, it is a no-op.
*/ staticvoid fts5SegIterInit(
Fts5Index *p, /* FTS index object */
Fts5StructureSegment *pSeg, /* Description of segment */
Fts5SegIter *pIter /* Object to populate */
){ if( pSeg->pgnoFirst==0 ){ /* This happens if the segment is being used as an input to an incremental ** merge and all data has already been "trimmed". See function ** fts5TrimSegments() for details. In this case leave the iterator empty. ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
** at EOF already. */
assert( pIter->pLeaf==0 ); return;
}
/* ** This function is only ever called on iterators created by calls to ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set. ** ** The iterator is in an unusual state when this function is called: the ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of ** the position-list size field for the first relevant rowid on the page. ** Fts5SegIter.rowid is set, but nPos and bDel are not. ** ** This function advances the iterator so that it points to the last ** relevant rowid on the page and, if necessary, initializes the ** aRowidOffset[] and iRowidOffset variables. At this point the iterator ** is in its regular state - Fts5SegIter.iLeafOffset points to the first ** byte of the position list content associated with said rowid.
*/ staticvoid fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ int eDetail = p->pConfig->eDetail; int n = pIter->pLeaf->szLeaf; int i = pIter->iLeafOffset;
u8 *a = pIter->pLeaf->p; int iRowidOffset = 0;
if( n>pIter->iEndofDoclist ){
n = pIter->iEndofDoclist;
}
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = 0; while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
Fts5Data *pNew;
pIter->iLeafPgno--;
pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
pIter->pSeg->iSegid, pIter->iLeafPgno
)); if( pNew ){ /* iTermLeafOffset may be equal to szLeaf if the term is the last ** thing on the page - i.e. the first rowid is on the following page.
** In this case leave pIter->pLeaf==0, this iterator is at EOF. */ if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
assert( pIter->pLeaf==0 ); if( pIter->iTermLeafOffset<pNew->szLeaf ){
pIter->pLeaf = pNew;
pIter->iLeafOffset = pIter->iTermLeafOffset;
}
}else{ int iRowidOff;
iRowidOff = fts5LeafFirstRowidOff(pNew); if( iRowidOff ){ if( iRowidOff>=pNew->szLeaf ){
p->rc = FTS5_CORRUPT;
}else{
pIter->pLeaf = pNew;
pIter->iLeafOffset = iRowidOff;
}
}
}
/*
** Return true if multi-iterator pIter currently points to a delete
** marker - an entry with a 0 byte position-list.
**
** The entry examined is that of the segment iterator indicated by
** pIter->aFirst[1]. False is returned if p->rc is not SQLITE_OK or if
** that segment iterator has no current leaf.
*/
static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
  int iFirst = pIter->aFirst[1].iFirst;
  Fts5SegIter *pFirst = &pIter->aSeg[iFirst];

  if( p->rc!=SQLITE_OK ) return 0;
  if( pFirst->pLeaf==0 ) return 0;
  return pFirst->nPos==0;
}
/* ** Advance iterator pIter to the next entry. ** ** This version of fts5SegIterNext() is only used by reverse iterators.
*/ staticvoid fts5SegIterNext_Reverse(
Fts5Index *p, /* FTS5 backend object */
Fts5SegIter *pIter, /* Iterator to advance */ int *pbUnused /* Unused */
){
assert( pIter->flags & FTS5_SEGITER_REVERSE );
assert( pIter->pNextLeaf==0 );
UNUSED_PARAM(pbUnused);
/* ** Advance iterator pIter to the next entry. ** ** This version of fts5SegIterNext() is only used if detail=none and the ** iterator is not a reverse direction iterator.
*/ staticvoid fts5SegIterNext_None(
Fts5Index *p, /* FTS5 backend object */
Fts5SegIter *pIter, /* Iterator to advance */ int *pbNewTerm /* OUT: Set for new term */
){ int iOff;
/*
** Advance iterator pIter to the next entry.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
** is not considered an error if the iterator reaches EOF. If an error has
** already occurred when this function is called, it is a no-op.
**
** If the iterator moves on to a new term and pbNewTerm is not NULL,
** (*pbNewTerm) is set to 1.
**
** NOTE(review): in this copy of the function the locals iOff, a, n and
** pLeaf are never used, and nothing assigns bNewTerm or nKeep after their
** initialization to 0. The code that actually steps the iterator forward
** appears to be missing between the declarations and the EOF check below -
** confirm against the upstream source before relying on this function.
*/ staticvoid fts5SegIterNext(
Fts5Index *p, /* FTS5 backend object */
Fts5SegIter *pIter, /* Iterator to advance */ int *pbNewTerm /* OUT: Set for new term */
){
Fts5Data *pLeaf = pIter->pLeaf; int iOff; int bNewTerm = 0; int nKeep = 0;
u8 *a; int n;
/* Check if the iterator is now at EOF. If so, return early. A
** FTS5_SEGITER_ONETERM iterator that reaches a new term is put at EOF by
** releasing its leaf; any other iterator loads the new term and the
** size of its first position list. */ if( pIter->pLeaf ){ if( bNewTerm ){ if( pIter->flags & FTS5_SEGITER_ONETERM ){
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = 0;
}else{
fts5SegIterLoadTerm(p, pIter, nKeep);
fts5SegIterLoadNPos(p, pIter); if( pbNewTerm ) *pbNewTerm = 1;
}
}else{ /* The following could be done by calling fts5SegIterLoadNPos(). But ** this block is particularly performance critical, so equivalent
** code is inlined. */ int nSz;
assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn );
fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
pIter->bDel = (nSz & 0x0001);
pIter->nPos = nSz>>1;
assert_nc( pIter->nPos>=0 );
}
}
}
/*
** Exchange the values of the two lvalues a and b, both of type T.
**
** The expansion is a brace-enclosed block that declares a temporary
** named "tmp", so neither argument may itself be called tmp. Each
** argument is evaluated more than once.
*/
#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
/* ** Iterator pIter currently points to the first rowid in a doclist. This ** function sets the iterator up so that it iterates in reverse order through ** the doclist.
*/ staticvoid fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
Fts5DlidxIter *pDlidx = pIter->pDlidx;
Fts5Data *pLast = 0; int pgnoLast = 0;
if( pDlidx && p->pConfig->iVersion==FTS5_CURRENT_VERSION ){ int iSegid = pIter->pSeg->iSegid;
pgnoLast = fts5DlidxIterPgno(pDlidx);
pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
}else{
Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
/* Currently, Fts5SegIter.iLeafOffset points to the first byte of ** position-list content for the current rowid. Back it up so that it
** points to the start of the position-list size field. */ int iPoslist; if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
iPoslist = pIter->iTermLeafOffset;
}else{
iPoslist = 4;
}
fts5IndexSkipVarint(pLeaf->p, iPoslist);
pIter->iLeafOffset = iPoslist;
/* If this condition is true then the largest rowid for the current ** term may not be stored on the current page. So search forward to
** see where said rowid really is. */ if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ int pgno;
Fts5StructureSegment *pSeg = pIter->pSeg;
/* The last rowid in the doclist may not be on the current page. Search
** forward to find the page containing the last rowid. */ for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
Fts5Data *pNew = fts5LeafRead(p, iAbs); if( pNew ){ int iRowid, bTermless;
iRowid = fts5LeafFirstRowidOff(pNew);
bTermless = fts5LeafIsTermless(pNew); if( iRowid ){
SWAPVAL(Fts5Data*, pNew, pLast);
pgnoLast = pgno;
}
fts5DataRelease(pNew); if( bTermless==0 ) break;
}
}
}
}
/* If pLast is NULL at this point, then the last rowid for this doclist ** lies on the page currently indicated by the iterator. In this case ** pIter->iLeafOffset is already set to point to the position-list size ** field associated with the first relevant rowid on the page. ** ** Or, if pLast is non-NULL, then it is the page that contains the last ** rowid. In this case configure the iterator so that it points to the ** first rowid on this page.
*/ if( pLast ){ int iOff;
fts5DataRelease(pIter->pLeaf);
pIter->pLeaf = pLast;
pIter->iLeafPgno = pgnoLast;
iOff = fts5LeafFirstRowidOff(pLast); if( iOff>pLast->szLeaf ){
p->rc = FTS5_CORRUPT; return;
}
iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
pIter->iLeafOffset = iOff;
/* ** Iterator pIter currently points to the first rowid of a doclist. ** There is a doclist-index associated with the final term on the current ** page. If the current term is the last term on the page, load the ** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
*/ staticvoid fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ int iSeg = pIter->pSeg->iSegid; int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
/* Check if the current doclist ends on this page. If it does, return ** early without loading the doclist-index (as it belongs to a different
** term. */ if( pIter->iTermLeafPgno==pIter->iLeafPgno
&& pIter->iEndofDoclist<pLeaf->szLeaf
){ return;
}
/* ** The iterator object passed as the second argument currently contains ** no valid values except for the Fts5SegIter.pLeaf member variable. This ** function searches the leaf page for a term matching (pTerm/nTerm). ** ** If the specified term is found on the page, then the iterator is left ** pointing to it. If argument bGe is zero and the term is not found, ** the iterator is left pointing at EOF. ** ** If bGe is non-zero and the specified term is not found, then the ** iterator is left pointing to the smallest term in the segment that ** is larger than the specified term, even if this term is not on the ** current page.
*/ staticvoid fts5LeafSeek(
Fts5Index *p, /* Leave any error code here */ int bGe, /* True for a >= search */
Fts5SegIter *pIter, /* Iterator to seek */ const u8 *pTerm, int nTerm /* Term to search for */
){
u32 iOff; const u8 *a = pIter->pLeaf->p;
u32 n = (u32)pIter->pLeaf->nn;
u32 nMatch = 0;
u32 nKeep = 0;
u32 nNew = 0;
u32 iTermOff;
u32 iPgidx; /* Current offset in pgidx */ int bEndOfPage = 0;
/*
** Return the statement handle cached in p->pIdxSelect, preparing it
** first if it does not exist yet. The statement selects the "pgno" value
** of the %_idx row with the largest term that is less than or equal to
** the bound term, within the bound segment id.
**
** Preparation is delegated to fts5IndexPrepareStmt(), which is also
** handed the sqlite3_mprintf() buffer (presumably taking ownership of it
** - confirm). The value of p->pIdxSelect is returned as is, so the
** result may be NULL if preparation did not produce a statement.
*/
static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
  if( p->pIdxSelect==0 ){
    Fts5Config *pConfig = p->pConfig;
    char *zSql = sqlite3_mprintf(
        "SELECT pgno FROM '%q'.'%q_idx' WHERE "
        "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
        pConfig->zDb, pConfig->zName
    );
    fts5IndexPrepareStmt(p, &p->pIdxSelect, zSql);
  }
  return p->pIdxSelect;
}
/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
** an error has already occurred when this function is called, it is a no-op.
**
** NOTE(review): in this copy of the function iPg and bDlidx are computed
** but never used afterwards, bGe is only referenced by the closing
** assert, and pIter is never written to. The code that opens the leaf
** page and seeks within it appears to be missing between the
** sqlite3_bind_null() call and the assert - confirm against the upstream
** source before relying on this function.
*/ staticvoid fts5SegIterSeekInit(
Fts5Index *p, /* FTS5 backend */ const u8 *pTerm, int nTerm, /* Term to seek to */ int flags, /* Mask of FTS5INDEX_XXX flags */
Fts5StructureSegment *pSeg, /* Description of segment */
Fts5SegIter *pIter /* Object to populate */
){ int iPg = 1; int bGe = (flags & FTS5INDEX_QUERY_SCAN); int bDlidx = 0; /* True if there is a doclist-index */
sqlite3_stmt *pIdxSelect = 0;
/* This block sets stack variable iPg to the leaf page number that may
** contain term (pTerm/nTerm), if it is present in the segment. The value
** read from the "pgno" column carries the page number in its upper bits
** and the doclist-index flag in bit 0. If no row is returned, iPg keeps
** its initial value of 1. */
pIdxSelect = fts5IdxSelectStmt(p); if( p->rc ) return;
sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC); if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
i64 val = sqlite3_column_int(pIdxSelect, 0);
iPg = (int)(val>>1);
bDlidx = (val & 0x0001);
}
/* The term was bound with SQLITE_STATIC; rebinding NULL presumably stops
** the cached statement from referring to the caller's buffer - confirm. */
p->rc = sqlite3_reset(pIdxSelect);
sqlite3_bind_null(pIdxSelect, 2);
/* Either: ** ** 1) an error has occurred, or ** 2) the iterator points to EOF, or ** 3) the iterator points to an entry with term (pTerm/nTerm), or ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points ** to an entry with a term greater than or equal to (pTerm/nTerm).
*/
assert_nc( p->rc!=SQLITE_OK /* 1 */
|| pIter->pLeaf==0 /* 2 */
|| fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */
|| (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */
);
}
/*
** Return the statement handle cached in p->pIdxNextSelect, preparing it
** first if it does not exist yet. This is the SQL that
** fts5SegIterNextInit() uses to find the page to open: it selects the
** "pgno" value of the %_idx row with the smallest term strictly greater
** than the bound term, within the bound segment id.
**
** Preparation is delegated to fts5IndexPrepareStmt(). The value of
** p->pIdxNextSelect is returned as is, so the result may be NULL if
** preparation did not produce a statement.
*/
static sqlite3_stmt *fts5IdxNextStmt(Fts5Index *p){
  if( p->pIdxNextSelect==0 ){
    Fts5Config *pConfig = p->pConfig;
    char *zSql = sqlite3_mprintf(
        "SELECT pgno FROM '%q'.'%q_idx' WHERE "
        "segid=? AND term>? ORDER BY term ASC LIMIT 1",
        pConfig->zDb, pConfig->zName
    );
    fts5IndexPrepareStmt(p, &p->pIdxNextSelect, zSql);
  }
  return p->pIdxNextSelect;
}
/* ** This is similar to fts5SegIterSeekInit(), except that it initializes ** the segment iterator to point to the first term following the page ** with pToken/nToken on it.
*/ staticvoid fts5SegIterNextInit(
Fts5Index *p, constchar *pTerm, int nTerm,
Fts5StructureSegment *pSeg, /* Description of segment */
Fts5SegIter *pIter /* Object to populate */
){ int iPg = -1; /* Page of segment to open */ int bDlidx = 0;
sqlite3_stmt *pSel = 0; /* SELECT to find iPg */
/* ** Initialize the object pIter to point to term pTerm/nTerm within the ** in-memory hash table. If there is no such term in the hash-table, the ** iterator is set to EOF. ** ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If ** an error has already occurred when this function is called, it is a no-op.
*/ staticvoid fts5SegIterHashInit(
Fts5Index *p, /* FTS5 backend */ const u8 *pTerm, int nTerm, /* Term to seek to */ int flags, /* Mask of FTS5INDEX_XXX flags */
Fts5SegIter *pIter /* Object to populate */
){ int nList = 0; const u8 *z = 0; int n = 0;
Fts5Data *pLeaf = 0;
/* The call to sqlite3Fts5HashScanInit() causes the hash table to ** fill the size field of all existing position lists. This means they ** can no longer be appended to. Since the only scenario in which they ** can be appended to is if the previous operation on this table was ** a DELETE, by clearing the Fts5Index.bDelete flag we can avoid this
** possibility altogether. */
p->bDelete = 0;
}else{
p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data),
(constchar*)pTerm, nTerm, (void**)&pLeaf, &nList
); if( pLeaf ){
pLeaf->p = (u8*)&pLeaf[1];
}
z = pTerm;
n = nTerm;
pIter->flags |= FTS5_SEGITER_ONETERM;
}
/*
** Dispose of array ap[], which holds n data records. Every element is
** passed to fts5DataRelease(), in index order, and then the array itself
** is freed with sqlite3_free(). A NULL array pointer is ignored.
*/
static void fts5IndexFreeArray(Fts5Data **ap, int n){
  int iElem;
  if( ap==0 ) return;
  iElem = 0;
  while( iElem<n ){
    fts5DataRelease(ap[iElem]);
    iElem++;
  }
  sqlite3_free(ap);
}
/*
** Drop one reference to tombstone array p. A NULL pointer is ignored.
**
** Once the reference count falls to zero (or below), each of the
** p->nTombstone page records is released with fts5DataRelease() and the
** array object is freed.
*/
static void fts5TombstoneArrayDelete(Fts5TombstoneArray *p){
  int iPg;

  if( p==0 ) return;
  p->nRef--;
  if( p->nRef>0 ) return;

  iPg = 0;
  while( iPg<p->nTombstone ){
    fts5DataRelease(p->apTombstone[iPg]);
    iPg++;
  }
  sqlite3_free(p);
}
/*
** Reset the segment iterator passed as the only argument.
**
** Everything the iterator holds is released first: the term buffer, the
** current and next leaf records, the aRowidOffset[] array, the reference
** to the tombstone array and the doclist-index iterator. The structure is
** then overwritten with zeroes. The Fts5SegIter object itself is not
** freed.
*/
static void fts5SegIterClear(Fts5SegIter *pIter){
  /* Plain heap buffers */
  fts5BufferFree(&pIter->term);
  sqlite3_free(pIter->aRowidOffset);

  /* Leaf page records */
  fts5DataRelease(pIter->pLeaf);
  fts5DataRelease(pIter->pNextLeaf);

  /* Auxiliary objects */
  fts5TombstoneArrayDelete(pIter->pTombArray);
  fts5DlidxIterFree(pIter->pDlidx);

  memset(pIter, 0, sizeof(*pIter));
}
#ifdef SQLITE_DEBUG
/* ** This function is used as part of the big assert() procedure implemented by ** fts5AssertMultiIterSetup(). It ensures that the result currently stored ** in *pRes is the correct result of comparing the current positions of the ** two iterators.
*/ staticvoid fts5AssertComparisonResult(
Fts5Iter *pIter,
Fts5SegIter *p1,
Fts5SegIter *p2,
Fts5CResult *pRes
){ int i1 = p1 - pIter->aSeg; int i2 = p2 - pIter->aSeg;
if( p1->pLeaf || p2->pLeaf ){ if( p1->pLeaf==0 ){
assert( pRes->iFirst==i2 );
}elseif( p2->pLeaf==0 ){
assert( pRes->iFirst==i1 );
}else{ int nMin = MIN(p1->term.n, p2->term.n); int res = fts5Memcmp(p1->term.p, p2->term.p, nMin); if( res==0 ) res = p1->term.n - p2->term.n;
/* ** This function is a no-op unless SQLITE_DEBUG is defined when this module ** is compiled. In that case, this function is essentially an assert() ** statement used to verify that the contents of the pIter->aFirst[] array ** are correct.
*/ staticvoid fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){ if( p->rc==SQLITE_OK ){
Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; int i;
assert( (pFirst->pLeaf==0)==pIter->base.bEof );
/* Check that pIter->iSwitchRowid is set correctly. */ for(i=0; i<pIter->nSeg; i++){
Fts5SegIter *p1 = &pIter->aSeg[i];
assert( p1==pFirst
|| p1->pLeaf==0
|| fts5BufferCompare(&pFirst->term, &p1->term)
|| p1->iRowid==pIter->iSwitchRowid
|| (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
);
}
/*
** Do the comparison necessary to populate pIter->aFirst[iOut].
**
** The aFirst[] array is a binary tournament tree with its root at
** aFirst[1]. For entries in the upper half of the array
** (iOut>=nSeg/2) the two contenders are the adjacent segment iterators
** aSeg[2*(iOut-nSeg/2)] and the one following it. For all other entries
** the contenders are the winners already recorded in aFirst[iOut*2] and
** aFirst[iOut*2+1].
**
** A segment iterator with no leaf (EOF) loses to one that has a leaf.
** Otherwise the smaller term wins, and for equal terms the rowid decides,
** with the direction given by pIter->bRev. pRes->bTermEq is set if the
** two terms compare equal.
**
** If the returned value is non-zero, then it is the index of an entry
** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
** to a key that is a duplicate of another, higher priority,
** segment-iterator in the pIter->aSeg[] array. In that case
** aFirst[iOut].iFirst is not updated. Otherwise 0 is returned.
*/
static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
  int i1;                         /* Index of left-hand Fts5SegIter */
  int i2;                         /* Index of right-hand Fts5SegIter */
  int iRes;
  Fts5SegIter *p1;                /* Left-hand Fts5SegIter */
  Fts5SegIter *p2;                /* Right-hand Fts5SegIter */
  Fts5CResult *pRes = &pIter->aFirst[iOut];

  assert( iOut<pIter->nSeg && iOut>0 );
  assert( pIter->bRev==0 || pIter->bRev==1 );

  /* Work out which two segment iterators meet at tree node iOut. */
  if( iOut>=(pIter->nSeg/2) ){
    i1 = (iOut - pIter->nSeg/2) * 2;
    i2 = i1 + 1;
  }else{
    i1 = pIter->aFirst[iOut*2].iFirst;
    i2 = pIter->aFirst[iOut*2+1].iFirst;
  }
  p1 = &pIter->aSeg[i1];
  p2 = &pIter->aSeg[i2];

  pRes->bTermEq = 0;
  if( p1->pLeaf==0 ){           /* If p1 is at EOF */
    iRes = i2;
  }else if( p2->pLeaf==0 ){     /* If p2 is at EOF */
    iRes = i1;
  }else{
    int res = fts5BufferCompare(&p1->term, &p2->term);
    if( res==0 ){
      assert_nc( i2>i1 );
      assert_nc( i2!=0 );
      pRes->bTermEq = 1;
      if( p1->iRowid==p2->iRowid ){
        /* Exact duplicate key: report i2 so the caller can advance it */
        return i2;
      }
      res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
    }
    assert( res!=0 );
    if( res<0 ){
      iRes = i1;
    }else{
      iRes = i2;
    }
  }

  pRes->iFirst = (u16)iRes;
  return 0;
}
/* ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. ** It is an error if leaf iLeafPgno does not exist. Unless the db is ** a 'secure-delete' db, if it contains no rowids then this is also an error.
*/ staticvoid fts5SegIterGotoPage(
Fts5Index *p, /* FTS5 backend object */
Fts5SegIter *pIter, /* Iterator to advance */ int iLeafPgno
){
assert( iLeafPgno>pIter->iLeafPgno );
/* ** Advance the iterator passed as the second argument until it is at or ** past rowid iFrom. Regardless of the value of iFrom, the iterator is ** always advanced at least once.
*/ staticvoid fts5SegIterNextFrom(
Fts5Index *p, /* FTS5 backend object */
Fts5SegIter *pIter, /* Iterator to advance */
i64 iMatch /* Advance iterator at least this far */
){ int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
Fts5DlidxIter *pDlidx = pIter->pDlidx; int iLeafPgno = pIter->iLeafPgno; int bMove = 1;
/*
** Free the multi-iterator object passed as the only argument. A NULL
** pointer is ignored.
**
** Each of the pIter->nSeg segment iterators is cleared with
** fts5SegIterClear() and the poslist buffer is freed before the object
** itself is released.
*/
static void fts5MultiIterFree(Fts5Iter *pIter){
  int iSeg;
  if( pIter==0 ) return;
  iSeg = 0;
  while( iSeg<pIter->nSeg ){
    fts5SegIterClear(&pIter->aSeg[iSeg]);
    iSeg++;
  }
  fts5BufferFree(&pIter->poslist);
  sqlite3_free(pIter);
}
/*
** Sub-iterator iChanged of pIter has just been advanced. Recompute the
** entries of the pIter->aFirst[] array that depend on it, walking from
** entry (nSeg+iChanged)/2 towards the root by repeated halving and
** stopping once the entry index drops below iMinset or p->rc is set.
**
** Whenever fts5MultiIterDoCompare() reports a duplicate key (non-zero
** return iEq), segment iterator iEq is stepped forward with its xNext()
** method and the walk restarts from entry (nSeg+iEq)/2.
*/
static void fts5MultiIterAdvanced(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
  int iChanged,                   /* Index of sub-iterator just advanced */
  int iMinset                     /* Minimum entry in aFirst[] to set */
){
  int iNode = (pIter->nSeg + iChanged) / 2;

  while( iNode>=iMinset && p->rc==SQLITE_OK ){
    int iEq = fts5MultiIterDoCompare(pIter, iNode);
    if( iEq ){
      Fts5SegIter *pDup = &pIter->aSeg[iEq];
      assert( p->rc==SQLITE_OK );
      pDup->xNext(p, pDup, 0);
      iNode = pIter->nSeg + iEq;
    }
    iNode = iNode / 2;
  }
}
/* ** Sub-iterator iChanged of iterator pIter has just been advanced. It still ** points to the same term though - just a different rowid. This function ** attempts to update the contents of the pIter->aFirst[] accordingly. ** If it does so successfully, 0 is returned. Otherwise 1. ** ** If non-zero is returned, the caller should call fts5MultiIterAdvanced() ** on the iterator instead. That function does the same as this one, except ** that it deals with more complicated cases as well.
*/ staticint fts5MultiIterAdvanceRowid(
Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ int iChanged, /* Index of sub-iterator just advanced */
Fts5SegIter **ppFirst
){
Fts5SegIter *pNew = &pIter->aSeg[iChanged];
/*
** Refresh the summary fields of pIter from the segment iterator that
** pIter->aFirst[1] currently indicates:
**
**   pIter->base.bEof     is set to true if that segment iterator has no
**                        leaf, or false otherwise.
**   pIter->iSwitchRowid  is set to that segment iterator's rowid.
*/
static void fts5MultiIterSetEof(Fts5Iter *pIter){
  int iFirst = pIter->aFirst[1].iFirst;
  Fts5SegIter *pFirst = &pIter->aSeg[iFirst];

  pIter->iSwitchRowid = pFirst->iRowid;
  pIter->base.bEof = (pFirst->pLeaf==0);
}
/*
** The argument to this macro must be a pointer to an Fts5Data structure
** containing a tombstone hash page. This macro returns the key-size of
** the hash-page: 4 if the first byte of the page is 4, or 8 otherwise.
**
** The argument is parenthesized in the expansion so that any pointer
** expression (e.g. "pPg + 1" or "&sPg") may be passed safely.
*/
#define TOMBSTONE_KEYSIZE(pPg) ((pPg)->p[0]==4 ? 4 : 8)
/* ** Query a single tombstone hash table for rowid iRowid. Return true if ** it is found or false otherwise. The tombstone hash table is one of ** nHashTable tables.
*/ staticint fts5IndexTombstoneQuery(
Fts5Data *pHash, /* Hash table page to query */ int nHashTable, /* Number of pages attached to segment */
u64 iRowid /* Rowid to query hash for */
){ constint szKey = TOMBSTONE_KEYSIZE(pHash); constint nSlot = TOMBSTONE_NSLOT(pHash); int iSlot = (iRowid / nHashTable) % nSlot; int nCollide = nSlot;
/* ** Return true if the iterator passed as the only argument points ** to a segment entry for which there is a tombstone. Return false ** if there is no tombstone or if the iterator is already at EOF.
*/ staticint fts5MultiIterIsDeleted(Fts5Iter *pIter){ int iFirst = pIter->aFirst[1].iFirst;
Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
Fts5TombstoneArray *pArray = pSeg->pTombArray;
if( pSeg->pLeaf && pArray ){ /* Figure out which page the rowid might be present on. */ int iPg = ((u64)pSeg->iRowid) % pArray->nTombstone;
assert( iPg>=0 );
/* If tombstone hash page iPg has not yet been loaded from the
** database, load it now. */ if( pArray->apTombstone[iPg]==0 ){
pArray->apTombstone[iPg] = fts5DataRead(pIter->pIndex,
FTS5_TOMBSTONE_ROWID(pSeg->pSeg->iSegid, iPg)
); if( pArray->apTombstone[iPg]==0 ) return 0;
}
/* ** Move the iterator to the next entry. ** ** If an error occurs, an error code is left in Fts5Index.rc. It is not ** considered an error if the iterator reaches EOF, or if it is already at ** EOF when this function is called.
*/ staticvoid fts5MultiIterNext(
Fts5Index *p,
Fts5Iter *pIter, int bFrom, /* True if argument iFrom is valid */
i64 iFrom /* Advance at least as far as this */
){ int bUseFrom = bFrom;
assert( pIter->base.bEof==0 ); while( p->rc==SQLITE_OK ){ int iFirst = pIter->aFirst[1].iFirst; int bNewTerm = 0;
Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
assert( p->rc==SQLITE_OK ); if( bUseFrom && pSeg->pDlidx ){
fts5SegIterNextFrom(p, pSeg, iFrom);
}else{
pSeg->xNext(p, pSeg, &bNewTerm);
}
static Fts5Iter *fts5MultiIterAlloc(
Fts5Index *p, /* FTS5 backend to iterate within */ int nSeg
){
Fts5Iter *pNew;
i64 nSlot; /* Power of two >= nSeg */
/*
** Context object passed (as the void* argument of fts5ChunkIterate())
** to fts5PoslistFilterCallback() when a position list is being
** filtered by column.
*/
typedef struct PoslistCallbackCtx PoslistCallbackCtx;
struct PoslistCallbackCtx {
  Fts5Buffer *pBuf;               /* Append to this buffer */
  Fts5Colset *pColset;            /* Restrict matches to this column */
  int eState;                     /* Filter state. fts5SegiterPoslist() seeds
                                  ** it with fts5IndexColsetTest(pColset, 0),
                                  ** i.e. 0 or 1. */
};
/*
** Context object passed (as the void* argument of fts5ChunkIterate())
** to fts5PoslistOffsetsCallback(). fts5SegiterPoslist() uses it when a
** column filter is present and the detail mode is not FTS5_DETAIL_FULL.
** The caller zeroes the whole structure before setting pBuf and pColset.
*/
typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
struct PoslistOffsetsCtx {
  Fts5Buffer *pBuf;               /* Append to this buffer */
  Fts5Colset *pColset;            /* Restrict matches to this column */
  int iRead;                      /* Callback state; starts at 0 */
  int iWrite;                     /* Callback state; starts at 0 */
};
/*
** Return 1 if column number iCol appears in the pColset->aiCol[] array,
** or 0 if it does not. pColset must not be NULL.
**
** TODO: Make this more efficient! The current implementation is a
** linear scan of all pColset->nCol entries.
*/
static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
  int i;
  for(i=0; i<pColset->nCol; i++){
    if( pColset->aiCol[i]==iCol ) return 1;
  }
  return 0;
}
staticvoid fts5PoslistFilterCallback(
Fts5Index *pUnused, void *pContext, const u8 *pChunk, int nChunk
){
PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
UNUSED_PARAM(pUnused);
assert_nc( nChunk>=0 ); if( nChunk>0 ){ /* Search through to find the first varint with value 1. This is the
** start of the next columns hits. */ int i = 0; int iStart = 0;
/*
** Segment iterator pSeg currently points to a valid entry (not EOF). This
** function appends the position list data for the current entry to
** buffer pBuf. It does not make a copy of the position-list size
** field.
**
** If pColset is NULL the position list is appended unfiltered. Otherwise
** it is filtered by pColset, using fts5PoslistFilterCallback() when the
** table is detail=full and fts5PoslistOffsetsCallback() for any other
** detail mode.
**
** Space for pSeg->nPos bytes plus FTS5_DATA_ZERO_PADDING is reserved with
** fts5BufferGrow(), which is passed &p->rc. If that call returns non-zero
** this function appends nothing.
*/
static void fts5SegiterPoslist(
  Fts5Index *p,
  Fts5SegIter *pSeg,
  Fts5Colset *pColset,
  Fts5Buffer *pBuf
){
  assert( pBuf!=0 );
  assert( pSeg!=0 );
  if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING) ){
    assert( pBuf->p!=0 );
    assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING );

    /* Zero the padding bytes that follow where an unfiltered copy of the
    ** position list would end. */
    memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING);

    if( pColset==0 ){
      fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
    }else{
      if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
        PoslistCallbackCtx sCtx;
        sCtx.pBuf = pBuf;
        sCtx.pColset = pColset;
        /* Initial state depends on whether column 0 is in the colset. */
        sCtx.eState = fts5IndexColsetTest(pColset, 0);
        assert( sCtx.eState==0 || sCtx.eState==1 );
        fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
      }else{
        PoslistOffsetsCtx sCtx;
        memset(&sCtx, 0, sizeof(sCtx));
        sCtx.pBuf = pBuf;
        sCtx.pColset = pColset;
        fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
      }
    }
  }
}
/* ** Parameter pPos points to a buffer containing a position list, size nPos. ** This function filters it according to pColset (which must be non-NULL) ** and sets pIter->base.pData/nData to point to the new position list. ** If memory is required for the new position list, use buffer pIter->poslist. ** Or, if the new position list is a contiguous subset of the input, set ** pIter->base.pData/nData to point directly to it. ** ** This function is a no-op if *pRc is other than SQLITE_OK when it is ** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM ** before returning.
*/ staticvoid fts5IndexExtractColset( int *pRc,
Fts5Colset *pColset, /* Colset to filter on */ const u8 *pPos, int nPos, /* Position list */
Fts5Iter *pIter
){ if( *pRc==SQLITE_OK ){ const u8 *p = pPos; const u8 *aCopy = p; const u8 *pEnd = &p[nPos]; /* One byte past end of position list */ int i = 0; int iCurrent = 0;
/*
** xSetOutputs callback used by detail=full and detail=col tables when no
** column filters are specified.
**
** Sets pIter->base.iRowid and pIter->base.nData from pSeg, then points
** pIter->base.pData either directly into the current leaf page or, if
** the position list runs past the end of that page, at a copy assembled
** in pIter->poslist.
*/
static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
  pIter->base.iRowid = pSeg->iRowid;
  pIter->base.nData = pSeg->nPos;

  if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
    /* All data is stored on the current page. Populate the output
    ** variables to point into the body of the page object. */
    pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  }else{
    /* The data is distributed over two or more pages. Copy it into the
    ** Fts5Iter.poslist buffer and then set the output pointer to point
    ** to this buffer. */
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
    pIter->base.pData = pIter->poslist.p;
  }
}
/*
** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
** against no columns at all).
**
** Only pIter->base.nData is written (set to 0). The segment iterator
** argument is not used.
*/
static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
  UNUSED_PARAM(pSeg);
  pIter->base.nData = 0;
}
/*
** xSetOutputs callback used by detail=col when there is a column filter
** and there are more than 100 columns (tables with 100 or fewer columns
** are documented as using fts5IterSetOutputs_Col100). Also called as a
** fallback from fts5IterSetOutputs_Col100 if the column-list spans more
** than one page.
**
** The filtered list is always built in pIter->poslist, and the outputs
** pIter->base.pData/nData are set to point at that buffer.
*/
static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
  fts5BufferZero(&pIter->poslist);
  fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
  pIter->base.iRowid = pSeg->iRowid;
  pIter->base.pData = pIter->poslist.p;
  pIter->base.nData = pIter->poslist.n;
}
/* ** xSetOutputs callback used when: ** ** * detail=col, ** * there is a column filter, and ** * the table contains 100 or fewer columns. ** ** The last point is to ensure all column numbers are stored as ** single-byte varints.
*/ staticvoid fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
/*
** xSetOutputs callback used by detail=full when there is a column filter.
**
** If the whole position list lies on the current leaf page, it is
** filtered in place by fts5IndexExtractColset(), which is responsible
** for setting pIter->base.pData/nData. Otherwise the list is filtered
** while being copied into pIter->poslist by fts5SegiterPoslist(), and
** the outputs are pointed at that buffer.
*/
static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
  Fts5Colset *pColset = pIter->pColset;
  pIter->base.iRowid = pSeg->iRowid;

  if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
    /* All data is stored on the current page. Populate the output
    ** variables to point into the body of the page object. */
    const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
    int *pRc = &pIter->pIndex->rc;
    fts5BufferZero(&pIter->poslist);
    fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter);
  }else{
    /* The data is distributed over two or more pages. Copy it into the
    ** Fts5Iter.poslist buffer and then set the output pointer to point
    ** to this buffer. */
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
    pIter->base.pData = pIter->poslist.p;
    pIter->base.nData = pIter->poslist.n;
  }
}
/*
** All the component segment-iterators of pIter have been set up. This
** function finishes setup for iterator pIter itself.
*/ staticvoid fts5MultiIterFinishSetup(Fts5Index *p, Fts5Iter *pIter){ int iIter; for(iIter=pIter->nSeg-1; iIter>0; iIter--){ int iEq; if( (iEq = fts5MultiIterDoCompare(pIter, iIter)) ){
Fts5SegIter *pSeg = &pIter->aSeg[iEq]; if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
fts5MultiIterAdvanced(p, pIter, iEq, iIter);
}
}
fts5MultiIterSetEof(pIter);
fts5AssertMultiIterSetup(p, pIter);
/* ** Allocate a new Fts5Iter object. ** ** The new object will be used to iterate through data in structure pStruct. ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel ** is zero or greater, data from the first nSegment segments on level iLevel ** is merged. ** ** The iterator initially points to the first term/rowid entry in the ** iterated data.
*/ staticvoid fts5MultiIterNew(
Fts5Index *p, /* FTS5 backend to iterate within */
Fts5Structure *pStruct, /* Structure of specific index */ int flags, /* FTS5INDEX_QUERY_XXX flags */
Fts5Colset *pColset, /* Colset to filter on (or NULL) */ const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ int iLevel, /* Level to iterate (-1 for all) */ int nSegment, /* Number of segments to merge (iLevel>=0) */
Fts5Iter **ppOut /* New object */
){ int nSeg = 0; /* Number of segment-iters in use */ int iIter = 0; /* */ int iSeg; /* Used to iterate through segments */
Fts5StructureLevel *pLvl;
Fts5Iter *pNew;
/* If the above was successful, each component iterator now points ** to the first entry in its segment. In this case initialize the ** aFirst[] array. Or, if an error has occurred, free the iterator
** object and set the output variable to NULL. */ if( p->rc==SQLITE_OK ){
fts5MultiIterFinishSetup(p, pNew);
}else{
fts5MultiIterFree(pNew);
*ppOut = 0;
}
/*
** Create an Fts5Iter that iterates through the doclist provided
** as the second argument.
**
** The iterator is allocated with fts5MultiIterAlloc(p, 2) and only
** segment-iterator slot 1 is populated. If that allocation returns NULL,
** (*ppOut) is left untouched.
**
** Ownership of pData: if pData->szLeaf>0 the buffer is handed to the new
** iterator (stored in its pLeaf field). In every other case, including
** allocation failure, it is released here with fts5DataRelease(). An
** empty doclist (szLeaf==0) produces an iterator that is already at EOF.
*/
static void fts5MultiIterNew2(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Data *pData,                /* Doclist to iterate through */
  int bDesc,                      /* True for descending rowid order */
  Fts5Iter **ppOut                /* New object */
){
  Fts5Iter *pNew;
  pNew = fts5MultiIterAlloc(p, 2);
  if( pNew ){
    Fts5SegIter *pIter = &pNew->aSeg[1];

    pIter->flags = FTS5_SEGITER_ONETERM;
    if( pData->szLeaf>0 ){
      pIter->pLeaf = pData;
      /* The doclist begins with a varint holding the first rowid. */
      pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
      pIter->iEndofDoclist = pData->nn;
      pNew->aFirst[1].iFirst = 1;
      if( bDesc ){
        pNew->bRev = 1;
        pIter->flags |= FTS5_SEGITER_REVERSE;
        fts5SegIterReverseInitPage(p, pIter);
      }else{
        fts5SegIterLoadNPos(p, pIter);
      }
      /* pData now belongs to the iterator; do not release it below. */
      pData = 0;
    }else{
      pNew->base.bEof = 1;
    }
    fts5SegIterSetNext(p, pIter);

    *ppOut = pNew;
  }

  fts5DataRelease(pData);
}
/*
** Return true if the iterator is at EOF or if an error has occurred.
** False otherwise.
**
** pIter may only be NULL if p->rc is already set to an error code. In
** that case p->rc is non-zero and pIter is never dereferenced.
*/
static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
  assert( pIter!=0 || p->rc!=SQLITE_OK );
  /* When no error has occurred, the bEof flag must agree with whether
  ** the first segment iterator still has a leaf page. */
  assert( p->rc!=SQLITE_OK
      || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
  );
  return (p->rc || pIter->base.bEof);
}
/*
** Return the rowid of the entry that the iterator currently points
** to, as read from the segment iterator selected by aFirst[1].iFirst.
** If the iterator points to EOF when this function is called the
** results are undefined.
*/
static i64 fts5MultiIterRowid(Fts5Iter *pIter){
  Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  assert( pFirst->pLeaf );
  return pFirst->iRowid;
}
/*
** Move the iterator to the next entry at or following iMatch.
**
** The iterator is always advanced at least once. Advancing stops when
** fts5MultiIterEof() reports EOF or an error, or when the current rowid
** is >= iMatch (forward iterators, bRev==0) or <= iMatch (reverse
** iterators, bRev!=0).
*/
static void fts5MultiIterNextFrom(
  Fts5Index *p,
  Fts5Iter *pIter,
  i64 iMatch
){
  while( 1 ){
    i64 iRowid;
    fts5MultiIterNext(p, pIter, 1, iMatch);
    if( fts5MultiIterEof(p, pIter) ) break;
    iRowid = fts5MultiIterRowid(pIter);
    if( pIter->bRev==0 && iRowid>=iMatch ) break;
    if( pIter->bRev!=0 && iRowid<=iMatch ) break;
  }
}
/* ** Return a pointer to a buffer containing the term associated with the ** entry that the iterator currently points to.
*/ staticconst u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
*pn = p->term.n; return p->term.p;
}
/* ** Allocate a new segment-id for the structure pStruct. The new segment ** id must be between 1 and 65335 inclusive, and must not be used by ** any currently existing segment. If a free segment id cannot be found, ** SQLITE_FULL is returned. ** ** If an error has already occurred, this function is a no-op. 0 is ** returned in this case.
*/ staticint fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ int iSegid = 0;
if( p->rc==SQLITE_OK ){ if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
p->rc = SQLITE_FULL;
}else{ /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
** array is 63 elements, or 252 bytes, in size. */
u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32]; int iLvl, iSeg; int i;
u32 mask;
memset(aUsed, 0, sizeof(aUsed)); for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid; if( iId<=FTS5_MAX_SEGMENT && iId>0 ){
aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
}
}
}
/*
** Discard all data currently cached in the hash-tables.
**
** If a hash table exists it is cleared, and the pending-data and
** pending-row counters and the saved flush error code are reset. The
** contentless-delete counter is reset unconditionally.
*/
static void fts5IndexDiscardData(Fts5Index *p){
  assert( p->pHash || p->nPendingData==0 );
  if( p->pHash ){
    sqlite3Fts5HashClear(p->pHash);
    p->nPendingData = 0;
    p->nPendingRow = 0;
    p->flushRc = SQLITE_OK;
  }
  p->nContentlessDelete = 0;
}
/*
** Return the size of the prefix, in bytes, that buffer
** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
**
** Buffer (pNew/<length-unknown>) is guaranteed to be greater
** than buffer (pOld/nOld).
**
** At most nOld bytes of each buffer are examined, so the return value is
** in the range 0..nOld. The caller must ensure that pNew is readable up
** to the first byte at which it differs from pOld, or for nOld bytes if
** there is no such byte.
*/
static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
  int i;
  for(i=0; i<nOld; i++){
    if( pOld[i]!=pNew[i] ) break;
  }
  return i;
}
/*
** Reset the in-memory doclist-index levels held in pWriter->aDlidx[].
**
** Levels are visited from index 0 upwards, stopping at the first level
** whose buffer is empty. For each level visited, the buffer is written
** to the database under its FTS5_DLIDX_ROWID() key if bFlush is true.
** The buffer is then zeroed and bPrevValid is cleared.
**
** When bFlush is true there must be at least one level and level 0 must
** be non-empty.
*/
static void fts5WriteDlidxClear(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int bFlush                      /* If true, write dlidx to disk */
){
  int i;
  assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
  for(i=0; i<pWriter->nDlidx; i++){
    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
    if( pDlidx->buf.n==0 ) break;
    if( bFlush ){
      assert( pDlidx->pgno!=0 );
      fts5DataWrite(p,
          FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
          pDlidx->buf.p, pDlidx->buf.n
      );
    }
    sqlite3Fts5BufferZero(&pDlidx->buf);
    pDlidx->bPrevValid = 0;
  }
}
/*
** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
** Any new array elements are zeroed before returning.
**
** This is a no-op if p->rc is already set or if nLvl is less than the
** current array size. If the reallocation fails, p->rc is set to
** SQLITE_NOMEM and the existing array is left in place.
**
** Returns the value of p->rc on exit.
*/
static int fts5WriteDlidxGrow(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int nLvl
){
  if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
    Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64(
        pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
    );
    if( aDlidx==0 ){
      p->rc = SQLITE_NOMEM;
    }else{
      /* Zero only the newly added tail of the array. */
      size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
      memset(&aDlidx[pWriter->nDlidx], 0, nByte);
      pWriter->aDlidx = aDlidx;
      pWriter->nDlidx = nLvl;
    }
  }
  return p->rc;
}
/*
** If the current doclist-index accumulating in pWriter->aDlidx[] is large
** enough, flush it to disk and return 1. Otherwise discard it and return
** zero.
**
** "Large enough" means that level 0 of the doclist-index is non-empty
** and that pWriter->nEmpty has reached FTS5_MIN_DLIDX_SIZE. In both
** cases the in-memory doclist-index is cleared and pWriter->nEmpty is
** reset to 0.
*/
static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
  int bFlag = 0;

  /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
  ** to the database, also write the doclist-index to disk.  */
  if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
    bFlag = 1;
  }
  fts5WriteDlidxClear(p, pWriter, bFlag);
  pWriter->nEmpty = 0;
  return bFlag;
}
/* ** This function is called whenever processing of the doclist for the ** last term on leaf page (pWriter->iBtPage) is completed. ** ** The doclist-index for that term is currently stored in-memory within the ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function ** writes it out to disk. Or, if it is too small to bother with, discards ** it. ** ** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
*/ staticvoid fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){ int bFlag;
/*
** This is called once for each leaf page except the first that contains
** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
** is larger than all terms written to earlier leaves, and equal to or
** smaller than the first term on the new leaf.
**
** Any pending b-tree entry is first flushed via fts5WriteFlushBtree().
** Then, if no error has occurred, the split-key is saved in
** pWriter->btterm and the current leaf page number in pWriter->iBtPage.
**
** If an error occurs, an error code is left in Fts5Index.rc. If an error
** has already occurred when this function is called, it is a no-op.
*/
static void fts5WriteBtreeTerm(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter,         /* Writer object */
  int nTerm, const u8 *pTerm      /* First term on new page */
){
  fts5WriteFlushBtree(p, pWriter);
  if( p->rc==SQLITE_OK ){
    fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
    pWriter->iBtPage = pWriter->writer.pgno;
  }
}
/*
** This function is called when flushing a leaf page that contains no
** terms at all to disk.
**
** It always increments pWriter->nEmpty. It may also append a single
** 0x00 varint to level 0 of the doclist-index (see below).
*/
static void fts5WriteBtreeNoTerm(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter          /* Writer object */
){
  /* If there were no rowids on the leaf page either and the doclist-index
  ** has already been started, append an 0x00 byte to it.  */
  if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
    assert( pDlidx->bPrevValid );
    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
  }

  /* Increment the "number of sequential leaves without a term" counter. */
  pWriter->nEmpty++;
}
static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
i64 iRowid; int iOff;
/* ** Rowid iRowid has just been appended to the current leaf page. It is the ** first on the page. This function appends an appropriate entry to the current ** doclist-index.
*/ staticvoid fts5WriteDlidxAppend(
Fts5Index *p,
Fts5SegWriter *pWriter,
i64 iRowid
){ int i; int bDone = 0;
if( pDlidx->buf.n>=p->pConfig->pgsz ){ /* The current doclist-index page is full. Write it to disk and push ** a copy of iRowid (which will become the first rowid on the next ** doclist-index leaf page) up into the next level of the b-tree ** hierarchy. If the node being flushed is currently the root node,
** also push its first rowid upwards. */
pDlidx->buf.p[0] = 0x01; /* Not the root node */
fts5DataWrite(p,
FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
pDlidx->buf.p, pDlidx->buf.n
);
fts5WriteDlidxGrow(p, pWriter, i+2);
pDlidx = &pWriter->aDlidx[i]; if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
/* This was the root node. Push its first rowid up to the new root. */
pDlidx[1].pgno = pDlidx->pgno;
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
pDlidx[1].bPrevValid = 1;
pDlidx[1].iPrev = iFirst;
}
/* Set the szLeaf header field. */
assert( 0==fts5GetU16(&pPage->buf.p[2]) );
fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);
if( pWriter->bFirstTermInPage ){ /* No term was written to this page. */
assert( pPage->pgidx.n==0 );
fts5WriteBtreeNoTerm(p, pWriter);
}else{ /* Append the pgidx to the page buffer. Set the szLeaf header field. */
fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
}
/* Write the page out to disk */
iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
/* Initialize the next page. */
fts5BufferZero(&pPage->buf);
fts5BufferZero(&pPage->pgidx);
fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
pPage->iPrevPgidx = 0;
pPage->pgno++;
/* Increase the leaves written counter */
pWriter->nLeafWritten++;
/* The new leaf holds no terms or rowids */
pWriter->bFirstTermInPage = 1;
pWriter->bFirstRowidInPage = 1;
}
/* ** Append term pTerm/nTerm to the segment being written by the writer passed ** as the second argument. ** ** If an error occurs, set the Fts5Index.rc error code. If an error has ** already occurred, this function is a no-op.
*/ staticvoid fts5WriteAppendTerm(
Fts5Index *p,
Fts5SegWriter *pWriter, int nTerm, const u8 *pTerm
){ int nPrefix; /* Bytes of prefix compression for term */
Fts5PageWriter *pPage = &pWriter->writer;
Fts5Buffer *pPgidx = &pWriter->writer.pgidx; int nMin = MIN(pPage->term.n, nTerm);
if( pWriter->bFirstTermInPage ){
nPrefix = 0; if( pPage->pgno!=1 ){ /* This is the first term on a leaf that is not the leftmost leaf in ** the segment b-tree. In this case it is necessary to add a term to ** the b-tree hierarchy that is (a) larger than the largest term ** already written to the segment and (b) smaller than or equal to ** this term. In other words, a prefix of (pTerm/nTerm) that is one ** byte longer than the longest prefix (pTerm/nTerm) shares with the ** previous term. ** ** Usually, the previous term is available in pPage->term. The exception ** is if this is the first term written in an incremental-merge step. ** In this case the previous term is not available, so just write a ** copy of (pTerm/nTerm) into the parent node. This is slightly
** inefficient, but still correct. */ int n = nTerm; if( pPage->term.n ){
n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm);
}
fts5WriteBtreeTerm(p, pWriter, n, pTerm); if( p->rc!=SQLITE_OK ) return;
pPage = &pWriter->writer;
}
}else{
nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm);
fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
}
/* Append the number of bytes of new data, then the term data itself
** to the page. */
fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
/* If this is to be the first rowid written to the page, set the ** rowid-pointer in the page-header. Also append a value to the dlidx
** buffer, in case a doclist-index is required. */ if( pWriter->bFirstRowidInPage ){
fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
fts5WriteDlidxAppend(p, pWriter, iRowid);
}
/* Grow the two buffers to pgsz + padding bytes in size. */
sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);
if( p->rc==SQLITE_OK ){ /* Initialize the 4-byte leaf-page header to 0x00. */
memset(pWriter->writer.buf.p, 0, 4);
pWriter->writer.buf.n = 4;
/* Bind the current output segment id to the index-writer. This is an ** optimization over binding the same value over and over as rows are
** inserted into %_idx by the current writer. */
sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
}
}
/*
** Iterator pIter was used to iterate through the input segments of an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
*/ staticvoid fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){ int i;
Fts5Buffer buf;
memset(&buf, 0, sizeof(Fts5Buffer)); for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK; i++){
Fts5SegIter *pSeg = &pIter->aSeg[i]; if( pSeg->pSeg==0 ){ /* no-op */
}elseif( pSeg->pLeaf==0 ){ /* All keys from this input segment have been transfered to the output. ** Set both the first and last page-numbers to 0 to indicate that the
** segment is now empty. */
pSeg->pSeg->pgnoLast = 0;
pSeg->pSeg->pgnoFirst = 0;
}else{ int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */
i64 iLeafRowid;
Fts5Data *pData; int iId = pSeg->pSeg->iSegid;
u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
pData = fts5LeafRead(p, iLeafRowid); if( pData ){ if( iOff>pData->szLeaf ){ /* This can occur if the pages that the segments occupy overlap - if ** a single page has been assigned to more than one segment. In ** this case a prior iteration of this loop may have corrupted the
** segment currently being trimmed. */
p->rc = FTS5_CORRUPT;
}else{
fts5BufferZero(&buf);
fts5BufferGrow(&p->rc, &buf, pData->nn);
fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
fts5BufferAppendBlob(&p->rc, &buf,pData->szLeaf-iOff,&pData->p[iOff]); if( p->rc==SQLITE_OK ){ /* Set the szLeaf field */
fts5PutU16(&buf.p[2], (u16)buf.n);
}
/* Set up the new page-index array */
fts5BufferAppendVarint(&p->rc, &buf, 4); if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
&& pSeg->iEndofDoclist<pData->szLeaf
&& pSeg->iPgidxOff<=pData->nn
){ int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
fts5BufferAppendBlob(&p->rc, &buf,
pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
);
}
/* **
*/ staticvoid fts5IndexMergeLevel(
Fts5Index *p, /* FTS5 backend object */
Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */ int iLvl, /* Level to read input from */ int *pnRem /* Write up to this many output leaves */
){
Fts5Structure *pStruct = *ppStruct;
Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
Fts5StructureLevel *pLvlOut;
Fts5Iter *pIter = 0; /* Iterator to read input data */ int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ int nInput; /* Number of input segments */
Fts5SegWriter writer; /* Writer object */
Fts5StructureSegment *pSeg; /* Output segment */
Fts5Buffer term; int bOldest; /* True if the output segment is the oldest */ int eDetail = p->pConfig->eDetail; constint flags = FTS5INDEX_QUERY_NOOUTPUT; int bTermWritten = 0; /* True if current term already output */
/* Extend the Fts5Structure object as required to ensure the output
** segment exists. */ if( iLvl==pStruct->nLevel-1 ){
fts5StructureAddLevel(&p->rc, ppStruct);
pStruct = *ppStruct;
}
fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0); if( p->rc ) return;
pLvl = &pStruct->aLevel[iLvl];
pLvlOut = &pStruct->aLevel[iLvl+1];
fts5WriteInit(p, &writer, iSegid);
/* Add the new segment to the output level */
pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
pLvlOut->nSeg++;
pSeg->pgnoFirst = 1;
pSeg->iSegid = iSegid;
pStruct->nSegment++;
/* Read input from all segments in the input level */
nInput = pLvl->nSeg;
/* Set the range of origins that will go into the output segment. */ if( pStruct->nOriginCntr>0 ){
pSeg->iOrigin1 = pLvl->aSeg[0].iOrigin1;
pSeg->iOrigin2 = pLvl->aSeg[pLvl->nSeg-1].iOrigin2;
}
}
bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
assert( iLvl>=0 ); for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
fts5MultiIterEof(p, pIter)==0;
fts5MultiIterNext(p, pIter, 0, 0)
){
Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; int nPos; /* position-list size field value */ int nTerm; const u8 *pTerm;
if( p->rc==SQLITE_OK && bTermWritten==0 ){ /* This is a new term. Append a term to the output segment. */
fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
bTermWritten = 1;
}
/* Append the rowid to the output */ /* WRITEPOSLISTSIZE */
fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
/* Flush the last leaf page to disk. Set the output segment b-tree height
** and last leaf page number at the same time. */
fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
/* ** If this is not a contentless_delete=1 table, or if the 'deletemerge' ** configuration option is set to 0, then this function always returns -1. ** Otherwise, it searches the structure object passed as the second argument ** for a level suitable for merging due to having a large number of ** tombstones in the tombstone hash. If one is found, its index is returned. ** Otherwise, if there is no suitable level, -1.
*/ staticint fts5IndexFindDeleteMerge(Fts5Index *p, Fts5Structure *pStruct){
Fts5Config *pConfig = p->pConfig; int iRet = -1; if( pConfig->bContentlessDelete && pConfig->nDeleteMerge>0 ){ int ii; int nBest = 0;
/* If pLvl is already the input level to an ongoing merge, look no ** further for a merge candidate. The caller should be allowed to
** continue merging from pLvl first. */ if( pLvl->nMerge ) break;
}
} return iRet;
}
/* ** Do up to nPg pages of automerge work on the index. ** ** Return true if any changes were actually made, or false otherwise.
*/ staticint fts5IndexMerge(
Fts5Index *p, /* FTS5 backend object */
Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ int nPg, /* Pages of work to do */ int nMin /* Minimum number of segments to merge */
){ int nRem = nPg; int bRet = 0;
Fts5Structure *pStruct = *ppStruct; while( nRem>0 && p->rc==SQLITE_OK ){ int iLvl; /* To iterate through levels */ int iBestLvl = 0; /* Level offering the most input segments */ int nBest = 0; /* Number of input segments on best level */
/* Set iBestLvl to the level to read input segments from. Or to -1 if
** there is no level suitable to merge segments from. */
assert( pStruct->nLevel>0 ); for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; if( pLvl->nMerge ){ if( pLvl->nMerge>nBest ){
iBestLvl = iLvl;
nBest = nMin;
} break;
} if( pLvl->nSeg>nBest ){
nBest = pLvl->nSeg;
iBestLvl = iLvl;
}
} if( nBest<nMin ){
iBestLvl = fts5IndexFindDeleteMerge(p, pStruct);
}
/* ** A total of nLeaf leaf pages of data has just been flushed to a level-0 ** segment. This function updates the write-counter accordingly and, if ** necessary, performs incremental merge work. ** ** If an error occurs, set the Fts5Index.rc error code. If an error has ** already occurred, this function is a no-op.
*/ staticvoid fts5IndexAutomerge(
Fts5Index *p, /* FTS5 backend object */
Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ int nLeaf /* Number of output leaves just written */
){ if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0) ){
Fts5Structure *pStruct = *ppStruct;
u64 nWrite; /* Initial value of write-counter */ int nWork; /* Number of work-quanta to perform */ int nRem; /* Number of leaf pages left to write */
/* Update the write-counter. While doing so, set nWork. */
nWrite = pStruct->nWriteCounter;
nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
pStruct->nWriteCounter += nLeaf;
nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
/*
** Buffer aBuf[] contains a list of varints, all small enough to fit
** in a 32-bit integer. Return the size of the largest prefix of this
** list nMax bytes or less in size.
**
** The first varint is always included, so the return value may exceed
** nMax if that varint alone is longer than nMax bytes. After that,
** whole varints are added only while the running total stays at or
** below nMax.
*/
static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
  int ret;
  u32 dummy;
  ret = fts5GetVarint32(aBuf, dummy);
  if( ret<nMax ){
    while( 1 ){
      int i = fts5GetVarint32(&aBuf[ret], dummy);
      if( (ret + i) > nMax ) break;
      ret += i;
    }
  }
  return ret;
}
/*
** Execute the SQL statement:
**
**    DELETE FROM %_idx WHERE (segid, (pgno/2)) = ($iSegid, $iPgno);
**
** This is used when a secure-delete operation removes the last term
** from a segment leaf page. In that case the %_idx entry is removed
** too. This is done to ensure that if all instances of a token are
** removed from an fts5 database in secure-delete mode, no trace of
** the token itself remains in the database.
**
** This function is a no-op when iPgno==1. The DELETE statement is
** prepared on first use and cached in p->pDeleteFromIdx. If no error has
** occurred, the statement is bound, stepped and reset, and the result of
** sqlite3_reset() is stored in p->rc.
*/
static void fts5SecureDeleteIdxEntry(
  Fts5Index *p,                   /* FTS5 backend object */
  int iSegid,                     /* Id of segment to delete entry for */
  int iPgno                       /* Page number within segment */
){
  if( iPgno!=1 ){
    assert( p->pConfig->iVersion==FTS5_CURRENT_VERSION_SECUREDELETE );
    if( p->pDeleteFromIdx==0 ){
      fts5IndexPrepareStmt(p, &p->pDeleteFromIdx, sqlite3_mprintf(
          "DELETE FROM '%q'.'%q_idx' WHERE (segid, (pgno/2)) = (?1, ?2)",
          p->pConfig->zDb, p->pConfig->zName
      ));
    }
    if( p->rc==SQLITE_OK ){
      sqlite3_bind_int(p->pDeleteFromIdx, 1, iSegid);
      sqlite3_bind_int(p->pDeleteFromIdx, 2, iPgno);
      /* The return value of sqlite3_step() is ignored here; p->rc is
      ** taken from the sqlite3_reset() call that follows. */
      sqlite3_step(p->pDeleteFromIdx);
      p->rc = sqlite3_reset(p->pDeleteFromIdx);
    }
  }
}
/*
** This is called when a secure-delete operation removes a position-list
** that overflows onto segment page iPgno of segment pSeg. This function
** rewrites node iPgno, and possibly one or more of its right-hand peers,
** to remove this portion of the position list.
**
** Output variable (*pbLastInDoclist) is set to true if the position-list
** removed is followed by a new term or the end-of-segment, or false if
** it is followed by another rowid/position list.
**
** If a page fails the sanity checks below, p->rc is set to FTS5_CORRUPT.
**
** NOTE(review): the statements that read each leaf and that define iNext,
** aPg and iRowid, as well as the code that builds aIdx/nIdx, are not
** visible in this excerpt - confirm against the full source.
*/ staticvoid fts5SecureDeleteOverflow(
Fts5Index *p,
Fts5StructureSegment *pSeg, int iPgno, int *pbLastInDoclist
){ constint bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE); int pgno;
Fts5Data *pLeaf = 0;
/* This function must not be invoked with iPgno==1. */
assert( iPgno!=1 );
if( iNext==0 ){ /* The page contains no terms or rowids. Replace it with an empty
** page and move on to the right-hand peer. */ const u8 aEmpty[] = {0x00, 0x00, 0x00, 0x04};
assert_nc( bDetailNone==0 || pLeaf->nn==4 ); if( bDetailNone==0 ) fts5DataWrite(p, iRowid, aEmpty, sizeof(aEmpty));
fts5DataRelease(pLeaf);
pLeaf = 0;
}elseif( bDetailNone ){ break;
/* iNext must lie after the 4-byte page header and before szLeaf, and the
** page must be at least szLeaf bytes in size. Anything else is reported
** as corruption. */
}elseif( iNext>=pLeaf->szLeaf || pLeaf->nn<pLeaf->szLeaf || iNext<4 ){
p->rc = FTS5_CORRUPT; break;
}else{ int nShift = iNext - 4; int nPg;
/* nShift is the number of bytes, immediately after the 4-byte header,
** that are dropped from the page by the memmove() below. */
int nIdx = 0;
u8 *aIdx = 0;
/* Unless the current page footer is 0 bytes in size (in which case ** the new page footer will be as well), allocate and populate a ** buffer containing the new page footer. Set stack variables aIdx
** and nIdx accordingly. */ if( pLeaf->nn>pLeaf->szLeaf ){ int iFirst = 0; int i1 = pLeaf->szLeaf; int i2 = 0;
/* Modify the contents of buffer aPg[]. Set nPg to the new size
** in bytes. The new page is always smaller than the old. */
nPg = pLeaf->szLeaf - nShift;
memmove(&aPg[4], &aPg[4+nShift], nPg-4);
/* Store the new data size in header bytes 2-3. If header bytes 0-1 held
** a non-zero value, replace it with 4. Then append the footer, if any. */
fts5PutU16(&aPg[2], nPg); if( fts5GetU16(&aPg[0]) ) fts5PutU16(&aPg[0], 4); if( nIdx>0 ){
memcpy(&aPg[nPg], aIdx, nIdx);
nPg += nIdx;
}
sqlite3_free(aIdx);
/* Write the new page to disk and exit the loop */
assert( nPg>4 || fts5GetU16(aPg)==0 );
fts5DataWrite(p, iRowid, aPg, nPg); break;
}
}
fts5DataRelease(pLeaf);
}
/*
** Completely remove the entry that pSeg currently points to from
** the database.
**
** NOTE(review): several statements of this function, including its end,
** are not visible in this excerpt - confirm against the full source
** before relying on the comments added below.
*/ staticvoid fts5DoSecureDelete(
Fts5Index *p,
Fts5SegIter *pSeg
){ constint bDetailNone = (p->pConfig->eDetail==FTS5_DETAIL_NONE); int iSegid = pSeg->pSeg->iSegid;
u8 *aPg = pSeg->pLeaf->p; int nPg = pSeg->pLeaf->nn; int iPgIdx = pSeg->pLeaf->szLeaf;
u64 iDelta = 0; int iNextOff = 0; int iOff = 0; int nIdx = 0;
u8 *aIdx = 0; int bLastInDoclist = 0; int iIdx = 0; int iStart = 0; int iDelKeyOff = 0; /* Offset of deleted key, if any */
/* At this point segment iterator pSeg points to the entry
** this function should remove from the b-tree segment.
**
** In detail=full or detail=column mode, pSeg->iLeafOffset is the
** offset of the first byte in the position-list for the entry to
** remove. Immediately before this comes two varints that will also
** need to be removed:
**
**     + the rowid or delta rowid value for the entry, and
**     + the size of the position list in bytes.
**
** Or, in detail=none mode, there is a single varint prior to
** pSeg->iLeafOffset - the rowid or delta rowid value.
**
** This block sets the following variables:
**
**   iStart:
**     The offset of the first byte of the rowid or delta-rowid
**     value for the doclist entry being removed.
**
**   iDelta:
**     The value of the rowid or delta-rowid value for the doclist
**     entry being removed.
**
**   iNextOff:
**     The offset of the next entry following the position list
**     for the one being removed. If the position list for this
**     entry overflows onto the next leaf page, this value will be
**     greater than pLeaf->szLeaf.
*/
{ int iSOP; /* Start-Of-Position-list */ if( pSeg->iLeafPgno==pSeg->iTermLeafPgno ){
iStart = pSeg->iTermLeafOffset;
}else{
iStart = fts5GetU16(&aPg[0]);
}
/* If the position-list for the entry being removed flows over past ** the end of this page, delete the portion of the position-list on the ** next page and beyond. ** ** Set variable bLastInDoclist to true if this entry happens
** to be the last rowid in the doclist for its term. */ if( iNextOff>=iPgIdx ){ int pgno = pSeg->iLeafPgno+1;
fts5SecureDeleteOverflow(p, pSeg->pSeg, pgno, &bLastInDoclist);
iNextOff = iPgIdx;
}
if( pSeg->bDel==0 ){ if( iNextOff!=iPgIdx ){ /* Loop through the page-footer. If iNextOff (offset of the ** entry following the one we are removing) is equal to the ** offset of a key on this page, then the entry is the last
** in its doclist. */ int iKeyOff = 0; for(iIdx=0; iIdx<nIdx; /* no-op */){
u32 iVal = 0;
iIdx += fts5GetVarint32(&aIdx[iIdx], iVal);
iKeyOff += iVal; if( iKeyOff==iNextOff ){
bLastInDoclist = 1;
}
}
}
/* If this is (a) the first rowid on a page and (b) is not followed by ** another position list on the same page, set the "first-rowid" field
** of the header to 0. */ if( fts5GetU16(&aPg[0])==iStart && (bLastInDoclist || iNextOff==iPgIdx) ){
fts5PutU16(&aPg[0], 0);
}
}
/* When pSeg->bDel is set, the rowid delta followed by a single 0x01 byte
** is written at offset iOff. */
if( pSeg->bDel ){
iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta);
aPg[iOff++] = 0x01;
}elseif( bLastInDoclist==0 ){ if( iNextOff!=iPgIdx ){
/* Another entry of the same doclist follows on this page. Its delta
** is read, and the sum of both deltas is written at offset iOff. */
u64 iNextDelta = 0;
iNextOff += fts5GetVarint(&aPg[iNextOff], &iNextDelta);
iOff += sqlite3Fts5PutVarint(&aPg[iOff], iDelta + iNextDelta);
}
}elseif(
pSeg->iLeafPgno==pSeg->iTermLeafPgno
&& iStart==pSeg->iTermLeafOffset
){ /* The entry being removed was the only position list in its
** doclist. Therefore the term needs to be removed as well. */ int iKey = 0; int iKeyOff = 0;
/* Set iKeyOff to the offset of the term that will be removed - the
** last offset in the footer that is not greater than iStart. */ for(iIdx=0; iIdx<nIdx; iKey++){
u32 iVal = 0;
iIdx += fts5GetVarint32(&aIdx[iIdx], iVal); if( (iKeyOff+iVal)>(u32)iStart ) break;
iKeyOff += iVal;
}
assert_nc( iKey>=1 );
/* Set iDelKeyOff to the value of the footer entry to remove from
** the page. */
iDelKeyOff = iOff = iKeyOff;
if( iNextOff!=iPgIdx ){ /* This is the only position-list associated with the term, and there ** is another term following it on this page. So the subsequent term ** needs to be moved to replace the term associated with the entry
** being removed. */ int nPrefix = 0; int nSuffix = 0; int nPrefix2 = 0; int nSuffix2 = 0;
assert_nc( pSeg->iLeafPgno>pSeg->iTermLeafPgno ); /* The entry being removed may be the only position list in
** its doclist. */ for(iPgno=pSeg->iLeafPgno-1; iPgno>pSeg->iTermLeafPgno; iPgno-- ){
Fts5Data *pPg = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, iPgno)); int bEmpty = (pPg && pPg->nn==4);
fts5DataRelease(pPg); if( bEmpty==0 ) break;
}
/* Assuming no error has occurred, this block does final edits to the ** leaf page before writing it back to disk. Input variables are: ** ** nPg: Total initial size of leaf page. ** iPgIdx: Initial offset of page footer. ** ** iOff: Offset to move data to ** iNextOff: Offset to move data from
*/ if( p->rc==SQLITE_OK ){ constint nMove = nPg - iNextOff; /* Number of bytes to move */ int nShift = iNextOff - iOff; /* Distance to move them */
/*
** This is called as part of flushing a delete to disk in 'secure-delete'
** mode. It edits the segments within the database described by argument
** pStruct to remove the entries for term zTerm, rowid iRowid.
**
** NOTE(review): only the signature and the first two declarations of this
** function are visible in this excerpt.
*/ staticvoid fts5FlushSecureDelete(
Fts5Index *p,
Fts5Structure *pStruct, constchar *zTerm, int nTerm,
i64 iRowid
){ constint f = FTS5INDEX_QUERY_SKIPHASH;
Fts5Iter *pIter = 0; /* Used to find term instance */
/*
** Flush the contents of in-memory hash table iHash to a new level-0
** segment on disk. Also update the corresponding structure record.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
**
** NOTE(review): parts of this function (for example the declaration and
** initialisation of "writer", pBuf and pPgidx, and everything after the
** secure-delete handling) are not visible in this excerpt.
*/ staticvoid fts5FlushOneHash(Fts5Index *p){
Fts5Hash *pHash = p->pHash;
Fts5Structure *pStruct; int iSegid; int pgnoLast = 0; /* Last leaf page number in segment */
/* Obtain a reference to the index structure and allocate a new segment-id
** for the new level-0 segment. */
pStruct = fts5StructureRead(p);
fts5StructureInvalidate(p);
/* The work below is only performed when the hash table is not empty. */
if( sqlite3Fts5HashIsEmpty(pHash)==0 ){
iSegid = fts5AllocateSegid(p, pStruct); if( iSegid ){ constint pgsz = p->pConfig->pgsz; int eDetail = p->pConfig->eDetail; int bSecureDelete = p->pConfig->bSecureDelete;
Fts5StructureSegment *pSeg; /* New segment within pStruct */
Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
/* fts5WriteInit() should have initialized the buffers to (most likely)
** the maximum space required. */
assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );
/* Begin scanning through hash table entries. This loop runs once for each
** term/doclist currently stored within the hash table. */ if( p->rc==SQLITE_OK ){
p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
} while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ constchar *zTerm; /* Buffer containing term */ int nTerm; /* Size of zTerm in bytes */ const u8 *pDoclist; /* Pointer to doclist for this term */ int nDoclist; /* Size of doclist in bytes */
/* Get the term and doclist for this entry. */
sqlite3Fts5HashScanEntry(pHash, &zTerm, &nTerm, &pDoclist, &nDoclist); if( bSecureDelete==0 ){
fts5WriteAppendTerm(p, &writer, nTerm, (const u8*)zTerm); if( p->rc!=SQLITE_OK ) break;
assert( writer.bFirstRowidInPage==0 );
}
/* The bulk append below requires bSecureDelete==0. In secure-delete mode
** the doclist is always walked entry by entry in the else branch. */
if( !bSecureDelete && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ /* The entire doclist will fit on the current leaf. */
fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
}else{ int bTermWritten = !bSecureDelete;
/* bTermWritten starts out false in secure-delete mode, matching the
** fact that fts5WriteAppendTerm() was skipped above in that mode. */
i64 iRowid = 0;
i64 iPrev = 0; int iOff = 0;
/* The entire doclist will not fit on this leaf. The following ** loop iterates through the poslists that make up the current
** doclist. */ while( p->rc==SQLITE_OK && iOff<nDoclist ){
u64 iDelta = 0;
iOff += fts5GetVarint(&pDoclist[iOff], &iDelta);
iRowid += iDelta;
/* If in secure delete mode, and if this entry in the poslist is ** in fact a delete, then edit the existing segments directly
** using fts5FlushSecureDelete(). */ if( bSecureDelete ){ if( eDetail==FTS5_DETAIL_NONE ){ if( iOff<nDoclist && pDoclist[iOff]==0x00 ){
fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid);
iOff++; if( iOff<nDoclist && pDoclist[iOff]==0x00 ){
iOff++;
nDoclist = 0;
}else{ continue;
}
}
/* Other detail modes: the entry is treated as a delete when the low bit
** of the byte at iOff is set. If that byte is exactly 0x01, or an error
** has occurred, the byte is skipped and the loop continues. */
}elseif( (pDoclist[iOff] & 0x01) ){
fts5FlushSecureDelete(p, pStruct, zTerm, nTerm, iRowid); if( p->rc!=SQLITE_OK || pDoclist[iOff]==0x01 ){
iOff++; continue;
}
}
}
/* Figure out if this structure requires optimization. A structure does ** not require optimization if either: ** ** 1. it consists of fewer than two segments, or ** 2. all segments are on the same level, or ** 3. all segments except one are currently inputs to a merge operation. ** ** In the first case, if there are no tombstone hash pages, return NULL. In ** the second, increment the ref-count on *pStruct and return a copy of the ** pointer to it.
*/ if( nSeg==0 ) return 0; for(i=0; i<pStruct->nLevel; i++){ int nThis = pStruct->aLevel[i].nSeg; int nMerge = pStruct->aLevel[i].nMerge; if( nThis>0 && (nThis==nSeg || (nThis==nSeg-1 && nMerge==nThis)) ){ if( nSeg==1 && nThis==1 && pStruct->aLevel[i].aSeg[0].nPgTombstone==0 ){ return 0;
}
fts5StructureRef(pStruct); return pStruct;
}
assert( pStruct->aLevel[i].nMerge<=nThis );
}
/*
** This is called to implement the special "VALUES('merge', $nMerge)"
** INSERT command.
**
** NOTE(review): only the signature and first declaration of this function
** are visible in this excerpt.
*/ staticint sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
Fts5Structure *pStruct = 0;
#if 0
/*
** Append rowid iRowid to buffer pBuf. The value written is a varint
** holding the difference between iRowid and (*piLastRowid). Afterwards
** (*piLastRowid) is set to iRowid.
**
** This function assumes that space within the buffer has already been
** allocated.
*/
static void fts5MergeAppendDocid(
  Fts5Buffer *pBuf,               /* Buffer to write to */
  i64 *piLastRowid,               /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid                      /* Rowid to append */
){
  /* An empty buffer implies that no previous rowid has been recorded. */
  assert( pBuf->n!=0 || (*piLastRowid)==0 );
  fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
  *piLastRowid = iRowid;
}
#endif
/*
** Swap the contents of buffer *p1 with that of *p2. The structures are
** exchanged by value.
*/
static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
  Fts5Buffer saved = *p2;
  *p2 = *p1;
  *p1 = saved;
}
/*
** Advance through a list of varints stored in buffer pBuf.
**
** If (*piOff) is greater than or equal to pBuf->n, set (*piOff) to -1 and
** leave (*piRowid) unchanged. Otherwise, read the varint at offset
** (*piOff), add its value to (*piRowid) and move (*piOff) past it.
*/
static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
  const int iStart = *piOff;
  u64 iVal;

  if( iStart>=pBuf->n ){
    *piOff = -1;
    return;
  }
  *piOff = iStart + sqlite3Fts5GetVarint(&pBuf->p[iStart], &iVal);
  *piRowid += iVal;
}
/*
** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
** In this case the buffers consist of a delta-encoded list of rowids only.
**
** NOTE(review): only the signature and local declarations are visible in
** this excerpt. The visible code takes p2 from aBuf[0] only.
*/ staticvoid fts5MergeRowidLists(
Fts5Index *p, /* FTS5 backend object */
Fts5Buffer *p1, /* First list to merge */ int nBuf, /* Number of entries in aBuf[] */
Fts5Buffer *aBuf /* Array of other lists to merge into p1 */
){ int i1 = 0; int i2 = 0;
i64 iRowid1 = 0;
i64 iRowid2 = 0;
i64 iOut = 0;
Fts5Buffer *p2 = &aBuf[0];
Fts5Buffer out;
/*
** Per-input state used when merging doclists. Instances are chained
** together through pNext.
*/
typedef struct PrefixMerger PrefixMerger;
struct PrefixMerger {
  Fts5DoclistIter iter;           /* Doclist iterator */
  i64 iPos;                       /* For iterating through a position list */
  int iOff;                       /* Offset used together with aPos/iPos */
  u8 *aPos;                       /* Position-list buffer being iterated */
  PrefixMerger *pNext;            /* Next in docid/poslist order */
};
/*
** Array aBuf[] contains nBuf doclists. These are all merged in with the
** doclist in buffer p1.
**
** If the aBuf[] inputs are all empty, the function returns without doing
** any further work.
**
** NOTE(review): the main merge loop and the end of this function are not
** visible in this excerpt.
*/ staticvoid fts5MergePrefixLists(
Fts5Index *p, /* FTS5 backend object */
Fts5Buffer *p1, /* First list to merge */ int nBuf, /* Number of buffers in array aBuf[] */
Fts5Buffer *aBuf /* Other lists to merge in */
){ #define fts5PrefixMergerNextPosition(p) \
sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos) #define FTS5_MERGE_NLIST 16
/* One PrefixMerger is used per aBuf[] entry, plus one (aMerger[nBuf]) for
** p1. The assert() further down checks that nBuf+1 fits in aMerger[]. */
PrefixMerger aMerger[FTS5_MERGE_NLIST];
PrefixMerger *pHead = 0; int i; int nOut = 0;
Fts5Buffer out = {0, 0, 0};
Fts5Buffer tmp = {0, 0, 0};
i64 iLastRowid = 0;
/* Initialize a doclist-iterator for each input buffer. Arrange them in ** a linked-list starting at pHead in ascending order of rowid. Avoid
** linking any iterators already at EOF into the linked list at all. */
assert( nBuf+1<=(int)(sizeof(aMerger)/sizeof(aMerger[0])) );
memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1));
pHead = &aMerger[nBuf];
fts5DoclistIterInit(p1, &pHead->iter); for(i=0; i<nBuf; i++){
fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter);
fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]);
nOut += aBuf[i].n;
} if( nOut==0 ) return;
/* At this point nOut is the combined size of the aBuf[] inputs.
** NOTE(review): the comment below speaks of (nBuf+1) 10-byte paddings,
** while the expression adds 10*nBuf plus the 9-byte varint allowance -
** confirm that the bound is as intended. */
nOut += p1->n + 9 + 10*nBuf;
/* The maximum size of the output is equal to the sum of the ** input sizes + 1 varint (9 bytes). The extra varint is because if the ** first rowid in one input is a large negative number, and the first in ** the other a non-negative number, the delta for the non-negative ** number will be larger on disk than the literal integer value ** was. ** ** Or, if the input position-lists are corrupt, then the output might ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1 ** (the value PoslistNext64() uses for EOF) as a position and appending ** it to the output. This can happen at most once for each input
** position-list, hence (nBuf+1) 10 byte paddings. */ if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return;
if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){ /* Merge data from two or more poslists */
i64 iPrev = 0; int nTmp = FTS5_DATA_ZERO_PADDING; int nMerge = 0;
PrefixMerger *pSave = pHead;
PrefixMerger *pThis = 0; int nTail = 0;
/* See the earlier comment in this function for an explanation of why ** corrupt input position lists might cause the output to consume
** at most nMerge*10 bytes of unexpected space. */ if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){ break;
}
fts5BufferZero(&tmp);
/*
** Set up an iterator for a prefix query. The new iterator is returned via
** output parameter (*ppIter). The visible work is only performed while
** p->rc is SQLITE_OK.
**
** Note that when iIdx is non-zero the first byte of the caller's pToken
** buffer is overwritten with FTS5_MAIN_PREFIX.
**
** NOTE(review): the code that assigns pStruct, aBuf and xAppend, and the
** remainder of the function, are not visible in this excerpt.
*/
staticvoid fts5SetupPrefixIter(
Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ int iIdx, /* Index to scan for data */
u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */
Fts5Colset *pColset, /* Restrict matches to these columns */
Fts5Iter **ppIter /* OUT: New iterator */
){
Fts5Structure *pStruct;
Fts5Buffer *aBuf; int nBuf = 32; int nMerge = 1;
if( p->rc==SQLITE_OK ){ constint flags = FTS5INDEX_QUERY_SCAN
| FTS5INDEX_QUERY_SKIPEMPTY
| FTS5INDEX_QUERY_NOOUTPUT; int i;
i64 iLastRowid = 0;
Fts5Iter *p1 = 0; /* Iterator used to gather data from index */
Fts5Data *pData;
Fts5Buffer doclist; int bNewTerm = 1;
memset(&doclist, 0, sizeof(doclist));
/* If iIdx is non-zero, then it is the number of a prefix-index for ** prefixes 1 character longer than the prefix being queried for. That ** index contains all the doclists required, except for the one ** corresponding to the prefix itself. That one is extracted from the
** main term index here. */ if( iIdx!=0 ){ int dummy = 0; constint f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT;
pToken[0] = FTS5_MAIN_PREFIX;
fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1);
fts5IterSetOutputCb(&p->rc, p1); for(;
fts5MultiIterEof(p, p1)==0;
fts5MultiIterNext2(p, p1, &dummy)
){
Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
/* Rows for which the iterator reports no data (base.nData==0) are not
** appended. Appended rows are passed to xAppend() together with the
** difference between the row's rowid and the previously appended one. */
p1->xSetOutputs(p1, pSeg); if( p1->base.nData ){
xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
iLastRowid = p1->base.iRowid;
}
}
fts5MultiIterFree(p1);
}
/*
** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
** to the document with rowid iRowid.
**
** NOTE(review): the end of this function, including its return statement,
** is not visible in this excerpt.
*/ staticint sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
assert( p->rc==SQLITE_OK );
/* Allocate the hash table if it has not already been allocated */ if( p->pHash==0 ){
p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
}
/* A flush happens in any of three cases: iRowid is smaller than
** p->iWriteRowid; iRowid equals p->iWriteRowid while p->bDelete is 0; or
** p->nPendingData exceeds the configured nHashSize. */
/* Flush the hash table to disk if required */ if( iRowid<p->iWriteRowid
|| (iRowid==p->iWriteRowid && p->bDelete==0)
|| (p->nPendingData > p->pConfig->nHashSize)
){
fts5IndexFlush(p);
}
/*
** Commit data to disk.
**
** Pending data is flushed with fts5IndexFlush() and the reader is closed
** with sqlite3Fts5IndexCloseReader(). The return value is whatever
** fts5IndexReturn() reports for p. The caller must ensure that p->rc is
** SQLITE_OK on entry.
*/
static int sqlite3Fts5IndexSync(Fts5Index *p){
  int rc;
  assert( p->rc==SQLITE_OK );
  fts5IndexFlush(p);
  sqlite3Fts5IndexCloseReader(p);
  rc = fts5IndexReturn(p);
  return rc;
}
/*
** Discard any data stored in the in-memory hash tables. Do not write it
** to the database. Additionally, assume that the contents of the %_data
** table may have changed on disk. So any in-memory caches of %_data
** records must be invalidated.
**
** This function always returns SQLITE_OK. p->rc is neither checked nor
** returned.
*/
static int sqlite3Fts5IndexRollback(Fts5Index *p){
  sqlite3Fts5IndexCloseReader(p);
  fts5IndexDiscardData(p);
  fts5StructureInvalidate(p);
  return SQLITE_OK;
}
/*
** The %_data table is completely empty when this function is called. This
** function populates it with the initial structure objects for each index,
** and the initial version of the "averages" record (a zero-byte blob).
**
** The structure written is all zeroes, except that nOriginCntr is set
** to 1 when the bContentlessDelete configuration flag is set. The return
** value is that of fts5IndexReturn().
*/
static int sqlite3Fts5IndexReinit(Fts5Index *p){
  Fts5Structure sEmpty;

  /* Drop cached structure and pending data before writing new records. */
  fts5StructureInvalidate(p);
  fts5IndexDiscardData(p);

  memset(&sEmpty, 0, sizeof(sEmpty));
  if( p->pConfig->bContentlessDelete ){
    sEmpty.nOriginCntr = 1;
  }

  fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
  fts5StructureWrite(p, &sEmpty);
  return fts5IndexReturn(p);
}
/*
** Open a new Fts5Index handle. If the bCreate argument is true, create
** and initialize the underlying %_data table.
**
** If successful, set *pp to point to the new object and return SQLITE_OK.
** Otherwise, set *pp to NULL and return an SQLite error code.
**
** NOTE(review): only the signature and first declarations of this function
** are visible in this excerpt. How pzErr is used cannot be seen here.
*/ staticint sqlite3Fts5IndexOpen(
Fts5Config *pConfig, int bCreate,
Fts5Index **pp, char **pzErr
){ int rc = SQLITE_OK;
Fts5Index *p; /* New object */
/*
** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
**
** Passing a NULL pointer is a harmless no-op. Otherwise the cached
** structure is invalidated, all prepared statements held by the handle
** are finalized, the hash table and the zDataTbl string are freed, and
** finally the handle itself is freed. The reader (p->pReader) must already
** have been closed. This function always returns SQLITE_OK.
*/
static int sqlite3Fts5IndexClose(Fts5Index *p){
  if( p==0 ) return SQLITE_OK;

  assert( p->pReader==0 );
  fts5StructureInvalidate(p);

  /* Prepared statements owned by the handle. */
  sqlite3_finalize(p->pWriter);
  sqlite3_finalize(p->pDeleter);
  sqlite3_finalize(p->pIdxWriter);
  sqlite3_finalize(p->pIdxDeleter);
  sqlite3_finalize(p->pIdxSelect);
  sqlite3_finalize(p->pIdxNextSelect);
  sqlite3_finalize(p->pDataVersion);
  sqlite3_finalize(p->pDeleteFromIdx);

  /* Remaining heap allocations, then the handle itself. */
  sqlite3Fts5HashFree(p->pHash);
  sqlite3_free(p->zDataTbl);
  sqlite3_free(p);
  return SQLITE_OK;
}
/*
** Argument p points to a buffer containing utf-8 text that is nByte bytes
** in size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are less than nChar characters in total.
**
** A character is a single byte, or a lead byte (value 0xC0 or greater)
** together with all continuation bytes (10xxxxxx) that directly follow it.
** A lead byte that is the very last byte of the buffer is not counted as
** a character. In that case 0 is returned.
*/
static int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
){
  const unsigned char *z = (const unsigned char*)p;
  int n = 0;                      /* Bytes consumed so far */
  int i;                          /* Characters consumed so far */

  for(i=0; i<nChar; i++){
    if( n>=nByte ) return 0;      /* Input contains fewer than nChar chars */
    if( z[n++]>=0xc0 ){
      if( n>=nByte ) return 0;
      while( (z[n] & 0xc0)==0x80 ){
        n++;
        if( n>=nByte ){
          /* The buffer ends with this character. That is fine only if it
          ** is the last character requested. */
          if( i+1==nChar ) break;
          return 0;
        }
      }
    }
  }
  return n;
}
/*
** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
** unicode characters in the string.
**
** Each byte starts a new character, except that continuation bytes
** (10xxxxxx) directly following a lead byte (value 0xC0 or greater) are
** folded into that lead byte's character.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  const unsigned char *z = (const unsigned char*)pIn;
  int nChar;
  int i = 0;

  for(nChar=0; i<nIn; nChar++){
    if( z[i++]>=0xc0 ){
      while( i<nIn && (z[i] & 0xc0)==0x80 ) i++;
    }
  }
  return nChar;
}
/*
** Insert or remove data to or from the index. Each time a document is
** added to or removed from the index, this function is called one or more
** times.
**
** For an insert, it must be called once for each token in the new document.
** If the operation is a delete, it must be called (at least) once for each
** unique token in the document with an iCol value less than zero. The iPos
** argument is ignored for a delete.
**
** NOTE(review): only the signature and first declarations of this function
** are visible in this excerpt.
*/ staticint sqlite3Fts5IndexWrite(
Fts5Index *p, /* Index to write to */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ constchar *pToken, int nToken /* Token to add or remove to or from index */
){ int i; /* Used to iterate through indexes */ int rc = SQLITE_OK; /* Return code */
Fts5Config *pConfig = p->pConfig;
/*
** pToken points to a buffer of size nToken bytes containing a search
** term, including the index number at the start, used on a tokendata=1
** table. This function returns true if the term in buffer pBuf matches
** token pToken/nToken.
**
** A match means that the first nToken bytes of pBuf are identical to
** pToken, and that pBuf either ends there or continues with a 0x00 byte.
** The return value is 1 for a match and 0 otherwise.
*/
static int fts5IsTokendataPrefix(
  Fts5Buffer *pBuf,
  const u8 *pToken, int nToken
){
  if( pBuf->n<nToken ) return 0;
  if( memcmp(pBuf->p, pToken, nToken)!=0 ) return 0;
  return ( pBuf->n==nToken || pBuf->p[nToken]==0x00 );
}
/*
** Ensure the segment-iterator passed as the only argument points to EOF.
** This is done by releasing the iterator's current leaf and setting its
** pLeaf pointer to NULL.
*/
static void fts5SegIterSetEOF(Fts5SegIter *pSeg){
  fts5DataRelease(pSeg->pLeaf);
  pSeg->pLeaf = 0;
}
/*
** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an
** array of these for each row it visits. Or, for an iterator used by an
** "ORDER BY rank" query, it accumulates an array of these for the entire
** query.
**
** Each instance in the array indicates the iterator (and therefore term)
** associated with position iPos of rowid iRowid. This is used by the
** xInstToken() API.
*/
struct Fts5TokenDataMap {
  i64 iRowid;                     /* Row this token is located in */
  i64 iPos;                       /* Position of token */
  int iIter;                      /* Iterator token was read from */
};
/*
** An object used to supplement Fts5Iter for tokendata=1 iterators.
**
** NOTE(review): the field notes below are inferred from the field names
** and types only - confirm against the code that uses this structure.
*/
struct Fts5TokenDataIter {
  int nIter;                      /* presumably entries in use in apIter[] */
  int nIterAlloc;                 /* presumably allocated size of apIter[] */

  int nMap;                       /* presumably entries in use in aMap[] */
  int nMapAlloc;                  /* presumably allocated size of aMap[] */
  Fts5TokenDataMap *aMap;         /* Array of Fts5TokenDataMap objects */

  Fts5PoslistReader *aPoslistReader;
  int *aPoslistToIter;
  Fts5Iter *apIter[1];            /* Declared with a single element */
};
/*
** This function appends iterator pAppend to Fts5TokenDataIter pIn and
** returns the result.
**
** NOTE(review): only the signature and first declaration of this function
** are visible in this excerpt.
*/ static Fts5TokenDataIter *fts5AppendTokendataIter(
Fts5Index *p, /* Index object (for error code) */
Fts5TokenDataIter *pIn, /* Current Fts5TokenDataIter struct */
Fts5Iter *pAppend /* Append this iterator */
){
Fts5TokenDataIter *pRet = pIn;
/*
** The iterator passed as the only argument must be a tokendata=1 iterator
** (pIter->pTokenDataIter!=0). This function sets the iterator output
** variables (pIter->base.*) according to the contents of the current
** row.
**
** NOTE(review): only the signature and first declarations of this function
** are visible in this excerpt.
*/ staticvoid fts5IterSetOutputsTokendata(Fts5Iter *pIter){ int ii; int nHit = 0;
i64 iRowid = SMALLEST_INT64; int iMin = 0;
/*
** The iterator passed as the only argument must be a tokendata=1 iterator
** (pIter->pTokenDataIter!=0). This function advances the iterator. If
** argument bFrom is false, then the iterator is advanced to the next
** entry. Or, if bFrom is true, it is advanced to the first entry with
** a rowid of iFrom or greater.
**
** NOTE(review): only the signature and first declarations of this function
** are visible in this excerpt.
*/ staticvoid fts5TokendataIterNext(Fts5Iter *pIter, int bFrom, i64 iFrom){ int ii;
Fts5TokenDataIter *pT = pIter->pTokenDataIter;
Fts5Index *pIndex = pIter->pIndex;
/*
** If pIter is not NULL and its first segment iterator (pIter->aSeg[0])
** is at EOF - that is, its pLeaf pointer is NULL - then set
** pIter->aSeg[0].term to a copy of buffer pTerm. Otherwise do nothing.
**
** Any error from the copy is stored in pIter->pIndex->rc.
*/
static void fts5TokendataSetTermIfEof(Fts5Iter *pIter, Fts5Buffer *pTerm){
  if( pIter==0 ) return;
  if( pIter->aSeg[0].pLeaf!=0 ) return;
  fts5BufferSet(&pIter->pIndex->rc, &pIter->aSeg[0].term, pTerm->n, pTerm->p);
}
/*
** This function sets up an iterator to use for a non-prefix query on a
** tokendata=1 table.
**
** NOTE(review): most of this function is not visible in this excerpt,
** including the declarations of pNew, pSmall and ii and the end of the
** function.
*/ static Fts5Iter *fts5SetupTokendataIter(
Fts5Index *p, /* FTS index to query */ const u8 *pToken, /* Buffer containing query term */ int nToken, /* Size of buffer pToken in bytes */
Fts5Colset *pColset /* Colset to filter on */
){
Fts5Iter *pRet = 0;
Fts5TokenDataIter *pSet = 0;
Fts5Structure *pStruct = 0; constint flags = FTS5INDEX_QUERY_SCANONETERM | FTS5INDEX_QUERY_SCAN;
/* Loop through all segments in the new iterator. Find the smallest ** term that any segment-iterator points to. Iterator pNew will be ** used for this term. Also, set any iterator that points to a term that
** does not match pToken/nToken to point to EOF */
pSmall = 0; for(ii=0; ii<pNew->nSeg; ii++){
/* A segment iterator whose term fails fts5IsTokendataPrefix() is moved
** to EOF with fts5SegIterSetEOF(). */
Fts5SegIter *pII = &pNew->aSeg[ii]; if( 0==fts5IsTokendataPrefix(&pII->term, pToken, nToken) ){
fts5SegIterSetEOF(pII);
}
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5 in Prozent
¤ Diese beiden folgenden Angebotsgruppen bietet das Unternehmen1.110Angebot
(Wie Sie bei der Firma Beratungs- und Dienstleistungen beauftragen können 2026-04-26)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.