diff --git a/COPYING b/COPYING
index 02be4c60..d159169d 100644
--- a/COPYING
+++ b/COPYING
@@ -1,13 +1,12 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
- GNU GENERAL PUBLIC LICENSE
- Version 2, June 1991
-
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.
- 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
- Preamble
+ Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
@@ -16,7 +15,7 @@ software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.) You can apply it to
+the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
@@ -57,7 +56,7 @@ patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
- GNU GENERAL PUBLIC LICENSE
+ GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
@@ -256,7 +255,7 @@ make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
- NO WARRANTY
+ NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
@@ -278,9 +277,9 @@ YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
- END OF TERMS AND CONDITIONS
+ END OF TERMS AND CONDITIONS
- How to Apply These Terms to Your New Programs
+ How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
@@ -292,7 +291,7 @@ convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
Copyright © 2001-2015 Andrew Aksyonoff Copyright © 2008-2015 Sphinx Technologies Inc, http://sphinxsearch.com
+
Sphinx initial author (and a benevolent dictator ever since):
Andrew Aksyonoff, http://shodan.ru
-
+
Past and present employees of Sphinx Technologies Inc who should be
noted on their work on Sphinx (in alphabetical order):
- People who contributed to Sphinx and their contributions (in no particular order):
+ People who contributed to Sphinx and their contributions (in no particular order):
Robert "coredev" Bengtsson (Sweden), initial version of PostgreSQL data source Len Kranendonk, Perl API Dmytro Shteflyuk, Ruby API
Extract everything from the distribution tarball (haven't you already?)
and go to the There are two ways of getting Sphinx for Ubuntu: regular deb packages and the Launchpad PPA repository. Deb packages: Sphinx requires a few libraries to be installed on Debian/Ubuntu. Use apt-get to download and install these dependencies: Now you can install Sphinx: PPA repository (Ubuntu only). Installing Sphinx is much easier from Sphinxsearch PPA repository, because you will get all dependencies and can also update Sphinx to the latest version with the same command. First, add Sphinxsearch repository and update the list of packages: Install/update sphinxsearch package:
Installing Sphinx on a Windows server is often easier than installing on a Linux environment;
unless you are preparing code patches, you can use the pre-compiled binary files from the Downloads
area on the website. Extract everything from the .zip file you have downloaded -
- For the remainder of this guide, we will assume that the folders are unzipped into
is obsolete and will be removed in the near future. docinfo=inline is deprecated. You can now use
ondisk_attrs or
-ondisk_attrs_default instead. workers=threads is a new default for all OS now.
We're gonna get rid of other modes in future. mem_limit=128M is a new default.
Removed CLI search which confused people instead of
helping them and sql_query_info. Deprecated SetMatchMode() API call. Changed default thread_stack
+ Changed default thread_stack
value to 1M. Deprecated SetOverride() API call.
(excluding title and content, that are full-text fields) as
attributes, indexing them, and then using API calls to
setup filtering, sorting, and grouping. Here is an example.
-
sql_attr_uint = forum_id
sql_attr_timestamp = post_date
...
-
Obviously, that's not much of a difference for 2000-row table,
but when it comes to indexing 10-million-row MyISAM table,
ranged queries might be of some help.
-
+
The difference between post-query and post-index query is in that post-query
is run immediately when Sphinx received all the documents, but further indexing
may still fail for some other reason. On the contrary,
@@ -1653,13 +1655,7 @@
RT indexes are currently quality feature, but there are still a few known
usage quirks. Those quirks are listed in this section.
- Prefix indexing is supported with dict = keywords starting 2.0.2-beta. Infix indexing is experimental in trunk. Disk chunks optimization routine is not implemented yet. On initial index creation, attributes are reordered by type,
-in the following order: uint, bigint, float, timestamp, string. So when
-using INSERT without an explicit column names list, specify all uint
-column values first, then bigint, etc. Default conservative RAM chunk limit ( Default conservative RAM chunk limit ( High DELETE/REPLACE rate can lead to kill-list fragmentation
@@ -1777,7 +1773,7 @@
is not very good for disk use and crash recovery time. Starting
with 2.0.1-beta you can configure
// SphinxQL
mysql_query ( "SELECT ... OPTION ranker=sph04" );
-
+
Legacy matching modes automatically select a ranker as follows:
SPH_MATCH_ALL uses SPH_RANK_PROXIMITY ranker; SPH_MATCH_ANY uses SPH_RANK_MATCHANY ranker;
SPH_SORT_RELEVANCE is equivalent to sorting by "@weight DESC, @id ASC" in extended sorting mode,
SPH_SORT_ATTR_ASC is equivalent to "attribute ASC, @weight DESC, @id ASC",
and SPH_SORT_ATTR_DESC to "attribute DESC, @weight DESC, @id ASC" respectively.
-
+
In SPH_SORT_TIME_SEGMENTS mode, attribute values are split into so-called
time segments, and then sorted by time segment first, and by relevance second.
@@ -3559,7 +3555,8 @@
Collations should affect all string attribute comparisons, including
those within ORDER BY and GROUP BY, so differently ordered or grouped results
-can be returned depending on the collation chosen.
+can be returned depending on the collation chosen. Note that collations don't
+affect full-text searching, for that use charset_table.
Table of Contents
UDFs reside in the external dynamic libraries (.so files on UNIX and .dll
on Windows systems). Library files need to reside in a trusted folder
-specified by plugin_dir directive,
+specified by plugin_dir directive,
for obvious security reasons: securing a single folder is easy; letting
anyone install arbitrary code into
DROP FUNCTION SphinxQL statements
respectively. Sphinx keeps track of the currently loaded functions, that is,
every time you create or drop an UDF,
Once you successfully load an UDF, you can use it in your SELECT or other
@@ -4884,7 +4881,7 @@
can also noticeably impact performance.
'max_query_time' - integer (max search time threshold, msec) 'max_predicted_time' - integer (max predicted search time, see Section 12.4.43, “predicted_time_costs”) 'ranker' - any of 'proximity_bm25', 'bm25', 'none', 'wordcount', 'proximity',
'matchany', 'fieldmask', 'sph04', 'expr', or 'export' (refer to Section 5.4, “Search results ranking”
for more details on each ranker)
SHOW META shows additional meta-information about the latest
query such as query time and keyword statistics. IO and CPU counters will only be available if searchd was started with --iostats and --cpustats switches respectively.
Additional predicted_time, dist_predicted_time, [{local|dist}]_fetched_[{docs|hits|skips}] counters will only be available if searchd was configured with
-predicted time costs and query had predicted_time in OPTION clause.
+predicted time costs and query had predicted_time in OPTION clause.
installs a user-defined function (UDF)
with the given name and type from the given library file.
The library file must reside in a trusted
-plugin_dir directory.
+plugin_dir directory.
On success, the function is available for use in all subsequent
queries that the server receives. Example:
write would need to be replayed. Those writes normally happen either
on a clean shutdown, or periodically with a (big enough!) interval
between writes specified in
-rt_flush_period directive.
+rt_flush_period directive.
So such a backup made at an arbitrary point in time just might end up
with way too much binary log data to replay.
@@ -5875,7 +5872,7 @@
agents or distributed index. It includes the values like the age of the last
request, last answer, the number of different kind of errors and
successes, etc. The statistic is shown for every agent for last 1, 5
-and 15 intervals, each of them of ha_period_karma seconds.
+and 15 intervals, each of them of ha_period_karma seconds.
The command exists only in sphinxql.
to the SHOW INDEX STATUS and SHOW STATUS statements respectively).
The optimization thread can be IO-throttled, you can control the
maximum number of IOs per second and the maximum IO size
-with rt_merge_iops
-and rt_merge_maxiosize
+with rt_merge_iops
+and rt_merge_maxiosize
directives respectively. The optimization jobs queue is lost
on daemon crash.
@@ -6760,7 +6757,7 @@
(Section 9.4.5, “SetGeoAnchor”) are now internally implemented using
this computed expressions mechanism, using magic names '@expr' and '@geodist'
respectively.
-
because to fix it, we need to be able either to reproduce and fix the bug,
or to deduce what's causing it from the information that you provide.
So here are some instructions on how to do that.
- Nothing special to say here. Here is the
+ Nothing special to say here. Here is the
<a href="http://sphinxsearch.com/bugs">link</a>. Create a new
ticket and describe your bug in details so both you and developers can
-save their time. In case of crashes we sometimes can get enough info to fix from
+save their time. In case of crashes we sometimes can get enough info to fix from
backtrace. Sphinx tries to write crash backtrace to its log file. It may look like
this:
that the binary is not stripped. Our official binary packages should be fine.
(That, or we have the symbols stored.) However, if you manually build Sphinx
from the source tarball, do not run To fix your bug developers often need to reproduce it on their machines.
+binary, and/or do not let your build/packaging system do that! To fix your bug developers often need to reproduce it on their machines.
To do this they need your sphinx.conf, index files, binlog (if present),
sometimes data to index (like SQL tables or XMLpipe2 data files) and queries.
@@ -8117,27 +8114,26 @@
and "127.0.0.1" will force TCP/IP usage. Refer to
MySQL manual
for more details.
-
Optional, default is 3306 for
SQL user to use when connecting to sql_host.
Mandatory, no default value.
Applies to SQL source types (
SQL user password to use when connecting to sql_host.
Mandatory, no default value.
Applies to SQL source types (
SQL database (in MySQL terms) to use after the connection and perform further queries within.
Mandatory, no default value.
Applies to SQL source types (
On Linux, it would typically be
both in theory and in practice. However, enabling compression on 100 Mbps links
may improve indexing time significantly (up to 20-30% of the total indexing time
improvement was reported). Your mileage may vary.
-
ODBC DSN (Data Source Name) specifies the credentials (host, user, password, etc)
to use when connecting to ODBC data source. The format depends on specific ODBC
driver used.
-
-
by default it builds with 32-bit IDs support but
it will automatically switch to a variant that matches keywords
in those fields, computes a sum of matched payloads multiplied
by field weights, and adds that sum to the final rank.
-
@@ -8445,7 +8442,7 @@
exactly equal to
-
over the network when sending queries. (Because that might be too much
of an impact when the K-list is huge.) You will need to setup a
separate per-server K-lists in that case.
-
such bitfields are packed together in 32-bit chunks in
Multi-value (there might be multiple attributes declared), optional.
Applies to SQL source types (
Note that unlike sql_attr_uint,
these values are signed.
Introduced in version 0.9.9-rc1.
-
and UNIX_TIMESTAMP() in MySQL will not return anything expected.
If you only need to work with dates, not times, consider TO_DAYS()
function in MySQL instead.
-
One important usage of the float attributes is storing latitude
and longitude values (in radians), for further usage in query-time
geosphere distance calculations.
-
RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
-
declared using
You can read more on JSON attributes in
http://sphinxsearch.com/blog/2013/08/08/full-json-support-in-trunk/.
-
-
value but does not full-text index it. In some cases it might be desired to both full-text
index the column and store it as attribute.
in size are skipped. Any errors during the file loading (IO errors, missed
limits, etc) will be reported as indexing warnings and will not early
terminate the indexing. No content will be indexed for such files.
-
For instance, updates on helper table that permanently change
the last successfully indexed ID should not be run from post-fetch
query; they should be run from post-index query instead.
-
expanded to maximum document ID which was actually fetched
from the database during indexing. If no documents were indexed,
$maxid will be expanded to 0.
-
database server. It causes the indexer to sleep for given amount of
milliseconds once per each ranged query step. This sleep is unconditional,
and is performed before the fetch query.
-
Specifies a command that will be executed and which output
will be parsed for documents. Refer to Section 3.9, “xmlpipe2 data source” for specific format description.
-
xmlpipe field declaration.
Multi-value, optional.
Applies to
Makes the specified XML element indexed as both a full-text field and a string attribute.
Equivalent to <sphinx:field name="field" attr="string"/> declaration within the XML file.
-
Multi-value, optional.
Applies to
Multi-value, optional.
Applies to
Multi-value, optional.
Applies to
Multi-value, optional.
Applies to
Multi-value, optional.
Applies to
that will constitute the MVA will be extracted, similar to how
sql_attr_multi parses
SQL column contents when 'field' MVA source type is specified.
-
that will constitute the MVA will be extracted, similar to how
sql_attr_multi parses
SQL column contents when 'field' MVA source type is specified.
-
This setting declares a string attribute tag in xmlpipe2 stream.
The contents of the specified tag will be parsed and stored as a string value.
-
XML tag are to be treated as a JSON document and stored into a Sphinx
index for later use. Refer to Section 12.1.24, “sql_attr_json”
for more details on the JSON attributes.
-
UTF8 fixup feature lets you avoid that. When fixup is enabled,
Sphinx will preprocess the incoming stream before passing it to the
XML parser and replace invalid UTF-8 sequences with spaces.
-
authentication when connecting to MS SQL Server. Note that when running
using standard zlib algorithm (called deflate and also implemented by
using modified zlib algorithm used by MySQL COMPRESS() and UNCOMPRESS() functions.
When indexing on a different box than the database, this lets you offload the database, and save on network traffic.
The feature is only available if zlib and zlib-devel were both available during build time.
-Free open-source SQL full-text search engine
Author
Author
Team
Team
Contributors
Contributors
Author
sphinx subdirectory. (We are using
- version 2.2.8-dev here for the sake of example only; be sure to change this
+ version 2.2.10-dev here for the sake of example only; be sure to change this
to a specific version you're using.)
- $ sudo apt-get install mysql-client unixodbc libpq5$ sudo dpkg -i sphinxsearch_2.2.8-dev-0ubuntu11~trusty_amd64.deb$ sudo dpkg -i sphinxsearch_2.2.10-dev-0ubuntu12~trusty_amd64.deb$ sudo add-apt-repository ppa:builds/sphinxsearch-rel22$ sudo apt-get update$ sudo apt-get install sphinxsearchAuthor
sphinx-2.2.8-dev-win32.zip,
- or sphinx-2.2.8-dev-win32-pgsql.zip if you need PostgresSQL support as well.
- (We are using version 2.2.8-dev here for the sake of example only;
+ sphinx-2.2.10-dev-win32.zip,
+ or sphinx-2.2.10-dev-win32-pgsql.zip if you need PostgreSQL support as well.
+ (We are using version 2.2.10-dev here for the sake of example only;
be sure to change this to a specific version you're using.)
You can use Windows Explorer in Windows XP and up to extract the files,
or a freeware package like 7Zip to open the archive.C:\Sphinx,
@@ -796,7 +798,7 @@ Author
Author
Author
Example sphinx.conf part:
+
Example sphinx.conf part:
...
sql_query = SELECT id, title, content, \
author_id, forum_id, post_date FROM my_forum_posts
@@ -1014,7 +1016,7 @@ Author
Example application code (in PHP):
+
Example application code (in PHP):
// only search posts by author whose ID is 123
$cl->SetFilter ( "author_id", array ( 123 ) );
@@ -1250,7 +1252,7 @@
Author
sql_query_post vs. sql_query_post_indexsql_query_post vs. sql_query_post_indexAuthor
rt_mem_limit)
+rt_mem_limit)
of 32M can lead to poor performance on bigger indexes, you should raise it to
256..1024M if you're planning to index gigabytes.Author
searchd
to perform a periodic RAM chunk flush to fix that problem
-using a rt_flush_period
+using a rt_flush_period
directive. With periodic flushes enabled, searchd
will keep a separate thread, checking whether RT indexes RAM
chunks need to be written back to disk. Once that happens,
@@ -2243,7 +2239,7 @@ Author
Legacy matching modes rankers
Legacy matching modes rankers
Author
SPH_SORT_TIME_SEGMENTS mode
SPH_SORT_TIME_SEGMENTS mode
Author
Author
searchd is a risk.
You can load and unload them dynamically into searchd
@@ -3602,7 +3599,7 @@ Author
searchd writes
-its state to the sphinxql_state file
+its state to the sphinxql_state file
as a plain good old SQL script.
Author
Author
mysql> SELECT * FROM test1 WHERE MATCH('test|one|two');
+------+--------+----------+------------+
@@ -5506,7 +5503,7 @@ Author
@@ -5818,7 +5815,7 @@
Author
Author
mysql> SHOW AGENT STATUS;
@@ -6158,8 +6155,8 @@
Author
Author
Example:
+
Example:
$cl->SetSelect ( "*, @weight+(user_karma+ln(pageviews))*0.1 AS myweight" );
$cl->SetSelect ( "exp_years, salary_gbp*{$gbp_usd_rate} AS salary_usd,
IF(age>40,1,0) AS over40" );
@@ -7902,10 +7899,10 @@ Author
Bug-tracker
Bug-tracker
Crashes
Crashes
@@ -7952,7 +7949,7 @@
Author
strip utility on that
-binary, and/or do not let your build/packaging system do that!Uploading your data
Uploading your data
Author
Author
mssql type is currently only available on Windows.
odbc type is available both on Windows natively and on
Linux through UnixODBC library.
-Example:
+
Example:
type = mysql
Author
Example:
+
Example:
sql_host = localhost
Author
mysql source type and 5432 for pgsql type.
Applies to SQL source types (mysql, pgsql, mssql) only.
Note that it depends on sql_host setting whether this value will actually be used.
-Example:
+
Example:
sql_port = 3306
Author
mysql, pgsql, mssql) only.
-Example:
+
Example:
sql_user = test
Author
mysql, pgsql, mssql) only.
-Example:
+
Example:
sql_pass = mysecretpassword
Author
mysql, pgsql, mssql) only.
-Example:
+
Example:
sql_db = test
Author
/var/lib/mysql/mysql.sock.
On FreeBSD, it would typically be /tmp/mysql.sock.
Note that it depends on sql_host setting whether this value will actually be used.
-Example:
+
Example:
sql_sock = /tmp/mysql.sock
Author
Example:
+
Example:
mysql_connect_flags = 32 # enable compression
Author
indexer and MySQL. The details on creating
the certificates and setting up MySQL server can be found in
MySQL documentation.
-Example:
+
Example:
mysql_ssl_cert = /etc/ssl/client-cert.pem
mysql_ssl_key = /etc/ssl/client-key.pem
mysql_ssl_ca = /etc/ssl/cacert.pem
@@ -8279,7 +8276,7 @@
Author
Example:
+
Example:
odbc_dsn = Driver={Oracle ODBC Driver};Dbq=myDBName;Uid=myUsername;Pwd=myPassword
Author
sql_query_pre = SET SESSION query_cache_type=OFF
Example:
+
Example:
sql_query_pre = SET NAMES utf8
sql_query_pre = SET SESSION query_cache_type=OFF
Author
--enable-id64 option
to configure allows to build with 64-bit document and word IDs support.
-Example:
+
Example:
sql_query = \
SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, \
title, content \
@@ -8412,14 +8409,14 @@ Author
Example:
+
Example:
sql_joined_field = \
tagstext from query; \
SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC
sql_joined_field = bigint tag from ranged-query; \
- SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \
- SELECT MIN(id), MAX(id) FROM tags ORDER BY docid ASC
+ SELECT id, tag FROM tags WHERE id>=$start AND id<=$end ORDER BY id ASC; \
+ SELECT MIN(id), MAX(id) FROM tags
Author
$start or $end from your query.
The example in Section 3.8, “Ranged queries”) illustrates that; note how it
uses greater-or-equal and less-or-equal comparisons.
-Example:
+
Example:
sql_query_range = SELECT MIN(id),MAX(id) FROM documents
Example:
+
Example:
sql_range_step = 1000
Author
Example:
+
Example:
sql_query_killlist = \
SELECT id FROM documents WHERE updated_ts>=@last_reindex UNION \
SELECT id FROM documents_deleted WHERE deleted_ts>=@last_reindex
@@ -8534,7 +8531,7 @@ Author
.spa
attribute data file. Bit size settings are ignored if using
inline storage.
-Example:
+
Example:
sql_attr_uint = group_id
sql_attr_uint = forum_id:9 # 9 bits for forum_id
Author
mysql, pgsql, mssql) only.
Equivalent to sql_attr_uint declaration with a bit count of 1.
-Example:
+
Example:
sql_attr_bool = is_deleted # will be packed to 1 bit
Author
Example:
+
Example:
sql_attr_bigint = my_bigint_id
Author
Example:
+
Example:
# sql_query = ... UNIX_TIMESTAMP(added_datetime) AS added_ts ...
sql_attr_timestamp = added_ts
Author
Example:
+
Example:
sql_attr_float = lat_radians
sql_attr_float = long_radians
Author
Example:
+
Example:
sql_attr_multi = uint tag from query; SELECT id, tag FROM tags
sql_attr_multi = bigint tag from ranged-query; \
SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \
@@ -8648,7 +8645,7 @@ Author
sql_attr_string will not be full-text
indexed; you can use sql_field_string
directive for that.
-Example:
+
Example:
sql_attr_string = title # will be stored but will not be indexed
Author
Example:
+
Example:
sql_attr_json = properties
Author
sql_column_buffers = <colname>=<size>[K|M] [, ...]
Example:
+
Example:
sql_query = SELECT id, mytitle, mycontent FROM documents
sql_column_buffers = mytitle=64K, mycontent=10M
Author
sql_field_string lets you do
exactly that. Both the field and the attribute will be named the same.
-Example:
+
Example:
sql_field_string = title # will be both indexed and stored
Author
Example:
+
Example:
sql_file_field = my_file_path # load and index files referred to by my_file_path
Author
Example:
+
Example:
sql_query_post = DROP TABLE my_tmp_table
Author
Example:
+
Example:
sql_query_post_index = REPLACE INTO counters ( id, val ) \
VALUES ( 'max_indexed_id', $maxid )
Author
Example:
+
Example:
sql_ranged_throttle = 1000 # sleep for 1 sec before each query step
Author
Example:
+
Example:
xmlpipe_command = cat /home/sphinx/test.xml
Author
xmlpipe2 source type only. Refer to Section 3.9, “xmlpipe2 data source”.
-Example:
+
Example:
xmlpipe_field = subject
xmlpipe_field = content
Author
Example:
+
Example:
xmlpipe_field_string = subject
Author
xmlpipe2 source type only.
Syntax fully matches that of sql_attr_uint.
-Example:
+
Example:
xmlpipe_attr_uint = author_id
Author
xmlpipe2 source type only.
Syntax fully matches that of sql_attr_bigint.
-Example:
+
Example:
xmlpipe_attr_bigint = my_bigint_id
Author
xmlpipe2 source type only.
Syntax fully matches that of sql_attr_bool.
-Example:
+
Example:
xmlpipe_attr_bool = is_deleted # will be packed to 1 bit
Author
xmlpipe2 source type only.
Syntax fully matches that of sql_attr_timestamp.
-Example:
+
Example:
xmlpipe_attr_timestamp = published
Author
xmlpipe2 source type only.
Syntax fully matches that of sql_attr_float.
-Example:
+
Example:
xmlpipe_attr_float = lat_radians
xmlpipe_attr_float = long_radians
Author
Example:
+
Example:
xmlpipe_attr_multi = taglist
Author
Example:
+
Example:
xmlpipe_attr_multi_64 = taglist
Author
Example:
+
Example:
xmlpipe_attr_string = subject
Author
Example:
+
Example:
xmlpipe_attr_json = properties
Author
Example:
+
Example:
xmlpipe_fixup_utf8 = 1
Author
searchd as a service, account user can differ
from the account you used to install the service.
-Example:
+
Example:
mssql_winauth = 1
Author
gunzip).
When indexing on a different box than the database, this lets you offload the database, and save on network traffic.
The feature is only available if zlib and zlib-devel were both available during build time.
-Example:
+
Example:
unpack_zlib = col1
unpack_zlib = col2
Author
Example:
+
unpack_mysqlcompress = body_compressed unpack_mysqlcompress = description_compressed@@ -8998,7 +8995,7 @@
data can not go over the buffer size. This option lets you control the buffer size,
both to limit indexer memory use, and to enable unpacking
of really long data fields if necessary.
-
+Example:
unpack_mysqlcompress_maxsize = 1M@@ -9026,7 +9023,7 @@Author
Index type setting lets you choose the needed type. By default, plain local index type will be assumed. -
Example:
+Example:
type = distributed-
Example:
+Example:
source = srcpart1 source = srcpart2 source = srcpart3 @@ -9112,9 +9109,10 @@Author
.spm stores MVA data;
.spp stores hit (aka posting, aka word occurrence) lists for each word ID;
.sps stores string attribute data.
.spe stores skip-lists to speed up doc-list filtering
-
+Example:
path = /var/data/test1@@ -9138,7 +9136,7 @@Author
However, such cases are infrequent, and docinfo defaults to "extern". Refer to Section 3.3, “Attributes” for in-depth discussion and RAM usage estimates. -
Example:
+Example:
docinfo = inline@@ -9161,7 +9159,7 @@Author
from root account, or be granted enough privileges otherwise. If mlock() fails, a warning is emitted, but index continues working. -
Example:
+Example:
mlock = 1@@ -9265,8 +9263,8 @@Author
lemmatize_en - apply English lemmatizer and pick a single root form (added in 2.2.1-beta);
lemmatize_de - apply German lemmatizer and pick a single root form (added in 2.2.1-beta);
- lemmatize_ru_all - apply Russian lemmatizer and index all possible root forms (added in 2.1.1-beta);
- lemmatize_en_all - apply Russian lemmatizer and index all possible root forms (added in 2.2.1-beta);
+ lemmatize_ru_all - apply Russian lemmatizer and index all possible root forms (added in 2.1.1-beta);
+ lemmatize_en_all - apply English lemmatizer and index all possible root forms (added in 2.2.1-beta);
lemmatize_de_all - apply German lemmatizer and index all possible root forms (added in 2.2.1-beta);
stem_en - apply Porter's English stemmer;
stem_ru - apply Porter's Russian stemmer;
@@ -9290,7 +9288,7 @@ stem_enru - apply Porter's English and Russian stemmers;
Author
a matching entry in the dictionary, stemmers will not be applied at all. Or in other words, wordforms can be used to implement stemming exceptions. -
Example:
+Example:
morphology = stem_en, libstemmer_sv@@ -9359,7 +9357,7 @@Author
on how many actual keywords match the given substring (in other words, into how many keywords does the search term expand). The maximum number of keywords matched is restricted by the -expansion_limit +expansion_limit directive.
Essentially, keywords and CRC dictionaries represent the two @@ -9368,7 +9366,7 @@
Author
top-speed worst-case searches (CRC dictionary), or only slightly impact indexing time but sacrifice worst-case searching time when the prefix expands into very many keywords (keywords dictionary). -
Example:
+Example:
dict = keywords@@ -9406,7 +9404,7 @@Author
PRE, TABLE, TBODY, TD, TFOOT, TH, THEAD, TR, and UL.
Both sentences and paragraphs increment the keyword position counter by 1. -
Example:
+Example:
index_sp = 1@@ -9437,7 +9435,7 @@Author
in a document. Once indexed, zones can then be used for matching with the ZONE operator, see Section 5.3, “Extended query syntax”. -
Example:
+Example:
index_zones = h*, th, titleEarlier versions than 2.1.1-beta only provided this feature for plain @@ -9458,7 +9456,7 @@
Author
exactly as long as specified will be stemmed. So in order to avoid stemming 3-character keywords, you should specify 4 for the value. For more finely grained control, refer to wordforms feature. -
Example:
+Example:
min_stemming_len = 4@@ -9496,7 +9494,7 @@@@ -9566,7 +9564,7 @@Author
of the index, sorted by the keyword frequency, see
--buildstopsand--buildfreqsswitch in Section 7.1, “indexercommand reference”. Top keywords from that dictionary can usually be used as stopwords. -Example:
+Example:
stopwords = /usr/local/sphinx/data/stopwords.txt stopwords = stopwords-ru.txt stopwords-en.txtAuthor
s02e02 > season 2 episode 2 s3 e3 > season 3 episode 3
-
+Example:
wordforms = /usr/local/sphinx/data/wordforms.txt wordforms = /usr/local/sphinx/data/alternateforms.txt wordforms = /usr/local/sphinx/private/dict*.txt @@ -9597,7 +9595,7 @@Author
time it makes no sense to embed a 100 MB wordforms dictionary into a tiny delta index. So there needs to be a size threshold, and
embedded_limitis that threshold. -Example:
+Example:
embedded_limit = 32K@@ -9677,7 +9675,7 @@Author
during indexing and searching respectively. Therefore, to pick up changes in the file it's required to reindex and restart
searchd. -Example:
+Example:
exceptions = /usr/local/sphinx/data/exceptions.txt@@ -9687,7 +9685,7 @@Author
Only those words that are not shorter than this minimum will be indexed. For instance, if min_word_len is 4, then 'the' won't be indexed, but 'they' will be. -
Example:
+Example:
min_word_len = 4@@ -9755,7 +9753,7 @@Author
Starting with 2.2.3-beta, aliases "english" and "russian" are allowed at control character mapping. -
Example:
+Example:
# default are English and Russian letters charset_table = 0..9, A..Z->a..z, _, a..z, \ U+410..U+42F->U+430..U+44F, U+430..U+44F, U+401->U+451, U+451 @@ -9778,7 +9776,7 @@Author
The syntax is the same as for charset_table, but it's only allowed to declare characters, and not allowed to map them. Also, the ignored characters must not be present in charset_table. -
Example:
+Example:
ignore_chars = U+AD@@ -9808,7 +9806,7 @@Author
$cl->Query ( "( keyword | keyword* ) other keywords" );-
Example:
+Example:
min_prefix_len = 3@@ -9830,7 +9828,7 @@Author
There's no automatic way to rank perfect word matches higher in an infix index, but the same tricks as with prefix indexes can be applied. -
Example:
+Example:
min_infix_len = 3@@ -9866,7 +9864,7 @@Author
and intentionally forbidden in that case. If required, you can still limit the length of a substring that you search for in the application code. -
Example:
+Example:
max_substring_len = 12@@ -9880,7 +9878,7 @@Author
page contents. prefix_fields specifies what fields will be prefix-indexed; all other fields will be indexed in normal mode. The value format is a comma-separated list of field names. -
Example:
+Example:
prefix_fields = url, domain@@ -9890,7 +9888,7 @@Author
Similar to prefix_fields, but lets you limit infix-indexing to given fields. -
Example:
+Example:
infix_fields = url, domain@@ -9930,7 +9928,7 @@Author
good results, thanks to phrase based ranking: it will pull closer phrase matches (which in case of N-gram CJK words can mean closer multi-character word matches) to the top. -
Example:
+Example:
ngram_len = 1@@ -9942,7 +9940,7 @@Author
this list defines characters, sequences of which are subject to N-gram extraction. Words comprised of other characters will not be affected by N-gram indexing feature. The value format is identical to charset_table. -
Example:
+Example:
ngram_chars = U+3000..U+2FA1F@@ -9968,7 +9966,7 @@Author
Phrase boundary condition will be raised if and only if such character is followed by a separator; this is to avoid abbreviations such as S.T.A.L.K.E.R or URLs being treated as several phrases. -
Example:
+Example:
phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis@@ -9978,7 +9976,7 @@Author
On phrase boundary, current word position will be additionally incremented by this number. See phrase_boundary for details. -
Example:
+Example:
phrase_boundary_step = 100@@ -10015,7 +10013,7 @@Author
There are no restrictions on tag names; ie. everything that looks like a valid tag start, or end, or a comment will be stripped. -
Example:
+Example:
html_strip = 1@@ -10026,7 +10024,7 @@Author
Specifies HTML markup attributes whose contents should be retained and indexed even though other HTML markup is stripped. The format is per-tag enumeration of indexable attributes, as shown in the example below. -
Example:
+Example:
html_index_attrs = img=alt,title; a=title;@@ -10042,7 +10040,7 @@Author
The value is a comma-separated list of element (tag) names whose contents should be removed. Tag names are case insensitive. -
Example:
+Example:
html_remove_elements = style, script@@ -10065,7 +10063,7 @@@@ -10121,7 +10119,7 @@Author
local indexes (refer to Section 12.2.31, “agent” for the details). However, that creates redundant CPU and network load, and
dist_threadsis now strongly suggested instead. -Example:
+Example:
local = chunk1 local = chunk2Author
(ie. sequentially or in parallel too) depends solely on the agent configuration (ie. dist_threads directive). Master has no remote control over that. -
Example:
+Example:
# config on box2 # sharding an index over 3 servers agent = box2:9312:chunk2 @@ -10136,7 +10134,7 @@Author
# sharding an index over 3 servers agent = box1:9312:chunk2 agent = box2:9312:chunk3 -
Agent mirrors
+
Agent mirrors
New syntax added in 2.1.1-beta lets you define so-called agent mirrors that can be used interchangeably when processing a search query. Master server keeps track of mirror status (alive or dead) and response times, and does @@ -10157,7 +10155,7 @@
Author
By default, all queries are routed to the best of the mirrors. The best one is picked based on the recent statistics, as controlled by the -ha_period_karma config directive. +ha_period_karma config directive. Master stores a number of metrics (total query count, error count, response time, etc) recently observed for every agent. It groups those by time spans, and karma is that time span length. The best agent mirror is then determined @@ -10172,11 +10170,11 @@
Author
HA/LB logic.
When there are no queries, master sends a regular ping command every -ha_ping_interval milliseconds +ha_ping_interval milliseconds in order to have some statistics and at least check, whether the remote host is still alive. ha_ping_interval defaults to 1000 msec. Setting it to 0 disables pings and statistics will only be accumulated based on actual queries. -
Example:
+Example:
# sharding index over 4 servers total # in just 2 chunks but with 2 failover mirrors for each chunk # box1, box2 carry chunk1 as local @@ -10199,10 +10197,10 @@Author
is that the master will not open a new connection to the agent for every query and then close it. Rather, it will keep a connection open and attempt to reuse for the subsequent queries. The maximal number of such persistent connections per one agent host -is limited by persistent_connections_limit option of searchd section. +is limited by persistent_connections_limit option of searchd section.
Note, that you have to set the last one in something greater than 0 if you want to use persistent agent connections. -Otherwise - when persistent_connections_limit is not defined, it assumes +Otherwise - when persistent_connections_limit is not defined, it assumes the zero num of persistent connections, and 'agent_persistent' acts exactly as simple 'agent'.
Persistent master-agent connections reduce TCP port pressure, and @@ -10210,7 +10208,7 @@
Author
in workers=threads mode. In other modes, simple non-persistent connections (i.e., one connection per operation) will be used, and a warning will show up in the console. -
Example:
+Example:
agent_persistent = remotebox:9312:index2@@ -10229,7 +10227,7 @@Author
Also, all network errors on blackhole agents will be ignored. The value format is completely identical to regular agent directive. -
Example:
+Example:
agent_blackhole = testbox:9312:testindex1,testindex2@@ -10242,7 +10240,7 @@Author
successfully. If the timeout is reached but connect() does not complete, and retries are enabled, retry will be initiated. -
Example:
+Example:
agent_connect_timeout = 300@@ -10257,7 +10255,7 @@Author
a remote agent equals to the sum of
agent_connect_timeout and agent_query_timeout. Queries will not be retried if this timeout is reached; a warning will be produced instead. -Example:
+Example:
agent_query_timeout = 10000 # our query can be long, allow up to 10 sec@@ -10277,7 +10275,7 @@Author
This directive does not affect
indexerin any way, it only affectssearchd. -Example:
+Example:
preopen = 1@@ -10308,7 +10306,7 @@Author
This directive does not affect
searchdin any way, it only affectsindexer. -Example:
+Example:
inplace_enable = 1@@ -10320,7 +10318,7 @@Author
This directive does not affect
searchdin any way, it only affectsindexer. -Example:
+Example:
inplace_hit_gap = 1M@@ -10332,7 +10330,7 @@Author
This directive does not affect
searchdin any way, it only affectsindexer. -Example:
+Example:
inplace_docinfo_gap = 1M@@ -10344,7 +10342,7 @@Author
This directive does not affect
searchdin any way, it only affectsindexer. -Example:
+Example:
inplace_reloc_factor = 0.1@@ -10356,7 +10354,7 @@Author
This directive does not affect
searchdin any way, it only affectsindexer. -Example:
+Example:
inplace_write_factor = 0.1@@ -10370,7 +10368,7 @@Author
enables exact form operator in the query language to work. This impacts the index size and the indexing time. However, searching performance is not impacted at all. -
Example:
+Example:
index_exact_words = 1@@ -10381,7 +10379,7 @@Author
This directive does not affect
searchdin any way, it only affectsindexer. -Example:
+Example:
overshort_step = 1@@ -10392,7 +10390,7 @@Author
This directive does not affect
searchdin any way, it only affectsindexer. -Example:
+Example:
stopword_step = 1@@ -10427,7 +10425,7 @@Author
hitless, "simon says hello world" will be converted to ("simon says" & hello & world) query, matching all documents that contain "hello" and "world" anywhere in the document, and also "simon says" as an exact phrase. -
Example:
+Example:
hitless_words = all@@ -10460,7 +10458,7 @@Author
This directive does not affect
indexerin any way, it only affectssearchd. -Example:
+Example:
expand_keywords = 1@@ -10499,7 +10497,7 @@@@ -10548,7 +10546,7 @@Author
so that multiple different blended characters could be normalized into just one base form. This is useful when indexing multiple alternative Unicode codepoints with equivalent glyphs. -
Example:
+Example:
blend_chars = +, &, U+23 blend_chars = +, &->+ # 2.0.1 and aboveAuthor
Default behavior is to index the entire token, equivalent to
blend_mode = trim_none. -Example:
+Example:
blend_mode = trim_tail, skip_pure@@ -10568,7 +10566,7 @@Author
hence, specifying 512 MB limit and only inserting 3 MB of data should result in allocating 3 MB, not 512 MB.
-
Example:
+Example:
rt_mem_limit = 512M@@ -10582,7 +10580,7 @@Author
in INSERT statements without an explicit list of inserted columns will have to be in the same order as configured.
-
Example:
+Example:
rt_field = author rt_field = title rt_field = content @@ -10593,7 +10591,7 @@Author
Multi-value (an arbitrary number of attributes is allowed), optional. Declares an unsigned 32-bit attribute. Introduced in version 1.10-beta. -
Example:
+Example:
rt_attr_uint = gid@@ -10602,7 +10600,7 @@Author
Multi-value (there might be multiple attributes declared), optional. Declares a 1-bit unsigned integer attribute. Introduced in version 2.1.2-release. -
Example:
+Example:
rt_attr_bool = available@@ -10611,7 +10609,7 @@Author
Multi-value (an arbitrary number of attributes is allowed), optional. Declares a signed 64-bit attribute. Introduced in version 1.10-beta. -
Example:
+Example:
rt_attr_bigint = guid@@ -10620,7 +10618,7 @@Author
Multi-value (an arbitrary number of attributes is allowed), optional. Declares a single precision, 32-bit IEEE 754 format float attribute. Introduced in version 1.10-beta. -
Example:
+Example:
rt_attr_float = gpa@@ -10629,7 +10627,7 @@Author
Declares the UNSIGNED INTEGER (unsigned 32-bit) MVA attribute. Multi-value (ie. there may be more than one such attribute declared), optional. Applies to RT indexes only. -
Example:
+Example:
rt_attr_multi = my_tags@@ -10638,7 +10636,7 @@Author
Declares the BIGINT (signed 64-bit) MVA attribute. Multi-value (ie. there may be more than one such attribute declared), optional. Applies to RT indexes only. -
Example:
+Example:
rt_attr_multi_64 = my_wide_tags@@ -10646,7 +10644,7 @@Author
Timestamp attribute declaration. Multi-value (an arbitrary number of attributes is allowed), optional. Introduced in version 1.10-beta. -
Example:
+Example:
rt_attr_timestamp = date_added@@ -10654,7 +10652,7 @@Author
String attribute declaration. Multi-value (an arbitrary number of attributes is allowed), optional. Introduced in version 1.10-beta. -
Example:
+Example:
rt_attr_string = author@@ -10664,7 +10662,7 @@Author
Introduced in version 2.1.1-beta.
Refer to Section 12.1.24, “sql_attr_json” for more details on the JSON attributes. -
Example:
+Example:
rt_attr_json = properties@@ -10678,11 +10676,11 @@Author
index. Essentially, this directive controls how exactly master does the load balancing between the configured mirror agent nodes. As of 2.1.1-beta, the following strategies are implemented: -
Simple random balancing
ha_strategy = random+
Simple random balancing
ha_strategy = randomThe default balancing mode. Simple linear random distribution among the mirrors. That is, equal selection probability are assigned to every mirror. Kind of similar to round-robin (RR), but unlike RR, does not impose a strict selection order. -
Adaptive randomized balancing
+
Adaptive randomized balancing
The default simple random strategy does not take mirror status, error rate, and, most importantly, actual response latencies into account. So to accommodate for heterogeneous clusters and/or temporary spikes in agent node load, we have @@ -10726,7 +10724,7 @@
Author
ha_strategy = noerrorsLatency-weighted probabilities, but mirrors with worse errors/success ratio are excluded from the selection. -
Round-robin balancing
ha_strategy = roundrobinSimple round-robin selection, that is, selecting the 1st mirror +
Round-robin balancing
ha_strategy = roundrobinSimple round-robin selection, that is, selecting the 1st mirror in the list, then the 2nd one, then the 3rd one, etc, and then repeating the process once the last mirror in the list is reached. Unlike with the randomized strategies, RR imposes a strict querying order (1, 2, 3, .., @@ -10752,7 +10750,7 @@
Author
to index a current word pair or not.
bigram_freq_wordslets you define a list of such keywords. -Example:
+Example:
bigram_freq_words = the, a, you, i@@ -10792,7 +10790,7 @@Author
For most usecases,
both_freqwould be the best mode, but your mileage may vary. -Example:
+Example:
bigram_freq_words = both_freq@@ -10804,7 +10802,7 @@Author
When
index_field_lengthsis set to 1,indexerwill 1) create a respective length attribute for every full-text field, -sharing the same name; 2) compute a field length (counted in keywords) for +sharing the same name but with _len suffix; 2) compute a field length (counted in keywords) for every document and store in to a respective attribute; 3) compute the per-index averages. The lengths attributes will have a special TOKENCOUNT type, but their values are in fact regular 32-bit integers, and their values are generally @@ -10819,7 +10817,7 @@Author
and its extension towards multiple fields, called BM25F. They require per-document length and per-field lengths, respectively. Hence the additional directive. -
Example:
+Example:
index_field_lengths = 1@@ -10856,7 +10854,7 @@Author
installed in the system and Sphinx must be configured built with a
--with-re2switch. Binary packages should come with RE2 builtin. -Example:
+Example:
# index '13-inch' as '13inch' regexp_filter = \b(\d+)\" => \1inch @@ -10880,7 +10878,7 @@Author
stopwords_unstemmed directive fixes that issue. When it's enabled, stopwords are applied before stemming (and therefore to the original word forms), and the tokens are stopped when token == stopword. -
Example:
+Example:
stopwords_unstemmed = 1@@ -10915,14 +10913,14 @@Author
first, then converting those to .idf format using
--buildidf, then merging all .idf files across the cluster using--mergeidf. Refer to Section 7.4, “indextoolcommand reference” for more information. -Example:
+Example:
global_idf = /usr/local/sphinx/var/global.idfRLP context configuration file. Mandatory if RLP is used. Added in 2.2.1-beta. -
Example:
+Example:
rlp_context = /home/myuser/RLP/rlp-context.xml@@ -10941,7 +10939,7 @@Author
Note that this option also affects RT indexes. When it is enabled, all attribute updates will be disabled, and also all disk chunks of RT indexes will behave as described above. However inserting and deleting of docs from RT indexes is still possible with enabled ondisk_attrs. -
Possible values:
@@ -10985,7 +10983,7 @@Author
and the database server can timeout. You can resolve that either by raising timeouts on SQL server side or by lowering
mem_limit. -Example:
+Example:
mem_limit = 256M # mem_limit = 262144K # same, but in KB # mem_limit = 268435456 # same, but in bytes @@ -11008,7 +11006,7 @@Author
(that's mostly limited by disk heads seek time). Limiting indexing I/O to a fraction of that can help reduce search performance degradation caused by indexing. -
Example:
+Example:
max_iops = 40@@ -11024,14 +11022,14 @@Author
by max_iops setting. At the time of this writing, all I/O calls should be under 256 KB (default internal buffer size) anyway, so
max_iosizevalues higher than 256 KB must not affect anything. -Example:
+Example:
max_iosize = 1048576Maximum allowed field size for XMLpipe2 source type, bytes. Optional, default is 2 MB. -
Example:
+Example:
max_xmlpipe2_field = 8M@@ -11045,7 +11043,7 @@Author
mem_limit. Note that several (currently up to 4) buffers for different files will be allocated, proportionally increasing the RAM usage. -
Example:
+Example:
write_buffer = 4M@@ -11065,7 +11063,7 @@Author
(for example) 2 MB in size, but
max_file_field_buffervalue is 128 MB, peak buffer usage would still be only 2 MB. However, files over 128 MB would be entirely skipped. -Example:
+Example:
max_file_field_buffer = 128M@@ -11163,7 +11161,7 @@Author
and 'mysql41' (MySQL protocol used since 4.1 upto at least 5.1). More details on MySQL protocol support can be found in Section 5.10, “MySQL protocol support and SphinxQL” section. -
Examples:
+Examples:
listen = localhost listen = localhost:5000 listen = 192.168.0.1:5000 @@ -11190,7 +11188,7 @@Author
Also you can use the 'syslog' as the file name. In this case the events will be sent to syslog daemon. To use the syslog option the sphinx must be configured '--with-syslog' on building. -
Example:
+Example:
log = /var/log/searchd.log@@ -11203,7 +11201,7 @@Author
In this case all search queries will be sent to syslog daemon with LOG_INFO priority, prefixed with '[query]' instead of timestamp. To use the syslog option the sphinx must be configured '--with-syslog' on building. -
Example:
+Example:
query_log = /var/log/query.log@@ -11219,7 +11217,7 @@Author
on the fly, using
SET GLOBAL query_log_format=sphinxqlsyntax. Refer to Section 5.9, “searchdquery log formats” for more discussion and format details. -Example:
+Example:
query_log_format = sphinxql@@ -11227,14 +11225,14 @@Author
Network client request read timeout, in seconds. Optional, default is 5 seconds.
searchdwill forcibly close the client connections which fail to send a query within this timeout. -Example:
+Example:
read_timeout = 1Maximum time to wait between requests (in seconds) when using persistent connections. Optional, default is five minutes. -
Example:
+Example:
client_timeout = 3600@@ -11246,7 +11244,7 @@Author
searches running, at all times. When the limit is reached, additional incoming clients are dismissed with temporary failure (SEARCHD_RETRY) status code and a message stating that the server is maxed out. -
Example:
+Example:
max_children = 10@@ -11261,7 +11259,7 @@Author
of
searchd; to stopsearchd; to notify it that it should rotate the indexes. Can also be used for different external automation scripts. -Example:
+Example:
pid_file = /var/run/searchd.pid@@ -11300,7 +11298,7 @@Author
memory usage during the rotation (because both old and new copies of
.spa/.spi/.spmdata need to be in RAM while preloading new copy). Average usage stays the same. -Example:
+Example:
seamless_rotate = 1@@ -11323,14 +11321,14 @@Author
They also make
searchduse more file handles. In most scenarios it's therefore preferred and recommended to preopen indexes. -Example:
+Example:
preopen_indexes = 1Whether to unlink .old index copies on successful rotation. Optional, default is 1 (do unlink). -
Example:
+Example:
unlink_old = 0@@ -11346,7 +11344,7 @@Author
between those intervals is set with
attr_flush_period, in seconds.It defaults to 0, which disables the periodic flushing, but flushing will still occur at normal shut-down. -
Example:
+Example:
attr_flush_period = 900 # persist updates to disk every 15 minutes@@ -11356,7 +11354,7 @@Author
Only used for internal sanity checks, does not directly affect RAM use or performance. Optional, default is 8M. Introduced in version 0.9.9-rc1. -
Example:
+Example:
max_packet_size = 32M@@ -11374,7 +11372,7 @@Author
In the meantime, MVA updates are intended to be used as a measure to quickly catchup with latest changes in the database until the next index rebuild; not as a persistent storage mechanism. -
Example:
+Example:
mva_updates_pool = 16M@@ -11383,7 +11381,7 @@Author
Only used for internal sanity checks, does not directly affect RAM use or performance. Optional, default is 256. Introduced in version 0.9.9-rc1. -
Example:
+Example:
max_filters = 1024@@ -11392,7 +11390,7 @@Author
Only used for internal sanity checks, does not directly affect RAM use or performance. Optional, default is 4096. Introduced in version 0.9.9-rc1. -
Example:
+Example:
max_filter_values = 16384@@ -11406,7 +11404,7 @@Author
fail with "connection refused" message. listen_backlog directive controls the length of the connection queue. Non-Windows builds should work fine with the default value. -
Example:
+Example:
listen_backlog = 20@@ -11418,7 +11416,7 @@Author
two associated read buffers (one for document list and one for hit list). This setting lets you control their sizes, increasing per-query RAM use, but possibly decreasing IO time. -
Example:
+Example:
read_buffer = 1M@@ -11434,7 +11432,7 @@Author
unhinted read size, but raising it for smaller lists. It will not affect RAM use because read buffer will be already allocated. So it should be not greater than read_buffer. -
Example:
+Example:
read_unhinted = 32K@@ -11445,7 +11443,7 @@Author
Makes searchd perform a sanity check of the amount of the queries submitted in a single batch when using multi-queries. Set it to 0 to skip the check. -
Example:
+Example:
max_batch_queries = 256@@ -11456,7 +11454,7 @@Author
Limits RAM usage of a common subtree optimizer (see Section 5.11, “Multi-queries”). At most this much RAM will be spent to cache document entries per each query. Setting the limit to 0 disables the optimizer. -
Example:
+Example:
subtree_docs_cache = 8M@@ -11467,7 +11465,7 @@Author
Limits RAM usage of a common subtree optimizer (see Section 5.11, “Multi-queries”). At most this much RAM will be spent to cache keyword occurrences (hits) per each query. Setting the limit to 0 disables the optimizer. -
Example:
+Example:
subtree_hits_cache = 16M@@ -11507,7 +11505,7 @@Author
Threads mode was implemented along with RT backend and is required to use RT indexes. (Regular disk-based indexes work in all the available modes.) -
Example:
+Example:
workers = threads@@ -11541,7 +11539,7 @@@@ -11612,7 +11610,7 @@Author
Up to
dist_threadsthreads are be created to process those files. That speeds up snippet extraction when the total amount of document data to process is significant (hundreds of megabytes). -Example:
+Example:
index dist_test { type = distributed @@ -11580,7 +11578,7 @@Author
Otherwise, the default path, which in most cases is the same as working folder, may point to the folder with no write access (for example, /usr/local/var/data). In this case, the searchd will not start at all. -
Example:
+Example:
binlog_path = # disable logging binlog_path = /var/data # /var/data/binlog.001 etc will be createdAuthor
cases, the default hybrid mode 2 provides a nice balance of speed and safety, with full RT index data protection against daemon crashes, and some protection against hardware ones. -
Example:
+Example:
binlog_flush = 1 # ultimate safety, low speed@@ -11624,7 +11622,7 @@Author
A new binlog file will be forcibly opened once the current binlog file reaches this limit. This achieves a finer granularity of logs and can yield more efficient binlog disk usage under certain borderline workloads. -
Example:
+Example:
binlog_max_log_size = 16M@@ -11648,7 +11646,7 @@Author
This might be useful, for instance, when the document storage locations (be those local storage or NAS mountpoints) are inconsistent across the servers. -
Example:
+Example:
snippets_file_prefix = /mnt/common/server1/@@ -11660,7 +11658,7 @@Author
Specifies the default collation used for incoming requests. The collation can be overridden on a per-query basis. Refer to Section 5.12, “Collations” section for the list of available collations and other details. -
Example:
+Example:
collation_server = utf8_ci@@ -11671,23 +11669,10 @@-Author
Specifies the libc locale, affecting the libc-based collations. Refer to Section 5.12, “Collations” section for the details. -
Example:
+Example:
collation_libc_locale = fr_FR---Trusted location for the dynamic libraries (UDFs). -Optional, default is empty (no location). -Introduced in version 2.0.1-beta. -
-Specifies the trusted directory from which the -UDF libraries can be loaded. Requires -workers = thread to take effect. -
Example:
-workers = threads -plugin_dir = /usr/local/sphinx/lib -+-A server version string to return via MySQL protocol. Optional, default is empty (return Sphinx version). @@ -11703,10 +11688,10 @@
Author
mysql_version_stringdirective and havesearchdreport a different version to clients connecting over MySQL protocol. (By default, it reports its own version.) -Example:
+Example:
mysql_version_string = 5.0.37+-RT indexes RAM chunk flush check period, in seconds. Optional, default is 10 hours. @@ -11718,10 +11703,10 @@
Author
periodic flush checks, and eligible RAM chunks can get saved, enabling consequential binlog cleanup. See Section 4.4, “Binary logging” for more details. -
Example:
+Example:
rt_flush_period = 3600 # 1 hour+-Per-thread stack size. Optional, default is 1M. @@ -11744,10 +11729,10 @@
Author
with upto 250 levels, 150K for upto 700 levels, etc. If the stack size limit is not met,
searchdfails the query and reports the required stack size in the error message. -Example:
+Example:
thread_stack = 256K+-The maximum number of expanded keywords for a single wildcard. Optional, default is 0 (no limit). @@ -11761,10 +11746,10 @@
Author
of such expansions. Setting
expansion_limit = Nrestricts expansions to no more than N of the most frequent matching keywords (per each wildcard in the query). -Example:
+Example:
expansion_limit = 16+-Threaded server watchdog. Optional, default is 1 (watchdog enabled). @@ -11777,10 +11762,10 @@
Author
process that monitors the main server process, and automatically restarts the latter in case of abnormal termination. Watchdog is enabled by default. -
Example:
+Example:
watchdog = 0 # disable watchdog+-Delay between restarting preforked children on index rotation, in milliseconds. Optional, default is 0 (no delay). @@ -11799,10 +11784,10 @@
Author
there are 30 children, then the last one would only be actually restarted 1.5 seconds (50*30=1500 milliseconds) after the "rotation finished" message in the
searchdevent log. -Example:
+Example:
prefork_rotation_throttle = 50 # throttle children restarts by 50 msec each+-Path to a file where current SphinxQL state will be serialized. Available since version 2.1.1-beta. @@ -11812,10 +11797,10 @@
Author
If you load UDF functions, but Sphinx crashes, when it gets (automatically) restarted, your UDF and global variables will no longer be available; using persistent state helps a graceful recovery with no such surprises. -
Example:
+Example:
sphinxql_state = uservars.sql+-Interval between agent mirror pings, in milliseconds. Optional, default is 1000. @@ -11828,10 +11813,10 @@
Author
by this directive.
To disable pings, set ha_ping_interval to 0. -
Example:
+Example:
ha_ping_interval = 0+-Agent mirror statistics window size, in seconds. Optional, default is 60. @@ -11853,10 +11838,10 @@
Author
They can be inspected using SHOW AGENT STATUS statement. -
Example:
+Example:
ha_period_karma = 120+-The maximum # of simultaneous persistent connections to remote persistent agents. Each time connecting agent defined under 'agent_persistent' we try to reuse existing connection (if any), or connect and save the connection for the future. @@ -11864,10 +11849,10 @@
Author
when all of them are busy). This very directive limits the number. It affects the num of connections to each agent's host, across all distributed indexes.
It is reasonable to set the value equal or less than max_children option of the agents. -
Example:
+Example:
persistent_connections_limit = 29 # assume that each host of agents has max_children = 30 (or 29).+-A maximum number of I/O operations (per second) that the RT chunks merge thread is allowed to start. Optional, default is 0 (no limit). Added in 2.1.1-beta. @@ -11877,10 +11862,10 @@
Author
RT optimization activity will not generate more disk iops (I/Os per second) than the configured limit. Modern SATA drives can perform up to around 100 I/O operations per second, and limiting rt_merge_iops can reduce search performance degradation caused by merging. -
Example:
+Example:
rt_merge_iops = 40+-A maximum size of an I/O operation that the RT chunks merge thread is allowed to start. @@ -11890,14 +11875,14 @@
Author
This directive lets you throttle down the I/O impact arising from the
OPTIMIZEstatements. I/Os bigger than this limit will be broken down into 2 or more I/Os, which will then be accounted as separate I/Os -with regards to the rt_merge_iops +with regards to the rt_merge_iops limit. Thus, it is guaranteed that all the optimization activity will not generate more than (rt_merge_iops * rt_merge_maxiosize) bytes of disk I/O per second. -Example:
+Example:
rt_merge_maxiosize = 1M+-Costs for the query time prediction model, in nanoseconds. Optional, default is "doc=64, hit=48, skip=2048, match=64" (without the quotes). @@ -11944,10 +11929,10 @@
Author
is somewhat more error prone.) It is not necessary to specify all 4 costs at once, as the missed one will take the default values. However, we strongly suggest to specify all of them, for readability. -
Example:
+Example:
predicted_time_costs = doc=128, hit=96, skip=4096, match=128+-searchd --stopwait wait time, in seconds. Optional, default is 3 seconds. @@ -11958,10 +11943,10 @@
Author
flushing attributes and updating binlog. And it requires some time. searchd --stopwait will wait up to shutdown_timeout seconds for the daemon to finish its jobs. Suitable time depends on your index size and load. -
Example:
+Example:
shutdown_timeout = 5 # wait for up to 5 seconds+-Instance-wide defaults for ondisk_attrs directive. Optional, default is 0 (all attributes are loaded in memory). This @@ -11969,32 +11954,32 @@
Author
served by this copy of searchd. Per-index directives take precedence, and will overwrite this instance-wide default value, allowing for fine-grain control.
+-Limit (in milliseconds) that prevents the query from being written to the query log. Optional, default is 0 (all queries are written to the query log). This directive specifies that only queries with execution times that exceed the specified limit will be logged.
+-Instance-wide defaults for agent_connect_timeout parameter. The last defined in distributed (network) indexes.
+-Instance-wide defaults for agent_query_timeout parameter. The last defined in distributed (network) indexes, or also may be overridden per-query using OPTION clause.
+-Integer, specifies how many times sphinx will try to connect and query remote agents in distributed index before reporting fatal query error. Default is 0 (i.e. no retries). This value may be also specified on per-query basis using 'OPTION retry_count=XXX' clause. If per-query option exists, it will override the one specified in config.
+@@ -12010,7 +11995,7 @@Integer, in milliseconds. Specifies the delay sphinx waits before retrying to query a remote agent in case it fails. -The value has sense only if non-zero agent_retry_count +The value has sense only if non-zero agent_retry_count or non-zero per-query OPTION retry_count specified. Default is 500. This value may also be specified on per-query basis using 'OPTION retry_delay=XXX' clause. If per-query option exists, it will override the one specified in config.
Author
the base dictionary path. File names are hardcoded and specific to a given lemmatizer; the Russian lemmatizer uses ru.pak dictionary file. The dictionaries can be obtained from the Sphinx website. -
Example:
+Example:
lemmatizer_base = /usr/local/share/sphinx/dicts/@@ -12023,7 +12008,7 @@Author
By default, JSON format errors are ignored (
ignore_attr) and the indexer tool will just show a warning. Setting this option tofail_indexwill rather make indexing fail at the first JSON format error. -Example:
+Example:
on_json_attr_error = ignore_attr@@ -12037,7 +12022,7 @@Author
of strings; if the option is 0, such values will be indexed as strings. This conversion applies to any data source, that is, JSON attributes originating from either SQL or XMLpipe2 sources will all be affected. -
Example:
+Example:
json_autoconv_numbers = 1@@ -12051,21 +12036,21 @@Author
will be automatically brought to lower case when indexing. This conversion applies to any data source, that is, JSON attributes originating from either SQL or XMLpipe2 sources will all be affected. -
Example:
+Example:
json_autoconv_keynames = lowercasePath to the RLP root folder. Mandatory if RLP is used. Added in 2.2.1-beta. -
Example:
+Example:
rlp_root = /home/myuser/RLPRLP environment configuration file. Mandatory if RLP is used. Added in 2.2.1-beta. -
Example:
+Example:
rlp_environment = /home/myuser/RLP/rlp-environment.xml@@ -12074,7 +12059,7 @@Author
Do not set this value to more than 10Mb because sphinx splits large documents to 10Mb chunks before processing them by the RLP. This option has effect only if
morphology = rlp_chinese_batchedis specified. Added in 2.2.1-beta. -Example:
+Example:
rlp_max_batch_size = 100k@@ -12082,54 +12067,98 @@+Author
Maximum number of documents batched before processing them by the RLP. Optional, default is 50. This option has effect only if
morphology = rlp_chinese_batchedis specified. Added in 2.2.1-beta. -Example:
+Example:
rlp_max_batch_docs = 100 +++Trusted location for the dynamic libraries (UDFs). +Optional, default is empty (no location). +Introduced in version 2.0.1-beta. +
+Specifies the trusted directory from which the +UDF libraries can be loaded. Requires +workers = thread to take effect. +
Example:
+plugin_dir = /usr/local/sphinx/lib-Table of Contents
- A.1. Version 2.2.7-release, 20 jan 2015
-- A.2. Version 2.2.6-release, 13 nov 2014
-- A.3. Version 2.2.5-release, 06 oct 2014
-- A.4. Version 2.2.4-release, 11 sep 2014
-- A.5. Version 2.2.3-beta, 13 may 2014
-- A.6. Version 2.2.2-beta, 11 feb 2014
-- A.7. Version 2.2.1-beta, 13 nov 2013
-- A.8. Version 2.1.9-release, 03 jul 2014
-- A.9. Version 2.1.8-release, 28 apr 2014
-- A.10. Version 2.1.7-release, 30 mar 2014
-- A.11. Version 2.1.6-release, 24 feb 2014
-- A.12. Version 2.1.5-release, 22 jan 2014
-- A.13. Version 2.1.4-release, 18 dec 2013
-- A.14. Version 2.1.3-release, 12 nov 2013
-- A.15. Version 2.1.2-release, 10 oct 2013
-- A.16. Version 2.1.1-beta, 20 feb 2013
-- A.17. Version 2.0.11-dev, xx xxx xxxx
-- A.18. Version 2.0.10-release, 22 jan 2014
-- A.19. Version 2.0.9-release, 26 aug 2013
-- A.20. Version 2.0.8-release, 26 apr 2013
-- A.21. Version 2.0.7-release, 26 mar 2013
-- A.22. Version 2.0.6-release, 22 oct 2012
-- A.23. Version 2.0.5-release, 28 jul 2012
-- A.24. Version 2.0.4-release, 02 mar 2012
-- A.25. Version 2.0.3-release, 23 dec 2011
-- A.26. Version 2.0.2-beta, 15 nov 2011
-- A.27. Version 2.0.1-beta, 22 apr 2011
-- A.28. Version 1.10-beta, 19 jul 2010
-- A.29. Version 0.9.9-release, 02 dec 2009
-- A.30. Version 0.9.9-rc2, 08 apr 2009
-- A.31. Version 0.9.9-rc1, 17 nov 2008
-- A.32. Version 0.9.8.1, 30 oct 2008
-- A.33. Version 0.9.8, 14 jul 2008
-- A.34. Version 0.9.7, 02 apr 2007
-- A.35. Version 0.9.7-rc2, 15 dec 2006
-- A.36. Version 0.9.7-rc1, 26 oct 2006
-- A.37. Version 0.9.6, 24 jul 2006
-- A.38. Version 0.9.6-rc1, 26 jun 2006
+-Table of Contents
- A.1. Version 2.2.9-release, 16 apr 2015
+- A.2. Version 2.2.8-release, 09 mar 2015
+- A.3. Version 2.2.7-release, 20 jan 2015
+- A.4. Version 2.2.6-release, 13 nov 2014
+- A.5. Version 2.2.5-release, 06 oct 2014
+- A.6. Version 2.2.4-release, 11 sep 2014
+- A.7. Version 2.2.3-beta, 13 may 2014
+- A.8. Version 2.2.2-beta, 11 feb 2014
+- A.9. Version 2.2.1-beta, 13 nov 2013
+- A.10. Version 2.1.9-release, 03 jul 2014
+- A.11. Version 2.1.8-release, 28 apr 2014
+- A.12. Version 2.1.7-release, 30 mar 2014
+- A.13. Version 2.1.6-release, 24 feb 2014
+- A.14. Version 2.1.5-release, 22 jan 2014
+- A.15. Version 2.1.4-release, 18 dec 2013
+- A.16. Version 2.1.3-release, 12 nov 2013
+- A.17. Version 2.1.2-release, 10 oct 2013
+- A.18. Version 2.1.1-beta, 20 feb 2013
+- A.19. Version 2.0.11-dev, xx xxx xxxx
+- A.20. Version 2.0.10-release, 22 jan 2014
+- A.21. Version 2.0.9-release, 26 aug 2013
+- A.22. Version 2.0.8-release, 26 apr 2013
+- A.23. Version 2.0.7-release, 26 mar 2013
+- A.24. Version 2.0.6-release, 22 oct 2012
+- A.25. Version 2.0.5-release, 28 jul 2012
+- A.26. Version 2.0.4-release, 02 mar 2012
+- A.27. Version 2.0.3-release, 23 dec 2011
+- A.28. Version 2.0.2-beta, 15 nov 2011
+- A.29. Version 2.0.1-beta, 22 apr 2011
+- A.30. Version 1.10-beta, 19 jul 2010
+- A.31. Version 0.9.9-release, 02 dec 2009
+- A.32. Version 0.9.9-rc2, 08 apr 2009
+- A.33. Version 0.9.9-rc1, 17 nov 2008
+- A.34. Version 0.9.8.1, 30 oct 2008
+- A.35. Version 0.9.8, 14 jul 2008
+- A.36. Version 0.9.7, 02 apr 2007
+- A.37. Version 0.9.7-rc2, 15 dec 2006
+- A.38. Version 0.9.7-rc1, 26 oct 2006
+- A.39. Version 0.9.6, 24 jul 2006
+- A.40. Version 0.9.6-rc1, 26 jun 2006
-Minor features
- +
added #2112, string equal comparison support for IF() function (for JSON and string attributes)
++Bug fixes
- +
fixed #2228, removed
searchdshutdown behavior on failed connection- +
fixed #2208, ZONESPANLIST() support for RT indexes
- +
- +
fixed #2201,
indextoolfalse positive error on RT index- +
fixed #2201, crash with string comparison at expressions and expression ranker
- +
fixed #2199, invalid packedfactors JSON output for index with stopwords
- +
fixed #2197, TRUNCATE fails to remove disk chunk files after calling OPTIMIZE
- +
fixed #2196, .NET connector issue (UTC_TIMESTAMP() support)
- +
- +
fixed #2176, agent used
ha_strategy=randominstead of specified in config- +
fixed #2144, query parser crash vs multiforms with heading numbers
- +
fixed #2122, id64 daemon failed to load RT disk chunk with kill-list from id32 build
- +
fixed #2120, aliased JSON elements support
- +
fixed #1979, snippets generation and span length and lcs calculation in proximity queries
- +
fixed truncated results (and a potential crash) vs long enough ZONESPANLIST() result
++Minor features
+
- +
added #2166, per agent HA strategy for distributed indexes
Bug fixes
- +
fixed #2182, incorrect query results with multiple same destination wordforms
- +
fixed #2181, improved error message on incorrect filters
- +
fixed #2178, ZONESPAN operator for queries with more than two words
- +
fixed #2172, incorrect results with field position fulltext operators
- +
fixed #2171, some index options do not work for template indexes
- +
fixed #2170, joined fields indexation with document id equals to 0
- +
fixed #2110, crash on snippet generation
- +
fixed WLCCS ranking factor computation
- +
fixed memory leak on queries with ZONEs
+Minor features
-Bug fixes
- +
fixed #2158, crash at RT index after morphology changed to AOT after index was created
Bug fixes
-
fixed #2158, crash at RT index after morphology changed to AOT after index was created
fixed #2155, stopwords got missed on disk chunk save at RT index
fixed #2151, agents statistics missed in case of huge amount of agents
- @@ -12142,8 +12171,8 @@
fixed #2139, escape all special characters in JSON result set, according to RFC 4627
Bug fixes
fixed seamless rotation in prefork mode
fixed snippets crash with blend chars at the beginning of a string
-Bug fixes
- +
fixed #2104, ALL()/ANY()/INDEXOF() support for distributed indexes
+Bug fixes
-
fixed #2104, ALL()/ANY()/INDEXOF() support for distributed indexes
fixed #2102, show agent status misses warnings from agents
fixed #2100, crash of
indexerwhile loading stopwords with tokenizer plugin- @@ -12151,10 +12180,10 @@
fixed #2098, arbitrary JSON subkeys and IS NULL for distributed indexes
Bug fixes
fixed possibly memory leak in plugin creation function
indexation of duplicate documents
-New minor features
- +
added OPTION rand_seed which affects ORDER BY RAND()
+New minor features
-
added OPTION rand_seed which affects ORDER BY RAND()
Bug fixes
- +
fixed #2042,
indextoolfails with field mask on 32+ fieldsBug fixes
-
fixed #2042,
indextoolfails with field mask on 32+ fieldsfixed #2031, wrong encoding with UnixODBC/Oracle source
fixed #2056, several bugs in RLP tokenizer
- @@ -12164,26 +12193,26 @@
fixed #2054, SHOW THREADS hangs if queries in prefork mode
Bug fixes
fixed crash in config parser
fixed MySQL protocol response when daemon maxed out
-New major features
- +
added ALTER RTINDEX rt1 RECONFIGURE which allows to change RT index settings on the fly
+New major features
-
added ALTER RTINDEX rt1 RECONFIGURE which allows to change RT index settings on the fly
added SHOW INDEX idx1 SETTINGS statement
added ability to specify several destination forms for the same source wordform (as a result, N:M mapping is now available)
added blended chars support to exceptions
New minor features
- +
New minor features
-
added FACTORS() alias for PACKEDFACTORS() function
added
LIMITclause for the FACET keywordadded JSON-formatted output to
PACKEDFACTORS()functionadded #1999 ATAN2() function
- -
added connections counter and also avg and max timers to agent status
- +
added
searchdconfiguration keys agent_connect_timeout, agent_query_timeout, agent_retry_count and agent_retry_delayadded
searchdconfiguration keys agent_connect_timeout, agent_query_timeout, agent_retry_count and agent_retry_delayGROUPBY() function now returns strings for string attributes
Optimizations and removals
- +
optimized json_autoconv_numbers option speed
Optimizations and removals
-
optimized json_autoconv_numbers option speed
optimized tokenizing with exceptions on
fixed #1970, speeding up ZONE and ZONESPAN operators
Bug fixes
- +
fixed #2027, slow queries to multiple indexes with large kill-lists
Bug fixes
-
fixed #2027, slow queries to multiple indexes with large kill-lists
fixed #2022, blend characters of matched word must not be outside of snippet passage
- @@ -12195,8 +12224,8 @@
fixed #2018, different wildcard behaviour in RT and plain indexes
Bug fixes
fixed buffer overrun when sizing packed factors (with way too many fields) in expression ranker
fixed cpu time logging for cases where work is done in child threads or agents
-New features
- +
added #1920, charset_table aliases
+New features
added #1920, charset_table aliases
added #1887, filtering over string attributes
- @@ -12205,10 +12234,10 @@
added #1689, GROUP BY JSON attributes
New features
-Optimizations and removals
- +
improved speed of concurrent insertion in RT indexes
Optimizations and removals
-
improved speed of concurrent insertion in RT indexes
removed max_matches config key
Bug fixes
- +
Bug fixes
-
fixed #1942, crash in SHOW THREADS command
fixed #1922, crash on snippet generation for queries with duplicated words
- @@ -12223,21 +12252,21 @@
Bug fixes
fixed #1870, crash on ORDER BY JSON attributes
fixed template index removing on rotation
-New features
- +
added #1604, CALL KEYWORDS can show now multiple lemmas for a keyword
+New features
-
added #1604, CALL KEYWORDS can show now multiple lemmas for a keyword
added ALTER TABLE DROP COLUMN
added ALTER for JSON/string/MVA attributes
added REMAP() function which surpasses SetOverride() API
added an argument to PACKEDFACTORS() to disable ATC calculation (syntax: PACKEDFACTORS({no_atc=1}))
added exact phrase query syntax
- -
added flag
'--enable-dl'to configure script which works withlibmysqlclient,libpostgresql,libexpat,libunixobdc- +
added new plugin system: CREATE/DROP PLUGIN, SHOW PLUGINS, plugin_dir now in common, index/query_token_filter plugins
added new plugin system: CREATE/DROP PLUGIN, SHOW PLUGINS, plugin_dir now in common, index/query_token_filter plugins
added ondisk_attrs support for RT indexes
added position shift operator to phrase operator
added possibility to add user-defined rankers (via plugins)
Optimizations, behavior changes, and removals
-
- -
changed #1797, per-term statistics report (expanded terms fold to their respective substrings)
- +
changed default thread_stack value to 1M
Optimizations, behavior changes, and removals
- +
changed #1797, per-term statistics report (expanded terms fold to their respective substrings)
changed default thread_stack value to 1M
changed local directive in a distributed index which takes now a list (eg.
local=shard1,shard2,shard3)deprecated SetMatchMode() API call
- @@ -12252,8 +12281,8 @@
deprecated SetOverride() API call
Optimizations, behavior changes, and removals
<removed deprecated
str2wordcountattributesremoved support for client versions 0.9.6 and below
-Major new features
- added ALTER TABLE that can add attributes to disk and RT indexes on the fly
++Major new features
- added ALTER TABLE that can add attributes to disk and RT indexes on the fly
- added ATTACH support for non-empty RT target indexes
- added Chinese segmentation with RLP (Rosette Linguistics platform) support
- added English, German lemmatization support
@@ -12264,23 +12293,23 @@Major new features
added full JSON attributes support, arbitrary JSON documents (with subobjects etc) can now be stored-- added in-place JSON updates for scalar values
- added index type=template directive (allows CALL KEYWORDS, CALL SNIPPETS)
-- added ondisk_attrs, ondisk_attrs_default directives that keep attributes on disk
+- added ondisk_attrs, ondisk_attrs_default directives that keep attributes on disk
- added table functions mechanism, and REMOVE_REPEATS() table function
- added support for arbitrary expressions in WHERE for DELETE queries
Ranking related features
- added OPTION local_df=1, an option to aggregate IDFs over local indexes (shards)
+Ranking related features
-
- added OPTION local_df=1, an option to aggregate IDFs over local indexes (shards)
- added UDF XXX_reinit() method to reload UDFs with
workers=prefork- added comma-separated syntax to OPTION
idf,tfidf_unnormalizedandtfidf_normalizedflags- added
lccs,wlccs,exact_order,min_gaps, andatcranking factors- added
sphinx_get_XXX_factors(), a faster interface to access PACKEDFACTORS() in UDFs- added support for exact_hit, exact_order field factors when using more than 32 fields (exact_hit, exact_order)
Instrumentation features
- added DESCRIBE and --dumpheader support for tokencount attributes (generated by index_field_lengths=1 directive)
+Instrumentation features
-
- added DESCRIBE and --dumpheader support for tokencount attributes (generated by index_field_lengths=1 directive)
- added RT index query profile, percentages, totals to SHOW PROFILE
- added
predicted_time,dist_predicted_time,fetched_docs,fetched_hitscounters to SHOW META- added
total_tokensanddisk_bytescounters to SHOW INDEX STATUSGeneral features
- added ALL(), ANY() and INDEXOF() functions for JSON subarrays
+General features
- added ALL(), ANY() and INDEXOF() functions for JSON subarrays
- added MIN_TOP_WEIGHT(), MIN_TOP_SORTVAL() functions
- added TOP() aggregate function to expression ranker
- added a check for duplicated tail hit positions in indextool --check
@@ -12290,13 +12319,13 @@General features
-added indexer --nohup and indextool --rotate switches to check index files before rotating them
- added scientific notation support for JSON attributes (as per RFC 4627)
- added several SphinxQL statements to fix MySQL Workbench connection issues (LIKE for session variables, etc.)
-- added shutdown_timeout directive to
+searchdconfig section- added shutdown_timeout directive to
searchdconfig section- added signed values support for INTEGER() and UINT() function
- added snippet generation options to SNIPPET() function
- added string filter support in distributed queries, SphinxAPI, SphinxQL query log
- added support for mixed distributed and local index queries (SELECT * FROM dist1,dist2,local3), and
index_weightsoption for that caseOptimizations, behavior changes, and removals
- optimized JSON attributes access (1.12x to 2.0x+ total query speedup depending on the JSON data)
+Optimizations, behavior changes, and removals
-
- optimized JSON attributes access (1.12x to 2.0x+ total query speedup depending on the JSON data)
- optimized SELECT (1.02x to 3.5x speedup, depending on index schema size)
- optimized UPDATE (up to 3x faster on big updates)
- optimized away internal threads table mutex contention with
@@ -12307,7 +12336,7 @@workers=threadsand 1000s of threadsOptimizations, behavior changes, and removals
<- disallowed dashes in index names in API requests (just like in SphinxQL)
- removed legacy
xmlpipedata source v1,compat_sphinxql_magicsdirective,SetWeights()SphinxAPI call, and SPH_SORT_CUSTOM SphinxAPI modeBug fixes
- fixed #1734, unquoted literal in json subscript could cause a crash, returns 'unknown column' now.
+Bug fixes
-
- fixed #1734, unquoted literal in json subscript could cause a crash, returns 'unknown column' now.
- fixed #1683, under certain conditions stopwords were not taken into account in RT indexes
- fixed #1648, #1644, when using AOT lemmas with snippet generation, not all the forms got highlighted
- fixed #1549, OPTION
@@ -12321,16 +12350,16 @@idf=tfidf_normalizedwas ignored for distributed queriesBug fixes
fixed wrong
lcsand min_best_span_pos ranking factor values when any expansion (expand_keywords or lemmatize) occurred- fixed a crash while creating indexes with sql_joined_field
-Bug fixes
- +
fixed #1994, parsing of empty JSON arrays
+-Bug fixes
fixed #1994, parsing of empty JSON arrays
fixed #1987, handling of index_exact_words with AOT morphology and infixes on
fixed #1984, teaching HTML parser to handle hex numbers
fixed #1983, master and agents networking issue
fixed #1977, escaping of characters doesn't work with exceptions
fixed #1968, parsing of WEIGHT() function (queries to distributed indexes affected)
-Bug fixes
- +
+Bug fixes
--Bug fixes
- +
fixed #1917, field limit propagation outside of group
+Bug fixes
-
fixed #1917, field limit propagation outside of group
fixed #1915, exact form passes to index skipping stopwords filter
fixed #1905, multiple lemmas at the end of a field
- @@ -12354,8 +12383,8 @@
fixed #1903,
indextoolcheck mode for hitless indexes and indexes with large amount of documentsBug fixes
fixed unnecessary escaping in JSON result set
fixed Quick Tour documentation chapter
-Bug fixes
- +
fixed #1857, crash in arabic stemmer
+Bug fixes
-
fixed #1857, crash in arabic stemmer
fixed #1875, fixed crash on adding documents with long words in dict=keyword index with morphology and infixes enabled
fixed #1876, crash on words with large codepoints and infix searches
- @@ -12375,32 +12404,32 @@
fixed #1880, crash on multiquery with one incorrect query
Bug fixes
fixed crash on sending empty snippet result
fixed index corruption in UPDATE queries with non-existent attributes
-Bug fixes
- +
fixed #1848, infixes and morphology clash
+-Bug fixes
fixed #1848, infixes and morphology clash
fixed #1823,
indextoolfails to handle indexes with lemmatizer morphologyfixed #1799, crash in queries to distributed indexes with GROUP BY on multiple values
- -
fixed #1718,
expand_keywordsoption lost in disk chunks of RT indexes- +
fixed documentation on rt_flush_period
fixed documentation on rt_flush_period
fixed network protocol issue which results in timeouts of
libmysqlclientfor big Sphinx responses-Bug fixes
- +
fixed #1778, indexes with more than 255 attributes
+-Bug fixes
fixed #1778, indexes with more than 255 attributes
fixed #1777, ORDER BY WEIGHT()
fixed #1796, missing results in queries with quorum operator of indexes with some lemmatizer
fixed #1780, incorrect results while querying indexes with wordforms, some lemmatizer and enable_star=1
fixed, SHOW PROFILE for fullscan queries
fixed, --with-re2 check
-Bug fixes
- +
fixed #1753, path to re2 sources could not be set using
--with-re2, options--with-re2-libsand--with-re2-includesadded toconfigure+-Bug fixes
fixed #1753, path to re2 sources could not be set using
--with-re2, options--with-re2-libsand--with-re2-includesadded toconfigurefixed #1739, erroneous conversion of RAM chunk into disk chunk when loading id32 index with id64 binary
fixed #1738, unlinking RAM chunk when converting it to disk chunk
fixed #1710, unable to filter by attributes created by index_field_lengths=1
fixed #1716, random crash with with multiple running threads
fixed crash while querying index with lemmatizer and wordforms
-New features
- +
added FLUSH RAMCHUNK statement
+New features
added FLUSH RAMCHUNK statement
added SHOW PLAN statement
added support for GROUP BY on multiple attributes
- @@ -12414,7 +12443,7 @@
added BM25F() function to
SELECTexpressions (now works with the expression based ranker)New features
-
JSONattributes (up to 5-20% fasterSELECTsusing JSON objects)optimized xmlpipe2 indexing (up to 9 times faster on some schemas)
Bug fixes
- +
fixed #1684, COUNT(DISTINCT smth) with implicit
GROUP BYreturns correct value nowBug fixes
-
fixed #1684, COUNT(DISTINCT smth) with implicit
GROUP BYreturns correct value nowfixed #1672, exact token AOT vs lemma (
indexerskips exact form of token that passed AOT through tokenizer)fixed #1659, fail while loading empty infix dictionary with dict=keywords
- @@ -12472,26 +12501,26 @@
fixed #1638, force explicit JSON type conversion for aggregate functions
Bug fixes
fixed string case error with JSON attributes in select list of a query
fixed
TOP_COUNTusage inmisc/suggestand updated to PHP 5.3 and UTF-8-Major new features
- +
added query profiling (SET PROFILING=1 and SHOW PROFILE statements)
+Major new features
-
added query profiling (SET PROFILING=1 and SHOW PROFILE statements)
added AOT-based Russian lemmatizer (morphology={lemmatize_ru | lemmatize_ru_all}, lemmatizer_base, and lemmatizer_cache directives)
added wordbreaker, a tool to split compounds into individual words
added JSON attributes support (sql_attr_json, on_json_attr_error, json_autoconv_numbers, json_autoconv_keynames directives)
added initial subselects support, SELECT * FROM (SELECT ... ORDER BY cond1 LIMIT X) ORDER BY cond2 LIMIT Y
added bigram indexing, and phrase searching with bigrams (bigram_index, bigram_freq_words directives)
- -
added HA/LB support, ha_strategy and agent_persistent directives, SHOW AGENT STATUS statement
- +
added RT index optimization (OPTIMIZE INDEX statement, rt_merge_iops and rt_merge_maxiosize directives)
added RT index optimization (OPTIMIZE INDEX statement, rt_merge_iops and rt_merge_maxiosize directives)
added wildcards support to dict=keywords (eg. "t?st*")
added substring search support (min_infix_len=2 and above) to dict=keywords
New features
- +
added --checkconfig switch to indextool to check config file for correctness (bug #1395)
New features
added --checkconfig switch to indextool to check config file for correctness (bug #1395)
added global IDF support (global_idf directive, OPTION global_idf)
added "term1 term2 term3"/0.5 quorum fraction syntax (bug #1372)
added an option to apply stopwords before morphology, stopwords_unstemmed directive
added an alternative method to compute keyword IDFs, OPTION idf=plain
added boolean query optimizations, OPTION boolean_simplify=1 (bug #1294)
- -
added stringptr return type support to UDFs, and CREATE FUNCTION ... RETURNS STRING syntax
- +
added early query termination by predicted execution time (OPTION max_predicted_time, and predicted_time_costs directive)
added early query termination by predicted execution time (OPTION max_predicted_time, and predicted_time_costs directive)
added index_field_lengths directive, BM25A() and BM25F() functions to expression ranker
added ranker=export, and PACKEDFACTORS() function
- @@ -12506,7 +12535,7 @@
New features
added ZONESPANLIST() builtin function
added regexp_filter directive, regexp document/query filtering support (uses RE2)
- -
added min_idf, max_idf, sum_idf ranking factors
- +
added uservars persistence, and sphinxql_state directive (bug #1132)
added uservars persistence, and sphinxql_state directive (bug #1132)
added ZONESPAN operator
- @@ -12516,7 +12545,7 @@
added snippets_file_prefix directive
New features
-
added support for up to 255 keywords in quorum operator (bug #1030)
added multi-threaded agent querying (bug #1000)
New SphinxQL features
- +
added SHOW INDEX indexname STATUS statement
New SphinxQL features
added SHOW INDEX indexname STATUS statement
added LIKE clause support to multiple SHOW xxx statements
added SNIPPET() function
- @@ -12527,7 +12556,7 @@
added GROUP_CONCAT() aggregate function
New SphinxQL features
-added SHOW VARIABLES WHERE variable_name='xxx' syntax
added TRUNCATE RTINDEX statement
Major behavior changes and optimizations
- +
changed that UDFs are now allowed in fork/prefork modes via sphinxql_state startup script
Major behavior changes and optimizations
- -
changed that UDFs are now allowed in fork/prefork modes via sphinxql_state startup script
changed that compat_sphinxql_magics now defaults to 0
changed that small enough exceptions, wordforms, stopwords files are now embedded into the index header
- @@ -12535,10 +12564,10 @@
changed that rt_mem_limit can now be over 2 GB (bug #1059)
Major behavior changes and optimizations
optimized multi-keyword searching (added skiplists)
optimized filtering and scan in several frequent cases (single-value, 2-arg, 3-arg WHERE clauses)
-Bug fixes
- + +
fixed #1778, SENTENCE and PARAGRAPH operators and infix stars clash
+Bug fixes
-
fixed #1778, SENTENCE and PARAGRAPH operators and infix stars clash
fixed #1774, stack overflow on parsing large expressions
fixed #1744, daemon failed to write to log file bigger than 4G
- @@ -12551,10 +12580,10 @@
fixed #1705, expression ranker handling of indexes with more than 32 fields
Bug fixes
fixed #1520, SetLimits() API documentation
fixed #1491, documentation: space character is prohibited in charset_table
- -
fixed memory leak in expressions with max_window_hits
- +
fixed rt_flush_period - less strict internal check and more frequent flushes overall
fixed rt_flush_period - less strict internal check and more frequent flushes overall
-Bug fixes
- +
fixed #1655, special characters like ()?* were not processed correctly by exceptions
+Bug fixes
-
fixed #1655, special characters like ()?* were not processed correctly by exceptions
fixed #1651, CREATE FUNCTION can now be used with BIGINT return type
fixed #1649, incorrect warning message (about statistics mismatch) was returned when mixing wildcards and regular keywords
- @@ -12576,8 +12605,8 @@
fixed #1603, passing MVA64 arguments to non-MVA functions caused unpredicted behavior and crashes (now explicitly forbidden)
Bug fixes
optimized xmlpipe2 indexing
added a warning for missed stopwords, exception, wordforms files on index load and in
indextool --check-Bug fixes
- +
fixed #1515, log strings over 2KB were clipped when query_log_format=plain
+Bug fixes
-
fixed #1515, log strings over 2KB were clipped when query_log_format=plain
fixed #1514, RT index disk chunk lose attribute update on daemon restart
fixed #1512, crash while formatting log messages
- @@ -12594,12 +12623,12 @@
fixed #1511, crash on indexing PostgreSQL data source with MVA attributes
Bug fixes
fixed #1479, port handling in PHP Sphinx API
fixed #1474, daemon crash at SphinxQL packet overflows max_packet_size
- -
fixed #1472, crash on loading index to
indextoolfor check- +
fixed #1465, expansion_limit got lost in index rotation
fixed #1465, expansion_limit got lost in index rotation
fixed #1427, #1506, utf8 3 and 4-bytes codepoints
fixed #1405, between with mixed int float values
-Bug fixes
- +
fixed #1475, memory leak in the expression parser
+Bug fixes
-
fixed #1475, memory leak in the expression parser
fixed #1457, error messages over 2KB were clipped
fixed #1454, searchd did not display an error message when the binlog path did not exist
- @@ -12649,8 +12678,8 @@
fixed #1441, SHOW META in a query batch was returning the last non-batch error
Bug fixes
added a console message about crashes during index loading at startup
added more debug info about failed index loading
-Bug fixes
- +
fixed #1322, J connector seems to be broken in rel20 , but works in trunk
+Bug fixes
-
fixed #1322, J connector seems to be broken in rel20 , but works in trunk
fixed #1321, 'set names utf8' passes, but 'set names utf-8' doesn't because of syntax error '-'
fixed #1318, unhandled float comparison operators at filter
- @@ -12703,8 +12732,8 @@
fixed #1317, FD leaks on thread seamless rotation
Bug fixes
fixed sample config file
fixed x64 configurations for libstemmer
-Bug fixes
- +
fixed #1258,
xmlpipe2refused to index indexes withdocinfo=inline+Bug fixes
-
fixed #1258,
xmlpipe2refused to index indexes withdocinfo=inlinefixed #1257, legacy groupby modes vs
dist_threadscould occasionally return wrong search results (race condition)fixed #1253, missing single-word query performance optimization (simplified ranker) vs prefix-expanded keywords vs
dict=keywords- @@ -12759,8 +12788,8 @@
fixed #1252, COUNT(*) vs dist_threads could occasionally crash (race condition)
Bug fixes
fixed that blended vs multiforms vs min_word_len could hang the query parser
fixed missing command-line switches documentation
-Bug fixes
- +
fixed #605, pack vs mysql compress
+Bug fixes
fixed #605, pack vs mysql compress
fixed #783, #862, #917, #985, #990, #1032 documentation bugs
fixed #885, bitwise AND/OR were not available via API
- @@ -12772,7 +12801,7 @@
fixed #984, crash on indexing data with MAGIC_CODE_ZONE symbol
Bug fixes
-
fixed #1050, expression ranker vs agents
- -
fixed #1054, max_query_time not handled properly on searching at RT index
- +
fixed #1055, expansion_limit on searching at RT disk chunks
fixed #1055, expansion_limit on searching at RT disk chunks
fixed #1057, daemon crashes on generating snippet with 0 documents provided
fixed #1060, load_files_scattered don't work
- @@ -12795,8 +12824,8 @@
fixed #1065, libsphinxclient vs distribute index (agents)
Bug fixes
fixed #1119, missing expression ranker support in SphinxSE
fixed #1120, negative total_found, docs and hits counter on huge indexes
-Bug fixes
- +
fixed #1031, SphinxQL parsing syntax for MVA at insert \ replace statements
+Bug fixes
-
fixed #1031, SphinxQL parsing syntax for MVA at insert \ replace statements
fixed #1027, stalls on attribute update in high-concurrency load
fixed #1026, daemon crash on malformed API command
- @@ -12815,8 +12844,8 @@
fixed #1021,
max_childrenoption has been ignored withworker=threadsBug fixes
fixed build of SphinxSE with MySQL 5.1
fixed crash log for 'fork' and 'prefork' workers
-Major new features
- +
added keywords dictionary (
dict=keywords) support to RT indexes+Major new features
added keywords dictionary (
dict=keywords) support to RT indexesadded MVA, index_exact_words support to RT indexes (#888)
added MVA64 (a set of BIGINTs) support to both disk and RT indexes (rt_attr_multi_64 directive)
- @@ -12824,7 +12853,7 @@
added an expression-based ranker, and a number of new ranking factors
Major new features
-added
WHEREclause support to UPDATE statementadded
bigint,float, andMVAattribute support to UPDATE statementNew features
- +
added support for upto 256 searchable fields (was upto 32 before)
New features
added support for upto 256 searchable fields (was upto 32 before)
added
FIBONACCI()function to expressionsadded load_files_scattered option to snippets
- @@ -12835,7 +12864,7 @@
added implicit attribute type promotions in multi-index result sets (#939)
New features
added
ORDER BY RAND()support to SELECT statementadded Sphinx version to Windows crash log
- -
added RT index support to indextool
--check(checks disk chunks only) (#877)- +
added prefork_rotation_throttle directive (preforked children restart delay, in milliseconds) (#873)
added prefork_rotation_throttle directive (preforked children restart delay, in milliseconds) (#873)
added on_file_field_error directive (different
sql_file_fieldhandling modes)added manpages for all the programs
- @@ -12845,7 +12874,7 @@
added syslog logging support
New features
-
improved sentence extraction (handles salutations, starting initials better now)
changed max_filter_values sanity check to 10M values
New SphinxQL features
- +
added FLUSH RTINDEX statement
New SphinxQL features
added FLUSH RTINDEX statement
added
dist_threadsdirective (parallel processing),load_files,load_files_scattered, batch syntax (multiple documents) support to CALL SNIPPETS statementadded
OPTION comment='...'support to SELECT statement (#944)- @@ -12853,7 +12882,7 @@
added SHOW VARIABLES statement
New SphinxQL features
-added complete SphinxQL error logging (all errors are logged now, not just
SELECTs)improved SELECT statement syntax, made expressions aliases optional
Bug fixes
- +
fixed #982, empty binlogs prevented upgraded daemon from starting up
Bug fixes
-
fixed #982, empty binlogs prevented upgraded daemon from starting up
fixed #978, libsphinxclient build failed on sparc/sparc64 solaris
fixed #977, eliminated (most) compiler warnings
- @@ -12920,35 +12949,35 @@
fixed #969, broken expression MVA/string argument type check prevented IF(IN(mva..)) and other valid expressions from working
Bug fixes
fixed that in some extremely rare edge cases tiny parts of an index could end up corrupted with
dict=keywordsfixed that field/zone conditions were not propagated to expanded keywords with
dict=keywords-New general features
- +
added remapping support to blend_chars directive
+New general features
-
added remapping support to blend_chars directive
added multi-threaded snippet batches support (requires a batch sent via API, dist_threads, and
load_files)added collations (collation_server, collation_libc_locale directives)
- -
added support for sorting and grouping on string attributes (
ORDER BY,GROUP BY,WITHIN GROUP ORDER BY)- +
added UDF support (plugin_dir directive; CREATE FUNCTION, DROP FUNCTION statements)
added UDF support (plugin_dir directive; CREATE FUNCTION, DROP FUNCTION statements)
added query_log_format directive, SET GLOBAL query_log_format | log_level = ... statements; and connection id tracking
added sql_column_buffers directive, fixed out-of-buffer column handling in ODBC/MS SQL sources
added blend_mode directive that enables indexing multiple variants of a blended sequence
added UNIX socket support to C, Ruby APIs
- -
added ranged query support to sql_joined_field
- -
added rt_flush_period directive
- +
added thread_stack directive
- +
added rt_flush_period directive
added thread_stack directive
- -
added SENTENCE, PARAGRAPH, ZONE operators (and index_sp, index_zones directives)
- +
added keywords dictionary support (and dict, expansion_limit directives)
added keywords dictionary support (and dict, expansion_limit directives)
- -
added
passage_boundary,emit_zonesoptions to snippets- +
added a watchdog process in threaded mode
added a watchdog process in threaded mode
added persistent MVA updates
added crash dumps to
searchd.log, deprecatedcrash_log_pathdirectiveadded id32 index support in id64 binaries (EXPERIMENTAL)
added SphinxSE support for DELETE and REPLACE on SphinxQL tables
New SphinxQL features
- +
added new, more SQL compliant SphinxQL syntax; and a compat_sphinxql_magics directive
New SphinxQL features
added new, more SQL compliant SphinxQL syntax; and a compat_sphinxql_magics directive
added CRC32(), DAY(), MONTH(), YEAR(), YEARMONTH(), YEARMONTHDAY() functions
added reverse_scan=(0|1) option to SELECT
added support for MySQL packets over 16M
- -
added dummy SHOW VARIABLES, SHOW COLLATION, and SET character_set_results support (to support handshake with certain client libraries and frameworks)
- +
added mysql_version_string directive (to workaround picky MySQL client libraries)
added mysql_version_string directive (to workaround picky MySQL client libraries)
added support for global filter variables, SET GLOBAL @uservar=(int_list)
added DELETE ... IN (id_list) syntax support
- @@ -12956,19 +12985,19 @@
added C-style comments syntax (for example,
SELECT /*!40000 some comment*/ id FROM test)New SphinxQL features
-added SphinxQL multi-query support
added DESCRIBE, SHOW TABLES statements
New command-line switches
- +
added
--print-queriesswitch toindexerthat dumps SQL queries it runsNew command-line switches
-
added
--print-queriesswitch toindexerthat dumps SQL queries it runsadded
--sighup-eachswitch toindexerthat rotates indexes one by oneadded
--strip-pathswitch tosearchdthat skips file paths embedded in the index(-es)added
--dumpconfigswitch toindextoolthat dumps an index header insphinx.confformatMajor changes and optimizations
- +
changed default preopen_indexes value to 1
Major changes and optimizations
-
changed default preopen_indexes value to 1
optimized English stemmer (results in 1.3x faster snippets and indexing with morphology=stem_en)
optimized snippets, 1.6x general speedup
optimized const-list parsing in SphinxQL
optimized full-document highlighting CPU/RAM use
optimized binlog replay (improved performance on K-list update)
Bug fixes
- +
fixed #767, joined fields vs ODBC sources
Bug fixes
-
fixed #767, joined fields vs ODBC sources
fixed #757, wordforms shared by indexes with different settings
fixed #733, loading of indexes in formats prior to v.14
- @@ -13052,7 +13081,7 @@
fixed #763, occasional snippets failures
Bug fixes
fixed libsphinxclient SIGPIPE handling
fixed libsphinxclient vs VS2003 compiler bug
+-
added RT indexes support (Chapter 4, Real-time indexes)
added prefork and threads support (workers directives)
- @@ -13127,7 +13156,7 @@
added multi-threaded local searches in distributed indexes (dist_threads directive)
Bug fixes
+-+-
added IsConnectError(), Open(), Close() calls to Java API (bug #240)
added read_buffer, read_unhinted directives
- @@ -13256,7 +13285,7 @@
added checks for build options returned by mysql_config (builds on Solaris now)
Bug fixes
fixed stupid bug in escaping code, fixed EscapeString() and made it static
fixed parser vs @field -keyword, foo|@field bar, "" queries (bug #310)
+-
added min_stemming_len directive
added IsConnectError() API call (helps distingusih API vs remote errors)
- @@ -13299,7 +13328,7 @@
added duplicate log messages filter to searchd
Bug fixes
fixed backslash escaping, added backslash to EscapeString()
fixed handling of over-2GB dictionary files (.spi)
+-
added configure script to libsphinxclient
changed proximity/quorum operator syntax to require whitespace after length
- @@ -13320,8 +13349,8 @@
fixed potential head process crash on SIGPIPE during "maxed out" message
Bug fixes
fixed Python API to accept offset/limit of 'long' type
fixed default ID range (that filtered out all 64-bit values) in Java and Python APIs
-Indexing
- +
added support for 64-bit document and keyword IDs, --enable-id64 switch to configure
+Indexing
added support for 64-bit document and keyword IDs, --enable-id64 switch to configure
added support for floating point attributes
added support for bitfields in attributes, sql_attr_bool directive and bit-widths part in sql_attr_uint directive
- @@ -13343,7 +13372,7 @@
added support for multi-valued attributes (MVA)
Indexing
-
improved ordinals sorting; now runs in fixed RAM
improved handling of documents with zero/NULL ids, now skipping them instead of aborting
Search daemon
- +
added an option to unlink old index on succesful rotation, unlink_old directive
Search daemon
added an option to unlink old index on succesful rotation, unlink_old directive
added an option to keep index files open at all times (fixes subtle races on rotation), preopen and preopen_indexes directives
added an option to profile searchd disk I/O, --iostats command-line option
- @@ -13357,7 +13386,7 @@
added an option to rotate index seamlessly (fully avoids query stalls), seamless_rotate directive
Search daemon
-
added Windows --rotate support
improved log timestamping, now with millisecond precision
Querying
- +
added extended engine V2 (faster, cleaner, better; SPH_MATCH_EXTENDED2 mode)
Querying
added extended engine V2 (faster, cleaner, better; SPH_MATCH_EXTENDED2 mode)
added ranking modes support (V2 engine only; SetRankingMode() API call)
added quorum searching support to query language (V2 engine only; example: "any three of all these words"/3)
- @@ -13378,14 +13407,14 @@
added query escaping support to query language, and EscapeString() API call
Querying
-
added optional limit on query time, SetMaxQueryTime() API call
added optional limit on found matches count (4rd arg to SetLimits() API call, so-called 'cutoff')
APIs and SphinxSE
- +
added pure C API (libsphinxclient)
APIs and SphinxSE
-
added pure C API (libsphinxclient)
added Ruby API (thanks to Dmytro Shteflyuk)
added Java API
added SphinxSE support for MVAs (use varchar), floats (use float), 64bit docids (use bigint)
added SphinxSE options "floatrange", "geoanchor", "fieldweights", "indexweights", "maxquerytime", "comment", "host" and "port"; and support for "expr:CLAUSE"
improved SphinxSE max query size (using MySQL condition pushdown), upto 256K now
General
-
- +
added scripting (shebang syntax) support to config files (example: #!/usr/bin/php in the first line)
General
added scripting (shebang syntax) support to config files (example: #!/usr/bin/php in the first line)
added unified config handling and validation to all programs
added unified documentation
- @@ -13437,7 +13466,7 @@
added .spec file for RPM builds
Changes and fixes since 0.9.8-rc2
<fixed rare count(distinct) vs. querying multiple local indexes vs. reusable sorter issue
fixed sorting of negative floats in SPH_SORT_EXTENDED mode
+-
added support for
sql_str2ordinal_columnadded support for upto 5 sort-by attrs (in extended sorting mode)
- @@ -13475,7 +13504,7 @@
added support for separate groups sorting clause (in group-by mode)
Changes and fixes since 0.9.8-rc2
<fixed wrong assertion in SBCS encoder
fixed crashes with no-attribute indexes after rotate
+-
added support for extended matching mode (query language)
added support for extended sorting mode (sorting clauses)
- @@ -13492,7 +13521,7 @@
added support for SBCS excerpts
Changes and fixes since 0.9.8-rc2
<fixed
--with-mysql-includes/libs(they conflicted with well-known paths)fixes for 64-bit platforms
+-+-
added support for empty indexes
added support for multiple sql_query_pre/post/post_index
fixed timestamp ranges filter in "match any" mode
fixed configure issues with --without-mysql and --with-pgsql options
fixed building on Solaris 9
+
added boolean queries support (experimental, beta version)
added simple file-based query cache (experimental, beta version)
- diff --git a/doc/sphinx.txt b/doc/sphinx.txt index 917ded33..e71b94f2 100644 --- a/doc/sphinx.txt +++ b/doc/sphinx.txt @@ -1,5 +1,5 @@ -Sphinx 2.2.8-dev reference manual -================================= +Sphinx 2.2.10-dev reference manual +================================== Free open-source SQL full-text search engine ============================================ @@ -371,27 +371,26 @@ Table of Contents 12.4.28. snippets_file_prefix 12.4.29. collation_server 12.4.30. collation_libc_locale - 12.4.31. plugin_dir - 12.4.32. mysql_version_string - 12.4.33. rt_flush_period - 12.4.34. thread_stack - 12.4.35. expansion_limit - 12.4.36. watchdog - 12.4.37. prefork_rotation_throttle - 12.4.38. sphinxql_state - 12.4.39. ha_ping_interval - 12.4.40. ha_period_karma - 12.4.41. persistent_connections_limit - 12.4.42. rt_merge_iops - 12.4.43. rt_merge_maxiosize - 12.4.44. predicted_time_costs - 12.4.45. shutdown_timeout - 12.4.46. ondisk_attrs_default - 12.4.47. query_log_min_msec - 12.4.48. agent_connect_timeout - 12.4.49. agent_query_timeout - 12.4.50. agent_retry_count - 12.4.51. agent_retry_delay + 12.4.31. mysql_version_string + 12.4.32. rt_flush_period + 12.4.33. thread_stack + 12.4.34. expansion_limit + 12.4.35. watchdog + 12.4.36. prefork_rotation_throttle + 12.4.37. sphinxql_state + 12.4.38. ha_ping_interval + 12.4.39. ha_period_karma + 12.4.40. persistent_connections_limit + 12.4.41. rt_merge_iops + 12.4.42. rt_merge_maxiosize + 12.4.43. predicted_time_costs + 12.4.44. shutdown_timeout + 12.4.45. ondisk_attrs_default + 12.4.46. query_log_min_msec + 12.4.47. agent_connect_timeout + 12.4.48. agent_query_timeout + 12.4.49. agent_retry_count + 12.4.50. agent_retry_delay 12.5. Common section configuration options 12.5.1. lemmatizer_base @@ -402,46 +401,49 @@ Table of Contents 12.5.6. rlp_environment 12.5.7. rlp_max_batch_size 12.5.8. rlp_max_batch_docs + 12.5.9. plugin_dir A. Sphinx revision history - A.1. Version 2.2.7-release, 20 jan 2015 - A.2. 
Version 2.2.6-release, 13 nov 2014 - A.3. Version 2.2.5-release, 06 oct 2014 - A.4. Version 2.2.4-release, 11 sep 2014 - A.5. Version 2.2.3-beta, 13 may 2014 - A.6. Version 2.2.2-beta, 11 feb 2014 - A.7. Version 2.2.1-beta, 13 nov 2013 - A.8. Version 2.1.9-release, 03 jul 2014 - A.9. Version 2.1.8-release, 28 apr 2014 - A.10. Version 2.1.7-release, 30 mar 2014 - A.11. Version 2.1.6-release, 24 feb 2014 - A.12. Version 2.1.5-release, 22 jan 2014 - A.13. Version 2.1.4-release, 18 dec 2013 - A.14. Version 2.1.3-release, 12 nov 2013 - A.15. Version 2.1.2-release, 10 oct 2013 - A.16. Version 2.1.1-beta, 20 feb 2013 - A.17. Version 2.0.11-dev, xx xxx xxxx - A.18. Version 2.0.10-release, 22 jan 2014 - A.19. Version 2.0.9-release, 26 aug 2013 - A.20. Version 2.0.8-release, 26 apr 2013 - A.21. Version 2.0.7-release, 26 mar 2013 - A.22. Version 2.0.6-release, 22 oct 2012 - A.23. Version 2.0.5-release, 28 jul 2012 - A.24. Version 2.0.4-release, 02 mar 2012 - A.25. Version 2.0.3-release, 23 dec 2011 - A.26. Version 2.0.2-beta, 15 nov 2011 - A.27. Version 2.0.1-beta, 22 apr 2011 - A.28. Version 1.10-beta, 19 jul 2010 - A.29. Version 0.9.9-release, 02 dec 2009 - A.30. Version 0.9.9-rc2, 08 apr 2009 - A.31. Version 0.9.9-rc1, 17 nov 2008 - A.32. Version 0.9.8.1, 30 oct 2008 - A.33. Version 0.9.8, 14 jul 2008 - A.34. Version 0.9.7, 02 apr 2007 - A.35. Version 0.9.7-rc2, 15 dec 2006 - A.36. Version 0.9.7-rc1, 26 oct 2006 - A.37. Version 0.9.6, 24 jul 2006 - A.38. Version 0.9.6-rc1, 26 jun 2006 + A.1. Version 2.2.9-release, 16 apr 2015 + A.2. Version 2.2.8-release, 09 mar 2015 + A.3. Version 2.2.7-release, 20 jan 2015 + A.4. Version 2.2.6-release, 13 nov 2014 + A.5. Version 2.2.5-release, 06 oct 2014 + A.6. Version 2.2.4-release, 11 sep 2014 + A.7. Version 2.2.3-beta, 13 may 2014 + A.8. Version 2.2.2-beta, 11 feb 2014 + A.9. Version 2.2.1-beta, 13 nov 2013 + A.10. Version 2.1.9-release, 03 jul 2014 + A.11. Version 2.1.8-release, 28 apr 2014 + A.12. 
Version 2.1.7-release, 30 mar 2014 + A.13. Version 2.1.6-release, 24 feb 2014 + A.14. Version 2.1.5-release, 22 jan 2014 + A.15. Version 2.1.4-release, 18 dec 2013 + A.16. Version 2.1.3-release, 12 nov 2013 + A.17. Version 2.1.2-release, 10 oct 2013 + A.18. Version 2.1.1-beta, 20 feb 2013 + A.19. Version 2.0.11-dev, xx xxx xxxx + A.20. Version 2.0.10-release, 22 jan 2014 + A.21. Version 2.0.9-release, 26 aug 2013 + A.22. Version 2.0.8-release, 26 apr 2013 + A.23. Version 2.0.7-release, 26 mar 2013 + A.24. Version 2.0.6-release, 22 oct 2012 + A.25. Version 2.0.5-release, 28 jul 2012 + A.26. Version 2.0.4-release, 02 mar 2012 + A.27. Version 2.0.3-release, 23 dec 2011 + A.28. Version 2.0.2-beta, 15 nov 2011 + A.29. Version 2.0.1-beta, 22 apr 2011 + A.30. Version 1.10-beta, 19 jul 2010 + A.31. Version 0.9.9-release, 02 dec 2009 + A.32. Version 0.9.9-rc2, 08 apr 2009 + A.33. Version 0.9.9-rc1, 17 nov 2008 + A.34. Version 0.9.8.1, 30 oct 2008 + A.35. Version 0.9.8, 14 jul 2008 + A.36. Version 0.9.7, 02 apr 2007 + A.37. Version 0.9.7-rc2, 15 dec 2006 + A.38. Version 0.9.7-rc1, 26 oct 2006 + A.39. Version 0.9.6, 24 jul 2006 + A.40. Version 0.9.6-rc1, 26 jun 2006 List of Tables @@ -790,10 +792,10 @@ project files) manually. 1. Extract everything from the distribution tarball (haven't you already?) and go to the sphinx subdirectory. (We are using version - 2.2.8-dev here for the sake of example only; be sure to change this + 2.2.10-dev here for the sake of example only; be sure to change this to a specific version you're using.) - | $ tar xzvf sphinx-2.2.8-dev.tar.gz + | $ tar xzvf sphinx-2.2.10-dev.tar.gz | $ cd sphinx 2. Run the configuration program: @@ -872,7 +874,7 @@ Deb packages: $ sudo apt-get install mysql-client unixodbc libpq5 2. Now you can install Sphinx: - $ sudo dpkg -i sphinxsearch_2.2.8-dev-0ubuntu11~trusty_amd64.deb + $ sudo dpkg -i sphinxsearch_2.2.10-dev-0ubuntu12~trusty_amd64.deb PPA repository (Ubuntu only). 
@@ -922,10 +924,10 @@ a Linux environment; unless you are preparing code patches, you can use the pre-compiled binary files from the Downloads area on the website. 1. Extract everything from the .zip file you have downloaded - - sphinx-2.2.8-dev-win32.zip, or sphinx-2.2.8-dev-win32-pgsql.zip if - you need PostgresSQL support as well. (We are using version 2.2.8-dev - here for the sake of example only; be sure to change this to - a specific version you're using.) You can use Windows Explorer in + sphinx-2.2.10-dev-win32.zip, or sphinx-2.2.10-dev-win32-pgsql.zip if + you need PostgresSQL support as well. (We are using version + 2.2.10-dev here for the sake of example only; be sure to change this + to a specific version you're using.) You can use Windows Explorer in Windows XP and up to extract the files, or a freeware package like 7Zip to open the archive. @@ -1966,16 +1968,6 @@ on subsequent startup. RT indexes are currently quality feature, but there are still a few known usage quirks. Those quirks are listed in this section. - * Prefix indexing is supported with dict = keywords starting 2.0.2-beta. - Infix indexing is experimental in trunk. - - * Disk chunks optimization routine is not implemented yet. - - * On initial index creation, attributes are reordered by type, in the - following order: uint, bigint, float, timestamp, string. So when using - INSERT without an explicit column names list, specify all uint column - values first, then bigint, etc. - * Default conservative RAM chunk limit (rt_mem_limit) of 32M can lead to poor performance on bigger indexes, you should raise it to 256..1024M if you're planning to index gigabytes. @@ -4179,7 +4171,8 @@ currently defaults to libc_ci collation. Collations should affect all string attribute comparisons, including those within ORDER BY and GROUP BY, so differently ordered or grouped results can -be returned depending on the collation chosen. +be returned depending on the collation chosen. 
Note that collations don't +affect full-text searching, for that use charset_table. Chapter 6. Extending Sphinx =========================== @@ -5532,7 +5525,7 @@ omissions (such as (currently) missing support for JOINs). Specifically, * 'max_query_time' - integer (max search time threshold, msec) * 'max_predicted_time' - integer (max predicted search time, see - Section 12.4.44, <
added storage engine for MySQL 5.0 and 5.1 (experimental, beta version)
>) + Section 12.4.43, < >) * 'ranker' - any of 'proximity_bm25', 'bm25', 'none', 'wordcount', 'proximity', 'matchany', 'fieldmask', 'sph04', 'expr', or 'export' @@ -8970,27 +8963,26 @@ Table of Contents 12.4.28. snippets_file_prefix 12.4.29. collation_server 12.4.30. collation_libc_locale - 12.4.31. plugin_dir - 12.4.32. mysql_version_string - 12.4.33. rt_flush_period - 12.4.34. thread_stack - 12.4.35. expansion_limit - 12.4.36. watchdog - 12.4.37. prefork_rotation_throttle - 12.4.38. sphinxql_state - 12.4.39. ha_ping_interval - 12.4.40. ha_period_karma - 12.4.41. persistent_connections_limit - 12.4.42. rt_merge_iops - 12.4.43. rt_merge_maxiosize - 12.4.44. predicted_time_costs - 12.4.45. shutdown_timeout - 12.4.46. ondisk_attrs_default - 12.4.47. query_log_min_msec - 12.4.48. agent_connect_timeout - 12.4.49. agent_query_timeout - 12.4.50. agent_retry_count - 12.4.51. agent_retry_delay + 12.4.31. mysql_version_string + 12.4.32. rt_flush_period + 12.4.33. thread_stack + 12.4.34. expansion_limit + 12.4.35. watchdog + 12.4.36. prefork_rotation_throttle + 12.4.37. sphinxql_state + 12.4.38. ha_ping_interval + 12.4.39. ha_period_karma + 12.4.40. persistent_connections_limit + 12.4.41. rt_merge_iops + 12.4.42. rt_merge_maxiosize + 12.4.43. predicted_time_costs + 12.4.44. shutdown_timeout + 12.4.45. ondisk_attrs_default + 12.4.46. query_log_min_msec + 12.4.47. agent_connect_timeout + 12.4.48. agent_query_timeout + 12.4.49. agent_retry_count + 12.4.50. agent_retry_delay 12.5. Common section configuration options 12.5.1. lemmatizer_base @@ -9001,6 +8993,7 @@ Table of Contents 12.5.6. rlp_environment 12.5.7. rlp_max_batch_size 12.5.8. rlp_max_batch_docs + 12.5.9. plugin_dir 12.1. 
Data source configuration options ======================================= @@ -9301,8 +9294,8 @@ Example: | SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC | | sql_joined_field = bigint tag from ranged-query; \ - | SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \ - | SELECT MIN(id), MAX(id) FROM tags ORDER BY docid ASC + | SELECT id, tag FROM tags WHERE id>=$start AND id<=$end ORDER BY id ASC; \ + | SELECT MIN(id), MAX(id) FROM tags 12.1.14. sql_query_range ------------------------ @@ -10068,6 +10061,8 @@ For reference, different index files store the following data: * .sps stores string attribute data. + * .spe stores skip-lists to speed up doc-list filtering + Example: | path = /var/data/test1 @@ -10226,11 +10221,11 @@ follows: * lemmatize_ru_all - apply Russian lemmatizer and index all possible root forms (added in 2.1.1-beta); - * lemmatize_en_all - apply Russian lemmatizer and index all possible + * lemmatize_en_all - apply English lemmatizer and index all possible root forms (added in 2.2.1-beta); - * lemmatize_de_all - apply Russian lemmatizer and index all possible - root forms (added in 2.2.1-beta); + * lemmatize_de_all - apply German lemmatizer and index all possible root + forms (added in 2.2.1-beta); * stem_en - apply Porter's English stemmer; @@ -11886,12 +11881,12 @@ average per-index values) into the index. Optional, default is 0 (do not compute and store). Added in 2.1.1-beta. When index_field_lengths is set to 1, indexer will 1) create a respective -length attribute for every full-text field, sharing the same name; 2) -compute a field length (counted in keywords) for every document and store -in to a respective attribute; 3) compute the per-index averages. The -lengths attributes will have a special TOKENCOUNT type, but their values -are in fact regular 32-bit integers, and their values are generally -accessible. 
+length attribute for every full-text field, sharing the same name but with +_len suffix; 2) compute a field length (counted in keywords) for every +document and store in to a respective attribute; 3) compute the per-index +averages. The lengths attributes will have a special TOKENCOUNT type, but +their values are in fact regular 32-bit integers, and their values are +generally accessible. BM25A() and BM25F() functions in the expression ranker are based on these lengths and require index_field_lengths to be enabled. Historically, Sphinx @@ -12194,7 +12189,7 @@ open attempts, the document will still be indexed. Example: - | on_file_field_errors = skip_document + | on_file_field_error = skip_document 12.3.8. lemmatizer_cache ------------------------ @@ -12852,21 +12847,7 @@ Example: | collation_libc_locale = fr_FR -12.4.31. plugin_dir -------------------- - -Trusted location for the dynamic libraries (UDFs). Optional, default is -empty (no location). Introduced in version 2.0.1-beta. - -Specifies the trusted directory from which the UDF libraries can be loaded. -Requires workers = thread to take effect. - -Example: - - | workers = threads - | plugin_dir = /usr/local/sphinx/lib - -12.4.32. mysql_version_string +12.4.31. mysql_version_string ----------------------------- A server version string to return via MySQL protocol. Optional, default is @@ -12886,7 +12867,7 @@ Example: | mysql_version_string = 5.0.37 -12.4.33. rt_flush_period +12.4.32. rt_flush_period ------------------------ RT indexes RAM chunk flush check period, in seconds. Optional, default is @@ -12902,7 +12883,7 @@ Example: | rt_flush_period = 3600 # 1 hour -12.4.34. thread_stack +12.4.33. thread_stack --------------------- Per-thread stack size. Optional, default is 1M. Introduced in version @@ -12930,7 +12911,7 @@ Example: | thread_stack = 256K -12.4.35. expansion_limit +12.4.34. expansion_limit ------------------------ The maximum number of expanded keywords for a single wildcard. 
Optional, @@ -12948,7 +12929,7 @@ Example: | expansion_limit = 16 -12.4.36. watchdog +12.4.35. watchdog ----------------- Threaded server watchdog. Optional, default is 1 (watchdog enabled). @@ -12964,7 +12945,7 @@ Example: | watchdog = 0 # disable watchdog -12.4.37. prefork_rotation_throttle +12.4.36. prefork_rotation_throttle ---------------------------------- Delay between restarting preforked children on index rotation, in @@ -12988,7 +12969,7 @@ Example: | prefork_rotation_throttle = 50 # throttle children restarts by 50 msec each -12.4.38. sphinxql_state +12.4.37. sphinxql_state ----------------------- Path to a file where current SphinxQL state will be serialized. Available @@ -13005,7 +12986,7 @@ Example: | sphinxql_state = uservars.sql -12.4.39. ha_ping_interval +12.4.38. ha_ping_interval ------------------------- Interval between agent mirror pings, in milliseconds. Optional, default is @@ -13022,7 +13003,7 @@ Example: | ha_ping_interval = 0 -12.4.40. ha_period_karma +12.4.39. ha_period_karma ------------------------ Agent mirror statistics window size, in seconds. Optional, default is 60. @@ -13047,7 +13028,7 @@ Example: | ha_period_karma = 120 -12.4.41. persistent_connections_limit +12.4.40. persistent_connections_limit ------------------------------------- The maximum # of simultaneous persistent connections to remote persistent @@ -13066,7 +13047,7 @@ Example: | persistent_connections_limit = 29 # assume that each host of agents has max_children = 30 (or 29). -12.4.42. rt_merge_iops +12.4.41. rt_merge_iops ---------------------- A maximum number of I/O operations (per second) that the RT chunks merge @@ -13084,7 +13065,7 @@ Example: | rt_merge_iops = 40 -12.4.43. rt_merge_maxiosize +12.4.42. rt_merge_maxiosize --------------------------- A maximum size of an I/O operation that the RT chunks merge thread is @@ -13101,7 +13082,7 @@ Example: | rt_merge_maxiosize = 1M -12.4.44. predicted_time_costs +12.4.43. 
predicted_time_costs ----------------------------- Costs for the query time prediction model, in nanoseconds. Optional, @@ -13154,7 +13135,7 @@ Example: | predicted_time_costs = doc=128, hit=96, skip=4096, match=128 -12.4.45. shutdown_timeout +12.4.44. shutdown_timeout ------------------------- searchd --stopwait wait time, in seconds. Optional, default is 3 seconds. @@ -13170,7 +13151,7 @@ Example: | shutdown_timeout = 5 # wait for up to 5 seconds -12.4.46. ondisk_attrs_default +12.4.45. ondisk_attrs_default ----------------------------- Instance-wide defaults for ondisk_attrs directive. Optional, default is @@ -13179,7 +13160,7 @@ the default value of ondisk_attrs for all indexes served by this copy of searchd. Per-index directives take precedence, and will overwrite this instance-wide default value, allowing for fine-grain control. -12.4.47. query_log_min_msec +12.4.46. query_log_min_msec --------------------------- Limit (in milliseconds) that prevents the query from being written to the @@ -13187,20 +13168,20 @@ query log. Optional, default is 0 (all queries are written to the query log). This directive specifies that only queries with execution times that exceed the specified limit will be logged. -12.4.48. agent_connect_timeout +12.4.47. agent_connect_timeout ------------------------------ Instance-wide defaults for agent_connect_timeout parameter. The last defined in distributed (network) indexes. -12.4.49. agent_query_timeout +12.4.48. agent_query_timeout ---------------------------- Instance-wide defaults for agent_query_timeout parameter. The last defined in distributed (network) indexes, or also may be overrided per-query using OPTION clause. -12.4.50. agent_retry_count +12.4.49. agent_retry_count -------------------------- Integer, specifies how many times sphinx will try to connect and query @@ -13209,7 +13190,7 @@ Default is 0 (i.e. no retries). This value may be also specified on per-query basis using 'OPTION retry_count=XXX' clause. 
If per-query option exists, it will override the one specified in config. -12.4.51. agent_retry_delay +12.4.50. agent_retry_delay -------------------------- Integer, in milliseconds. Specifies the delay sphinx rest before retrying @@ -13330,51 +13311,139 @@ Example: | rlp_max_batch_docs = 100 +12.5.9. plugin_dir +------------------ + +Trusted location for the dynamic libraries (UDFs). Optional, default is +empty (no location). Introduced in version 2.0.1-beta. + +Specifies the trusted directory from which the UDF libraries can be loaded. +Requires workers = thread to take effect. + +Example: + + | plugin_dir = /usr/local/sphinx/lib + Appendix A. Sphinx revision history =================================== Table of Contents -A.1. Version 2.2.7-release, 20 jan 2015 -A.2. Version 2.2.6-release, 13 nov 2014 -A.3. Version 2.2.5-release, 06 oct 2014 -A.4. Version 2.2.4-release, 11 sep 2014 -A.5. Version 2.2.3-beta, 13 may 2014 -A.6. Version 2.2.2-beta, 11 feb 2014 -A.7. Version 2.2.1-beta, 13 nov 2013 -A.8. Version 2.1.9-release, 03 jul 2014 -A.9. Version 2.1.8-release, 28 apr 2014 -A.10. Version 2.1.7-release, 30 mar 2014 -A.11. Version 2.1.6-release, 24 feb 2014 -A.12. Version 2.1.5-release, 22 jan 2014 -A.13. Version 2.1.4-release, 18 dec 2013 -A.14. Version 2.1.3-release, 12 nov 2013 -A.15. Version 2.1.2-release, 10 oct 2013 -A.16. Version 2.1.1-beta, 20 feb 2013 -A.17. Version 2.0.11-dev, xx xxx xxxx -A.18. Version 2.0.10-release, 22 jan 2014 -A.19. Version 2.0.9-release, 26 aug 2013 -A.20. Version 2.0.8-release, 26 apr 2013 -A.21. Version 2.0.7-release, 26 mar 2013 -A.22. Version 2.0.6-release, 22 oct 2012 -A.23. Version 2.0.5-release, 28 jul 2012 -A.24. Version 2.0.4-release, 02 mar 2012 -A.25. Version 2.0.3-release, 23 dec 2011 -A.26. Version 2.0.2-beta, 15 nov 2011 -A.27. Version 2.0.1-beta, 22 apr 2011 -A.28. Version 1.10-beta, 19 jul 2010 -A.29. Version 0.9.9-release, 02 dec 2009 -A.30. Version 0.9.9-rc2, 08 apr 2009 -A.31. Version 0.9.9-rc1, 17 nov 2008 -A.32. 
Version 0.9.8.1, 30 oct 2008 -A.33. Version 0.9.8, 14 jul 2008 -A.34. Version 0.9.7, 02 apr 2007 -A.35. Version 0.9.7-rc2, 15 dec 2006 -A.36. Version 0.9.7-rc1, 26 oct 2006 -A.37. Version 0.9.6, 24 jul 2006 -A.38. Version 0.9.6-rc1, 26 jun 2006 - -A.1. Version 2.2.7-release, 20 jan 2015 +A.1. Version 2.2.9-release, 16 apr 2015 +A.2. Version 2.2.8-release, 09 mar 2015 +A.3. Version 2.2.7-release, 20 jan 2015 +A.4. Version 2.2.6-release, 13 nov 2014 +A.5. Version 2.2.5-release, 06 oct 2014 +A.6. Version 2.2.4-release, 11 sep 2014 +A.7. Version 2.2.3-beta, 13 may 2014 +A.8. Version 2.2.2-beta, 11 feb 2014 +A.9. Version 2.2.1-beta, 13 nov 2013 +A.10. Version 2.1.9-release, 03 jul 2014 +A.11. Version 2.1.8-release, 28 apr 2014 +A.12. Version 2.1.7-release, 30 mar 2014 +A.13. Version 2.1.6-release, 24 feb 2014 +A.14. Version 2.1.5-release, 22 jan 2014 +A.15. Version 2.1.4-release, 18 dec 2013 +A.16. Version 2.1.3-release, 12 nov 2013 +A.17. Version 2.1.2-release, 10 oct 2013 +A.18. Version 2.1.1-beta, 20 feb 2013 +A.19. Version 2.0.11-dev, xx xxx xxxx +A.20. Version 2.0.10-release, 22 jan 2014 +A.21. Version 2.0.9-release, 26 aug 2013 +A.22. Version 2.0.8-release, 26 apr 2013 +A.23. Version 2.0.7-release, 26 mar 2013 +A.24. Version 2.0.6-release, 22 oct 2012 +A.25. Version 2.0.5-release, 28 jul 2012 +A.26. Version 2.0.4-release, 02 mar 2012 +A.27. Version 2.0.3-release, 23 dec 2011 +A.28. Version 2.0.2-beta, 15 nov 2011 +A.29. Version 2.0.1-beta, 22 apr 2011 +A.30. Version 1.10-beta, 19 jul 2010 +A.31. Version 0.9.9-release, 02 dec 2009 +A.32. Version 0.9.9-rc2, 08 apr 2009 +A.33. Version 0.9.9-rc1, 17 nov 2008 +A.34. Version 0.9.8.1, 30 oct 2008 +A.35. Version 0.9.8, 14 jul 2008 +A.36. Version 0.9.7, 02 apr 2007 +A.37. Version 0.9.7-rc2, 15 dec 2006 +A.38. Version 0.9.7-rc1, 26 oct 2006 +A.39. Version 0.9.6, 24 jul 2006 +A.40. Version 0.9.6-rc1, 26 jun 2006 + +A.1. 
Version 2.2.9-release, 16 apr 2015 +======================================= + +Bug fixes +--------- + + * fixed #2228, removed searchd shutdown behavior on failed connection + + * fixed #2208, ZONESPANLIST() support for RT indexes + + * fixed #2203, legacy API SELECT list + + * fixed #2201, indextool false positive error on RT index + + * fixed #2201, crash with string comparison at expressions and + expression ranker + + * fixed #2199, invalid packedfactors JSON output for index with + stopwords + + * fixed #2197, TRUNCATE fails to remove disk chunk files after calling + OPTIMIZE + + * fixed #2196, .NET connector issue (UTC_TIMESTAMP() support) + + * fixed #2190, incorrect GROUP BY outer JSON object + + * fixed #2176, agent used ha_strategy=random instead of specified in + config + + * fixed #2144, query parser crash vs multiforms with heading numbers + + * fixed #2122, id64 daemon failed to load RT disk chunk with kill-list + from id32 build + + * fixed #2120, aliased JSON elements support + + * fixed #1979, snippets generation and span length and lcs calculation + in proximity queries + + * fixed truncated results (and a potential crash) vs long enough + ZONESPANLIST() result + +A.2. Version 2.2.8-release, 09 mar 2015 +======================================= + +Minor features +-------------- + + * added #2166, per agent HA strategy for distributed indexes + +Bug fixes +--------- + + * fixed #2182, incorrect query results with multiple same destination + wordforms + + * fixed #2181, improved error message on incorrect filters + + * fixed #2178, ZONESPAN operator for queries with more than two words + + * fixed #2172, incorrect results with field position fulltext operators + + * fixed #2171, some index options do not work for template indexes + + * fixed #2170, joined fields indexation with document id equals to 0 + + * fixed #2110, crash on snippet generation + + * fixed WLCCS ranking factor computation + + * fixed memory leak on queries with ZONEs + +A.3. 
Version 2.2.7-release, 20 jan 2015 ======================================= Minor features @@ -13416,7 +13485,7 @@ Bug fixes * fixed snippets crash with blend chars at the beginning of a string -A.2. Version 2.2.6-release, 13 nov 2014 +A.4. Version 2.2.6-release, 13 nov 2014 ======================================= Bug fixes @@ -13438,7 +13507,7 @@ Bug fixes * indexation of duplicate documents -A.3. Version 2.2.5-release, 06 oct 2014 +A.5. Version 2.2.5-release, 06 oct 2014 ======================================= New minor features @@ -13467,7 +13536,7 @@ Bug fixes * fixed MySQL protocol response when daemon maxed out -A.4. Version 2.2.4-release, 11 sep 2014 +A.6. Version 2.2.4-release, 11 sep 2014 ======================================= New major features @@ -13541,7 +13610,7 @@ Bug fixes * fixed cpu time logging for cases where work is done in child threads or agents -A.5. Version 2.2.3-beta, 13 may 2014 +A.7. Version 2.2.3-beta, 13 may 2014 ==================================== New features @@ -13602,7 +13671,7 @@ Bug fixes * fixed template index removing on rotation -A.6. Version 2.2.2-beta, 11 feb 2014 +A.8. Version 2.2.2-beta, 11 feb 2014 ==================================== New features @@ -13668,7 +13737,7 @@ Optimizations, behavior changes, and removals * removed support for client versions 0.9.6 and below -A.7. Version 2.2.1-beta, 13 nov 2013 +A.9. Version 2.2.1-beta, 13 nov 2013 ==================================== Major new features @@ -13796,8 +13865,8 @@ Bug fixes expansion (expand_keywords or lemmatize) occurred * fixed a crash while creating indexes with sql_joined_field -A.8. Version 2.1.9-release, 03 jul 2014 -======================================= +A.10. Version 2.1.9-release, 03 jul 2014 +======================================== Bug fixes --------- @@ -13816,8 +13885,8 @@ Bug fixes * fixed #1968, parsing of WEIGHT() function (queries to distributed indexes affected) -A.9. 
Version 2.1.8-release, 28 apr 2014 -======================================= +A.11. Version 2.1.8-release, 28 apr 2014 +======================================== Bug fixes --------- @@ -13842,7 +13911,7 @@ Bug fixes * fixed #1682, field end modifier doesn't work with words containing blended chars -A.10. Version 2.1.7-release, 30 mar 2014 +A.12. Version 2.1.7-release, 30 mar 2014 ======================================== Bug fixes @@ -13877,7 +13946,7 @@ Bug fixes * fixed Quick Tour documentation chapter -A.11. Version 2.1.6-release, 24 feb 2014 +A.13. Version 2.1.6-release, 24 feb 2014 ======================================== Bug fixes @@ -13923,7 +13992,7 @@ Bug fixes * fixed index corruption in UPDATE queries with non-existent attributes -A.12. Version 2.1.5-release, 22 jan 2014 +A.14. Version 2.1.5-release, 22 jan 2014 ======================================== Bug fixes @@ -13944,7 +14013,7 @@ Bug fixes * fixed network protocol issue which results in timeouts of libmysqlclient for big Sphinx responses -A.13. Version 2.1.4-release, 18 dec 2013 +A.15. Version 2.1.4-release, 18 dec 2013 ======================================== Bug fixes @@ -13964,7 +14033,7 @@ Bug fixes * fixed, --with-re2 check -A.14. Version 2.1.3-release, 12 nov 2013 +A.16. Version 2.1.3-release, 12 nov 2013 ======================================== Bug fixes @@ -13985,7 +14054,7 @@ Bug fixes * fixed crash while querying index with lemmatizer and wordforms -A.15. Version 2.1.2-release, 10 oct 2013 +A.17. Version 2.1.2-release, 10 oct 2013 ======================================== New features @@ -14159,7 +14228,7 @@ Bug fixes * fixed TOP_COUNT usage in misc/suggest and updated to PHP 5.3 and UTF-8 -A.16. Version 2.1.1-beta, 20 feb 2013 +A.18. Version 2.1.1-beta, 20 feb 2013 ===================================== Major new features @@ -14311,13 +14380,13 @@ Major behavior changes and optimizations * optimized filtering and scan in several frequent cases (single-value, 2-arg, 3-arg WHERE clauses) -A.17. 
Version 2.0.11-dev, xx xxx xxxx +A.19. Version 2.0.11-dev, xx xxx xxxx ===================================== Bug fixes --------- -A.18. Version 2.0.10-release, 22 jan 2014 +A.20. Version 2.0.10-release, 22 jan 2014 ========================================= Bug fixes @@ -14354,7 +14423,7 @@ Bug fixes * fixed rt_flush_period - less stricter internal check and more often flushes overall -A.19. Version 2.0.9-release, 26 aug 2013 +A.21. Version 2.0.9-release, 26 aug 2013 ======================================== Bug fixes @@ -14414,7 +14483,7 @@ Bug fixes * added a warning for missed stopwords, exception, wordforms files on index load and in indextool --check -A.20. Version 2.0.8-release, 26 apr 2013 +A.22. Version 2.0.8-release, 26 apr 2013 ======================================== Bug fixes @@ -14464,7 +14533,7 @@ Bug fixes * fixed #1405, between with mixed int float values -A.21. Version 2.0.7-release, 26 mar 2013 +A.23. Version 2.0.7-release, 26 mar 2013 ======================================== Bug fixes @@ -14597,7 +14666,7 @@ Bug fixes * added more debug info about failed index loading -A.22. Version 2.0.6-release, 22 oct 2012 +A.24. Version 2.0.6-release, 22 oct 2012 ======================================== Bug fixes @@ -14719,7 +14788,7 @@ Bug fixes * fixed x64 configurations for libstemmer -A.23. Version 2.0.5-release, 28 jul 2012 +A.25. Version 2.0.5-release, 28 jul 2012 ======================================== Bug fixes @@ -14879,7 +14948,7 @@ Bug fixes * fixed missing command-line switches documentation -A.24. Version 2.0.4-release, 02 mar 2012 +A.26. Version 2.0.4-release, 02 mar 2012 ======================================== Bug fixes @@ -14967,7 +15036,7 @@ Bug fixes * fixed #1120, negative total_found, docs and hits counter on huge indexes -A.25. Version 2.0.3-release, 23 dec 2011 +A.27. 
Version 2.0.3-release, 23 dec 2011 ======================================== Bug fixes @@ -15014,7 +15083,7 @@ Bug fixes * fixed crash log for 'fork' and 'prefork' workers -A.26. Version 2.0.2-beta, 15 nov 2011 +A.28. Version 2.0.2-beta, 15 nov 2011 ===================================== Major new features @@ -15281,7 +15350,7 @@ Bug fixes * fixed that field/zone conditions were not propagated to expanded keywords with dict=keywords -A.27. Version 2.0.1-beta, 22 apr 2011 +A.29. Version 2.0.1-beta, 22 apr 2011 ===================================== New general features @@ -15579,7 +15648,7 @@ Bug fixes * fixed libsphinxclient vs VS2003 compiler bug -A.28. Version 1.10-beta, 19 jul 2010 +A.30. Version 1.10-beta, 19 jul 2010 ==================================== * added RT indexes support (Chapter 4, Real-time indexes) @@ -15729,7 +15798,7 @@ A.28. Version 1.10-beta, 19 jul 2010 * fixed #509, indexing ranged results from stored procedures -A.29. Version 0.9.9-release, 02 dec 2009 +A.31. Version 0.9.9-release, 02 dec 2009 ======================================== * added Open, Close, Status calls to libsphinxclient (C API) @@ -15844,7 +15913,7 @@ A.29. Version 0.9.9-release, 02 dec 2009 * fixed #326 (missing CLOCK_xxx on FreeBSD) -A.30. Version 0.9.9-rc2, 08 apr 2009 +A.32. Version 0.9.9-rc2, 08 apr 2009 ==================================== * added IsConnectError(), Open(), Close() calls to Java API (bug #240) @@ -16014,7 +16083,7 @@ A.30. Version 0.9.9-rc2, 08 apr 2009 * fixed parser vs @field -keyword, foo|@field bar, "" queries (bug #310) -A.31. Version 0.9.9-rc1, 17 nov 2008 +A.33. Version 0.9.9-rc1, 17 nov 2008 ==================================== * added min_stemming_len directive @@ -16117,7 +16186,7 @@ A.31. Version 0.9.9-rc1, 17 nov 2008 * fixed handling of over-2GB dictionary files (.spi) -A.32. Version 0.9.8.1, 30 oct 2008 +A.34. 
Version 0.9.8.1, 30 oct 2008 ================================== * added configure script to libsphinxclient @@ -16172,7 +16241,7 @@ A.32. Version 0.9.8.1, 30 oct 2008 * fixed default ID range (that filtered out all 64-bit values) in Java and Python APIs -A.33. Version 0.9.8, 14 jul 2008 +A.35. Version 0.9.8, 14 jul 2008 ================================ Indexing @@ -16456,7 +16525,7 @@ Changes and fixes since 0.9.8-rc2 * fixed sorting of negative floats in SPH_SORT_EXTENDED mode -A.34. Version 0.9.7, 02 apr 2007 +A.36. Version 0.9.7, 02 apr 2007 ================================ * added support for sql_str2ordinal_column @@ -16541,7 +16610,7 @@ A.34. Version 0.9.7, 02 apr 2007 * fixed crashes with no-attribute indexes after rotate -A.35. Version 0.9.7-rc2, 15 dec 2006 +A.37. Version 0.9.7-rc2, 15 dec 2006 ==================================== * added support for extended matching mode (query language) @@ -16577,7 +16646,7 @@ A.35. Version 0.9.7-rc2, 15 dec 2006 * fixes for 64-bit platforms -A.36. Version 0.9.7-rc1, 26 oct 2006 +A.38. Version 0.9.7-rc1, 26 oct 2006 ==================================== * added alpha index merging code @@ -16627,7 +16696,7 @@ A.36. Version 0.9.7-rc1, 26 oct 2006 * fixed some issues with index rotation -A.37. Version 0.9.6, 24 jul 2006 +A.39. Version 0.9.6, 24 jul 2006 ================================ * added support for empty indexes @@ -16640,7 +16709,7 @@ A.37. Version 0.9.6, 24 jul 2006 * fixed building on Solaris 9 -A.38. Version 0.9.6-rc1, 26 jun 2006 +A.40. Version 0.9.6-rc1, 26 jun 2006 ==================================== * added boolean queries support (experimental, beta version) diff --git a/doc/sphinx.xml b/doc/sphinx.xml index c5b9f3ed..0ca3bc32 100644 --- a/doc/sphinx.xml +++ b/doc/sphinx.xml @@ -5,7 +5,7 @@ ]> - Sphinx 2.2.8-dev reference manual +Sphinx 2.2.10-dev reference manual Free open-source SQL full-text search engine @@ -292,10 +292,10 @@ specific project files) manually. 
Extract everything from the distribution tarball (haven't you already?) and go to the -sphinx subdirectory. (We are using - version 2.2.8-dev here for the sake of example only; be sure to change this + version 2.2.10-dev here for the sake of example only; be sure to change this to a specific version you're using.)$ tar xzvf sphinx-2.2.8-dev.tar.gz + $ tar xzvf sphinx-2.2.10-dev.tar.gz $ cd sphinx @@ -367,7 +367,7 @@ do not seem to help you, please don't hesitate to contact me. $ sudo apt-get install mysql-client unixodbc libpq5 + Now you can install Sphinx: -$ sudo dpkg -i sphinxsearch_2.2.8-dev-0ubuntu11~trusty_amd64.deb $ sudo dpkg -i sphinxsearch_2.2.10-dev-0ubuntu12~trusty_amd64.deb PPA repository (Ubuntu only). Installing Sphinx is much easier from Sphinxsearch PPA repository, because you will get all dependencies and can also update Sphinx to the latest version with the same command. @@ -407,9 +407,9 @@ area on the website.Extract everything from the .zip file you have downloaded - - @@ -1489,12 +1489,6 @@ RT indexes are currently quality feature, but there are still a few known usage quirks. Those quirks are listed in this section.sphinx-2.2.8-dev-win32.zip , - orsphinx-2.2.8-dev-win32-pgsql.zip if you need PostgresSQL support as well. - (We are using version 2.2.8-dev here for the sake of example only; +sphinx-2.2.10-dev-win32.zip , + orsphinx-2.2.10-dev-win32-pgsql.zip if you need PostgresSQL support as well. + (We are using version 2.2.10-dev here for the sake of example only; be sure to change this to a specific version you're using.) You can use Windows Explorer in Windows XP and up to extract the files, or a freeware package like 7Zip to open the archive.- - Prefix indexing is supported with dict = keywords starting 2.0.2-beta. Infix indexing is experimental in trunk. - Disk chunks optimization routine is not implemented yet. On initial index creation, attributes are reordered by type, -in the following order: uint, bigint, float, timestamp, string. 
So when -using INSERT without an explicit column names list, specify all uint -column values first, then bigint, etc. @@ -1715,7 +1709,6 @@ followed by Sphinx document ID in ascending order (earliest first). Default conservative RAM chunk limit () of 32M can lead to poor performance on bigger indexes, you should raise it to 256..1024M if you're planning to index gigabytes. Boolean queries allow the following special operators to be used: - explicit operator AND: hello & world operator OR: hello | world operator NOT: @@ -3978,7 +3971,8 @@ directive. Sphinx currently defaults to collation. @@ -10434,6 +10428,7 @@ For reference, different index files store the following data:Collations should affect all string attribute comparisons, including those within ORDER BY and GROUP BY, so differently ordered or grouped results -can be returned depending on the collation chosen. +can be returned depending on the collation chosen. Note that collations don't +affect full-text searching, for that use charset_table. 
@@ -9509,8 +9503,8 @@ sql_joined_field = \ SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC sql_joined_field = bigint tag from ranged-query; \ - SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \ - SELECT MIN(id), MAX(id) FROM tags ORDER BY docid ASC + SELECT id, tag FROM tags WHERE id>=$start AND id<=$end ORDER BY id ASC; \ + SELECT MIN(id), MAX(id) FROM tags.spm stores MVA data;.spp stores hit (aka posting, aka word occurrence) lists for each word ID;+ .sps stores string attribute data..spe stores skip-lists to speed up doc-list filteringExample: @@ -10614,8 +10609,8 @@ option are as follows:lemmatize_en - apply English lemmatizer and pick a single root form (added in 2.2.1-beta); lemmatize_de - apply German lemmatizer and pick a single root form (added in 2.2.1-beta); - lemmatize_ru_all - apply Russian lemmatizer and index all possible root forms (added in 2.1.1-beta); - lemmatize_en_all - apply Russian lemmatizer and index all possible root forms (added in 2.2.1-beta); + lemmatize_de_all - apply Russian lemmatizer and index all possible root forms (added in 2.2.1-beta); + lemmatize_en_all - apply English lemmatizer and index all possible root forms (added in 2.2.1-beta); lemmatize_de_all - apply German lemmatizer and index all possible root forms (added in 2.2.1-beta); stem_en - apply Porter's English stemmer; stem_ru - apply Porter's Russian stemmer; @@ -12618,7 +12613,7 @@ Added in 2.1.1-beta. stem_enru - apply Porter's English and Russian stemmers; When index_field_lengthsis set to 1,indexer will 1) create a respective length attribute for every full-text field, -sharing the same name; 2) compute a field length (counted in keywords) for +sharing the same name but with_len suffix; 2) compute a field length (counted in keywords) for every document and store in to a respective attribute; 3) compute the per-index averages. 
The lengths attributes will have a special TOKENCOUNT type, but their values are in fact regular 32-bit integers, and their values are generally @@ -13001,7 +12996,7 @@ indexed.Example: -on_file_field_errors = skip_document +on_file_field_error = skip_document @@ -13798,25 +13793,6 @@ collation_libc_locale = fr_FR -- - plugin_dir --Trusted location for the dynamic libraries (UDFs). -Optional, default is empty (no location). -Introduced in version 2.0.1-beta. - --Specifies the trusted directory from which the -UDF libraries can be loaded. Requires -workers = thread to take effect. - -Example: --workers = threads -plugin_dir = /usr/local/sphinx/lib - -+ + mysql_version_string A server version string to return via MySQL protocol. @@ -14357,6 +14333,25 @@ rlp_max_batch_docs = 100 + + @@ -14380,6 +14375,48 @@ rlp_max_batch_docs = 100 plugin_dir ++Trusted location for the dynamic libraries (UDFs). +Optional, default is empty (no location). +Introduced in version 2.0.1-beta. + ++Specifies the trusted directory from which the +UDF libraries can be loaded. Requires +workers = thread to take effect. 
+ +Example: ++plugin_dir = /usr/local/sphinx/lib + +Sphinx revision history ++ + + Version 2.2.9-release, 16 apr 2015 +Bug fixes ++ ++ fixed #2228, removed searchd shutdown behavior on failed connection+ fixed #2208, ZONESPANLIST() support for RT indexes + fixed #2203, legacy API SELECT list + fixed #2201, indextool false positive error on RT index+ fixed #2201, crash with string comparison at expressions and expression ranker + fixed #2199, invalid packedfactors JSON output for index with stopwords + fixed #2197, TRUNCATE fails to remove disk chunk files after calling OPTIMIZE + fixed #2196, .NET connector issue (UTC_TIMESTAMP() support) + fixed #2190, incorrect GROUP BY outer JSON object + fixed #2176, agent used instead of specified in config + fixed #2144, query parser crash vs multiforms with heading numbers + fixed #2122, id64 daemon failed to load RT disk chunk with kill-list from id32 build + fixed #2120, aliased JSON elements support + fixed #1979, snippets generation and span length and lcs calculation in proximity queries + fixed truncated results (and a potential crash) vs long enough ZONESPANLIST() result + + Version 2.2.8-release, 09 mar 2015 +Minor features ++ ++ added #2166, per agent HA strategy for distributed indexes Bug fixes ++ ++ fixed #2182, incorrect query results with multiple same destination wordforms + fixed #2181, improved error message on incorrect filters + fixed #2178, ZONESPAN operator for queries with more than two words + fixed #2172, incorrect results with field position fulltext operators + fixed #2171, some index options do not work for template indexes + fixed #2170, joined fields indexation with document id equals to 0 + fixed #2110, crash on snippet generation + fixed WLCCS ranking factor computation + fixed memory leak on queries with ZONEs Version 2.2.7-release, 20 jan 2015 Minor features diff --git a/libstemmer_c/Makefile.in b/libstemmer_c/Makefile.in index dbfc8b28..a9c11169 100644 --- a/libstemmer_c/Makefile.in +++ 
b/libstemmer_c/Makefile.in @@ -380,13 +380,13 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stem_ISO_8859_1_finnish.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stem_ISO_8859_1_french.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stem_ISO_8859_1_german.Po@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stem_ISO_8859_2_hungarian.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stem_ISO_8859_1_italian.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stem_ISO_8859_1_norwegian.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stem_ISO_8859_1_porter.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stem_ISO_8859_1_portuguese.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stem_ISO_8859_1_spanish.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stem_ISO_8859_1_swedish.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stem_ISO_8859_2_hungarian.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stem_ISO_8859_2_romanian.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stem_KOI8_R_russian.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stem_UTF_8_danish.Po@am__quote@ diff --git a/mysqlse/ha_sphinx.cc b/mysqlse/ha_sphinx.cc index 9400b055..729cf60e 100644 --- a/mysqlse/ha_sphinx.cc +++ b/mysqlse/ha_sphinx.cc @@ -154,7 +154,7 @@ void sphUnalignedWrite ( void * pPtr, const T & tVal ) #define SPHINXSE_MAX_ALLOC (16*1024*1024) #define SPHINXSE_MAX_KEYWORDSTATS 4096 -#define SPHINXSE_VERSION "2.2.8-dev" +#define SPHINXSE_VERSION "2.2.10-dev" // FIXME? the following is cut-n-paste from sphinx.h and searchd.cpp // cut-n-paste is somewhat simpler that adding dependencies however.. 
diff --git a/src/llsphinxql.c b/src/llsphinxql.c index f70dd070..8f0d84fc 100644 --- a/src/llsphinxql.c +++ b/src/llsphinxql.c @@ -918,7 +918,7 @@ static yyconst flex_int16_t yy_chk[1268] = } -#line 913 "llsphinxql.c" +#line 922 "llsphinxql.c" #define INITIAL 0 #define ccomment 1 @@ -1038,7 +1038,12 @@ static int input (yyscan_t yyscanner ); /* Amount of stuff to slurp up with each read. */ #ifndef YY_READ_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k */ +#define YY_READ_BUF_SIZE 16384 +#else #define YY_READ_BUF_SIZE 8192 +#endif /* __ia64__ */ #endif /* Copy whatever the last rule matched to the standard output. */ @@ -1046,7 +1051,7 @@ static int input (yyscan_t yyscanner ); /* This used to be an fputs(), but since the string might contain NUL's, * we now use fwrite(). */ -#define ECHO fwrite( yytext, yyleng, 1, yyout ) +#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0) #endif /* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, @@ -1143,7 +1148,7 @@ YY_DECL #line 31 "sphinxql.l" -#line 1136 "llsphinxql.c" +#line 1150 "llsphinxql.c" if ( !yyg->yy_init ) { @@ -1825,7 +1830,7 @@ case 120: /* rule 120 can match eol */ YY_RULE_SETUP #line 155 "sphinxql.l" -{ YYSTOREBOUNDS; pParser->m_pLastTokenStart = yytext; return TOK_QUOTED_STRING; } +{ YYSTOREBOUNDS; pParser->m_pLastTokenStart = yytext; lvalp->m_iValue = ( (SphAttr_t)lvalp->m_iStart<<32 ) | ( lvalp->m_iEnd-lvalp->m_iStart ); return TOK_QUOTED_STRING; } YY_BREAK case 121: YY_RULE_SETUP @@ -1924,7 +1929,7 @@ YY_RULE_SETUP #line 178 "sphinxql.l" ECHO; YY_BREAK -#line 1917 "llsphinxql.c" +#line 1931 "llsphinxql.c" case YY_STATE_EOF(INITIAL): case YY_STATE_EOF(ccomment): yyterminate(); @@ -2659,8 +2664,8 @@ YY_BUFFER_STATE yy_scan_string (yyconst char * yystr , yyscan_t yyscanner) /** Setup the input buffer state to scan the given bytes. The next call to yylex() will * scan from a @e copy of @a bytes. 
- * @param bytes the byte buffer to scan - * @param len the number of bytes in the buffer pointed to by @a bytes. + * @param yybytes the byte buffer to scan + * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. * @param yyscanner The scanner object. * @return the newly allocated buffer state object. */ diff --git a/src/searchd.cpp b/src/searchd.cpp index e3b68439..d961eb61 100644 --- a/src/searchd.cpp +++ b/src/searchd.cpp @@ -2436,10 +2436,10 @@ int sphCreateInetSocket ( DWORD uAddr, int iPort ) int iOn = 1; if ( setsockopt ( iSock, SOL_SOCKET, SO_REUSEADDR, (char*)&iOn, sizeof(iOn) ) ) - sphFatal ( "setsockopt() failed: %s", sphSockError() ); + sphWarning ( "setsockopt() failed: %s", sphSockError() ); #ifdef TCP_NODELAY if ( setsockopt ( iSock, IPPROTO_TCP, TCP_NODELAY, (char*)&iOn, sizeof(iOn) ) ) - sphFatal ( "setsockopt() failed: %s", sphSockError() ); + sphWarning ( "setsockopt() failed: %s", sphSockError() ); #endif int iTries = 12; @@ -3578,7 +3578,9 @@ enum HAStrategies_e { HA_AVOIDDEAD, HA_AVOIDERRORS, HA_AVOIDDEADTM, ///< the same as HA_AVOIDDEAD, but uses just min timeout instead of weighted random - HA_AVOIDERRORSTM ///< the same as HA_AVOIDERRORS, but uses just min timeout instead of weighted random + HA_AVOIDERRORSTM, ///< the same as HA_AVOIDERRORS, but uses just min timeout instead of weighted random + + HA_DEFAULT = HA_RANDOM }; class InterWorkerStorage : public ISphNoncopyable @@ -3677,6 +3679,7 @@ struct MetaAgentDesc_t int * m_pRRCounter; /// pointer not owned, pointee IPC-shared InterWorkerStorage * m_pLock; /// pointer not owned, lock for threads/IPC DWORD m_uTimestamp; + HAStrategies_e m_eStrategy; public: MetaAgentDesc_t () @@ -3684,6 +3687,7 @@ struct MetaAgentDesc_t , m_pRRCounter ( NULL ) , m_pLock ( NULL ) , m_uTimestamp ( HostDashboard_t::GetCurSeconds() ) + , m_eStrategy ( HA_DEFAULT ) {} MetaAgentDesc_t ( const MetaAgentDesc_t & rhs ) @@ -3697,6 +3701,17 @@ struct MetaAgentDesc_t m_dAgents[i].m_bPersistent 
= true; } + void SetBlackhole () + { + ARRAY_FOREACH ( i, m_dAgents ) + m_dAgents[i].m_bBlackhole = true; + } + + void SetStrategy ( HAStrategies_e eStrategy ) + { + m_eStrategy = eStrategy; + } + inline void SetHAData ( int * pRRCounter, WORD * pWeights, InterWorkerStorage * pLock ) { m_pRRCounter = pRRCounter; @@ -3787,21 +3802,14 @@ struct MetaAgentDesc_t // renormalize the weights fNormale = 65535/fNormale; -#ifndef NDEBUG DWORD uCheck = 0; - sphInfo ( "Rebalancing the mirrors" ); -#endif ARRAY_FOREACH ( i, m_dAgents ) { m_pWeights[i] = WORD ( m_pWeights[i]*dCoefs[i]*fNormale ); -#ifndef NDEBUG uCheck += m_pWeights[i]; - sphInfo ( "Mirror %d, new weight (%d)", i, m_pWeights[i] ); -#endif + sphLogDebug ( "Mirror %d, new weight (%d)", i, m_pWeights[i] ); } -#ifndef NDEBUG - sphInfo ( "Rebalancing finished. The whole sum is %d", uCheck ); -#endif + sphLogDebug ( "Rebalancing finished. The whole sum is %d", uCheck ); } } @@ -4055,9 +4063,9 @@ struct MetaAgentDesc_t } - AgentDesc_t * GetRRAgent ( HAStrategies_e eStrategy ) + AgentDesc_t * GetRRAgent () { - switch ( eStrategy ) + switch ( m_eStrategy ) { case HA_AVOIDDEAD: return StDiscardDead(); @@ -4113,6 +4121,7 @@ struct MetaAgentDesc_t m_pWeights = rhs.m_pWeights; m_pRRCounter = rhs.m_pRRCounter; m_pLock = rhs.m_pLock; + m_eStrategy = rhs.m_eStrategy; return *this; } }; @@ -4250,7 +4259,7 @@ struct DistributedIndex_t , m_iAgentQueryTimeout ( g_iAgentQueryTimeout ) , m_bToDelete ( false ) , m_bDivideRemoteRanges ( false ) - , m_eHaStrategy ( HA_RANDOM ) + , m_eHaStrategy ( HA_DEFAULT ) , m_pHAStorage ( NULL ) {} ~DistributedIndex_t() @@ -5085,7 +5094,7 @@ int RemoteQueryAgents ( AgentConnectionContext_t * pCtx ) // send the client's proto version right now to avoid w-w-r pattern. NetOutputBuffer_c tOut ( tAgent.m_iSock ); tOut.SendDword ( SPHINX_CLIENT_VERSION ); - bool bFlushed = tOut.Flush (); // FIXME! handle flush failure? + tOut.Flush (); // FIXME! handle flush failure? 
tAgent.m_eState = AGENT_HANDSHAKE; continue; @@ -6789,7 +6798,8 @@ bool ParseSearchQuery ( InputBuffer_c & tReq, CSphQuery & tQuery, int iVer, int if ( tQuery.m_sSelect.Begins ( "*,*" ) ) // this is the legacy mark of agent for debug purpose { tQuery.m_bAgent = true; - tQuery.m_sSelect = tQuery.m_sSelect.SubString ( 4, tQuery.m_sSelect.Length()-4 ); + int iSelectLen = tQuery.m_sSelect.Length(); + tQuery.m_sSelect = ( iSelectLen>4 ? tQuery.m_sSelect.SubString ( 4, iSelectLen-4 ) : "*" ); } CSphString sError; @@ -6992,6 +7002,39 @@ void LogQueryPlain ( const CSphQuery & tQuery, const CSphQueryResult & tRes ) #endif } +class UnBackquote_fn : public ISphNoncopyable +{ + CSphString m_sBuf; + const char * m_pDst; + +public: + explicit UnBackquote_fn ( const char * pSrc ) + { + m_pDst = pSrc; + int iLen = 0; + if ( pSrc && *pSrc ) + iLen = strlen ( pSrc ); + + if ( iLen && memchr ( pSrc, '`', iLen ) ) + { + m_sBuf = pSrc; + char * pDst = const_cast ( m_sBuf.cstr() ); + const char * pEnd = pSrc + iLen; + + while ( pSrc Appendf ( " %s %s DESC", sPrefix, sSubst ); break; @@ -7009,6 +7055,7 @@ static void FormatOrderBy ( CSphStringBuilder * pBuf, const char * sPrefix, ESph case SPH_SORT_TIME_SEGMENTS: pBuf->Appendf ( " %s TIME_SEGMENT(%s)", sPrefix, sSubst ); break; case SPH_SORT_EXTENDED: pBuf->Appendf ( " %s %s", sPrefix, sSubst ); break; case SPH_SORT_EXPR: pBuf->Appendf ( " %s BUILTIN_EXPR()", sPrefix ); break; + case SPH_SORT_RELEVANCE: pBuf->Appendf ( " %s weight() desc%s%s", sPrefix, ( sSubst && *sSubst ? ", " : "" ), ( sSubst && *sSubst ? 
sSubst : "" ) ); break; default: pBuf->Appendf ( " %s mode-%d", sPrefix, (int)eSort ); break; } } @@ -7048,7 +7095,8 @@ static void LogQuerySphinxql ( const CSphQuery & q, const CSphQueryResult & tRes if ( q.m_bHasOuter ) tBuf += "SELECT * FROM ("; - tBuf.Appendf ( "SELECT %s FROM %s", q.m_sSelect.cstr(), q.m_sIndexes.cstr() ); + UnBackquote_fn tUnquoted ( q.m_sSelect.cstr() ); + tBuf.Appendf ( "SELECT %s FROM %s", tUnquoted.cstr(), q.m_sIndexes.cstr() ); // WHERE clause // (m_sRawQuery is empty when using MySQL handler) @@ -7221,6 +7269,9 @@ static void LogQuerySphinxql ( const CSphQuery & q, const CSphQueryResult & tRes tBuf.Appendf ( iOpts++ ? ", " : " OPTION " ); tBuf.Appendf ( "ranker=%s", sRanker ); + + if ( !q.m_sRankerExpr.IsEmpty() ) + tBuf.Appendf ( "(\'%s\')", q.m_sRankerExpr.scstr() ); } // outer order by, limit @@ -8964,6 +9015,21 @@ class UnlockOnDestroy }; +static int StringBinary2Number ( const char * sStr, int iLen ) +{ + if ( !sStr || !iLen ) + return 0; + + char sBuf[64]; + if ( (int)sizeof ( sBuf-1 ) GetNumArgs(); i++ ) { assert ( !pArglist->GetArg(i)->IsStringPtr() ); // aware of memleaks potentially caused by StringEval() - pArglist->GetArg(i)->StringEval ( tDummy, (const BYTE**)&pWords ); - if ( !pWords ) + int iLen = pArglist->GetArg(i)->StringEval ( tDummy, (const BYTE**)&pWords ); + if ( !pWords || !iLen ) continue; - while ( *pWords && sphIsSpace ( *pWords ) ) pWords++; + const char * sEnd = pWords + iLen; + while ( pWords m_pStatic ) + { + tRes.m_tSchema.FreeStringPtrs ( pSrc ); + pSrc++; + continue; + } + + Swap ( *pSrc, *pDst ); + pSrc++; + pDst++; + } + + tRes.m_dMatchCounts.Last() = pDst - pStart; + tRes.m_dMatches.Resize ( pDst - tRes.m_dMatches.Begin() ); +} + + void SearchHandler_c::RunLocalSearchesMT () { int64_t tmLocal = sphMicroTimer(); @@ -9777,10 +9874,15 @@ void SearchHandler_c::RunLocalSearchesMT () tRes.m_tStats.Add ( tRaw.m_tStats ); tRes.m_iPredictedTime = CalcPredictedTimeMsec ( tRes ); } + if ( tRaw.m_iBadRows ) + 
tRes.m_sWarning.SetSprintf ( "query result is inaccurate because of "INT64_FMT" missed documents", tRaw.m_iBadRows ); // extract matches from sorter FlattenToRes ( pSorter, tRes, iOrderTag+iQuery-m_iStart ); + if ( tRaw.m_iBadRows ) + RemoveMissedRows ( tRes ); + // take over the schema from sorter, it doesn't need it anymore tRes.m_tSchema = pSorter->GetSchema(); // can SwapOut @@ -9809,9 +9911,11 @@ bool SearchHandler_c::RunLocalSearch ( int iLocal, ISphMatchSorter ** ppSorters, const int iQueries = m_iEnd-m_iStart+1; const ServedIndex_t * pServed = UseIndex ( iLocal ); - if ( !pServed ) + if ( !pServed || !pServed->m_bEnabled ) { // FIXME! submit a failure? + if ( pServed ) + ReleaseIndex ( iLocal ); return false; } assert ( pServed->m_pIndex ); @@ -9863,7 +9967,7 @@ bool SearchHandler_c::RunLocalSearch ( int iLocal, ISphMatchSorter ** ppSorters, if ( !pKillListIndex ) continue; - if ( pKillListIndex->m_pIndex->GetKillListSize() ) + if ( pKillListIndex->m_bEnabled && pKillListIndex->m_pIndex->GetKillListSize() ) { KillListTrait_t & tElem = dKillist.Add (); tElem.m_pBegin = pKillListIndex->m_pIndex->GetKillList(); @@ -9925,11 +10029,14 @@ void SearchHandler_c::RunLocalSearches ( ISphMatchSorter * pLocalSorter, const c int iIndexWeight = m_dLocal[iLocal].m_iWeight; const ServedIndex_t * pServed = UseIndex ( iLocal ); - if ( !pServed ) + if ( !pServed || !pServed->m_bEnabled ) { if ( sDistName ) for ( int i=m_iStart; i<=m_iEnd; i++ ) m_dFailuresSet[i].SubmitEx ( sDistName, "local index %s missing", sLocal ); + + if ( pServed ) + ReleaseIndex ( iLocal ); continue; } @@ -10037,7 +10144,7 @@ void SearchHandler_c::RunLocalSearches ( ISphMatchSorter * pLocalSorter, const c if ( !pKillListIndex ) continue; - if ( pKillListIndex->m_pIndex->GetKillListSize() ) + if ( pKillListIndex->m_bEnabled && pKillListIndex->m_pIndex->GetKillListSize() ) { KillListTrait_t & tElem = dKillist.Add (); tElem.m_pBegin = pKillListIndex->m_pIndex->GetKillList(); @@ -10096,6 +10203,11 @@ void 
SearchHandler_c::RunLocalSearches ( ISphMatchSorter * pLocalSorter, const c // this one seems OK AggrResult_t & tRes = m_dResults[iQuery]; + + int64_t iBadRows = m_bMultiQueue ? tStats.m_iBadRows : tRes.m_iBadRows; + if ( iBadRows ) + tRes.m_sWarning.SetSprintf ( "query result is inaccurate because of "INT64_FMT" missed documents", iBadRows ); + // multi-queue only returned one result set meta, so we need to replicate it if ( m_bMultiQueue ) { @@ -10123,6 +10235,9 @@ void SearchHandler_c::RunLocalSearches ( ISphMatchSorter * pLocalSorter, const c // extract matches from sorter FlattenToRes ( pSorter, tRes, iOrderTag+iQuery-m_iStart ); + if ( iBadRows ) + RemoveMissedRows ( tRes ); + // move external attributes storage from tStats to actual result tStats.LeakStorages ( tRes ); } @@ -10212,11 +10327,11 @@ void SearchHandler_c::SetupLocalDF ( int iStart, int iEnd ) ARRAY_FOREACH_COND ( i, m_dLocal, bGlobalIDF ) { const ServedIndex_t * pIndex = UseIndex ( i ); - if ( pIndex ) - { + if ( pIndex && pIndex->m_bEnabled ) bGlobalIDF = !pIndex->m_sGlobalIDFPath.IsEmpty(); + + if ( pIndex ) ReleaseIndex ( i ); - } } // bail out on all indexes with global idf set if ( bGlobalIDF ) @@ -10264,8 +10379,12 @@ void SearchHandler_c::SetupLocalDF ( int iStart, int iEnd ) ARRAY_FOREACH ( i, m_dLocal ) { const ServedIndex_t * pIndex = UseIndex ( i ); - if ( !pIndex ) + if ( !pIndex || !pIndex->m_bEnabled ) + { + if ( pIndex ) + ReleaseIndex ( i ); continue; + } dLocal.Add(); dLocal.Last().m_iLocal = i; @@ -10281,9 +10400,14 @@ void SearchHandler_c::SetupLocalDF ( int iStart, int iEnd ) CSphVector < CSphKeywordInfo > dKeywords; ARRAY_FOREACH ( i, dLocal ) { - const ServedIndex_t * pIndex = UseIndex ( dLocal[i].m_iLocal ); - if ( !pIndex ) + int iLocalIndex = dLocal[i].m_iLocal; + const ServedIndex_t * pIndex = UseIndex ( iLocalIndex ); + if ( !pIndex || !pIndex->m_bEnabled ) + { + if ( pIndex ) + ReleaseIndex ( iLocalIndex ); continue; + } m_iTotalDocs += 
pIndex->m_pIndex->GetStats().m_iTotalDocuments; @@ -10496,7 +10620,7 @@ void SearchHandler_c::RunSubset ( int iStart, int iEnd ) dAgents.Reserve ( dAgents.GetLength() + pDist->m_dAgents.GetLength() ); ARRAY_FOREACH ( j, pDist->m_dAgents ) { - dAgents.Add().TakeTraits ( *pDist->m_dAgents[j].GetRRAgent ( pDist->m_eHaStrategy ) ); + dAgents.Add().TakeTraits ( *pDist->m_dAgents[j].GetRRAgent() ); dAgents.Last().m_iStoreTag = iTagsCount; dAgents.Last().m_iWeight = iWeight; iTagsCount += iTagStep; @@ -10549,6 +10673,9 @@ void SearchHandler_c::RunSubset ( int iStart, int iEnd ) ARRAY_FOREACH ( i, m_dLocal ) { const ServedIndex_t * pServedIndex = UseIndex ( i ); + bool bEnabled = pServedIndex && pServedIndex->m_bEnabled; + if ( pServedIndex ) + ReleaseIndex ( i ); // check that it exists if ( !pServedIndex ) @@ -10563,8 +10690,6 @@ void SearchHandler_c::RunSubset ( int iStart, int iEnd ) return; } - bool bEnabled = pServedIndex->m_bEnabled; - ReleaseIndex ( i ); // if it exists but is not enabled, remove it from the list and force recheck if ( !bEnabled ) m_dLocal.Remove ( i-- ); @@ -10596,16 +10721,22 @@ void SearchHandler_c::RunSubset ( int iStart, int iEnd ) bool bAllEqual = true; const ServedIndex_t * pFirstIndex = UseIndex ( 0 ); - if ( !pFirstIndex ) + if ( !pFirstIndex || !pFirstIndex->m_bEnabled ) + { + if ( pFirstIndex ) + ReleaseIndex ( 0 ); break; + } const CSphSchema & tFirstSchema = pFirstIndex->m_pIndex->GetMatchSchema(); for ( int i=1; i m_bEnabled ) { bAllEqual = false; + if ( pNextIndex ) + ReleaseIndex ( i ); break; } @@ -12365,6 +12496,11 @@ bool ParseSqlQuery ( const char * sQuery, int iLen, CSphVector & dStm CSphQuery & tQuery = dStmt[i].m_tQuery; if ( tQuery.m_iSQLSelectStart>=0 ) { + if ( tQuery.m_iSQLSelectStart-1>=0 && tParser.m_pBuf[tQuery.m_iSQLSelectStart-1]=='`' ) + tQuery.m_iSQLSelectStart--; + if ( tQuery.m_iSQLSelectEnd & dQueries, CS dDistLocal = pDist->m_dLocal; dRemoteSnippets.m_dAgents.Resize ( pDist->m_dAgents.GetLength() ); 
ARRAY_FOREACH ( i, pDist->m_dAgents ) - dRemoteSnippets.m_dAgents[i].TakeTraits ( *pDist->m_dAgents[i].GetRRAgent ( pDist->m_eHaStrategy ) ); + dRemoteSnippets.m_dAgents[i].TakeTraits ( *pDist->m_dAgents[i].GetRRAgent() ); } g_tDistLock.Unlock(); @@ -16333,6 +16469,7 @@ static void FormatFactors ( CSphVector & dOut, const unsigned int * pFacto iLen = snprintf ( (char *)dOut.Begin()+iOff, MAX_STR_LEN, "], \"words\":[" ); dOut.Resize ( iOff+iLen ); } + bool bWord = false; for ( int i = 0; i & dOut, const unsigned int * pFacto sphinx_get_term_factor_int ( pTerm, SPH_TERMF_TF ), sphinx_get_term_factor_float ( pTerm, SPH_TERMF_IDF ) ); } else { - iLen = snprintf ( (char *)dOut.Begin()+iOff, MAX_STR_LEN, "%s{\"tf\":%d, \"idf\":%f}", ( i==0 ? "" : ", " ), + iLen = snprintf ( (char *)dOut.Begin()+iOff, MAX_STR_LEN, "%s{\"tf\":%d, \"idf\":%f}", ( bWord ? ", " : "" ), sphinx_get_term_factor_int ( pTerm, SPH_TERMF_TF ), sphinx_get_term_factor_float ( pTerm, SPH_TERMF_IDF ) ); + bWord = true; } dOut.Resize ( iOff+iLen ); } @@ -18561,7 +18699,7 @@ static void HandleClientMySQL ( int iSock, const char * sClientIP, ThdDesc_t * p default: // default case, unknown command sError.SetSprintf ( "unknown command (code=%d)", uMysqlCmd ); - SendMysqlErrorPacket ( tOut, uPacketID, sQuery.cstr(), sError.cstr(), MYSQL_ERR_UNKNOWN_COM_ERROR ); + SendMysqlErrorPacket ( tOut, uPacketID, NULL, sError.cstr(), MYSQL_ERR_UNKNOWN_COM_ERROR ); break; } @@ -19927,7 +20065,7 @@ bool PrereadNewIndex ( ServedIndex_t & tIdx, const CSphConfigSection & hIndex, c return true; } -bool ValidateAgentDesc ( MetaAgentDesc_t & tAgent, const CSphVariant * pLine, const char * szIndexName, bool bBlackhole ) +bool ValidateAgentDesc ( MetaAgentDesc_t & tAgent, const CSphVariant * pLine, const char * szIndexName ) { AgentDesc_t * pAgent = tAgent.LastAgent(); assert ( pAgent ); @@ -19935,6 +20073,13 @@ bool ValidateAgentDesc ( MetaAgentDesc_t & tAgent, const CSphVariant * pLine, co // lookup address (if needed) if ( 
pAgent->m_iFamily==AF_INET ) { + if ( pAgent->m_sHost.IsEmpty() ) + { + sphWarning ( "index '%s': agent '%s': invalid host name 'empty' - SKIPPING AGENT", + szIndexName, pLine->cstr() ); + return false; + } + pAgent->m_uAddr = sphGetAddress ( pAgent->m_sHost.cstr() ); if ( pAgent->m_uAddr==0 ) { @@ -19947,8 +20092,6 @@ bool ValidateAgentDesc ( MetaAgentDesc_t & tAgent, const CSphVariant * pLine, co // hash for dashboard CSphString sHashKey = pAgent->GetName(); - pAgent->m_bBlackhole = bBlackhole; - // allocate stats slot // let us cheat and also allocate the dashboard slot under the same lock if ( g_pStats ) @@ -19956,7 +20099,7 @@ bool ValidateAgentDesc ( MetaAgentDesc_t & tAgent, const CSphVariant * pLine, co g_tStatsMutex.Lock(); pAgent->m_iStatsIndex = g_pStats->m_dAgentStats.AllocItem(); if ( pAgent->m_iStatsIndex<0 ) - sphWarning ( "index '%s': agent '%s': failed to allocate slot for stats", + sphWarning ( "index '%s': agent '%s': failed to allocate slot for stats%s", szIndexName, pLine->cstr(), ( tAgent.IsHA() ? ", HA might be wrong" : "" ) ); if ( g_pStats->m_hDashBoard.Exists ( sHashKey ) ) @@ -19968,7 +20111,7 @@ bool ValidateAgentDesc ( MetaAgentDesc_t & tAgent, const CSphVariant * pLine, co pAgent->m_iDashIndex = g_pStats->m_dDashboard.AllocItem(); if ( pAgent->m_iDashIndex<0 ) { - sphWarning ( "index '%s': agent '%s': failed to allocate slot for stat-dashboard", + sphWarning ( "index '%s': agent '%s': failed to allocate slot for stat-dashboard%s", szIndexName, pLine->cstr(), ( tAgent.IsHA() ? 
", HA might be wrong" : "" ) ); } else { @@ -19994,13 +20137,59 @@ bool ValidateAgentDesc ( MetaAgentDesc_t & tAgent, const CSphVariant * pLine, co } return true; } -enum eAgentParse { apInHost, apInPort, apStartIndexList, apIndexList, apDone }; -bool ConfigureAgent ( MetaAgentDesc_t & tAgent, const CSphVariant * pAgent, const char * szIndexName, bool bBlackhole, bool bPersistent=false ) + +#define sphStrMatchStatic(_str, _cstr) ( strncmp ( _str, _cstr, sizeof(_str)-1 )==0 ) + + +static bool ParseStrategyHA ( const char * sName, HAStrategies_e & eStrategy ) +{ + if ( sphStrMatchStatic ( "random", sName ) ) + eStrategy = HA_RANDOM; + else if ( sphStrMatchStatic ( "roundrobin", sName ) ) + eStrategy = HA_ROUNDROBIN; + else if ( sphStrMatchStatic ( "nodeads", sName ) ) + eStrategy = HA_AVOIDDEAD; + else if ( sphStrMatchStatic ( "noerrors", sName ) ) + eStrategy = HA_AVOIDERRORS; + else + return false; + + return true; +} + +static bool IsAgentDelimiter ( char c ) +{ + return c=='|' || c=='[' || c==']'; +} + +struct AgentOptions_t +{ + bool m_bBlackhole; + bool m_bPersistent; + HAStrategies_e m_eStrategy; +}; + +enum AgentParse_e { apInHost, apInPort, apStartIndexList, apIndexList, apOptions, apDone }; + +bool ConfigureAgent ( MetaAgentDesc_t & tAgent, const CSphVariant * pAgent, const char * szIndexName, AgentOptions_t tDesc ) { - eAgentParse eState = apInHost; AgentDesc_t * pCurrent = tAgent.NewAgent(); + // extract host name or path const char * p = pAgent->cstr(); + while ( *p && isspace ( *p ) ) + p++; + AgentParse_e eState = apDone; + // might be agent options at head + if ( *p ) + { + if ( *p=='[' ) + { + eState = apOptions; + p += 1; + } else + eState = apInHost; + } const char * pAnchor = p; while ( eState!=apDone ) @@ -20009,8 +20198,15 @@ bool ConfigureAgent ( MetaAgentDesc_t & tAgent, const CSphVariant * pAgent, cons { case apInHost: { + if ( !*p ) + { + eState = apDone; + break; + } + if ( sphIsAlpha(*p) || *p=='.' 
|| *p=='-' || *p=='/' ) break; + if ( p==pAnchor ) { sphWarning ( "index '%s': agent '%s': host name or path expected - SKIPPING AGENT", @@ -20078,11 +20274,11 @@ bool ConfigureAgent ( MetaAgentDesc_t & tAgent, const CSphVariant * pAgent, cons } #endif - if ( *p=='|' ) + if ( IsAgentDelimiter ( *p ) ) { + eState = ( *p=='|' ? apInHost : apOptions ); pAnchor = p+1; - eState = apInHost; - if ( !ValidateAgentDesc ( tAgent, pAgent, szIndexName, bBlackhole ) ) + if ( !ValidateAgentDesc ( tAgent, pAgent, szIndexName ) ) return false; pCurrent = tAgent.NewAgent(); break; @@ -20113,7 +20309,7 @@ bool ConfigureAgent ( MetaAgentDesc_t & tAgent, const CSphVariant * pAgent, cons CSphString sIndexes = pAgent->strval().SubString ( pAnchor-pAgent->cstr(), p-pAnchor ); - if ( *p && *p!='|' ) + if ( *p && !IsAgentDelimiter ( *p ) ) { sphWarning ( "index '%s': agent '%s': index list expected near '%s' - SKIPPING AGENT", szIndexName, pAgent->cstr(), p ); @@ -20121,36 +20317,132 @@ bool ConfigureAgent ( MetaAgentDesc_t & tAgent, const CSphVariant * pAgent, cons } pCurrent->m_sIndexes = sIndexes; - if ( *p=='|' ) + if ( IsAgentDelimiter ( *p ) ) { + if ( *p=='|' ) + { + eState = apInHost; + if ( !ValidateAgentDesc ( tAgent, pAgent, szIndexName ) ) + return false; + pCurrent = tAgent.NewAgent(); + } else + eState = apOptions; + pAnchor = p+1; - eState = apInHost; - if ( !ValidateAgentDesc ( tAgent, pAgent, szIndexName, bBlackhole ) ) - return false; - pCurrent = tAgent.NewAgent(); break; + } else + eState = apDone; + } + break; + + case apOptions: + { + const char * sOptName = NULL; + const char * sOptValue = NULL; + bool bGotEq = false; + while ( *p ) + { + bool bNextOpt = ( *p==',' ); + bool bNextAgent = IsAgentDelimiter ( *p ); + bGotEq |= ( *p=='=' ); + + if ( bNextOpt || bNextAgent ) + { + if ( sOptName && sOptValue ) + { + bool bParsed = false; + if ( sphStrMatchStatic ( "conn", sOptName ) ) + { + if ( sphStrMatchStatic ( "pconn", sOptValue ) || sphStrMatchStatic ( "persistent", 
sOptValue ) ) + { + tDesc.m_bPersistent = true; + bParsed = true; + } + } else if ( sphStrMatchStatic ( "ha_strategy", sOptName ) ) + { + bParsed = ParseStrategyHA ( sOptValue, tDesc.m_eStrategy ); + } else if ( sphStrMatchStatic ( "blackhole", sOptName ) ) + { + tDesc.m_bBlackhole = ( atoi ( sOptValue )!=0 ); + bParsed = true; + } + + if ( !bParsed ) + { + CSphString sInvalid; + sInvalid.SetBinary ( sOptName, p-sOptName ); + sphWarning ( "index '%s': agent '%s': unknown agent option '%s' ", szIndexName, pAgent->cstr(), sInvalid.cstr() ); + } + } + + sOptName = sOptValue = NULL; + bGotEq = false; + if ( bNextAgent ) + break; + } + + if ( sphIsAlpha ( *p ) ) + { + if ( !sOptName ) + sOptName = p; + else if ( bGotEq && !sOptValue ) + sOptValue = p; + } + + p++; } - eState = apDone; + + if ( IsAgentDelimiter ( *p ) ) + { + eState = apInHost; + pAnchor = p+1; + } else + eState = apDone; } + break; + case apDone: default: break; } // switch (eState) p++; } // while (eState!=apDone) - bool bRes = ValidateAgentDesc ( tAgent, pAgent, szIndexName, bBlackhole ); + + bool bRes = ValidateAgentDesc ( tAgent, pAgent, szIndexName ); tAgent.QueuePings(); - if ( bPersistent ) + if ( tDesc.m_bPersistent ) tAgent.SetPersistent(); + if ( tDesc.m_bBlackhole ) + tAgent.SetBlackhole(); + tAgent.SetStrategy ( tDesc.m_eStrategy ); + return bRes; } +#undef sphStrMatchStatic + static void ConfigureDistributedIndex ( DistributedIndex_t * pIdx, const char * szIndexName, const CSphConfigSection & hIndex ) { assert ( hIndex("type") && hIndex["type"]=="distributed" ); assert ( pIdx!=NULL ); - DistributedIndex_t& tIdx = *pIdx; + DistributedIndex_t & tIdx = *pIdx; + + bool bSetHA = false; + // configure ha_strategy + if ( hIndex("ha_strategy") ) + { + bSetHA = ParseStrategyHA ( hIndex["ha_strategy"].cstr(), tIdx.m_eHaStrategy ); + if ( !bSetHA ) + sphWarning ( "index '%s': ha_strategy (%s) is unknown for me, will use random", szIndexName, hIndex["ha_strategy"].cstr() ); + } + + bool 
bEnablePersistentConns = ( g_eWorkers==MPM_THREADS && g_iPersistentPoolSize ); + if ( hIndex ( "agent_persistent" ) && !bEnablePersistentConns ) + { + sphWarning ( "index '%s': agent_persistent used, but no persistent_connections_limit defined. Fall back to non-persistent agent", szIndexName ); + bEnablePersistentConns = false; + } // add local agents CSphVector dLocs; @@ -20169,40 +20461,35 @@ static void ConfigureDistributedIndex ( DistributedIndex_t * pIdx, const char * } } - bool bHaveHA = false; + AgentOptions_t tAgentOptions; + tAgentOptions.m_bBlackhole = false; + tAgentOptions.m_bPersistent = false; + tAgentOptions.m_eStrategy = tIdx.m_eHaStrategy; // add remote agents for ( CSphVariant * pAgent = hIndex("agent"); pAgent; pAgent = pAgent->m_pNext ) { MetaAgentDesc_t& tAgent = tIdx.m_dAgents.Add(); - if ( ConfigureAgent ( tAgent, pAgent, szIndexName, false ) ) - bHaveHA |= tAgent.IsHA(); - else + if ( !ConfigureAgent ( tAgent, pAgent, szIndexName, tAgentOptions ) ) tIdx.m_dAgents.Pop(); } // for now work with client persistent connections only on per-thread basis, // to avoid locks, etc. - bool bEnablePersistentConns = g_eWorkers==MPM_THREADS; + tAgentOptions.m_bBlackhole = false; + tAgentOptions.m_bPersistent = bEnablePersistentConns; for ( CSphVariant * pAgent = hIndex("agent_persistent"); pAgent; pAgent = pAgent->m_pNext ) { MetaAgentDesc_t& tAgent = tIdx.m_dAgents.Add (); - if ( !g_iPersistentPoolSize ) - { - sphWarning ( "index '%s': agent_persistent used, but no persistent_connections_limit defined. 
Fall back to non-persistent agent", szIndexName ); - bEnablePersistentConns = false; - } - if ( ConfigureAgent ( tAgent, pAgent, szIndexName, false, bEnablePersistentConns ) ) - bHaveHA |= tAgent.IsHA(); - else + if ( !ConfigureAgent ( tAgent, pAgent, szIndexName, tAgentOptions ) ) tIdx.m_dAgents.Pop(); } + tAgentOptions.m_bBlackhole = true; + tAgentOptions.m_bPersistent = false; for ( CSphVariant * pAgent = hIndex("agent_blackhole"); pAgent; pAgent = pAgent->m_pNext ) { MetaAgentDesc_t& tAgent = tIdx.m_dAgents.Add (); - if ( ConfigureAgent ( tAgent, pAgent, szIndexName, true ) ) - bHaveHA |= tAgent.IsHA(); - else + if ( !ConfigureAgent ( tAgent, pAgent, szIndexName, tAgentOptions ) ) tIdx.m_dAgents.Pop(); } @@ -20225,24 +20512,12 @@ static void ConfigureDistributedIndex ( DistributedIndex_t * pIdx, const char * tIdx.m_iAgentQueryTimeout = hIndex["agent_query_timeout"].intval(); } + bool bHaveHA = ARRAY_ANY ( bHaveHA, tIdx.m_dAgents, tIdx.m_dAgents[_any].IsHA() ); + // configure ha_strategy - if ( hIndex("ha_strategy") ) - { - if ( !bHaveHA ) - sphWarning ( "index '%s': ha_strategy defined, but no ha agents in the index", szIndexName ); - - tIdx.m_eHaStrategy = HA_RANDOM; - if ( hIndex["ha_strategy"]=="random" ) - tIdx.m_eHaStrategy = HA_RANDOM; - else if ( hIndex["ha_strategy"]=="roundrobin" ) - tIdx.m_eHaStrategy = HA_ROUNDROBIN; - else if ( hIndex["ha_strategy"]=="nodeads" ) - tIdx.m_eHaStrategy = HA_AVOIDDEAD; - else if ( hIndex["ha_strategy"]=="noerrors" ) - tIdx.m_eHaStrategy = HA_AVOIDERRORS; - else - sphWarning ( "index '%s': ha_strategy (%s) is unknown for me, will use random", szIndexName, hIndex["ha_strategy"].cstr() ); - } + if ( bSetHA && !bHaveHA ) + sphWarning ( "index '%s': ha_strategy defined, but no ha agents in the index", szIndexName ); + tIdx.ShareHACounters(); } @@ -20273,15 +20548,12 @@ void FreeAgentStats ( DistributedIndex_t & tIndex ) g_tStatsMutex.Unlock(); } -void PreCreateTemplateIndex ( ServedDesc_t & tServed, const CSphConfigSection & 
hIndex ) +void PreCreateTemplateIndex ( ServedDesc_t & tServed, const CSphConfigSection & ) { tServed.m_pIndex = sphCreateIndexTemplate ( ); tServed.m_pIndex->m_bExpandKeywords = tServed.m_bExpand; tServed.m_pIndex->m_iExpansionLimit = g_iExpansionLimit; tServed.m_bEnabled = false; - - CSphString sError; - sphFixupIndexSettings ( tServed.m_pIndex, hIndex, sError, true ); } void PreCreatePlainIndex ( ServedDesc_t & tServed, const char * sName ) @@ -20507,6 +20779,22 @@ ESphAddIndex AddIndex ( const char * szIndexName, const CSphConfigSection & hInd // try to create index PreCreateTemplateIndex ( tIdx, hIndex ); tIdx.m_bEnabled = true; + + CSphIndexSettings s; + CSphString sError; + if ( !sphConfIndex ( hIndex, s, sError ) ) + { + sphWarning ( "failed to configure index %s: %s", szIndexName, sError.cstr() ); + return ADD_ERROR; + } + tIdx.m_pIndex->Setup(s); + + if ( !sphFixupIndexSettings ( tIdx.m_pIndex, hIndex, sError ) ) + { + sphWarning ( "index '%s': %s - NOT SERVING", szIndexName, sError.cstr() ); + return ADD_ERROR; + } + CSphIndexStatus tStatus; tIdx.m_pIndex->GetStatus ( &tStatus ); tIdx.m_iMass = CalculateMass ( tStatus ); @@ -21685,7 +21973,7 @@ void QueryStatus ( CSphVariant * v ) #ifdef TCP_NODELAY int iOn = 1; if ( setsockopt ( iSock, IPPROTO_TCP, TCP_NODELAY, (char*)&iOn, sizeof(iOn) ) ) - sphFatal ( "setsockopt() failed: %s", sphSockError() ); + sphWarning ( "setsockopt() failed: %s", sphSockError() ); #endif if ( connect ( iSock, (struct sockaddr*)&sin, sizeof(sin) )<0 ) @@ -21846,7 +22134,7 @@ Listener_t * DoAccept ( int * pClientSock, char * sClientName ) #ifdef TCP_NODELAY int iOn = 1; if ( g_dListeners[i].m_bTcp && setsockopt ( iClientSock, IPPROTO_TCP, TCP_NODELAY, (char*)&iOn, sizeof(iOn) ) ) - sphFatal ( "setsockopt() failed: %s", sphSockError() ); + sphWarning ( "setsockopt() failed: %s", sphSockError() ); #endif if ( g_pStats ) diff --git a/src/sphinx.cpp b/src/sphinx.cpp index e4f6c02f..6c4a4097 100644 --- a/src/sphinx.cpp +++ 
b/src/sphinx.cpp @@ -867,7 +867,7 @@ bool operator < ( SphDocID_t a, const SkiplistEntry_t & b ) { return a +template < bool INLINE_HITS, bool INLINE_DOCINFO, bool DISABLE_HITLIST_SEEK > class DiskIndexQword_c : public DiskIndexQwordTraits_c { public: @@ -941,12 +941,8 @@ class DiskIndexQword_c : public DiskIndexQwordTraits_c DWORD uField = m_rdDoclist.UnzipInt(); // field and end marker m_iHitlistPos = uFirst | ( uField << 23 ) | ( U64C(1)<<63 ); m_dQwordFields.UnsetAll(); - if_const ( DO_DEBUG_CHECK ) - { - if ( ( uField>>1 )>=SPH_MAX_FIELDS ) - uField = ( (DWORD)SPH_MAX_FIELDS-1 )<<1; - } - m_dQwordFields.Set ( uField >> 1 ); + // want to make sure bad field data not cause crash + m_dQwordFields.Set ( ( uField >> 1 ) & ( (DWORD)SPH_MAX_FIELDS-1 ) ); m_bAllFieldsKnown = true; } else { @@ -1031,10 +1027,10 @@ class DiskIndexQword_c : public DiskIndexQwordTraits_c \ switch ( ( INDEX##uInlineHits<<1 ) | INDEX##uInlineDocinfo ) \ { \ - case 0: { typedef DiskIndexQword_c < false, false, NO_SEEK, false > NAME; ACTION; break; } \ - case 1: { typedef DiskIndexQword_c < false, true, NO_SEEK, false > NAME; ACTION; break; } \ - case 2: { typedef DiskIndexQword_c < true, false, NO_SEEK, false > NAME; ACTION; break; } \ - case 3: { typedef DiskIndexQword_c < true, true, NO_SEEK, false > NAME; ACTION; break; } \ + case 0: { typedef DiskIndexQword_c < false, false, NO_SEEK > NAME; ACTION; break; } \ + case 1: { typedef DiskIndexQword_c < false, true, NO_SEEK > NAME; ACTION; break; } \ + case 2: { typedef DiskIndexQword_c < true, false, NO_SEEK > NAME; ACTION; break; } \ + case 3: { typedef DiskIndexQword_c < true, true, NO_SEEK > NAME; ACTION; break; } \ default: \ sphDie ( "INTERNAL ERROR: impossible qword settings" ); \ } \ @@ -1066,9 +1062,9 @@ struct DiskSubstringPayload_t : public ISphSubstringPayload template < bool INLINE_HITS > -class DiskPayloadQword_c : public DiskIndexQword_c +class DiskPayloadQword_c : public DiskIndexQword_c