1 /**
   Bindings for Xapian
3 
4    Copyright: © 2014 Semantic Machines
5    License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6    Authors: Valeriy Bushenev
7 
8  */
9 
10 module veda.bind.xapian_d_header;
11 
12 
/**
 * Stemming strategies accepted by XapianQueryParser.set_stemming_strategy.
 *
 * Members take the implicit values 0..3 in the order listed.
 * NOTE(review): the names look like they mirror Xapian's
 * QueryParser::stem_strategy - confirm the numeric values against the
 * Xapian version the C++ wrapper is built with.
 */
enum stem_strategy
{
    STEM_NONE,
    STEM_SOME,
    STEM_ALL,
    STEM_ALL_Z
}
15 
/**
 * Query operators.
 *
 * The ordinals are written out explicitly; they are the same values the
 * members would receive implicitly (0, 1, 2, ...) and are passed as plain
 * `int op_` arguments to the query functions of this module.
 */
enum xapian_op
{
    /// Match iff both subqueries are satisfied.
    OP_AND          = 0,

    /// Match if either subquery is satisfied.
    OP_OR           = 1,

    /// Match if the left subquery is satisfied but the right one is not.
    OP_AND_NOT      = 2,

    /// Match if exactly one of the two subqueries is satisfied.
    OP_XOR          = 3,

    /// Match iff the left subquery is satisfied, but use weights from both.
    OP_AND_MAYBE    = 4,

    /// Like AND, but only the left subquery contributes weights.
    OP_FILTER       = 5,

    /**
     * Find occurrences of a list of terms where all the terms occur
     * within a specified window of positions.
     *
     * Every term occurrence must be at a different position; the order
     * in which the terms appear is irrelevant.
     *
     * The window parameter should be given for this operation; if it is
     * not, it defaults to the number of terms in the list.
     */
    OP_NEAR         = 6,

    /**
     * Find occurrences of a list of terms where all the terms occur
     * within a specified window of positions and in the order specified.
     *
     * Every term occurrence must be at a different position.
     *
     * The window parameter should be given for this operation; if it is
     * not, it defaults to the number of terms in the list.
     */
    OP_PHRASE       = 7,

    /// Filter by a range test on a document value.
    OP_VALUE_RANGE  = 8,

    /**
     * Scale the weight of a subquery by the specified factor.
     *
     * A factor of 0 means the subquery contributes no weight to the
     * query, i.e. it acts as a purely boolean subquery.
     *
     * A negative factor makes Xapian throw Xapian::InvalidArgumentError.
     */
    OP_SCALE_WEIGHT = 9,

    /**
     * Pick the best N subqueries and combine them with OP_OR.
     *
     * To find documents similar to a piece of text, an obvious approach
     * is to build an "OR" query from all the terms in the text and run
     * it against a database containing the documents.  Such a query can
     * contain a lot of terms and be quite slow, yet many of those terms
     * do not contribute usefully to the results.
     *
     * OP_ELITE_SET can be used instead of OP_OR in that situation.  It
     * selects the most important ''N'' terms and then acts as an OP_OR
     * query with just these, ignoring any other terms.  This usually
     * returns results just as good as the full OP_OR query, but much
     * faster.
     *
     * In general, OP_ELITE_SET is suitable whenever you have a large OR
     * query and it does not matter if the search completely ignores some
     * of the less important terms.
     *
     * The subqueries do not have to be terms, but if they are not,
     * OP_ELITE_SET looks at the estimated frequencies of the subqueries
     * and so could pick a subset which does not actually match any
     * documents even if the full OR would match some.
     *
     * A parameter of the query constructor controls how many terms
     * OP_ELITE_SET picks.  If not specified it defaults to 10 (or
     * <code>ceil(sqrt(number_of_subqueries))</code> if there are more
     * than 100 subqueries, but this rather arbitrary special case will
     * be dropped in 1.3.0).  For example, this picks the best 7 terms:
     *
     * <pre>
     * Xapian::Query query(Xapian::Query::OP_ELITE_SET, subqs.begin(), subqs.end(), 7);
     * </pre>
     *
     * If the number of subqueries is less than this threshold,
     * OP_ELITE_SET behaves identically to OP_OR.
     */
    OP_ELITE_SET    = 10,

    /// Filter by a greater-than-or-equal test on a document value.
    OP_VALUE_GE     = 11,

    /// Filter by a less-than-or-equal test on a document value.
    OP_VALUE_LE     = 12,

    /**
     * Treat a set of queries as synonyms.
     *
     * Returns all results which match at least one of the queries, but
     * weights them as if all the subqueries were instances of the same
     * term: multiple matching terms for a document increase the wdf
     * value used, and the term frequency is based on the number of
     * documents which would match an OR of all the subqueries.
     *
     * The term frequency used is usually an approximation, because
     * calculating the precise combined term frequency would be overly
     * expensive.
     *
     * Identical to OP_OR except for the weightings returned.
     */
    OP_SYNONYM      = 13
}
138 
/**
 * Query parser feature flags.
 *
 * Each basic flag occupies one bit, so flags can be OR-ed together; the
 * last two members are ready-made combinations.
 */
enum feature_flag
{
    /// Support AND, OR, etc. and bracketed subexpressions.
    FLAG_BOOLEAN                 = 1 << 0,

    /// Support quoted phrases.
    FLAG_PHRASE                  = 1 << 1,

    /// Support + and -.
    FLAG_LOVEHATE                = 1 << 2,

    /// Support AND, OR, etc. even if they are not written in ALLCAPS.
    FLAG_BOOLEAN_ANY_CASE        = 1 << 3,

    /**
     * Support right truncation (e.g. Xap*).
     *
     * Wildcards currently cannot be used with boolean filter prefixes or
     * inside a phrase (either an explicitly quoted one, or one implicitly
     * generated by hyphens or other punctuation).
     *
     * NB: The QueryParser object must be told which database to expand
     * wildcards from by calling set_database.
     */
    FLAG_WILDCARD                = 1 << 4,

    /**
     * Allow queries such as 'NOT apples'.
     *
     * These require a list of all documents in the database, which is
     * potentially expensive, so the feature is not enabled by default.
     */
    FLAG_PURE_NOT                = 1 << 5,

    /**
     * Enable partial matching.
     *
     * Partial matching makes the parser treat the query as a "partially
     * entered" search: the final word is automatically treated as a
     * wildcarded match unless it is followed by whitespace, which gives
     * more stable results for interactive searches.
     *
     * FLAG_PARTIAL currently does nothing if the final word of the query
     * has a boolean filter prefix or is inside a phrase (either an
     * explicitly quoted one, or one implicitly generated by hyphens or
     * other punctuation).  It also does nothing if the final word is
     * part of a value range.
     *
     * NB: The QueryParser object must be told which database to expand
     * wildcards from by calling set_database.
     */
    FLAG_PARTIAL                 = 1 << 6,

    /**
     * Enable spelling correction.
     *
     * For each word in the query which does not exist as a term in the
     * database, Database::get_spelling_suggestion() is called; if a
     * suggestion is returned, a corrected version of the query string is
     * built up which can be read using
     * QueryParser::get_corrected_query_string().  The query returned is
     * still based on the uncorrected query string - to get a parsed
     * query based on the corrected string, call
     * QueryParser::parse_query() again.
     *
     * NB: set_database() must also be called for this to work.
     */
    FLAG_SPELLING_CORRECTION     = 1 << 7,

    /**
     * Enable the synonym operator '~'.
     *
     * NB: set_database() must also be called for this to work.
     */
    FLAG_SYNONYM                 = 1 << 8,

    /**
     * Enable automatic use of synonyms for single terms.
     *
     * NB: set_database() must also be called for this to work.
     */
    FLAG_AUTO_SYNONYMS           = 1 << 9,

    /**
     * Enable automatic use of synonyms for single terms and groups of
     * terms.  Includes the FLAG_AUTO_SYNONYMS bit.
     *
     * NB: set_database() must also be called for this to work.
     */
    FLAG_AUTO_MULTIWORD_SYNONYMS = (1 << 10) | FLAG_AUTO_SYNONYMS,

    /**
     * The default flags.
     *
     * Used if none are explicitly passed to @a parse_query().
     * The default flags are FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE.
     *
     * Added in Xapian 1.0.11.
     */
    FLAG_DEFAULT                 = FLAG_BOOLEAN | FLAG_PHRASE | FLAG_LOVEHATE
}
229 
// C-style names for D's built-in types.
// Disabled in the original: alias size_t = int;
alias int32_t = int;
alias int8_t  = byte;
alias uint8_t = ubyte;
alias TCHAR   = dchar;
235 
// Database open modes.
const
{
    /// Open for read/write; create the database if none exists.
    int DB_CREATE_OR_OPEN      = 1;

    /// Create a new database; fail if one already exists.
    int DB_CREATE              = 2;

    /// Overwrite an existing database; create one if none exists.
    int DB_CREATE_OR_OVERWRITE = 3;

    /// Open for read/write; fail if no database exists.
    int DB_OPEN                = 4;
}
244 
// Backend identifiers.
// NOTE(review): presumably the values accepted by the `db_type` parameter
// of new_Database / new_WritableDatabase - confirm against the C++ wrapper.
const
{
    int BRASS     = 1;
    int CHERT     = 2;
    int IN_MEMORY = 3;
}
248 
249 extern (C++)
250 {
/**
 * Value range processor handle.
 *
 * Declares no methods on the D side.  Instances are obtained from
 * new_NumberValueRangeProcessor and handed to
 * XapianQueryParser.add_valuerangeprocessor.
 */
interface XapianNumberValueRangeProcessor {}
255 
/**
 * Iterator over terms, as returned by XapianDatabase.allterms.
 *
 * Every method takes a trailing `byte* err`.
 * NOTE(review): presumably the C++ side stores a status code there (see
 * get_xapian_err_msg for the code names) - confirm against the wrapper.
 *
 * The interface is declared inside an `extern (C++)` block, where methods
 * are matched to the C++ virtual functions by declaration order - do not
 * reorder them.
 */
interface XapianTermIterator
{
    void reset(byte* err) @nogc;
    void next(byte* err) @nogc;
    bool is_next(byte* err) @nogc;

    /// Hands back the current term through `out_val` / `out_val_length`.
    /// NOTE(review): ownership of the returned buffer is not visible here.
    void get_term(char** out_val, uint** out_val_length, byte* err) @nogc;
}
263 
/**
 * Parses a piece of text and generates terms.
 *
 * Strings are passed as a pointer plus an explicit byte length.  Every
 * method takes a trailing `byte* err`.
 * NOTE(review): presumably the C++ side stores a status code in `err`
 * (see get_xapian_err_msg) - confirm against the wrapper.
 *
 * The interface is declared inside an `extern (C++)` block, where methods
 * are matched to the C++ virtual functions by declaration order - do not
 * reorder them, including the overloads.
 */
interface XapianTermGenerator
{
    /// Set the XapianStem object to be used for generating stemmed terms.
    void set_stemmer(XapianStem stemmer, byte* err) @nogc;

    /// Set the document that subsequent indexing calls operate on.
    /// NOTE(review): inferred from the name - confirm.
    void set_document(XapianDocument doc, byte* err) @nogc;

    /// Index the text `data_str[0 .. data_len]`.
    void index_text(const char* data_str, ulong data_len, byte* err) @nogc;

    /// Index the text `data_str[0 .. data_len]` with the term prefix
    /// `prefix_str[0 .. prefix_len]`.
    void index_text(const char* data_str, ulong data_len, const char* prefix_str, ulong prefix_len, byte* err) @nogc;

    /// Variant of index_text without positional information
    /// (NOTE(review): per the name - confirm).
    void index_text_without_positions(const char* data_str, ulong data_len, byte* err) @nogc;

    /// Prefixed variant of index_text_without_positions.
    void index_text_without_positions(const char* data_str, ulong data_len, const char* prefix_str, ulong prefix_len,
                                      byte* err) @nogc;

    /// Index an `int` value under the given prefix.
    void index_data(int data, const char* prefix_str, ulong prefix_len, byte* err) @nogc;

    /// Index a `long` value under the given prefix.
    void index_data(long data, const char* prefix_str, ulong prefix_len, byte* err) @nogc;

    /// Index a `float` value under the given prefix.
    void index_data(float data, const char* prefix_str, ulong prefix_len, byte* err) @nogc;

    /// Index a `double` value under the given prefix.
    void index_data(double data, const char* prefix_str, ulong prefix_len, byte* err) @nogc;
}
297 
/**
 * Handle used to access a database, or a group of databases.
 *
 * Every method takes a trailing `byte* err`.
 * NOTE(review): presumably the C++ side stores a status code there (see
 * get_xapian_err_msg) - confirm against the wrapper.
 *
 * Declared inside an `extern (C++)` block: method order must stay in sync
 * with the C++ virtual functions.
 */
interface XapianDatabase
{
    /// Create an enquire object for this database.
    XapianEnquire new_Enquire(byte* err) @nogc;

    void close(byte* err) @nogc;
    void reopen(byte* err) @nogc;

    /// Term iterator; `prefix_str[0 .. prefix_len]` is passed as the prefix.
    XapianTermIterator allterms(const char* prefix_str, ulong prefix_len, byte* err) @nogc;

    /// Add `add_db` to this handle.
    void add_database(XapianDatabase add_db, byte* err) @nogc;
}
307 
/**
 * Handle providing read/write access to a database.
 *
 * Documents are addressed by a "unique term" passed as pointer plus byte
 * length.  Every method takes a trailing `byte* err`.
 * NOTE(review): presumably the C++ side stores a status code in `err`
 * (see get_xapian_err_msg) - confirm against the wrapper.
 *
 * Declared inside an `extern (C++)` block: method order must stay in sync
 * with the C++ virtual functions.
 */
interface XapianWritableDatabase
{
    /// Create an enquire object for this database.
    XapianEnquire new_Enquire(byte* err) @nogc;

    /// Add `doc`; the `uint` result is presumably the new document id - confirm.
    uint add_document(XapianDocument doc, byte* err) @nogc;

    /// Replace the document identified by the unique term with `document`.
    uint replace_document(const char* _unique_term, ulong _unique_term_len, XapianDocument document, byte* err) @nogc;

    /// Delete the document identified by the unique term.
    void delete_document(const char* _unique_term, ulong _unique_term_len, byte* err) @nogc;

    void commit(byte* err) @nogc;
    void close(byte* err) @nogc;
    void reopen(byte* err) @nogc;
}
319 
/**
 * Handle representing a query.
 *
 * Every method takes a trailing `byte* err`.
 * NOTE(review): presumably the C++ side stores a status code there (see
 * get_xapian_err_msg) - confirm against the wrapper.
 *
 * Declared inside an `extern (C++)` block: method order must stay in sync
 * with the C++ virtual functions.
 */
interface XapianQuery
{
    /// Hands back a description string through `out_val` / `out_val_length`.
    void get_description(char** out_val, uint** out_val_length, byte* err) @nogc;

    /// Hands back the serialised query through `out_val` / `out_val_length`.
    void serialise(char** out_val, uint** out_val_length, byte* err) @nogc;

    /// Combine this query with `_right` using operator `op_`
    /// (NOTE(review): presumably a xapian_op value - confirm).
    XapianQuery add_right_query(int op_, XapianQuery _right, byte* err) @nogc;

    int get_length(byte* err) @nogc;
}
328 
/**
 * Handle representing a document in a Xapian database.
 *
 * Every method takes a trailing `byte* err`.
 * NOTE(review): presumably the C++ side stores a status code there (see
 * get_xapian_err_msg) - confirm against the wrapper.
 *
 * Declared inside an `extern (C++)` block: method order (including the
 * add_value overloads) must stay in sync with the C++ virtual functions.
 */
interface XapianDocument
{
    /// Hands back the document data through `out_val` / `out_val_length`.
    /// NOTE(review): unlike the other getters in this module, this one also
    /// returns `char*` - confirm that this matches the C++ declaration.
    char* get_data(char** out_val, uint** out_val_length, byte* err) @nogc;

    /// Set the document data to `data_str[0 .. data_len]`.
    void set_data(const char* data_str, ulong data_len, byte* err) @nogc;

    /// Add the boolean term `_data[0 .. _data_len]`.
    void add_boolean_term(const char* _data, ulong _data_len, byte* err) @nogc;

    // Store a value in `slot`; one overload per supported value type.
    void add_value(int slot, const char* _data, ulong _data_len, byte* err) @nogc;
    void add_value(int slot, int _data, byte* err) @nogc;
    void add_value(int slot, long _data, byte* err) @nogc;
    void add_value(int slot, float _data, byte* err) @nogc;
    void add_value(int slot, double _data, byte* err) @nogc;
}
341 
/**
 * Iterator pointing to items in an MSet.
 *
 * Every method takes a trailing `byte* err`.
 * NOTE(review): presumably the C++ side stores a status code there (see
 * get_xapian_err_msg) - confirm against the wrapper.
 *
 * Declared inside an `extern (C++)` block: method order must stay in sync
 * with the C++ virtual functions.
 */
interface XapianMSetIterator
{
    uint get_documentid(byte* err) @nogc;
    XapianDocument get_document(byte* err) @nogc;

    /// Hands back the current document's data through `out_val` / `out_val_length`.
    void get_document_data(char** out_val, uint** out_val_length, byte* err) @nogc;

    void next(byte* err) @nogc;
    bool is_next(byte* err) @nogc;
}
352 
/**
 * A match set (MSet), as returned by XapianEnquire.get_mset.
 *
 * Every method takes a trailing `byte* err`.
 * NOTE(review): presumably the C++ side stores a status code there (see
 * get_xapian_err_msg) - confirm against the wrapper.
 *
 * Declared inside an `extern (C++)` block: method order must stay in sync
 * with the C++ virtual functions.
 */
interface XapianMSet
{
    int get_matches_estimated(byte* err) @nogc;
    int size(byte* err) @nogc;

    /// Iterator over the items of this match set.
    XapianMSetIterator iterator(byte* err) @nogc;
}
360 
/**
 * Interface to the information retrieval system for the purpose of
 * searching.
 *
 * Except for clear_matchspies, every method takes a trailing `byte* err`.
 * NOTE(review): presumably the C++ side stores a status code there (see
 * get_xapian_err_msg) - confirm against the wrapper.
 *
 * Declared inside an `extern (C++)` block: method order must stay in sync
 * with the C++ virtual functions.
 */
interface XapianEnquire
{
    /// Set the query to run.
    void set_query(XapianQuery query, byte* err) @nogc;

    /// Fetch a match set; `from` and `size` are passed through to the C++
    /// side (NOTE(review): presumably first item index and maximum item
    /// count - confirm).
    XapianMSet get_mset(int from, int size, byte* err) @nogc;

    /// Sort results by keys produced by `sorter`.
    /// NOTE(review): the meaning of `p` is not visible here - confirm.
    void set_sort_by_key(XapianMultiValueKeyMaker sorter, bool p, byte* err) @nogc;

    /// NOTE(review): the only method in this module without an `err`
    /// parameter - confirm that this matches the C++ declaration.
    void clear_matchspies() @nogc;
}
369 
/**
 * Handle representing a stemming algorithm.
 *
 * Declares no methods on the D side.  Instances are obtained from
 * new_Stem and passed to the set_stemmer methods of XapianTermGenerator
 * and XapianQueryParser.
 */
interface XapianStem {}
374 
/**
 * Builds a XapianQuery object from a user query string.
 *
 * Strings are passed as a pointer plus an explicit byte length.  Every
 * method takes a trailing `byte* err`.
 * NOTE(review): presumably the C++ side stores a status code in `err`
 * (see get_xapian_err_msg) - confirm against the wrapper.
 * NOTE(review): the string parameters here are non-const `char*`, unlike
 * the `const char*` used by the other interfaces - confirm before changing.
 *
 * Declared inside an `extern (C++)` block: method order (including the
 * overloads) must stay in sync with the C++ virtual functions.
 */
interface XapianQueryParser
{
    void set_stemmer(XapianStem stemmer, byte* err) @nogc;
    void set_database(XapianDatabase db, byte* err) @nogc;
    void set_database(XapianWritableDatabase db, byte* err) @nogc;
    void set_stemming_strategy(stem_strategy strategy, byte* err) @nogc;

    /// Parse `query_string[0 .. query_string_len]`.
    XapianQuery parse_query(char* query_string, ulong query_string_len, byte* err) @nogc;

    /// As above, with explicit `flags`
    /// (NOTE(review): presumably feature_flag bits - confirm).
    XapianQuery parse_query(char* query_string, ulong query_string_len, uint flags, byte* err) @nogc;

    /// As above, additionally passing `prefix_string[0 .. prefix_string_len]`.
    XapianQuery parse_query(char* query_string, ulong query_string_len, uint flags,
                            char* prefix_string, ulong prefix_string_len, byte* err) @nogc;

    /// Register the field name `field_string` with the term prefix `prefix_string`.
    void add_prefix(char* field_string, ulong field_string_len, char* prefix_string, ulong prefix_string_len,
                    byte* err) @nogc;

    void add_valuerangeprocessor(XapianNumberValueRangeProcessor pp, byte* err) @nogc;
    void set_max_wildcard_expansion(int limit, byte* err) @nogc;
}
390 
/**
 * KeyMaker subclass which combines several values.
 *
 * Instances are obtained from new_MultiValueKeyMaker and passed to
 * XapianEnquire.set_sort_by_key.  Every method takes a trailing
 * `byte* err`.
 * NOTE(review): presumably the C++ side stores a status code there (see
 * get_xapian_err_msg) - confirm against the wrapper.
 *
 * Declared inside an `extern (C++)` block: the order of the two overloads
 * must stay in sync with the C++ virtual functions.
 */
interface XapianMultiValueKeyMaker
{
    /// Add value slot `pos` to the key.
    void add_value(int pos, byte* err) @nogc;

    /// Add value slot `pos` to the key with an explicit direction flag.
    /// NOTE(review): which direction `true` selects is not visible here.
    void add_value(int pos, bool asc_desc, byte* err) @nogc;
}
397 
/// Create a database handle without a path.
XapianDatabase new_Database(byte* err) @nogc;

/// Create a database handle for `path[0 .. path_len]`.
/// NOTE(review): `db_type` is presumably one of BRASS / CHERT / IN_MEMORY;
/// `path_len` is `uint` here while the interface methods use `ulong`
/// lengths - confirm both against the C++ wrapper before changing.
XapianDatabase new_Database(const char* path, uint path_len, int db_type, byte* err) @nogc;

/// Create a writable database handle for `path[0 .. path_len]`.
/// NOTE(review): `action` is presumably one of the DB_* open modes and
/// `db_type` one of BRASS / CHERT / IN_MEMORY - confirm.
XapianWritableDatabase new_WritableDatabase(const char* path, uint path_len, int action, int db_type,
                                            byte* err) @nogc;
406 
/// Create an empty document handle.
XapianDocument new_Document(byte* err) @nogc;

/// Create a multi-value key maker (see XapianEnquire.set_sort_by_key).
XapianMultiValueKeyMaker new_MultiValueKeyMaker(byte* err) @nogc;

/// Create a query parser.
XapianQueryParser new_QueryParser(byte* err) @nogc;

/// Create a stemmer for the language named by `language[0 .. language_len]`.
XapianStem new_Stem(char* language, uint language_len, byte* err) @nogc;

/// Create a term generator.
XapianTermGenerator new_TermGenerator(byte* err) @nogc;

/// Create a numeric value range processor for value slot `slot`.
/// NOTE(review): `_str` / `prefix` presumably give the marker string and
/// whether it is a prefix rather than a suffix - confirm.
XapianNumberValueRangeProcessor new_NumberValueRangeProcessor(int slot, const char* _str, ulong _str_len, bool prefix,
                                                              byte* err) @nogc;
424 
/// Create a query from no arguments.
XapianQuery new_Query(byte* err) @nogc;

/// Create a query from the string `_str[0 .. _str_len]`.
XapianQuery new_Query(const char* _str, uint _str_len, byte* err) @nogc;

//    XapianQuery new_Query_add (XapianQuery _left, XapianQuery _right);//, int op_);

/// Create a query with operator `op_` on value slot `slot`, taking the
/// two bounds `_begin` and `_end`.
XapianQuery new_Query_range(int op_, int slot, double _begin, double _end, byte* err) @nogc;

/// Create a query with operator `op_` on value slot `slot`, taking the
/// single number `_value`.
XapianQuery new_Query_double(int op_, int slot, double _value, byte* err) @nogc;

/// Create a query with operator `op_` on value slot `slot`, taking the
/// string `_str[0 .. _str_len]`.
XapianQuery new_Query_equal(int op_, int slot, const char* _str, ulong _str_len, byte* err) @nogc;

/// Hands back a serialised form of `value` through `out_val` /
/// `out_val_length` (NOTE(review): presumably Xapian's sortable
/// encoding, per the name - confirm).
void sortable_serialise(double value, char** out_val, uint** out_val_length, byte* err) @nogc;
442 
443 ////////
444 
// Destroy functions, one per handle type; none of them takes an `err`
// parameter.
// NOTE(review): presumably each releases the C++ object behind the handle,
// which must not be used afterwards - confirm against the wrapper.

/// Destroy a document handle.
void destroy_Document(XapianDocument doc) @nogc;

/// Destroy a match set handle.
void destroy_MSet(XapianMSet mset) @nogc;

/// Destroy a match set iterator handle.
void destroy_MSetIterator(XapianMSetIterator msetit) @nogc;

/// Destroy a query handle.
void destroy_Query(XapianQuery query) @nogc;

/// Destroy an enquire handle.
void destroy_Enquire(XapianEnquire enquire) @nogc;

/// Destroy a key maker handle.
void destroy_MultiValueKeyMaker(XapianMultiValueKeyMaker sorter) @nogc;

/// Destroy a database handle.
void destroy_Database(XapianDatabase db) @nogc;
465 }
466 
/**
 * Exception that carries a numeric error code in addition to the usual
 * message, file and line.
 */
class XapianError : Exception
{
    /// Error code supplied at construction (0 by default initialisation).
    byte code;

    /**
     * Params:
     *   _code = error code to store in `code`
     *   msg   = exception message
     *   file  = source file reported by the exception
     *   line  = source line reported by the exception
     *   next  = chained throwable, if any
     */
    this(byte _code, string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
    {
        super(msg, file, line, next);
        this.code = _code;
    }

    /// Name registered for `code`, or "Unknown" if there is none.
    string get_xapian_msg()
    {
        // The module-level lookup performs the same lazy table
        // initialisation and the same "Unknown" fallback.
        return get_xapian_err_msg(code);
    }
}
485 
/**
 * Look up the error name registered for `code`.
 *
 * Params:
 *   code = error code; the table holds entries for -1 .. -21
 *
 * Returns: the registered name, or "Unknown" when `code` has no entry.
 */
string get_xapian_err_msg(byte code)
{
    // The table is built lazily on first use.
    if (!xapian_msg_code.length)
        init_err_code();

    if (auto name = code in xapian_msg_code)
        return *name;

    return "Unknown";
}

/// Error code -> error name; empty until init_err_code() has run.
/// Module-level and not `shared`, so each thread has its own copy.
private string[ byte ] xapian_msg_code;

/// Fill xapian_msg_code with the known code -> name pairs.
private void init_err_code()
{
    xapian_msg_code = [
        -1  : "DatabaseModifiedError",
        -2  : "DatabaseLockError",
        -3  : "LogicError",
        -4  : "AssertionError",
        -5  : "InvalidArgumentError",
        -6  : "InvalidOperationError",
        -7  : "UnimplementedError",
        -8  : "RuntimeError",
        -9  : "DatabaseError",
        -10 : "DatabaseCorruptError",
        -11 : "DatabaseCreateError",
        -12 : "DatabaseOpeningError",
        -13 : "DatabaseVersionError",
        -14 : "DocNotFoundError",
        -15 : "FeatureUnavailableError",
        -16 : "InternalError",
        -17 : "NetworkError",
        -18 : "NetworkTimeoutError",
        -19 : "QueryParserError",
        -20 : "RangeError",
        -21 : "SerialisationError"
    ];
}
542