/**
 * Bindings for Xapian.
 *
 * Copyright: © 2014 Semantic Machines
 * License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
 * Authors: Valeriy Bushenev
 */
module veda.bind.xapian_d_header;

/// Enum of stem strategy
enum stem_strategy { STEM_NONE, STEM_SOME, STEM_ALL, STEM_ALL_Z }

/// Enum of possible query operations
enum xapian_op
{
    /// Return iff both subqueries are satisfied
    OP_AND,

    /// Return if either subquery is satisfied
    OP_OR,

    /// Return if left but not right satisfied
    OP_AND_NOT,

    /// Return if one query satisfied, but not both
    OP_XOR,

    /// Return iff left satisfied, but use weights from both
    OP_AND_MAYBE,

    /// As AND, but use only weights from left subquery
    OP_FILTER,

    /** Find occurrences of a list of terms with all the terms
     *  occurring within a specified window of positions.
     *
     *  Each occurrence of a term must be at a different position,
     *  but the order they appear in is irrelevant.
     *
     *  The window parameter should be specified for this operation,
     *  but will default to the number of terms in the list.
     */
    OP_NEAR,

    /** Find occurrences of a list of terms with all the terms
     *  occurring within a specified window of positions, and all
     *  the terms appearing in the order specified.
     *
     *  Each occurrence of a term must be at a different position.
     *
     *  The window parameter should be specified for this operation,
     *  but will default to the number of terms in the list.
     */
    OP_PHRASE,

    /** Filter by a range test on a document value. */
    OP_VALUE_RANGE,

    /** Scale the weight of a subquery by the specified factor.
     *
     *  A factor of 0 means this subquery will contribute no weight to
     *  the query - it will act as a purely boolean subquery.
     *
     *  If the factor is negative, Xapian::InvalidArgumentError will
     *  be thrown.
     */
    OP_SCALE_WEIGHT,

    /** Pick the best N subqueries and combine with OP_OR.
     *
     *  If you want to implement a feature which finds documents
     *  similar to a piece of text, an obvious approach is to build an
     *  "OR" query from all the terms in the text, and run this query
     *  against a database containing the documents.  However such a
     *  query can contain a lot of terms and be quite slow to perform,
     *  yet many of these terms don't contribute usefully to the
     *  results.
     *
     *  The OP_ELITE_SET operator can be used instead of OP_OR in this
     *  situation.  OP_ELITE_SET selects the most important ''N'' terms
     *  and then acts as an OP_OR query with just these, ignoring any
     *  other terms.  This will usually return results just as good as
     *  the full OP_OR query, but much faster.
     *
     *  In general, the OP_ELITE_SET operator can be used when you have
     *  a large OR query, but it doesn't matter if the search
     *  completely ignores some of the less important terms in the
     *  query.
     *
     *  The subqueries don't have to be terms, but if they aren't then
     *  OP_ELITE_SET will look at the estimated frequencies of the
     *  subqueries and so could pick a subset which don't actually
     *  match any documents even if the full OR would match some.
     *
     *  You can specify a parameter to the query constructor which
     *  control the number of terms which OP_ELITE_SET will pick.  If
     *  not specified, this defaults to 10 (or
     *  <code>ceil(sqrt(number_of_subqueries))</code> if there are more
     *  than 100 subqueries, but this rather arbitrary special case
     *  will be dropped in 1.3.0).  For example, this will pick the
     *  best 7 terms:
     *
     *  <pre>
     *  Xapian::Query query(Xapian::Query::OP_ELITE_SET, subqs.begin(), subqs.end(), 7);
     *  </pre>
     *
     *  If the number of subqueries is less than this threshold,
     *  OP_ELITE_SET behaves identically to OP_OR.
     */
    OP_ELITE_SET,

    /** Filter by a greater-than-or-equal test on a document value. */
    OP_VALUE_GE,

    /** Filter by a less-than-or-equal test on a document value. */
    OP_VALUE_LE,

    /** Treat a set of queries as synonyms.
     *
     *  This returns all results which match at least one of the
     *  queries, but weighting as if all the sub-queries are instances
     *  of the same term: so multiple matching terms for a document
     *  increase the wdf value used, and the term frequency is based on
     *  the number of documents which would match an OR of all the
     *  subqueries.
     *
     *  The term frequency used will usually be an approximation,
     *  because calculating the precise combined term frequency would
     *  be overly expensive.
     *
     *  Identical to OP_OR, except for the weightings returned.
     */
    OP_SYNONYM
}

/// Enum of feature flag
enum feature_flag
{
    /// Support AND, OR, etc and bracketed subexpressions.
    FLAG_BOOLEAN = 1,
    /// Support quoted phrases.
    FLAG_PHRASE = 2,
    /// Support + and -.
    FLAG_LOVEHATE = 4,
    /// Support AND, OR, etc even if they aren't in ALLCAPS.
    FLAG_BOOLEAN_ANY_CASE = 8,
    /** Support right truncation (e.g. Xap*).
     *
     *  Currently you can't use wildcards with boolean filter prefixes,
     *  or in a phrase (either an explicitly quoted one, or one implicitly
     *  generated by hyphens or other punctuation).
     *
     *  NB: You need to tell the QueryParser object which database to
     *  expand wildcards from by calling set_database.
     */
    FLAG_WILDCARD = 16,
    /** Allow queries such as 'NOT apples'.
     *
     *  These require the use of a list of all documents in the database
     *  which is potentially expensive, so this feature isn't enabled by
     *  default.
     */
    FLAG_PURE_NOT = 32,
    /** Enable partial matching.
     *
     *  Partial matching causes the parser to treat the query as a
     *  "partially entered" search.  This will automatically treat the
     *  final word as a wildcarded match, unless it is followed by
     *  whitespace, to produce more stable results from interactive
     *  searches.
     *
     *  Currently FLAG_PARTIAL doesn't do anything if the final word
     *  in the query has a boolean filter prefix, or if it is in a phrase
     *  (either an explicitly quoted one, or one implicitly generated by
     *  hyphens or other punctuation).  It also doesn't do anything
     *  if the final word is part of a value range.
     *
     *  NB: You need to tell the QueryParser object which database to
     *  expand wildcards from by calling set_database.
     */
    FLAG_PARTIAL = 64,

    /** Enable spelling correction.
     *
     *  For each word in the query which doesn't exist as a term in the
     *  database, Database::get_spelling_suggestion() will be called and if
     *  a suggestion is returned, a corrected version of the query string
     *  will be built up which can be read using
     *  QueryParser::get_corrected_query_string().  The query returned is
     *  based on the uncorrected query string however - if you want a
     *  parsed query based on the corrected query string, you must call
     *  QueryParser::parse_query() again.
     *
     *  NB: You must also call set_database() for this to work.
     */
    FLAG_SPELLING_CORRECTION = 128,

    /** Enable synonym operator '~'.
     *
     *  NB: You must also call set_database() for this to work.
     */
    FLAG_SYNONYM = 256,

    /** Enable automatic use of synonyms for single terms.
     *
     *  NB: You must also call set_database() for this to work.
     */
    FLAG_AUTO_SYNONYMS = 512,

    /** Enable automatic use of synonyms for single terms and groups of
     *  terms.
     *
     *  NB: You must also call set_database() for this to work.
     */
    FLAG_AUTO_MULTIWORD_SYNONYMS = 1024 | FLAG_AUTO_SYNONYMS,

    /** The default flags.
     *
     *  Used if you don't explicitly pass any to @a parse_query().
     *  The default flags are FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE.
     *
     *  Added in Xapian 1.0.11.
     */
    FLAG_DEFAULT = FLAG_PHRASE | FLAG_BOOLEAN | FLAG_LOVEHATE
}

// C-style names for the fixed-width types used by this binding.
alias int32_t = int;
alias int8_t  = byte;
alias uint8_t = ubyte;
alias TCHAR   = dchar;

/** Open for read/write; create if no db exists. */
const int DB_CREATE_OR_OPEN = 1;
/** Create a new database; fail if db exists. */
const int DB_CREATE = 2;
/** Overwrite existing db; create if none exists. */
const int DB_CREATE_OR_OVERWRITE = 3;
/** Open for read/write; fail if no db exists. */
const int DB_OPEN = 4;

// NOTE(review): the three values below look like backend selectors for the
// `db_type` argument of new_Database / new_WritableDatabase - confirm against
// the C++ side of the binding.
const int BRASS     = 1;
const int CHERT     = 2;
const int IN_MEMORY = 3;

// Declarations implemented by the C++ wrapper around Xapian.
//
// Everything in this block is matched to the C++ side by declaration order and
// by the exact parameter types, so members must not be reordered and types
// must not be "tidied up" without changing the C++ code in step.
//
// Almost every call takes a trailing `byte *err` out-parameter; the exceptions
// are clear_matchspies and the destroy_* functions.
// NOTE(review): presumably `err` receives one of the negative codes listed in
// init_err_code() when the wrapped call fails - confirm against the C++ side.
extern (C++)
{
    /// Base class for value range processors
    interface XapianNumberValueRangeProcessor
    {
    }

    /// Term iterator handle, as returned by XapianDatabase.allterms
    interface XapianTermIterator
    {
        @nogc void reset(byte *err);
        @nogc void next(byte *err);
        @nogc bool is_next(byte *err);
        @nogc void get_term(char **out_val, uint **out_val_length, byte *err);
    }

    /// Parses a piece of text and generate terms.
    interface XapianTermGenerator
    {
        /// Set the XapianStem object to be used for generating stemmed terms.
        @nogc void set_stemmer(XapianStem stemmer, byte *err);

        /// Set the XapianDocument object passed in `doc`.
        @nogc void set_document(XapianDocument doc, byte *err);

        /// index_text overload taking only the text (pointer + length).
        @nogc void index_text(const char *data_str, ulong data_len, byte *err);

        /// index_text overload taking the text and a prefix (pointer + length each).
        @nogc void index_text(const char *data_str, ulong data_len, const char *prefix_str, ulong prefix_len, byte *err);

        /// index_text_without_positions overload taking only the text (pointer + length).
        @nogc void index_text_without_positions(const char *data_str, ulong data_len, byte *err);

        /// index_text_without_positions overload taking the text and a prefix (pointer + length each).
        @nogc void index_text_without_positions(const char *data_str, ulong data_len, const char *prefix_str, ulong prefix_len, byte *err);

        /// index_data overload for an `int` value with a prefix.
        @nogc void index_data(int data, const char *prefix_str, ulong prefix_len, byte *err);

        /// index_data overload for a `long` value with a prefix.
        @nogc void index_data(long data, const char *prefix_str, ulong prefix_len, byte *err);

        /// index_data overload for a `float` value with a prefix.
        @nogc void index_data(float data, const char *prefix_str, ulong prefix_len, byte *err);

        /// index_data overload for a `double` value with a prefix.
        @nogc void index_data(double data, const char *prefix_str, ulong prefix_len, byte *err);
    }

    /// This class is used to access a database, or a group of databases.
    interface XapianDatabase
    {
        @nogc XapianEnquire new_Enquire(byte *err);
        @nogc void close(byte *err);
        @nogc void reopen(byte *err);
        @nogc XapianTermIterator allterms(const char *prefix_str, ulong prefix_len, byte *err);
        @nogc void add_database(XapianDatabase add_db, byte *err);
    }

    /// This class provides read/write access to a database
    interface XapianWritableDatabase
    {
        @nogc XapianEnquire new_Enquire(byte *err);
        @nogc uint add_document(XapianDocument doc, byte *err);
        @nogc uint replace_document(const char *_unique_term, ulong _unique_term_len, XapianDocument document, byte *err);
        @nogc void delete_document(const char *_unique_term, ulong _unique_term_len, byte *err);
        @nogc void commit(byte *err);
        @nogc void close(byte *err);
        @nogc void reopen(byte *err);
    }

    /// Class representing a query
    interface XapianQuery
    {
        @nogc void get_description(char **out_val, uint **out_val_length, byte *err);
        @nogc void serialise(char **out_val, uint **out_val_length, byte *err);
        @nogc XapianQuery add_right_query(int op_, XapianQuery _right, byte *err);
        @nogc int get_length(byte *err);
    }

    /// A handle representing a document in a Xapian database
    interface XapianDocument
    {
        @nogc char *get_data(char **out_val, uint **out_val_length, byte *err);
        @nogc void set_data(const char *data_str, ulong data_len, byte *err);
        @nogc void add_boolean_term(const char *_data, ulong _data_len, byte *err);
        @nogc void add_value(int slot, const char *_data, ulong _data_len, byte *err);
        @nogc void add_value(int slot, int _data, byte *err);
        @nogc void add_value(int slot, long _data, byte *err);
        @nogc void add_value(int slot, float _data, byte *err);
        @nogc void add_value(int slot, double _data, byte *err);
    }

    /// An iterator pointing to items in an MSet
    interface XapianMSetIterator
    {
        @nogc uint get_documentid(byte *err);
        @nogc XapianDocument get_document(byte *err);
        @nogc void get_document_data(char **out_val, uint **out_val_length, byte *err);

        @nogc void next(byte *err);
        @nogc bool is_next(byte *err);
    }

    /// A match set (MSet)
    interface XapianMSet
    {
        @nogc int get_matches_estimated(byte *err);
        @nogc int size(byte *err);
        @nogc XapianMSetIterator iterator(byte *err);
    }

    /// This class provides an interface to the information retrieval system for the purpose of searching
    interface XapianEnquire
    {
        @nogc void set_query(XapianQuery query, byte *err);
        @nogc XapianMSet get_mset(int from, int size, byte *err);
        @nogc void set_sort_by_key(XapianMultiValueKeyMaker sorter, bool p, byte *err);
        @nogc void clear_matchspies();
    }

    /// Class representing a stemming algorithm
    interface XapianStem
    {
    }

    /// Build a XapianQuery object from a user query string
    interface XapianQueryParser
    {
        @nogc void set_stemmer(XapianStem stemmer, byte *err);
        @nogc void set_database(XapianDatabase db, byte *err);
        @nogc void set_database(XapianWritableDatabase db, byte *err);
        @nogc void set_stemming_strategy(stem_strategy strategy, byte *err);
        @nogc XapianQuery parse_query(char *query_string, ulong query_string_len, byte *err);
        @nogc XapianQuery parse_query(char *query_string, ulong query_string_len, uint flags, byte *err);
        @nogc XapianQuery parse_query(char *query_string, ulong query_string_len, uint flags, char *prefix_string, ulong prefix_string_len,
                                      byte *err);
        @nogc void add_prefix(char *field_string, ulong field_string_len, char *prefix_string, ulong prefix_string_len, byte *err);
        @nogc void add_valuerangeprocessor(XapianNumberValueRangeProcessor pp, byte *err);
        @nogc void set_max_wildcard_expansion(int limit, byte *err);
    }

    /// KeyMaker subclass which combines several values
    interface XapianMultiValueKeyMaker
    {
        @nogc void add_value(int pos, byte *err);
        @nogc void add_value(int pos, bool asc_desc, byte *err);
    }

    /// Obtain a XapianDatabase handle; overload without a path.
    @nogc XapianDatabase new_Database(byte *err);

    /// Obtain a XapianDatabase handle for `path` (pointer + length) and `db_type`.
    @nogc XapianDatabase new_Database(const char *path, uint path_len, int db_type, byte *err);

    /// Obtain a XapianWritableDatabase handle for `path` (pointer + length), `action` and `db_type`.
    @nogc XapianWritableDatabase new_WritableDatabase(const char *path, uint path_len, int action, int db_type, byte *err);

    /// Obtain a XapianDocument handle.
    @nogc XapianDocument new_Document(byte *err);

    /// Obtain a XapianMultiValueKeyMaker handle.
    @nogc XapianMultiValueKeyMaker new_MultiValueKeyMaker(byte *err);

    /// Obtain a XapianQueryParser handle.
    @nogc XapianQueryParser new_QueryParser(byte *err);

    /// Obtain a XapianStem handle for `language` (pointer + length).
    @nogc XapianStem new_Stem(char *language, uint language_len, byte *err);

    /// Obtain a XapianTermGenerator handle.
    @nogc XapianTermGenerator new_TermGenerator(byte *err);

    /// Obtain a XapianNumberValueRangeProcessor handle for `slot`, a string (pointer + length) and the `prefix` flag.
    @nogc XapianNumberValueRangeProcessor new_NumberValueRangeProcessor(int slot, const char *_str, ulong _str_len, bool prefix, byte *err);

    /// Obtain a XapianQuery handle; overload without arguments.
    @nogc XapianQuery new_Query(byte *err);

    /// Obtain a XapianQuery handle from a string (pointer + length).
    @nogc XapianQuery new_Query(const char *_str, uint _str_len, byte *err);

    /// Obtain a XapianQuery handle from an operation, a value slot and a `double` range [_begin, _end].
    @nogc XapianQuery new_Query_range(int op_, int slot, double _begin, double _end, byte *err);

    /// Obtain a XapianQuery handle from an operation, a value slot and a single `double` value.
    @nogc XapianQuery new_Query_double(int op_, int slot, double _value, byte *err);

    /// Obtain a XapianQuery handle from an operation, a value slot and a string (pointer + length).
    @nogc XapianQuery new_Query_equal(int op_, int slot, const char *_str, ulong _str_len, byte *err);

    /// Serialise `value`; the result is delivered through `out_val` / `out_val_length`.
    @nogc void sortable_serialise(double value, char **out_val, uint **out_val_length, byte *err);

    ////////

    /// Release a XapianDocument handle.
    @nogc void destroy_Document(XapianDocument doc);

    /// Release a XapianMSet handle.
    @nogc void destroy_MSet(XapianMSet mset);

    /// Release a XapianMSetIterator handle.
    @nogc void destroy_MSetIterator(XapianMSetIterator msetit);

    /// Release a XapianQuery handle.
    @nogc void destroy_Query(XapianQuery query);

    /// Release a XapianEnquire handle.
    @nogc void destroy_Enquire(XapianEnquire enquire);

    /// Release a XapianMultiValueKeyMaker handle.
    @nogc void destroy_MultiValueKeyMaker(XapianMultiValueKeyMaker sorter);

    /// Release a XapianDatabase handle.
    @nogc void destroy_Database(XapianDatabase db);
}

/// Exception carrying a Xapian error code alongside the usual message.
class XapianError : Exception
{
    /// Error code supplied to the constructor (0 until then).
    byte code = 0;

    /**
     * Params:
     *   _code = error code to store in `code`
     *   msg   = exception message, forwarded to Exception
     *   file  = source file of the throw site, forwarded to Exception
     *   line  = source line of the throw site, forwarded to Exception
     *   next  = chained Throwable, forwarded to Exception
     */
    this(byte _code, string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
    {
        super(msg, file, line, next);
        code = _code;
    }

    /// Returns: the name registered for `code`, or "Unknown" if there is none.
    string get_xapian_msg()
    {
        // Delegate so that there is a single place doing the lazy table setup and lookup.
        return get_xapian_err_msg(code);
    }
}

/**
 * Translate an error code into the name of the Xapian error it stands for.
 *
 * Params:
 *   code = error code to look up
 *
 * Returns: the registered name, or "Unknown" when `code` is not in the table.
 */
string get_xapian_err_msg(byte code)
{
    // The table is filled on first use rather than at module start-up.
    if (xapian_msg_code.length == 0)
        init_err_code();

    return xapian_msg_code.get(code, "Unknown");
}

// Error code -> error name. Empty until init_err_code() has run.
private string[ byte ] xapian_msg_code;

// Fill xapian_msg_code with the known code/name pairs.
private void init_err_code()
{
    xapian_msg_code =
    [
        -1  : "DatabaseModifiedError",
        -2  : "DatabaseLockError",
        -3  : "LogicError",
        -4  : "AssertionError",
        -5  : "InvalidArgumentError",
        -6  : "InvalidOperationError",
        -7  : "UnimplementedError",
        -8  : "RuntimeError",
        -9  : "DatabaseError",
        -10 : "DatabaseCorruptError",
        -11 : "DatabaseCreateError",
        -12 : "DatabaseOpeningError",
        -13 : "DatabaseVersionError",
        -14 : "DocNotFoundError",
        -15 : "FeatureUnavailableError",
        -16 : "InternalError",
        -17 : "NetworkError",
        -18 : "NetworkTimeoutError",
        -19 : "QueryParserError",
        -20 : "RangeError",
        -21 : "SerialisationError"
    ];
}