ndx下载ndx源代码下载

ndx

其他源码

1.0.0

下载

ndx·

轻巧的全文索引和搜索库。

该库是为一种特定用例设计的：所有文档都存储在磁盘上（IndexedDB），并且可以动态地添加到索引中或从索引中删除。

查询函数仅支持析取（OR）运算符。像 one two 这样的查询会被当作 "one" or "two" 来处理。

倒排索引不存储词项的位置，因此查询函数无法搜索诸如 "Super Mario" 之类的短语。

还有许多权衡取舍各不相同的替代方案，可能更适合您的特定用例。对于基于静态数据集的简单文档搜索，我建议使用 fst 之类的方案，并将其部署为边缘函数（WASM）。

特征

多字段全文索引和搜索。
按字段设置得分权重（boost）。
使用 BM25 排序函数对匹配的文档进行排名。
基于 Trie 的动态倒排索引。
可配置的分词器（tokenizer）和词项过滤器（term filter）。
支持查询扩展的自由文本查询。

例子

import { createIndex, indexAdd, indexRemove, indexVacuum } from "ndx";
import { indexQuery } from "ndx/query";

/**
 * Term filter used by the index: lower-cases a token.
 *
 * @param {string} term - Token to convert.
 * @returns {string} The lower-cased token.
 */
const termFilter = (term) => {
  return term.toLowerCase();
};

/**
 * Creates a small document-index facade around an ndx index.
 *
 * @param {Array<{ name: string }>} fields - Descriptors of the fields to
 *   index; `name` is the document property each field is read from.
 * @returns {{
 *   index: object,
 *   add: (doc: object) => void,
 *   remove: (id: *) => void,
 *   search: (q: string) => *
 * }} The raw index plus `add`, `remove` and `search` helpers bound to it.
 */
function createDocumentIndex(fields) {
  // `createIndex()` creates an index data structure.
  // First argument specifies how many different fields we want to index.
  const index = createIndex(
    fields.length,
    // Tokenizer is a function that breaks text into words, phrases, symbols,
    // or other meaningful elements called tokens.
    (s) => s.split(" "),
    // Filter is a function that processes tokens and returns terms, terms are
    // used in Inverted Index to index documents.
    termFilter,
  );
  // `fieldGetters` is an array with functions that will be used to retrieve
  // data from different fields.
  const fieldGetters = fields.map((f) => (doc) => doc[f.name]);
  // `fieldBoostFactors` is an array of boost factors for each field, in this
  // example all fields will have identical weight.
  const fieldBoostFactors = fields.map(() => 1);
  // `removed` tracks the ids of documents that were removed but may still be
  // referenced by the index. It must be declared here: `remove()` reads it,
  // and without this declaration the first `remove()` call throws a
  // ReferenceError.
  const removed = new Set();

  return {
    index,
    // `add()` will add documents to the index.
    add(doc) {
      indexAdd(
        index,
        fieldGetters,
        // Document key, it can be a unique document id or a reference to a
        // document if you want to store all documents in memory.
        doc.id,
        // Document.
        doc,
      );
    },
    // `remove()` will remove documents from the index.
    remove(id) {
      // When document is removed we are just marking document id as being
      // removed. Index data structure still contains references to the removed
      // document.
      indexRemove(index, removed, id);
      if (removed.size > 10) {
        // `indexVacuum()` removes all references to removed documents from the
        // index.
        indexVacuum(index, removed);
      }
    },

    // `search()` will be used to perform queries.
    search(q) {
      return indexQuery(
        index,
        fieldBoostFactors,
        // BM25 ranking function constants:
        // BM25 k1 constant, controls non-linear term frequency normalization
        // (saturation).
        1.2,
        // BM25 b constant, controls to what degree document length normalizes
        // tf values.
        0.75,
        q,
      );
    },
  };
}

// Build a document index over the single `content` field.
const index = createDocumentIndex([{ name: "content" }]);

const docs = [
  { "id": "1", "content": "Lorem ipsum dolor" },
  { "id": "2", "content": "Lorem ipsum" },
];

// Feed every document into the index.
for (const d of docs) {
  index.add(d);
}

// Run a search query.
index.search("Lorem");
// => [{ key: "2", score: ... }, { key: "1", score: ... }]
//
// The document with id `"2"` is ranked higher because its `"content"` field
// has fewer terms than that of the document with id `"1"`.

index.search("dolor");
// => [{ key: "1", score: ... }]