ndx ดาวน์โหลด - ดาวน์โหลดซอร์สโค้ด ndx

ndx

ซอร์สโค้ดอื่น ๆ

1.0.0

ดาวน์โหลด

ndx ·

ไลบรารีน้ำหนักเบาสำหรับการจัดทำดัชนีและการค้นหาข้อความแบบเต็ม (full-text)

ไลบรารีนี้ได้รับการออกแบบสำหรับกรณีการใช้งานเฉพาะ คือเมื่อเอกสารทั้งหมดถูกเก็บไว้ในดิสก์ (IndexedDB) และสามารถเพิ่มเข้าหรือลบออกจากดัชนีได้แบบไดนามิก

ฟังก์ชันการสืบค้นรองรับเฉพาะตัวดำเนินการแบบ disjunction (OR) เท่านั้น แบบสอบถามเช่น one two จะทำงานเป็น "one" or "two"

ดัชนีผกผัน (inverted index) ไม่เก็บตำแหน่งของคำศัพท์ ดังนั้นฟังก์ชันการสืบค้นจึงไม่สามารถค้นหาวลีเช่น "Super Mario" ได้

มีโซลูชันทางเลือกมากมายที่มีข้อดีข้อเสีย (tradeoffs) แตกต่างกัน ซึ่งอาจเหมาะกับกรณีการใช้งานเฉพาะของคุณมากกว่า สำหรับการค้นหาเอกสารอย่างง่ายด้วยชุดข้อมูลแบบคงที่ ฉันขอแนะนำให้ใช้บางอย่างเช่น FST และปรับใช้เป็น edge function (WASM)

คุณสมบัติ

การจัดทำดัชนีและค้นหาข้อความแบบเต็มหลายฟิลด์
การเพิ่มน้ำหนักคะแนน (score boosting) แยกตามฟิลด์
ฟังก์ชั่นการจัดอันดับ BM25 เพื่อจัดอันดับเอกสารที่ตรงกัน
ดัชนีผกผัน (inverted index) แบบไดนามิก
Tokenizer ที่กำหนดค่าได้และตัวกรองคำ
การสืบค้นด้วยข้อความอิสระ (free text) พร้อมการขยายแบบสอบถาม (query expansion)

ตัวอย่าง

import { createIndex, indexAdd, indexRemove, indexVacuum } from "ndx";
import { indexQuery } from "ndx/query";

/**
 * Term filter used by the index: converts a token to lower case.
 *
 * @param {string} token - Token produced by the tokenizer.
 * @returns {string} The lower-cased token.
 */
const termFilter = (token) => {
  return token.toLowerCase();
};

/**
 * Creates a small facade around an ndx index that can add, remove and search
 * documents.
 *
 * @param {Array<{name: string}>} fields - Descriptors of the document fields
 *   to index; the value of each field is read as `doc[field.name]`.
 * @returns {{
 *   index: object,
 *   add: (doc: object) => void,
 *   remove: (id: *) => void,
 *   search: (q: string) => *
 * }} The raw index plus `add()`, `remove()` and `search()` helpers.
 */
function createDocumentIndex(fields) {
  // `createIndex()` creates an index data structure.
  // First argument specifies how many different fields we want to index.
  const index = createIndex(
    fields.length,
    // Tokenizer is a function that breaks text into words, phrases, symbols,
    // or other meaningful elements called tokens.
    (s) => s.split(" "),
    // Filter is a function that processes tokens and returns terms, terms are
    // used in Inverted Index to index documents.
    termFilter,
  );
  // `fieldGetters` is an array with functions that will be used to retrieve
  // data from different fields.
  const fieldGetters = fields.map((f) => (doc) => doc[f.name]);
  // `fieldBoostFactors` is an array of boost factors for each field, in this
  // example all fields will have identical weight.
  const fieldBoostFactors = fields.map(() => 1);
  // Keys of documents that were removed but may still be referenced by the
  // index. It is owned by this closure so that every document index tracks
  // its own removals (previously `removed` was never declared, so `remove()`
  // threw a ReferenceError).
  const removed = new Set();

  return {
    index,
    // `add()` will add documents to the index.
    add(doc) {
      indexAdd(
        index,
        fieldGetters,
        // Document key, it can be a unique document id or a reference to a
        // document if you want to store all documents in memory.
        doc.id,
        // Document.
        doc,
      );
    },
    // `remove()` will remove documents from the index.
    remove(id) {
      // When document is removed we are just marking document id as being
      // removed. Index data structure still contains references to the removed
      // document.
      indexRemove(index, removed, id);
      if (removed.size > 10) {
        // `indexVacuum()` removes all references to removed documents from the
        // index.
        // NOTE(review): presumably `indexVacuum()` also empties `removed`;
        // confirm against the ndx implementation.
        indexVacuum(index, removed);
      }
    },

    // `search()` will be used to perform queries.
    search(q) {
      return indexQuery(
        index,
        fieldBoostFactors,
        // BM25 ranking function constants:
        // BM25 k1 constant, controls non-linear term frequency normalization
        // (saturation).
        1.2,
        // BM25 b constant, controls to what degree document length normalizes
        // tf values.
        0.75,
        q,
      );
    },
  };
}

// Build a document index over a single field, `content`.
const index = createDocumentIndex([{ name: "content" }]);

const docs = [
  { id: "1", content: "Lorem ipsum dolor" },
  { id: "2", content: "Lorem ipsum" },
];

// Feed every document into the index.
for (const doc of docs) {
  index.add(doc);
}

// Run a query.
index.search("Lorem");
// => [{ key: "2", score: ... }, { key: "1", score: ... }]
//
// The document with id `"2"` is ranked higher because its `"content"` field
// contains fewer terms than that of the document with id `"1"`.

index.search("dolor");
// => [{ key: "1", score: ... }]