Making a full-text search module that works on both desktop and mobile (Pt. 2)

#sqlite3 #reactnative #electron

I'm the solo developer of Inkdrop, a Markdown note-taking app for programmers which supports macOS, Windows, Linux, Android and iOS. It is built on top of Electron for desktop platforms and React Native for mobile platforms.
I'm currently working on rebuilding the full-text search feature, as I announced in our roadmap.

This is part two of my progress notes on building it.

Use the latest SQLite on Android to get more speed

Android's built-in SQLite is rather old, which prevents me from using FTS5.
Making search more performant is also important in this work.
How can I use a newer SQLite on Android? I found this:

requery/sqlite-android: Android SQLite support library

It seems like the latest SQLite is faster than the built-in one. Sounds nice.

I have already made a SQLite module for react native: craftzdog/react-native-sqlite-2: SQLite3 Native Plugin for React Native for iOS, Android and Windows.

It would be great to use requery's SQLite library in my module in order to make it performant.

...After some work, it turned out that it cannot be used, because sqlite-android requires the androidx namespace but React Native uses the android namespace. When I try to compile it in a React Native project, it generates the following error:

error: cannot access SupportSQLiteDatabase
class file for androidx.sqlite.db.SupportSQLiteDatabase not found

Also I found that this library is not compiled with FTS5 extension enabled. Okay, I put that aside for now.

Understanding `pouchdb-quick-search`

I need to know how pouchdb-quick-search maintains the full text index.
I guess it watches changes feed.
But where does it store the index state?

It depends on pouchdb-mapreduce-no-ddocs that provides _search_query method.

As I guessed, it uses changes feed at pouchdb-mapreduce-no-ddocs/src/index.js:534:

    function processNextBatch() {
      view.sourceDB.changes({
        conflicts: true,
        include_docs: true,
        style: 'all_docs',
        since: currentSeq,
        limit: CHANGES_BATCH_SIZE
      }).on('complete', function (response) {

Where does currentSeq come from? I guess it is stored in a doc with the id _local/lastSeq:

function saveKeyValues(view, docIdsToChangesAndEmits, seq) {
  var seqDocId = '_local/lastSeq';
  return view.db.get(seqDocId)

And they get the last seq in here: pouchdb-mapreduce-no-ddocs/src/createView.js:55:

    function registerDependentDb() {
      return sourceDB.registerDependentDatabase(depDbName).then(function (res) {
        var db = res.db;
        db.auto_compaction = true;
        var view = {
          name: depDbName,
          db: db,
          sourceDB: sourceDB,
          adapter: sourceDB.adapter,
          mapFun: mapFun,
          reduceFun: reduceFun
        };
        return view.db.get('_local/lastSeq').catch(function (err) {
          /* istanbul ignore if */
          if (err.status !== 404) {
            throw err;
          }
        }).then(function (lastSeqDoc) {
          view.seq = lastSeqDoc ? lastSeqDoc.seq : 0;

Got it.
So I will remember the last state in the same way.

Making a PouchDB Plugin

Wrote an empty plugin:

// Id of the document that holds the last processed change sequence;
// read by getLastSeq and written by saveLastSeq.
const seqDocId = '_local/lastFTSSeq'
// Value passed as `limit` to each db.changes() call in updateFTSIndex,
// i.e. the maximum number of changes handled per batch.
const CHANGES_BATCH_SIZE = 10

/**
 * Builds a rejection handler that substitutes a fallback for "not found".
 *
 * @param {*} value - Fallback to return when the rejection has status 404.
 * @returns {function(*): *} Handler that returns `value` for a reason whose
 *   `status` is 404 and rethrows any other reason unchanged.
 */
function defaultsTo(value) {
  return function(reason) {
    // Anything other than "not found" is a real failure: pass it on as-is.
    if (reason.status !== 404) {
      throw reason
    }
    return value
  }
}

/**
 * Reads the change sequence stored in the `seqDocId` document.
 *
 * @param {Object} db - Database exposing a promise-returning `get(id)`.
 * @returns {Promise<*>} The stored `seq`, or 0 when the document does not
 *   exist (status 404). Any other lookup error is propagated.
 */
function getLastSeq(db) {
  // A missing checkpoint document is expected on the first run; every other
  // failure must surface to the caller.
  const ignoreMissing = function(err) {
    if (err.status === 404) {
      return undefined
    }
    throw err
  }
  const readSeq = function(checkpoint) {
    if (!checkpoint) {
      return 0
    }
    return checkpoint.seq
  }
  return db.get(seqDocId).catch(ignoreMissing).then(readSeq)
}

/**
 * Stores `lastSeq` in the `seqDocId` document, creating it if necessary.
 *
 * @param {Object} db - Database exposing promise-returning `get(id)` and
 *   `put(doc)`.
 * @param {*} lastSeq - Sequence value to store.
 * @returns {Promise<*>} Whatever `db.put` resolves with. Lookup errors other
 *   than status 404 and all `put` errors are propagated.
 */
function saveLastSeq(db, lastSeq) {
  // Used only when the checkpoint document does not exist yet.
  const freshCheckpoint = { _id: seqDocId, seq: 0 }
  const storeSeq = function(checkpoint) {
    // Update the fetched document in place so its other fields are kept.
    checkpoint.seq = lastSeq
    return db.put(checkpoint)
  }
  return db
    .get(seqDocId)
    .catch(defaultsTo(freshCheckpoint))
    .then(storeSeq)
}

/**
 * Brings the full-text search index up to date with the database.
 *
 * Must be invoked with the database as `this`. Starting from the sequence
 * returned by getLastSeq, it reads the changes feed in batches of
 * CHANGES_BATCH_SIZE, passes every document whose id starts with `note:` to
 * the indexing hooks, and, once the feed is exhausted, stores the last seen
 * sequence via saveLastSeq.
 *
 * @returns {Promise<void>} Resolves after the checkpoint has been saved.
 *   Rejects when reading the checkpoint, reading the changes feed, processing
 *   a batch, or saving the checkpoint fails.
 */
async function updateFTSIndex() {
  const db = this
  let currentSeq = await getLastSeq(db)

  function processDoc(note) {
    // TODO: build index
  }

  function deleteNoteFromIndex(note) {
    // TODO: delete
  }

  return new Promise((resolve, reject) => {
    // Persist the checkpoint before settling. A failed save has to reject the
    // outer promise; otherwise the caller would wait forever and the failure
    // would only show up as an unhandled rejection.
    async function complete() {
      try {
        await saveLastSeq(db, currentSeq)
        resolve()
      } catch (err) {
        reject(err)
      }
    }

    // Index one batch of changes, then either finish or request the next one.
    function handleBatch(response) {
      const { results } = response
      if (results.length <= 0) {
        complete()
        return
      }
      for (const change of results) {
        const { doc } = change
        if (doc._id.startsWith('note:')) {
          if (doc._deleted) {
            deleteNoteFromIndex(doc)
          } else {
            processDoc(doc)
          }
        }
        // Advance past every change, indexed or not, so it is not read again.
        currentSeq = change.seq
      }
      // A short batch means the feed has no further changes.
      if (results.length < CHANGES_BATCH_SIZE) {
        complete()
      } else {
        processNextBatch()
      }
    }

    function processNextBatch() {
      // This runs inside an event callback for every batch after the first,
      // so thrown errors must be forwarded to `reject` explicitly.
      try {
        db.changes({
          conflicts: true,
          include_docs: true,
          style: 'all_docs',
          since: currentSeq,
          limit: CHANGES_BATCH_SIZE
        })
          .on('complete', response => {
            try {
              handleBatch(response)
            } catch (err) {
              reject(err)
            }
          })
          .on('error', reject)
      } catch (err) {
        reject(err)
      }
    }

    processNextBatch()
  })
}

// Export the plugin methods. updateFTSIndex reads the database from `this`,
// so it is presumably meant to be attached to database instances
// (e.g. via PouchDB.plugin) — confirm against the consuming code.
module.exports = {
  updateFTSIndex
}

This plugin processes only docs whose ID starts with `note:`.

Now I've got everything necessary to know to make the module!

Top comments (2)

Ivan Luque • Jul 31 '22

Hi!
This post is a bit old, but I'm curious about the FTS5 Japanese support, as I am planning on using FTS for my app as well. Did you find any issues with that? It was my understanding that FTS5 does not support the needed ICU tokenizer so I was wondering what did you do ultimately, as I guess Japanese support is important for your app as well.