diff --git a/app/models/active_record/sqlite/index.rb b/app/models/active_record/sqlite/index.rb new file mode 100644 index 00000000..174105eb --- /dev/null +++ b/app/models/active_record/sqlite/index.rb @@ -0,0 +1,22 @@ +module ActiveRecord::SQLite::Index + extend ActiveSupport::Concern + + included do + self.ignored_columns = [table_name.to_sym, :rank] # Rails parses our virtual table with these extra non-attributes. + + attribute :rowid, :integer + alias_attribute :id, :rowid + self.primary_key = :rowid + + # Active Record doesn't pick up the `rowid` primary key column. + # So we have have explicitly declare this scope to have `rowid` populated in the result set. + default_scope { select("#{table_name}.rowid, #{table_name}.*") } + end + + private + + def attributes_for_create(attribute_names) + # Prevent `super` filtering out the primary key because it isn't in `self.class.column_names`. + [self.class.primary_key, *super] + end +end diff --git a/app/models/talk/index.rb b/app/models/talk/index.rb new file mode 100644 index 00000000..097047f8 --- /dev/null +++ b/app/models/talk/index.rb @@ -0,0 +1,27 @@ +class Talk::Index < ApplicationRecord + self.table_name = :talks_search_index + + include ActiveRecord::SQLite::Index # Depends on `table_name` being assigned. + class_attribute :index_columns, default: {title: 0, summary: 1, speaker_names: 2} + + belongs_to :talk, foreign_key: :rowid + + def self.search(query) + query = query&.gsub(/[^[:word:]]/, " ") || "" # remove non-word characters + query = query.split.map { |word| "#{word}*" }.join(" ") # wildcard search + where("#{table_name} match ?", query) + end + + def self.snippets(**) + index_columns.each_key.reduce(all) { |relation, column| relation.snippet(column, **) } + end + + def self.snippet(column, tag: "mark", omission: "…", limit: 32) + offset = index_columns.fetch(column) + select("snippet(#{table_name}, #{offset}, '<#{tag}>', '', '#{omission}', #{limit}) AS #{column}_snippet") + end + + def reindex + update! id: talk.id, title: talk.title, summary: talk.summary, speaker_names: talk.speaker_names + end +end diff --git a/app/models/talk/searchable.rb b/app/models/talk/searchable.rb index d9c43c5b..3277ef28 100644 --- a/app/models/talk/searchable.rb +++ b/app/models/talk/searchable.rb @@ -4,40 +4,27 @@ module Talk::Searchable DATE_WEIGHT = 0.000000001 included do - scope :ft_search, ->(query) do - query = query&.gsub(/[^[:word:]]/, " ") || "" # remove non-word characters - query = query.split.map { |word| "#{word}*" }.join(" ") # wildcard search - joins("join talks_search_index idx on talks.id = idx.rowid") - .where("talks_search_index match ?", query) - end + has_one :index, foreign_key: :rowid, inverse_of: :talk, dependent: :destroy + + scope :ft_search, ->(query) { select("talks.*").joins(:index).merge(Talk::Index.search(query)) } scope :with_snippets, ->(**options) do - select("talks.*") - .select_snippet("title", 0, **options) - .select_snippet("summary", 1, **options) - .select_snippet("speaker_names", 2, **options) + select("talks.*").merge(Talk::Index.snippets(**options)) end scope :ranked, -> do select("talks.*, bm25(talks_search_index, 10.0, 1.0, 5.0) + (strftime('%s', 'now') - strftime('%s', talks.date)) * #{DATE_WEIGHT} AS combined_score") - .order(Arel.sql("combined_score ASC")) + .order(combined_score: :asc) end - after_create_commit :create_in_index - after_update_commit :update_in_index - after_destroy_commit :remove_from_index + after_save_commit :reindex end class_methods do - def rebuild_search_index - connection.execute("DELETE FROM talks_search_index") - Talk.find_each(&:create_in_index) - end - - def select_snippet(column, offset, tag: "mark", omission: "…", limit: 32) - select("snippet(talks_search_index, #{offset}, '<#{tag}>', '', '#{omission}', #{limit}) AS #{column}_snippet") + def reindex_all + includes(:index).find_each(&:reindex) end end @@ -45,21 +32,8 @@ def title_with_snippet try(:title_snippet) || title end - def create_in_index - execute_sql_with_binds "insert into talks_search_index(rowid, title, summary, speaker_names) values (?, ?, ?, ?)", id, title, summary, speaker_names - end - - def update_in_index - execute_sql_with_binds "update talks_search_index set title = ?, summary = ?, speaker_names = ? where rowid = ?", title, summary, speaker_names, id - end - - def remove_from_index - execute_sql_with_binds "delete from talks_search_index where rowid = ?", id - end - - private - - def execute_sql_with_binds(*statement) - self.class.connection.execute self.class.sanitize_sql(statement) + def index + super || build_index end + delegate :reindex, to: :index end diff --git a/config/initializers/inflections.rb b/config/initializers/inflections.rb index 9e26b18e..27ee5080 100644 --- a/config/initializers/inflections.rb +++ b/config/initializers/inflections.rb @@ -13,4 +13,5 @@ # These inflection rules are supported but not enabled by default: ActiveSupport::Inflector.inflections(:en) do |inflect| inflect.acronym "GitHub" + inflect.acronym "SQLite" end diff --git a/db/migrate/20241019135118_create_talk_fts.rb b/db/migrate/20241019135118_create_talk_fts.rb index 81e656d0..263fd500 100644 --- a/db/migrate/20241019135118_create_talk_fts.rb +++ b/db/migrate/20241019135118_create_talk_fts.rb @@ -5,7 +5,7 @@ def up "tokenize = porter" ] - Talk.rebuild_search_index + Talk.reindex_all end def down diff --git a/test/models/talk_test.rb b/test/models/talk_test.rb index b39ea2e4..4518f6d7 100644 --- a/test/models/talk_test.rb +++ b/test/models/talk_test.rb @@ -204,6 +204,24 @@ class TalkTest < ActiveSupport::TestCase assert_equal [@talk], Talk.ft_search("Hotwire Cookbook") assert_equal [@talk], Talk.ft_search("Hotwire Cookbook: Common Uses, Essential Patterns") assert_equal [@talk], Talk.ft_search('Hotwire"') # with an escaped quote + + @talk.index.destroy! + @talk.reload.reindex # Need to reload or we get a FrozenError from trying to update attributes on the destroyed index record. + assert_equal [@talk], Talk.ft_search("Hotwire Cookbook") + end + + test "full text search creating and deleting a talk" do + talk = Talk.create!(title: "Full text seach with Sqlite", summary: "On using sqlite full text search with an ActiveRecord backed virtual table") + talk.speakers.create!(name: "Kasper Timm Hansen") + + assert_equal [talk], Talk.ft_search("sqlite full text search") # title + assert_equal [talk], Talk.ft_search("ActiveRecord backed virtual table") # summary + assert_equal [talk], Talk.ft_search("Kasper Timm Hansen") # speaker + + talk.destroy + assert_empty Talk.ft_search("sqlite full text search") + assert_empty Talk.ft_search("ActiveRecord backed virtual table") + assert_empty Talk.ft_search("Kasper Timm Hansen") end test "full text search on title with snippets" do diff --git a/test/test_helper.rb b/test/test_helper.rb index 300df4bc..b94fea19 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -22,7 +22,7 @@ class ActiveSupport::TestCase # true # end - Talk.rebuild_search_index + Talk.reindex_all Speaker.rebuild_search_index end # Run tests in parallel with specified workers