Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Speaker indexing into a Speaker::Index record. #593

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
22 changes: 22 additions & 0 deletions app/models/active_record/sqlite/index.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Shared setup for Active Record models that sit on top of a SQLite virtual
# search-index table and are keyed by the table's `rowid`.
#
# The including class must assign `table_name` before including this concern,
# because the `included` block below reads `table_name` immediately.
module ActiveRecord::SQLite::Index
extend ActiveSupport::Concern

included do
self.ignored_columns = [table_name.to_sym, :rank] # Rails parses our virtual table with these extra non-attributes.

# Expose `rowid` as an integer attribute, make `id` an alias for it, and use it as the primary key.
attribute :rowid, :integer
alias_attribute :id, :rowid
self.primary_key = :rowid

# Active Record doesn't pick up the `rowid` primary key column.
# So we have to explicitly declare this scope to have `rowid` populated in the result set.
default_scope { select("#{table_name}.rowid, #{table_name}.*") }
end

private

# Prepends the primary key to whatever attribute names `super` returns for the create.
def attributes_for_create(attribute_names)
# Prevent `super` filtering out the primary key because it isn't in `self.class.column_names`.
[self.class.primary_key, *super]
end
end
27 changes: 27 additions & 0 deletions app/models/speaker/index.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Active Record model over the `speakers_search_index` full-text table.
# Each row shares its `rowid` with the Speaker it indexes.
class Speaker::Index < ApplicationRecord
  self.table_name = :speakers_search_index

  include ActiveRecord::SQLite::Index # Depends on `table_name` being assigned.

  # Indexed column name => positional offset handed to SQLite's `snippet()`.
  class_attribute :index_columns, default: {name: 0, github: 1}

  belongs_to :speaker, foreign_key: :rowid

  # Restricts the relation to index rows that match every word of `query`
  # as a prefix.
  #
  # @param query [String, nil] free-form search text; nil is treated as "".
  # @return [ActiveRecord::Relation]
  def self.search(query)
    query = query&.gsub(/[^[:word:]]/, " ") || "" # remove non-word characters
    # Wildcard search. Each term is double-quoted so that the words AND / OR /
    # NOT are matched as text instead of being parsed as FTS5 operators.
    # Quoting is safe because the terms only contain word characters here.
    query = query.split.map { |word| %("#{word}"*) }.join(" ")
    where("#{table_name} match ?", query)
  end

  # Adds a `<column>_snippet` select for every entry in `index_columns`.
  #
  # The keyword rest is named rather than anonymous: forwarding an anonymous
  # `**` from inside a block is a SyntaxError on Ruby 3.3.0.
  #
  # @param options [Hash] forwarded unchanged to `snippet`.
  # @return [ActiveRecord::Relation]
  def self.snippets(**options)
    index_columns.each_key.reduce(all) { |relation, column| relation.snippet(column, **options) }
  end

  # Selects SQLite's `snippet()` for one indexed column as `<column>_snippet`.
  #
  # @param column [Symbol, String] a key of `index_columns`.
  # @param tag [String] HTML tag name wrapped around each match.
  # @param omission [String] text marking truncated content.
  # @param limit [Integer] passed to `snippet()` as its size argument.
  # @return [ActiveRecord::Relation]
  # @raise [KeyError] when `column` is not in `index_columns`.
  def self.snippet(column, tag: "mark", omission: "…", limit: 32)
    column = column.to_sym # accept "name" as well as :name
    offset = index_columns.fetch(column)

    # Bind the caller-supplied values instead of interpolating them, so a quote
    # in `tag` or `omission` cannot break (or inject into) the statement.
    # `table_name`, `offset` and `column` come from this class, not the caller.
    select sanitize_sql([
      "snippet(#{table_name}, #{offset}, ?, ?, ?, ?) AS #{column}_snippet",
      "<#{tag}>", "</#{tag}>", omission, limit
    ])
  end

  # Copies the speaker's current id, name and github into this index row and
  # saves it, raising if the save fails.
  def reindex
    update! id: speaker.id, name: speaker.name, github: speaker.github
  end
end
47 changes: 11 additions & 36 deletions app/models/speaker/searchable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,60 +2,35 @@ module Speaker::Searchable
extend ActiveSupport::Concern

included do
scope :ft_search, ->(query) do
query = query&.gsub(/[^[:word:]]/, " ") || "" # remove non-word characters
query = query.split.map { |word| "#{word}*" }.join(" ") # wildcard search
joins("join speakers_search_index idx on speakers.id = idx.rowid")
.where("speakers_search_index match ?", query)
end
has_one :index, foreign_key: :rowid, inverse_of: :speaker, dependent: :destroy

scope :ft_search, ->(query) { select("speakers.*").joins(:index).merge(Speaker::Index.search(query)) }

scope :with_snippets, ->(**options) do
select("speakers.*")
.select_snippet("name", 0, **options)
.select_snippet("github", 1, **options)
select("speakers.*").merge(Speaker::Index.snippets(**options))
end

scope :ranked, -> do
select("speakers.*,
bm25(speakers_search_index, 2, 1) AS combined_score")
.order(Arel.sql("combined_score ASC"))
.order(combined_score: :asc)
end

after_create_commit :create_in_index
after_update_commit :update_in_index
after_destroy_commit :remove_from_index
after_save_commit :reindex
end

class_methods do
def rebuild_search_index
connection.execute("DELETE FROM speakers_search_index")
Speaker.find_each(&:create_in_index)
end

def select_snippet(column, offset, tag: "mark", omission: "…", limit: 32)
select("snippet(speakers_search_index, #{offset}, '<#{tag}>', '</#{tag}>', '#{omission}', #{limit}) AS #{column}_snippet")
def reindex_all
includes(:index).find_each(&:reindex)
end
end

# Returns `name_snippet` when the record responds to it and it is present
# (e.g. it was selected through `with_snippets`), otherwise the plain `name`.
def name_with_snippet
try(:name_snippet) || name
end

def create_in_index
execute_sql_with_binds "insert into speakers_search_index(rowid, name, github) values (?, ?, ?)", id, name, github
end

def update_in_index
execute_sql_with_binds "update speakers_search_index set name = ?, github = ? where rowid = ?", name, github, id
end

def remove_from_index
execute_sql_with_binds "delete from speakers_search_index where rowid = ?", id
end

private

def execute_sql_with_binds(*statement)
self.class.connection.execute self.class.sanitize_sql(statement)
# Returns the associated index record, falling back to `build_index` when
# `super` returns nothing (presumably the builder generated by `has_one :index`).
def index
super || build_index
end
delegate :reindex, to: :index
end
27 changes: 27 additions & 0 deletions app/models/talk/index.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Active Record model over the `talks_search_index` full-text table.
# Each row shares its `rowid` with the Talk it indexes.
class Talk::Index < ApplicationRecord
  self.table_name = :talks_search_index

  include ActiveRecord::SQLite::Index # Depends on `table_name` being assigned.

  # Indexed column name => positional offset handed to SQLite's `snippet()`.
  class_attribute :index_columns, default: {title: 0, summary: 1, speaker_names: 2}

  belongs_to :talk, foreign_key: :rowid

  # Restricts the relation to index rows that match every word of `query`
  # as a prefix.
  #
  # @param query [String, nil] free-form search text; nil is treated as "".
  # @return [ActiveRecord::Relation]
  def self.search(query)
    query = query&.gsub(/[^[:word:]]/, " ") || "" # remove non-word characters
    # Wildcard search. Each term is double-quoted so that the words AND / OR /
    # NOT are matched as text instead of being parsed as FTS5 operators.
    # Quoting is safe because the terms only contain word characters here.
    query = query.split.map { |word| %("#{word}"*) }.join(" ")
    where("#{table_name} match ?", query)
  end

  # Adds a `<column>_snippet` select for every entry in `index_columns`.
  #
  # The keyword rest is named rather than anonymous: forwarding an anonymous
  # `**` from inside a block is a SyntaxError on Ruby 3.3.0.
  #
  # @param options [Hash] forwarded unchanged to `snippet`.
  # @return [ActiveRecord::Relation]
  def self.snippets(**options)
    index_columns.each_key.reduce(all) { |relation, column| relation.snippet(column, **options) }
  end

  # Selects SQLite's `snippet()` for one indexed column as `<column>_snippet`.
  #
  # @param column [Symbol, String] a key of `index_columns`.
  # @param tag [String] HTML tag name wrapped around each match.
  # @param omission [String] text marking truncated content.
  # @param limit [Integer] passed to `snippet()` as its size argument.
  # @return [ActiveRecord::Relation]
  # @raise [KeyError] when `column` is not in `index_columns`.
  def self.snippet(column, tag: "mark", omission: "…", limit: 32)
    column = column.to_sym # accept "title" as well as :title
    offset = index_columns.fetch(column)

    # Bind the caller-supplied values instead of interpolating them, so a quote
    # in `tag` or `omission` cannot break (or inject into) the statement.
    # `table_name`, `offset` and `column` come from this class, not the caller.
    select sanitize_sql([
      "snippet(#{table_name}, #{offset}, ?, ?, ?, ?) AS #{column}_snippet",
      "<#{tag}>", "</#{tag}>", omission, limit
    ])
  end

  # Copies the talk's current id, title, summary and speaker names into this
  # index row and saves it, raising if the save fails.
  def reindex
    update! id: talk.id, title: talk.title, summary: talk.summary, speaker_names: talk.speaker_names
  end
end
48 changes: 11 additions & 37 deletions app/models/talk/searchable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,62 +4,36 @@ module Talk::Searchable
DATE_WEIGHT = 0.000000001

included do
scope :ft_search, ->(query) do
query = query&.gsub(/[^[:word:]]/, " ") || "" # remove non-word characters
query = query.split.map { |word| "#{word}*" }.join(" ") # wildcard search
joins("join talks_search_index idx on talks.id = idx.rowid")
.where("talks_search_index match ?", query)
end
has_one :index, foreign_key: :rowid, inverse_of: :talk, dependent: :destroy

scope :ft_search, ->(query) { select("talks.*").joins(:index).merge(Talk::Index.search(query)) }

scope :with_snippets, ->(**options) do
select("talks.*")
.select_snippet("title", 0, **options)
.select_snippet("summary", 1, **options)
.select_snippet("speaker_names", 2, **options)
select("talks.*").merge(Talk::Index.snippets(**options))
end

scope :ranked, -> do
select("talks.*,
bm25(talks_search_index, 10.0, 1.0, 5.0) +
(strftime('%s', 'now') - strftime('%s', talks.date)) * #{DATE_WEIGHT} AS combined_score")
.order(Arel.sql("combined_score ASC"))
.order(combined_score: :asc)
end

after_create_commit :create_in_index
after_update_commit :update_in_index
after_destroy_commit :remove_from_index
after_save_commit :reindex
end

class_methods do
def rebuild_search_index
connection.execute("DELETE FROM talks_search_index")
Talk.find_each(&:create_in_index)
end

def select_snippet(column, offset, tag: "mark", omission: "…", limit: 32)
select("snippet(talks_search_index, #{offset}, '<#{tag}>', '</#{tag}>', '#{omission}', #{limit}) AS #{column}_snippet")
def reindex_all
includes(:index).find_each(&:reindex)
end
end

# Returns `title_snippet` when the record responds to it and it is present
# (e.g. it was selected through `with_snippets`), otherwise the plain `title`.
def title_with_snippet
try(:title_snippet) || title
end

def create_in_index
execute_sql_with_binds "insert into talks_search_index(rowid, title, summary, speaker_names) values (?, ?, ?, ?)", id, title, summary, speaker_names
end

def update_in_index
execute_sql_with_binds "update talks_search_index set title = ?, summary = ?, speaker_names = ? where rowid = ?", title, summary, speaker_names, id
end

def remove_from_index
execute_sql_with_binds "delete from talks_search_index where rowid = ?", id
end

private

def execute_sql_with_binds(*statement)
self.class.connection.execute self.class.sanitize_sql(statement)
# Returns the associated index record, falling back to `build_index` when
# `super` returns nothing (presumably the builder generated by `has_one :index`).
def index
super || build_index
end
delegate :reindex, to: :index
end
1 change: 1 addition & 0 deletions config/initializers/inflections.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@
# These inflection rules are supported but not enabled by default:
ActiveSupport::Inflector.inflections(:en) do |inflect|
inflect.acronym "GitHub"
# Registers "SQLite" as an acronym; presumably so that
# app/models/active_record/sqlite/index.rb maps to ActiveRecord::SQLite::Index.
inflect.acronym "SQLite"
end
2 changes: 1 addition & 1 deletion db/migrate/20241019135118_create_talk_fts.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ def up
"tokenize = porter"
]

Talk.rebuild_search_index
Talk.reindex_all
end

def down
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ def up
"name", "github", "tokenize = porter"
]

Speaker.rebuild_search_index
Speaker.reindex_all
end

def down
Expand Down
4 changes: 4 additions & 0 deletions test/models/talk_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,10 @@ class TalkTest < ActiveSupport::TestCase
assert_equal [@talk], Talk.ft_search("Hotwire Cookbook")
assert_equal [@talk], Talk.ft_search("Hotwire Cookbook: Common Uses, Essential Patterns")
assert_equal [@talk], Talk.ft_search('Hotwire"') # with an escaped quote

@talk.index.destroy!
@talk.reload.reindex # Need to reload or we get a FrozenError from trying to update attributes on the destroyed index record.
assert_equal [@talk], Talk.ft_search("Hotwire Cookbook")
end

test "full text search on title with snippets" do
Expand Down
4 changes: 2 additions & 2 deletions test/test_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ class ActiveSupport::TestCase
# true
# end

Talk.rebuild_search_index
Speaker.rebuild_search_index
Talk.reindex_all
Speaker.reindex_all
end
# Run tests in parallel with specified workers
parallelize(workers: :number_of_processors)
Expand Down
Loading