Skip to content

Commit

Permalink
Timestamp index
Browse files Browse the repository at this point in the history
  • Loading branch information
umeshma committed Jan 31, 2025
1 parent 34901bf commit 30029d7
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 22 deletions.
10 changes: 10 additions & 0 deletions ts/packages/knowPro/src/dataFormat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ export interface IConversation<TMeta extends IKnowledgeSource = any> {
semanticRefIndex?: ITermToSemanticRefIndex | undefined;
semanticRefs: SemanticRef[] | undefined;
relatedTermsIndex?: ITermToRelatedTermsIndex | undefined;
timestampIndex?: ITimestampToMessageIndex | undefined;
}

export type MessageIndex = number;
Expand Down Expand Up @@ -138,3 +139,12 @@ export interface ITextEmbeddingDataItem {
text: string;
embedding: number[];
}

/**
 * A span of time described by a start date and an optional end date.
 * Passed to ITimestampToMessageIndex.getMessagesInDateRange to select messages.
 */
export type DateRange = {
// Beginning of the range (always required).
start: Date;
// End of the range; may be omitted or undefined.
// NOTE(review): presumably a missing end means "no upper bound" — confirm against the index implementation.
end?: Date | undefined;
};

/**
 * Contract for an index that looks messages up by date range.
 */
export interface ITimestampToMessageIndex {
/**
 * Looks up messages for a date range.
 * @param dateRange start date and optional end date to search with
 * @returns MessageIndex values (numbers) identifying the matching messages
 */
getMessagesInDateRange(dateRange: DateRange): MessageIndex[];
}
13 changes: 12 additions & 1 deletion ts/packages/knowPro/src/import.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import {
SemanticIndexSettings,
TermSemanticIndex,
} from "./termIndex.js";
import { TimestampToMessageIndex } from "./timestampIndex.js";

// metadata for podcast messages
export class PodcastMessageMeta implements IKnowledgeSource {
Expand Down Expand Up @@ -122,6 +123,7 @@ export class Podcast implements IConversation<PodcastMessageMeta> {
public semanticRefs: SemanticRef[] = [],
public semanticRefIndex: ConversationIndex | undefined = undefined,
public relatedTermsIndex: TermSemanticIndex | undefined = undefined,
public timestampIndex: TimestampToMessageIndex | undefined = undefined,
) {
this.settings = createPodcastSettings();
}
Expand Down Expand Up @@ -177,6 +179,7 @@ export class Podcast implements IConversation<PodcastMessageMeta> {
): Promise<ConversationIndexingResult> {
const result = await buildConversationIndex(this, progressCallback);
this.addMetadataToIndex();
this.buildTimestampIndex();
return result;
}

Expand All @@ -198,6 +201,10 @@ export class Podcast implements IConversation<PodcastMessageMeta> {
}
}

/**
 * Rebuilds the timestamp index from the current message list and stores it
 * on this instance, replacing any previously assigned index.
 */
public buildTimestampIndex(): void {
    const rebuiltIndex = new TimestampToMessageIndex(this.messages);
    this.timestampIndex = rebuiltIndex;
}

public serialize(): PodcastData {
return {
nameTag: this.nameTag,
Expand All @@ -221,6 +228,7 @@ export class Podcast implements IConversation<PodcastMessageMeta> {
data.relatedTermIndexData,
);
}
this.buildTimestampIndex();
}
}

Expand All @@ -231,6 +239,8 @@ export interface PodcastData extends IConversationData<PodcastMessage> {
export async function importPodcast(
transcriptFilePath: string,
podcastName?: string,
startDate?: Date,
lengthMinutes: number = 60,
): Promise<Podcast> {
const transcriptText = await readAllText(transcriptFilePath);
podcastName ??= getFileName(transcriptFilePath);
Expand Down Expand Up @@ -276,7 +286,8 @@ export async function importPodcast(
}
assignMessageListeners(msgs, participants);
const pod = new Podcast(podcastName, msgs, [podcastName]);
// TODO: add timestamps and more tags
pod.generateTimestamps(startDate, lengthMinutes);
// TODO: add more tags
// list all the books
// what did K say about Children of Time?
return pod;
Expand Down
23 changes: 2 additions & 21 deletions ts/packages/knowPro/src/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Licensed under the MIT License.

import {
DateRange,
IConversation,
IMessage,
ITag,
Expand All @@ -23,7 +24,7 @@ import {
SemanticRefAccumulator,
TextRangeAccumulator,
} from "./accumulators.js";
import { collections, dateTime } from "typeagent";
import { collections } from "typeagent";

export function isConversationSearchable(conversation: IConversation): boolean {
return (
Expand Down Expand Up @@ -62,21 +63,6 @@ export function timestampRangeForConversation(
return undefined;
}

/**
 * Selects the messages of a conversation for a date range by delegating to
 * collections.getInRange over conversation.messages.
 * Assumes the messages are already in timestamp order.
 * NOTE(review): the comparator calls localeCompare on its first argument;
 * confirm what collections.getInRange passes there (a message object would
 * not have that method).
 * @param conversation conversation whose messages are searched
 * @param dateRange start date and optional end date
 * @returns whatever collections.getInRange yields for the computed bounds
 */
export function getMessagesInDateRange(
    conversation: IConversation,
    dateRange: DateRange,
): IMessage[] {
    const candidates = conversation.messages;
    const lowerBound = dateTime.timestampString(dateRange.start);
    // A missing end date is forwarded as an undefined upper bound.
    const upperBound = dateRange.end
        ? dateTime.timestampString(dateRange.end)
        : undefined;
    return collections.getInRange(
        candidates,
        lowerBound,
        upperBound,
        (x, y) => x.localeCompare(y),
    );
}
/**
* Returns:
* 0 if locations are equal
Expand Down Expand Up @@ -118,11 +104,6 @@ export function isInTextRange(
return cmpStart <= 0 && cmpEnd <= 0;
}

/**
 * A span of time described by a start date and an optional end date.
 * NOTE(review): a DateRange is also named in this file's import list, so this
 * local declaration looks like the removed side of the diff — confirm.
 */
export type DateRange = {
// Beginning of the range (always required).
start: Date;
// End of the range; may be omitted or undefined.
end?: Date | undefined;
};

/**
 * Comparator for two dates based on their epoch-millisecond values.
 * @param x first date
 * @param y second date
 * @returns the millisecond difference x - y: negative when x is earlier,
 * zero when both represent the same instant, positive when x is later
 */
export function compareDates(x: Date, y: Date): number {
    const xMillis = x.getTime();
    const yMillis = y.getTime();
    return xMillis - yMillis;
}
Expand Down
72 changes: 72 additions & 0 deletions ts/packages/knowPro/src/timestampIndex.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

import { collections, dateTime } from "typeagent";
import {
DateRange,
IMessage,
ITimestampToMessageIndex,
MessageIndex,
} from "./dataFormat.js";

/**
 * Index from message timestamps to message indices.
 *
 * Every message that carries a parseable timestamp contributes one
 * Timestamped<MessageIndex> entry; the entries are kept sorted with
 * compareTimestamped so that range lookups can be delegated to
 * collections.getInRange.
 */
export class TimestampToMessageIndex implements ITimestampToMessageIndex {
    // Entries ordered by their timestamp string (see compareTimestamped).
    private messageIndex: Timestamped<MessageIndex>[];

    /**
     * Builds the index over the given messages. The array position of each
     * message is used as its MessageIndex.
     * @param messages messages to index; those without a usable timestamp are skipped
     */
    constructor(messages: IMessage[]) {
        this.messageIndex = [];
        for (let i = 0; i < messages.length; ++i) {
            this.addMessage(messages[i], i);
        }
        // Entries were appended unsorted above; sort once at the end.
        this.messageIndex.sort(compareTimestamped);
    }

    /**
     * Looks up the messages whose timestamps fall within the date range.
     * Boundary handling is whatever collections.getInRange implements.
     * @param dateRange start date and optional end date
     * @returns the MessageIndex of each matching entry
     */
    public getMessagesInDateRange(dateRange: DateRange): MessageIndex[] {
        // Build the bounds with makeTimestamped so they have the same shape and
        // the same timestamp string format as the stored entries; this is what
        // compareTimestamped expects on both sides. The value of a bound is a
        // placeholder that the comparator never reads.
        const startAt = makeTimestamped(dateRange.start, -1);
        const stopAt = dateRange.end
            ? makeTimestamped(dateRange.end, -1)
            : undefined;
        const matches: Timestamped<MessageIndex>[] = collections.getInRange(
            this.messageIndex,
            startAt,
            stopAt,
            compareTimestamped,
        );
        // Callers are promised message indices, not the wrapper entries.
        return matches.map((entry) => entry.value);
    }

    /**
     * Adds one message to the index.
     * @param message message to add
     * @param messageIndex index to record for the message
     * @param inOrder when true the entry is inserted at its sorted position;
     * when false it is appended and the caller is responsible for sorting
     * @returns false when the message has no timestamp or the timestamp does
     * not parse to a valid date; true when an entry was added
     */
    private addMessage(
        message: IMessage,
        messageIndex: MessageIndex,
        inOrder = false,
    ): boolean {
        if (!message.timestamp) {
            return false;
        }
        const date = new Date(message.timestamp);
        if (Number.isNaN(date.getTime())) {
            // An unparseable timestamp cannot be turned into a meaningful key.
            return false;
        }
        // The entry stores the date as a string so it can be searched by comparison.
        const entry: Timestamped<MessageIndex> = makeTimestamped(
            date,
            messageIndex,
        );
        if (inOrder) {
            collections.insertIntoSorted(
                this.messageIndex,
                entry,
                compareTimestamped,
            );
        } else {
            this.messageIndex.push(entry);
        }
        return true;
    }
}

/**
 * Pairs a payload with a timestamp held as a string.
 * compareTimestamped orders these entries by that string.
 */
type Timestamped<T = any> = {
// String form of the timestamp; makeTimestamped fills it via dateTime.timestampString.
timestamp: string;
// Payload carried alongside the timestamp.
value: T;
};

/**
 * Comparator for Timestamped entries: orders them by comparing their
 * timestamp strings with String.prototype.localeCompare.
 * @returns a negative number, zero, or a positive number, as produced by localeCompare
 */
function compareTimestamped(x: Timestamped, y: Timestamped) {
    const { timestamp: left } = x;
    const { timestamp: right } = y;
    return left.localeCompare(right);
}

/**
 * Wraps a value in a Timestamped entry.
 * @param timestamp date to convert to a string with dateTime.timestampString(timestamp, false)
 * @param value payload stored unchanged in the entry
 * @returns the new entry holding the value and the timestamp string
 */
function makeTimestamped(timestamp: Date, value: any): Timestamped {
    const key = dateTime.timestampString(timestamp, false);
    return { value, timestamp: key };
}

0 comments on commit 30029d7

Please sign in to comment.