Skip to content

Commit

Permalink
API Storage Provider Support (#539)
Browse files Browse the repository at this point in the history
Abstracted the Azure Blob storage code in `api` into
`TypeAgentStorageProvider` to support using s3 or any other storage
provider for session storage as well.

Note: I do not have the ability to test the Azure Storage Provider; it
would be prudent to have someone with Azure credentials do a test run
with blob storage to ensure it is working as expected.

---------

Co-authored-by: Steve Lucco <[email protected]>
  • Loading branch information
hlucco and steveluc authored Jan 10, 2025
1 parent 958b2cf commit 259ac39
Show file tree
Hide file tree
Showing 10 changed files with 1,657 additions and 79 deletions.
10 changes: 6 additions & 4 deletions ts/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,12 @@ COPY --from=build /prod/tools/ /prod/tools/
COPY --from=build /usr/src/app/packages/shell/out/ /prod/shell/out/
WORKDIR /prod/api

COPY .env /prod/api

# dependencies
RUN apt-get update
RUN apt install -y gnome-keyring
RUN curl -sL https://aka.ms/InstallAzureCLIDeb | bash
# RUN curl -sL https://aka.ms/InstallAzureCLIDeb | bash

######################################### BLOB STORAGE MOUNT #########################################
## blobfuse - https://github.com/Azure/azure-storage-fuse/tree/main
Expand All @@ -77,13 +79,13 @@ EXPOSE 80:3000

# start actions
# az login
RUN echo az login --identity >> start.sh
# RUN echo az login --identity >> start.sh
## mount blob storage locally
#RUN echo blobfuse2 mount /mnt/blob --config-file=mount-blobfuse.yaml >> start.sh
# run getKeys
RUN echo node ../tools/scripts/getKeys.mjs >> start.sh
# RUN echo node ../tools/scripts/getKeys.mjs >> start.sh
# start API server
RUN echo pnpm start >> start.sh

# run start script
CMD bash start.sh
CMD bash start.sh
3 changes: 2 additions & 1 deletion ts/packages/api/data/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
"wwwroot": "../shell/out/renderer",
"port": 3000,
"broadcast": true,
"blobBackupEnabled": true
"blobBackupEnabled": true,
"storageProvider": "aws"
}
2 changes: 2 additions & 0 deletions ts/packages/api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
"tsc": "tsc -b"
},
"dependencies": {
"@aws-sdk/client-s3": "^3.723.0",
"@aws-sdk/lib-storage": "^3.726.0",
"@azure/identity": "^4.2.1",
"@azure/storage-blob": "^12.26.0",
"@typeagent/agent-sdk": "workspace:*",
Expand Down
2 changes: 1 addition & 1 deletion ts/packages/api/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) Microsoft Corporation.
// Copyright (c) Microsoft Corporation and Henry Lucco.
// Licensed under the MIT License.

import { TypeAgentServer } from "./typeAgentServer.js";
Expand Down
29 changes: 29 additions & 0 deletions ts/packages/api/src/storageProvider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright (c) Microsoft Corporation and Henry Lucco.
// Licensed under the MIT License.

/**
 * Abstraction over a remote storage backend (e.g. Azure Blob Storage,
 * AWS S3) used by the API server to back up and restore session files.
 *
 * Implementations map between local filesystem paths and provider
 * keys/paths; how a hierarchical path is represented remotely is
 * implementation-defined.
 */
export interface TypeAgentStorageProvider {
    /**
     * Lists remote files (possibly under a prefix or "folder path" if the
     * provider supports a hierarchical structure).
     *
     * @param prefix Optional prefix to filter the remote file listing.
     * @returns An array of file paths/names in the remote store.
     */
    listRemoteFiles(prefix?: string): Promise<string[]>;

    /**
     * Downloads a remote file from the storage provider to a local path.
     *
     * @param remotePath The path or key of the file in the remote store.
     * @param localPath The local file path where the file will be saved.
     */
    downloadFile(remotePath: string, localPath: string): Promise<void>;

    /**
     * Uploads a local file to the storage provider.
     *
     * @param localPath The path of the local file to upload.
     * @param remotePath The path or key of the file in the remote store.
     */
    uploadFile(localPath: string, remotePath: string): Promise<void>;
}
121 changes: 121 additions & 0 deletions ts/packages/api/src/storageProviders/awsStorageProvider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Copyright (c) Microsoft Corporation and Henry Lucco.
// Licensed under the MIT License.

import {
GetObjectCommand,
ListObjectsV2Command,
S3Client,
} from "@aws-sdk/client-s3";
import { Upload } from "@aws-sdk/lib-storage";
import { TypeAgentStorageProvider } from "../storageProvider.js";
import fs from "node:fs";
import path from "node:path";
import { Readable } from "node:stream";

/**
 * TypeAgentStorageProvider backed by an AWS S3 bucket.
 *
 * Configuration is taken from the environment; the constructor throws if
 * any required variable is missing:
 *   - AWS_S3_BUCKET_NAME: target bucket
 *   - AWS_S3_REGION: bucket region
 *   - AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY: credentials (read by the
 *     SDK's default credential chain; validated here so failures are early)
 */
export class AWSStorageProvider implements TypeAgentStorageProvider {
    private s3Client: S3Client;
    private bucketName: string;

    constructor() {
        if (!process.env.AWS_S3_BUCKET_NAME) {
            throw new Error("AWS_S3_BUCKET_NAME not set");
        }

        if (!process.env.AWS_S3_REGION) {
            throw new Error("AWS_S3_REGION not set");
        }

        if (!process.env.AWS_ACCESS_KEY_ID) {
            throw new Error("AWS_ACCESS_KEY_ID not set");
        }

        if (!process.env.AWS_SECRET_ACCESS_KEY) {
            throw new Error("AWS_SECRET_ACCESS_KEY not set");
        }

        this.bucketName = process.env.AWS_S3_BUCKET_NAME;
        this.s3Client = new S3Client({
            region: process.env.AWS_S3_REGION,
        });

        console.log("AWSStorageProvider initialized");
    }

    /**
     * Lists all object keys in the bucket, optionally filtered by prefix.
     * Follows S3 pagination (ListObjectsV2 returns at most 1000 keys per
     * page) until the listing is exhausted.
     */
    async listRemoteFiles(prefix?: string): Promise<string[]> {
        const results: string[] = [];
        let continueToken: string | undefined = undefined;

        do {
            const command = new ListObjectsV2Command({
                Bucket: this.bucketName,
                Prefix: prefix,
                ContinuationToken: continueToken,
            });
            const response = await this.s3Client.send(command);

            if (response.Contents) {
                for (const content of response.Contents) {
                    if (content.Key) {
                        results.push(content.Key);
                    }
                }
            }

            // BUGFIX: advance the pagination cursor. Previously the token
            // was never updated, so the loop ran exactly once and listings
            // of more than one page were silently truncated.
            continueToken = response.NextContinuationToken;
        } while (continueToken);

        return results;
    }

    /**
     * Downloads the object at `remotePath` into `localPath`, creating the
     * local directory structure as needed.
     *
     * @throws if the object has no body or the transfer fails.
     */
    async downloadFile(remotePath: string, localPath: string): Promise<void> {
        const command = new GetObjectCommand({
            Bucket: this.bucketName,
            Key: remotePath,
        });

        const commandResponse = await this.s3Client.send(command);
        const bodyStream = commandResponse.Body;

        if (!bodyStream) {
            // Previously this silently returned, leaving no file behind;
            // surface the condition so callers don't assume success.
            throw new Error(`No body returned for s3 object ${remotePath}`);
        }

        const dirName = path.dirname(localPath);
        if (!fs.existsSync(dirName)) {
            fs.mkdirSync(dirName, { recursive: true });
        }

        const writeStream = fs.createWriteStream(localPath);

        await new Promise<void>((resolve, reject) => {
            const readStream = bodyStream as Readable;
            // Handle errors on BOTH ends of the pipe: pipe() does not
            // forward source errors, and an unhandled "error" event on the
            // read stream would crash the process.
            readStream.on("error", (err: Error) => {
                console.log("Error downloading file: ", err);
                reject(err);
            });
            readStream
                .pipe(writeStream)
                .on("finish", () => resolve())
                .on("error", (err: Error) => {
                    console.log("Error downloading file: ", err);
                    reject(err);
                });
        });
    }

    /**
     * Uploads the file at `localPath` to the bucket under key `remotePath`.
     * Uses the multipart-capable Upload helper so large files stream.
     *
     * @throws if the local file does not exist or the upload fails.
     */
    async uploadFile(localPath: string, remotePath: string): Promise<void> {
        const resolvedPath = path.resolve(localPath);
        if (!fs.existsSync(resolvedPath)) {
            throw new Error(`File does not actually exist at ${resolvedPath}`);
        }

        const fileStream = fs.createReadStream(resolvedPath);

        const upload = new Upload({
            client: this.s3Client,
            params: {
                Bucket: this.bucketName,
                Key: remotePath,
                Body: fileStream,
            },
        });

        try {
            await upload.done();
        } catch (e) {
            // BUGFIX: previously the error was logged and swallowed, so
            // callers believed the backup succeeded. Log for context, then
            // propagate.
            console.log("error", e);
            throw e;
        }
    }
}
130 changes: 130 additions & 0 deletions ts/packages/api/src/storageProviders/azureStorageProvider.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Copyright (c) Microsoft Corporation and Henry Lucco.
// Licensed under the MIT License.

import { DefaultAzureCredential } from "@azure/identity";
import { TypeAgentStorageProvider } from "../storageProvider.js";
import { getEnvSetting, openai } from "aiclient";
import { env } from "node:process";
import {
BlobServiceClient,
BlockBlobClient,
ContainerClient,
ContainerListBlobsOptions,
} from "@azure/storage-blob";
import path from "node:path";
import fs from "node:fs";
import { getUserDataDir } from "agent-dispatcher";

/**
 * TypeAgentStorageProvider backed by Azure Blob Storage.
 *
 * The storage account and container names are read from the environment
 * via aiclient's getEnvSetting (AZURE_STORAGE_ACCOUNT /
 * AZURE_STORAGE_CONTAINER); authentication uses DefaultAzureCredential.
 */
export class AzureStorageProvider implements TypeAgentStorageProvider {
    private containerName: string | undefined;
    private storageAccount: string | undefined;
    private accountURL: string;
    private blobServiceClient: BlobServiceClient | undefined;

    constructor() {
        this.storageAccount = getEnvSetting(
            env,
            openai.EnvVars.AZURE_STORAGE_ACCOUNT,
            undefined,
            undefined,
        );

        this.containerName = getEnvSetting(
            env,
            openai.EnvVars.AZURE_STORAGE_CONTAINER,
            undefined,
            "",
        );

        // blob storage config
        this.accountURL = `https://${this.storageAccount}.blob.core.windows.net`;
        this.blobServiceClient = new BlobServiceClient(
            this.accountURL,
            new DefaultAzureCredential(),
        );
    }

    /**
     * Lists blob names in the configured container, optionally filtered
     * by `prefix`. Pages through results 100 at a time.
     */
    async listRemoteFiles(prefix?: string): Promise<string[]> {
        const results: string[] = [];

        if (!this.blobServiceClient) {
            return results;
        }

        const containerClient: ContainerClient =
            this.blobServiceClient.getContainerClient(this.containerName!);

        // BUGFIX: forward the caller's prefix. It was previously
        // hard-coded to "", silently ignoring the parameter.
        const listOptions: ContainerListBlobsOptions = {
            includeMetadata: false,
            includeSnapshots: false,
            prefix: prefix ?? "",
        };

        const maxPageSize = 100;

        // List blobs with an optional prefix, page by page if needed
        for await (const response of containerClient
            .listBlobsFlat(listOptions)
            .byPage({ maxPageSize })) {
            if (response.segment.blobItems) {
                for (const blob of response.segment.blobItems) {
                    // may require some tweaking with specifics of
                    // how blob storage handles paths
                    results.push(blob.name);
                }
            }
        }

        return results;
    }

    /**
     * Downloads the blob at `remotePath` into `localPath`, creating the
     * local directory structure as needed. No-op if the client failed to
     * initialize.
     */
    async downloadFile(remotePath: string, localPath: string): Promise<void> {
        if (!this.blobServiceClient) {
            return;
        }

        const containerClient: ContainerClient =
            this.blobServiceClient.getContainerClient(this.containerName!);

        const blockBlobClient = containerClient.getBlockBlobClient(remotePath);

        // Ensure local directory structure exists
        const dirName = path.dirname(localPath);
        if (!fs.existsSync(dirName)) {
            fs.mkdirSync(dirName, { recursive: true });
        }

        // Download the blob to the specified local file
        await blockBlobClient.downloadToFile(localPath);
    }

    /**
     * Uploads the file at `localPath` to the container. The blob name is
     * derived by stripping the user-data directory from `fileName`, so
     * blobs mirror the local session layout.
     */
    async uploadFile(localPath: string, fileName: string): Promise<void> {
        if (!this.blobServiceClient) {
            return;
        }

        const containerClient: ContainerClient =
            this.blobServiceClient.getContainerClient(this.containerName!);

        const blobName = fileName.replace(getUserDataDir(), "");
        const blockBlobClient: BlockBlobClient =
            containerClient.getBlockBlobClient(blobName);

        await blockBlobClient.uploadFile(localPath);
    }
}
Loading

0 comments on commit 259ac39

Please sign in to comment.