Skip to content

Commit

Permalink
perf(cu): instrument runtime metrics and add ao process count metric #…
Browse files Browse the repository at this point in the history
  • Loading branch information
TillaTheHun0 committed May 29, 2024
1 parent 5eab2ff commit e7a012e
Show file tree
Hide file tree
Showing 9 changed files with 163 additions and 3 deletions.
1 change: 1 addition & 0 deletions servers/cu/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ There are a few environment variables that you can set. Besides
- `WALLET`/`WALLET_FILE`: the JWK Interface stringified JSON that will be used
by the CU, or a file to load it from
- `PORT`: Which port the web server should listen on (defaults to port `6363`)
- `ENABLE_METRICS_ENDPOINT`: Whether the OpenMetrics (Prometheus-compatible) endpoint `/metrics` should be enabled. Set to any non-empty value to enable. (defaults to disabled)
- `DB_MODE`: Whether the database being used by the CU is embedded within the CU
or is remote to the CU. Can be either `embedded` or `remote` (defaults to
`embedded`)
Expand Down
61 changes: 61 additions & 0 deletions servers/cu/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions servers/cu/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"lru-cache": "^10.2.2",
"ms": "^2.1.3",
"p-map": "^7.0.2",
"prom-client": "^15.1.2",
"ramda": "^0.30.0",
"warp-arbundles": "^1.0.4",
"workerpool": "^9.1.1",
Expand Down
3 changes: 3 additions & 0 deletions servers/cu/src/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const DEFAULT_PROCESS_WASM_MODULE_FORMATS = ['wasm32-unknown-emscripten', 'wasm3
/**
 * Schema for the server configuration: the domain config schema
 * extended with the server-level fields declared below.
 */
const serverConfigSchema = domainConfigSchema.extend({
MODE: z.enum(['development', 'production']),
port: positiveIntSchema,
/**
 * Coerced to a boolean by truthiness before validation.
 *
 * NOTE: when the raw value is an environment variable string, ANY non-empty
 * string is truthy -- including 'false' and '0' -- and so enables the endpoint.
 * Only an unset or empty value results in false.
 */
ENABLE_METRICS_ENDPOINT: z.preprocess((val) => !!val, z.boolean()),
DUMP_PATH: z.string().min(1)
})

Expand Down Expand Up @@ -91,6 +92,7 @@ const CONFIG_ENVS = {
development: {
MODE,
port: process.env.PORT || 6363,
ENABLE_METRICS_ENDPOINT: process.env.ENABLE_METRICS_ENDPOINT,
GATEWAY_URL: process.env.GATEWAY_URL || 'https://arweave.net',
GRAPHQL_URL: process.env.GRAPHQL_URL,
CHECKPOINT_GRAPHQL_URL: process.env.CHECKPOINT_GRAPHQL_URL,
Expand Down Expand Up @@ -128,6 +130,7 @@ const CONFIG_ENVS = {
production: {
MODE,
port: process.env.PORT || 6363,
ENABLE_METRICS_ENDPOINT: process.env.ENABLE_METRICS_ENDPOINT,
GATEWAY_URL: process.env.GATEWAY_URL || 'https://arweave.net',
GRAPHQL_URL: process.env.GRAPHQL_URL,
CHECKPOINT_GRAPHQL_URL: process.env.CHECKPOINT_GRAPHQL_URL,
Expand Down
14 changes: 13 additions & 1 deletion servers/cu/src/domain/client/ao-process.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ export const LATEST = 'LATEST'
* @prop {string} [cron]
*/
let processMemoryCache
export async function createProcessMemoryCache ({ MAX_SIZE, TTL, DRAIN_TO_FILE_THRESHOLD, onEviction, writeProcessMemoryFile }) {
export async function createProcessMemoryCache ({ MAX_SIZE, TTL, DRAIN_TO_FILE_THRESHOLD, gauge, onEviction, writeProcessMemoryFile }) {
if (processMemoryCache) return processMemoryCache

const clearTimerWith = (map) => (key) => {
Expand All @@ -70,6 +70,18 @@ export async function createProcessMemoryCache ({ MAX_SIZE, TTL, DRAIN_TO_FILE_T
const drainToFileTimers = new Map()
const clearDrainToFileTimer = clearTimerWith(drainToFileTimers)

/**
* Expose the total count of processes cached on this unit,
* on the CU's application level metrics
*/
gauge({
name: 'ao_process_total',
description: 'The total amount of ao Processes cached on the Compute Unit',
collect: function () {
this.set(data.size)
}
})

/**
* @type {LRUCache<string, { evaluation: Evaluation, File?: string, Memory?: ArrayBuffer }}
*/
Expand Down
37 changes: 37 additions & 0 deletions servers/cu/src/domain/client/metrics.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,19 @@
import { randomBytes } from 'node:crypto'
import PromClient from 'prom-client'

/**
 * Build a lazy, idempotent initializer for the runtime metrics.
 *
 * On its first invocation, the returned thunk switches the global prom-client
 * registry to the OpenMetrics content type and starts collection of
 * prom-client's default metrics. Every invocation (first or subsequent)
 * returns the global prom-client registry.
 *
 * prom-client prepends the default metrics `prefix` verbatim, with no
 * separator, so a trailing underscore is appended when it is missing.
 * This keeps the default metric names consistent with the
 * `${prefix}_${name}` convention used for custom metrics in this module.
 *
 * @param {{ prefix?: string }} [options] - `prefix` defaults to 'ao_cu'
 * @returns {() => object} a thunk that returns `PromClient.register`
 */
export const initializeRuntimeMetricsWith = ({ prefix = 'ao_cu' } = {}) => {
  /**
   * An empty prefix means "no prefix", so no separator is added in that case
   */
  const metricPrefix = prefix && !prefix.endsWith('_') ? `${prefix}_` : prefix
  let initialized = false

  return () => {
    if (initialized) return PromClient.register

    PromClient.register.setContentType(PromClient.Registry.OPENMETRICS_CONTENT_TYPE)
    PromClient.collectDefaultMetrics({ prefix: metricPrefix })
    initialized = true
    return PromClient.register
  }
}

/**
* Simple for now, maybe hook into something like Prometheus later
*/
Expand All @@ -14,3 +29,25 @@ export const timer = (label, ctx) => {
}
}
}

/**
 * Build a factory for prom-client Gauges whose names are
 * namespaced as `${prefix}_${name}`.
 *
 * The factory accepts the metric `name`, a `description` (passed to
 * prom-client as `help`) and an optional `collect` function, which is
 * forwarded to the Gauge as-is. It returns a small facade exposing
 * only `inc`, `dec` and `set`, each delegating to the underlying Gauge.
 *
 * @param {{ prefix?: string }} options - `prefix` defaults to 'ao_cu'
 */
export const gaugeWith = ({ prefix = 'ao_cu' }) => {
  function createGauge ({ name, description, collect }) {
    const metricName = `${prefix}_${name}`
    const metric = new PromClient.Gauge({
      name: metricName,
      help: description,
      collect
    })

    /**
     * Only expose the operations consumers need,
     * instead of the prom-client Gauge itself
     */
    return {
      inc (n) { return metric.inc(n) },
      dec (n) { return metric.dec(n) },
      set (n) { return metric.set(n) }
    }
  }

  return createGauge
}

/**
 * Build a factory for prom-client Histograms whose names are
 * namespaced as `${prefix}_${name}`.
 *
 * The factory accepts the metric `name` and a `description` (passed to
 * prom-client as `help`), and returns a facade exposing only `observe`,
 * which delegates to the underlying Histogram.
 *
 * @param {{ prefix?: string }} options - `prefix` defaults to 'ao_cu'
 */
export const histogramWith = ({ prefix = 'ao_cu' }) => {
  function createHistogram ({ name, description }) {
    const metricName = `${prefix}_${name}`
    const metric = new PromClient.Histogram({
      name: metricName,
      help: description
    })

    return {
      observe (v) { return metric.observe(v) }
    }
  }

  return createHistogram
}
16 changes: 15 additions & 1 deletion servers/cu/src/domain/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import * as AoProcessClient from './client/ao-process.js'
import * as AoModuleClient from './client/ao-module.js'
import * as AoEvaluationClient from './client/ao-evaluation.js'
import * as AoBlockClient from './client/ao-block.js'
import * as MetricsClient from './client/metrics.js'

import { readResultWith } from './api/readResult.js'
import { readStateWith, pendingReadStates } from './api/readState.js'
Expand Down Expand Up @@ -85,6 +86,14 @@ export const createApis = async (ctx) => {
const arweave = ArweaveClient.createWalletClient()
const address = ArweaveClient.addressWith({ WALLET: ctx.WALLET, arweave })

/**
* TODO: I don't really like implicitly doing this,
* but works for now.
*/
const _metrics = MetricsClient.initializeRuntimeMetricsWith({})()

const gauge = MetricsClient.gaugeWith({})

const readProcessMemoryFile = AoProcessClient.readProcessMemoryFileWith({
DIR: ctx.PROCESS_MEMORY_CACHE_FILE_DIR,
readFile
Expand Down Expand Up @@ -114,6 +123,10 @@ export const createApis = async (ctx) => {
loadMemoryUsage: () => process.memoryUsage(),
loadProcessCacheUsage: () => AoProcessClient.loadProcessCacheUsage()
})
const metrics = {
contentType: _metrics.contentType,
compute: fromPromise(() => _metrics.metrics())
}

const saveCheckpoint = AoProcessClient.saveCheckpointWith({
address,
Expand All @@ -133,6 +146,7 @@ export const createApis = async (ctx) => {
})

const wasmMemoryCache = await AoProcessClient.createProcessMemoryCache({
gauge,
MAX_SIZE: ctx.PROCESS_MEMORY_CACHE_MAX_SIZE,
TTL: ctx.PROCESS_MEMORY_CACHE_TTL,
DRAIN_TO_FILE_THRESHOLD: ctx.PROCESS_MEMORY_CACHE_DRAIN_TO_FILE_THRESHOLD,
Expand Down Expand Up @@ -305,5 +319,5 @@ export const createApis = async (ctx) => {

const healthcheck = healthcheckWith({ walletAddress: address })

return { stats, pendingReadStates, readState, dryRun, readResult, readResults, readCronResults, checkpointWasmMemoryCache, healthcheck }
return { metrics, stats, pendingReadStates, readState, dryRun, readResult, readResults, readCronResults, checkpointWasmMemoryCache, healthcheck }
}
4 changes: 3 additions & 1 deletion servers/cu/src/routes/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@ import { withDryRunRoutes } from './dryRun.js'
import { withResultsRoutes } from './results.js'
import { withCronRoutes } from './cron.js'
import { withHealthcheckRoutes } from './healthcheck.js'
import { withMetricRoutes } from './metrics.js'

export const withRoutes = pipe(
withHealthcheckRoutes,
withStateRoutes,
withResultRoutes,
withDryRunRoutes,
withResultsRoutes,
withCronRoutes
withCronRoutes,
withMetricRoutes
)
29 changes: 29 additions & 0 deletions servers/cu/src/routes/metrics.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import { always, compose } from 'ramda'
import { withMiddleware } from './middleware/index.js'

import { config } from '../config.js'

/**
 * Mount `GET /metrics` on the app, but only when
 * `config.ENABLE_METRICS_ENDPOINT` is truthy. Otherwise the app
 * is returned untouched.
 *
 * The handler computes the metrics output using the `metrics` api
 * found on the request's domain, then responds with that output using
 * the content type advertised by the `metrics` api.
 *
 * @param app - the app to mount the route on
 * @returns the same app, to allow chaining
 */
export const withMetricRoutes = (app) => {
  if (config.ENABLE_METRICS_ENDPOINT) {
    const handleMetrics = async (req, res) => {
      const { metrics } = req.domain.apis

      const output = await metrics.compute().toPromise()

      res.type(metrics.contentType)
      res.send(output)
    }

    app.get(
      '/metrics',
      compose(withMiddleware, always(handleMetrics))()
    )
  }

  return app
}

0 comments on commit e7a012e

Please sign in to comment.