Skip to content

Commit

Permalink
Merge pull request #2 from microsoft/jluey/website
Browse files Browse the repository at this point in the history
Initial Website Checkin
  • Loading branch information
jluey1 authored Sep 11, 2024
2 parents 5f9d40f + 28ef6f7 commit 61d0d3b
Show file tree
Hide file tree
Showing 31 changed files with 18,517 additions and 0 deletions.
53 changes: 53 additions & 0 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Builds the Docusaurus site in ./website and publishes it to GitHub Pages
# on every push to main.
name: Deploy to GitHub Pages

on:
  push:
    branches:
      - main
    # Review gh actions docs if you want to further define triggers, paths, etc
    # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on

jobs:
  build:
    name: Build Docusaurus
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - uses: actions/setup-node@v4
        with:
          node-version: 18
          cache: npm
          # The lock file lives next to the site's package.json in ./website,
          # not in the repository root where setup-node looks by default.
          cache-dependency-path: website/package-lock.json

      - name: Install dependencies
        run: npm ci
        working-directory: ./website
      - name: Build website
        run: npm run build
        working-directory: ./website

      - name: Upload Build Artifact
        uses: actions/upload-pages-artifact@v3
        with:
          # `working-directory` only applies to `run` steps, so this path is
          # resolved from the repository root: the build output is website/build.
          path: website/build

  deploy:
    name: Deploy to GitHub Pages
    needs: build

    # Grant GITHUB_TOKEN the permissions required to make a Pages deployment
    permissions:
      pages: write # to deploy to Pages
      id-token: write # to verify the deployment originates from an appropriate source

    # Deploy to the github-pages environment
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}

    runs-on: ubuntu-latest
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4
33 changes: 33 additions & 0 deletions .github/workflows/test-deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Verifies on every pull request against main that the Docusaurus site in
# ./website still installs and builds; nothing is published.
name: Test deployment

on:
  pull_request:
    branches:
      - main
    # Review gh actions docs if you want to further define triggers, paths, etc
    # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on

jobs:
  test-deploy:
    name: Test deployment
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - uses: actions/setup-node@v4
        with:
          node-version: 18
          cache: npm
          # The lock file is in ./website, not the repository root, so the
          # cache needs to be told where to find it.
          cache-dependency-path: website/package-lock.json

      - name: List files in website directory
        run: ls -la ./website

      - name: Install dependencies
        run: npm ci
        working-directory: ./website
      - name: Test build website
        run: npm run build
        working-directory: ./website
84 changes: 84 additions & 0 deletions utils/eval_report_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import os
import json
import re

# Usage instructions (internal use only):
# TODO: Should this be checked in somewhere else?
# 1. Download the "reports" folder from blob storage and extract it to a local directory
# 2. Point release_directory_path to the release directory inside of the extracted "reports" folder
# 3. Run 'python utils/eval_report_parsing.py'
# 4. The compiled results will be written to 'website/static/compiled_results.json'

# Raw model directory names mapped to the display names written to the output.
_MODEL_DISPLAY_NAMES = {
    'GPT-4o_2024_05_13_450K': 'GPT-4o-2024-05-13',
    'LLaVA-34B': 'Llava-1_6-34B',
    'GPT-4': 'GPT-4-1106-Preview',
}


def _find_report(run_directory, report_pattern):
    """Return the path of the first matching report in a run's 'eval_report' folder, or None."""
    report_directory = os.path.join(run_directory, 'eval_report')
    try:
        # Sorted so the chosen report does not depend on os.listdir() ordering.
        candidates = sorted(f for f in os.listdir(report_directory) if report_pattern.match(f))
    except (FileNotFoundError, NotADirectoryError):
        return None
    if not candidates:
        return None
    return os.path.join(report_directory, candidates[0])


def _load_metric(report_path, metric_path):
    """Load a JSON report and follow metric_path (a sequence of nested keys) to the score."""
    with open(report_path, 'r', encoding='utf-8') as f:
        value = json.load(f)
    for key in metric_path:
        value = value[key]
    return value


def coallate_results(release_directory_path, config, output_path=None):
    """Compile per-model scores from evaluation reports into one JSON file.

    The directory layout walked is
    ``<release>/<capability path...>/<model family>/<model>/<run>/eval_report/<report>.json``.

    Args:
        release_directory_path: Root directory containing the extracted reports.
        config: Dict with two keys:
            ``"model_families"``: lower-case family directory names to include.
            ``"capability_mapping"``: list of dicts, each with ``"capability"``,
            ``"modality"`` (``"language"`` or ``"multimodal"``), ``"description"``,
            ``"path"`` (list of path components under the release directory),
            ``"metric"`` (list of nested keys leading to the score inside the
            report) and ``"run"`` (a run directory name, or ``"average"`` to
            consider every run directory of the model).
        output_path: Where to write the compiled JSON. Defaults to
            ``website/static/compiled_results.json`` relative to the current
            working directory.

    Raises:
        KeyError: If a required config key or a metric key in a report is missing.
        FileNotFoundError: If a capability or model-family directory does not exist.

    Models for which no run contains a usable report are omitted from the
    output instead of failing with a division by zero.
    """
    default_pattern = re.compile(r'^(?!.*by).*\.json$', re.IGNORECASE)
    long_context_pattern = re.compile(r'^.*by_ctx_size_normalized.*\.json$', re.IGNORECASE)
    if output_path is None:
        # Built with os.path.join so the script also works outside Windows.
        output_path = os.path.join('website', 'static', 'compiled_results.json')

    model_family_list = config["model_families"]
    data = {
        "language": {"capabilities": []},
        "multimodal": {"capabilities": []},
    }

    for capability in config["capability_mapping"]:
        name = capability["capability"]
        modality = capability["modality"]
        description = capability["description"]
        # This one capability reads its score from the per-context-size report.
        if name == "Long Context QA Longest Context (3K)":
            report_pattern = long_context_pattern
        else:
            report_pattern = default_pattern

        capability_directory = os.path.join(release_directory_path, *capability["path"])
        model_scores = []
        for model_family in sorted(os.listdir(capability_directory)):
            if model_family.lower() not in model_family_list:
                continue
            family_directory = os.path.join(capability_directory, model_family)
            for model in sorted(os.listdir(family_directory)):
                model_directory = os.path.join(family_directory, model)
                if not os.path.isdir(model_directory):
                    continue  # stray file next to the model folders
                if capability["run"] == "average":
                    runs = sorted(os.listdir(model_directory))
                else:
                    runs = [capability["run"]]

                total = 0.0
                count = 0  # runs may lack a report, so count usable ones separately
                for run in runs:
                    report_path = _find_report(os.path.join(model_directory, run), report_pattern)
                    if report_path is None:
                        continue
                    total += _load_metric(report_path, capability["metric"])
                    count += 1
                    # NOTE(review): the original stops after the first usable run,
                    # so "average" currently means "first run with a report".
                    # Remove this break to average over all runs - confirm intent.
                    break

                if count == 0:
                    continue  # no usable report for this model; nothing to publish
                model_scores.append({
                    "name": _MODEL_DISPLAY_NAMES.get(model, model),
                    "score": total / count,
                })

        data[modality]["capabilities"].append({
            "name": name,
            "description": description,
            "models": model_scores,
        })

    # Write the final JSON file
    output_directory = os.path.dirname(output_path)
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)

# Example usage
if __name__ == "__main__":
    # Local path to the extracted release reports; adjust for your machine.
    release_directory_path = 'C:\\Users\\jluey\\Downloads\\reports\\release'
    # Relative to the repository root; joined portably so it also resolves outside Windows.
    config_path = os.path.join('website', 'static', 'config.json')

    # Close the config file deterministically instead of leaking the handle.
    with open(config_path, 'r', encoding='utf-8') as config_file:
        config = json.load(config_file)

    coallate_results(release_directory_path, config)
21 changes: 21 additions & 0 deletions website/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Dependencies
/node_modules

# Production
/build

# Generated files
.docusaurus
.cache-loader

# Misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local

npm-debug.log*
yarn-debug.log*
yarn-error.log*
yarn.lock*
41 changes: 41 additions & 0 deletions website/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Website

This website is built using [Docusaurus](https://docusaurus.io/), a modern static website generator.

### Installation

```
$ yarn
```

### Local Development

```
$ yarn start
```

This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server.

### Build

```
$ yarn build
```

This command generates static content into the `build` directory, which can be served using any static content hosting service.

### Deployment

Using SSH:

```
$ USE_SSH=true yarn deploy
```

Not using SSH:

```
$ GIT_USER=<Your GitHub username> yarn deploy
```

If you are using GitHub Pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch.
3 changes: 3 additions & 0 deletions website/babel.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module.exports = {
presets: [require.resolve('@docusaurus/core/lib/babel/preset')],
};
5 changes: 5 additions & 0 deletions website/docs/intro.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
sidebar_position: 1
---

# Paper here?
106 changes: 106 additions & 0 deletions website/docusaurus.config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import {themes as prismThemes} from 'prism-react-renderer';
import type {Config} from '@docusaurus/types';
import type * as Preset from '@docusaurus/preset-classic';

// Docusaurus site configuration for the Project Eureka website, served from
// GitHub Pages at https://microsoft.github.io/eureka-ml-insights/.
const config: Config = {
  title: 'Project Eureka',
  tagline: 'AI Frontiers Evaluation and Understanding',
  favicon: 'img/eureka_logo.png',

  // Set the production url of your site here
  url: 'https://microsoft.github.io',
  // Set the /<baseUrl>/ pathname under which your site is served
  // For GitHub pages deployment, it is often '/<projectName>/'
  baseUrl: '/eureka-ml-insights/',

  // GitHub pages deployment config.
  // If you aren't using GitHub pages, you don't need these.
  organizationName: 'Microsoft', // Usually your GitHub org/user name.
  projectName: 'eureka-ml-insights', // Usually your repo name.

  onBrokenLinks: 'throw',
  onBrokenMarkdownLinks: 'warn',

  // Even if you don't use internationalization, you can use this field to set
  // useful metadata like html lang. For example, if your site is Chinese, you
  // may want to replace "en" with "zh-Hans".
  i18n: {
    defaultLocale: 'en',
    locales: ['en'],
  },

  presets: [
    [
      'classic',
      {
        docs: {
          sidebarPath: './sidebars.ts',
          // Docusaurus appends the doc's path relative to the site directory
          // (e.g. "docs/intro.md") to this base, so it must point at the
          // website/ folder on the main branch, not at the bare repo URL.
          editUrl:
            'https://github.com/microsoft/eureka-ml-insights/tree/main/website/',
        },
        blog: {
          showReadingTime: true,
          // Same rule as for docs: this is the base for "edit this page"
          // links, not a link to the published blog.
          // Remove this to remove the "edit this page" links.
          editUrl:
            'https://github.com/microsoft/eureka-ml-insights/tree/main/website/',
        },
        theme: {
          customCss: './src/css/custom.css',
        },
      } satisfies Preset.Options,
    ],
  ],

  themeConfig: {
    // Replace with your project's social card
    // image: 'img/background.png',
    navbar: {
      title: 'Eureka Model Benchmarks',
      logo: {
        alt: 'Project Eureka',
        src: 'img/eureka_logo.png',
      },
      items: [
        {
          href: 'https://aka.ms/eureka-ml-insights-blog',
          label: 'Blog',
          position: 'right'
        },
        {
          href: 'https://github.com/microsoft/eureka-ml-insights',
          label: 'GitHub',
          position: 'right',
        },
      ],
    },
    footer: {
      style: 'dark',
      links: [
        {
          label: 'Blog',
          href: 'https://aka.ms/eureka-ml-insights-blog',
        },
        {
          label: 'GitHub',
          href: 'https://github.com/microsoft/eureka-ml-insights',
        },
      ],
      // Rendered as HTML; the year is computed when the site is built.
      copyright: `Copyright © ${new Date().getFullYear()} Microsoft Research |
<a target="_blank" style="color:#10adff" href="https://go.microsoft.com/fwlink/?LinkId=521839">Privacy and Cookies</a> |
<a target="_blank" style="color:#10adff" href="https://go.microsoft.com/fwlink/?linkid=2259814">Consumer Health Privacy</a> |
<a target=_blank style="color:#10adff" href="https://go.microsoft.com/fwlink/?LinkID=206977">Terms of Use</a> |
<a target="_blank" style="color:#10adff" href="mailto:[email protected]">Contact Us</a> |
<a target="_blank" style="color:#10adff" href="https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks">Trademarks</a>`,
    },
    colorMode: {
      // The site ships a single colour scheme; hide the light/dark toggle.
      disableSwitch: true,
    },
    prism: {
      theme: prismThemes.github,
      darkTheme: prismThemes.dracula,
    },
  } satisfies Preset.ThemeConfig,
};

export default config;
Loading

0 comments on commit 61d0d3b

Please sign in to comment.