Merge pull request #2 from microsoft/jluey/website

Initial Website Checkin
microsoft · Sep 11, 2024 · 61d0d3b · 61d0d3b
2 parents 5f9d40f + 28ef6f7
commit 61d0d3b
Show file tree

Hide file tree

Showing 31 changed files with 18,517 additions and 0 deletions.
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
@@ -0,0 +1,53 @@
+name: Deploy to GitHub Pages
+
+on:
+  push:
+    branches:
+      - main
+    # Review gh actions docs if you want to further define triggers, paths, etc
+    # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on
+
+jobs:
+  # Builds the Docusaurus site that lives in ./website and uploads the output
+  # as the GitHub Pages artifact.
+  build:
+    name: Build Docusaurus
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 18
+          cache: npm
+          # npm ci runs in ./website, so the lock file is there rather than in
+          # the repository root; tell the cache step where to find it.
+          cache-dependency-path: website/package-lock.json
+
+      - name: Install dependencies
+        run: npm ci
+        working-directory: ./website
+      - name: Build website
+        run: npm run build
+        working-directory: ./website
+
+      - name: Upload Build Artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          # `path` is resolved from the workspace root, not from the
+          # working-directory of the build step, so include the site folder.
+          path: website/build
+
+  deploy:
+    name: Deploy to GitHub Pages
+    needs: build # run only after the build job has succeeded
+
+    # Grant GITHUB_TOKEN the permissions required to make a Pages deployment
+    permissions:
+      pages: write # to deploy to Pages
+      id-token: write # to verify the deployment originates from an appropriate source
+
+    # Deploy to the github-pages environment; its URL is read from the output of the step whose id is `deployment`
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+
+    runs-on: ubuntu-latest
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment # referenced by environment.url above
+        uses: actions/deploy-pages@v4
diff --git a/.github/workflows/test-deploy.yml b/.github/workflows/test-deploy.yml
@@ -0,0 +1,33 @@
+name: Test deployment
+
+on:
+  pull_request:
+    branches:
+      - main
+    # Review gh actions docs if you want to further define triggers, paths, etc
+    # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on
+
+jobs:
+  # Builds the site for pull requests that target main; nothing is published.
+  test-deploy:
+    name: Test deployment
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 18
+          cache: npm
+          # npm ci runs in ./website, so point the cache at that lock file
+          # instead of the repository root.
+          cache-dependency-path: website/package-lock.json
+
+      - name: List files in website directory
+        run: ls -la ./website
+
+      - name: Install dependencies
+        run: npm ci
+        working-directory: ./website
+      - name: Test build website
+        run: npm run build
+        working-directory: ./website
diff --git a/utils/eval_report_parsing.py b/utils/eval_report_parsing.py
@@ -0,0 +1,84 @@
+import os
+import json
+import re
+
+# Usage instructions (internal use only):
+# TODO: Should this be checked into somewhere else?
+# 1. Download the "reports" folder from blob storage and extract it to a local directory
+# 2. Point release_directory_path to the release directory inside of the extracted "reports" folder
+# 3. Run 'python utils/eval_report_parsing.py'
+# 4. The compiled results will be written to 'website/static/compiled_results.json'
+
+def coallate_results(release_directory_path, config):
+    file_pattern = re.compile(r'^(?!.*by).*\.json$', re.IGNORECASE)
+    mapping = config["capability_mapping"]
+    model_family_list = config["model_families"]
+    data = {
+        "language": {
+            "capabilities": [ ]
+        },
+        "multimodal": {
+            "capabilities": [ ]
+        }
+    }
+    for capability in mapping:
+        name = capability["capability"]
+        modality = capability["modality"]
+        description = capability["description"]
+
+        model_scores = []
+        model_families = os.listdir(os.path.join(release_directory_path, *capability["path"]))
+        for model_family in model_families:
+            if model_family.lower() not in model_family_list:
+                continue
+            models = os.listdir(os.path.join(release_directory_path, *capability["path"], model_family))
+            for model in models:
+                if capability["run"] == "average":
+                    runs = os.listdir(os.path.join(release_directory_path, *capability["path"], model_family, model))
+                else:
+                    runs = [capability["run"]]
+
+                sum = 0.0
+                num = 0 # there's a chance that one of the runs doesn't have the correct output file so need to keep track separately
+                for run in runs:
+                    try:
+                        file_pattern = re.compile(r'^(?!.*by).*\.json$', re.IGNORECASE)
+                        if name == "Long Context QA Longest Context (3K)":
+                            file_pattern = re.compile(r'^.*by_ctx_size_normalized.*\.json$', re.IGNORECASE)
+                        report = [f for f in os.listdir(os.path.join(release_directory_path, *capability["path"], model_family, model, run, 'eval_report')) if file_pattern.match(f)][0]
+                        file_path = os.path.join(release_directory_path, *capability["path"], model_family, model, run, 'eval_report', report)
+                        with open(file_path, 'r') as f:
+                            file_contents = f.read()
+                            scores = json.loads(file_contents)
+                            for metric in capability["metric"]:
+                                scores = scores[metric]
+                            sum += scores
+                        num += 1
+                        break
+                    except FileNotFoundError:
+                        continue
+                if model == 'GPT-4o_2024_05_13_450K':
+                    model = 'GPT-4o-2024-05-13'
+                if model == "LLaVA-34B":
+                    model = "Llava-1_6-34B"
+                if model == "GPT-4":
+                    model = "GPT-4-1106-Preview"
+                model_scores.append({   
+                    "name": model,
+                    "score": sum / num
+                })
+        data[modality]["capabilities"].append({
+            "name": name,
+            "description": description,
+            "models": model_scores
+        })
+
+    # Write the final JSON file
+    with open('website\\static\\compiled_results.json', 'w') as f:
+        json.dump(data, f, indent=2)
+
+# Example usage
+release_directory_path = 'C:\\Users\\jluey\\Downloads\\reports\\release'
+config_path = 'website\\static\\config.json'
+
+coallate_results(release_directory_path, json.load(open(config_path)))
diff --git a/website/.gitignore b/website/.gitignore
@@ -0,0 +1,21 @@
+# Dependencies
+/node_modules
+
+# Production
+/build
+
+# Generated files
+.docusaurus
+.cache-loader
+
+# Misc
+.DS_Store
+.env.local
+.env.development.local
+.env.test.local
+.env.production.local
+
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+yarn.lock*
diff --git a/website/README.md b/website/README.md
@@ -0,0 +1,41 @@
+# Website
+
+This website is built using [Docusaurus](https://docusaurus.io/), a modern static website generator.
+
+### Installation
+
+```
+$ yarn
+```
+
+### Local Development
+
+```
+$ yarn start
+```
+
+This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server.
+
+### Build
+
+```
+$ yarn build
+```
+
+This command generates static content into the `build` directory, which can be served using any static content hosting service.
+
+### Deployment
+
+Using SSH:
+
+```
+$ USE_SSH=true yarn deploy
+```
+
+Not using SSH:
+
+```
+$ GIT_USER=<Your GitHub username> yarn deploy
+```
+
+If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch.
diff --git a/website/babel.config.js b/website/babel.config.js
@@ -0,0 +1,3 @@
+module.exports = {
+  presets: [require.resolve('@docusaurus/core/lib/babel/preset')],
+};
diff --git a/website/docs/intro.md b/website/docs/intro.md
@@ -0,0 +1,5 @@
+---
+sidebar_position: 1
+---
+
+# Paper here?
diff --git a/website/docusaurus.config.ts b/website/docusaurus.config.ts
@@ -0,0 +1,106 @@
+import {themes as prismThemes} from 'prism-react-renderer';
+import type {Config} from '@docusaurus/types';
+import type * as Preset from '@docusaurus/preset-classic';
+
+// Site-wide Docusaurus configuration for the Project Eureka website.
+const config: Config = {
+  title: 'Project Eureka',
+  tagline: 'AI Frontiers Evaluation and Understanding',
+  favicon: 'img/eureka_logo.png',
+
+  // Set the production url of your site here
+  url: 'https://microsoft.github.io',
+  // Set the /<baseUrl>/ pathname under which your site is served
+  // For GitHub pages deployment, it is often '/<projectName>/'
+  baseUrl: '/eureka-ml-insights/',
+
+  // GitHub pages deployment config.
+  // If you aren't using GitHub pages, you don't need these.
+  organizationName: 'Microsoft', // Usually your GitHub org/user name.
+  projectName: 'eureka-ml-insights', // Usually your repo name.
+
+  onBrokenLinks: 'throw',
+  onBrokenMarkdownLinks: 'warn',
+
+  // Even if you don't use internationalization, you can use this field to set
+  // useful metadata like html lang. For example, if your site is Chinese, you
+  // may want to replace "en" with "zh-Hans".
+  i18n: {
+    defaultLocale: 'en',
+    locales: ['en'],
+  },
+
+  presets: [
+    [
+      'classic',
+      {
+        docs: {
+          sidebarPath: './sidebars.ts',
+          // Docusaurus appends each doc's path (relative to the site
+          // directory) to this base, so it has to point at the website
+          // folder on the main branch rather than at the repository root.
+          editUrl:
+            'https://github.com/microsoft/eureka-ml-insights/tree/main/website/',
+        },
+        blog: {
+          showReadingTime: true,
+          // NOTE(review): this is the external blog link, not a repository
+          // edit URL. Remove it to drop the "edit this page" links on blog
+          // posts, or replace it with a repository URL.
+          editUrl:
+            'https://aka.ms/eureka-ml-insights-blog',
+        },
+        theme: {
+          customCss: './src/css/custom.css',
+        },
+      } satisfies Preset.Options,
+    ],
+  ],
+
+  themeConfig: {
+    // Replace with your project's social card
+    // image: 'img/background.png',
+    navbar: {
+      title: 'Eureka Model Benchmarks',
+      logo: {
+        alt: 'Project Eureka',
+        src: 'img/eureka_logo.png',
+      },
+      items: [
+        {
+          href: 'https://aka.ms/eureka-ml-insights-blog',
+          label: 'Blog',
+          position: 'right',
+        },
+        {
+          href: 'https://github.com/microsoft/eureka-ml-insights',
+          label: 'GitHub',
+          position: 'right',
+        },
+      ],
+    },
+    footer: {
+      style: 'dark',
+      links: [
+        {
+          label: 'Blog',
+          href: 'https://aka.ms/eureka-ml-insights-blog',
+        },
+        {
+          label: 'GitHub',
+          href: 'https://github.com/microsoft/eureka-ml-insights',
+        },
+      ],
+      // Links open in a new tab; rel="noopener noreferrer" keeps the opened
+      // page from getting a handle on this one.
+      // NOTE(review): the "Contact Us" mailto address below looks like an
+      // e-mail-protection placeholder; restore the real address.
+      copyright: `Copyright © ${new Date().getFullYear()} Microsoft Research |
+        <a target="_blank" rel="noopener noreferrer" style="color:#10adff" href="https://go.microsoft.com/fwlink/?LinkId=521839">Privacy and Cookies</a> |
+        <a target="_blank" rel="noopener noreferrer" style="color:#10adff" href="https://go.microsoft.com/fwlink/?linkid=2259814">Consumer Health Privacy</a> |
+        <a target="_blank" rel="noopener noreferrer" style="color:#10adff" href="https://go.microsoft.com/fwlink/?LinkID=206977">Terms of Use</a> |
+        <a target="_blank" rel="noopener noreferrer" style="color:#10adff" href="mailto:[email protected]">Contact Us</a> |
+        <a target="_blank" rel="noopener noreferrer" style="color:#10adff" href="https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks">Trademarks</a>`,
+    },
+    colorMode: {
+      disableSwitch: true,
+    },
+    prism: {
+      theme: prismThemes.github,
+      darkTheme: prismThemes.dracula,
+    },
+  } satisfies Preset.ThemeConfig,
+};
+
+export default config;