From 986460b1012c9c841ffa40854658565bdebd603a Mon Sep 17 00:00:00 2001 From: Jeremy Lewi Date: Fri, 3 Jan 2020 18:01:34 -0800 Subject: [PATCH] Forward all issues for Kubeflow org; setup dev environment * #57 is tracking setting up new staging and prod environments * This PR sets up a new staging (or dev environment) * We create a kustomize manifest for deploying the front end into that namespace * The staging environment is configured to use the dev instance of the issue label bot backend microservice (i.e. the pubsub workers) * I created some python scripts to make it easier to set up the secrets. * The motivation for doing this was to test the changes to the front end * Front end now forwards all issues for the kubeflow org to the backend * This is needed because we want to use multiple models for all Kubeflow repos kubeflow/code-intelligence#70 * The backend should also be configured with logging to measure the impact of the predictions. kubeflow/code-intelligence#104 is a test issue showing that the bot is working. * Fix how keys are handled * For GOOGLE_APPLICATION_CREDENTIALS; depend on that environment variable being set and pointing to the file containing the private key; don't get the private key from an environment variable and then write it to a file. * For the GitHub App private key; use an environment variable to point to the file containing the PEM key. * Create a script to create the secrets. 
* Flask app is running in dev namespace * create_secrets.py creates secrets needed for dev instance --- deployment/Dockerfile | 1 + deployment/README.md | 27 ++- deployment/base/deployment.yaml | 78 +++++++ deployment/base/ingress.yaml | 20 ++ deployment/base/kustomization.yaml | 13 ++ deployment/base/service.yaml | 13 ++ deployment/overlays/dev/certificate.yaml | 7 + deployment/overlays/dev/deployment.yaml | 40 ++++ deployment/overlays/dev/ingress.yaml | 7 + deployment/overlays/dev/kustomization.yaml | 12 + deployment/overlays/prod/deployment.yaml | 38 ++++ developer_guide.md | 19 ++ flask_app/app.py | 244 ++++++++++++--------- flask_app/forward_utils.py | 2 +- flask_app/forwarded_repo.yaml | 3 +- script/create_secrets.py | 152 +++++++++++++ script/send_request.py | 62 ++++++ skaffold.yaml | 40 ++++ 18 files changed, 665 insertions(+), 113 deletions(-) create mode 100644 deployment/base/deployment.yaml create mode 100644 deployment/base/ingress.yaml create mode 100644 deployment/base/kustomization.yaml create mode 100644 deployment/base/service.yaml create mode 100644 deployment/overlays/dev/certificate.yaml create mode 100644 deployment/overlays/dev/deployment.yaml create mode 100644 deployment/overlays/dev/ingress.yaml create mode 100644 deployment/overlays/dev/kustomization.yaml create mode 100644 deployment/overlays/prod/deployment.yaml create mode 100644 developer_guide.md create mode 100644 script/create_secrets.py create mode 100644 script/send_request.py create mode 100644 skaffold.yaml diff --git a/deployment/Dockerfile b/deployment/Dockerfile index fc3de532..06e61519 100644 --- a/deployment/Dockerfile +++ b/deployment/Dockerfile @@ -68,6 +68,7 @@ RUN pip install \ tensorflow==1.12.0 \ seldon-core==0.2.6 + COPY requirements.txt . 
RUN pip install -r requirements.txt COPY flask_app flask_app/ diff --git a/deployment/README.md b/deployment/README.md index eece6c24..4a51221d 100644 --- a/deployment/README.md +++ b/deployment/README.md @@ -5,6 +5,21 @@ with mlbot.net. This is currently running on a GKE cluster. +See [machine-learning-apps/Issue-Label-Bot#57](https://github.com/machine-learning-apps/Issue-Label-Bot/issues/57) for a log of how +the service was deployed. + +To build a new image + +``` +skaffold build +``` + +Then to update the image + +``` +cd overlays/dev|prod +kustomize edit set image gcr.io/github-probots/label-bot-frontend=gcr.io/github-probots/label-bot-frontend:${TAG}@${SHA} +``` ## github-probots @@ -40,12 +55,18 @@ Deploying it There is a staging cluster for testing running in -* **GCP project**: issue-label-bot-dev -* **cluster**: github-mlapp-test -* **namespace**: mlapp +* **GCP project**: github-probots +* **cluster**: kf-ci-ml +* **namespace**: label-bot-dev Deploying it +1. Create the secrets + + + +TODO(jlewi): instructions below are outdated + 1. 
Create the deployment ``` diff --git a/deployment/base/deployment.yaml b/deployment/base/deployment.yaml new file mode 100644 index 00000000..07a0b2de --- /dev/null +++ b/deployment/base/deployment.yaml @@ -0,0 +1,78 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ml-github-app + labels: + app: ml-github-app +spec: + replicas: 9 + selector: + matchLabels: + app: ml-github-app + template: + metadata: + labels: + app: ml-github-app + spec: + containers: + - name: frontend + image: gcr.io/github-probots/label-bot-frontend + command: ["python", "app.py"] + workingDir: "/flask_app" + readinessProbe: + httpGet: + path: / + port: 3000 + initialDelaySeconds: 10 + periodSeconds: 3 + env: + - name: DATABASE_URL + valueFrom: + secretKeyRef: + name: ml-app-inference-secret + key: DATABASE_URL + - name: WEBHOOK_SECRET + valueFrom: + secretKeyRef: + name: ml-app-inference-secret + key: WEBHOOK_SECRET + # The values for the Kubeflow kf-label-bot-dev application + # See kubeflow/code-intelligence#84. This is suitable + # for development but shouldn't be used in production + - name: APP_ID + value: "50112" + # Path to the GitHub app PEM key + - name: GITHUB_APP_PEM_KEY + value: /var/secrets/github/kf-label-bot-dev.private-key.pem + # The GCP project and pubsub topic to publish to. 
+ # Default to the test/dev topic + - name: GCP_PROJECT_ID + value: issue-label-bot-dev + - name: GCP_PUBSUB_TOPIC_NAME + value: TEST_event_queue + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /var/secrets/google/user-gcp-sa.json + - name: FLASK_ENV + value: production + - name: PORT + value: '3000' + - name: APP_URL + value: https://mlbot.net/ + - name: authors + value: 'c' + ports: + - containerPort: 443 + - containerPort: 80 + - containerPort: 3000 + volumeMounts: + - name: user-gcp-sa + mountPath: /var/secrets/google + - name: github-app + mountPath: /var/secrets/github + volumes: + - name: user-gcp-sa + secret: + secretName: user-gcp-sa + - name: github-app + secret: + secretName: github-app \ No newline at end of file diff --git a/deployment/base/ingress.yaml b/deployment/base/ingress.yaml new file mode 100644 index 00000000..53f60af4 --- /dev/null +++ b/deployment/base/ingress.yaml @@ -0,0 +1,20 @@ +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: frontend + annotations: + # The ip and certificate name should be overwritten for each + # overlay and set to the correct values + kubernetes.io/ingress.global-static-ip-name: fake-ip + networking.gke.io/managed-certificates: fake-certificate +spec: + backend: + serviceName: ml-github-app + servicePort: 3000 + rules: + - http: + paths: + - path: / + backend: + serviceName: ml-github-app + servicePort: 3000 \ No newline at end of file diff --git a/deployment/base/kustomization.yaml b/deployment/base/kustomization.yaml new file mode 100644 index 00000000..3f8cdd9d --- /dev/null +++ b/deployment/base/kustomization.yaml @@ -0,0 +1,13 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namePrefix: label-bot- +commonLabels: + app: label-bot + service: label-bot +images: +- name: gcr.io/github-probots/label-bot-frontend + newName: gcr.io/github-probots/label-bot-frontend +resources: +- deployment.yaml +- service.yaml +- ingress.yaml diff --git a/deployment/base/service.yaml 
b/deployment/base/service.yaml new file mode 100644 index 00000000..fb7467fb --- /dev/null +++ b/deployment/base/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: ml-github-app + labels: + app: ml-github-app +spec: + ports: + - port: 3000 + protocol: TCP + selector: + app: ml-github-app + type: NodePort \ No newline at end of file diff --git a/deployment/overlays/dev/certificate.yaml b/deployment/overlays/dev/certificate.yaml new file mode 100644 index 00000000..7de946f3 --- /dev/null +++ b/deployment/overlays/dev/certificate.yaml @@ -0,0 +1,7 @@ +apiVersion: networking.gke.io/v1beta1 +kind: ManagedCertificate +metadata: + name: certificate +spec: + domains: + - label-bot-dev.mlbot.net \ No newline at end of file diff --git a/deployment/overlays/dev/deployment.yaml b/deployment/overlays/dev/deployment.yaml new file mode 100644 index 00000000..b4a203c5 --- /dev/null +++ b/deployment/overlays/dev/deployment.yaml @@ -0,0 +1,40 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ml-github-app +spec: + replicas: 1 + template: + spec: + containers: + - name: frontend + env: + - name: DATABASE_URL + valueFrom: + secretKeyRef: + name: ml-app-inference-secret + key: DATABASE_URL + - name: WEBHOOK_SECRET + valueFrom: + secretKeyRef: + name: ml-app-inference-secret + key: WEBHOOK_SECRET + # The values for the Kubeflow kf-label-bot-dev application + # See kubeflow/code-intelligence#84. 
This is suitable + # for development but shouldn't be used in production + - name: APP_ID + value: "50112" + # Path to the GitHub app PEM key + - name: GITHUB_APP_PEM_KEY + value: /var/secrets/github/kf-label-bot-dev.private-key.pem + # The GCP project and pubsub topic to publish to should + # correspond to the dev backend + - name: GCP_PROJECT_ID + value: issue-label-bot-dev + - name: GCP_PUBSUB_TOPIC_NAME + value: TEST_event_queue + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /var/secrets/google/user-gcp-sa.json + - name: FLASK_ENV + value: production + \ No newline at end of file diff --git a/deployment/overlays/dev/ingress.yaml b/deployment/overlays/dev/ingress.yaml new file mode 100644 index 00000000..de9dd8dd --- /dev/null +++ b/deployment/overlays/dev/ingress.yaml @@ -0,0 +1,7 @@ +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: frontend + annotations: + kubernetes.io/ingress.global-static-ip-name: label-bot-dev + networking.gke.io/managed-certificates: certificate diff --git a/deployment/overlays/dev/kustomization.yaml b/deployment/overlays/dev/kustomization.yaml new file mode 100644 index 00000000..ddb9c178 --- /dev/null +++ b/deployment/overlays/dev/kustomization.yaml @@ -0,0 +1,12 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +bases: +- ../../base +commonLabels: + environment: dev +namespace: label-bot-dev +resources: +- certificate.yaml +patchesStrategicMerge: +- deployment.yaml +- ingress.yaml \ No newline at end of file diff --git a/deployment/overlays/prod/deployment.yaml b/deployment/overlays/prod/deployment.yaml new file mode 100644 index 00000000..0054c116 --- /dev/null +++ b/deployment/overlays/prod/deployment.yaml @@ -0,0 +1,38 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ml-github-app +spec: + replicas: 9 + spec: + containers: + - name: frontend + env: + - name: DATABASE_URL + valueFrom: + secretKeyRef: + name: ml-app-inference-secret + key: DATABASE_URL + - name: WEBHOOK_SECRET + 
valueFrom: + secretKeyRef: + name: ml-app-inference-secret + key: WEBHOOK_SECRET + # The values for the Kubeflow issue-label-bot application + # that is available in the GitHub marketplace + - name: APP_ID + value: "27079" + # Path to the GitHub app PEM key + - name: GITHUB_APP_PEM_KEY + value: /var/secrets/github/kf-label-bot-dev.private-key.pem + # The GCP project and pubsub topic to publish to should + # correspond to the production backend + - name: GCP_PROJECT_ID + value: issue-label-bot-dev + - name: GCP_PUBSUB_TOPIC_NAME + value: event_queue + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /var/secrets/google/user-gcp-sa.json + - name: FLASK_ENV + value: production + \ No newline at end of file diff --git a/developer_guide.md b/developer_guide.md new file mode 100644 index 00000000..27bd9dbf --- /dev/null +++ b/developer_guide.md @@ -0,0 +1,19 @@ +# Developer guide + +1. You can deploy the front end using skaffold + + ``` + skaffold dev --cleanup=False + ``` + + * Your Kubernetes context should be set to using the `github-probots-dev` namespace + * This will continually rebuild and update your code + * Skaffold's file sync feature is used to update the code in the image without rebuilding and + redeploying + * This makes redeploying very easy. + +1. To send a GitHub webhook event you can either open up an issue or you can use `script/send_request.py` + + * The latter is useful because it avoids needing to open up a new GitHub issue + + * Right now the bot is only designed to respond to issues opened events. 
\ No newline at end of file diff --git a/flask_app/app.py b/flask_app/app.py index eb87f4b0..5d802e4d 100644 --- a/flask_app/app.py +++ b/flask_app/app.py @@ -17,6 +17,7 @@ from sql_models import db, Issues, Predictions import tensorflow as tf import requests +import traceback import yaml import random from forward_utils import get_forwarded_repos @@ -49,7 +50,8 @@ # get repos that should possibly be forwarded # dict: {repo_owner/repo_name: proportion} -forwarded_repos = get_forwarded_repos() +forwarded_repos = get_forwarded_repos(os.getenv("LABEL_BOT_CONFIG", + "forwarded_repo.yaml")) def init_issue_labeler(): "Load all necessary artifacts to make predictions." @@ -68,30 +70,15 @@ def init_issue_labeler(): model_path = get_file(fname=model_filename, origin=model_url) model = load_model(model_path) + logging.info(f"Forwarded repo config:\n{forwarded_repos}") return IssueLabeler(body_text_preprocessor=body_pp, title_text_preprocessor=title_pp, model=model) def init(): "Load all necessary artifacts to make predictions." 
- #save keyfile - pem_string = os.getenv('PRIVATE_KEY') - if not pem_string: - raise ValueError('Environment variable PRIVATE_KEY was not supplied.') - - with open('private-key.pem', 'wb') as f: - f.write(str.encode(pem_string)) - - pubsub_json_string = os.getenv('PUBSUB_CREDENTIALS_JSON_BLOB') - if not pubsub_json_string: - raise ValueError('Environment variable PUBSUB_CREDENTIALS_JSON_BLOB was not supplied.') - - with open('pubsub-credentials.json', 'w') as f: - # set GCP Auth per https://cloud.google.com/docs/authentication/getting-started - json.dump(eval(pubsub_json_string), f) - json_file_path = os.path.realpath(f.name) - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = json_file_path - + logging.info(f"Initializing the app") + logging.info(f"Forwarded repo config:\n{forwarded_repos}") app.graph = tf.get_default_graph() app.issue_labeler = init_issue_labeler() create_topic_if_not_exists(PUBSUB_PROJECT_ID, PUBSUB_TOPIC_NAME) @@ -123,95 +110,124 @@ def index(): @app.route("/event_handler", methods=["POST"]) def bot(): "Handle payload" + + logging.debug("Request Data:\n%s", request.data) + if not request.json: + logging.error("Request is not a json request. Please fix.") + # TODO(jlewi): What is the proper code invalid request? + abort(400) + + logging.debug("Handling request with action: %s", + request.json.get('action', 'None')) # authenticate webhook to make sure it is from GitHub verify_webhook(request) # Check if payload corresponds to an issue being opened - if 'action' in request.json and request.json['action'] == 'opened' and ('issue' in request.json): - # get metadata - installation_id = request.json['installation']['id'] - issue_num = request.json['issue']['number'] - private = request.json['repository']['private'] - username, repo = request.json['repository']['full_name'].split('/') - title = request.json['issue']['title'] - body = request.json['issue']['body'] - - # don't do anything if repo is private. 
- if private: - return 'ok' + if 'action' not in request.json or request.json['action'] != 'opened' or 'issue' not in request.json: + logging.warning("Event is not for an issue with action opened.") + return 'ok' - try: - # forward some issues of specific repos and select by their given forwarded proportion - if f'{username}/{repo}' in forwarded_repos and random.random() <= forwarded_repos[f'{username}/{repo}']: + # get metadata + installation_id = request.json['installation']['id'] + issue_num = request.json['issue']['number'] + private = request.json['repository']['private'] + username, repo = request.json['repository']['full_name'].split('/') + title = request.json['issue']['title'] + body = request.json['issue']['body'] + + # don't do anything if repo is private. + if private: + logging.info(f"Recieved a private issue which is being skipped") + return 'ok' + + logging.info(f"Recieved {username}/{repo}#{issue_num}") + try: + # forward some issues of specific repos and select by their given forwarded proportion + forward_probability = None + repo_spec = f'{username}/{repo}' + if username in forwarded_repos.get("orgs", {}): + forward_probability = forwarded_repos["orgs"][username] + elif repo_spec in forwarded_repos.get("repos", {}): + forward_probability = forwarded_repos["repos"][repo_spec] + + if forward_probability: + if random.random() <= forward_probability: + logging.info(f"Publishing {username}/{repo}#{issue_num} to " + f"projects/{PUBSUB_PROJECT_ID}/topics/{PUBSUB_TOPIC_NAME}") # send the event to pubsub publish_message(PUBSUB_PROJECT_ID, PUBSUB_TOPIC_NAME, installation_id, username, repo, issue_num) return f'Labeling of {username}/{repo}/issues/{issue_num} delegated to microservice via pubsub.' 
- except Exception as e: - LOG.error(e) - - # write the issue to the database using ORM - issue_db_obj = Issues(repo=repo, - username=username, - issue_num=issue_num, - title=title, - body=body) - - db.session.add(issue_db_obj) - db.session.commit() - - # make predictions with the model - with app.graph.as_default(): - predictions = app.issue_labeler.get_probabilities(body=body, title=title) - #log to console - LOG.warning(f'issue opened by {username} in {repo} #{issue_num}: {title} \nbody:\n {body}\n') - LOG.warning(f'predictions: {str(predictions)}') - - # get the most confident prediction - argmax = max(predictions, key=predictions.get) - - # get the isssue handle - issue = get_issue_handle(installation_id, username, repo, issue_num) - - - labeled = True - threshold = prediction_threshold[argmax] - - # take an action if the prediction is confident enough - if (predictions[argmax] >= threshold): - # initialize the label name to = the argmax - label_name = argmax - - # handle the yaml file - yaml = get_yaml(owner=username, repo=repo) - if yaml and 'label-alias' in yaml: - if argmax in yaml['label-alias']: - LOG.warning('User has custom names: ', yaml['label-alias']) - new_name = yaml['label-alias'][argmax] - if new_name: - label_name = new_name - - # create message - message = f'Issue-Label Bot is automatically applying the label `{label_name}` to this issue, with a confidence of {predictions[argmax]:.2f}. Please mark this comment with :thumbsup: or :thumbsdown: to give our bot feedback! \n\n Links: [app homepage](https://github.com/marketplace/issue-label-bot), [dashboard]({app_url}data/{username}/{repo}) and [code](https://github.com/hamelsmu/MLapp) for this bot.' - # label the issue using the GitHub api - issue.add_labels(label_name) - - else: - message = f'Issue Label Bot is not confident enough to auto-label this issue. See [dashboard]({app_url}data/{username}/{repo}) for more details.' 
- LOG.warning(f'Not confident enough to label this issue: # {str(issue_num)}') - labeled = False - - # Make a comment using the GitHub api - comment = issue.create_comment(message) - - # log the event to the database using ORM - issue_db_obj.add_prediction(comment_id=comment.id, - prediction=argmax, - probability=predictions[argmax], - logs=str(predictions), - threshold=threshold, - labeled=labeled) - return 'ok' + else: + logging.info(f"{username}/{repo}#{issue_num} not selected for " + f"publishing to " + f"projects/{PUBSUB_PROJECT_ID}/topics/{PUBSUB_TOPIC_NAME}") + except Exception as e: + logging.error(f"Exception occured while handling issue " + f"{username}/{repo}#{issue_num}\n Exception: {e}\n" + f"{traceback.format_exc()}") + + # write the issue to the database using ORM + issue_db_obj = Issues(repo=repo, + username=username, + issue_num=issue_num, + title=title, + body=body) + + db.session.add(issue_db_obj) + db.session.commit() + + # make predictions with the model + with app.graph.as_default(): + predictions = app.issue_labeler.get_probabilities(body=body, title=title) + #log to console + LOG.warning(f'issue opened by {username} in {repo} #{issue_num}: {title} \nbody:\n {body}\n') + LOG.warning(f'predictions: {str(predictions)}') + + # get the most confident prediction + argmax = max(predictions, key=predictions.get) + + # get the isssue handle + issue = get_issue_handle(installation_id, username, repo, issue_num) + + + labeled = True + threshold = prediction_threshold[argmax] + + # take an action if the prediction is confident enough + if (predictions[argmax] >= threshold): + # initialize the label name to = the argmax + label_name = argmax + + # handle the yaml file + yaml = get_yaml(owner=username, repo=repo) + if yaml and 'label-alias' in yaml: + if argmax in yaml['label-alias']: + LOG.warning('User has custom names: ', yaml['label-alias']) + new_name = yaml['label-alias'][argmax] + if new_name: + label_name = new_name + + # create message + message 
= f'Issue-Label Bot is automatically applying the label `{label_name}` to this issue, with a confidence of {predictions[argmax]:.2f}. Please mark this comment with :thumbsup: or :thumbsdown: to give our bot feedback! \n\n Links: [app homepage](https://github.com/marketplace/issue-label-bot), [dashboard]({app_url}data/{username}/{repo}) and [code](https://github.com/hamelsmu/MLapp) for this bot.' + # label the issue using the GitHub api + issue.add_labels(label_name) + + else: + message = f'Issue Label Bot is not confident enough to auto-label this issue. See [dashboard]({app_url}data/{username}/{repo}) for more details.' + LOG.warning(f'Not confident enough to label this issue: # {str(issue_num)}') + labeled = False + + # Make a comment using the GitHub api + comment = issue.create_comment(message) + + # log the event to the database using ORM + issue_db_obj.add_prediction(comment_id=comment.id, + prediction=argmax, + probability=predictions[argmax], + logs=str(predictions), + threshold=threshold, + labeled=labeled) return 'ok' @app.route("/repos/", methods=["GET"]) @@ -223,20 +239,20 @@ def get_repos(username): install_id = app.app_installation_for_user(f'{username}').id except: return f'No current installations for {username} found.' 
- + url = f'https://api.github.com/installation/repositories' headers = {'Authorization': f'token {ghapp.get_installation_access_token(install_id)}', 'Accept': 'application/vnd.github.machine-man-preview+json'} - + response = requests.get(url=url, headers=headers, params={'per_page':100}) if response.status_code == 200: repos = response.json()['repositories'] repos_with_preds = [x.repo for x in Issues.query.filter(Issues.username == username and Issues.predictions != None).distinct(Issues.repo).all()] return render_template('repos.html', repos=repos, username=username, repos_with_preds=repos_with_preds) - + else: return response.status_code - + @app.route('/users') def show_users(): users = get_users() @@ -336,7 +352,11 @@ def update_feedback(owner, repo): def get_app(): "grab a fresh instance of the app handle." app_id = os.getenv('APP_ID') - key_file_path = 'private-key.pem' + if not app_id: + raise ValueError("APP_ID environment variable must be set.") + key_file_path = os.getenv("GITHUB_APP_PEM_KEY") + if not key_file_path: + raise ValueError("GITHUB_APP_PEM_KEY environment variable must be set.") ghapp = GitHubApp(pem_path=key_file_path, app_id=app_id) return ghapp @@ -364,7 +384,7 @@ def get_issue_handle(installation_id, username, repository, number): def get_yaml(owner, repo): """ Looks for the yaml file in a /.github directory. 
- + yaml file must be named issue_label_bot.yaml """ ghapp = get_app() @@ -375,12 +395,14 @@ def get_yaml(owner, repo): # get the repo handle, which allows you got get the file contents repo = inst.repository(owner=owner, repository=repo) results = repo.file_contents('.github/issue_label_bot.yaml').decoded - + except: return None - + return yaml.safe_load(results) +SIGNATURE_HEADER = 'X-Hub-Signature' + def verify_webhook(request): "Make sure request is from GitHub.com" @@ -388,7 +410,10 @@ def verify_webhook(request): if os.getenv('DEVELOPMENT_FLAG'): return True # Inspired by https://github.com/bradshjg/flask-githubapp/blob/master/flask_githubapp/core.py#L191-L198 - signature = request.headers['X-Hub-Signature'].split('=')[1] + if SIGNATURE_HEADER not in request.headers: + logging.error("Request is missing header %s", SIGNATURE_HEADER) + + signature = request.headers[SIGNATURE_HEADER].split('=')[1] mac = hmac.new(str.encode(app.webhook_secret), msg=request.data, digestmod='sha1') @@ -404,6 +429,9 @@ def is_public(owner, repo): return False if __name__ == "__main__": + logger = logging.getLogger() + logger.setLevel(logging.INFO) + init() with app.app_context(): # create tables if they do not exist diff --git a/flask_app/forward_utils.py b/flask_app/forward_utils.py index 2a2c11f3..cbe9eb1e 100644 --- a/flask_app/forward_utils.py +++ b/flask_app/forward_utils.py @@ -5,7 +5,7 @@ def get_forwarded_repos(yaml_path='forwarded_repo.yaml'): with open(yaml_path, 'r') as f: config = yaml.safe_load(f) - return config['repos'] + return config def check_topic_path_exists(project_id, topic_path): """ diff --git a/flask_app/forwarded_repo.yaml b/flask_app/forwarded_repo.yaml index 1fc30d29..2a689537 100644 --- a/flask_app/forwarded_repo.yaml +++ b/flask_app/forwarded_repo.yaml @@ -1,3 +1,4 @@ +orgs: + kubeflow: 1.0 repos: abcdefgs0324/issue-label-bot-test: 1.0 - kubeflow/kubeflow: 0.75 diff --git a/script/create_secrets.py b/script/create_secrets.py new file mode 100644 index 
00000000..ee5f17d5 --- /dev/null +++ b/script/create_secrets.py @@ -0,0 +1,152 @@ +#!/usr/bin/python +"""A script to create the required secrets in one namespace by copying them +from another namespace +""" + +import base64 +import fire +from google.cloud import storage +from kubernetes import client as k8s_client +from kubernetes import config as k8s_config +from kubernetes.client import rest +import logging +import yaml +import os +import re +import subprocess + +# The namespace for the dev environment. +DEV_NAMESPACE = "label-bot-dev" + +GCS_REGEX = re.compile("gs://([^/]*)(/.*)?") + +def split_gcs_uri(gcs_uri): + """Split a GCS URI into bucket and path.""" + m = GCS_REGEX.match(gcs_uri) + bucket = m.group(1) + path = "" + if m.group(2): + path = m.group(2).lstrip("/") + return bucket, path + +def secret_exists(namespace, name, client): + api = k8s_client.CoreV1Api(client) + + try: + api.read_namespaced_secret(name, namespace) + return True + except rest.ApiException as e: + if e.status != 404: + raise + + return False + +def _read_gcs_path(gcs_path): + bucket_name, blob_name = split_gcs_uri(gcs_path) + + storage_client = storage.Client() + + bucket = storage_client.bucket(bucket_name) + blob = bucket.blob(blob_name) + contents = blob.download_as_string().decode() + + return contents + +class SecretCreator: + + @staticmethod + def _secret_from_gcs(secret_name, gcs_path): + bucket_name, blob_name = split_gcs_uri(gcs_path) + + storage_client = storage.Client() + + bucket = storage_client.bucket(bucket_name) + blob = bucket.blob(blob_name) + contents = blob.download_as_string().decode() + + file_name = os.path.basename(blob_name) + namespace, name = secret_name.split("/", 1) + subprocess.check_call(["kubectl", "-n", namespace, "create", + "secret", "generic", + name, + f"--from-literal=f{file_name}={contents}"]) + @staticmethod + def copy_secret(source, dest): + """Create the dev version of the secrets. 
+ + Args: + source: {namespace}/{secret name} + dest: {namespace}/{secret name} + """ + src_namespace, src_name = source.split("/", 1) + dest_namespace, dest_name = dest.split("/", 1) + + data = subprocess.check_output(["kubectl", "-n", src_namespace, "get", + "secrets", src_name, "-o", + "yaml"]) + + encoded = yaml.load(data) + decoded = {} + + for k, v in encoded["data"].items(): + decoded[k] = base64.b64decode(v).decode() + + command = ["kubectl", "create", "-n", dest_namespace, "secret", + "generic", dest_name] + + for k, v in decoded.items(): + command.append(f"--from-literal={k}={v}") + + subprocess.check_call(command) + + @staticmethod + def create_dev(): + """Create the secrets for the dev environment.""" + + k8s_config.load_kube_config(persist_config=False) + + client = k8s_client.ApiClient() + + if secret_exists(DEV_NAMESPACE, "user-gcp-sa", client): + logging.warning(f"Secret {DEV_NAMESPACE}/user-gcp-sa already exists; " + f"Not recreating it.") + else: + # We get a GCP secret by copying it from the kubeflow namespace. 
+ SecretCreator.copy_secret("kubeflow/user-gcp-sa", + f"{DEV_NAMESPACE}/user-gcp-sa") + + if secret_exists(DEV_NAMESPACE, "github-app", client): + logging.warning(f"Secret {DEV_NAMESPACE}/github-app already exists; " + f"Not recreating it.") + else: + # Create the secret containing the PEM key for the github app + SecretCreator._secret_from_gcs(f"{DEV_NAMESPACE}/github-app", + "gs://issue-label-bot-dev_secrets/kf-label-bot-dev.2019-12-30.private-key.pem") + + # Create the inference secret containing the postgres database with + # postgres secret and the webhook secret + inference_secret = "ml-app-inference-secret" + if secret_exists(DEV_NAMESPACE, inference_secret, client): + logging.warning(f"Secret {DEV_NAMESPACE}/{inference_secret} already exists; " + f"Not recreating it.") + else: + postgres = _read_gcs_path("gs://issue-label-bot-dev_secrets/" + "issue-label-bot.postgres") + webhook = _read_gcs_path("gs://issue-label-bot-dev_secrets/" + "kf-label-bot-dev.webhook.secret") + + subprocess.check_call(["kubectl", "-n", DEV_NAMESPACE, "create", + "secret", "generic", + inference_secret, + f"--from-literal=DATABASE_URL={postgres}", + f"--from-literal=WEBHOOK_SECRET={webhook}"]) + + +if __name__ == '__main__': + logging.basicConfig(level=logging.INFO, + format=('%(levelname)s|%(asctime)s' + '|%(message)s|%(pathname)s|%(lineno)d|'), + datefmt='%Y-%m-%dT%H:%M:%S', + ) + + fire.Fire(SecretCreator) diff --git a/script/send_request.py b/script/send_request.py new file mode 100644 index 00000000..9f8328c5 --- /dev/null +++ b/script/send_request.py @@ -0,0 +1,62 @@ +"""A helper script to send requests to test the label bot.""" +import base64 +import fire +import hmac +import json +import logging +import requests +import subprocess + +class SendRequest: + @staticmethod + def send(url="https://label-bot-dev.mlbot.net/event_handler"): + # Get the webhook secret + secret = subprocess.check_output(["kubectl", "get", "secret", + "ml-app-inference-secret", + "-o", 
"jsonpath='{.data.WEBHOOK_SECRET}'"]) + + secret_decoded = base64.b64decode(secret).decode() + + # TODO(jlewi): We should allow specificing a specific issue. + payload = { + "action": "opened", + # Installation corresponding to kf-label-bot-dev on + # kubeflow/code-intelligence + "installation": { + "id": 5980888, + }, + "issue": { + "number": 99, + "title": "Test kf-label bot-dev this is a bug", + "body": ("Test whether events are correctly routed to the dev instance." + "If not then there is a bug in the setup") + }, + "repository": { + "full_name": "kubeflow/code-intelligence", + "private": False, + } + } + + data = str.encode(json.dumps(payload)) + # See: https://developer.github.com/webhooks/securing/ + # We need to compute the signature of the payload using the secret + mac = hmac.new(str.encode(secret_decoded), msg=data, digestmod='sha1') + + headers = { + "X-Hub-Signature": "=" + mac.hexdigest(), + "Content-Type": "application/json", + } + + # We use data and not json because we need to compute the hash of the + # data to match the signature + requests.post(url, data=data, headers=headers) + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO, + format=('%(levelname)s|%(asctime)s' + '|%(message)s|%(pathname)s|%(lineno)d|'), + datefmt='%Y-%m-%dT%H:%M:%S', + ) + + fire.Fire(SendRequest) + diff --git a/skaffold.yaml b/skaffold.yaml new file mode 100644 index 00000000..c1bb177b --- /dev/null +++ b/skaffold.yaml @@ -0,0 +1,40 @@ +# Reference: https://skaffold.dev/docs/references/yaml/ +apiVersion: skaffold/v2alpha1 +kind: Config +metadata: + name: label-bot +build: + artifacts: + - image: gcr.io/github-probots/label-bot-frontend + # Set the context to the root directory. + # All paths in the Dockerfile should be relative to this one. + context: . 
+ # TODO(https://github.com/GoogleContainerTools/skaffold/issues/3448): We use manual sync + # because inferred sync doesn't work + sync: + manual: + - src: 'flask_app/*.py' + dest: '/' + kaniko: + dockerfile: deployment/Dockerfile + buildContext: + gcsBucket: github-probots_skaffold + env: + # TODO(GoogleContainerTools/skaffold#3468) skaffold doesn't + # appear to work with workload identity + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /secret/user-gcp-sa.json + cache: {} + cluster: + pullSecretName: user-gcp-sa + # Build in the kaniko namespace because we need to disable ISTIO sidecar injection + # see GoogleContainerTools/skaffold#3442 + namespace: label-bot-dev + resources: + requests: + cpu: 8 + memory: 16Gi + +deploy: + kustomize: + path: deployment/overlays/dev \ No newline at end of file