diff --git a/README.md b/README.md index 8658f37..aa78829 100644 --- a/README.md +++ b/README.md @@ -27,3 +27,4 @@ To add your own project below, just [edit](https://github.com/wwdc/2022/edit/mai |[Peter Yaacoub](https://github.com/Yaacoub)|[GitHub](https://github.com/Yaacoub/Swift-Student-Challenge/tree/main/WWDC%202022)|[YouTube](https://youtu.be/t4NQSHLIbaw)|AVFoundation, CoreGraphics, SwiftUI, UIKit|Accepted| |[Riccardo Persello](https://github.com/persello)|[GitHub](https://github.com/persello/ssc22)||Accelerate, AVFoundation, SwiftUI|Accepted| |[Yunho Oh](https://github.com/Helloyunho)|[GitHub](https://github.com/Helloyunho/about_computer_bits)|[YouTube](https://youtu.be/V8Zhc-dDbVI)|SwiftUI|Rejected| +|[Yaoyao Wu](https://github.com/wyy511511)|[GitHub](https://github.com/wyy511511/HockeyTourWithBingDwenDwen)|[YouTube]()|SwiftUI,ARKit,ARQuickLook|Accepted| diff --git a/feedforward_LM_post.ipynb b/feedforward_LM_post.ipynb new file mode 100644 index 0000000..3933abe --- /dev/null +++ b/feedforward_LM_post.ipynb @@ -0,0 +1,439 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "ceZgvB4HW-qc" + }, + "outputs": [], + "source": [ + "'''\n", + "First, we will import everything we need. We will also define a couple of useful functions.\n", + "'''\n", + "import torch\n", + "from torch import nn\n", + "from torch import optim\n", + "\n", + "import random\n", + "\n", + "# This is a function that prints the number of trainable parameters \n", + "# of a model.\n", + "def count_parameters(model):\n", + " return sum(p.numel() for p in model.parameters() if p.requires_grad)\n", + "\n", + "# This functions prints all parameters (and their gradients) of a model.\n", + "def print_parameters(model):\n", + " for name, param in model.named_parameters():\n", + " print(name)\n", + " print(param.data)\n", + " print(param.grad)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "2I6uUdg9W-qe" + }, + "outputs": [], + "source": [ + "'''\n", + "Then, we need to define our model. Remember, we want to build a bigram language model.\n", + "'''\n", + "class FeedforwardLM(nn.Module):\n", + " \n", + " def __init__(self, vocab_size, embedding_dim, hidden_dim):\n", + " super().__init__()\n", + " \n", + " self.embedding = nn.Embedding(vocab_size, embedding_dim)\n", + " self.hidden_layer = nn.Linear(embedding_dim, hidden_dim)\n", + " self.output_layer = nn.Linear(hidden_dim, vocab_size)\n", + " \n", + " self.relu = nn.ReLU()\n", + " \n", + " def forward(self, input):\n", + "\n", + " embedding = self.embedding(input)\n", + " hidden_rep = self.relu(self.hidden_layer(embedding))\n", + " output = self.output_layer(hidden_rep)\n", + " \n", + " return output" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "B5xgbVvqW-qe" + }, + "outputs": [], + "source": [ + "def load_data(filename, old_vocab=False):\n", + " \n", + " # TODO: Load data, convert text into tensors, construct vocabulary, return data and vocab\n", + " if not old_vocab:\n", + " vocab = {'': 0}\n", + " else:\n", + " vocab = old_vocab\n", + " data = list()\n", + " file = open(filename)\n", + " for line in file:\n", + " line_text = line.split()\n", + " line_text = [''] + line_text + ['']\n", + " \n", + " if not old_vocab:\n", + " # form vocabulary\n", + " for word in line_text:\n", + " if word not in vocab:\n", + " vocab[word] = len(vocab)\n", + " \n", + " # add words to data\n", + " for i, word in enumerate(line_text):\n", + " if i < len(line_text) - 1:\n", + " if word in vocab:\n", + " idx1 = vocab[word]\n", + " else: \n", + " idx1 = vocab['']\n", + " if line_text[i + 1] in vocab:\n", + " idx2 = vocab[line_text[i + 1]]\n", + " else: \n", + " idx2 = vocab['']\n", + " data.append((torch.tensor(idx1), torch.tensor(idx2)))\n", + " \n", + " return vocab, data" + ] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive') \n", + "\n", + "\n", + "import os\n", + "\n", + "data_dir = 'drive/MyDrive/Colab Notebooks/'" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "v7ka5nCTq-Ji", + "outputId": "a75849a3-7a9f-4003-ed20-ecb165c46c53" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/drive\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "vocab, train_data = load_data(data_dir+'bible.train.txt')\n", + "print(len(vocab))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-u_E7jeHrGUh", + "outputId": "2e73a625-8d93-4b9c-ed93-7d83b263ef6d" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "7050\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OV6xzqYEW-qf", + "outputId": "d21e7997-a762-445e-b22e-b8e530349c5b" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "7050\n", + "7050\n", + "7050\n", + "(tensor(1), tensor(2))\n", + "(tensor(205), tensor(4))\n" + ] + } + ], + "source": [ + "# Let's put it all together. \n", + "\n", + "# 1) Load the data, and shuffle the training data.\n", + "# TODO\n", + "vocab, train_data = load_data(data_dir+'bible.train.txt')\n", + "print(len(vocab))\n", + "_, dev_data = load_data(data_dir+'bible.dev.txt', vocab)\n", + "print(len(vocab))\n", + "_, test_data = load_data(data_dir+'bible.test.txt', vocab)\n", + "print(len(vocab))\n", + "\n", + "print(train_data[0])\n", + "random.shuffle(train_data)\n", + "print(train_data[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NR1QqTsnW-qg", + "outputId": "e6e53249-89b4-4430-cfae-e7e98d697f4b" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "183465" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "# 2) Initialize our model.\n", + "# TODO\n", + "\n", + "our_lm = FeedforwardLM(len(vocab), 10, 15)\n", + "count_parameters(our_lm)\n", + "#print_parameters(our_lm)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Saqt81EAW-qh", + "outputId": "b3c79ad5-7bed-42f0-a6f8-798edc5d6ec3" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "### Epoch: 1 ###\n", + "Average loss: tensor(6.4752, grad_fn=)\n", + "tensor(5.7503, grad_fn=)\n", + "### Epoch: 2 ###\n", + "Average loss: tensor(6.4417, grad_fn=)\n", + "tensor(5.4675, grad_fn=)\n", + "### Epoch: 3 ###\n", + "Average loss: tensor(6.5658, grad_fn=)\n", + "tensor(5.2705, grad_fn=)\n", + "### Epoch: 4 ###\n", + "Average loss: tensor(6.7042, grad_fn=)\n", + "tensor(5.1239, grad_fn=)\n", + "### Epoch: 5 ###\n", + "Average loss: tensor(6.7930, grad_fn=)\n", + "tensor(5.0167, grad_fn=)\n", + "### Epoch: 6 ###\n", + "Average loss: tensor(6.8785, grad_fn=)\n", + "tensor(4.9195, grad_fn=)\n", + "### Epoch: 7 ###\n", + "Average loss: tensor(6.9361, grad_fn=)\n", + "tensor(4.8433, grad_fn=)\n", + "### Epoch: 8 ###\n", + "Average loss: tensor(6.9608, grad_fn=)\n", + "tensor(4.7808, grad_fn=)\n", + "### Epoch: 9 ###\n", + "Average loss: tensor(7.0621, grad_fn=)\n", + "tensor(4.7264, grad_fn=)\n", + "### Epoch: 10 ###\n", + "Average loss: tensor(7.1504, grad_fn=)\n", + "tensor(4.6751, grad_fn=)\n" + ] + } + ], + "source": [ + "# 3) Now we train our model. \n", + "# TODO\n", + "\n", + "epochs = 10\n", + "ce = nn.CrossEntropyLoss()\n", + "softmax = nn.Softmax(dim=0)\n", + "optimizer = optim.SGD(our_lm.parameters(), lr=0.1)\n", + "\n", + "for i in range(epochs):\n", + " print('### Epoch: ' + str(i+1) + ' ###')\n", + " av_loss = 0\n", + " our_lm.train()\n", + " for (x, y) in train_data[:10000]:\n", + " optimizer.zero_grad()\n", + " \n", + " # a) calculate probs / get an output\n", + " y_raw = our_lm(x)\n", + " y_hat = softmax(y_raw)\n", + " \n", + " # b) compute loss\n", + " loss = ce(y_raw.unsqueeze(0),y.unsqueeze(0))\n", + " av_loss += loss\n", + " \n", + " # c) get the gradient\n", + " loss.backward()\n", + "\n", + " # d) update the weights\n", + " optimizer.step()\n", + " validate(our_lm, dev_data)\n", + " print(av_loss/len(train_data[:10000]))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1atkuorSW-qh", + "outputId": "91399cd8-264f-4f39-9881-900b9e4e8575" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "walk as\n", + "to him\n", + "as the\n", + "him ,\n", + "the same\n", + ", but\n", + "same people\n", + "but ,\n", + "people ,\n", + ", but\n" + ] + } + ], + "source": [ + "# Use the model to predict some words!\n", + "# TODO\n", + "\n", + "words = ['walk', 'to']\n", + "\n", + "for i in range(10):\n", + " word = words[i]\n", + " idx = vocab[word]\n", + " tensor_idx = torch.tensor(idx)\n", + " \n", + " raw_output = our_lm(tensor_idx)\n", + " probs = softmax(raw_output)\n", + " \n", + " pred = torch.argmax(probs)\n", + " \n", + " # Print prediction.\n", + " for w, v in vocab.items():\n", + " if v == pred:\n", + " print(word + ' ' + w)\n", + " words.append(w)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "u5v16264W-qi" + }, + "outputs": [], + "source": [ + "# Note: Perplexity is just exp(2, cross-entropy). So we just use the loss here.\n", + "def validate(model, data):\n", + " \n", + " model.eval()\n", + " \n", + " # TODO: Implement validation function\n", + " av_loss = 0\n", + " for (x, y) in data[:1000]:\n", + " \n", + " # a) calculate probs / get an output\n", + " y_raw = model(x)\n", + " \n", + " # b) compute loss\n", + " loss = ce(y_raw.unsqueeze(0),y.unsqueeze(0))\n", + " av_loss += loss\n", + "\n", + " av_loss = av_loss/len(data[:1000])\n", + " \n", + " print(\"Average loss: \" + str(av_loss))\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H6kl55XHW-qj" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "colab": { + "provenance": [], + "include_colab_link": true + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file