diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..b58a297a --- /dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +.* +*.pem +*.hdf5 +**/*.pem +*.pkl +**/flask_session \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..b371d4d5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.* +*.pem +*.hdf5 +*.pkl +!.gitignore +!.dockerignore +**/flask_session \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..ae941aec --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Hamel Husain + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Pipfile b/Pipfile new file mode 100644 index 00000000..2b23cb71 --- /dev/null +++ b/Pipfile @@ -0,0 +1,138 @@ +[[source]] +name = "pypi" +url = "https://pypi.org/simple" +verify_ssl = true + +[dev-packages] + +[packages] +absl-py = "==0.7.1" +appnope = "==0.1.0" +asn1crypto = "==0.24.0" +astor = "==0.7.1" +backcall = "==0.1.0" +bleach = "==3.1.0" +blis = "==0.2.4" +cachetools = "==3.1.0" +certifi = "==2019.3.9" +cffi = "==1.12.2" +chardet = "==3.0.4" +click = "==7.0" +cryptography = "==2.6.1" +cymem = "==2.0.2" +cytoolz = "==0.9.0.1" +dask = "==1.1.5" +decorator = "==4.4.0" +defusedxml = "==0.5.0" +dill = "==0.2.9" +entrypoints = "==0.3" +enum34 = "==1.1.6" +ftfy = "==4.4.3" +gast = "==0.2.2" +grpcio = "==1.19.0" +h5py = "==2.9.0" +html5lib = "==1.0.1" +idna = "==2.8" +ijson = "==2.3" +ipdb = "==0.12" +ipykernel = "==5.1.0" +ipython = "==7.4.0" +ipywidgets = "==7.4.2" +itsdangerous = "==1.1.0" +jedi = "==0.13.3" +jsonify = "==0.5" +jsonschema = "==2.6.0" +jupyter = "==1.0.0" +jwcrypto = "==0.6.0" +jwt = "==0.6.1" +kiwisolver = "==1.0.1" +ktext = "==0.34" +matplotlib = "==3.0.3" +mistune = "==0.8.4" +mock = "==2.0.0" +more-itertools = "==7.0.0" +msgpack-numpy = "==0.4.4.2" +msgpack = "==0.6.1" +multiprocess = "==0.70.7" +murmurhash = "==1.0.2" +nbconvert = "==5.4.1" +nbformat = "==4.4.0" +networkx = "==2.2" +notebook = "==5.7.7" +numpy = "==1.16.2" +pandas = "==0.24.2" +pandocfilters = "==1.4.2" +parso = "==0.3.4" +pathos = "==0.2.3" +pbr = "==5.1.3" +pexpect = "==4.6.0" +pickleshare = "==0.7.5" +plac = "==0.9.6" +pox = "==0.2.5" +ppft = "==1.6.4.9" +preshed = "==2.0.1" +protobuf = "==3.7.1" +ptyprocess = "==0.6.0" +pyarrow = "==0.12.1" +pycparser = "==2.19" +pyemd = "==0.5.1" +pyparsing = "==2.3.1" +python-dateutil = "==2.8.0" +pytz = "==2018.9" +pyzmq = "==18.0.1" +qtconsole = "==4.4.3" +requests = "==2.21.0" +scikit-learn = "==0.20.3" +scipy = "==1.2.1" +six = "==1.12.0" +spacy = "==2.1.3" +srsly = "==0.0.5" +tensorboard = "==1.12.2" +tensorflow = 
"==1.12.0" +termcolor = "==1.1.0" +terminado = "==0.8.2" +testpath = "==0.4.2" +textacy = "==0.6.2" +thinc = "==7.0.4" +toolz = "==0.9.0" +tornado = "==6.0.2" +tqdm = "==4.31.1" +traitlets = "==4.3.2" +uritemplate = "==3.0.0" +urllib3 = "==1.24.1" +wasabi = "==0.2.1" +wcwidth = "==0.1.7" +webencodings = "==0.5.1" +widgetsnbextension = "==3.4.2" +"backports.weakref" = "==1.0.post1" +Cycler = "==0.10.0" +Flask-Session = "==0.3.1" +Flask = "==1.0.2" +"github3.py" = "==1.3.0" +ipython_genutils = "==0.2.0" +Jinja2 = "==2.10" +jupyter_client = "==5.2.4" +jupyter_console = "==6.0.0" +jupyter_core = "==4.4.0" +Keras-Applications = "==1.0.7" +Keras-Preprocessing = "==1.0.9" +Keras = "==2.2.4" +Markdown = "==3.1" +MarkupSafe = "==1.1.1" +prometheus_client = "==0.6.0" +prompt_toolkit = "==2.0.9" +Pygments = "==2.3.1" +Pyphen = "==0.9.5" +python-Levenshtein = "==0.12.0" +PyYAML = "==5.1" +Send2Trash = "==1.5.0" +SQLAlchemy = "==1.3.1" +Unidecode = "==1.0.23" +Werkzeug = "==0.15.1" +flask-sqlalchemy = "*" +psycopg2 = "*" +psycopg2-binary = "*" +pyjwt = "*" + +[requires] +python_version = "3.6" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 00000000..93460948 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,1529 @@ +{ + "_meta": { + "hash": { + "sha256": "75e655fa453f84a3ccdb9434dd2c84333dec5145fa496f61ca33c4bd44b6f15c" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.6" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "absl-py": { + "hashes": [ + "sha256:b943d1c567743ed0455878fcd60bc28ac9fae38d129d1ccfad58079da00b8951" + ], + "index": "pypi", + "version": "==0.7.1" + }, + "appnope": { + "hashes": [ + "sha256:5b26757dc6f79a3b7dc9fab95359328d5747fcb2409d331ea66d0272b90ab2a0", + "sha256:8b995ffe925347a2138d7ac0fe77155e4311a0ea6d6da4f5128fe4b3cbe5ed71" + ], + "index": "pypi", + "version": "==0.1.0" + }, + "asn1crypto": { + "hashes": [ + 
"sha256:2f1adbb7546ed199e3c90ef23ec95c5cf3585bac7d11fb7eb562a3fe89c64e87", + "sha256:9d5c20441baf0cb60a4ac34cc447c6c189024b6b4c6cd7877034f4965c464e49" + ], + "index": "pypi", + "version": "==0.24.0" + }, + "astor": { + "hashes": [ + "sha256:95c30d87a6c2cf89aa628b87398466840f0ad8652f88eb173125a6df8533fb8d", + "sha256:fb503b9e2fdd05609fbf557b916b4a7824171203701660f0c55bbf5a7a68713e" + ], + "index": "pypi", + "version": "==0.7.1" + }, + "backcall": { + "hashes": [ + "sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4", + "sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2" + ], + "index": "pypi", + "version": "==0.1.0" + }, + "backports.weakref": { + "hashes": [ + "sha256:81bc9b51c0abc58edc76aefbbc68c62a787918ffe943a37947e162c3f8e19e82", + "sha256:bc4170a29915f8b22c9e7c4939701859650f2eb84184aee80da329ac0b9825c2" + ], + "index": "pypi", + "version": "==1.0.post1" + }, + "bleach": { + "hashes": [ + "sha256:213336e49e102af26d9cde77dd2d0397afabc5a6bf2fed985dc35b5d1e285a16", + "sha256:3fdf7f77adcf649c9911387df51254b813185e32b2c6619f690b593a617e19fa" + ], + "index": "pypi", + "version": "==3.1.0" + }, + "blis": { + "hashes": [ + "sha256:039129410a338be8db8cf48c54334bd7c30da7e72bad2741e59313b1d242814b", + "sha256:058f9109aaea9d4f88cb623a44994d96c8cf36448de3e1bd30210628d6b52e9e", + "sha256:278d7b95e56cf82a6bef91cd8283eadc9401f2d3bdbbf2cdfdb605cf9081c36e", + "sha256:2d4ca1508fd6229c7994fc17ba324083a5b83f66612c8ea62623a41a1768b030", + "sha256:51a54bad6175e9b154beeb628a879ed492ee2247c9e40c77bdf6fc772145130c", + "sha256:886b313f96d4e268a0587e98c1637d963c73defa8de51e2e6b0d0bd00f16afbb", + "sha256:9f12e6f1e4b10dbb1e0e34e98f60e8435058a60d544a009cb761351fe1d12cad", + "sha256:a54d4fa1908d586f8bce9851a453cb89d1542e9aca65b8b88e9bb9432d626f80", + "sha256:b9d6cef13d95e3752320cd942df25e09160a6f9dfc3d7b41af7cdc772ab18270", + "sha256:d571464d195a950e60bf1547c8914d4da50952e06a0f38cea7b0829d0a4b985a", + 
"sha256:d616d64c85e6be92d69a1410dc58146cb9603fd1eb148f9ee512b8fddfd789f6", + "sha256:e477c7eaacf7dcccbb190a29559579efb287ecf5c2a9a7a6f9acb0452899f033", + "sha256:e6ae1986625af86f90f111f9d2d284b9e45fddfe56cf40524cdd9417a6a33b87" + ], + "index": "pypi", + "version": "==0.2.4" + }, + "cachetools": { + "hashes": [ + "sha256:219b7dc6024195b6f2bc3d3f884d1fef458745cd323b04165378622dcc823852", + "sha256:9efcc9fab3b49ab833475702b55edd5ae07af1af7a4c627678980b45e459c460" + ], + "index": "pypi", + "version": "==3.1.0" + }, + "certifi": { + "hashes": [ + "sha256:59b7658e26ca9c7339e00f8f4636cdfe59d34fa37b9b04f6f9e9926b3cece1a5", + "sha256:b26104d6835d1f5e49452a26eb2ff87fe7090b89dfcaee5ea2212697e1e1d7ae" + ], + "index": "pypi", + "version": "==2019.3.9" + }, + "cffi": { + "hashes": [ + "sha256:00b97afa72c233495560a0793cdc86c2571721b4271c0667addc83c417f3d90f", + "sha256:0ba1b0c90f2124459f6966a10c03794082a2f3985cd699d7d63c4a8dae113e11", + "sha256:0bffb69da295a4fc3349f2ec7cbe16b8ba057b0a593a92cbe8396e535244ee9d", + "sha256:21469a2b1082088d11ccd79dd84157ba42d940064abbfa59cf5f024c19cf4891", + "sha256:2e4812f7fa984bf1ab253a40f1f4391b604f7fc424a3e21f7de542a7f8f7aedf", + "sha256:2eac2cdd07b9049dd4e68449b90d3ef1adc7c759463af5beb53a84f1db62e36c", + "sha256:2f9089979d7456c74d21303c7851f158833d48fb265876923edcb2d0194104ed", + "sha256:3dd13feff00bddb0bd2d650cdb7338f815c1789a91a6f68fdc00e5c5ed40329b", + "sha256:4065c32b52f4b142f417af6f33a5024edc1336aa845b9d5a8d86071f6fcaac5a", + "sha256:51a4ba1256e9003a3acf508e3b4f4661bebd015b8180cc31849da222426ef585", + "sha256:59888faac06403767c0cf8cfb3f4a777b2939b1fbd9f729299b5384f097f05ea", + "sha256:59c87886640574d8b14910840327f5cd15954e26ed0bbd4e7cef95fa5aef218f", + "sha256:610fc7d6db6c56a244c2701575f6851461753c60f73f2de89c79bbf1cc807f33", + "sha256:70aeadeecb281ea901bf4230c6222af0248c41044d6f57401a614ea59d96d145", + "sha256:71e1296d5e66c59cd2c0f2d72dc476d42afe02aeddc833d8e05630a0551dad7a", + 
"sha256:8fc7a49b440ea752cfdf1d51a586fd08d395ff7a5d555dc69e84b1939f7ddee3", + "sha256:9b5c2afd2d6e3771d516045a6cfa11a8da9a60e3d128746a7fe9ab36dfe7221f", + "sha256:9c759051ebcb244d9d55ee791259ddd158188d15adee3c152502d3b69005e6bd", + "sha256:b4d1011fec5ec12aa7cc10c05a2f2f12dfa0adfe958e56ae38dc140614035804", + "sha256:b4f1d6332339ecc61275bebd1f7b674098a66fea11a00c84d1c58851e618dc0d", + "sha256:c030cda3dc8e62b814831faa4eb93dd9a46498af8cd1d5c178c2de856972fd92", + "sha256:c2e1f2012e56d61390c0e668c20c4fb0ae667c44d6f6a2eeea5d7148dcd3df9f", + "sha256:c37c77d6562074452120fc6c02ad86ec928f5710fbc435a181d69334b4de1d84", + "sha256:c8149780c60f8fd02752d0429246088c6c04e234b895c4a42e1ea9b4de8d27fb", + "sha256:cbeeef1dc3c4299bd746b774f019de9e4672f7cc666c777cd5b409f0b746dac7", + "sha256:e113878a446c6228669144ae8a56e268c91b7f1fafae927adc4879d9849e0ea7", + "sha256:e21162bf941b85c0cda08224dade5def9360f53b09f9f259adb85fc7dd0e7b35", + "sha256:fb6934ef4744becbda3143d30c6604718871495a5e36c408431bf33d9c146889" + ], + "index": "pypi", + "version": "==1.12.2" + }, + "chardet": { + "hashes": [ + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" + ], + "index": "pypi", + "version": "==3.0.4" + }, + "click": { + "hashes": [ + "sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13", + "sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7" + ], + "index": "pypi", + "version": "==7.0" + }, + "cryptography": { + "hashes": [ + "sha256:066f815f1fe46020877c5983a7e747ae140f517f1b09030ec098503575265ce1", + "sha256:210210d9df0afba9e000636e97810117dc55b7157c903a55716bb73e3ae07705", + "sha256:26c821cbeb683facb966045e2064303029d572a87ee69ca5a1bf54bf55f93ca6", + "sha256:2afb83308dc5c5255149ff7d3fb9964f7c9ee3d59b603ec18ccf5b0a8852e2b1", + "sha256:2db34e5c45988f36f7a08a7ab2b69638994a8923853dec2d4af121f689c66dc8", + 
"sha256:409c4653e0f719fa78febcb71ac417076ae5e20160aec7270c91d009837b9151", + "sha256:45a4f4cf4f4e6a55c8128f8b76b4c057027b27d4c67e3fe157fa02f27e37830d", + "sha256:48eab46ef38faf1031e58dfcc9c3e71756a1108f4c9c966150b605d4a1a7f659", + "sha256:6b9e0ae298ab20d371fc26e2129fd683cfc0cfde4d157c6341722de645146537", + "sha256:6c4778afe50f413707f604828c1ad1ff81fadf6c110cb669579dea7e2e98a75e", + "sha256:8c33fb99025d353c9520141f8bc989c2134a1f76bac6369cea060812f5b5c2bb", + "sha256:9873a1760a274b620a135054b756f9f218fa61ca030e42df31b409f0fb738b6c", + "sha256:9b069768c627f3f5623b1cbd3248c5e7e92aec62f4c98827059eed7053138cc9", + "sha256:9e4ce27a507e4886efbd3c32d120db5089b906979a4debf1d5939ec01b9dd6c5", + "sha256:acb424eaca214cb08735f1a744eceb97d014de6530c1ea23beb86d9c6f13c2ad", + "sha256:c8181c7d77388fe26ab8418bb088b1a1ef5fde058c6926790c8a0a3d94075a4a", + "sha256:d4afbb0840f489b60f5a580a41a1b9c3622e08ecb5eec8614d4fb4cd914c4460", + "sha256:d9ed28030797c00f4bc43c86bf819266c76a5ea61d006cd4078a93ebf7da6bfd", + "sha256:e603aa7bb52e4e8ed4119a58a03b60323918467ef209e6ff9db3ac382e5cf2c6" + ], + "index": "pypi", + "version": "==2.6.1" + }, + "cycler": { + "hashes": [ + "sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d", + "sha256:cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8" + ], + "index": "pypi", + "version": "==0.10.0" + }, + "cymem": { + "hashes": [ + "sha256:081c652ae1aff4759813e93a2fc4df4ba410ce214a0e542988e24c62110d4cd0", + "sha256:0e447fa4cb6dccd0b96257a798370a17bef3ec254a527230058e41816a777c04", + "sha256:2c8267dcb15cc6ab318f01ceaf16b8440c0386ae44014d5b22fefe5b0398d05c", + "sha256:46141111eedbb5b0d8c9386b00226a15f5727a1202b9095f4363d425f259267e", + "sha256:4994c1f3e948bd58a6e38c905221680563b851983a15f1f01e5ff415d560d153", + "sha256:584872fd3df176e50c90e37aaca6cb731ac0abcdea4f5b8ad77c30674cfaaa99", + "sha256:6e3194135b21bb268030f3473beb8b674b356c330a9fa185dced2f5006cbd5ba", + 
"sha256:71710ee0e946a6bd33c86dd9e71f95ad584c65e8bb02615f00ceb0d8348fb303", + "sha256:741957f541fb8322de5a8c711d5d58f80d684225d2aec32fec92484cac931a52", + "sha256:7f01ba6153427811cd7d35630081c69b32c188a1d330599a826ef3bf17edbd7c", + "sha256:8d96e95902e781950d7c255b19364a1ed50a204843d63dd386b0abc5e6df5e44", + "sha256:8dd169ece1629ec4db1a592321e3ae0a9bb62fda2052a351fc36871f314c3569", + "sha256:8e6ad29636edd559b0dfe0a19c5cb5e6257461a5df90839e8c7710ddb005f4b4", + "sha256:9935b233882732f03fd0fadbeb9e9aa672edcdd126e6d52c36d60adf1def8ea5", + "sha256:a38b3229782411e4b23240f5f90000c4e7a834af88ed8763c66f8e4603db6b51", + "sha256:a5966b3171bad9c84a2b19dccda5ab37ae8437c0709a6b72cb42b64ea76a4bd3", + "sha256:ab88b1534f06df07262d9bc5efb3ba07948cdbe9a363eb9eaa4ad42fae6c7b5e", + "sha256:b08b0dd7adafbff9f0fd7dc8dcad5f3ce6f23c126c81ad8d1666880cc94e6974", + "sha256:ba47b571d480c0b76d282ff1634372070031d4998a46ae5d8305d49563b74ca6", + "sha256:bf049dc9cf0d3aa4a48ba514b7f1699fb6f35b18ad8c6f018bd13e0bccd9d30c", + "sha256:c46a122c524a3270ac5249f590ac2f75f1a83692a3d3a03479cea49de72a0a89", + "sha256:c63337aa7e1ad4ec182cc7847c6d85390589fbbf1f9f67d1fde8133a9acb7fa8", + "sha256:ec51273ea08a2c6389bc4dd6b5183354826d916b149a041f2f274431166191bc" + ], + "index": "pypi", + "version": "==2.0.2" + }, + "cytoolz": { + "hashes": [ + "sha256:84cc06fa40aa310f2df79dd440fc5f84c3e20f01f9f7783fc9c38d0a11ba00e5" + ], + "index": "pypi", + "version": "==0.9.0.1" + }, + "dask": { + "hashes": [ + "sha256:4b0b82a4d61714d3a49953274b1a8a689a51eacf89c4c2ff18aa7f6282ce515e", + "sha256:4b646948442d035e69eba4514f2fccd6553f6c8ac7ce0f58f73ba1ee4394b8e5" + ], + "index": "pypi", + "version": "==1.1.5" + }, + "decorator": { + "hashes": [ + "sha256:86156361c50488b84a3f148056ea716ca587df2f0de1d34750d35c21312725de", + "sha256:f069f3a01830ca754ba5258fde2278454a0b5b79e0d7f5c13b3b97e57d4acff6" + ], + "index": "pypi", + "version": "==4.4.0" + }, + "defusedxml": { + "hashes": [ + 
"sha256:24d7f2f94f7f3cb6061acb215685e5125fbcdc40a857eff9de22518820b0a4f4", + "sha256:702a91ade2968a82beb0db1e0766a6a273f33d4616a6ce8cde475d8e09853b20" + ], + "index": "pypi", + "version": "==0.5.0" + }, + "dill": { + "hashes": [ + "sha256:f6d6046f9f9195206063dd0415dff185ad593d6ee8b0e67f12597c0f4df4986f" + ], + "index": "pypi", + "version": "==0.2.9" + }, + "entrypoints": { + "hashes": [ + "sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19", + "sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451" + ], + "index": "pypi", + "version": "==0.3" + }, + "enum34": { + "hashes": [ + "sha256:2d81cbbe0e73112bdfe6ef8576f2238f2ba27dd0d55752a776c41d38b7da2850", + "sha256:644837f692e5f550741432dd3f223bbb9852018674981b1664e5dc339387588a", + "sha256:6bd0f6ad48ec2aa117d3d141940d484deccda84d4fcd884f5c3d93c23ecd8c79", + "sha256:8ad8c4783bf61ded74527bffb48ed9b54166685e4230386a9ed9b1279e2df5b1" + ], + "index": "pypi", + "version": "==1.1.6" + }, + "flask": { + "hashes": [ + "sha256:2271c0070dbcb5275fad4a82e29f23ab92682dc45f9dfbc22c02ba9b9322ce48", + "sha256:a080b744b7e345ccfcbc77954861cb05b3c63786e93f2b3875e0913d44b43f05" + ], + "index": "pypi", + "version": "==1.0.2" + }, + "flask-session": { + "hashes": [ + "sha256:a31c27e0c3287f00c825b3d9625aba585f4df4cccedb1e7dd5a69a215881a731", + "sha256:b9b32126bfc52c3169089f2ed9a40e34b589527bda48b633428e07d39d9c8792" + ], + "index": "pypi", + "version": "==0.3.1" + }, + "flask-sqlalchemy": { + "hashes": [ + "sha256:3bc0fac969dd8c0ace01b32060f0c729565293302f0c4269beed154b46bec50b", + "sha256:5971b9852b5888655f11db634e87725a9031e170f37c0ce7851cf83497f56e53" + ], + "index": "pypi", + "version": "==2.3.2" + }, + "ftfy": { + "hashes": [ + "sha256:3c0066db64a98436e751e56414f03f1cdea54f29364c0632c141c36cca6a5d94" + ], + "index": "pypi", + "version": "==4.4.3" + }, + "gast": { + "hashes": [ + "sha256:fe939df4583692f0512161ec1c880e0a10e71e6a232da045ab8edd3756fbadf0" + ], + "index": "pypi", + "version": "==0.2.2" + 
}, + "github3.py": { + "hashes": [ + "sha256:15a115c18f7bfcf934dfef7ab103844eb9f620c586bad65967708926da47cbda", + "sha256:50833b5da35546b8cced0e8d7ff4c50a9afc2c8e46cc4d07dc4b66d26467c708" + ], + "index": "pypi", + "version": "==1.3.0" + }, + "grpcio": { + "hashes": [ + "sha256:07c7f7b251b26ef94e29d2c19245e34d4d05897325a289b31de3b6a5e16fbd6c", + "sha256:2ddbca16c2e7b3f2ffc6e34c7cfa6886fb01de9f156ad3f77b72ad652d632097", + "sha256:30d84f9684b4c81ee37906bb303a84435948c2dd3db55d3ef38f8daf28bc6ea3", + "sha256:316e6c79fb1585b23ae100ee26f6ffefa91a21e4d39588fa42efadd7f20c7225", + "sha256:400abff9a772351fff72d5698c8758b837bec3d7f4ed93de70bae744d8f63f53", + "sha256:4ed90a256f6f8690b5c95b9d4f2e9fe6513628f3674e9068e10637e50c2f93d6", + "sha256:51fd87ff610ca2f483c668c3fa7f70d479bffb3c14805d2065b51194edea5e26", + "sha256:5569aba69041530e04eff3d40536027db8851f4e11e6282849b9fc5b1855075d", + "sha256:566b752e36cdcd5a4d38f292aca4c8e3095f13cfe82606e010d67749cacba341", + "sha256:5817f970fbfed72a6203ff96349e796d8f6ff3ce85b58af241c4a14190d9f4d1", + "sha256:5a97bb5a4af16f840f1211dbe66d61592f02110f286d96e67bf6006d7f96aab7", + "sha256:5d57e41c913152b215eda070955b3544bdf20ed2327e5e5eed3005186220ebd0", + "sha256:6cec17145978cef3d20093cdc05e88da597ce05076db566a66a35b9c55d416a3", + "sha256:6ef7ab9b6ba09ce087ddb3b27f12504f50efdbf5d319b8b23173478765452301", + "sha256:756c0d65e4ebce1c47787dbb48955864f2a768e1df76902f33d3e4062c209f3e", + "sha256:828d13f0edd27f452af7fc23093c8a2d63d8fbd92595dbd0f698c78b13af9bdb", + "sha256:8cf02c4e07520be61ad8b59b0043771ef2af666cb73066516eabfee562a28df4", + "sha256:919dfe84d22ce2e2ae81d82238586d7c2a86714fb0b6cf9b437e336851e3c32d", + "sha256:b04a061280b06cdc4e68c4147a0f46b98c395cf62f0c6df4fa2a30a083cdc333", + "sha256:b2dbe7d2f9685bdbb4415f8e475dd96b1b1776193b7286705f90490c3f039037", + "sha256:b60df7cbc3e77c39d5befe6a1e6e4213f3ca683d743ff7c1622b1d4412245a55", + "sha256:b740681332b5a042b9e22246a3cdbfc3d644cf73d38e117f20ad9d8deab8f1a5", + 
"sha256:ba434873945d5d4542589674cb60c43a1cf76b2b5f0c0f759aa76d499055722f", + "sha256:bcb44cd53beccc92c730254ad3d50715b67a7432e693961b566d982f759b1787", + "sha256:be1cbb6cad1d4242e3aaa4143eabcfbf383358f6c8e9951be2c497b65561b075", + "sha256:c4e38326fcab5c52fd1a8c8e0f908bfe830629a5ffc60793ec5545ef913d62d2", + "sha256:d03c0524d5953568f74269e0faebb1e880ba9f36ca8c773be397087c35bd8188", + "sha256:ea897ffa80276565acdd92349ef82a768db0e3327aacd4aec82f79ca10989689", + "sha256:edc50e8bcd10b165f34c3cf3e1d4f97e9c71b165b85a85b91cf3444000a17692", + "sha256:f96a2e97df522b50da9cb3795f08199b110ceab4146bf70ea7f6a3a0213786cc", + "sha256:fadb649a69e3b08e01f090c24f0c8cccc122e92c362c1a1727b695a63be8416b", + "sha256:fbe4360ff1689a9753cbf1b27dad11e683d39117a32a64372a7c95c6abc81b81" + ], + "index": "pypi", + "version": "==1.19.0" + }, + "h5py": { + "hashes": [ + "sha256:05750b91640273c69989c657eaac34b091abdd75efc8c4824c82aaf898a2da0a", + "sha256:082a27208aa3a2286e7272e998e7e225b2a7d4b7821bd840aebf96d50977abbb", + "sha256:08e2e8297195f9e813e894b6c63f79372582787795bba2014a2db6a2de95f713", + "sha256:0dd2adeb2e9de5081eb8dcec88874e7fd35dae9a21557be3a55a3c7d491842a4", + "sha256:0f94de7a10562b991967a66bbe6dda9808e18088676834c0a4dcec3fdd3bcc6f", + "sha256:106e42e2e01e486a3d32eeb9ba0e3a7f65c12fa8998d63625fa41fb8bdc44cdb", + "sha256:1606c66015f04719c41a9863c156fc0e6b992150de21c067444bcb82e7d75579", + "sha256:1854c4beff9961e477e133143c5e5e355dac0b3ebf19c52cf7cc1b1ef757703c", + "sha256:1e9fb6f1746500ea91a00193ce2361803c70c6b13f10aae9a33ad7b5bd28e800", + "sha256:2cca17e80ddb151894333377675db90cd0279fa454776e0a4f74308376afd050", + "sha256:30e365e8408759db3778c361f1e4e0fe8e98a875185ae46c795a85e9bafb9cdf", + "sha256:3206bac900e16eda81687d787086f4ffd4f3854980d798e191a9868a6510c3ae", + "sha256:3c23d72058647cee19b30452acc7895621e2de0a0bd5b8a1e34204b9ea9ed43c", + "sha256:407b5f911a83daa285bbf1ef78a9909ee5957f257d3524b8606be37e8643c5f0", + 
"sha256:4162953714a9212d373ac953c10e3329f1e830d3c7473f2a2e4f25dd6241eef0", + "sha256:5fc7aba72a51b2c80605eba1c50dbf84224dcd206279d30a75c154e5652e1fe4", + "sha256:713ac19307e11de4d9833af0c4bd6778bde0a3d967cafd2f0f347223711c1e31", + "sha256:71b946d80ef3c3f12db157d7778b1fe74a517ca85e94809358b15580983c2ce2", + "sha256:8cc4aed71e20d87e0a6f02094d718a95252f11f8ed143bc112d22167f08d4040", + "sha256:9d41ca62daf36d6b6515ab8765e4c8c4388ee18e2a665701fef2b41563821002", + "sha256:a744e13b000f234cd5a5b2a1f95816b819027c57f385da54ad2b7da1adace2f3", + "sha256:b087ee01396c4b34e9dc41e3a6a0442158206d383c19c7d0396d52067b17c1cb", + "sha256:b0f03af381d33306ce67d18275b61acb4ca111ced645381387a02c8a5ee1b796", + "sha256:b9e4b8dfd587365bdd719ae178fa1b6c1231f81280b1375eef8626dfd8761bf3", + "sha256:c5dd4ec75985b99166c045909e10f0534704d102848b1d9f0992720e908928e7", + "sha256:d2b82f23cd862a9d05108fe99967e9edfa95c136f532a71cb3d28dc252771f50", + "sha256:e58a25764472af07b7e1c4b10b0179c8ea726446c7141076286e41891bf3a563", + "sha256:f3b49107fbfc77333fc2b1ef4d5de2abcd57e7ea3a1482455229494cf2da56ce" + ], + "index": "pypi", + "version": "==2.9.0" + }, + "html5lib": { + "hashes": [ + "sha256:20b159aa3badc9d5ee8f5c647e5efd02ed2a66ab8d354930bd9ff139fc1dc0a3", + "sha256:66cb0dcfdbbc4f9c3ba1a63fdb511ffdbd4f513b2b6d81b80cd26ce6b3fb3736" + ], + "index": "pypi", + "version": "==1.0.1" + }, + "idna": { + "hashes": [ + "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", + "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" + ], + "index": "pypi", + "version": "==2.8" + }, + "ijson": { + "hashes": [ + "sha256:ef5f9f6bf9e44f2e1721e72bcc82c7ac6bb012b525e0f8642dedf7ddc44cf474", + "sha256:eff9ce137698dcb565420497050955cb811892eb073ea1c09d92ecaf671bd7f7" + ], + "index": "pypi", + "version": "==2.3" + }, + "ipdb": { + "hashes": [ + "sha256:dce2112557edfe759742ca2d0fee35c59c97b0cc7a05398b791079d78f1519ce" + ], + "index": "pypi", + "version": "==0.12" + }, + "ipykernel": { + 
"hashes": [ + "sha256:0aeb7ec277ac42cc2b59ae3d08b10909b2ec161dc6908096210527162b53675d", + "sha256:0fc0bf97920d454102168ec2008620066878848fcfca06c22b669696212e292f" + ], + "index": "pypi", + "version": "==5.1.0" + }, + "ipython": { + "hashes": [ + "sha256:b038baa489c38f6d853a3cfc4c635b0cda66f2864d136fe8f40c1a6e334e2a6b", + "sha256:f5102c1cd67e399ec8ea66bcebe6e3968ea25a8977e53f012963e5affeb1fe38" + ], + "index": "pypi", + "version": "==7.4.0" + }, + "ipython-genutils": { + "hashes": [ + "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8", + "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8" + ], + "index": "pypi", + "version": "==0.2.0" + }, + "ipywidgets": { + "hashes": [ + "sha256:0f2b5cde9f272cb49d52f3f0889fdd1a7ae1e74f37b48dac35a83152780d2b7b", + "sha256:a3e224f430163f767047ab9a042fc55adbcab0c24bbe6cf9f306c4f89fdf0ba3" + ], + "index": "pypi", + "version": "==7.4.2" + }, + "itsdangerous": { + "hashes": [ + "sha256:321b033d07f2a4136d3ec762eac9f16a10ccd60f53c0c91af90217ace7ba1f19", + "sha256:b12271b2047cb23eeb98c8b5622e2e5c5e9abd9784a153e9d8ef9cb4dd09d749" + ], + "index": "pypi", + "version": "==1.1.0" + }, + "jedi": { + "hashes": [ + "sha256:2bb0603e3506f708e792c7f4ad8fc2a7a9d9c2d292a358fbbd58da531695595b", + "sha256:2c6bcd9545c7d6440951b12b44d373479bf18123a401a52025cf98563fbd826c" + ], + "index": "pypi", + "version": "==0.13.3" + }, + "jinja2": { + "hashes": [ + "sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd", + "sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4" + ], + "index": "pypi", + "version": "==2.10" + }, + "jsonify": { + "hashes": [ + "sha256:f340032753577575e9777835809b283fdc9b251867d5d5600389131647f8bfe1" + ], + "index": "pypi", + "version": "==0.5" + }, + "jsonschema": { + "hashes": [ + "sha256:000e68abd33c972a5248544925a0cae7d1125f9bf6c58280d37546b946769a08", + "sha256:6ff5f3180870836cae40f06fa10419f557208175f13ad7bc26caa77beb1f6e02" + ], + "index": "pypi", + 
"version": "==2.6.0" + }, + "jupyter": { + "hashes": [ + "sha256:3e1f86076bbb7c8c207829390305a2b1fe836d471ed54be66a3b8c41e7f46cc7", + "sha256:5b290f93b98ffbc21c0c7e749f054b3267782166d72fa5e3ed1ed4eaf34a2b78", + "sha256:d9dc4b3318f310e34c82951ea5d6683f67bed7def4b259fafbfe4f1beb1d8e5f" + ], + "index": "pypi", + "version": "==1.0.0" + }, + "jupyter-client": { + "hashes": [ + "sha256:b5f9cb06105c1d2d30719db5ffb3ea67da60919fb68deaefa583deccd8813551", + "sha256:c44411eb1463ed77548bc2d5ec0d744c9b81c4a542d9637c7a52824e2121b987" + ], + "index": "pypi", + "version": "==5.2.4" + }, + "jupyter-console": { + "hashes": [ + "sha256:308ce876354924fb6c540b41d5d6d08acfc946984bf0c97777c1ddcb42e0b2f5", + "sha256:cc80a97a5c389cbd30252ffb5ce7cefd4b66bde98219edd16bf5cb6f84bb3568" + ], + "index": "pypi", + "version": "==6.0.0" + }, + "jupyter-core": { + "hashes": [ + "sha256:927d713ffa616ea11972534411544589976b2493fc7e09ad946e010aa7eb9970", + "sha256:ba70754aa680300306c699790128f6fbd8c306ee5927976cbe48adacf240c0b7" + ], + "index": "pypi", + "version": "==4.4.0" + }, + "jwcrypto": { + "hashes": [ + "sha256:a87ac0922d09d9a65011f76d99849f1fbad3d95439c7452cebf4ab0871c2b665", + "sha256:e6c517d8998956e531f0a1c158b2f324c29a532a9c4b677bc30b3be14d60ad4d" + ], + "index": "pypi", + "version": "==0.6.0" + }, + "jwt": { + "hashes": [ + "sha256:f78e4c06370b23c643ad66192c80c0b36d4b1e8e2d6458f177a3962c90ee74d5" + ], + "index": "pypi", + "version": "==0.6.1" + }, + "keras": { + "hashes": [ + "sha256:794d0c92c6c4122f1f0fcf3a7bc2f49054c6a54ddbef8d8ffafca62795d760b6", + "sha256:90b610a3dbbf6d257b20a079eba3fdf2eed2158f64066a7c6f7227023fd60bc9" + ], + "index": "pypi", + "version": "==2.2.4" + }, + "keras-applications": { + "hashes": [ + "sha256:60607b2b98868983e5153bf1cc6aa468ba73adc93bc977a90edaa4bc595e69fa", + "sha256:94b8acc84fb8b1e3d752e20ed4cafa8377c9ecf6e6c1aa09942d959dc02e439a" + ], + "index": "pypi", + "version": "==1.0.7" + }, + "keras-preprocessing": { + "hashes": [ + 
"sha256:0170b799a7562f80ad7931d22d56de22cf4bdd502e11c48f31a46380137a70a8", + "sha256:5e3700117981c2db762e512ed6586638124fac5842170701628088a11aeb51ac" + ], + "index": "pypi", + "version": "==1.0.9" + }, + "kiwisolver": { + "hashes": [ + "sha256:0ee4ed8b3ae8f5f712b0aa9ebd2858b5b232f1b9a96b0943dceb34df2a223bc3", + "sha256:0f7f532f3c94e99545a29f4c3f05637f4d2713e7fd91b4dd8abfc18340b86cd5", + "sha256:1a078f5dd7e99317098f0e0d490257fd0349d79363e8c923d5bb76428f318421", + "sha256:1aa0b55a0eb1bd3fa82e704f44fb8f16e26702af1a073cc5030eea399e617b56", + "sha256:2874060b91e131ceeff00574b7c2140749c9355817a4ed498e82a4ffa308ecbc", + "sha256:379d97783ba8d2934d52221c833407f20ca287b36d949b4bba6c75274bcf6363", + "sha256:3b791ddf2aefc56382aadc26ea5b352e86a2921e4e85c31c1f770f527eb06ce4", + "sha256:4329008a167fac233e398e8a600d1b91539dc33c5a3eadee84c0d4b04d4494fa", + "sha256:45813e0873bbb679334a161b28cb9606d9665e70561fd6caa8863e279b5e464b", + "sha256:53a5b27e6b5717bdc0125338a822605084054c80f382051fb945d2c0e6899a20", + "sha256:574f24b9805cb1c72d02b9f7749aa0cc0b81aa82571be5201aa1453190390ae5", + "sha256:66f82819ff47fa67a11540da96966fb9245504b7f496034f534b81cacf333861", + "sha256:79e5fe3ccd5144ae80777e12973027bd2f4f5e3ae8eb286cabe787bed9780138", + "sha256:83410258eb886f3456714eea4d4304db3a1fc8624623fc3f38a487ab36c0f653", + "sha256:8b6a7b596ce1d2a6d93c3562f1178ebd3b7bb445b3b0dd33b09f9255e312a965", + "sha256:9576cb63897fbfa69df60f994082c3f4b8e6adb49cccb60efb2a80a208e6f996", + "sha256:95a25d9f3449046ecbe9065be8f8380c03c56081bc5d41fe0fb964aaa30b2195", + "sha256:a424f048bebc4476620e77f3e4d1f282920cef9bc376ba16d0b8fe97eec87cde", + "sha256:aaec1cfd94f4f3e9a25e144d5b0ed1eb8a9596ec36d7318a504d813412563a85", + "sha256:acb673eecbae089ea3be3dcf75bfe45fc8d4dcdc951e27d8691887963cf421c7", + "sha256:b15bc8d2c2848a4a7c04f76c9b3dc3561e95d4dabc6b4f24bfabe5fd81a0b14f", + "sha256:b1c240d565e977d80c0083404c01e4d59c5772c977fae2c483f100567f50847b", + 
"sha256:c595693de998461bcd49b8d20568c8870b3209b8ea323b2a7b0ea86d85864694", + "sha256:ce3be5d520b4d2c3e5eeb4cd2ef62b9b9ab8ac6b6fedbaa0e39cdb6f50644278", + "sha256:e0f910f84b35c36a3513b96d816e6442ae138862257ae18a0019d2fc67b041dc", + "sha256:ea36e19ac0a483eea239320aef0bd40702404ff8c7e42179a2d9d36c5afcb55c", + "sha256:efabbcd4f406b532206b8801058c8bab9e79645b9880329253ae3322b7b02cd5", + "sha256:f923406e6b32c86309261b8195e24e18b6a8801df0cfc7814ac44017bfcb3939" + ], + "index": "pypi", + "version": "==1.0.1" + }, + "ktext": { + "hashes": [ + "sha256:2737a4d8812330328bd3b777600938c83fc0d064c00cfdabd26893b61e58bb49", + "sha256:2c2f5fd190f1d04acc7b18b1d2f0e146903911db8885d320349d269aca83d63c" + ], + "index": "pypi", + "version": "==0.34" + }, + "markdown": { + "hashes": [ + "sha256:fc4a6f69a656b8d858d7503bda633f4dd63c2d70cf80abdc6eafa64c4ae8c250", + "sha256:fe463ff51e679377e3624984c829022e2cfb3be5518726b06f608a07a3aad680" + ], + "index": "pypi", + "version": "==3.1" + }, + "markupsafe": { + "hashes": [ + "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473", + "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161", + "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", + "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", + "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", + "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", + "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", + "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", + "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", + "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66", + "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1", + "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", + 
"sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", + "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", + "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", + "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", + "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d", + "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e", + "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d", + "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c", + "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21", + "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2", + "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5", + "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b", + "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6", + "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f", + "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f", + "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7" + ], + "index": "pypi", + "version": "==1.1.1" + }, + "matplotlib": { + "hashes": [ + "sha256:1ae6549976b6ceb6ee426272a28c0fc9715b3e3669694d560c8f661c5b39e2c5", + "sha256:4d4250bf508dd07cca3b43888097f873cadb66eec6ac63dbbfb798798ec07af2", + "sha256:53af2e01d7f1700ed2b64a9091bc865360c9c4032f625451c4589a826854c787", + "sha256:63e498067d32d627111cd1162cae1621f1221f9d4c6a9745dd7233f29de581b6", + "sha256:7169a34971e398dd58e87e173f97366fd88a3fa80852704530433eb224a8ca57", + "sha256:91c54d6bb9eeaaff965656c5ea6cbdcbf780bad8462ac99b30b451548194746f", + "sha256:aeef177647bb3fccfe09065481989d7dfc5ac59e9367d6a00a3481062cf651e4", + "sha256:cf8ae10559a78aee0409ede1e9d4fda03895433eeafe609dd9ed67e45f552db0", + 
"sha256:d51d0889d1c4d51c51a9822265c0494ea3e70a52bdd88358e0863daca46fa23a", + "sha256:de5ccd3500247f85fe4f9fad90f80a8bd397e4f110a4c33fabf95f07403e8372", + "sha256:e1d33589e32f482d0a7d1957bf473d43341115d40d33f578dad44432e47df7b7", + "sha256:e8d1939262aa6b36d0c51f50a50a43a04b9618d20db31e6c0192b1463067aeef", + "sha256:e918d51b1fda82a65fdf52d2f3914b2246481cc2a9cd10e223e6be6078916ff3" + ], + "index": "pypi", + "version": "==3.0.3" + }, + "mistune": { + "hashes": [ + "sha256:59a3429db53c50b5c6bcc8a07f8848cb00d7dc8bdb431a4ab41920d201d4756e", + "sha256:88a1051873018da288eee8538d476dffe1262495144b33ecb586c4ab266bb8d4" + ], + "index": "pypi", + "version": "==0.8.4" + }, + "mock": { + "hashes": [ + "sha256:5ce3c71c5545b472da17b72268978914d0252980348636840bd34a00b5cc96c1", + "sha256:b158b6df76edd239b8208d481dc46b6afd45a846b7812ff0ce58971cf5bc8bba" + ], + "index": "pypi", + "version": "==2.0.0" + }, + "more-itertools": { + "hashes": [ + "sha256:2112d2ca570bb7c3e53ea1a35cd5df42bb0fd10c45f0fb97178679c3c03d64c7", + "sha256:c3e4748ba1aad8dba30a4886b0b1a2004f9a863837b8654e7059eebf727afa5a" + ], + "index": "pypi", + "version": "==7.0.0" + }, + "msgpack": { + "hashes": [ + "sha256:26cb40116111c232bc235ce131cc3b4e76549088cb154e66a2eb8ff6fcc907ec", + "sha256:300fd3f2c664a3bf473d6a952f843b4a71454f4c592ed7e74a36b205c1782d28", + "sha256:3129c355342853007de4a2a86e75eab966119733eb15748819b6554363d4e85c", + "sha256:31f6d645ee5a97d59d3263fab9e6be76f69fa131cddc0d94091a3c8aca30d67a", + "sha256:3ce7ef7ee2546c3903ca8c934d09250531b80c6127e6478781ae31ed835aac4c", + "sha256:4008c72f5ef2b7936447dcb83db41d97e9791c83221be13d5e19db0796df1972", + "sha256:62bd8e43d204580308d477a157b78d3fee2fb4c15d32578108dc5d89866036c8", + "sha256:70cebfe08fb32f83051971264466eadf183101e335d8107b80002e632f425511", + "sha256:72cb7cf85e9df5251abd7b61a1af1fb77add15f40fa7328e924a9c0b6bc7a533", + "sha256:7c55649965c35eb32c499d17dadfb8f53358b961582846e1bc06f66b9bccc556", + 
"sha256:86b963a5de11336ec26bc4f839327673c9796b398b9f1fe6bb6150c2a5d00f0f", + "sha256:8c73c9bcdfb526247c5e4f4f6cf581b9bb86b388df82cfcaffde0a6e7bf3b43a", + "sha256:8e68c76c6aff4849089962d25346d6784d38e02baa23ffa513cf46be72e3a540", + "sha256:97ac6b867a8f63debc64f44efdc695109d541ecc361ee2dce2c8884ab37360a1", + "sha256:9d4f546af72aa001241d74a79caec278bcc007b4bcde4099994732e98012c858", + "sha256:a28e69fe5468c9f5251c7e4e7232286d71b7dfadc74f312006ebe984433e9746", + "sha256:fd509d4aa95404ce8d86b4e32ce66d5d706fd6646c205e1c2a715d87078683a2" + ], + "index": "pypi", + "version": "==0.6.1" + }, + "msgpack-numpy": { + "hashes": [ + "sha256:20d3f679cd727e2b9acb59297988895a148add8995618e7437b80bb95e7a0d7d", + "sha256:a1638108538aaba55bebaef9d847dfb3064bb1c829e68301716a6a956fa6a0d6" + ], + "index": "pypi", + "version": "==0.4.4.2" + }, + "multiprocess": { + "hashes": [ + "sha256:069f8b63f296af2801fc4c616cb737f2b457e145daa83a460147502eb7f7aa0f", + "sha256:08e00b3943e25d26e83ac1eb92c25be30fe2e353b73ef9f685a4c15efbf6641b", + "sha256:27ffc0f3b15e9321b3b46182f1b552c147b82ab3101079d2c2e6f1ee67b7d118", + "sha256:292995b5a4272350e31c4fc42b2d9fd13ffe091712858e767ab68d0ba06acaa7", + "sha256:46479a327388df8e77ad268892f2e73eac06d6271189b868ce9d4f95474e58e3", + "sha256:6d3117199432e7ce36103950a37980f142e11e09eb9b2415b6c6cbb1da1642d2", + "sha256:f2630920fef3b3224312b315ccd26a87c0dd2c9df3288d24d04398563985847d", + "sha256:f679b203a6b1766b978f579e361a96182e17e86fc8843fac5e865568d2e11d22", + "sha256:fc0f82033e9e294bc3463257e1d6399da737d65c6412e6efde40228339ad0e1e" + ], + "index": "pypi", + "version": "==0.70.7" + }, + "murmurhash": { + "hashes": [ + "sha256:27b908fe4bdb426f4e4e4a8821acbe0302915b2945e035ec9d8ca513e2a74b1f", + "sha256:33405103fa8cde15d72ee525a03d5cfe2c7e4901133819754810986e29627d68", + "sha256:386a9eed3cb27cb2cd4394b6521275ba04552642c2d9cab5c9fb42aa5a3325c0", + "sha256:3af36a0dc9f13f6892d9b8b39a6a3ccf216cae5bce38adc7c2d145677987772f", + 
"sha256:717196a04cdc80cc3103a3da17b2415a8a5e1d0d578b7079259386bf153b3258", + "sha256:8a4ed95cd3456b43ea301679c7c39ade43fc18b844b37d0ba0ac0d6acbff8e0c", + "sha256:a6c071b4b498bcea16a8dc8590cad81fa8d43821f34c74bc00f96499e2527073", + "sha256:b0afe329701b59d02e56bc6cee7325af83e3fee9c299c615fc1df3202b4f886f", + "sha256:ba766343bdbcb928039b8fff609e80ae7a5fd5ed7a4fc5af822224b63e0cbaff", + "sha256:bf33490514d308bcc27ed240cb3eb114f1ec31af031535cd8f27659a7049bd52", + "sha256:c7a646f6b07b033642b4f52ae2e45efd8b80780b3b90e8092a0cec935fbf81e2", + "sha256:d696c394ebd164ca80b5871e2e9ad2f9fdbb81bd3c552c1d5f1e8ee694e6204a", + "sha256:fe344face8d30a5a6aa26e5acf288aa2a8f0f32e05efdda3d314b4bf289ec2af" + ], + "index": "pypi", + "version": "==1.0.2" + }, + "nbconvert": { + "hashes": [ + "sha256:302554a2e219bc0fc84f3edd3e79953f3767b46ab67626fdec16e38ba3f7efe4", + "sha256:5de8fb2284422272a1d45abc77c07b888127550a6d602ce619592a2b08a474ff" + ], + "index": "pypi", + "version": "==5.4.1" + }, + "nbformat": { + "hashes": [ + "sha256:b9a0dbdbd45bb034f4f8893cafd6f652ea08c8c1674ba83f2dc55d3955743b0b", + "sha256:f7494ef0df60766b7cabe0a3651556345a963b74dbc16bc7c18479041170d402" + ], + "index": "pypi", + "version": "==4.4.0" + }, + "networkx": { + "hashes": [ + "sha256:45e56f7ab6fe81652fb4bc9f44faddb0e9025f469f602df14e3b2551c2ea5c8b" + ], + "index": "pypi", + "version": "==2.2" + }, + "notebook": { + "hashes": [ + "sha256:19df5755bc21bb1f711e6415f6f620bf7c71cb6c8318a25bc151fe31bdfe4047", + "sha256:76758631f037ec09afb56b75e20a0502ca3a8c5d3a61d675d4b9bd5c84c7ce7f" + ], + "index": "pypi", + "version": "==5.7.7" + }, + "numpy": { + "hashes": [ + "sha256:1980f8d84548d74921685f68096911585fee393975f53797614b34d4f409b6da", + "sha256:22752cd809272671b273bb86df0f505f505a12368a3a5fc0aa811c7ece4dfd5c", + "sha256:23cc40313036cffd5d1873ef3ce2e949bdee0646c5d6f375bf7ee4f368db2511", + "sha256:2b0b118ff547fecabc247a2668f48f48b3b1f7d63676ebc5be7352a5fd9e85a5", + 
"sha256:3a0bd1edf64f6a911427b608a894111f9fcdb25284f724016f34a84c9a3a6ea9", + "sha256:3f25f6c7b0d000017e5ac55977a3999b0b1a74491eacb3c1aa716f0e01f6dcd1", + "sha256:4061c79ac2230594a7419151028e808239450e676c39e58302ad296232e3c2e8", + "sha256:560ceaa24f971ab37dede7ba030fc5d8fa173305d94365f814d9523ffd5d5916", + "sha256:62be044cd58da2a947b7e7b2252a10b42920df9520fc3d39f5c4c70d5460b8ba", + "sha256:6c692e3879dde0b67a9dc78f9bfb6f61c666b4562fd8619632d7043fb5b691b0", + "sha256:6f65e37b5a331df950ef6ff03bd4136b3c0bbcf44d4b8e99135d68a537711b5a", + "sha256:7a78cc4ddb253a55971115f8320a7ce28fd23a065fc33166d601f51760eecfa9", + "sha256:80a41edf64a3626e729a62df7dd278474fc1726836552b67a8c6396fd7e86760", + "sha256:893f4d75255f25a7b8516feb5766c6b63c54780323b9bd4bc51cdd7efc943c73", + "sha256:972ea92f9c1b54cc1c1a3d8508e326c0114aaf0f34996772a30f3f52b73b942f", + "sha256:9f1d4865436f794accdabadc57a8395bd3faa755449b4f65b88b7df65ae05f89", + "sha256:9f4cd7832b35e736b739be03b55875706c8c3e5fe334a06210f1a61e5c2c8ca5", + "sha256:adab43bf657488300d3aeeb8030d7f024fcc86e3a9b8848741ea2ea903e56610", + "sha256:bd2834d496ba9b1bdda3a6cf3de4dc0d4a0e7be306335940402ec95132ad063d", + "sha256:d20c0360940f30003a23c0adae2fe50a0a04f3e48dc05c298493b51fd6280197", + "sha256:d3b3ed87061d2314ff3659bb73896e622252da52558f2380f12c421fbdee3d89", + "sha256:dc235bf29a406dfda5790d01b998a1c01d7d37f449128c0b1b7d1c89a84fae8b", + "sha256:fb3c83554f39f48f3fa3123b9c24aecf681b1c289f9334f8215c1d3c8e2f6e5b" + ], + "index": "pypi", + "version": "==1.16.2" + }, + "pandas": { + "hashes": [ + "sha256:071e42b89b57baa17031af8c6b6bbd2e9a5c68c595bc6bf9adabd7a9ed125d3b", + "sha256:17450e25ae69e2e6b303817bdf26b2cd57f69595d8550a77c308be0cd0fd58fa", + "sha256:17916d818592c9ec891cbef2e90f98cc85e0f1e89ed0924c9b5220dc3209c846", + "sha256:2538f099ab0e9f9c9d09bbcd94b47fd889bad06dc7ae96b1ed583f1dc1a7a822", + "sha256:366f30710172cb45a6b4f43b66c220653b1ea50303fbbd94e50571637ffb9167", + 
"sha256:42e5ad741a0d09232efbc7fc648226ed93306551772fc8aecc6dce9f0e676794", + "sha256:4e718e7f395ba5bfe8b6f6aaf2ff1c65a09bb77a36af6394621434e7cc813204", + "sha256:4f919f409c433577a501e023943e582c57355d50a724c589e78bc1d551a535a2", + "sha256:4fe0d7e6438212e839fc5010c78b822664f1a824c0d263fd858f44131d9166e2", + "sha256:5149a6db3e74f23dc3f5a216c2c9ae2e12920aa2d4a5b77e44e5b804a5f93248", + "sha256:627594338d6dd995cfc0bacd8e654cd9e1252d2a7c959449228df6740d737eb8", + "sha256:83c702615052f2a0a7fb1dd289726e29ec87a27272d775cb77affe749cca28f8", + "sha256:8c872f7fdf3018b7891e1e3e86c55b190e6c5cee70cab771e8f246c855001296", + "sha256:90f116086063934afd51e61a802a943826d2aac572b2f7d55caaac51c13db5b5", + "sha256:a3352bacac12e1fc646213b998bce586f965c9d431773d9e91db27c7c48a1f7d", + "sha256:bcdd06007cca02d51350f96debe51331dec429ac8f93930a43eb8fb5639e3eb5", + "sha256:c1bd07ebc15285535f61ddd8c0c75d0d6293e80e1ee6d9a8d73f3f36954342d0", + "sha256:c9a4b7c55115eb278c19aa14b34fcf5920c8fe7797a09b7b053ddd6195ea89b3", + "sha256:cc8fc0c7a8d5951dc738f1c1447f71c43734244453616f32b8aa0ef6013a5dfb", + "sha256:d7b460bc316064540ce0c41c1438c416a40746fd8a4fb2999668bf18f3c4acf1" + ], + "index": "pypi", + "version": "==0.24.2" + }, + "pandocfilters": { + "hashes": [ + "sha256:b3dd70e169bb5449e6bc6ff96aea89c5eea8c5f6ab5e207fc2f521a2cf4a0da9" + ], + "index": "pypi", + "version": "==1.4.2" + }, + "parso": { + "hashes": [ + "sha256:4580328ae3f548b358f4901e38c0578229186835f0fa0846e47369796dd5bcc9", + "sha256:68406ebd7eafe17f8e40e15a84b56848eccbf27d7c1feb89e93d8fca395706db" + ], + "index": "pypi", + "version": "==0.3.4" + }, + "pathos": { + "hashes": [ + "sha256:954c5b0a8b257c375e35d311c65fa62a210a3d65269195557de38418ac9f61f9" + ], + "index": "pypi", + "version": "==0.2.3" + }, + "pbr": { + "hashes": [ + "sha256:8257baf496c8522437e8a6cfe0f15e00aedc6c0e0e7c9d55eeeeab31e0853843", + "sha256:8c361cc353d988e4f5b998555c88098b9d5964c2e11acf7b0d21925a66bb5824" + ], + "index": "pypi", + "version": "==5.1.3" + }, + "pexpect": 
{ + "hashes": [ + "sha256:2a8e88259839571d1251d278476f3eec5db26deb73a70be5ed5dc5435e418aba", + "sha256:3fbd41d4caf27fa4a377bfd16fef87271099463e6fa73e92a52f92dfee5d425b" + ], + "index": "pypi", + "version": "==4.6.0" + }, + "pickleshare": { + "hashes": [ + "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca", + "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56" + ], + "index": "pypi", + "version": "==0.7.5" + }, + "plac": { + "hashes": [ + "sha256:854693ad90367e8267112ffbb8955f57d6fdeac3191791dc9ffce80f87fd2370", + "sha256:ba3f719a018175f0a15a6b04e6cc79c25fd563d348aacd320c3644d2a9baf89b" + ], + "index": "pypi", + "version": "==0.9.6" + }, + "pox": { + "hashes": [ + "sha256:2b53fbdf02596240483dc2cb94f94cc21252ad1b1858c7b1c151afeec9022cc8" + ], + "index": "pypi", + "version": "==0.2.5" + }, + "ppft": { + "hashes": [ + "sha256:5537b00afb7b247da0f59cc57ee5680178be61c8b2e21b5a0672b70a3d247791" + ], + "index": "pypi", + "version": "==1.6.4.9" + }, + "preshed": { + "hashes": [ + "sha256:0c9af79c7b825793f987d477627efb81afd23384ac791bebbc88a257342a77ab", + "sha256:0ebc79431154bc5d12f97b3c93bc350af941702a44f0761dfcd395e970d693f8", + "sha256:102e71dc841c979b2ece44ab05b2b0aa39c8039493ddac40dd22cf23e2484063", + "sha256:15145b24eded01426544be829a6395d6c99e2d62f5f3b88a6e19087ebeef7237", + "sha256:195674dfb4bcf18b26e448feaabdf61adcf028ae69ecaa075c0bdfaf62a19671", + "sha256:38f7fbef59f89d3b2c8c3b102f9a7360cd73a33c829fdeb101c615b18ecc4686", + "sha256:3aa411233dc230247ea4c4558062e5b2d59d41c697107a45fddbfe03e63f3e77", + "sha256:3b8c7b607e6dce0843544cfe4f05355db0516fce8eca0c37d6b5f4f3680493bf", + "sha256:4bda4153d46a603bc6ea65380dfa091d46700f664cb906c7f26a469be6c2a503", + "sha256:541d7ed765d67512d6f9fa24fd01cc1d7a51c7ff2646362924f4db46813b485a", + "sha256:593d23b9f851ae7a4d519ca4489dd2b352d833e08f5d35795d42a591b8badb54", + "sha256:7f6fb8f4108abe958af892847ed50abe6f45aaf45a87853cc8154a7203e75d84", + 
"sha256:7ff7f18af1f19ea666ac4fbf48842e6acd900fbfdc26bb9aad02f353ff932386", + "sha256:9c0d503d8693bf1e08e0fa1cecbcd3253146abaa9a7501d7d583a72edd29fdd1", + "sha256:9cefe818a97134c0ddf22ef76fced1c841ebd137c2895251c5d1310276c234b5", + "sha256:9e603916a95dc524081d54c0a135611e6f68d787185d5df2b5ab3f076c3d1bd4", + "sha256:a2acacceac79aa6d4b65125e20c7de78fbca1340a251854c87967acef1795490", + "sha256:a3d592e7b265b4faf08c9b4d7493b9e8604e0ba8858cc9bd8c9aee41d3df2a3a", + "sha256:b2030e68c6f539e6dd7bfcea032940042739ef05d50a2eb1d7af24e038971b0f", + "sha256:bc894dc14d8567a5d6a1cded0a701da7fbb360b2124237fe8acde85333825aef", + "sha256:c21d4d10cc0248ba3facbbbfbe63211ce921478a3d5db6de34de39ee1b3484e1", + "sha256:dae01c74313965c487e0ec839e5f28d0c7df9bfd1d978aa5bada3f72ff20a9e5", + "sha256:ee8068035684a4b382bebb3a3f270799360545baff9742b85e627a0a889e6850" + ], + "index": "pypi", + "version": "==2.0.1" + }, + "prometheus-client": { + "hashes": [ + "sha256:1b38b958750f66f208bcd9ab92a633c0c994d8859c831f7abc1f46724fcee490" + ], + "index": "pypi", + "version": "==0.6.0" + }, + "prompt-toolkit": { + "hashes": [ + "sha256:11adf3389a996a6d45cc277580d0d53e8a5afd281d0c9ec71b28e6f121463780", + "sha256:2519ad1d8038fd5fc8e770362237ad0364d16a7650fb5724af6997ed5515e3c1", + "sha256:977c6583ae813a37dc1c2e1b715892461fcbdaa57f6fc62f33a528c4886c8f55" + ], + "index": "pypi", + "version": "==2.0.9" + }, + "protobuf": { + "hashes": [ + "sha256:21e395d7959551e759d604940a115c51c6347d90a475c9baf471a1a86b5604a9", + "sha256:57e05e16955aee9e6a0389fcbd58d8289dd2420e47df1a1096b3a232c26eb2dd", + "sha256:67819e8e48a74c68d87f25cad9f40edfe2faf278cdba5ca73173211b9213b8c9", + "sha256:75da7d43a2c8a13b0bc7238ab3c8ae217cbfd5979d33b01e98e1f78defb2d060", + "sha256:78e08371e236f193ce947712c072542ff19d0043ab5318c2ea46bbc2aaebdca6", + "sha256:7ee5b595db5abb0096e8c4755e69c20dfad38b2d0bcc9bc7bafc652d2496b471", + "sha256:86260ecfe7a66c0e9d82d2c61f86a14aa974d340d159b829b26f35f710f615db", + 
"sha256:92c77db4bd33ea4ee5f15152a835273f2338a5246b2cbb84bab5d0d7f6e9ba94", + "sha256:9c7b90943e0e188394b4f068926a759e3b4f63738190d1ab3d500d53b9ce7614", + "sha256:a77f217ea50b2542bae5b318f7acee50d9fc8c95dd6d3656eaeff646f7cab5ee", + "sha256:ad589ed1d1f83db22df867b10e01fe445516a5a4d7cfa37fe3590a5f6cfc508b", + "sha256:b06a794901bf573f4b2af87e6139e5cd36ac7c91ac85d7ae3fe5b5f6fc317513", + "sha256:bd8592cc5f8b4371d0bad92543370d4658dc41a5ccaaf105597eb5524c616291", + "sha256:be48e5a6248a928ec43adf2bea037073e5da692c0b3c10b34f9904793bd63138", + "sha256:cc5eb13f5ccc4b1b642cc147c2cdd121a34278b341c7a4d79e91182fff425836", + "sha256:cd3b0e0ad69b74ee55e7c321f52a98effed2b4f4cc9a10f3683d869de00590d5", + "sha256:d6e88c4920660aa75c0c2c4b53407aef5efd9a6e0ca7d2fc84d79aba2ccbda3a", + "sha256:ec3c49b6d247152e19110c3a53d9bb4cf917747882017f70796460728b02722e" + ], + "index": "pypi", + "version": "==3.7.1" + }, + "psycopg2": { + "hashes": [ + "sha256:02445ebbb3a11a3fe8202c413d5e6faf38bb75b4e336203ee144ca2c46529f94", + "sha256:0e9873e60f98f0c52339abf8f0339d1e22bfe5aae0bcf7aabd40c055175035ec", + "sha256:1148a5eb29073280bf9057c7fc45468592c1bb75a28f6df1591adb93c8cb63d0", + "sha256:259a8324e109d4922b0fcd046e223e289830e2568d6f4132a3702439e5fd532b", + "sha256:28dffa9ed4595429e61bacac41d3f9671bb613d1442ff43bcbec63d4f73ed5e8", + "sha256:314a74302d4737a3865d40ea50e430ce1543c921ba10f39d562e807cfe2edf2a", + "sha256:36b60201b6d215d7658a71493fdf6bd5e60ad9a0cffed39906627ff9f4f3afd3", + "sha256:3f9d532bce54c4234161176ff3b8688ff337575ca441ea27597e112dfcd0ee0c", + "sha256:5d222983847b40af989ad96c07fc3f07e47925e463baa5de716be8f805b41d9b", + "sha256:6757a6d2fc58f7d8f5d471ad180a0bd7b4dd3c7d681f051504fbea7ae29c8d6f", + "sha256:6a0e0f1e74edb0ab57d89680e59e7bfefad2bfbdf7c80eb38304d897d43674bb", + "sha256:6ca703ccdf734e886a1cf53eb702261110f6a8b0ed74bcad15f1399f74d3f189", + "sha256:8513b953d8f443c446aa79a4cc8a898bd415fc5e29349054f03a7d696d495542", + 
"sha256:9262a5ce2038570cb81b4d6413720484cb1bc52c064b2f36228d735b1f98b794", + "sha256:97441f851d862a0c844d981cbee7ee62566c322ebb3d68f86d66aa99d483985b", + "sha256:a07feade155eb8e69b54dd6774cf6acf2d936660c61d8123b8b6b1f9247b67d6", + "sha256:a9b9c02c91b1e3ec1f1886b2d0a90a0ea07cc529cb7e6e472b556bc20ce658f3", + "sha256:ae88216f94728d691b945983140bf40d51a1ff6c7fe57def93949bf9339ed54a", + "sha256:b360ffd17659491f1a6ad7c928350e229c7b7bd83a2b922b6ee541245c7a776f", + "sha256:b4221957ceccf14b2abdabef42d806e791350be10e21b260d7c9ce49012cc19e", + "sha256:b90758e49d5e6b152a460d10b92f8a6ccf318fcc0ee814dcf53f3a6fc5328789", + "sha256:c669ea986190ed05fb289d0c100cc88064351f2b85177cbfd3564c4f4847d18c", + "sha256:d1b61999d15c79cf7f4f7cc9021477aef35277fc52452cf50fd13b713c84424d", + "sha256:de7bb043d1adaaf46e38d47e7a5f703bb3dab01376111e522b07d25e1a79c1e1", + "sha256:e393568e288d884b94d263f2669215197840d097c7e5b0acd1a51c1ea7d1aba8", + "sha256:ed7e0849337bd37d89f2c2b0216a0de863399ee5d363d31b1e5330a99044737b", + "sha256:f153f71c3164665d269a5d03c7fa76ba675c7a8de9dc09a4e2c2cdc9936a7b41", + "sha256:f1fb5a8427af099beb7f65093cbdb52e021b8e6dbdfaf020402a623f4181baf5", + "sha256:f36b333e9f86a2fba960c72b90c34be6ca71819e300f7b1fc3d2b0f0b2c546cd", + "sha256:f4526d078aedd5187d0508aa5f9a01eae6a48a470ed678406da94b4cd6524b7e" + ], + "index": "pypi", + "version": "==2.7.7" + }, + "psycopg2-binary": { + "hashes": [ + "sha256:19a2d1f3567b30f6c2bb3baea23f74f69d51f0c06c2e2082d0d9c28b0733a4c2", + "sha256:2b69cf4b0fa2716fd977aa4e1fd39af6110eb47b2bb30b4e5a469d8fbecfc102", + "sha256:2e952fa17ba48cbc2dc063ddeec37d7dc4ea0ef7db0ac1eda8906365a8543f31", + "sha256:348b49dd737ff74cfb5e663e18cb069b44c64f77ec0523b5794efafbfa7df0b8", + "sha256:3d72a5fdc5f00ca85160915eb9a973cf9a0ab8148f6eda40708bf672c55ac1d1", + "sha256:4957452f7868f43f32c090dadb4188e9c74a4687323c87a882e943c2bd4780c3", + "sha256:5138cec2ee1e53a671e11cc519505eb08aaaaf390c508f25b09605763d48de4b", + 
"sha256:587098ca4fc46c95736459d171102336af12f0d415b3b865972a79c03f06259f", + "sha256:5b79368bcdb1da4a05f931b62760bea0955ee2c81531d8e84625df2defd3f709", + "sha256:5cf43807392247d9bc99737160da32d3fa619e0bfd85ba24d1c78db205f472a4", + "sha256:676d1a80b1eebc0cacae8dd09b2fde24213173bf65650d22b038c5ed4039f392", + "sha256:6b0211ecda389101a7d1d3df2eba0cf7ffbdd2480ca6f1d2257c7bd739e84110", + "sha256:79cde4660de6f0bb523c229763bd8ad9a93ac6760b72c369cf1213955c430934", + "sha256:7aba9786ac32c2a6d5fb446002ed936b47d5e1f10c466ef7e48f66eb9f9ebe3b", + "sha256:7c8159352244e11bdd422226aa17651110b600d175220c451a9acf795e7414e0", + "sha256:945f2eedf4fc6b2432697eb90bb98cc467de5147869e57405bfc31fa0b824741", + "sha256:96b4e902cde37a7fc6ab306b3ac089a3949e6ce3d824eeca5b19dc0bedb9f6e2", + "sha256:9a7bccb1212e63f309eb9fab47b6eaef796f59850f169a25695b248ca1bf681b", + "sha256:a3bfcac727538ec11af304b5eccadbac952d4cca1a551a29b8fe554e3ad535dc", + "sha256:b19e9f1b85c5d6136f5a0549abdc55dcbd63aba18b4f10d0d063eb65ef2c68b4", + "sha256:b664011bb14ca1f2287c17185e222f2098f7b4c857961dbcf9badb28786dbbf4", + "sha256:bde7959ef012b628868d69c474ec4920252656d0800835ed999ba5e4f57e3e2e", + "sha256:cb095a0657d792c8de9f7c9a0452385a309dfb1bbbb3357d6b1e216353ade6ca", + "sha256:d16d42a1b9772152c1fe606f679b2316551f7e1a1ce273e7f808e82a136cdb3d", + "sha256:d444b1545430ffc1e7a24ce5a9be122ccd3b135a7b7e695c5862c5aff0b11159", + "sha256:d93ccc7bf409ec0a23f2ac70977507e0b8a8d8c54e5ee46109af2f0ec9e411f3", + "sha256:df6444f952ca849016902662e1a47abf4fa0678d75f92fd9dd27f20525f809cd", + "sha256:e63850d8c52ba2b502662bf3c02603175c2397a9acc756090e444ce49508d41e", + "sha256:ec43358c105794bc2b6fd34c68d27f92bea7102393c01889e93f4b6a70975728", + "sha256:f4c6926d9c03dadce7a3b378b40d2fea912c1344ef9b29869f984fb3d2a2420b" + ], + "index": "pypi", + "version": "==2.7.7" + }, + "ptyprocess": { + "hashes": [ + "sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0", + 
"sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f" + ], + "index": "pypi", + "version": "==0.6.0" + }, + "pyarrow": { + "hashes": [ + "sha256:10db6e486c918c3af999d0114a22d92770687e3a6607ea3f14e6748854824c2a", + "sha256:3ea3984a8628d15268f7f9d1cc3cab5b491b3feedf67627f9f1d729ea158902c", + "sha256:4a7bd0308efcbd3257a07d7a228c235bb2cfd61a8e4571df44102a239ae80ec7", + "sha256:4e22e9acdf73ae9077ff6cdc4ca4571af3c1fda78ba3a4699544fd444ec6142a", + "sha256:5b9f68c95e3b6bb8b211cf44a7941f5b29114f01c0c8edbc5137847c6810f572", + "sha256:63170571cccaf0bf01a1d30eacc4d9274bd5c4f448c2b5b1a4ddc125952f4284", + "sha256:a3020705d3a4511e2f0dd1afdd54edd95fd3349207cdf318d58e3ccc1ce5b398", + "sha256:a53a6bc9e6aa04850ab28aded74e467de1915c38075439e1072ed6c41c0ed4fe", + "sha256:bd2804801c27af084b76b331622a7df5fba8da7fb3b0c5ec16f79b934cd4e07b", + "sha256:d195043c25689e1711ee264a975ccb5b9ea15fa8305222b644bf83baf9a8cd70", + "sha256:d80eb57c28edea0025680c402ee2188763b45e4e92769c448235011414a167ba", + "sha256:e1ced4a7af72d9d212f1010a6aeb0b39226acaf4538e27a1dd60296998b9f3c0", + "sha256:e78c09604a5a2071e9ddf20a028526c5ac26fb0893743e3f9eff1fe18f57f383" + ], + "index": "pypi", + "version": "==0.12.1" + }, + "pycparser": { + "hashes": [ + "sha256:a988718abfad80b6b157acce7bf130a30876d27603738ac39f140993246b25b3" + ], + "index": "pypi", + "version": "==2.19" + }, + "pyemd": { + "hashes": [ + "sha256:15750113757ace54a03d2efef7bbc2c5a4782cba30555e7fd401bcafcfa0ecb2", + "sha256:fc81c2116f8573e559dfbb8d73e03d9f73c22d0770559f406516984302e07e70" + ], + "index": "pypi", + "version": "==0.5.1" + }, + "pygments": { + "hashes": [ + "sha256:5ffada19f6203563680669ee7f53b64dabbeb100eb51b61996085e99c03b284a", + "sha256:e8218dd399a61674745138520d0d4cf2621d7e032439341bc3f647bff125818d" + ], + "index": "pypi", + "version": "==2.3.1" + }, + "pyjwt": { + "hashes": [ + "sha256:5c6eca3c2940464d106b99ba83b00c6add741c9becaec087fb7ccdefea71350e", + 
"sha256:8d59a976fb773f3e6a39c85636357c4f0e242707394cadadd9814f5cbaa20e96" + ], + "index": "pypi", + "version": "==1.7.1" + }, + "pyparsing": { + "hashes": [ + "sha256:66c9268862641abcac4a96ba74506e594c884e3f57690a696d21ad8210ed667a", + "sha256:f6c5ef0d7480ad048c054c37632c67fca55299990fff127850181659eea33fc3" + ], + "index": "pypi", + "version": "==2.3.1" + }, + "pyphen": { + "hashes": [ + "sha256:3b633a50873156d777e1f1075ba4d8e96a6ad0a3ca42aa3ea9a6259f93f18921", + "sha256:e172faf10992c8c9d369bdc83e36dbcf1121f4ed0d881f1a0b521935aee583b5" + ], + "index": "pypi", + "version": "==0.9.5" + }, + "python-dateutil": { + "hashes": [ + "sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb", + "sha256:c89805f6f4d64db21ed966fda138f8a5ed7a4fdbc1a8ee329ce1b74e3c74da9e" + ], + "index": "pypi", + "version": "==2.8.0" + }, + "python-levenshtein": { + "hashes": [ + "sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1" + ], + "index": "pypi", + "version": "==0.12.0" + }, + "pytz": { + "hashes": [ + "sha256:32b0891edff07e28efe91284ed9c31e123d84bea3fd98e1f72be2508f43ef8d9", + "sha256:d5f05e487007e29e03409f9398d074e158d920d36eb82eaf66fb1136b0c5374c" + ], + "index": "pypi", + "version": "==2018.9" + }, + "pyyaml": { + "hashes": [ + "sha256:1adecc22f88d38052fb787d959f003811ca858b799590a5eaa70e63dca50308c", + "sha256:436bc774ecf7c103814098159fbb84c2715d25980175292c648f2da143909f95", + "sha256:460a5a4248763f6f37ea225d19d5c205677d8d525f6a83357ca622ed541830c2", + "sha256:5a22a9c84653debfbf198d02fe592c176ea548cccce47553f35f466e15cf2fd4", + "sha256:7a5d3f26b89d688db27822343dfa25c599627bc92093e788956372285c6298ad", + "sha256:9372b04a02080752d9e6f990179a4ab840227c6e2ce15b95e1278456664cf2ba", + "sha256:a5dcbebee834eaddf3fa7366316b880ff4062e4bcc9787b78c7fbb4a26ff2dd1", + "sha256:aee5bab92a176e7cd034e57f46e9df9a9862a71f8f37cad167c6fc74c65f5b4e", + "sha256:c51f642898c0bacd335fc119da60baae0824f2cde95b0330b56c0553439f0673", + 
"sha256:c68ea4d3ba1705da1e0d85da6684ac657912679a649e8868bd850d2c299cce13", + "sha256:e23d0cc5299223dcc37885dae624f382297717e459ea24053709675a976a3e19" + ], + "index": "pypi", + "version": "==5.1" + }, + "pyzmq": { + "hashes": [ + "sha256:1651e52ed91f0736afd6d94ef9f3259b5534ce8beddb054f3d5ca989c4ef7c4f", + "sha256:5ccb9b3d4cd20c000a9b75689d5add8cd3bce67fcbd0f8ae1b59345247d803af", + "sha256:5e120c4cd3872e332fb35d255ad5998ebcee32ace4387b1b337416b6b90436c7", + "sha256:5e2a3707c69a7281a9957f83718815fd74698cba31f6d69f9ed359921f662221", + "sha256:63d51add9af8d0442dc90f916baf98fdc04e3b0a32afec4bfc83f8d85e72959f", + "sha256:65c5a0bdc49e20f7d6b03a661f71e2fda7a99c51270cafe71598146d09810d0d", + "sha256:66828fabe911aa545d919028441a585edb7c9c77969a5fea6722ef6e6ece38ab", + "sha256:7d79427e82d9dad6e9b47c0b3e7ae5f9d489b1601e3a36ea629bb49501a4daf3", + "sha256:824ee5d3078c4eae737ffc500fbf32f2b14e6ec89b26b435b7834febd70120cf", + "sha256:89dc0a83cccec19ff3c62c091e43e66e0183d1e6b4658c16ee4e659518131494", + "sha256:8b319805f6f7c907b101c864c3ca6cefc9db8ce0791356f180b1b644c7347e4c", + "sha256:90facfb379ab47f94b19519c1ecc8ec8d10813b69d9c163117944948bdec5d15", + "sha256:a0a178c7420021fc0730180a914a4b4b3092ce9696ceb8e72d0f60f8ce1655dd", + "sha256:a7a89591ae315baccb8072f216614b3e59aed7385aef4393a6c741783d6ee9cf", + "sha256:ba2578f0ae582452c02ed9fac2dc477b08e80ce05d2c0885becf5fff6651ccb0", + "sha256:c69b0055c55702f5b0b6b354133e8325b9a56dbc80e1be2d240bead253fb9825", + "sha256:ca434e1858fe222380221ddeb81e86f45522773344c9da63c311d17161df5e06", + "sha256:d4b8ecfc3d92f114f04d5c40f60a65e5196198b827503341521dda12d8b14939", + "sha256:d706025c47b09a54f005953ebe206f6d07a22516776faa4f509aaff681cc5468", + "sha256:d8f27e958f8a2c0c8ffd4d8855c3ce8ac3fa1e105f0491ce31729aa2b3229740", + "sha256:dbd264298f76b9060ce537008eb989317ca787c857e23cbd1b3ddf89f190a9b1", + "sha256:e926d66f0df8fdbf03ba20583af0f215e475c667fb033d45fd031c66c63e34c9", + "sha256:efc3bd48237f973a749f7312f68062f1b4ca5c2032a0673ca3ea8e46aa77187b", 
+ "sha256:f59bc782228777cbfe04555707a9c56d269c787ed25d6d28ed9d0fbb41cb1ad2", + "sha256:f8da5322f4ff5f667a0d5a27e871b560c6637153c81e318b35cb012b2a98835c" + ], + "index": "pypi", + "version": "==18.0.1" + }, + "qtconsole": { + "hashes": [ + "sha256:1ac4a65e81a27b0838330a6d351c2f8435d4013d98a95373e8a41119b2968390", + "sha256:bc1ba15f50c29ed50f1268ad823bb6543be263c18dd093b80495e9df63b003ac" + ], + "index": "pypi", + "version": "==4.4.3" + }, + "requests": { + "hashes": [ + "sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e", + "sha256:7bf2a778576d825600030a110f3c0e3e8edc51dfaafe1c146e39a2027784957b" + ], + "index": "pypi", + "version": "==2.21.0" + }, + "scikit-learn": { + "hashes": [ + "sha256:018f470a7e685767d84ce6fac87af59e064e87ec3cea71eaf12646f9538e293d", + "sha256:0ae00d570331b8a5c552f721167818b4739a5c855fbc76b11231ccdea2dd26ab", + "sha256:13079520dd8211967d1871e439b59818d335439672818e9683847091d0e07778", + "sha256:1c133749a526b33af2b6695d94d2cc43ba212c5aa7bd3a45619335556ced7637", + "sha256:382e7053567b7b11e862782e3de2940e2141be24e6262aa0b4a9cb7fdd61f85a", + "sha256:384df81fdba12d21063072f2cf472a7a8425a3d4fa3915faef0a88e94e07b332", + "sha256:4705073de7bbcc6b9cd2f24dc9189aa8d3935e8621d3e65546c4b7fee9a042bf", + "sha256:4f829d6c09b997e1d0a998f970cf3ff82cd6796d56148c63c29174367878d490", + "sha256:51a933224b1b11986d4c7c123e5b28eb69602899d0179e6888b7abf2ffc85265", + "sha256:63ad98c6512b52aebde9bd806ec1127e13e2a8d42a00ebdf805153819f7c2cad", + "sha256:67e15514c9df4c5354b3ecc89451f5baa0f1b62c7ed68f4d20febf9c9d9e17a6", + "sha256:75f0e0e93851b30639baabfc1a4433aabc57eef269d55ee4c6f649fb60686218", + "sha256:89609708e819342dd5c94617fd53a36187d7d6a80435ddb282f6a60b058dbe77", + "sha256:8ca274d4e91685e4547af718b6f1e9a9d4912c7a6dcb0c68925de84f81a09d2a", + "sha256:9987f3d31efc427ebf9926f703e5171552cfb3b6935f880e4f0d3a17b7f91540", + "sha256:9f3e08dbd3f2f574913faba9b48d3c24a43fcc0eb14a0e962431005434b9cfe6", + 
"sha256:a7a403bcea250cac37971058fca0c30b0144737a375f99d3855e5e7a34c43348", + "sha256:ad7e4e823db1271d344e0c3ce0988b2e0fecc49079eec9c818d866c38b2824bd", + "sha256:b1e9037a582e650d866324a50d2741724ea5f6c175200bef0b549d014898035a", + "sha256:b82fbd8843ead2640158b2c0946d354b66f3d49472e6790d70c4ceec35663b3f", + "sha256:b91c82bfd25145d428de99429de97d7a1c2c2658c212689fe2839b29a5251159", + "sha256:ba57b73ec7074f60bb85f953296df437784d560553d0cc04b253c43f1846ccad", + "sha256:c503802a81de18b8b4d40d069f5e363795ee44b1605f38bc104160ca3bfe2c41", + "sha256:d30e8e0dffbc299533f47044fec26c5087473cb29cf51f1995986ac8354c7b4c", + "sha256:d89b810bfb0e16a0de7f18773849bdf83dd7fd0614ae5225e5a9214cdb9be245", + "sha256:e22e1d47def2944ad7a12c09452de085587ec5baad2174683e56a42b6918a76f", + "sha256:f650ddc023c95681fccd5e297820f35de039e008265040c08188be95b3275a0f", + "sha256:f7d4b3885ad1a7a6f07719ab6b1790d9892d6d41d973e8d4543a93bb15226fb4" + ], + "index": "pypi", + "version": "==0.20.3" + }, + "scipy": { + "hashes": [ + "sha256:014cb900c003b5ac81a53f2403294e8ecf37aedc315b59a6b9370dce0aa7627a", + "sha256:281a34da34a5e0de42d26aed692ab710141cad9d5d218b20643a9cb538ace976", + "sha256:588f9cc4bfab04c45fbd19c1354b5ade377a8124d6151d511c83730a9b6b2338", + "sha256:5a10661accd36b6e2e8855addcf3d675d6222006a15795420a39c040362def66", + "sha256:628f60be272512ca1123524969649a8cb5ae8b31cca349f7c6f8903daf9034d7", + "sha256:6dcc43a88e25b815c2dea1c6fac7339779fc988f5df8396e1de01610604a7c38", + "sha256:70e37cec0ac0fe95c85b74ca4e0620169590fd5d3f44765f3c3a532cedb0e5fd", + "sha256:7274735fb6fb5d67d3789ddec2cd53ed6362539b41aa6cc0d33a06c003aaa390", + "sha256:78e12972e144da47326958ac40c2bd1c1cca908edc8b01c26a36f9ffd3dce466", + "sha256:790cbd3c8d09f3a6d9c47c4558841e25bac34eb7a0864a9def8f26be0b8706af", + "sha256:79792c8fe8e9d06ebc50fe23266522c8c89f20aa94ac8e80472917ecdce1e5ba", + "sha256:865afedf35aaef6df6344bee0de391ee5e99d6e802950a237f9fb9b13e441f91", + 
"sha256:870fd401ec7b64a895cff8e206ee16569158db00254b2f7157b4c9a5db72c722", + "sha256:963815c226b29b0176d5e3d37fc9de46e2778ce4636a5a7af11a48122ef2577c", + "sha256:9726791484f08e394af0b59eb80489ad94d0a53bbb58ab1837dcad4d58489863", + "sha256:9de84a71bb7979aa8c089c4fb0ea0e2ed3917df3fb2a287a41aaea54bbad7f5d", + "sha256:b2c324ddc5d6dbd3f13680ad16a29425841876a84a1de23a984236d1afff4fa6", + "sha256:b86ae13c597fca087cb8c193870507c8916cefb21e52e1897da320b5a35075e5", + "sha256:ba0488d4dbba2af5bf9596b849873102d612e49a118c512d9d302ceafa36e01a", + "sha256:d78702af4102a3a4e23bb7372cec283e78f32f5573d92091aa6aaba870370fe1", + "sha256:def0e5d681dd3eb562b059d355ae8bebe27f5cc455ab7c2b6655586b63d3a8ea", + "sha256:e085d1babcb419bbe58e2e805ac61924dac4ca45a07c9fa081144739e500aa3c", + "sha256:e2cfcbab37c082a5087aba5ff00209999053260441caadd4f0e8f4c2d6b72088", + "sha256:e742f1f5dcaf222e8471c37ee3d1fd561568a16bb52e031c25674ff1cf9702d5", + "sha256:f06819b028b8ef9010281e74c59cb35483933583043091ed6b261bb1540f11cc", + "sha256:f15f2d60a11c306de7700ee9f65df7e9e463848dbea9c8051e293b704038da60", + "sha256:f31338ee269d201abe76083a990905473987371ff6f3fdb76a3f9073a361cf37", + "sha256:f6b88c8d302c3dac8dff7766955e38d670c82e0d79edfc7eae47d6bb2c186594" + ], + "index": "pypi", + "version": "==1.2.1" + }, + "send2trash": { + "hashes": [ + "sha256:60001cc07d707fe247c94f74ca6ac0d3255aabcb930529690897ca2a39db28b2", + "sha256:f1691922577b6fa12821234aeb57599d887c4900b9ca537948d2dac34aea888b" + ], + "index": "pypi", + "version": "==1.5.0" + }, + "six": { + "hashes": [ + "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", + "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" + ], + "index": "pypi", + "version": "==1.12.0" + }, + "spacy": { + "hashes": [ + "sha256:0fe2e5905f2f5b41be3ebea40626f70bea567a7a2cda9c244109fffe8d964429", + "sha256:30f0f09074bf115a0384691e8ba3d64aab431192b3095a13312a93d0e8a71c07", + 
"sha256:6a82612f0e75c11d541002f49375d80b4800c967e5d2b402d5a8dd40b6c57ae6", + "sha256:74066ac969a587d16d00d65318c1baa3c3e9215e6858d0c81ce2823320fe09dc", + "sha256:b1b86ddf6142fa2782b2e0269d040430ae5696eb0224f3e99408897cac7bb506", + "sha256:be8a7c89461ac22d261e19e1d3eb35752d8ff3e52452af076b303561bb166408", + "sha256:e6522e1242a5a5f12ef7e55f74df020b5deea59f7d1e7b6e69298301e3c0badd", + "sha256:eb699f54bf6d131df701e6dbbef9e91b74a065a42c9d2850964282b3c14560bb", + "sha256:f385942c5b2c8cf07e4a56871f88a49d4c8a9145fcd731c455e39fb5af9b12ba" + ], + "index": "pypi", + "version": "==2.1.3" + }, + "sqlalchemy": { + "hashes": [ + "sha256:781fb7b9d194ed3fc596b8f0dd4623ff160e3e825dd8c15472376a438c19598b" + ], + "index": "pypi", + "version": "==1.3.1" + }, + "srsly": { + "hashes": [ + "sha256:02ea974c4b80f9ffdea4f953ffece5a8715e4e4b37d09192ab65cf4edfbf74d1", + "sha256:061ade35556e51b2e1da6f8552be7a6327d2d02b69edf0aacc9f5c4319d495f1", + "sha256:1bf6af7a86f34969a3997da09fc8c2f72ee02cd74ff40035e37c2f968776fa23", + "sha256:1e4ef85bf133e384f465865ba4e0a14a52c4f2e4b46c763faf100339a06f09c4", + "sha256:850399e43f4cefdcac7a913363b120ea084cb02fcfdbbde1bd37444804d7def4", + "sha256:977aa6e5fd3f7e9d1c8fe7aeed841dfe3ede75dfce04255d4c670e663faaef2a", + "sha256:abdc5b46866648b123517550582dc4c4b767b816ae54c44e5973bbebc3f0dab4", + "sha256:ac0dbe6e715e1fe3536397a9e65ec8f3c624c99f45b6f30e87d220071ef84721", + "sha256:b8646f0f7cf6fd1de4919ab456d9c030e09e74f741a0cecc941363414109ccdc", + "sha256:b9dc81339c1ab969057e790d7b2a56fd4da87336785bd671c86520e8272e3663", + "sha256:d7c91f59edc2ceeca70adf1b0a46d337234ff4fb7ca2b579ca41885f011b329f", + "sha256:d906a2a3df1cac2cb4bf382b8aaf14e22df2ca3758eba0d3049723c851c8ebf0", + "sha256:ecec49c9cdaae4594011666dd654e1e044e552f63bb3a62a1849c65a92ee302e", + "sha256:ef7897050c04a313f2db99c9bcaf2f0c3c75609677683ca5a6e1e7a515325d72" + ], + "index": "pypi", + "version": "==0.0.5" + }, + "tensorboard": { + "hashes": [ + 
"sha256:6f194519f41762bfdf5eb410ccf33226d1c252caf5ad8893288648bfbcf4d135", + "sha256:81170f66bf8f95c2e9f6b3fefe0ddc5472655a9e3793e73b5b5d4ec0ba395e76" + ], + "index": "pypi", + "version": "==1.12.2" + }, + "tensorflow": { + "hashes": [ + "sha256:16fb8a59e724afd37a276d33b7e2ed070e5c84899a8d4cfc3fe1bb446a859da7", + "sha256:1ae50e44c0b29df5fb5b460118be5a257b4eb3e561008f64d2c4c715651259b7", + "sha256:1b7d09cc26ef727d628dcb74841b89374a38ed81af25bd589a21659ef67443da", + "sha256:2681b55d3e434e20fe98e3a3b1bde3588af62d7864b62feee4141a71e29ef594", + "sha256:42fc8398ce9f9895b488f516ea0143cf6cf2a3a5fc804da4a190b063304bc173", + "sha256:531619ad1c17b4084d09f442a9171318af813e81aae748e5de8274d561461749", + "sha256:5cee35f8a6a12e83560f30246811643efdc551c364bc981d27f21fbd0926403d", + "sha256:6ad6ed495f1a3d445c43d90cb2ce251ff5532fd6436e25f52977ee59ffa583df", + "sha256:cd8c1a899e3befe1ccb774ea1aae077a4b1286f855c956210b23766f4ac85c30", + "sha256:d3f3d7cd9bd4cdc7ebf25fd6c2dfc103dcf4b2834ae9276cc4cf897eb1515f6d", + "sha256:e4f479e6aca595acc98347364288cbdfd3c025ca85389380174ea75a43c327b7", + "sha256:f587dc03b5f0d1e50cca39b7159c9f21ffdec96273dbf5f7619d48c622cb21f2" + ], + "index": "pypi", + "version": "==1.12.0" + }, + "termcolor": { + "hashes": [ + "sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b" + ], + "index": "pypi", + "version": "==1.1.0" + }, + "terminado": { + "hashes": [ + "sha256:d9d012de63acb8223ac969c17c3043337c2fcfd28f3aea1ee429b345d01ef460", + "sha256:de08e141f83c3a0798b050ecb097ab6259c3f0331b2f7b7750c9075ced2c20c2" + ], + "index": "pypi", + "version": "==0.8.2" + }, + "testpath": { + "hashes": [ + "sha256:46c89ebb683f473ffe2aab0ed9f12581d4d078308a3cb3765d79c6b2317b0109", + "sha256:b694b3d9288dbd81685c5d2e7140b81365d46c29f5db4bc659de5aa6b98780f8" + ], + "index": "pypi", + "version": "==0.4.2" + }, + "textacy": { + "hashes": [ + "sha256:6019f32719c0661f41fa93c2fdd9714504d443119bf4f6426ee690bdda90835b", + 
"sha256:8b9abb1a41eb72e634117bd4936a10de7db7d65cf6208f3387c2bc94678e038c" + ], + "index": "pypi", + "version": "==0.6.2" + }, + "thinc": { + "hashes": [ + "sha256:12c003b804fb93c64261a5010df0129f942234adb8f45d489a355a5315e06acf", + "sha256:17f9ada01f1f77a5560bc16ec5a650dca08356b50727ded0df19f0dfb4a32a25", + "sha256:26c9d54ffd90753feebbc462ae59939a9e3d2485ef24ed3dc1861c9b486fdbbe", + "sha256:3258161fc2cefa4082f099dec3748f1dcef5e920df5e9d82258ea6ffec280b9a", + "sha256:38a83b928cdc49c994852538f639b2a889681a0589c44b1a6fc3c899e5f36893", + "sha256:3e76101a733bbb0b97d44bdbcb407678b9e2b487047acb6f4c19b72909a6b12f", + "sha256:412f107c458d2951711b4d3ec53587244cd3acc032944e855f49cf94a1adc36e", + "sha256:4948c10c61e627950900cdccf506eb7398d2b28f33cf72bb4b5d9c5c572925e7", + "sha256:a8b2d7713a7dfc0b18b5c16db58ab6e015df14e4fbed0249ed49e630b2d6a86f", + "sha256:ec99c2c65962157c7ee7b947d29f2775291860b81cba62c5bd9f92fdeca2d137", + "sha256:f2386e66042218f19e511692926cef00a9646a3104d2efddfb5bec7b0388a83b", + "sha256:fc0b37733591315afddee45823d4f6740f9b0567c1ba57a3a3c319669d1fcbad" + ], + "index": "pypi", + "version": "==7.0.4" + }, + "toolz": { + "hashes": [ + "sha256:929f0a7ea7f61c178bd951bdae93920515d3fbdbafc8e6caf82d752b9b3b31c9" + ], + "index": "pypi", + "version": "==0.9.0" + }, + "tornado": { + "hashes": [ + "sha256:1174dcb84d08887b55defb2cda1986faeeea715fff189ef3dc44cce99f5fca6b", + "sha256:2613fab506bd2aedb3722c8c64c17f8f74f4070afed6eea17f20b2115e445aec", + "sha256:44b82bc1146a24e5b9853d04c142576b4e8fa7a92f2e30bc364a85d1f75c4de2", + "sha256:457fcbee4df737d2defc181b9073758d73f54a6cfc1f280533ff48831b39f4a8", + "sha256:49603e1a6e24104961497ad0c07c799aec1caac7400a6762b687e74c8206677d", + "sha256:8c2f40b99a8153893793559919a355d7b74649a11e59f411b0b0a1793e160bc0", + "sha256:e1d897889c3b5a829426b7d52828fb37b28bc181cd598624e65c8be40ee3f7fa" + ], + "index": "pypi", + "version": "==6.0.2" + }, + "tqdm": { + "hashes": [ + 
"sha256:d385c95361699e5cf7622485d9b9eae2d4864b21cd5a2374a9c381ffed701021", + "sha256:e22977e3ebe961f72362f6ddfb9197cc531c9737aaf5f607ef09740c849ecd05" + ], + "index": "pypi", + "version": "==4.31.1" + }, + "traitlets": { + "hashes": [ + "sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835", + "sha256:c6cb5e6f57c5a9bdaa40fa71ce7b4af30298fbab9ece9815b5d995ab6217c7d9" + ], + "index": "pypi", + "version": "==4.3.2" + }, + "unidecode": { + "hashes": [ + "sha256:092cdf7ad9d1052c50313426a625b717dab52f7ac58f859e09ea020953b1ad8f", + "sha256:8b85354be8fd0c0e10adbf0675f6dc2310e56fda43fa8fe049123b6c475e52fb" + ], + "index": "pypi", + "version": "==1.0.23" + }, + "uritemplate": { + "hashes": [ + "sha256:01c69f4fe8ed503b2951bef85d996a9d22434d2431584b5b107b2981ff416fbd", + "sha256:1b9c467a940ce9fb9f50df819e8ddd14696f89b9a8cc87ac77952ba416e0a8fd", + "sha256:c02643cebe23fc8adb5e6becffe201185bf06c40bda5c0b4028a93f1527d011d" + ], + "index": "pypi", + "version": "==3.0.0" + }, + "urllib3": { + "hashes": [ + "sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39", + "sha256:de9529817c93f27c8ccbfead6985011db27bd0ddfcdb2d86f3f663385c6a9c22" + ], + "index": "pypi", + "version": "==1.24.1" + }, + "wasabi": { + "hashes": [ + "sha256:3491ae742d238ce260116d1b9bf962c134e82df5c814060ea4b1433c6abc841e", + "sha256:57d4f715e69bc99895215ddc4f5587f90c5bac9f4980c0826b0aa09c1a625769" + ], + "index": "pypi", + "version": "==0.2.1" + }, + "wcwidth": { + "hashes": [ + "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e", + "sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c" + ], + "index": "pypi", + "version": "==0.1.7" + }, + "webencodings": { + "hashes": [ + "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", + "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923" + ], + "index": "pypi", + "version": "==0.5.1" + }, + "werkzeug": { + "hashes": [ + 
"sha256:96da23fa8ccecbc3ae832a83df5c722c11547d021637faacb0bec4dd2f4666c8", + "sha256:ca5c2dcd367d6c0df87185b9082929d255358f5391923269335782b213d52655" + ], + "index": "pypi", + "version": "==0.15.1" + }, + "wheel": { + "hashes": [ + "sha256:66a8fd76f28977bb664b098372daef2b27f60dc4d1688cfab7b37a09448f0e9d", + "sha256:8eb4a788b3aec8abf5ff68d4165441bc57420c9f64ca5f471f58c3969fe08668" + ], + "markers": "python_version >= '3'", + "version": "==0.33.1" + }, + "widgetsnbextension": { + "hashes": [ + "sha256:14b2c65f9940c9a7d3b70adbe713dbd38b5ec69724eebaba034d1036cf3d4740", + "sha256:fa618be8435447a017fd1bf2c7ae922d0428056cfc7449f7a8641edf76b48265" + ], + "index": "pypi", + "version": "==3.4.2" + } + }, + "develop": {} +} diff --git a/README.md b/README.md new file mode 100644 index 00000000..03efa335 --- /dev/null +++ b/README.md @@ -0,0 +1,139 @@ +[![Python 3.6](https://img.shields.io/badge/python-3.6-blue.svg)](https://www.python.org/downloads/release/python-360/) [![License: MIT](https://img.shields.io/badge/License-MIT-darkgreen.svg)](https://opensource.org/licenses/MIT) + + + + +[blog]: https://www.google.com/ + +### Code for: ["How to automate tasks on GitHub with machine learning for fun and profit"][blog] + + +Table of Contents + +- [Issue-Label Bot](#issue-label-bot) + - [Important links](#important-links) + - [Files](#files) +- [Running This Code](#running-this-code) + - [Environment Variables](#environment-variables) + - [Run Locally](#run-locally) + - [Deploy As A Service](#deploy-as-a-service) +- [Contributing](#contributing) + - [Roadmap](#roadmap) + - [References](#references) +- [Disclaimers](#disclaimers) + + + +Original Authors: [@hamelsmu](https://github.com/hamelsmu), [@inc0](https://github.com/inc0) + + +# Issue-Label Bot + +A GitHub App powered by machine learning, written in python. A discussion of the motivation for building this app is described in this [blog post][blog]. 
+ +When an issue is opened, the bot predicts if the label should be a: `feature request`, `bug` or `question` and applies a label automatically if appropriate. Here is a screenshot of the bot in action: + +![alt text](images/example3_big.png) + +More examples can be viewed on our [app's homepage](https://mlbot.net/). It should be noted that the bot may not apply any label in circumstances where the prediction is uncertain. See the [disclaimers](#Disclaimers) section for more caveats. + + +## Important links + +- [Issue Label Bot homepage](https://mlbot.net). Provides a way to view example predictions as well as other information regarding this bot. + - GitHub [App page](https://github.com/apps/issue-label-bot) for Issue Label Bot, where you can install the app. See [disclaimers](#Disclaimers) below before installing. + + +## Files + + - [/notebooks](/notebooks): contains notebooks on how to train the model and interact with the GitHub api using a python client. + - [/flask_app](/flask_app): code for a flask app that listens for [GitHub issue events](https://developer.github.com/v3/issues/events/) and responds with predictions. This is the main application that the user will interact with. +- [/argo](/argo): the code in this directory relates to the construction of [Argo ML Pipelines](https://argoproj.github.io/) for training and deploying ML workflows. +- [/deployment](/deployment): This directory contains files that are helpful in deploying the app. + - [Dockerfile](/deployment/Dockerfile) this is the definition of the container that is used to run the flask app. The build for this container is hosted on DockerHub at [hamelsmu/mlapp](https://hub.docker.com/r/hamelsmu/mlapp). + - [heroku.yml](/deployment/heroku.yml): this is used for [deploying to Heroku](https://devcenter.heroku.com/articles/container-registry-and-runtime). + - *.yaml: these files relate to a Kubernetes deployment. 
+ +# Running This Code + + ## Prerequisites + +To utilize the code in this repository, you will need to register a GitHub App of your own and install this app on your desired repositories and store authentication secrets. + +First, walk through the [prerequisites section of this getting started guide](https://developer.github.com/apps/quickstart-guides/using-the-github-api-in-your-app/#prerequisites) **except** the "The Ruby programming language" section, as we will be using python instead as the client that interfaces with the GitHub api. + + Second, [set up your development environment](https://developer.github.com/apps/quickstart-guides/setting-up-your-development-environment/). Make sure you create a Webhook secret, even though this step is optional. + + Next, set up a postgres database. You can do this [for free on Heroku](https://elements.heroku.com/addons/heroku-postgresql). Detailed instructions (stolen shamelessly from [here](https://www.edx.org/course/cs50s-web-programming-with-python-and-javascript)): + +1. Navigate to https://www.heroku.com/, and create an account if you don’t already have one. +2. On Heroku’s Dashboard, click “New” and choose “Create new app.” +3. Give your app a name, and click “Create app.” +4. On your app’s “Overview” page, click the “Configure Add-ons” button. +5. In the “Add-ons” section of the page, type in and select “Heroku Postgres.” +6. Choose the “Hobby Dev - Free” plan, which will give you access to a free PostgreSQL database that will support up to 10,000 rows of data. Click “Provision.” +7. Now, click the “Heroku Postgres :: Database” link. +8. You should now be on your database’s overview page. Click on “Settings”, and then “View Credentials.” This is the information you’ll need to log into your database. + +Finally, you need to create environment variables for all the secrets, which is described below. + +## Environment Variables + +1. 
`PRIVATE_KEY`: this is the private key you use to [authenticate as an app](https://developer.github.com/apps/quickstart-guides/setting-up-your-development-environment) with the GitHub api. +2. `WEBHOOK_SECRET`: this is used to verify that payloads received by your app are actually from GitHub. This is described [here](https://developer.github.com/apps/quickstart-guides/setting-up-your-development-environment/). +3. `DATABASE_URL`: this is the URL that contains the login information for your POSTGRESQL database, usually in the form: `postgres://<username>:<password>@<host>:5432/<database>` +4. `APP_ID`: this is a unique identifier provided to you by GitHub when you [register your app](https://developer.github.com/apps/quickstart-guides/setting-up-your-development-environment). +5. `FLASK_ENV`: this is usually set to either `production` or `development`. You will want to use `development` for local testing. +6. `PORT`: this is the port your app will be serving on. Note that if you are deploying to Heroku, Heroku will override this variable with their own value when building your app. For local development, you will want this to match the [port Smee is serving to](https://developer.github.com/apps/quickstart-guides/setting-up-your-development-environment/#step-1-start-a-new-smee-channel). +7. `APP_URL`: this is the url for the homepage of your app that is provided to users as a link in issue comments. You can set this to an arbitrary value for local development. + +Note: If you are using [zsh](http://www.zsh.org/), the [dotenv](https://github.com/robbyrussell/oh-my-zsh/tree/master/plugins/dotenv) plugin can be useful for managing environment variables. + +## Run Locally + +1. **Install Dependencies**: Install [requirements.txt](/requirements.txt) into a virtual environment. If you are using [pipenv](https://pipenv.readthedocs.io/en/latest/) install the necessary dependencies from [Pipfile.lock](/Pipfile.lock) by typing `pipenv install` in the root of this repository. + +2. 
**Run the flask app**: run `python flask_app/app.py` from the root of this repository. For this to work, you must correctly set the environment variables as described in the [Environment Variables](#Environment-Variables) section. + +3. Optional - **Run app as docker container**. A Docker container that serves [Issue-Label Bot](https://github.com/apps/issue-label-bot) can be built with the command `bash script/bootstrap` from the root of this repository. This script builds a Docker image named `hamelsmu/mlapp`, which is also available [on Dockerhub](https://hub.docker.com/r/hamelsmu/mlapp). If you desire to run the Docker container locally for testing, you must [pass the necessary environment variables](https://stackoverflow.com/questions/30494050/how-do-i-pass-environment-variables-to-docker-containers) to the Docker container at runtime, as well as expose necessary ports for the app. See the [References](#References) section for more resources on using Docker. + + +## Deploy As A Service + +The assets in this repo allow you to [deploy to Heroku](https://devcenter.heroku.com/articles/container-registry-and-runtime) (easier) or a Kubernetes cluster (more advanced). + +In Heroku, secrets can be passed in as [configuration variables](https://devcenter.heroku.com/articles/config-vars). Furthermore, [this documentation](https://kubernetes.io/docs/concepts/configuration/secret/#creating-a-secret-manually) describes how you can set secrets in Kubernetes. Make sure you set the environment variable `FLASK_ENV` to `production` if you are going to deploy the app publicly. + + +# Contributing + +We welcome all forms of contributions. We are especially interested in the following: + +- Bug fixes +- Enhancements or additional features +- Improvements to the model, or expansion of the dataset(s) used for training. 
+ +## Roadmap + +The authors of this project are interested in adding the following features in the near future: + +- Constructing better labels and negative samples of items that do not belong in the label set to drive further improvements. +- Using the tools from [fastai](https://docs.fast.ai/) to explore: + - State of the art architectures, such as [Multi-Head Attention](https://docs.fast.ai/text.models.html#MultiHeadAttention) + - Pre-training on a large corpus such as stack overflow and fine tuning that on GitHub issues to predict repo-specific issue labels. A related project that can help bootstrap this task is [stackroboflow.com](https://stackroboflow.com/about/index.html) +- Using [GitHub Actions](https://github.com/features/actions) to trigger automated deploys of this code. +- Model pipeline orchestration on [Argo pipelines](https://argoproj.github.io/). + + +## References + - The code in this repo and associated tutorial(s) assume familiarity with Docker. [This blog post](https://towardsdatascience.com/how-docker-can-help-you-become-a-more-effective-data-scientist-7fc048ef91d5) offers a gentle introduction to Docker for data scientists. + + - Need inspiration for other data products you can build using machine learning and public GitHub datasets? See these examples: + - [GitHub issue summarization](https://towardsdatascience.com/how-to-create-data-products-that-are-magical-using-sequence-to-sequence-models-703f86a231f8) and recommendation. + - Natural language [semantic code search](https://towardsdatascience.com/semantic-code-search-3cd6d244a39c). +- Excellent course on flask: [HarvardX CS50 Web](https://www.edx.org/course/cs50s-web-programming-with-python-and-javascript). +- MOOCs by [fastai](https://www.fast.ai/) for [machine learning](http://course18.fast.ai/ml) and [deep learning](http://course.fast.ai/). + +# Disclaimers + +[Issue-Label Bot](https://github.com/apps/issue-label-bot) is for educational and demonstration purposes only. 
Our primary goal was to construct a minimal working example for the community. Therefore, we believe the model demonstrated has great room from improvement and is expected to make many mistakes. Furthermore, **this app only works on public repositories and will do nothing if installed on a private repo.** diff --git a/argo/Dockerfile b/argo/Dockerfile new file mode 100644 index 00000000..21c6b893 --- /dev/null +++ b/argo/Dockerfile @@ -0,0 +1,6 @@ +FROM python:3.6 + +RUN pip install numpy pandas scikit-learn dill tensorflow dask ktext +RUN mkdir /data +RUN mkdir /output +COPY src /src diff --git a/argo/src/preprocess.py b/argo/src/preprocess.py new file mode 100644 index 00000000..e6ba98a0 --- /dev/null +++ b/argo/src/preprocess.py @@ -0,0 +1,124 @@ +import pandas as pd +import dask.dataframe as df +from dask_ml.preprocessing import OneHotEncoder +import numpy as np +from keras.utils.np_utils import to_categorical +import time + +from sklearn.model_selection import train_test_split +from typing import Callable, List +from keras.preprocessing.text import text_to_word_sequence +from keras.preprocessing.sequence import pad_sequences +from dask import array as da +from textacy.preprocess import preprocess_text +import dask.multiprocessing +from pathos.multiprocessing import cpu_count +from collections import Counter +from collections import defaultdict +import h5py + + +start_time = time.time() + +dask.config.set(scheduler='processes') + +output_dir = "/data/" + +base_url = 'https://storage.googleapis.com/codenet/issue_labels/' +dd = df.from_pandas(pd.concat([pd.read_csv(base_url+f'00000000000{i}.csv.gz') for i in range(10)]), npartitions=128) + +print(dd.head()) + +def textacy_cleaner(text: str) -> str: + """ + Defines the default function for cleaning text. + + This function operates over a list. 
+ """ + return preprocess_text(text, + fix_unicode=True, + lowercase=True, + transliterate=True, + no_urls=True, + no_emails=True, + no_phone_numbers=True, + no_numbers=True, + no_currency_symbols=True, + no_punct=True, + no_contractions=False, + no_accents=True) + + +def process_document(doc: str) -> List[str]: + doc = text_to_word_sequence(textacy_cleaner(doc)) + return ["_start_"] + doc + ["_end_"] + + +test_data = 'hello world 314-903-3072, hamel.husain@gmail.com wee woo' +assert process_document(test_data) == ['_start_', 'hello', 'world', 'phone', 'email', 'wee', 'woo', '_end_'] + + +bodies_parsed = dd["body"].apply(process_document) +titles_parsed = dd["title"].apply(process_document) + +now = time.time() - start_time +print(f"tokenized {now}") + +def to_one_hot(df): + return to_categorical(df.values, num_classes=3) + +targets = dd["class_int"].to_frame().map_partitions(to_one_hot) + +body_quant = int(bodies_parsed.apply(len).quantile(q=0.75).compute()) +title_quant = int(titles_parsed.apply(len).quantile(q=0.75).compute()) + +def count_words(partition): + c = Counter() + def count(p): + c.update(p) + return c + return partition.apply(count).iloc[0] + +body_counts = bodies_parsed.map_partitions(count_words).compute() +body_counts = sum(body_counts.tolist(), Counter()) + +title_counts = titles_parsed.map_partitions(count_words).compute() +title_counts = sum(title_counts.tolist(), Counter()) + + +words_to_keep_body = body_counts.most_common(n=8000) +body_vocab = defaultdict(lambda: 1) +body_vocab.update({x:i+2 for i, x in enumerate([x[0] for x in words_to_keep_body])}) + +words_to_keep_title = title_counts.most_common(n=4500) +titles_vocab = defaultdict(lambda: 1) +titles_vocab.update({x:i+2 for i, x in enumerate([x[0] for x in words_to_keep_title])}) + +numer_bodies = bodies_parsed.apply(lambda x: [body_vocab[w] for w in x]) +numer_titles = titles_parsed.apply(lambda x: [titles_vocab[w] for w in x]) + +def pad_partition(numerized_doc): + if type(numerized_doc) 
!= list: + return + return pad_sequences([numerized_doc], maxlen=body_quant, truncating='post')[0] + +processed_bodies = numer_bodies.apply(pad_partition) +processed_titles = numer_titles.apply(pad_partition) + +num_titles = processed_titles.count().compute() +num_bodies = processed_bodies.count().compute() + +now = time.time() - start_time +print(f"saving {now}") + +processed_titles = da.stack(processed_titles.values.compute()) +processed_bodies = da.stack(processed_bodies.values.compute()) + +f = h5py.File('/data/output.hdf5', 'w') +f.create_dataset('/titles', data=processed_titles.compute()) +f.create_dataset('/bodies', data=processed_bodies.compute()) +f.create_dataset('/targets', data=targets.compute()) +f.close() + +now = time.time() - start_time +print(f"saved {now}") \ No newline at end of file diff --git a/argo/src/train.py b/argo/src/train.py new file mode 100644 index 00000000..31576c0d --- /dev/null +++ b/argo/src/train.py @@ -0,0 +1,81 @@ +import numpy as np +import dill as dpickle + +import tensorflow as tf +from tensorflow.keras.utils import multi_gpu_model +from tensorflow.keras.models import Model +from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Conv1D, Bidirectional, BatchNormalization, Dot, Flatten, Concatenate +from tensorflow.keras.optimizers import Nadam +from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint + + + + +input_dir = "/data/" +out_dir = "/output/" + +def load_pickle(fname): + "load file pickled with dill." 
+ with open(fname, 'rb') as f: + pp = dpickle.load(f) + return pp + +#load the text pre-processors +title_pp = load_pickle(input_dir + 'title_pp.dpkl') +body_pp = load_pickle(input_dir + 'body_pp.dpkl') + +#load the training data and labels +train_body_vecs = np.load(input_dir + 'train_body_vecs.npy') +train_title_vecs = np.load(input_dir + 'train_title_vecs.npy') +train_labels = np.load(input_dir + 'train_labels.npy') + +#load the test data and labels +test_body_vecs = np.load(input_dir + 'test_body_vecs.npy') +test_title_vecs = np.load(input_dir + 'test_title_vecs.npy') +test_labels = np.load(input_dir + 'test_labels.npy') + + +issue_body_doc_length = train_body_vecs.shape[1] +issue_title_doc_length = train_title_vecs.shape[1] + +body_vocab_size = body_pp.n_tokens +title_vocab_size = title_pp.n_tokens + +body_emb_size = 400 +title_emb_size = 300 + +num_classes = len(set(train_labels)) + +body_input = Input(shape=(issue_body_doc_length,), name='Body-Input') +title_input = Input(shape=(issue_title_doc_length,), name='Title-Input') + +b_i = Embedding(body_vocab_size, body_emb_size, name='Body-Embedding', mask_zero=False)(body_input) +b_t = Embedding(title_vocab_size, title_emb_size, name='Title-Embedding', mask_zero=False)(title_input) + +b_i = BatchNormalization()(b_i) +b_i = Bidirectional(GRU(300, name='Body-Encoder'))(b_i) + +b_t = BatchNormalization()(b_t) +b_t = GRU(300, name='Title-Encoder')(b_t) + +b = Concatenate(name='Concat')([b_i, b_t]) +#b = Dense(100, activation='relu', name='Dense1')(b_concat) +b = BatchNormalization()(b) +out = Dense(num_classes, activation='softmax')(b) + +parallel_model = Model([body_input, title_input], out) +parallel_model.compile(optimizer=Nadam(lr=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy']) + +script_name_base = 'IssueLabeler' +csv_logger = CSVLogger(out_dir + '{:}.log'.format(script_name_base)) +model_checkpoint = ModelCheckpoint(out_dir + 
'{:}.epoch{{epoch:02d}}-val{{val_loss:.5f}}.hdf5'.format(script_name_base), + save_best_only=True) + +batch_size = 6400 +epochs = 10 +history = parallel_model.fit(x=[train_body_vecs, train_title_vecs], + y=np.expand_dims(train_labels, -1), + batch_size=batch_size, + epochs=epochs, + validation_split=0.10, + callbacks=[csv_logger, model_checkpoint]) diff --git a/argo/workflow.yaml b/argo/workflow.yaml new file mode 100644 index 00000000..63c18ebd --- /dev/null +++ b/argo/workflow.yaml @@ -0,0 +1,46 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: mlapp- + labels: + workflow: mlapp +spec: + entrypoint: mlapp + templates: + - name: mlapp + dag: + tasks: + - name: preprocess + template: preprocess + + - name: model1 + dependencies: [preprocess] + template: train + arguments: + artifacts: + - name: dataset + from: "{{tasks.preprocess.outputs.artifacts.dataset}}" + + - name: preprocess + container: + image: analytics-kubelet-055005d.private-us-east-1.github.net:30550/mlapp:latest + name: preprocess + command: ["python", "/src/preprocess.py"] + outputs: + artifacts: + - name: dataset + path: /data + + - name: train + inputs: + artifacts: + - name: dataset + path: /data + outputs: + artifacts: + - name: model + path: /output + container: + image: analytics-kubelet-055005d.private-us-east-1.github.net:30550/mlapp:latest + name: trainer + command: ["python", "/src/train.py"] diff --git a/azure-pipelines.yml b/azure-pipelines.yml new file mode 100644 index 00000000..881772ff --- /dev/null +++ b/azure-pipelines.yml @@ -0,0 +1,26 @@ +# Docker image +# Build a Docker image to deploy, run, or push to a container registry. 
+# Add steps that use Docker Compose, tag images, push to a registry, run an image, and more: +# https://docs.microsoft.com/azure/devops/pipelines/languages/docker + +trigger: +- docker-build + +pool: + vmImage: 'Ubuntu-16.04' + +variables: + imageName: 'hamelsmu/mlapp' + +steps: +- script: docker build -f deployment/Dockerfile -t $(imageName) . + displayName: 'docker build' + continueOnError: false + +- script: docker login -u $(docker_username) -p $(docker_password) + displayName: 'docker login' + continueOnError: false + +- script: docker push $(imageName) + displayName: 'docker push' + continueOnError: false \ No newline at end of file diff --git a/deployment/Dockerfile b/deployment/Dockerfile new file mode 100644 index 00000000..6841fb6a --- /dev/null +++ b/deployment/Dockerfile @@ -0,0 +1,76 @@ +FROM python:3.6-slim-stretch + +RUN apt-get update +RUN apt-get upgrade -y +RUN apt-get install --reinstall build-essential -y +RUN apt install -y gcc g++ + +# hardcode some install so rebuilds are faster, because cannot cache requirements.txt +RUN pip install \ + asn1crypto==0.24.0 \ + astor==0.7.1 \ + backcall==0.1.0 \ + backports.weakref==1.0.post1 \ + bleach==3.1.0 \ + blis==0.2.4 \ + cachetools==3.1.0 \ + certifi==2019.3.9 \ + cffi==1.12.2 \ + chardet==3.0.4 \ + click==7.0 \ + cryptography==2.6.1 \ + cycler==0.10.0 \ + cymem==2.0.2 \ + cytoolz==0.9.0.1 \ + dask==1.1.5 \ + decorator==4.4.0 \ + defusedxml==0.5.0 \ + dill==0.2.9 \ + entrypoints==0.3 \ + enum34==1.1.6 \ + flask-session==0.3.1 \ + flask-sqlalchemy==2.3.2 \ + flask==1.0.2 \ + ftfy==4.4.3 \ + gast==0.2.2 \ + github3.py==1.3.0 \ + grpcio==1.19.0 \ + h5py==2.9.0 \ + html5lib==1.0.1 \ + idna==2.8 \ + ijson==2.3 \ + ipdb==0.12 \ + ipykernel==5.1.0 \ + ipython-genutils==0.2.0 \ + ipython==7.4.0 \ + ipywidgets==7.4.2 \ + itsdangerous==1.1.0 \ + jedi==0.13.3 \ + jinja2==2.10 \ + jsonify==0.5 \ + jsonschema==2.6.0 \ + jupyter-client==5.2.4 \ + jupyter-console==6.0.0 \ + jupyter-core==4.4.0 \ + jupyter==1.0.0 \ 
+ jwcrypto==0.6.0 \ + jwt==0.6.1 \ + keras-applications==1.0.7 \ + keras-preprocessing==1.0.9 \ + keras==2.2.4 \ + kiwisolver==1.0.1 \ + ktext==0.34 \ + numpy==1.16.2 \ + pandas==0.24.2 \ + pyarrow==0.12.1 \ + scikit-learn==0.20.3 \ + tensorflow==1.12.0 + +COPY requirements.txt . +RUN pip install -r requirements.txt +COPY flask_app flask_app/ + +EXPOSE 3000 80 443 +WORKDIR flask_app/ + +CMD python app.py \ No newline at end of file diff --git a/deployment/README.md b/deployment/README.md new file mode 100644 index 00000000..2902e277 --- /dev/null +++ b/deployment/README.md @@ -0,0 +1,31 @@ +# Deploying MLApp + + +## github-probots + +There is a dedicated instance running in + +* **GCP project**: github-probots +* **cluster**: kf-ci-ml +* **namespace**: mlapp + +Deploying it + +1. Create the deployment + + ``` + kubectl apply -f deployments.yaml + ``` + +1. Create the secret + + ``` + gsutil cp gs://github-probots_secrets/ml-app-inference-secret.yaml /tmp + kubectl apply -f /tmp/ml-app-inference-secret.yaml + ``` + +1. 
Create the ingress + + ``` + kubectl apply -f ingress.yaml + ``` \ No newline at end of file diff --git a/deployment/deployments.yaml b/deployment/deployments.yaml new file mode 100644 index 00000000..1542a0af --- /dev/null +++ b/deployment/deployments.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ml-github-app + namespace: mlapp + labels: + app: ml-github-app +spec: + replicas: 3 + selector: + matchLabels: + app: ml-github-app + template: + metadata: + labels: + app: ml-github-app + spec: + containers: + - name: ml-github-app + image: hamelsmu/mlapp + command: ["python", "app.py"] + env: + - name: PRIVATE_KEY + valueFrom: + secretKeyRef: + name: ml-app-inference-secret + key: PRIVATE_KEY + - name: DATABASE_URL + valueFrom: + secretKeyRef: + name: ml-app-inference-secret + key: DATABASE_URL + - name: WEBHOOK_SECRET + valueFrom: + secretKeyRef: + name: ml-app-inference-secret + key: WEBHOOK_SECRET + - name: APP_ID + valueFrom: + secretKeyRef: + name: ml-app-inference-secret + key: APP_ID + - name: FLASK_ENV + value: production + - name: PORT + value: '3000' + - name: APP_URL + value: https://mlbot.net/ + - name: APP_URL_HEROKU + value: https://fathomless-forest-27162.herokuapp.com/ + - name: authors + value: mjh + ports: + - containerPort: 443 + - containerPort: 80 + - containerPort: 3000 \ No newline at end of file diff --git a/deployment/fake-secret.yaml b/deployment/fake-secret.yaml new file mode 100644 index 00000000..f6cc7337 --- /dev/null +++ b/deployment/fake-secret.yaml @@ -0,0 +1,13 @@ +# https://kubernetes.io/docs/concepts/configuration/secret/#creating-a-secret-manually +apiVersion: v1 +kind: Secret +metadata: + name: ml-app-inference-secret +type: Opaque +data: + PRIVATE_KEY: something + PRIVATE_KEY_DEV: something + DATABASE_URL: something + WEBHOOK_SECRET: something + APP_ID: something + APP_ID_DEV: something \ No newline at end of file diff --git a/deployment/heroku.yml b/deployment/heroku.yml new file mode 100644 
index 00000000..2b8f79bb --- /dev/null +++ b/deployment/heroku.yml @@ -0,0 +1,3 @@ +build: + docker: + web: Dockerfile \ No newline at end of file diff --git a/deployment/ingress.yaml b/deployment/ingress.yaml new file mode 100644 index 00000000..aa52a3b6 --- /dev/null +++ b/deployment/ingress.yaml @@ -0,0 +1,16 @@ +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: ml-gh-app + namespace: mlapp +spec: + backend: + serviceName: ml-github-app + servicePort: 3000 + rules: + - http: + paths: + - path: / + backend: + serviceName: ml-github-app + servicePort: 3000 \ No newline at end of file diff --git a/deployment/service.yaml b/deployment/service.yaml new file mode 100644 index 00000000..f3cd8895 --- /dev/null +++ b/deployment/service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: ml-github-app + namespace: mlapp + labels: + app: ml-github-app +spec: + ports: + - port: 3000 + protocol: TCP + selector: + app: ml-github-app + type: NodePort \ No newline at end of file diff --git a/flask_app/app.py b/flask_app/app.py new file mode 100644 index 00000000..1bca047e --- /dev/null +++ b/flask_app/app.py @@ -0,0 +1,245 @@ +import os +import logging +from collections import defaultdict +import hmac +from flask import (abort, Flask, session, render_template, + session, redirect, url_for, request, + flash, jsonify) +from flask_session import Session +from sqlalchemy import desc +from mlapp import GitHubApp +from tensorflow.keras.models import load_model +from tensorflow.keras.utils import get_file +from utils import IssueLabeler +import dill as dpickle +from urllib.request import urlopen +from sql_models import db, Issues, Predictions +import tensorflow as tf +import requests +import ipdb + +app = Flask(__name__) +app_url = os.getenv('APP_URL') + +# Configure session to use filesystem. Hamel: BOILERPLATE. 
+app.config["SESSION_PERMANENT"] = False +Session(app) + +# Bind database to flask app +app.config["SQLALCHEMY_DATABASE_URI"] = os.getenv("DATABASE_URL") +app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False +db.init_app(app) + +# Additional Setup inspired by https://github.com/bradshjg/flask-githubapp/blob/master/flask_githubapp/core.py +app.webhook_secret = os.getenv('WEBHOOK_SECRET') +LOG = logging.getLogger(__name__) + +# set the prediction threshold for everything except for the label question which has a different threshold +prediction_threshold = defaultdict(lambda: .55) +prediction_threshold['question'] = .65 + + +def init(): + "Load all necessary artifacts to make predictions." + title_pp_url = "https://storage.googleapis.com/codenet/issue_labels/issue_label_model_files/title_pp.dpkl" + body_pp_url = 'https://storage.googleapis.com/codenet/issue_labels/issue_label_model_files/body_pp.dpkl' + model_url = 'https://storage.googleapis.com/codenet/issue_labels/issue_label_model_files/Issue_Label_v1_best_model.hdf5' + model_filename = 'downloaded_model.hdf5' + + #save keyfile + pem_string = os.getenv('PRIVATE_KEY') + if not pem_string: + raise ValueError('Environment variable PRIVATE_KEY was not supplied.') + + with open('private-key.pem', 'wb') as f: + f.write(str.encode(pem_string)) + + with urlopen(title_pp_url) as f: + title_pp = dpickle.load(f) + + with urlopen(body_pp_url) as f: + body_pp = dpickle.load(f) + + model_path = get_file(fname=model_filename, origin=model_url) + model = load_model(model_path) + app.graph = tf.get_default_graph() + app.issue_labeler = IssueLabeler(body_text_preprocessor=body_pp, + title_text_preprocessor=title_pp, + model=model) + +# smee by default sends things to /event_handler route +@app.route("/", methods=["GET"]) +def index(): + "Landing page" + results = db.engine.execute("SELECT * FROM (SELECT distinct repo, username FROM issues a JOIN predictions b on a.issue_id=b.issue_id) as t ORDER BY random() LIMIT 50").fetchall() + 
num_users = f'{len(db.engine.execute("SELECT distinct username FROM issues").fetchall()):,}' + num_predictions = f'{db.engine.execute("SELECT count(*) FROM predictions").fetchall()[0][0]:,}' + num_repos = f'{len(results):,}' + return render_template("index.html", + results=results, + num_users=num_users, + num_repos=num_repos, + num_predictions=num_predictions) + +# smee by default sends things to /event_handler route +@app.route("/event_handler", methods=["POST"]) +def bot(): + "Handle payload" + # authenticate webhook to make sure it is from GitHub + verify_webhook(request) + + # Check if payload corresponds to an issue being opened + if request.json['action'] == 'opened' and ('issue' in request.json): + # get metadata + installation_id = request.json['installation']['id'] + issue_num = request.json['issue']['number'] + private = request.json['repository']['private'] + username, repo = request.json['repository']['full_name'].split('/') + title = request.json['issue']['title'] + body = request.json['issue']['body'] + + # don't do anything if repo is private. 
+ if private: + return 'ok' + + # write the issue to the database using ORM + issue_db_obj = Issues(repo=repo, + username=username, + issue_num=issue_num, + title=title, + body=body) + + db.session.add(issue_db_obj) + db.session.commit() + + # make predictions with the model + with app.graph.as_default(): + predictions = app.issue_labeler.get_probabilities(body=body, title=title) + #log to console + LOG.warning(f'issue opened by {username} in {repo} #{issue_num}: {title} \nbody:\n {body}\n') + LOG.warning(f'predictions: {str(predictions)}') + + # get the most confident prediction + argmax = max(predictions, key=predictions.get) + # take an action if the prediction is confident enough + if predictions and (predictions[argmax] >= prediction_threshold[argmax]): + # create message + message = f'Issue-Label Bot is automatically applying the label `{argmax}` to this issue, with a confidence of {predictions[argmax]:.2f}. Please mark this comment with :thumbsup: or :thumbsdown: to give our bot feedback! \n\n Links: [dashboard]({app_url}data/{username}/{repo}), [app homepage](https://github.com/apps/issue-label-bot) and [code](https://github.com/hamelsmu/MLapp) for this bot.' 
+ # label the issue and make a comment using the GitHub api + issue = get_issue_handle(installation_id, username, repo, issue_num) + comment = issue.create_comment(message) + issue.add_labels(argmax) + + # log the prediction to the database using ORM + issue_db_obj.add_prediction(comment_id=comment.id, + prediction=argmax, + probability=predictions[argmax], + logs=str(predictions)) + return 'ok' + + else: + return 'ok' + +@app.route("/data//", methods=["GET", "POST"]) +def data(owner, repo): + "Route where users can see the Bot's recent predictions for a repo" + + if not is_public(owner, repo): + return render_template("data.html", + results=[], + owner=owner, + repo=repo, + error=f'{owner}/{repo} is a private repo or does not exist.') + + issues = Issues.query.filter(Issues.username == owner, Issues.repo == repo).all() + issue_numbers = [x.issue_id for x in issues] + + if request.method == 'POST': + update_feedback(owner=owner, repo=repo) + + # get the 50 most recent predictions. + predictions = (Predictions.query.filter(Predictions.issue_id.in_(issue_numbers)) + .order_by(desc(Predictions.issue_id)) + .limit(50) + .all()) + + num_issues = len(issues) + num_predictions = len(predictions) + + return render_template("data.html", + results=predictions, + num_issues=num_issues, + num_predictions=num_predictions, + owner=owner, + repo=repo) + + +def update_feedback(owner, repo): + "Update feedback for predicted labels for an owner/repo" + # authenticate webhook to make sure it is from GitHub + issues = Issues.query.filter(Issues.username == owner, Issues.repo == repo).all() + issue_numbers = [x.issue_id for x in issues] + + # only update last 100 things to prevent edge cases on repos with large number of issues. + predictions = (Predictions.query.filter(Predictions.issue_id.in_(issue_numbers)) + .limit(100) + .all()) + + # we only want to get the installation token once for the list of predictions. 
+ ghapp = get_app() + installation_id = ghapp.get_installation_id(owner=owner, repo=repo) + installation_access_token = ghapp.get_installation_access_token(installation_id) + + # grab all the reactions and update the statistics in the database. + for prediction in predictions: + reactions = ghapp.get_reactions(owner=owner, + repo=repo, + comment_id=prediction.comment_id, + iat=installation_access_token) + prediction.likes = reactions['+1'] + prediction.dislikes = reactions['-1'] + db.session.commit() + print(f'Successfully updated feedback based on reactions for {len(predictions)} predictions in {owner}/{repo}.') + + +def get_app(): + "grab a fresh instance of the app handle." + app_id = os.getenv('APP_ID') + key_file_path = 'private-key.pem' + ghapp = GitHubApp(pem_path=key_file_path, app_id=app_id) + return ghapp + +def get_issue_handle(installation_id, username, repository, number): + "get an issue object." + ghapp = get_app() + install = ghapp.get_installation(installation_id) + return install.issue(username, repository, number) + +def verify_webhook(request): + "Make sure request is from GitHub.com" + # Inspired by https://github.com/bradshjg/flask-githubapp/blob/master/flask_githubapp/core.py#L191-L198 + signature = request.headers['X-Hub-Signature'].split('=')[1] + + mac = hmac.new(str.encode(app.webhook_secret), msg=request.data, digestmod='sha1') + + if not hmac.compare_digest(mac.hexdigest(), signature): + LOG.warning('GitHub hook signature verification failed.') + abort(400) + +def is_public(owner, repo): + "Verify repo is public." 
+ try: + return requests.head(f'https://github.com/{owner}/{repo}').status_code == 200 + except: + return False + +if __name__ == "__main__": + init() + with app.app_context(): + # create tables if they do not exist + db.create_all() + + # make sure things reload + app.jinja_env.auto_reload = True + app.config['TEMPLATES_AUTO_RELOAD'] = True + app.run(debug=True, host='0.0.0.0', port=os.getenv('PORT')) \ No newline at end of file diff --git a/flask_app/mlapp.py b/flask_app/mlapp.py new file mode 100644 index 00000000..fea656f2 --- /dev/null +++ b/flask_app/mlapp.py @@ -0,0 +1,183 @@ +from collections import namedtuple, Counter +from github3 import GitHub +from pathlib import Path +from cryptography.hazmat.backends import default_backend +import time +import json +import jwt +import requests +from tqdm import tqdm +from typing import List + +class GitHubApp(GitHub): + """ + This is a small wrapper around the github3.py library + + Provides some convenience functions for testing purposes. + """ + + def __init__(self, pem_path, app_id): + super().__init__() + + self.path = Path(pem_path) + self.app_id = app_id + + if not self.path.is_file(): + raise ValueError(f'argument: `pem_path` must be a valid filename. {pem_path} was not found.') + + def get_app(self): + with open(self.path, 'rb') as key_file: + client = GitHub() + client.login_as_app(private_key_pem=key_file.read(), + app_id=self.app_id) + return client + + def get_installation(self, installation_id): + "login as app installation without requesting previously gathered data." + with open(self.path, 'rb') as key_file: + client = GitHub() + client.login_as_app_installation(private_key_pem=key_file.read(), + app_id=self.app_id, + installation_id=installation_id) + return client + + def get_test_installation_id(self): + "Get a sample test_installation id." 
+ client = self.get_app() + return next(client.app_installations()).id + + def get_test_installation(self): + "login as app installation with the first installation_id retrieved." + return self.get_installation(self.get_test_installation_id()) + + def get_test_repo(self): + repo = self.get_all_repos(self.get_test_installation_id())[0] + appInstallation = self.get_test_installation() + owner, name = repo['full_name'].split('/') + return appInstallation.repository(owner, name) + + def get_test_issue(self): + test_repo = self.get_test_repo() + return next(test_repo.issues()) + + def get_jwt(self): + """ + This is needed to retrieve the installation access token (for debugging). + + Useful for debugging purposes. Must call .decode() on returned object to get string. + """ + now = self._now_int() + payload = { + "iat": now, + "exp": now + (60), + "iss": self.app_id + } + with open(self.path, 'rb') as key_file: + private_key = default_backend().load_pem_private_key(key_file.read(), None) + return jwt.encode(payload, private_key, algorithm='RS256') + + def get_installation_id(self, owner, repo): + "https://developer.github.com/v3/apps/#find-repository-installation" + url = f'https://api.github.com/repos/{owner}/{repo}/installation' + + headers = {'Authorization': f'Bearer {self.get_jwt().decode()}', + 'Accept': 'application/vnd.github.machine-man-preview+json'} + + response = requests.get(url=url, headers=headers) + if response.status_code != 200: + raise Exception(f'Status code : {response.status_code}, {response.json()}') + return response.json()['id'] + + def get_installation_access_token(self, installation_id): + "Get the installation access token for debugging." 
+ + url = f'https://api.github.com/app/installations/{installation_id}/access_tokens' + headers = {'Authorization': f'Bearer {self.get_jwt().decode()}', + 'Accept': 'application/vnd.github.machine-man-preview+json'} + + response = requests.post(url=url, headers=headers) + if response.status_code != 201: + raise Exception(f'Status code : {response.status_code}, {response.json()}') + return response.json()['token'] + + def _extract(self, d, keys): + "extract selected keys from a dict." + return dict((k, d[k]) for k in keys if k in d) + + def _now_int(self): + return int(time.time()) + + def get_all_repos(self, installation_id): + """Get all repos that this installation has access to. + + Useful for testing and debugging. + """ + url = 'https://api.github.com/installation/repositories' + headers={'Authorization': f'token {self.get_installation_access_token(installation_id)}', + 'Accept': 'application/vnd.github.machine-man-preview+json'} + + response = requests.get(url=url, headers=headers) + + if response.status_code >= 400: + raise Exception(f'Status code : {response.status_code}, {response.json()}') + + fields = ['name', 'full_name', 'id'] + return [self._extract(x, fields) for x in response.json()['repositories']] + + def get_reactions(self, owner: str, repo: str, comment_id: int, iat: str): + """Get a list of reactions. 
+ + https://developer.github.com/v3/reactions/#list-reactions-for-a-commit-comment + """ + url = f'https://api.github.com/repos/{owner}/{repo}/issues/comments/{comment_id}/reactions' + # installation_id = self.get_installation_id(owner, repo) + # headers={'Authorization': f'token {self.get_installation_access_token(installation_id)}', + # 'Accept': 'application/vnd.github.squirrel-girl-preview+json'} + headers={'Authorization': f'token {iat}', + 'Accept': 'application/vnd.github.squirrel-girl-preview+json'} + + response = requests.get(url=url, headers=headers) + + if response.status_code >= 400: + raise Exception(f'Status code : {response.status_code}, {response.json()}') + + results = [self._extract(x, ['content']) for x in response.json()] + # count the reactions + return Counter([x['content'] for x in results]) + + + @staticmethod + def unpack_issues(client, owner, repo, label_only=True): + """ + extract relevant data from issues. + + returns a list of namedtuples which contains the following fields: + title: str + number: int + body: str + labels: list + url: str + + """ + Issue = namedtuple('Issue', ['title', 'number', 'body', 'labels', 'url']) + + issue_data = [] + issues = list(client.issues_on(owner, repo)) + for issue in tqdm(issues, total=len(issues)): + labels=[label.name for label in issue.labels()] + + # if there are no labels, then optionally skip + if label_only and not labels: + continue + + issue_data.append(Issue(title=issue.title, + number=issue.number, + body=issue.body, + labels=[label.name for label in issue.labels()], + url=issue.html_url) + ) + return issue_data + + def generate_installation_curl(self, endpoint): + iat = self.get_installation_access_token() + print(f'curl -i -H "Authorization: token {iat}" -H "Accept: application/vnd.github.machine-man-preview+json" https://api.github.com{endpoint}') \ No newline at end of file diff --git a/flask_app/sql_models.py b/flask_app/sql_models.py new file mode 100644 index 00000000..f9f134c1 --- 
/dev/null +++ b/flask_app/sql_models.py @@ -0,0 +1,46 @@ +import os +from flask import Flask +from flask_sqlalchemy import SQLAlchemy + +db = SQLAlchemy() + +class Issues(db.Model): + __tablename__ = "issues" + issue_id = db.Column(db.Integer, primary_key=True) + repo = db.Column(db.String, nullable=False) + username = db.Column(db.String, nullable=False) + issue_num = db.Column(db.Integer, nullable=False) + title = db.Column(db.String, nullable=False) + body = db.Column(db.String, nullable=True) + # the below statement allows you to call `Predictions.issue` to refer back to the issue + predictions = db.relationship('Predictions', backref='issue', lazy=True) + + def add_prediction(self, comment_id, prediction, probability, logs=None, prediction_type='issue label'): + p = Predictions(issue_id = self.issue_id, + comment_id=comment_id, + prediction=prediction, + probability=probability, + likes=None, + dislikes=None, + prediction_type=prediction_type, + logs=logs) + db.session.add(p) + db.session.commit() + + +class Predictions(db.Model): + __tablename__ = "predictions" + prediction_id = db.Column(db.Integer, primary_key=True) + issue_id = db.Column(db.Integer, db.ForeignKey("issues.issue_id"), nullable=False) + comment_id = db.Column(db.BigInteger, nullable=False) + prediction = db.Column(db.String, nullable=False) + probability = db.Column(db.Float, nullable=False) + likes = db.Column(db.Integer, nullable=True) + dislikes = db.Column(db.Integer, nullable=True) + prediction_type = db.Column(db.String, nullable=False) + logs = db.Column(db.String, nullable=True) + + def update_feedback(self, likes, dislikes): + p = Predictions.get(self.prediction_id) + p.likes = likes + p.dislikes = dislikes \ No newline at end of file diff --git a/flask_app/static/styles.css b/flask_app/static/styles.css new file mode 100644 index 00000000..722bf3a9 --- /dev/null +++ b/flask_app/static/styles.css @@ -0,0 +1,4 @@ +.my-flex-card > div > div.card { + height: calc(100% - 15px); + 
margin-bottom: 15px; +} \ No newline at end of file diff --git a/flask_app/templates/data.html b/flask_app/templates/data.html new file mode 100644 index 00000000..19652f24 --- /dev/null +++ b/flask_app/templates/data.html @@ -0,0 +1,81 @@ +{% extends "layout.html" %} + +{% block title %} + Issue Label Predictions +{% endblock %} + +{% block body %} +
+ +
+
+
+ {% if num_issues > 0 %} +

Data For {{ owner }}/{{ repo }}

+ {% if results %} +

Only showing last 50 predictions

+ + {% endif %} + {% endif %} +
+
+ {% if not results and num_issues > 0 %} +

Issue Label Bot has observed {{ num_issues }} issue(s) that have been opened since you installed the app. However, it has not been confident enough to label any of them! Issue Label Bot will only make a prediction when it is sufficiently confident that the issue is either a feature request, bug, or question.

+ {% endif %} + {% if results %} +
+ + + + + + + + + + + + + + + {% for result in results %} + + + + + + + + + + + {% endfor %} + +
Issue_NumberTitleBodyLinkPredictionConfidence
{{ result.issue.issue_num }}{{ result.issue.title }}{{ result.issue.body | truncate(250) }} + + + + {{ result.prediction }}{{ result.probability|round(3, 'common') }}{{ result.likes }}{{ result.dislikes }}
+ {% endif %} + +{% endblock %} \ No newline at end of file diff --git a/flask_app/templates/index.html b/flask_app/templates/index.html new file mode 100644 index 00000000..49043660 --- /dev/null +++ b/flask_app/templates/index.html @@ -0,0 +1,62 @@ +{% extends "layout.html" %} + +{% block title %} + Issue Label Bot +{% endblock %} + +{% block body %} +
+

Issue Label Bot

+

A bot that labels GitHub issues automatically with machine learning.

+

Written by @hamelsmu and @inc0

+

Stats: {{ num_repos }} repos, {{ num_users }} unique users and {{ num_predictions }} predictions.

+ Must have installed the app and opened at least 1 issue to be included in the stats. +
+
+
+

This bot is meant to serve as an example of how to use machine learning to automate tasks on GitHub. See this blog post for more details. Issue Label Bot automatically labels issues as either a feature request, bug or question (we will add more labels and functionality over time). The bot will only make a prediction when it is sufficiently confident and may not label every issue.

+
+
+
+
+

Install this app

+

Only works on public GitHub repos.

+

This app will not make predictions on private repositories, even if already installed on them.

+

+
+
+
+
+
+
+

See the code on

+

Includes the code for this website, the GitHub app, and model training steps.

+

Pull requests are welcome! Please see the contributing section of our README for more information.

+
+
+
+
+
+
+

View Predictions

+

Predictions (if they exist) can be viewed at https://mlbot.net/data/{owner}/{repo}

+

See example predictions for random repos using the drop-down menu below:

+ {% if results %} + + {% endif %} +
+ +
+
+
+
+{% endblock %} \ No newline at end of file diff --git a/flask_app/templates/layout.html b/flask_app/templates/layout.html new file mode 100644 index 00000000..baea452d --- /dev/null +++ b/flask_app/templates/layout.html @@ -0,0 +1,32 @@ + + + + {% block title %}{% endblock %} + + + + + + + + + + +
+ {% if error %} + + {% endif %} + + {% if alert %} + + {% endif %} + {% block body %} + {% endblock %} +
+ + \ No newline at end of file diff --git a/flask_app/utils.py b/flask_app/utils.py new file mode 100644 index 00000000..56786a76 --- /dev/null +++ b/flask_app/utils.py @@ -0,0 +1,122 @@ +import numpy as np + +# Because of error when using a virutal env +# https://markhneedham.com/blog/2018/05/04/python-runtime-error-osx-matplotlib-not-installed-as-framework-mac/ +import matplotlib +matplotlib.use('TkAgg') +import matplotlib.pyplot as plt + +from sklearn import svm, datasets +from sklearn.model_selection import train_test_split +from sklearn.metrics import confusion_matrix +from sklearn.utils.multiclass import unique_labels + + +def plot_confusion_matrix(y_true, y_pred, classes, + normalize=False, + title=None, + cmap=plt.cm.Blues): + """ + This function prints and plots the confusion matrix. + Normalization can be applied by setting `normalize=True`. + """ + if not title: + if normalize: + title = 'Normalized confusion matrix' + else: + title = 'Confusion matrix, without normalization' + + # Compute confusion matrix + cm = confusion_matrix(y_true, y_pred) + # Only use the labels that appear in the data + classes = classes[unique_labels(y_true, y_pred)] + if normalize: + cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] + print("Normalized confusion matrix") + else: + print('Confusion matrix, without normalization') + + fig, ax = plt.subplots() + im = ax.imshow(cm, interpolation='nearest', cmap=cmap) + ax.figure.colorbar(im, ax=ax) + # We want to show all ticks... + ax.set(xticks=np.arange(cm.shape[1]), + yticks=np.arange(cm.shape[0]), + # ... and label them with the respective list entries + xticklabels=classes, yticklabels=classes, + title=title, + ylabel='True label', + xlabel='Predicted label') + + # Rotate the tick labels and set their alignment. + plt.setp(ax.get_xticklabels(), rotation=45, ha="right", + rotation_mode="anchor") + + # Loop over data dimensions and create text annotations. + fmt = '.2f' if normalize else 'd' + thresh = cm.max() / 2. 
+ for i in range(cm.shape[0]): + for j in range(cm.shape[1]): + ax.text(j, i, format(cm[i, j], fmt), + ha="center", va="center", + color="white" if cm[i, j] > thresh else "black") + fig.tight_layout() + return ax + + +class IssueLabeler: + def __init__(self, + body_text_preprocessor, + title_text_preprocessor, + model, + class_names=['bug', 'feature_request', 'question']): + """ + Parameters + ---------- + body_text_preprocessor: ktext.preprocess.processor + the text preprocessor trained on issue bodies + title_text_preprocessor: ktext.preprocess.processor + text preprocessor trained on issue titles + model: tensorflow.keras.models + a keras model that takes as input two tensors: vectorized + issue body and issue title. + class_names: list + class names as they correspond to the integer indices supplied to the model. + """ + self.body_pp = body_text_preprocessor + self.title_pp = title_text_preprocessor + self.model = model + self.class_names = class_names + + + def get_probabilities(self, body:str, title:str): + """ + Get probabilities for the each class. 
+ + Parameters + ---------- + body: str + the issue body + title: str + the issue title + + Returns + ------ + Dict[str:float] + + Example + ------- + >>> issue_labeler = IssueLabeler(body_pp, title_pp, model) + >>> issue_labeler.get_probabilities('hello world', 'hello world') + {'bug': 0.08372017741203308, + 'feature': 0.6401631832122803, + 'question': 0.2761166989803314} + """ + #transform raw text into array of ints + vec_body = self.body_pp.transform([body]) + vec_title = self.title_pp.transform([title]) + + # get predictions + probs = self.model.predict(x=[vec_body, vec_title]).tolist()[0] + + return {k:v for k,v in zip(self.class_names, probs)} \ No newline at end of file diff --git a/images/example3_big.png b/images/example3_big.png new file mode 100644 index 00000000..7b990cdb Binary files /dev/null and b/images/example3_big.png differ diff --git a/notebooks/GitHub-api-example.ipynb b/notebooks/GitHub-api-example.ipynb new file mode 100644 index 00000000..60961d33 --- /dev/null +++ b/notebooks/GitHub-api-example.ipynb @@ -0,0 +1,235 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# This notebook shows you how you can use a python client to interact with GitHub as an app\n", + "\n", + "`mlapp` is a wrapper (located in `/flask_app/mlapp.py`) around the github3.py library for the purposes of interacting with GitHub issues." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get the app installation" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "from mlapp import GitHubApp\n", + "app_id = 12345 # you get your app id during setup\n", + "key_file_path = '/path/to/your/file'\n", + "\n", + "# instantiate the app object\n", + "ghapp = GitHubApp(pem_path=key_file_path, \n", + " app_id=app_id)\n", + "\n", + "# get the installation id\n", + "installation_id = ghapp.get_installation_id(owner='hamelsmu', \n", + " repo='simple_flask_app')\n", + "\n", + "# get the app installation object that will allow you to perform actions\n", + "install = ghapp.get_installation(installation_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Interact With Issues" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Create a new issue" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "issue = install.create_issue(owner='hamelsmu',\n", + " repository='simple_flask_app',\n", + " title='Opening a test issue ', \n", + " body='demonstrating the python client')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Comment on an issue" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "comment = issue.create_comment('Wooo! 
Its time to do some machine learning!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Add a label to an issue" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "issue.add_labels('AI-is-taking-over')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### See the issue here" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://github.com/hamelsmu/simple_flask_app/issues/1\n" + ] + } + ], + "source": [ + "print(issue.html_url)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Appendix" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extract Data\n", + "\n", + "All issues from `kubeflow/kubeflow`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get a list of issues which you can use to train models" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 489/489 [02:20<00:00, 2.85it/s]\n" + ] + } + ], + "source": [ + "issues = GitHubApp.unpack_issues(client=install, \n", + " owner='kubeflow',\n", + " repo='kubeflow')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "there are 372 issues with labels\n" + ] + } + ], + "source": [ + "print(f'there are {len(issues)} issues with labels')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get random app installation\n", + "\n", + "The test installation automatically fetches the first installation the 
app is found on. This is useful for testing purposes " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "install = ghapp.get_test_installation()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "MLapp", + "language": "python", + "name": "mlapp" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/Issue_Labeling_Model.ipynb b/notebooks/Issue_Labeling_Model.ipynb new file mode 100644 index 00000000..64e7ac93 --- /dev/null +++ b/notebooks/Issue_Labeling_Model.ipynb @@ -0,0 +1,1183 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Background\n", + "\n", + "The goal of the MLapp project is to provide the following:\n", + "\n", + "1. Illustrate how to build machine learning powered developer tools using the [GitHub Api](https://developer.github.com/v3/) and Flask. We would like to show data scientists how to build exciting data products using machine learning on the GitHub marketplace, that developers can use. Specifically, we will build an illustrative data product that will automatically label issues. \n", + "\n", + "2. Gather feedback and iterate \n", + "\n", + "\n", + "The scope of this notebook is to addresses part of goal #1, by illustrating how we can acquire a dataset of GitHub issue labels and train a classifier. \n", + "\n", + "The top issues on GitHub by count are illustrated in [this spreadsheet](https://docs.google.com/spreadsheets/d/1NPacnVsyZMBneeewvPGhCx512A1RPYf8ktDN_RpKeS4/edit?usp=sharing). To keep things simple, we will build a model to classify an issue as a `bug`, `feature` or `question`. 
We use heuristics to collapse a set of issue labels into these three categories, which can be viewed [in this query](https://console.cloud.google.com/bigquery?sq=123474043329:01abf8866144486f932c756730ddaff1). \n", + "\n", + "The heuristics for these class labels are contained within the below case statement:\n", + "\n", + "```{sql}\n", + " CASE when labels like '%bug%' and labels not like '%not bug%' then True else False end as Bug_Flag,\n", + " CASE when labels like '%feature%' or labels like '%enhancement%' or labels like '%improvement%' or labels like '%request%' then True else False end as Feature_Flag,\n", + " CASE when labels like '%question%' or labels like '%discussion%' then True else False end as Question_Flag,\n", + "```\n", + " the above case statement is located within [this query](https://console.cloud.google.com/bigquery?sq=123474043329:01abf8866144486f932c756730ddaff1)\n", + " \n", + "\n", + "The following alternative projects were tried before this task that we did not pursue further:\n", + " - Transfer learning using the [GitHub Issue Summarizer](https://github.com/hamelsmu/Seq2Seq_Tutorial) to enable the prediction of custom labels on existing repos. Found that this did not work well as there is a considerable amount of noise with regards to custom labels in repositories and often not enough data to adequately predict this. \n", + " - Tried to classify more than the above three classes, however the human-labeled issues are very subjective and it is not clear what is a question vs. a bug. \n", + " - Tried multi-label classification since labels can co-occur. There is very little overlap between `bug`, `feature` and `question` labels, so we decided to simplify things and make this a multi-class classification problem instead. 
\n", + "\n", + "\n", + "Note: the code in this notebook was executed on a [p3.8xlarge](https://aws.amazon.com/ec2/instance-types/p3/) instance on AWS.\n", + "\n", + "## Outline \n", + "\n", + "This notebook will follow these steps:\n", + "\n", + "1. Download and partition dataset\n", + "2. Pre-process dataset\n", + "2. Build model architecture & Train Model\n", + "3. Evaluate Model\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download and Partition Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "pd.set_option('max_colwidth', 1000)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train: 2,698,578 rows 10 columns\n", + "Test: 476,220 rows 10 columns\n" + ] + } + ], + "source": [ + "df = pd.concat([pd.read_csv(f'https://storage.googleapis.com/codenet/issue_labels/00000000000{i}.csv.gz')\n", + " for i in range(10)])\n", + "\n", + "#split data into train/test\n", + "traindf, testdf = train_test_split(df, test_size=.15)\n", + "\n", + "traindf.to_pickle('traindf.pkl')\n", + "testdf.to_pickle('testdf.pkl')\n", + "\n", + "#print out stats about shape of data\n", + "print(f'Train: {traindf.shape[0]:,} rows {traindf.shape[1]:,} columns')\n", + "print(f'Test: {testdf.shape[0]:,} rows {testdf.shape[1]:,} columns')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Discussion of the data" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
urlrepotitlebodynum_labelslabelsc_bugc_featurec_questionclass_int
112473\"https://github.com/DockStation/dockstation/issues/6\"DockStation/dockstationfeature request: image layers window hidesome feedback for this window:\\r \\r ! image https://cloud.githubusercontent.com/assets/9369080/25952392/3769f842-3669-11e7-8a91-4ed2de640246.png \\r \\r \\r it's really cool, but blocking. i think users would prefer to be able hide it the \\ ok\\ was not hiding it in my case - don't know if it should?2[\"enhancement\", \"wish\"]FalseTrueFalse1
33329\"https://github.com/julianschritt/secreth_telegrambot/issues/2\"julianschritt/secreth_telegrambotgrant group admins permissions to /cancelgame and /startgameending a game requires the person who started it to end it. even kicking and re-adding the bot doesn't close an existing game. so if the person who started a game is afk for a long time the bot becomes useless for that group.1[\"enhancement\", \"enhancement\"]FalseTrueFalse1
95978\"https://github.com/remotestorage/remotestorage-bookmarks-chrome/issues/3\"remotestorage/remotestorage-bookmarks-chromedeprecate in favor of memm?this thing is super old and nobody's actively maintaining it. i don't know if someone's actively using it, but i think it would make sense to direct people coming here to https://github.com/lesion/memm, which works and is maintained by @lesion, who's also a core contributor to rs.js.1[\"question\"]FalseFalseTrue2
\n", + "
" + ], + "text/plain": [ + " url \\\n", + "112473 \"https://github.com/DockStation/dockstation/issues/6\" \n", + "33329 \"https://github.com/julianschritt/secreth_telegrambot/issues/2\" \n", + "95978 \"https://github.com/remotestorage/remotestorage-bookmarks-chrome/issues/3\" \n", + "\n", + " repo \\\n", + "112473 DockStation/dockstation \n", + "33329 julianschritt/secreth_telegrambot \n", + "95978 remotestorage/remotestorage-bookmarks-chrome \n", + "\n", + " title \\\n", + "112473 feature request: image layers window hide \n", + "33329 grant group admins permissions to /cancelgame and /startgame \n", + "95978 deprecate in favor of memm? \n", + "\n", + " body \\\n", + "112473 some feedback for this window:\\r \\r ! image https://cloud.githubusercontent.com/assets/9369080/25952392/3769f842-3669-11e7-8a91-4ed2de640246.png \\r \\r \\r it's really cool, but blocking. i think users would prefer to be able hide it the \\ ok\\ was not hiding it in my case - don't know if it should? \n", + "33329 ending a game requires the person who started it to end it. even kicking and re-adding the bot doesn't close an existing game. so if the person who started a game is afk for a long time the bot becomes useless for that group. \n", + "95978 this thing is super old and nobody's actively maintaining it. i don't know if someone's actively using it, but i think it would make sense to direct people coming here to https://github.com/lesion/memm, which works and is maintained by @lesion, who's also a core contributor to rs.js. 
\n", + "\n", + " num_labels labels c_bug c_feature \\\n", + "112473 2 [\"enhancement\", \"wish\"] False True \n", + "33329 1 [\"enhancement\", \"enhancement\"] False True \n", + "95978 1 [\"question\"] False False \n", + "\n", + " c_question class_int \n", + "112473 False 1 \n", + "33329 False 1 \n", + "95978 True 2 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# preview data\n", + "traindf.head(3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Discussion of the data: \n", + "\n", + "- url: url where you can find this issue\n", + "- repo: owner/repo name\n", + "- title: title of the issue\n", + "- body: body of the issue, not including comments\n", + "- num_labels: number of issue labels\n", + "- labels: an array of labels that a user manually applied to the issue (represented as a string)\n", + "- c_bug: boolean flag that indicates if the issue label corresponds to a bug\n", + "- c_feature: boolean flag that indicates if the issue label corresponds to a feature\n", + "- c_question: boolean flag that indicates if the issue label corresponds to a question\n", + "- class_int: integer between 0 - 2 that corresponds to the class label. 
**0=bug, 1=feature, 2=question**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Summary Statistics" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Class frequency **0=bug, 1=feature, 2=question**" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "class_int\n", + "0 1211335\n", + "1 1231499\n", + "2 255744\n", + "dtype: int64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "traindf.groupby('class_int').size()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Average number of issues per repo and per org" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Avg # of issues per repo: 7.6\n", + " Avg # of issues per org: 8.7\n" + ] + } + ], + "source": [ + "print(f' Avg # of issues per repo: {len(traindf) / traindf.repo.nunique():.1f}')\n", + "print(f\" Avg # of issues per org: {len(traindf) / traindf.repo.apply(lambda x: x.split('/')[-1]).nunique():.1f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Most popular repos by # of issues:\n", + "\n", + " - `pcnt` = fraction of total issues in the dataset\n", + " - `count` = number of issues in the dataset for that repo" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pcntcount
repo
Microsoft/vscode0.00512816281
rancher/rancher0.0024507779
MicrosoftDocs/azure-docs0.0019636233
godotengine/godot0.0019526198
ansible/ansible0.0019506192
hashicorp/terraform0.0016495235
kubernetes/kubernetes0.0016215147
lionheart/openradar-mirror0.0013304221
elastic/kibana0.0012013813
magento/magento20.0011293583
dart-lang/sdk0.0011273579
dotnet/corefx0.0011213558
eclipse/che0.0011143538
brave/browser-laptop0.0010673389
kbower/tickettest10.0009202921
Kademi/kademi-dev0.0008872815
owncloud/core0.0008192600
elastic/elasticsearch0.0008092568
eslint/eslint0.0007652428
angular/angular0.0007192283
openshift/origin0.0007172277
CartoDB/cartodb0.0006852174
cockroachdb/cockroach0.0006552081
RocketChat/Rocket.Chat0.0006502065
Microsoft/AL0.0006472055
\n", + "
" + ], + "text/plain": [ + " pcnt count\n", + "repo \n", + "Microsoft/vscode 0.005128 16281\n", + "rancher/rancher 0.002450 7779\n", + "MicrosoftDocs/azure-docs 0.001963 6233\n", + "godotengine/godot 0.001952 6198\n", + "ansible/ansible 0.001950 6192\n", + "hashicorp/terraform 0.001649 5235\n", + "kubernetes/kubernetes 0.001621 5147\n", + "lionheart/openradar-mirror 0.001330 4221\n", + "elastic/kibana 0.001201 3813\n", + "magento/magento2 0.001129 3583\n", + "dart-lang/sdk 0.001127 3579\n", + "dotnet/corefx 0.001121 3558\n", + "eclipse/che 0.001114 3538\n", + "brave/browser-laptop 0.001067 3389\n", + "kbower/tickettest1 0.000920 2921\n", + "Kademi/kademi-dev 0.000887 2815\n", + "owncloud/core 0.000819 2600\n", + "elastic/elasticsearch 0.000809 2568\n", + "eslint/eslint 0.000765 2428\n", + "angular/angular 0.000719 2283\n", + "openshift/origin 0.000717 2277\n", + "CartoDB/cartodb 0.000685 2174\n", + "cockroachdb/cockroach 0.000655 2081\n", + "RocketChat/Rocket.Chat 0.000650 2065\n", + "Microsoft/AL 0.000647 2055" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pareto_df = pd.DataFrame({'pcnt': df.groupby('repo').size() / len(df), 'count': df.groupby('repo').size()})\n", + "pareto_df.sort_values('pcnt', ascending=False).head(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pre-Process Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To process the raw text data, we will use [ktext](https://github.com/hamelsmu/ktext)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "from ktext.preprocess import processor\n", + "import dill as dpickle\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Clean, tokenize, and apply padding / truncating such that each document length = 75th percentile for the dataset.\n", + "Retain only the top keep_n 
words in the vocabulary and set the remaining words to 1 which will become common index for rare words.\n", + "\n", + "**Warning:** the below block of code can take a long time to execute.\n", + "\n", + "#### Learn the vocabulary from the training dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:....tokenizing data\n", + "WARNING:root:Setting maximum document length to 105 based upon hueristic of 0.75 percentile.\n", + " See full histogram by insepecting the `document_length_stats` attribute.\n", + "WARNING:root:(1/2) done. 855 sec\n", + "WARNING:root:....building corpus\n", + "WARNING:root:(2/2) done. 145 sec\n", + "WARNING:root:Finished parsing 2,698,578 documents.\n", + "WARNING:root:...fit is finished, beginning transform\n", + "WARNING:root:...padding data\n", + "WARNING:root:done. 102 sec\n", + "WARNING:root:....tokenizing data\n", + "WARNING:root:Setting maximum document length to 10 based upon hueristic of 0.75 percentile.\n", + " See full histogram by insepecting the `document_length_stats` attribute.\n", + "WARNING:root:(1/2) done. 66 sec\n", + "WARNING:root:....building corpus\n", + "WARNING:root:(2/2) done. 17 sec\n", + "WARNING:root:Finished parsing 2,698,578 documents.\n", + "WARNING:root:...fit is finished, beginning transform\n", + "WARNING:root:...padding data\n", + "WARNING:root:done. 
23 sec\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 8min 8s, sys: 32.3 s, total: 8min 40s\n", + "Wall time: 20min 24s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "train_body_raw = traindf.body.tolist()\n", + "train_title_raw = traindf.title.tolist()\n", + "\n", + "# Clean, tokenize, and apply padding / truncating such that each document length = 75th percentile for the dataset.\n", + "# also, retain only the top keep_n words in the vocabulary and set the remaining words\n", + "# to 1 which will become common index for rare words \n", + "\n", + "# process the issue body data\n", + "body_pp = processor(hueristic_pct_padding=.75, keep_n=8000)\n", + "train_body_vecs = body_pp.fit_transform(train_body_raw)\n", + "\n", + "# process the title data\n", + "title_pp = processor(hueristic_pct_padding=.75, keep_n=4500)\n", + "train_title_vecs = title_pp.fit_transform(train_title_raw)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Apply transformations to Test Data" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:...tokenizing data\n", + "WARNING:root:...indexing data\n", + "WARNING:root:...padding data\n", + "WARNING:root:...tokenizing data\n", + "WARNING:root:...indexing data\n", + "WARNING:root:...padding data\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 57.5 s, sys: 31 s, total: 1min 28s\n", + "Wall time: 4min 16s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "test_body_raw = testdf.body.tolist()\n", + "test_title_raw = testdf.title.tolist()\n", + "\n", + "test_body_vecs = body_pp.transform_parallel(test_body_raw)\n", + "test_title_vecs = title_pp.transform_parallel(test_title_raw)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Extract Labels\n", + "\n", + "Add an 
additional dimension to the end to facilitate compatibility with Keras." + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "train_labels = np.expand_dims(traindf.class_int.values, -1)\n", + "test_labels = np.expand_dims(testdf.class_int.values, -1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Check shapes" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "# the number of rows in data for the body, title and labels should be the same for both train and test partitions\n", + "assert train_body_vecs.shape[0] == train_title_vecs.shape[0] == train_labels.shape[0]\n", + "assert test_body_vecs.shape[0] == test_title_vecs.shape[0] == test_labels.shape[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Save pre-processors and data to disk" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "# Save the preprocessor\n", + "with open('body_pp.dpkl', 'wb') as f:\n", + " dpickle.dump(body_pp, f)\n", + "\n", + "with open('title_pp.dpkl', 'wb') as f:\n", + " dpickle.dump(title_pp, f)\n", + "\n", + "# Save the processed data\n", + "np.save('train_title_vecs.npy', train_title_vecs)\n", + "np.save('train_body_vecs.npy', train_body_vecs)\n", + "np.save('test_body_vecs.npy', test_body_vecs)\n", + "np.save('test_title_vecs.npy', test_title_vecs)\n", + "np.save('train_labels.npy', train_labels)\n", + "np.save('test_labels.npy', test_labels)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Build Architecture & Train Model" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.keras.utils import multi_gpu_model\n", + "from tensorflow.keras.models import Model\n", + "from tensorflow.keras.layers import Input, 
GRU, Dense, Embedding, BatchNormalization, Concatenate\n", + "from tensorflow.keras.optimizers import Adam\n", + "import numpy as np\n", + "import dill as dpickle" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.12.0\n" + ] + } + ], + "source": [ + "print(tf.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load the data and shape information" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "with open('title_pp.dpkl', 'rb') as f:\n", + " title_pp = dpickle.load(f)\n", + "\n", + "with open('body_pp.dpkl', 'rb') as f:\n", + " body_pp = dpickle.load(f)\n", + " \n", + "#load the training data and labels\n", + "train_body_vecs = np.load('train_body_vecs.npy')\n", + "train_title_vecs = np.load('train_title_vecs.npy')\n", + "train_labels = np.load('train_labels.npy')\n", + "\n", + "#load the test data and labels\n", + "test_body_vecs = np.load('test_body_vecs.npy')\n", + "test_title_vecs = np.load('test_title_vecs.npy')\n", + "test_labels = np.load('test_labels.npy')\n", + "\n", + "\n", + "issue_body_doc_length = train_body_vecs.shape[1]\n", + "issue_title_doc_length = train_title_vecs.shape[1]\n", + "\n", + "body_vocab_size = body_pp.n_tokens\n", + "title_vocab_size = title_pp.n_tokens\n", + "\n", + "num_classes = len(set(train_labels[:, 0]))\n", + "assert num_classes == 3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Build Model Architecture\n", + "\n", + "We did very little hyperparameter tuning. Keeping model simple as possible." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "body_input = Input(shape=(issue_body_doc_length,), name='Body-Input')\n", + "title_input = Input(shape=(issue_title_doc_length,), name='Title-Input')\n", + "\n", + "body = Embedding(body_vocab_size, 50, name='Body-Embedding')(body_input)\n", + "title = Embedding(title_vocab_size, 50, name='Title-Embedding')(title_input)\n", + "\n", + "body = BatchNormalization()(body)\n", + "body = GRU(100, name='Body-Encoder')(body)\n", + "\n", + "title = BatchNormalization()(title)\n", + "title = GRU(75, name='Title-Encoder')(title)\n", + "\n", + "x = Concatenate(name='Concat')([body, title])\n", + "x = BatchNormalization()(x)\n", + "out = Dense(num_classes, activation='softmax')(x)\n", + "\n", + "model = Model([body_input, title_input], out)\n", + "\n", + "model.compile(optimizer=Adam(lr=0.001), \n", + " loss='sparse_categorical_crossentropy', \n", + " metrics=['accuracy'])" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "__________________________________________________________________________________________________\n", + "Layer (type) Output Shape Param # Connected to \n", + "==================================================================================================\n", + "Body-Input (InputLayer) (None, 105) 0 \n", + "__________________________________________________________________________________________________\n", + "Title-Input (InputLayer) (None, 10) 0 \n", + "__________________________________________________________________________________________________\n", + "Body-Embedding (Embedding) (None, 105, 50) 400050 Body-Input[0][0] \n", + "__________________________________________________________________________________________________\n", + "Title-Embedding (Embedding) (None, 10, 50) 225050 Title-Input[0][0] \n", + 
"__________________________________________________________________________________________________\n", + "batch_normalization_15 (BatchNo (None, 105, 50) 200 Body-Embedding[0][0] \n", + "__________________________________________________________________________________________________\n", + "batch_normalization_16 (BatchNo (None, 10, 50) 200 Title-Embedding[0][0] \n", + "__________________________________________________________________________________________________\n", + "Body-Encoder (GRU) (None, 100) 45300 batch_normalization_15[0][0] \n", + "__________________________________________________________________________________________________\n", + "Title-Encoder (GRU) (None, 75) 28350 batch_normalization_16[0][0] \n", + "__________________________________________________________________________________________________\n", + "Concat (Concatenate) (None, 175) 0 Body-Encoder[0][0] \n", + " Title-Encoder[0][0] \n", + "__________________________________________________________________________________________________\n", + "batch_normalization_17 (BatchNo (None, 175) 700 Concat[0][0] \n", + "__________________________________________________________________________________________________\n", + "dense_5 (Dense) (None, 3) 528 batch_normalization_17[0][0] \n", + "==================================================================================================\n", + "Total params: 700,378\n", + "Trainable params: 699,828\n", + "Non-trainable params: 550\n", + "__________________________________________________________________________________________________\n" + ] + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Train Model" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint\n", + "\n", + "script_name_base = 'Issue_Label_v1'\n", + "csv_logger = 
CSVLogger('{:}.log'.format(script_name_base))\n", + "model_checkpoint = ModelCheckpoint('{:}_best_model.hdf5'.format(script_name_base),\n", + " save_best_only=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train on 2698578 samples, validate on 476220 samples\n", + "Epoch 1/4\n", + "2698578/2698578 [==============================] - 603s 223us/step - loss: 0.5008 - acc: 0.8086 - val_loss: 0.4699 - val_acc: 0.8209\n", + "Epoch 2/4\n", + "2698578/2698578 [==============================] - 599s 222us/step - loss: 0.4558 - acc: 0.8265 - val_loss: 0.4611 - val_acc: 0.8248\n", + "Epoch 3/4\n", + "2698578/2698578 [==============================] - 598s 221us/step - loss: 0.4380 - acc: 0.8332 - val_loss: 0.4593 - val_acc: 0.8250\n", + "Epoch 4/4\n", + "2698578/2698578 [==============================] - 597s 221us/step - loss: 0.4230 - acc: 0.8393 - val_loss: 0.4616 - val_acc: 0.8243\n" + ] + } + ], + "source": [ + "batch_size = 900\n", + "epochs = 4\n", + "history = model.fit(x=[train_body_vecs, train_title_vecs], \n", + " y=train_labels,\n", + " batch_size=batch_size,\n", + " epochs=epochs,\n", + " validation_data=[(test_body_vecs, test_title_vecs), test_labels], \n", + " callbacks=[csv_logger, model_checkpoint])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluate Model\n", + "\n", + "Compute a confusion matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_impl.py:112: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. 
\"\n" + ] + } + ], + "source": [ + "from tensorflow.keras.models import load_model\n", + "from utils import plot_confusion_matrix\n", + "\n", + "\n", + "#load the test data and labels\n", + "test_body_vecs = np.load('test_body_vecs.npy')\n", + "test_title_vecs = np.load('test_title_vecs.npy')\n", + "test_labels = np.load('test_labels.npy')\n", + "\n", + "#load the best model\n", + "best_model = load_model('Issue_Label_v1_best_model.hdf5')\n", + "\n", + "#get predictions\n", + "y_pred = np.argmax(best_model.predict(x=[test_body_vecs, test_title_vecs],\n", + " batch_size=15000),\n", + " axis=1)\n", + "\n", + "# get labels\n", + "y_test = test_labels[:, 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normalized confusion matrix\n", + "[[0.88173203 0.09765211 0.02061586]\n", + " [0.1303451 0.83997974 0.02967516]\n", + " [0.27873486 0.23896011 0.48230502]]\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plot_confusion_matrix(y_test, \n", + " y_pred, \n", + " classes=np.array(['bug', 'feature', 'question']), \n", + " normalize=True, \n", + " title='Normalized Confusion Matrix')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Make Predictions" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from utils import IssueLabeler\n", + "from tensorflow.keras.models import load_model\n", + "import dill as dpickle" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "#load the best model\n", + "best_model = load_model('Issue_Label_v1_best_model.hdf5')\n", + "\n", + "#load the pre-processors\n", + "with open('title_pp.dpkl', 'rb') as f:\n", + " title_pp = dpickle.load(f)\n", + "\n", + "with open('body_pp.dpkl', 'rb') as f:\n", + " body_pp = dpickle.load(f)\n", + " \n", + "# instantiate the IssueLabeler object\n", + "issue_labeler = IssueLabeler(body_text_preprocessor=body_pp,\n", + " title_text_preprocessor=title_pp,\n", + " model=best_model) " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'bug': 0.12618249654769897,\n", + " 'feature': 0.1929263472557068,\n", + " 'question': 0.6808911561965942}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "issue_labeler.get_probabilities(body='Can someone please help me?', \n", + " title='random stuff')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'bug': 0.019261939451098442,\n", + " 'feature': 0.9305700659751892,\n", + " 'question': 
0.05016808584332466}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "issue_labeler.get_probabilities(body='It would be great to add a new button', \n", + " title='requesting a button')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'bug': 0.9065071940422058,\n", + " 'feature': 0.03202613815665245,\n", + " 'question': 0.06146678701043129}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "issue_labeler.get_probabilities(body='It does` not work, I get bad errors', \n", + " title='nothing works')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/mlapp.py b/notebooks/mlapp.py new file mode 120000 index 00000000..84ee4b90 --- /dev/null +++ b/notebooks/mlapp.py @@ -0,0 +1 @@ +../flask_app/mlapp.py \ No newline at end of file diff --git a/notebooks/utils.py b/notebooks/utils.py new file mode 120000 index 00000000..b7bb311e --- /dev/null +++ b/notebooks/utils.py @@ -0,0 +1 @@ +../flask_app/utils.py \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..4e9ba7ee --- /dev/null +++ b/requirements.txt @@ -0,0 +1,129 @@ +-i https://pypi.org/simple +absl-py==0.7.1 +appnope==0.1.0 +asn1crypto==0.24.0 +astor==0.7.1 +backcall==0.1.0 +backports.weakref==1.0.post1 +bleach==3.1.0 +blis==0.2.4 +cachetools==3.1.0 +certifi==2019.3.9 +cffi==1.12.2 +chardet==3.0.4 +click==7.0 +cryptography==2.6.1 +cycler==0.10.0 +cymem==2.0.2 
+cytoolz==0.9.0.1 +dask==1.1.5 +decorator==4.4.0 +defusedxml==0.5.0 +dill==0.2.9 +entrypoints==0.3 +enum34==1.1.6 +flask-session==0.3.1 +flask-sqlalchemy==2.3.2 +flask==1.0.2 +ftfy==4.4.3 +gast==0.2.2 +github3.py==1.3.0 +grpcio==1.19.0 +h5py==2.9.0 +html5lib==1.0.1 +idna==2.8 +ijson==2.3 +ipdb==0.12 +ipykernel==5.1.0 +ipython-genutils==0.2.0 +ipython==7.4.0 +ipywidgets==7.4.2 +itsdangerous==1.1.0 +jedi==0.13.3 +jinja2==2.10 +jsonify==0.5 +jsonschema==2.6.0 +jupyter-client==5.2.4 +jupyter-console==6.0.0 +jupyter-core==4.4.0 +jupyter==1.0.0 +jwcrypto==0.6.0 +jwt==0.6.1 +keras-applications==1.0.7 +keras-preprocessing==1.0.9 +keras==2.2.4 +kiwisolver==1.0.1 +ktext==0.34 +markdown==3.1 +markupsafe==1.1.1 +matplotlib==3.0.3 +mistune==0.8.4 +mock==2.0.0 +more-itertools==7.0.0 +msgpack-numpy==0.4.4.2 +msgpack==0.6.1 +multiprocess==0.70.7 +murmurhash==1.0.2 +nbconvert==5.4.1 +nbformat==4.4.0 +networkx==2.2 +notebook==5.7.7 +numpy==1.16.2 +pandas==0.24.2 +pandocfilters==1.4.2 +parso==0.3.4 +pathos==0.2.3 +pbr==5.1.3 +pexpect==4.6.0 +pickleshare==0.7.5 +plac==0.9.6 +pox==0.2.5 +ppft==1.6.4.9 +preshed==2.0.1 +prometheus-client==0.6.0 +prompt-toolkit==2.0.9 +protobuf==3.7.1 +psycopg2-binary==2.7.7 +psycopg2==2.7.7 +ptyprocess==0.6.0 +pyarrow==0.12.1 +pycparser==2.19 +pyemd==0.5.1 +pygments==2.3.1 +pyjwt==1.7.1 +pyparsing==2.3.1 +pyphen==0.9.5 +python-dateutil==2.8.0 +python-levenshtein==0.12.0 +pytz==2018.9 +pyyaml==5.1 +pyzmq==18.0.1 +qtconsole==4.4.3 +requests==2.21.0 +scikit-learn==0.20.3 +scipy==1.2.1 +send2trash==1.5.0 +six==1.12.0 +spacy==2.1.3 +sqlalchemy==1.3.1 +srsly==0.0.5 +tensorboard==1.12.2 +tensorflow==1.12.0 +termcolor==1.1.0 +terminado==0.8.2 +testpath==0.4.2 +textacy==0.6.2 +thinc==7.0.4 +toolz==0.9.0 +tornado==6.0.2 +tqdm==4.31.1 +traitlets==4.3.2 +unidecode==1.0.23 +uritemplate==3.0.0 +urllib3==1.24.1 +wasabi==0.2.1 +wcwidth==0.1.7 +webencodings==0.5.1 +werkzeug==0.15.1 +wheel==0.33.1 ; python_version >= '3' +widgetsnbextension==3.4.2 diff --git 
a/script/bootstrap b/script/bootstrap new file mode 100644 index 00000000..c5762079 --- /dev/null +++ b/script/bootstrap @@ -0,0 +1,6 @@ +#!/bin/sh + +set -e +cd $(dirname "$0")/.. + +docker build -t hamelsmu/mlapp -f deployment/Dockerfile . \ No newline at end of file