From fa07e12ef8f2338152f769389c4f54a086bced92 Mon Sep 17 00:00:00 2001 From: Javier Date: Sat, 17 Feb 2024 11:33:26 +0000 Subject: [PATCH] brought docs folder back --- docs/Makefile | 21 ++ docs/_static/custom.css | 45 +++ docs/_static/img/widedeep_logo_docs.ico | Bin 0 -> 102115 bytes docs/bayesian_models.rst | 15 + docs/callbacks.rst | 29 ++ docs/conf.py | 297 ++++++++++++++++++ docs/dataloaders.rst | 10 + docs/examples.rst | 21 ++ docs/figures/architecture_1_math.png | Bin 0 -> 47788 bytes docs/figures/architecture_2_math.png | Bin 0 -> 16848 bytes docs/figures/widedeep_arch.png | Bin 0 -> 146196 bytes docs/figures/widedeep_logo.png | Bin 0 -> 85253 bytes docs/index.rst | 160 ++++++++++ docs/installation.rst | 44 +++ docs/losses.rst | 65 ++++ docs/make.bat | 35 +++ docs/metrics.rst | 51 +++ docs/model_components.rst | 71 +++++ docs/preprocessing.rst | 22 ++ docs/quick_start.rst | 134 ++++++++ docs/requirements.txt | 20 ++ docs/tab2vec.rst | 7 + docs/trainer.rst | 23 ++ docs/utils/deeptabular_utils.rst | 6 + docs/utils/fastai_transforms.rst | 19 ++ docs/utils/image_utils.rst | 20 ++ docs/utils/index.rst | 23 ++ docs/utils/text_utils.rst | 15 + .../pytorch-widedeep/bayesian_models.html | 6 +- mkdocs/site/pytorch-widedeep/callbacks.html | 18 +- .../pytorch-widedeep/model_components.html | 186 ++++++----- .../site/pytorch-widedeep/preprocessing.html | 186 ++++++----- .../self_supervised_pretraining.html | 4 +- mkdocs/site/pytorch-widedeep/tab2vec.html | 2 +- .../utils/fastai_transforms.html | 4 +- mkdocs/site/search/search_index.json | 2 +- mkdocs/site/sitemap.xml | 86 ++--- mkdocs/site/sitemap.xml.gz | Bin 806 -> 806 bytes 38 files changed, 1423 insertions(+), 224 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/_static/custom.css create mode 100644 docs/_static/img/widedeep_logo_docs.ico create mode 100644 docs/bayesian_models.rst create mode 100644 docs/callbacks.rst create mode 100644 docs/conf.py create mode 100644 docs/dataloaders.rst create mode 100644 docs/examples.rst create mode 100644 docs/figures/architecture_1_math.png create mode 100644 docs/figures/architecture_2_math.png create mode 100644 docs/figures/widedeep_arch.png create mode 100644 docs/figures/widedeep_logo.png create mode 100644 docs/index.rst create mode 100644 docs/installation.rst create mode 100644 docs/losses.rst create mode 100644 docs/make.bat create mode 100644 docs/metrics.rst create mode 100644 docs/model_components.rst create mode 100644 docs/preprocessing.rst create mode 100644 docs/quick_start.rst create mode 100644 docs/requirements.txt create mode 100644 docs/tab2vec.rst create mode 100644 docs/trainer.rst create mode 100644 docs/utils/deeptabular_utils.rst create mode 100644 docs/utils/fastai_transforms.rst create mode 100644 docs/utils/image_utils.rst create mode 100644 docs/utils/index.rst create mode 100644 docs/utils/text_utils.rst diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..c1d05ae8 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,21 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXPROJ = pytorch_widedeep +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". 
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/_static/custom.css b/docs/_static/custom.css
new file mode 100644
index 00000000..cbe8279e
--- /dev/null
+++ b/docs/_static/custom.css
@@ -0,0 +1,45 @@
+body a {
+    font-weight: bold;
+}
+
+.math {
+    text-align: left;
+}
+
+.eqno {
+    float: right;
+}
+
+div.ethical-rtd {
+    /* hide ads */
+    display: none;
+}
+
+.rst-content dl:not(.docutils) dl dt strong {
+    padding-left: 6pt;
+}
+
+.rst-content dl:not(.docutils) dl dt span {
+    color: #777;
+}
+
+.rst-content dl:not(.docutils) dl dt span.classifier-delimiter {
+    padding-left: 6pt;
+    padding-right: 6pt;
+}
+
+.rst-content dl:not(.docutils) dl dt span.classifier {
+    padding-right: 6pt;
+}
+
+.rst-content dl:not(.docutils) dl dt a.reference.internal span.xref.std.std-term {
+    color: #2980B9;
+}
+
+.wy-nav-content {
+    max-width: none !important;
+}
+
+div.container a.header-logo {
+    background-image: url("../figures/widedeep_logo.png");
+}
diff --git a/docs/_static/img/widedeep_logo_docs.ico b/docs/_static/img/widedeep_logo_docs.ico
new file mode 100644
index 0000000000000000000000000000000000000000..5e699b7ea222f5e43e05c969cde6c92a4b110b4d
GIT binary patch
literal 102115
[binary favicon data omitted]
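The Makefile added above simply forwards any target to sphinx-build in "make mode", so running "make html" from the docs/ folder builds the HTML docs into _build/. As a rough illustration (not part of the patch), the catch-all target amounts to the call sketched below; the source and build directory names mirror the SOURCEDIR and BUILDDIR variables defined in the hunk.

    # Rough Python equivalent of running "make html" with the Makefile above.
    # Assumes sphinx is installed and the docs/ folder added by this patch exists.
    import subprocess

    subprocess.run(
        ["sphinx-build", "-M", "html", ".", "_build"],  # SOURCEDIR=".", BUILDDIR="_build"
        cwd="docs",
        check=True,
    )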
diff --git a/docs/bayesian_models.rst b/docs/bayesian_models.rst
new file mode 100644
index 00000000..2eb95c60
--- /dev/null
+++ b/docs/bayesian_models.rst
@@ -0,0 +1,15 @@
+The ``bayesian models`` module
+==============================
+
+This module contains the two Bayesian models available in this library, namely
+the Bayesian versions of the ``Wide`` and ``TabMlp`` models, referred to as
+``BayesianWide`` and ``BayesianTabMlp``.
+
+
+.. autoclass:: pytorch_widedeep.bayesian_models.tabular.bayesian_linear.bayesian_wide.BayesianWide
+    :exclude-members: forward
+    :members:
+
+.. autoclass:: pytorch_widedeep.bayesian_models.tabular.bayesian_mlp.bayesian_tab_mlp.BayesianTabMlp
+    :exclude-members: forward
+    :members:
diff --git a/docs/callbacks.rst b/docs/callbacks.rst
new file mode 100644
index 00000000..e8e65c45
--- /dev/null
+++ b/docs/callbacks.rst
@@ -0,0 +1,29 @@
+Callbacks
+=========
+
+Here are the 5 callbacks available in ``pytorch-widedeep``: ``History``,
+``LRHistory``, ``ModelCheckpoint``, ``EarlyStopping`` and ``RayTuneReporter``.
+
+.. note:: ``History`` runs by default, so it should not be passed
+    to the ``Trainer``
+
+.. autoclass:: pytorch_widedeep.callbacks.History
+    :members:
+
+.. autoclass:: pytorch_widedeep.callbacks.LRShedulerCallback
+    :members:
+
+.. autoclass:: pytorch_widedeep.callbacks.MetricCallback
+    :members:
+
+.. autoclass:: pytorch_widedeep.callbacks.LRHistory
+    :members:
+
+.. autoclass:: pytorch_widedeep.callbacks.ModelCheckpoint
+    :members:
+
+.. autoclass:: pytorch_widedeep.callbacks.EarlyStopping
+    :members:
+
+.. autoclass:: pytorch_widedeep.callbacks.RayTuneReporter
+    :members:
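For orientation only (this is not part of the patch): the callbacks documented in callbacks.rst above are handed to the ``Trainer`` through its ``callbacks`` argument. A minimal sketch follows, assuming a ``WideDeep`` model and preprocessed inputs ``X_tab``/``target`` already exist; the argument names follow the class signatures documented in that file but should be checked against the installed version.

    # Hedged sketch: attaching callbacks to the Trainer. "model", "X_tab" and
    # "target" are assumed to exist (see quick_start.rst, added in this same patch).
    from pytorch_widedeep import Trainer
    from pytorch_widedeep.callbacks import EarlyStopping, LRHistory, ModelCheckpoint

    trainer = Trainer(
        model,
        objective="binary",
        callbacks=[
            LRHistory(n_epochs=10),  # record the learning rate(s) used each epoch
            EarlyStopping(patience=5),  # stop when the monitored metric stops improving
            ModelCheckpoint(filepath="model_weights/wd_out"),  # save weights during training
        ],
    )
    trainer.fit(X_tab=X_tab, target=target, n_epochs=10, batch_size=256)
    # History runs by default, so it is not passed explicitly (see the note above).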
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 00000000..bcfaa096
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,297 @@
+# -*- coding: utf-8 -*-
+#
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+
+import os
+import re
+import sys
+
+from sphinx.ext.napoleon.docstring import GoogleDocstring
+
+# this adds the repo root (the equivalent of "../" from the docs folder) to the python path
+PACKAGEDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, PACKAGEDIR)
+
+
+# -- Project information -----------------------------------------------------
+
+project = "pytorch-widedeep"
+copyright = "2021, Javier Rodriguez Zaurin"
+author = "Javier Rodriguez Zaurin"
+
+# # The full version, including alpha/beta/rc tags
+# def get_version():
+#     r"""
+#     Get the current version number for the library
+#     Returns
+#     -------
+#     String
+#         Of the form "major.minor.micro", in which "major", "minor" and "micro" are numbers
+#     """
+#     with open("../pytorch_widedeep/VERSION") as f:
+#         return f.read().strip()
+# release = get_version()
+
+with open(os.path.join(PACKAGEDIR, "pytorch_widedeep", "version.py")) as f:
+    version = re.search(r"__version__ \= \"(\d+\.\d+\.\d+)\"", f.read())
+    assert version is not None, "can't parse __version__ from version.py"
+    version = version.groups()[0]  # type: ignore[assignment]
+    assert len(version.split(".")) == 3, "bad version spec"  # type: ignore[attr-defined]
+    release = version
+
+# -- General configuration ---------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+
+extensions = [
+    "sphinx.ext.autosummary",
+    "sphinx.ext.autodoc",
+    "sphinx_autodoc_typehints",
+    "sphinx.ext.coverage",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.mathjax",
+    "recommonmark",
+    "sphinx.ext.doctest",
+    "sphinx.ext.todo",
+    "sphinx.ext.viewcode",
+    "sphinx_markdown_tables",
+    "sphinx_copybutton",
+    "sphinx.ext.githubpages",
+]
+
+autosummary_generate = True
+
+napoleon_use_ivar = True
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffixes as a list of strings:
+#
+source_suffix = [".rst", ".md"]
+# source_suffix = '.rst'
+
+# The master toctree document.
+master_doc = "index"
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "sphinx" + +# Remove the prompt when copying examples +copybutton_prompt_text = ">>> " + +# autoclass_content = "init" # 'both' +autodoc_member_order = "bysource" +# autodoc_default_flags = ["show-inheritance"] + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {"analytics_id": "UA-83738774-2"} + +# html_theme_options = { +# "canonical_url": "", +# # 'analytics_id': 'UA-XXXXXXX-1', # Provided by Google in your dashboard +# "logo_only": False, +# "display_version": True, +# "prev_next_buttons_location": "bottom", +# "style_external_links": False, +# # Toc options +# "collapse_navigation": True, +# "sticky_navigation": True, +# "navigation_depth": 4, +# "includehidden": True, +# "titles_only": False, +# } + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# The default sidebars (for documents that don't match any pattern) are +# defined by theme itself. Builtin themes are using these templates by +# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', +# 'searchbox.html']``. +# +# html_sidebars = {} + +# This must be the name of an image file (path relative to the configuration +# directory) that is the favicon of the docs. Modern browsers use this as +# the icon for tabs, windows and bookmarks. It should be a Windows-style +# icon file (.ico). +html_favicon = "_static/img/widedeep_logo_docs.ico" +html_logo = "figures/widedeep_logo.png" +html_theme_options = { + "canonical_url": "https://pytorch-widedeep.readthedocs.io/en/latest/", + "collapse_navigation": False, + "logo_only": False, + "display_version": True, +} +# html_favicon = "_static/img/widedeep_logo_docs.ico" + + +# -- Options for HTMLHelp output --------------------------------------------- + +# Output file base name for HTML help builder. +htmlhelp_basename = "pytorch_widedeepdoc" + + +# -- Options for LaTeX output ------------------------------------------------ + +latex_elements = { # type: ignore[var-annotated] + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ( + master_doc, + "pytorch_widedeep.tex", + "pytorch_widedeep Documentation", + "Javier Rodriguez Zaurin", + "manual", + ), +] + + +# -- Options for manual page output ------------------------------------------ + +# One entry per manual page. 
List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    (master_doc, "pytorch_widedeep", "pytorch_widedeep Documentation", [author], 1)
+]
+
+
+# -- Options for Texinfo output ----------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (
+        master_doc,
+        "pytorch_widedeep",
+        "pytorch_widedeep Documentation",
+        author,
+        "pytorch_widedeep",
+        "One line description of project.",
+        "Miscellaneous",
+    ),
+]
+
+
+# -- Options for Epub output -------------------------------------------------
+
+# Bibliographic Dublin Core info.
+epub_title = project
+
+# The unique identifier of the text. This can be an ISBN number
+# or the project homepage.
+#
+# epub_identifier = ''
+
+# A unique identification for the text.
+#
+# epub_uid = ''
+
+# A list of files that should not be packed into the epub file.
+epub_exclude_files = ["search.html"]
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = True
+
+
+def setup(app):
+    app.add_css_file("custom.css")
+
+
+# -- Extensions to the Napoleon GoogleDocstring class ---------------------
+# first, we define new methods for any new sections and add them to the class
+def parse_keys_section(self, section):
+    return self._format_fields("Keys", self._consume_fields())
+
+
+GoogleDocstring._parse_keys_section = parse_keys_section  # type: ignore[attr-defined]
+
+
+def parse_attributes_section(self, section):
+    return self._format_fields("Attributes", self._consume_fields())
+
+
+GoogleDocstring._parse_attributes_section = parse_attributes_section  # type: ignore[assignment]
+
+
+def parse_class_attributes_section(self, section):
+    return self._format_fields("Class Attributes", self._consume_fields())
+
+
+GoogleDocstring._parse_class_attributes_section = parse_class_attributes_section  # type: ignore[attr-defined]
+
+
+# we now patch the parse method to guarantee that the above methods are
+# assigned to the _sections dict
+def patched_parse(self):
+    self._sections["keys"] = self._parse_keys_section
+    self._sections["class attributes"] = self._parse_class_attributes_section
+    self._unpatched_parse()
+
+
+GoogleDocstring._unpatched_parse = GoogleDocstring._parse  # type: ignore[attr-defined]
+GoogleDocstring._parse = patched_parse  # type: ignore[assignment]
diff --git a/docs/dataloaders.rst b/docs/dataloaders.rst
new file mode 100644
index 00000000..02f9515e
--- /dev/null
+++ b/docs/dataloaders.rst
@@ -0,0 +1,10 @@
+Dataloaders
+===========
+
+.. note:: This module should contain custom dataloaders that the user might want to
+    implement. At the moment ``pytorch-widedeep`` offers one custom dataloader,
+    ``DataLoaderImbalanced``.
+
+
+.. autoclass:: pytorch_widedeep.dataloaders.DataLoaderImbalanced
+    :members:
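As an illustration of the note above (not part of the patch), ``DataLoaderImbalanced`` oversamples minority classes and is handed to ``Trainer.fit``. The sketch below follows the pattern used in the library's imbalanced-data example notebooks; treat the exact keyword names (``custom_dataloader``, ``oversample_mul``) and the fact that the class rather than an instance is passed as assumptions to verify against the installed version.

    # Hedged sketch: training on an imbalanced binary target with DataLoaderImbalanced.
    # "model", "X_tab_train" and "y_train" are assumed to exist.
    from pytorch_widedeep import Trainer
    from pytorch_widedeep.dataloaders import DataLoaderImbalanced

    trainer = Trainer(model, objective="binary")
    trainer.fit(
        X_tab=X_tab_train,
        target=y_train,
        n_epochs=3,
        batch_size=32,
        custom_dataloader=DataLoaderImbalanced,  # assumed: the class itself is passed
        oversample_mul=5,  # assumed kwarg, forwarded to the dataloader
    )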
diff --git a/docs/examples.rst b/docs/examples.rst
new file mode 100644
index 00000000..b78fd394
--- /dev/null
+++ b/docs/examples.rst
@@ -0,0 +1,21 @@
+pytorch-widedeep Examples
+*****************************
+
+This section provides links to example notebooks that may be helpful to better
+understand the functionalities within ``pytorch-widedeep`` and how to use
+them to address different problems.
+
+* `Preprocessors and Utils `__
+* `Model Components `__
+* `Binary Classification with default parameters `__
+* `Regression with Images and Text `__
+* `Save and Load Model and Artifacts `__
+* `FineTune routines `__
+* `Custom Components `__
+* `Using Custom DataLoaders and Torchmetrics `__
+* `Extracting Embeddings `__
+* `HyperParameter Tuning With RayTune `__
+* `Model Uncertainty Prediction `__
+* `Bayesian Models `__
+* `Deep Imbalanced Regression `__
+
diff --git a/docs/figures/architecture_1_math.png b/docs/figures/architecture_1_math.png
new file mode 100644
index 0000000000000000000000000000000000000000..bf81cffd799e1c9d2230e3f5966694bfc01e6478
GIT binary patch
literal 47788
[binary image data omitted]
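For readers skimming this patch, the notebooks listed above all build on the same basic flow: preprocess the data, assemble a ``WideDeep`` model, and train it with ``Trainer``. A minimal, hedged sketch of the "Binary Classification with default parameters" case follows (not part of the patch); the column names are invented and the parameter names (``cat_embed_cols``, ``cat_embed_input``) follow recent releases of the library, so check them against quick_start.rst and the installed version.

    # Hedged sketch of a minimal binary-classification run with pytorch-widedeep.
    # "df" is assumed to be a pandas DataFrame containing the columns used below
    # plus a 0/1 "target" column.
    from pytorch_widedeep import Trainer
    from pytorch_widedeep.preprocessing import TabPreprocessor
    from pytorch_widedeep.models import TabMlp, WideDeep
    from pytorch_widedeep.metrics import Accuracy

    cat_cols = ["education", "relationship"]  # hypothetical categorical columns
    cont_cols = ["age", "hours_per_week"]     # hypothetical continuous columns

    tab_preprocessor = TabPreprocessor(cat_embed_cols=cat_cols, continuous_cols=cont_cols)
    X_tab = tab_preprocessor.fit_transform(df)

    tab_mlp = TabMlp(
        column_idx=tab_preprocessor.column_idx,
        cat_embed_input=tab_preprocessor.cat_embed_input,
        continuous_cols=cont_cols,
    )
    model = WideDeep(deeptabular=tab_mlp)

    trainer = Trainer(model, objective="binary", metrics=[Accuracy])
    trainer.fit(X_tab=X_tab, target=df["target"].values, n_epochs=5, batch_size=256)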
zuc2CQ7OoOb4!}rv;s4_Le+>TjoB!8=|GZQ0-`>f=`|q#(w>$qaQjqPD^nYW;zxe#m zRe;gLSb}W-xoN^!nX^}Fz&^gRkyOzHj)0W?^N|An(gWYeBk+BnLbQDcNX~mRIY}{1 zZ>0T9^q=HX*AeK1s5`mg#s%nc#)Y!jN|7Jvk*S2-F}}P}+j&xeCP7J)D2u~&_SzRy z_8lbz52M{yT*mp~G()6rbU#bT>hgyhTxjbsV9V!l$+f6MpjAKs#slL~GXMNW6oiZy z3__&@Az}OE&r3waBOOhlM{U2ZZU+Y4m z4EX=B%>SP8zc}{4=l_56@&7F3|1CZLEj|A`L-_wkoe>J|A~EqdrB(i6ofUNNb2_D= zl2)7@IREtJ`#%*my`(}p=bX>~F%gk?-Vb+|5T>fOk68g1dY9nT_ZE@O$Gv1xG;g2% z;}z=PAqkC|1e|M~F1n32Zy`c435Fp&F#F9^vlwnLm)2(xYWiEil|Vkb-UXx=Iy@rULGvWECwPDzPi2o z#~p}XQjJxah;UdAr%D!XW%)T$7slEYrO~=%p5L{GQ#V_UT4=)Iv@I!Og#oG8e6CDI_I4_xnm{4cZ) zU_2$|?6>C15r4S9gH)UTmY)_+NBPHBLh!ejTAO^}?U#n!?@8KE2AOy=-tN44bhkM; z820Vw)0@)Fluqn=*R@$}2 z5B`rp;RrSS8M!U+es-^)YcT@pcHrjThd;#s2vy5Fq_lfOuO$y7Y+@sKH4dAJ0>?$t zvS&ybgpZO!&#)@??^KZy{M<-380KEEU(>JNEh|qi&hjlIYYH#>hjvjbyjjQAgW=g4 zGo$I>5x6|)ubf4#2qm)qMRO18I=27O-(TRV_RHmkZ?E`jqwGXo@Pb8|FaV0sJO)Gm zT_{Nicvrtr56@X;AIJ;cZ0AEB$?D#C_o?zn$4_w?eV_|h8Qvv_-9@k zyXmK>fIf1sR6udsKFH_&L%ygAC41nlcim>XT}u&b{g}Ar$@+j$?p*z&W{U1$`XBd@ zj6A?*Oo9&PZYcSzYO}wVL%ICvN7Bt5wU@KVKo1626bSLN1?U(isRCf_F+= ze5b=487jips7N0sX$Er2F$_JRT9zqBo-RF=o%c7JA?UBS%kmh|k@2kGAYETIZ|6m& zgF{e*ec=$!NkyeCsV4^t9}<+f-(4>IRh9qU9?ypWHe1pUt#B|J&m}Y2>zp#)T0Zv5TF_zBGwuz=A@=(8&pPG=!gz z@b58Gb1*oy9fvnxtwycMO;t9|mq}=A{t2r=2~no4hIb&+a;f9w$klbPqQCx>E4VP0I~x4jVofmZg!gKwd?}JJNeC=@C{&k@%7)&xQ4>y zxzAkWgHo2xjV(gs@7@7cJ zA+J1VO)a;_ayS1FsBnIMf+K(1E7pv$2&jeSS~QPm^mpX#D7Fq=@DsqLx#NXA0Ais0 zRXlQ1J@IOTEhw#DZ>t+x7xBvVi(gE5yt%OE=t8FFe5pi5La0w2ZU`q}ZBv}}v1&xF ze&H@g-s@Rqa*;=y0efBW=qS&+Czvrf#s304_GB_N_Sqof5qrIvD+aEg6uvvt@bYb( zv*e%%IE%kt!3}yL%@K6BXR;*uD}o|$`$^AN3ckmnXiOYA=>9Yt;?hTMl@)knQ#T>c zq%|cu-~4bh)4Awlb1F}!GuqC+=)D##U7o+eA=W{Ffw%n(mF6k|q$*Z!!i5}i9%_87bEV^( z#FXG(+2S9rk#;%ER3#61fH24->kCzCjguHUhr~a0xQ`9mrj4D81?pM|(LB8wN%dOx zSt_&Bv!MjcAm*a;Zyd`Aa!+TeZ6mN3|D$u|1Nd_B!%j)zO`L*gde(%o01&D$t=cwZ z`#-tP`Ssgl#%K$7tx|imUvc^Bcx}yX|1m1DMn^ue%ZCP~82j2td59x@4Ky^1<{BnY z5mbAwQl4Y6s=Mh2P|b?kmfXC{#G$2+DGBgyP(?IZs9-0X%W`J@)c+J>WxlLo+Psm> z0r+Zhv_zM+-ex%ohI0 zjJhfAwq+a}l7`tM#8@%wsT5$8QLHW2R|E*WZD(ZlCnxgcDCp0If1K_ei4WibizWkX z)xxH_>T`_xtS%H>!4KMc7DOCF%ypWY(w_~}?vHcR8#A=0+aB%_mQgH?M{0B8kF;VV zt1If-*YpenQJ;|-#kJt~52F#kKBH$4DD(cJ=v@Fz08|h#$D2BxH(LHN(>8m+e21hBn$_<;p>c49Jly^80|wuiJp*(V5zaWCF9d9yA*G z-+!TwVYkZivGm{fG7h@)IPIs7rCGg!%_c{5B{FFIfTCQ}ClH}x`fuQ}v-Nit+!yfn z44w$;-x?Dm*S?@S%~sD*M#n^x;6Zs`UY<0yb9B9xEdU#SU#A!>YDEzDJ(g$BABe!x z0YkLaEH(JCU*!H6*GkyGD9v&(`X62gJjOpTJB<>{_jOkQ$uFnK9T@SL)+=6^%8nGzAw{Y@9AEEDbR#rBWE(;S ztFXZ+BVPNKjPK&6;RNj0^bMQbpO8?mfB>$JDh{bHU`4KV zrVN5(oe(jg#p$m!%os=AWy$H%n}*Xy0_yQgf8YuSxsZ-hDl$KYpV6`Sw&K|fc1mGO zbYvqX(PTSwG#S(ImzK_8P<1_{%JKc03^D6lP2qYIt56>UZ1PN)ka z_UrCUH5RHmZU%1wbLh26Ppg<3S1L89i3Fodt_=leYyXS^qP;g9OcRONe46DcsA~(3 ztqu*@e6VoTGy6#_IvYMl*%#5Jb=+;Lnx$`$b7@w6&3d=yq58 zOo!vE0SoR35N#f)h<-{3eA@`5c988}FlRV8#p@yuIOp8}NgmGcB47GR71(f&q*STB%}EvP70u7JJVGiUi; z*FT@2?X7@OqAK}@aee&xX^d$gd5crSC(NbLu8O)5Ms%FCAZ;eQ{D83N6wCrKptq#|edOC$|UC!H89nKGj?>P`X!=Eu0tO$4heI15bY3X7f zHozLxn#W$t20Pk%w$x-9O(g)7^3nlSrYdJw^Sb946?8+3st&peC>JvETDFj_UX5hh z6t=o`lb=~YXHsSTp}bRxH%%AaG+iv__*wwo=JraY-7MhlEx6JMCpD(~9cl{;QhD|B zODDUQMoWPT9|JEoJ&{h36O$q5F0#F1VSb_Hn5* zf8i);oi37+<7-bY9YagsK3lH7QM722n~Q^tm~Izb>JMmyF4@)7BDRgjYpmyF<+r=5 zdPp*y=KXQ9us^66oC>0+jQ4YT=5|RJhVj!CE=D`7KyVsaX8W41N^!5MiYimYG|7N^ zZ;!1brQ51A_BS!0R9)b!SVTeZc|&($it+BMzRnBQbRuiXX(Cl~mSCuSuM^&OGO8os z0IrsgPJLFUTOcLnM&$3{x;h~qlF5@!;yiCY=+H`PQ<3Z8e3KO~s@Bj?@Lwt*+9)qV zrJ25HZGAoc%gtQLQj1d^6fV=?{7*nAEQxvP-bqC_hzVoL;Q}^LyjxY2MAd$WZf@wDGJp1zT&7j>2 z1G`$Taqi=|oW69yFZ=Q}{rs0P5Bp_b9bwD9WRy=#gKCN#s4?p^aPnnhV=77wQ<`qt 
zp)`)%LyKqAoN5IF>_O3|v^*0pcZIPKzr#q3Uu==Q-z`P%0{ocBL><-bHcqp3P-$Uu zWDMVC>h^AVRm`z|_KWy)xlLM%$>X~kqj)^MgA>_LPjSj7#AehfI6R5`^Ez+N_)Qd? zD`QD1bPUe@kWmN4KV&#Genm7v7d;@g_vv}exjz$Q+;Oe<#Q@K1OYnlrR&%N|D2T`L zDeTQk2Uk9s+`x#w>sO9ez}r#VQL)bZ?o}>Hy1jtG?b@IK;6MUTP!aw*d6S7aY-Ouu zs?Xz^G34mz4A6I|OoPc{`9r&b;3`SB_>w7i3;&UytM@r@y#NXZ>xvBG@ATl+VIG)& zyb;5MTkiOYGIh>7#JCIvx!&o9OXt&Ur}?I2fjqzkv-88-PV=fd@8_XC={eC`hEp<= zHtx(T%bOWauQPpjOXH0;)W)CK(Fz3$TNO4X8!ukUN_E3kbfeyVb$v7x+EIf(8165T zSfr2c^dhUUaD}fvxC72YduoMFM(jG(jI zvv#NK)sS~;Fn}2fD<-qA4za7e zY2d;kTMD>b`Z|qE#yxzV*+#VSRq~Z+4id6Dp3{Gt(%OoyMDs{z1IPIlRur6 zDn!;LU*9|%G1y=XiDXk)P~E9<{v{bIijMESk)*RK%(q})*L)mu$bwln=@zog7_<2R zgpf9<;FjI8E93D~1MI5W#L+0{oo_yUR2$QgpF!2cncQ~=l}*d??(!ariX0wyJ>NZN z0k~%e92WZ0rY;+5OQTWgHvCSehihtZq>Ykg*jcpnPM?;(_T6{(a#!fEta!CX@$h1+Y4&53)CASxE+vba9by?OaS1;6 zn$RTT94B6;7BT|Srv_@d_e-TW`m6wWno6~%ZzW+R-FEZlP!c=46TT+J$;zPp-f3;t z>KE*gMq0eUGdp~xZf6Fuewri`Et*LO{Kahd3Aw{%H9_({72>BDKsF_e#K3v&6$^Xz zT9B6~_q;(bUU_HF<={g=Y@bg>+sR(lj6!Y*kk0-qi zD-V3~?3FGFx=rl$-c}Qd)ohN6)h?H-KdFNSxYqjWYAu$!@~;NJ&9zE8Cj-yU(aLWE z59n^?`!HstoZCFSJXw@q)m1ho_qn7vxt=vQCdq6~(x+gaOirI$Tir@ehL&#T@*jZt z;`@b(^kJPgY$8C$lKZq9$dEHX?3~77?o+K~ICZ!XXv@>BfO%ZhnJS0*uUzXZXGZ)g z+pcktD`|6xf=nN>03>fQvdbA+(NbjNw+53Cp-!N>M}-Kjr$GQ3LuV_VY{t+`JB>+@ zj~Kk^==C|mmx35m-8G)LQ-$s{&&PD}r;Ce!VFDFFE)aRE$iX7cD(R7Gdh62L)W=#y z;ize@m&faU23wKzi5K=Q^&Kyf1A9v0Uh|F(Q0=hVv*h&ddUbAQ(ndh|es*R9d0_JZ zr_6fgVu>Q=5GI~A7g94jCLD?7V_e!OkU!68DZxh7Ur(GaUYbBWc6Gt}s!#0Ci3)DP zR5=LBHl6#U2oX*rcbO3ZZiAPo!4Q0VG2d%3EP)Nr1&5hW`e%z&=egwR^W#G`#tEtD z(n+KMHfHxY#U~k4)QN>}YOcmAQoPoDj z!(~2hFe$?C?>YX`pti&X3zysps4Vy<9rL%8)zZcV!Urlm;wcjhcj~1o)j|pUP8f7) zte+Amr)+e#$qN5rzzvb12;kI4j{mp_Bqjs{5lZ~l`xPD3Vb7IF2?pgv%Rk?qYTZv7 zyqv(o-VMT+C&biWAr#)LH*x=#?}Yo1NnbclRbE>KD50d!W0(=CnZS#;t%0=+2zIYDe7OxDm;pAh4*u&m5kn_7+W7|!gMm|$|5%~ZF!UjC3b6L1mxffB2 zG3At6JB57H6nzPfg*9{Z>?^-)kXi}YM>OX_n+g+761@vD|Tuk=--2PMGC3sjB)kw;@ zfhJVdwxgD|$iT>XW}_KHTR-MHObvlAMF;oBoNN@-h;v0f2XGU$TW026ou92PIUO5R zamIW)nmhS*^osxtcr3IIGf{uRj9*{@pS(I?X$2tbcg^N2wK%1 zXq$;>#G9yc{Nz7%>03VAz!D9R`9uWZ0inq;^}BURE9WyNvDrHJ&2R0VNQqm&o+}pG zNYsd=QPSmLSS^!E3x_2!;+}pDQzaC3awii!YLlcsSCJpkD*og==-a9+X> zhxr0Wtx6!W;h2sgMC4Ii>`2R7mz=Gb#19;AWd{kz_ZFu-#I@!80>=g|y7X;Gd3D;2 z?M_;^0w5%@{7fd*8_;lvemZI#-rKf&DUrYlzMBE$>=a}00si`$diqFN2JI;4Q6� zsDdWhLb&luBUt;rvdG=p*T7;sH(>X4h`;l5_?~U^E@k;0rrmHMR4K$dOl+AnL{Kr# zB$1aNia0|tAMvJnB@jKq*A;{$LW*`|gM#s_Z|gJ;25-X)RN1eeRQ4HPg5zHxTw({s z<1{eH;ydUl6UKLW-oKQmXAAMf2g&2TkwSa0KQmxRoyJm+i%Zj|fH_%QQ?vE2#s?|0 zHZ8hpB^6IxhtkSya8##HXz`QxJ)%O(pGJGhoED!!bDzDX8*@Y!g+EMmGV#w(e3Tg$ z;9hu(2jW(!j+#V7)WQ=j4CA3avU`lmOgVyn4rJ%i)Gz^4;d!?Ad>yAF0y+g6`jw5P zMD;&RI;eMIqDvgtk3P#R*#XzY_iZlHC`S>vds1EI7OiEUOnDbUylJ86# z1jb(lzLz+Fax@qxNMnGQ7!=}2E+`I2#;3{IKZr{47edQ+^u^}&mW$T_*(3o9Np=l+ zeVx&yT#Ra2ia(!IjuBv9T?Rb2D9GxM;BKM_@gx8#XYz9$3y-%d6pfyKx{WG;rSJ={ zgc8`&LjTm#77;aNX~6*?$RkfM+7ttiUcL*P*Rk!mH&5=%3=U>J0R3G7^`G%SZVID9 zN=4IHxt}DjzAXV5qTO4!P8+Fa6^*I+gv65oQU}EOUW&mpUI(3N0zdqL$Q7cE@#tY&dGCP4C3QQoc2s4X6nO&ml)L8q!@1f1j5e0k@K*I+ub~suP`7t>M4>k@H5qa(Jy~bVKVP07a|s@G zp?o&6g%>3;8UwY)dCV&!P$M7Ef(Tp2>^msR?ZQkNa=6W} zA2jCMy!3bhkG*SjKGu|9mC>j!Pa~Rx5dt_goIZLR)dsUr*iL5naI3b^e8B zM9~3M5ax@NH4EBM=jA|G09-|r1@jG8`N3QBdnLlmD@hIBW>!a(F8y-$Q!D2zrk7d6 zX*6{H9e}%(E!YMn>sh|j&KQRF*m99#|JHT0KlDNzn^*Lv)vyJbak+jYPh6kY+tAJZ zT4PB70V>0buHxCU%0M$u=o!2KMaM%z(#?UQVtpz~HmS>5Sd-)AUS&BKRN}LRX?#_E{SzD}X&l~<`iTH-! 
ze1Ny#Y zMMSfKfT?4J-V$$?N5#R7xPKnD=d#ZhxmiQ<>h}sv6gQyW*wA8j7Gl*2f zd=D7DUbyCk;f{uMt`g05z6|wNz5KcSoYZf>`u8*GHOXisD7+F^_43C8FgYnW{iIRc zo5OLD*DMJ_Iua3zx6O!5TC87A?Ba4GTwlJhx~9Fqy|B`}m8C%FP#N~~ox+d`Zh{YI-M0`)mYd z=-H5>zrwuNaAriKx6Z%WSd3luw&clbKF%xLkCm=5#lgw6ehzbJTAGgTOq|9N|K@=k zm!#Cw3>4)CdoD$+7pGgjoPzp^kv#~;1d|%hEGqJ1GK$AJ4DT)<%Q9vj0qBc<@b<$@ zu-7$Eu(F!gx2tR!QbUHs?;`ZpkRTM?H(Ayt&iLoD$i5xB%{!kr;g6;FpxHtLJG8;* z4FBAn%h2VB6hqgqWL7q}`Fv^+5r;YmcW*iti(rhS=koYW)k-C5uo2Nd3gbkre3hCR z6U&O(tyP#Mn9-~+CVJo8-e34zet?ZUqq252!JqhK)&;qpSx2wbM{NE}!kk=Ezp^*Y zU_2&~fN0E``9c5sAR37w-W=w(U#d4-8IvR9H5 zS`|$W0Oe9zRywD`52*vd&XW~u=-nYMaf4VPfgDVrV#PH4Oz+jNYAsGzcKLo&So!!B z`w>3fr9Wd^f#NRRsz*;q0FspFCrZ=2$r-wKBb&k`Dgm3sZ3188LAG@uYRS@Ds;A&B zUsp?^HcBI@CBR8z8qgG}v{yn;v%yq~Vax)s0H)Ezh&$1!g(4_;!j+X&7b#Uq{vKuG zt|Yw??2kXN;?3V70^V@qn)(SbV0bZo2Dl{I#NrO=V9|0+>u)whP{!V0q*XUA!;ip@ z-kT4358P%kN^s;QJKqZ}zpSQo>%*~B}cH-Da(5k zqLJ4A`ESebqMxLNY4Evz6{F_~Im)Q|!ZtZlWJ<(BFv@x^0g2Yat~Jc()eCA+&3Gq%rAD^!>qCbT6Kso+0ciYNRa#&$AM9hHY{9gFH`cW{84rtl`5sB&kXLbl|Wp zZFjQtKk*>e%n{o~9%!f%&r&_rDEez}N)8pwE6t9ZGoV{UJ6_qx0_}Qbf>Ot(p+H0h zBTsN$VGpQem9Y2FQH1$)A6M|#BbqHygO`~r5|Q>JVe|@rpWir-w>JXKBHD%`Mik`^ zvxOyR6;IJveGf~r=ZL<>Ly48A)R4K(vC+=eFi*M>Vg$1I_C=qc6qyQ^i>1Y=pU#5c zf7@r&ES`FkptV91aiW9}Z3nt;R>fR&!OINq+4Pt~FMcLn05N+Nk#P<4$iy9vhFp4EnK=^y$W9zBBtta;)%;wW0sbYk6$Ch6(8D=CrA45 zjjAU9()-3&k0ARiFaMalXV8IrWkGwYa`B~uc3jSMAIpjIl+bn(m4C~bHhjhNOe+l< zIFb|*%(au2I@VBhO=Z{nGcN=vcbhD31;COvO*wus*~u=XfWF zmn+w4J1VU3xAo3DT_7NrdUjal4%ezRF>f9-H%N0KqOKy)0*&okLicAvO|C5YVu-pw z#+EMkoOwb;1;N(Drz`iD{L`0F9y1BDeMqQf&&T}TczG(WF(6Jw63Mmpb<-_vpG~MM0-v!n!!ng}qID^8j?);alPaQO z3*@fn)Y3GR*y;6d^?AQ<_RCA^XuX7VJ0S$!)~kQ|pq}SY9j}D*iI}k()*pjdB9S!L zNureGoC7axacz**Jme$l<%;Tq*}t)!vIKt8KOeX4vmI*Wa>iK53&2t5rz!|D*m31w z<5d`g+8ekFO{7;g?1G^b(EAFCcu9{6hgH-dzC1^6YYW|$K)r;uK%(eS$%(6h^j9fQ zocOKZhLCLPk@d!(lLJP{g*mrXJ9y)ct8OAEZ;UwGS{jt9b$+%UtL-dHeF;>R_yQCSz5&86ox_v@9L20LY`27&gjgvk`CHM9F+vytv zB`b}YnqkTCQN;}=DO~8>j0^#_ur3Tc{)!lLx=Kzez#=eT7D(Te;^W$l*$#txY7_fgWw?SwVI)KO{nl0<+)p)E1xQd#-0i#1P>=g6h%R^ zp}2LQhMTVax}7S6Rx3)vMjejs%6{iju!eEaQlaw4iwaT8xTko^!Rg= zgG|gtsU5?+v{}Ot2uYvUa@wmU~)4dmCnm^U~=1IqWFw&J#N>{aD!3D=J>uZ=(cYF zDDo{8`mA7^@HQE~{W;o*6@Xojv^1Q>-%vN=`py0>8bmArYZ&nI@AInPc`SQg zEC=0h1u37C59T~LFYSoo`l{;OM;V_{btY|vcXd7>I=y&z0{r*QDC!8KGVh@#cThzi zr+_^JI*}t3kul?kHIe(Z9xOvs+g?#2>OO4aqfZ3$duP2o9WIPGJNKJT_q_@ATW2zC zQGqwGhdY>23$FiP6pc$cL=jJWX=~^HfJc?zQmHG&?O$}2D@Xc=>86hJ+8*WQ;}|c+ zBn6+xP9vG5%e~I~Jq;hHz|)M_!KOh;ldEo$qIu_R#G+MF51ZEX!tzC9DR;nc}T4PI`tAy7te_U z?81SdcSX3Y5^4GF(ChMTwqGV+-1)8ro8O$Hksyx-tt@II2x$|ylLya=&5XOL6NJbm>(*@f#N&N~6Pff3@bNuDH2Gjq%v%Y4p&1l2p~U zS82I8m0Gq2NZsXV{T|Tl7MSs@c?H<>p!&<83#%YcTKJIhMQv$YKZ+LXNq2$dhXjna zO|Ggm$^~r-$s&KCukDh11?a5}N=X)6znh=wn9c}X#u=!Ji=capoFV>K<>5@F_~w`+ z`+8VOOGs-p_HH&-sbTFTrPCjgu?D+af+-b3W9vrv?HF-kZ(73N)e|(YrJh7ga7ysZ z_`uPFj?k?@`6y4-lqjzrSJ|$22sEguK;MM*Td=yN4T#)Likxqrl0C{eE$liI#-}yX ze(1rbGLe<9Xx=rI137MNYCllqXQkY49t&gTO!8d*NV+*bWcj9!QCt=g)p->~b|-MT z=%xd`#S92W%UR2JOGE*<&C}ANnr?iWK4U*gW&SzsrG(n`Eq3P-wplGs5#QQ34$Jvh zVQWA?>)iq;y^iXWNL$z^v&fuVpid|$<5DJYU557IROBJQ!sjVQg+3C-bim*?y5L`= z+zZH^|I(f@&m@f9@ylaXTe+~cn+S}TP~Z7na7SSB*ex4jm~wGm6?8WokoZZUP)2a| zImbpGXe;9iU|XB}vCA6>2P7jwQ;!i8_UG(aBl~7*xV2^Z9L$r{kERd)b2UzJH=fq^ zsC~;DQb&F@QAQO?rvb3bfJ)JZ)-<}WU9ZK0uOs;M&FK3~ZVF9E?eF-uesCmZU0bSs zW}gnozFntV@HIxv&opk_jlge5KD7IAJfG!8ZB#>g5$$qSge$OL&9JS^yApaxCcVup z{&L$4Y3oJYSJexD_+_9|#^x$X=T%H-03%1N9}3b9l7IOPdbja9ro`DFI^h}xm5vDE zvs{S*tHV2hL#OMp_uH}jC>n`W!#pnp=?;QW84A3+0Z>ieO5O9<`}Dv8kUvB}9Gk~bY>gK_)o zIzi_;3n%O!<>gVi!@9WXK`!_w=Sdyq@cTn}7Uj?`^{e(o?LL8}9p!+5VlZ6tkXLIA 
zUt~lkYvOOTdX1$u*cV!^4E34JSgvP4{#t5xEHmE4Ep;?*#7f|Qdpyo;kt|Qw`LFB`#%o8JkFS1r1cTrSp&RGlbZqMf5%`cE_QOBvLwF*G)l)ri{GB z-9U<|ggkEtN7sZ5PM{TO^C@Z%bphYu&oxUhiONtBy_~{)RfJ(t z3}!jW_YUTgD&a^9JN{Br#XhPr!*iUk8_FdGf)ZjUgInxO5c3dppquxwNm=&tI5x;o zBqML%QJHTe5x8>b)rsKhA66i^CHNGCy;r43ZGtx5m zVov^{^zLRmkj+;;Bc(T{?i-hicQ-)`qt8RnLJgvQ7}E_(WkHy`HudT(LF76?>3G7h z#CEes>q2F73w{HJcaN3Wp}VEsm_BFa61n{?vN$HPM*8=e*G~zJ7YVEsbi!UFzqC&i zivoNAzsq8gsH%szkY>n?vQoK6( z7{t4ZBtvYfgw9J<99{#zZfM#H5LEW#csO7}yRN6JXe=4M_w#fxFs@4W-$I85N z`IBhB0j&!3(0KZ#+{TV%C3?jpEnio|_-E%;INhOvcBNQRZnG#4q*NjiOEpu5&4Fa=GErj_&!MiY(#yIIe`pmz~!c zoqW^;jJ?`Q?|Q9I-@yrg-(B23MmWQ~f=#Arl+ShXkvax|ZuvF_>1rcPp0*X71z3Lz zV%sIw&+xRqSg5i!`c?X|4|C1*2S`Xo5&aO^`gn!h--ZPI5R2!c)Bl5}s|;(p4flY= zC}HHN(cK-=DN4sn=V(Mgq`PaRG&m%rTe@>}h?I19gEV;Nxz72v&(C%5dG25SZ_@+s z(+hs2@5fetP$x^=p;e-IKL9FYoiKoW(fjphm}GAlIhI#PJKtL+^?pc6vd$aM594*O zyn!%(8Z`9AHjzVaw)dHEgM|vX9O5~bc*`%mpVT9v?=W!K*c)nl8`Vr_oKu$Y^wTFk z(bH?vyRBw@AKN;nIJbD`Nw?ZZ_sq@Z)ns*s?OO^7{RPRk`{eX?D|CvQ-uER501ArA z4-vh=a~0myj|+o0#8MAp7VY^B$S^3KcSUj$4ngZO*fEa-3o^g@_;2~(3Pai;xKL9) zR5G6h2hCiwcU`jcKl4S9A2|O!Je%R5HoJ(?8`B9RwWoA^*;aqCVxl75Rz5mdS-Kfk z54cn@Cj2u=B7t@FD@O< zwSs&3tB@W1(;8msgjsySMyxzeE%Jot0m9W1-crB?64WT3jWzVWe=C|)leYqkaO#=C zq|e;4I1DLPVU9eEhU4Ae;nb?n-!Zq$wCHM<0Ns!QXUmnYSRU`4gvu$IRgZ9oYkU~* z2-0d7EYhtVjaJq-+EcmuHnpp+l1t<1+XMQuxhVs^EH%NdnRgl{-fd*yU8K`X?tNx% z=2S5?!qwm`sP?-#;9nePri!~nz2Z@SiL1CPeCw2Gt2=W~;~F1t!yqM98*NjSI5+*o zoo1V1@i6?%imH7|2Xd)IXAYQY%wn5BO3M!*r{9s6Y=s@Qm&+s5_UiKjQwniF+16n1KClUi6*pu#m;=?Bjx>;b00bRoM4fFi{gpO> z%nhOR783qlk`3&9Q5-m!$VAcSQqW>k8-byc@I&Kpl*Gn<{OJK%W+B&&HNgCnw+MHd zYr%*?Bub6f^F%#!qw{Rne9yH5_iJbNtNZz`mp{rEt#-LTZ>0&=2Mj%T?_<-geV5`s z!XB6R*xrK)gC)YHFD5b$w<<6qZW?wsKury~6`iGcfp>k_Qo;}sMVez7p6ZEk>`9#H z?qtBz)8yZ$8R0z1&WSBCqM(0^o5U3*D57pv2M~Be5q*7ZE9J{XZSk8{n1g+8fY=B0^uHHP*CCg-Strsorx*wR5R*N`VdRI!pK!wB zIEMzdYC15SkrOs_mYgxeG2}ChQlo=)5rfK#%42T**XfT`{9(GBlXTIs8Y;+_1A-IV ze(?`Up$=wx*WKmaYC|};WGAZY}+Xg%)BgoeTu*TnO5_&7moMjC^+<` zO4Qc5M>lbE-1lAuo-!%8IW?a$C%7Tfi;Nn)P0Kweu6za5k3>n7Ss}G)Paw#54}i=i z=G$nl77MgoeLWMOowq1*(8)f}udCRV8AtzB{;h3!hfDj!*y_Vsc~@i0lfEJkagcwB z8q-*|WT$KVKnle{`~A2xjP={@N}L`8mJ^9@N+?~M^D>N;hnoS#D6IP*_{%&C;dKLV z9GtBYn)(gbPs(wgoe`tnARaZF?mqCx2_rU#$Sw-H-n-jzQAPh_b-IF?Yah@4o4@z| zr*65WlE__HxgrTXZ*MvUoSlDr@9aEx#B~Q~BBePQ6+H<0=X5-+zT7l<&i@|eAi(;nr?PIAWghgWXtG7h!M%twyiN5EB7ZSJyC2Dx zy8JELl!p|?T41oiG4)V@9HfLrJ&Uf8Qf0(cQ3>pA)z)RGDIzpLUnDGX@ilhVkDRx`wf@o#f7w2K*ZpVyg7fc}E^ z$mz(|3MwQ`FY3%Z+*J1V^Fi#og-vepsmZD1i>qc zNcUoV4YmD-`bS2WG>;KYRPWCxUDZI@>7R#EGLCYlx@vNd@o5VnU&AYO1#FyczOO_t zYg%*oE;pmp=!*ysxK%rHzY{>O8A5f(98q@Na(-u5Sc-Oz*ivST!n7}}R=jI-GJbEj zdO^on3|nzF!p0kc$&c99G`oyuTjcY0<~*^!9!tCt=yu#B--*0)@R4e3Z}2aGcQzFR z2+lSO{Aq|*?l(!RUOP8A&k_T1EWG92THa0fOpOcs24-<`p1BR*S0(GoY*yO7f0Wn9 zB%@K^hH{fiR^KD{COD6|$!0KN*g7-JYf}g70gT1aiHWX&ik9Ti>)Of4>a%dy(M&V= z;l3@T8}TZzX#Mj=l`d*57PPN1tJ4JY)C2U z+C%MYH1kSWo?|;{$^;#JfgnIh%JT#e!ANFVaU3nFrbBQp(mugE5N2Pyg8|EWVCMm)&1 z=MhU%4Jp2~Y0R>Y4xRPbP~2x{DkZCjDvXv9-6}rDtF!VeMDX45oqXr1&=JCABv$VJ zZlCLQ&M{&Q(l~B5Qb?@PjL8%I{n-^vj64W++||fX3RmXa z_sHx*Sa`q>DGQD!z=2|FZ9M+B0=7nZ^Q{wZEa|p3-(rOz!C5Y6dWwu&pf8!%qSpkB z>gXFCLpG7ZSeQR!CdHRDxu7W$Yg%#d-=n5Ntfu{hH^4{!zsT`%y&I%(+fXpINp~kCqc{1!X?3rXR&lr;(P(ysIH{OuGyx=qKFqr3C}kJW=5 zWjlRaY~&!P$NVz(0{8#?^~ku`7rgV+b<9KFf9JIc_iEc0%P|S>9&A${i!(JV4drMg z0MA<4--DsmN;w@Uj46yv$0z}RP^_?ZxLL9=9$Bu7ZqQF)bTj;7M|0g9HMN~&O&mm{a9O< zJNNO|Kfp!i_ipI4$A-8%AyXVAhf^7jfjyQT-7P6?ZbTeZ;l1ToyarOhNy1)9jV!n+ zCm(6smw*ouXxfxj9T54->5^6rpj$*}h<8>exmFS2Eaut|8-?umI?O}B3_Y6R6UA$ zv{*QRUm_Zb!&<}IAwQFO(07#sB5~9$QbQGr?f%Gylyr4(+%5VzFj3Hkxc0Q~-!at7 
zY<)5G{{MAYkvPcNtB<0nn{9)i8DJj~rCR?jnoY-r7*K``7S(F|){q+7_Z1M2|u?^4VrL?H_TXuvk?8B}EtG&m2$Jz5f`7tC%-DF&EQxC4kUf2a$NdgFNaww{)Mo* zm_W{#jT2?>Ez9p2C3i}4iuWZerPtQ#9s1k*YacGvzOqo(wnu|wttZuQe4-b0mr znBvC-y>jC7bdYWeP_<8*58K0pVCC&ow}s+j6IZY^XVt${L^`0q=E|i$Bc1WjWS45h z$Q3`{M;&5n5{A*kDL|+nvR1E)gjGjiAd@$ClUK0Yc2!|WQDp@p-MzjZo5=8#7%j zSdZ0c=(Bqjo6k(yQtOw?D*qEl&q=*66vu_f&oJQ;yAdI_(1fQLy>?5c5VB`dv=0cB zr3YM4MWb1Z-^GuIcjSFGerB7N7YPV0GIa@Jz;H1urAGiWk;I%uzx4 z~_1Y8>y>I&Bf4v&>jyY_>KMaY}N8*wa@UxI3sHK18?4O!s z##>NL|MYj8S*qn{mjdcl14d6QYv&$sNJmzigsIn6$$!)tCn@>ovXk=7b>2FJ@$WAL zxX+l*3MziOxys+QaCo!K=^A9uq6+>aYDhlPku-`^gNL*zR}e}~uFs9{>J?0TNAk#E z(gBUTRuWa7gQ%C zqa(O80CHZ>YT&wB6+1Z8>++ee-iIiU(yqg9n!27((%G%hT1Enrv`2o>DlB1@QRt$k zZIBbXTdnBP!+CqNzi^R1n`bc4=}^Mn8~|D5FTL>bqa61Qw?kHXX#=YL`XFcC8-5}Q#))Hdo$I~W26oWzu2&kvPVn* z@69CK_IteOLD-*)Y)d?*o&5fM8Glle`!C-u^<6iHhto>*6C|^B{wv0oH?{4wmlbb+ zLF8?s8}h2GSK=jU!R)&EE85BYF+^0`p*F(mP~M#M->iJA_9q+7&k^eFB{3c&lRT{z zoaXx{1M&UwhQ+ZV)!un_N60qT4jNsW^Z`7~&M3}EfH2kcI+>YHs zgT4ln1vhA4BNeJ}4oOhS(imXVPThD6EoQfh?5&E2m!x;(PGIIC&Bae5G`s5_rT#ZF z5<^36LzavYx}tw(v@wn-eEHS3|8b-x6}8n{JqinXP7ENmj&k0S@kRcrGtHcGgr!S& zij62p>sua;+fEmDkNqtI{~bi9tR3?!K9rmODLcs@&;9-(Vo9PH*7U*QR9W%sytt}^ zKf@`1uh(9f23b$&VsDc2k6qRM`~1zEaP|#HQd;pLuE6`zCU>c)4ZO3u%hh~Ytq~bG zhU($*0Dr`$Y>uaJ8lATHRrZ?Shq=`6OLG!P-qPdthf>$oyfHC`fTe$rDSICWNf?g- ztcE4$)~Ez_*M9VL0uGYbUEs9L(e276m2}c}VqgnjEQC1aI)rJ>H^T)N>p^bc*0p`& z|8)~AS$W$)RC7`Qlnkhi2FOuK<$=|pNc(5u>-hW)B5MwaiMUSW8ibSaRXJ~xx41K` znl#u>9H&czzI`Q$>^#yU@Ai#T=q2Jb>_4TJ8-K@*j;sBoMaP2|?4*lSw-bBoO6NJY zMDfLzQNCkui;{lHJ(+?yzyG)M_SN!{>22%tjhCZ8tXYES(|DFK+Wq%NGg|WGUB?G* zZ+#4lV;0fz0BwQlKa5rzZSmb7S&>Y33%o7t{>n6dzq-f~kKG5-O(7m3cpb@e2QL~8 z_GC$o_rZHJLnm%~e_3R6j-owG9I~f;KI=>we1%+&s##hQx}Z{4WZ+$S2Zt;mkhs+5 zCz)`p->A{*G-j*j`3mA1{%TmVKHSS+R^e3lIc~BodhI95hoQ$f&`wfP>)P|FaFhDI zTMiTRc~>G_-OWPTHR*rK&pleTvX`xBKqaw4VxP#qJ21tS z+{JxDhb3+K<7(BSm+ab}kZYN=bt-((^A2h&4k;tQdAi~abAxvz1Y8$XdXkSlAiM^h zrQ-koVK$c~I2q{*QHebxj%o;ON zcgr#Jc=k2JFxVi+OvW_N7YLF5BJ3?LU~g7Rl#WFs!w(1SkzWJ9dO54XK z5#|wlW_uO$%$?&trXnWJ3t?A@?-Gt(zLxb#0k!-okT=8gmnS5t>2&I zJ}Q3}&n3K5Oo9yaZX?eBR_8?_fG4-R+QLOZeYf*%OQsMIBx~M86y#_0!s!|>rxcf{ z~_S#>@pD<#UDmYZ!8E{6C98ZH#8u z7R;HAv`1=g3E*Nr9ks_fu=@{1W4>!ApM~Y}0rd;s(g?r~n_vD^uVMLGMk%Ev&!~%? 
zR`OfPoRBr1kjMOI0S9jA>l97d$Og1vkP{yKGfIS0M=EU6Mp91CM(a(py(M`g{%Q^h zG913e6UZWZ%iludNu1{#K1+wy&608P;#vG=MOi_Os(|^&Ug?QCIYid?p>5^lqqcRz zeemYksJWElt%f4&(XFVS2xf{sVEp+7@qSdd_%`{pA?~mMBl?tb0!uIdoUM-uOa6t; zUFmy$)7~5q(!pefPPxSttnNFTqCj6j4ljZo^3+MGfZ!R=4qPDPc}dUDr}Kg@>l*)1 z(xY_};$+P!bT)CioqbHMt^}NQ;)cmU&jd?dW8JCF(1N?fH4Pwqulc*!zs3WqMQl0w zoX@iFmX8@j25F8|@ocrO@DseRZoL6K&b0N#x%b9@5t{LKyF7&$M9LIu%9BEqLgfF7 zo*pl*Bd_Q~MV8ZlNS1HEI}>+$RGiP_Y7PY=N--*kClQ|gM#nt}Vkc;rYcL9D7vF6^ zS8JP<9Hz{tD}cLoA6cG{7#g>8eu~mo6aDTKfWsahOy~RlXL?%gf3|D&Mk|N^IWaIe zjn&AaZ1~=+Gp|s-8+Z<2 zT;+Ck38MZp9h1B;TG-^o=A{?HvFgj@fy`J~>jqCPQaK9b+^7I?mcy@NlVBj+JS72&5e{JPa+^rNQN za_X^u+N~d8u6a2HWW!dsCn`JU*I!DAxyZPfLCXSUr)Qq$Z?>zuNWYntl%nfCe&jO# zx$doxqrIvhLwg_5PUL7^gi;eR;yn+$Oey|+<}n6-J904Bn!TVj5l(gVpU@$hu(uVk zymsdwOSV`RkvDT2b?Mm@RjZ_kCN3P)vE2hNKgd_LfZkt(r z1CtO(R)+-QH*E#%nFNs@0!p{WpL zlrU{kU+&DK2++zB2Y*0a#+3FSNI2&TyPsc(D~Mj37uDQK)xbH=NE-OI#GY>AfMUx?J|SphPtCsg)8N;+ zE?zWqoGuogEuh62)u~J5f8IYz{vgK?^{O4jg)+SYs2URuJrZnL9_pS zR3=K7b@=sCd!mz>hEVxDz6BmktEi%brjbswD#}mBqFwt6+>2NWB*cg6^Ot|S04prP zbk>lhAKC;z;QSo2ETZ1bW$5ty8{hM2QfT>D$sz# zyMH#Mw9!ryp6;hKB~+z=N%}>_Y@pq@slz4yDKeBlUpqCvhnXgZr%dP9Q)l7SsHC{3 zUE=3kuozQ}h~g=i`auatr);Gje-Hm;{9Ib$sk_=v|Ai{euDL& z*f@Ga_TB?iT~Jq&zFpwr zdjr9QRecf&KS`bcBpfv9d+4hNUW}jWg+xCLVHci6h3&lqvj=q@_f^2)Q+5 zeY)!pZf-Yhuep5RKeGewaOa%&-7-h6cbGiEYvfzJBgR8M)Gsl7Y3}XjDu358q!t>( zJ38$n3H^vIvi!&9K2T^f6N`iiQ`A{i6~D;OA6(h z|A9EI@yT^!(}p=F*3gM1T#$!@z$gDdl6igTx2G!weKW(ZiTrdwh}d#svvjYKZjyphzpeNfpayPMRqy%1B@U|fq>+gr7fHnOYJr-qnO2-Hyi~Q z&pfpUm?wP3HH}z`FE6PGWYb*U0L}(E5`ojn7_rH7!dVt4-|GMCasA`?LGK&~IPP zQ)1{e*m@72_hl349nNl?_Xxh@GT+Q$4ZdKm;8}}H_!Z_D$|`wSQcwg}9x?Bt(F5=b z)z(yoXZMf9N!{R-0adscj1p9u7vCZEM5?Il(g#sKcx!T2OUVA4FgcH490*&CK<|lS0j5{x zP}e#!QQV&r^()g{S*TbBgjiP3={v{mp*2SYM$q8JO%2``!e%>3tFOV`6rltu*R54z z?fzjLabe5Bm-;{GT&QDgLW8m!09SWx;s~{Q#&g}*`=#(Xp}Ua{$v~K|@nM1Y#;JSD zQat4j;s}~v8p$su+3a0IZ3MHa& zl}VrP>?ZbbP*`Kl4Rbx`1PzD=E;_yx`yuQ;O3LE!HeK}Ay?|nwLwl}q$KrY82>Gmh zX>Klb0k)!pgyn37R3Vjg)QkONV)M;*+YG3z{*6SH>}m5ap%3!NLv#kOsFl#StvOje z9=bCTz8zK+9D=6y9nLn%mEy4ldx@iw1X-eBwpa$w`!j6J;tp2*w~C)gZBQ5KIC3vO z(x}yS`e)pCq6wqmg;f~3&?0p|q`8JUL1vAY^-Xn<^pr?Tz`6)i*8}pWJQkVTb~L@^nz@|NHBlxxI2fU;77>! 
za>n}wD(}&Di*&z;X~|kl1p(2p_}lyZp#LbB$x+b6xfjW=*hG>7v<%(2H*$G{WWVZ* z`(~>=8ZbxRf}F>h0r-WVZI9)6QTq;@w1- zh33jGX=H1B{WOPue#|ftPw(JtaB=XGWy1|IAerPtxi~YkUp~SaU;^83|MHI)ti~nT zf9Hm<5~?SNyMNxHP(<-F7kO|mlB{y~Qu`al?oxkQZK7Ei_n*NhFYUS)NeEa+XAIqv;th!nxd&^bw5{DxBV$PJU@x3ut zOBA8u?=W(M>T&`w)nz&=9<4QEF{sdAySAa-y;|iL$G&2e)p35@h@@WC9nUhj%Eqp@ z53QQV;nFNK+2u>OA7#uT4>5&%ksa|2#&+XbQ~RPlwgV=##w@J%oVIiRyy`V-=nLbxPhpE^$EQCwj@e8;z3Gid!t0LJIQMKkr6_lLX?BE>$ZXlXT7NM_ZTV z0x!qUO>TjXs-6DR7A_G}nHQQ{SdOEM zZb6J$ZsLFIpGRyl=U;n_0GNAe1StxDaB^DcoGhgx8r-D6SL+1dt;&}J)m?qiowY}2 zk51{PzR-k4+i@eDgq6KWfi~ZEcPJ#_>oy9IWcxaIg#m9^C2Gdno`NW7wDFl{JCrsZ zh%{!RW8CtcYYq87b3s{NhN5`h(KyUPBzb8A!mwWHk%HhFuGQO}jrS|3JxUb$k)sIz z@df5qW^Ywqf6_PUwNI7LAo=}HY}6-F`!6@j(uvCZX3RusY@rZUmJKi?i;2byZH4aF z`lVYvMQI9LIllyKaYYA)V9XQgdjd>uRm={cn#d}QBs?0mz5Pvtw!H@WQD!7m-#B`# zJ(?5*#gVn#p(X&8bgF`Zo8?Z3g3o=X@>P8kmT4f<#Ya6V++OP5)N#R`kYbZP(N!Ct z;;>Csv1Wkzb%;#VgPed1oln%M>v3#AP@+)`+uX(J^Y{A@-o~O2@M&<5z)l~%7eE9m zr6UzVrZ1o<^*cNihmJ3E)=Tp^`WC zutY7{(*4O69whbhwqY#}#=}+t#3D@%ReCcy*qY%%_-CBh@mv9)#xtEkyH@6Dwyu(R)ERCX$yPqlD&Bej#Mk}Hw1Y%iSC66K?WZQ zH(aZ16HJenW+pzor>mI<>Zc+!6_E-Pa+(9Qs2Js^lXa4=`eK1F0L?Y?>YbY;zhPZxavwNHy)L~{4or7AV|Ky zg(wv8ik_B6!!+Kjuw^spdP4q7#Q7UFo)xwN_2}?(6m;VQ2h+sUmAzGb)AWRUfDk@Z z@jFwm>1^N6jzgBwKB#3O!7!QqIqd2|ZMPTREhOK5af}83|3nDX#ld8=@2Q_m3m;J|pg+8#*FPgQ9Aa}PofNBLFhcDgSVCjrR>HyepJ0^O;FQ3nP5FwyUgj@nO8pTj>^63?`XfUyuNyV( z@_$Amk!F@-2ZZ|CuOq}p_&wG(Iv?i!sq#7&Tb)*;%e_2q1k!39RW0gb#5}YjQogi@ zF8bIqTidP<09{^y;ab}e|H`q&Zfk(bnkb+yn=}Wh<){@@byu#0+9G`9@_}7`i{}_^t)2O^TrbFH+aydBb z%)H4p*s8Yl&wd&gLU$0ReT#5y%bWiJ+_l$gw`{2yHF==Xkbuc`J}*a1Iln>Ux?)D5 z>E2Wbp>8kEpZ(cb5T={iIl8|Qxs#k>J53`}1@zSlLY8%5MqM&oPL?vfOHQkm{zGpZ zg(GPXur3i0M9&E?=YT2?_8RqvK&PZf%(U}0i%*Xhnlf}-jwGi)_C{e!F~#Zr5@qE0 z0@G7k+p?PdQ!3sYhEFc z1PagE6#lkfYUm7lTs{dtK@7l7GPlOj`HtSplqeNzf-gY#;@#(%X)}8V+C%*xqh9f! 
zp>A|3%l~A7G5`?N$t&=aE6LMzW}|ZUZMdZ{|rUb=85YWGC(c?bJ3tG$D)S*r3%Su8!4gz&Ydm=p}5otVHDNw8razxJ+4$IHD)4t_g~ zMCCQ|&T~8B%XHTI`tS!Ryd>G~Q~c-{muE^sL58K*1_CYHtNhJsads;f^-DiD)TTM3 zF}B~tE<3#4&nAZOc1JC}hfFOI$zU4tC{-`q6IzPN0((yI+hwo?%%fz;A5}eohgmPpKN-2!)uA=&mdO=pRZv2MU!h=`1%dYuEv$mrE^tC*9zjH zOelG}?$a-WBX6R+ck#_HP=6@>g+Hv33THbGd=k9ibkGc4f7`zqmiR@#?qQRS52E7X z&y;t#=+>B~T ztU?$eAg5glUubLJvbJPzikCP@a9cPKH{sRVtCGo2RLG^PRF9ToKlMy{V(T90;zF=6#(nZ1Sdf}-09XtVr%obK`?*S3L7*)7D5P8$)oOtLhXwIIAb-s!M;$m;BX=_1Bcr?a9<%ks-4G;)WI5X6?tby1<#W;jPFG@MXP=46Ua4D_p|H2_yRJ3<_ZI# zv{P~66U&Z}`HHixgFiu}5r$6#t}&lqOfGOm8%^UUfcp4zpiv((k)TI1+nk@B>{SXD zpfw~5#RKdxI>m2;xAZ-L82pE*)r7;5tzCR+n#!6_*Bz1xw8BVHOQ0#wCrOJ~`8}d7 zUg3)J@UR)lx*7Ft+&_3;*rX42%ocIW6jYFS@>=JX&NuN;pc;6yvoTO;Ek6?O!N{`p zb@H(yJ(9&|Z0x`zZ|lHQebE z-3E~Sqik(p>j{lBQ30<4->>kU=5yg`miw_#XbgSBHy7W@RFk~7TKSmZmy3lqDy;M- zvx(Pm!^Df))>!wkkx^@WrtkQQ`utUx=lq+}%KPAMi4aG0yN~*cL+>#EYDzvX3>z}t z82tqs{IWk$Tw_M<>ZF*%Z1@wyK5J~VS^e5s9uYsb%h{(@GjNvhc&8o zroMLUvnyfTdbcPokn(mYY(}kfCyMwWN!#B4mu)vvb^a%ZBkqK{UPGP{Tz%5g*CRb{ zn*HQjTc}C0JS;1?vH^qbVWdp@{9Ju&H7_$i=B?aoE8F#UopQho?`h2hwxRcTxXzf> z?{i82_K3>}@B#Tmcje|XhV1rV)suqJdlvvL3=qMS2&Hx3m`i0Bo16%#Q0HPwxVf&1tqETQ%8#y^(kf#NDE#T6^B!uOuIXtI;1sqI{05 zI4{JS@YIcwVtNSLNT3(V1zE*FlCrsrp!0L}Cw*?dqMB7;paQkoZRbJRUm8kJI!|L# zNk=^-_y94rI>Pz0h95*bm!GJ7L702j4?}d;2&llbcmNjgknr&D*fkJ4v96+R|KA8cg*k2B< zD-S8rNiF32eRX%VQql{^xbA#*=plQ;kXUjLB<1B^*4)z@NfqZdU( zf(uuax*?Uf1`80>9#DrV4;K=Wn9jd00Jv{haO@luF!W5-r-Ofq7ZE%lbZbuTutEPD zd+2I2DQJp{Nd<=%r{;`sqlA`&I8;G&!plo618G)nEuQ39Kj$!@5(h-Ra@L2*l+_aV zc}A0e`;)Z@j&kmw4t6XcxOukl>^^v%JK0G1SDK*+HC$anJ6sxz!>0W{1T;fjV0TK*cCnwo~_8ePG_xjW{`XS!MIJr`r?ZL`KVO6n-7Py(}M)#wv zxr!mkALOza{ten$)#^1VIPEDT7Pfq?yc4aI@uBhu3-;1b6gFdk@I-O39srYw$6;6%9Lb)%0VMd(s1^JItjQ z^F*9}AOpL|psz;CkW%=-Eb9!!{vD7f@kBdB3=~y#3psVOg3bTQ>P3dBz;^0!CpqW5 z538YjbnO8w2{q>+trBnzBG*63TbOr7Bd75O;nDS1oYzKXArkhy=8g0`bi{KGr@)h^ zm3;1fWJ173F(O{R7{!1dzf(ko*w!K zAOH4Mux`YKUSqb{H4;HCiDQg##n~I%j<*{f(Ly1G{hM!d3nu${>zT@b@lPu?a5Emc zta87`=lf=zQ_UJOtS)8qWL??grSzAU8$!R}Le)ub`4Xvg?!T)1Dwlmn8@N)kx-UJNE^IDHA=d3~&N z+U$4ICpuq-BtP^WPgV(ClX^WeJKYlz8VN;0A;<0fsN8y@j>MmL_63c2jH}B!=`hU! zyo=0D$fS*mB8XUJf41Q%{zO~i`6zffW!UgsTkoaucpulDHXsPW_074nGAj>7!;W3& z4P2sIsFWC_n>Pm;X8;2Fi`Q`TsfT^A;C3cqLE(G-mmdt^_w@vI_6QI8m4MOw59^j6 zfT?N$E*Gm{6Z$ob(%qIFFiMIXIS4NU9rPc3xKp7uQk0&1>kAPG@Dupy59_cUds@6t zMCUX2+r4*$i`#_(>Y_ZTNj1j0RFseHfBnAP8NQ_-aYwe}Oes5zIMDj)dL|l3%@eHl zlP#0aVZ+bQ*@%_7opLQ3%OV=2uHM&jb`qUjuh=eSyXepxY}n~;CKvp@#^#zznCT+% zyZPO}Q2in$(YMQ*;>Hz3m*sq#c$1Wmj2mnYTcNbTaz`Mfw?H^|MKm`6){ zM`>7=ek8(dQy7AX8w;6|#;2lIIk00zk<(hGesGnG9l@y@r zWpAod{f-%N&%QiedhK+kxGxvu&57sx#pzM_&XORKDf0K=Wx%^$eRvl+?R9E$V&gd! 
zG>tpD$PMt>ag}F>V|uQ@DpzXgVEv(ae1d$}=WvN7qi$Os zBpE7t_ml%W~CK)kB+=!qsa2jz;$wQJX5#Z3Nu+FXh+bsx)sPvxu3ju?YE_v81P z&_FVEI^My8L_N`*_rhC>%~~Gtr+Z&Yl;o~|fs(4C0qsf!uJ5zFtr`E?1k z#!tL_h?I&10N;>3f^!KrH{RIar5jet;c{-1vP1SIveOKP+e?707#W^A?L9oPOvjQS zOJjCNt8>4B90fYF`FQ zk7F*rs<-=R85y&ES(1#-^+?fAvNEN%LBP1}E#L92_-palEuE_jHw8MVz)2YpCo$tS zXlDqiW!`347z+-bH4!?TJ8;)8<;?}YR=;rBv(GfBvQ0!+%6Bp`Oh?BJd>=gFmnDY0bsfeYy} zUTA5?Uu~BQV{4LAEmD z9P3^)M2l3D74%5LD9n~84sWLY)N}Na9D4cDjyv$Qqo}bhvBTNF`>srsYNMz+F2h$R zV4Nbg<((qQSQA(RLsHv6`htM~?8BB#?=R+4S66f;pc%Z2jtB4s9?uP}k%H+alz8^a z4Dm=$r6($Z_O&mTuP6{tEiF1i>_ZpTJskjJ-HvU1~aQ!^&z~@ zd;tn*8R?~N?XffHM7R~!nvyd_)FWF;UZgwKtg~+^<{z`>bziSRC-JI+G_veKgU#fa zA7)5k^vTtPjz?Z_)Sbblaz&QZln6*}Fuk{bmwP9q*Q<3CV{62)o}J8JQ?i+7tp>e6 z(RzQWs=;%1UO)T5gm<<74;V{oL;K6@PcYP;j6xY>a#u0^nwYhIKS7J`Ks2LRV$4y1 zB;_gTo}R5>ODwmA=9Cpt+`X-B7s(#<*uK%NC}K_o_$%EeXx6;P{ft`8wVjY0&4dg;%yRf%l zlDz+E@4Lg{{ma%$h#q8=Nz@pfAtX_v zB#0U$}(5YkfY$c5~J175tPl zVm5Zj=c;xE!-C;;pa?-)o8NU;tEk7^{8qEH9lZn=CXcPCCkIr209^!Gw~AbvQ>mt{ zoF9%izrzlb`&7U%cnjTRYk}H3irwqllyoLSS)>I18`)1wHwe>yUd&^C*j!nx<84(a zThi19nhG7u>hk1{RcjsBkR5IWrVcWZ_f10Lz6!ou&OJP|rf8K3_jjfshPOVxUcJHk zsrF1U`_=DIs6XpLYaG2{fYJJfpT7m~SwTj_$xPM`Ml95`OU?`&c;5}9@^RDbsmQN9 z>Qx@^VngZ_WZ-skv43&Qb=5#R5gp=bW*A+~r&3^s zfN?>QW+rZvS#EGe29@j}LDUS5aw*=tfPC*2)f`-eyY2Jk|6v)VBAkxkD{^|0sdmo#QO65p1{sK+w$J#fJF zA8S^?QFma9gB9R?Y?cl;TU^&_DvOVjG?8dTestwjpbzjqcCI@mgGDkpan%1Nv?m-HIdow6FdIm zS(e83oyG9!oBUbk!h7*t{hsr;%~qO8sRDWrohlYn$guoo44b!5Z_tGP9Mdw)ksWAZ zGjvshExWzpiTEoshRK1cd#FVDQ=Z{22P5mPO%C*{yryE~pnEH8R%bP^XN>c?wKLF8 zX0Mnd*B7LYTPOR|bDEBlG@WgiG#V9{uYCqMo-eXeqAN4x60B;5mQVvOLr--@%mGO< z*8Ir$T5j{GtfcODBMKtk(3doeWRSHjkc;Y+g3upfvV_idc&DmoopWmRR61so3-0LO z$NK=i*Y;hnWD|>-_Tntf;4C{6{AO>1D)6*AO7Q7QR7;7Fp_)cKQ4qjH0rtT7F0Pby z9SxgY-9v<+`eRYS(rex@m_-hq&cMz$dls412T@}|pKopTbgBZO1&prwaSy;(X?qC+ z`}2~kPL38-A;WtN$6IvZDahV7%$5^~Z!dyLb>8)y_0UXs6*$qPm?VxsaD11UzCw<% z)k|=MM|!qHZ;$Q&h;FiYQA$_GwzyS<Si|r)?fAY?0Ja) z2ZTJ?<>48(=CRu;Avpp#4AL}@SZ4F}`2Dd3YbLnxwo2)&8))>+TPAcsIR2>p5xpIJ z;GJz$Cwmw6`@zT2G{qA~%7hF@}MNZZuJ9u$T(7^U|Sm5~9dF8$1WrEtZ_=PFk z2>8z^y_()KP#gIiGx%LJWJULqwChPOcg#>#l1$B&xRMZHx~@}I%WXNzVtYDX3(8?3 zXUvgm;Zk#rzp`%M84wb*R)(Ooy`qx33GYlUmU)=iYqf%kQG=A#<0tO}Kf5h34XrB9 z#}OpZPD$zFOb8hD@Pdomhn|5fzMH4JZD+u>dG^icdYDTGM$GI|>2X(F`6{p)c+pY4 zG7g&wCujBH&#i=yQ?FR)sPbTrwg2NSb z#QbHRAZ$#`cGqy}vx(td*a*KaFCmJvqtSjpz!ovcqQRx_Q{GtY7S5=8!_&w&RiGqw!}vAxSG-xaah z5#__>iO(S!1&jAYZ521Gfa#VS`_1Z1dY2Y{2c@2{C3KDfns=!;b)iF7m?8>9tR%Xh z0#7Nl27A7h!2q#VR~Nq5>cnIEZ?N_dI=zuW&@m=Xo=Bt%3w-3ml4XE5+Npb-BQwEj zXB=o+_RdA{3E+n`Md%Jm_k@zuUU~A6e5~j%_GNY?m*N}|-OV@|3Rr*eGe$bpDe^K~ z2u}jA#6kgnsQQXImiZyW^BqH=pg3y3n}o7Ax|_Ot3u&;%PZbJ!C9OhQ1dj zlLC?*KKb+}-Wu)*FqS!&36rv@iQ%VUY-0sJu@U9t$}!D3{1fG_J*s^y6HQlhtf6$n zF7q;}2r<9G(vOxv8EHM%d3Qste;o{O?d)9>MN$fQgYRYx%tN zFP@P_UB&s8Edo_TLs`#k02=Ks2N3TnYh823P4)?MR)!Zu8s@L4v4a>KdUD%S$xzkP zX^X~uyXaxY5p}>%>sbP-qI&dGtgmgQL4_&jq>sZlN+;Bu1V|UHTtZboNDL=4d+)g2 zzBR&vcUNp*{_6g4`MG1LZqXm2=kRLNpj5-^4g{~#?xR?jFt}*rP%44<6-?0H2|bdk zjH!sq@isWMx&M$qr;FS<556H1bqU6L(B_R!EK)E(iF=*@#2z)sjlZ;y*bzF6HE-Du zyNlBVzErji`q0D(7Km<2IGb%yoZ3xo3!E@c{)FY8yumW&`z>E-&(o3;zQvrnz_dd_ zOnwlS@UU2L>fEsZuwUo&L9h?xa`l%xCP!f*tF$F0_;i((GdEo=yDzlHtwKzDwl4D5 zPf3dI-RCz8o!`sikK+z9)&1CVRT}dkLiei658s80mk-T9*C1I*nY%PYs-1q6J9fW% zAUCQAIDd&yv+alB`f4UcBRXk+)231i4_I+CvV@${r?J-Gy3I}TYw!;3l%8@9)x~F8<1?YW}g?G7a%Py-P4w{Je>i}I>370 zawt2WewDI^X2R|c8Dq$2fFt46NzO5VS3{7aQyg3Xoh^4_bqvYXVZsFclz1X+TCSFvF-k*68^$pg zeVW)k_<8$RK5Yv1sMt!NZW)rL*w?dam zg(wQd`%_YA#EuC(d{UUQ$I*2eokaV~jHyts2V#=QS)k=Q0xI}Q-Sw>dB-~Pxj&>tI zO_wy-nA#dpgI*LZzu)oQ$q+7fDPuj-v*~`{r{t^Orf?dnYF@{AaZJ4B@qsJ$zCWpe 
zrf7#*TC!NHEpX<(Co4E~ynvekKg&gYH4why{FOZQb!Re)>x3bwN%XDo=ak{35uF1+ z?eN~&Vde070M7+F*2iZmt1ov>S`)~q%L1L~SMXl_?91}P*^iF^TMwc;dt>1`C6JyY@>>%lfy+#_67#CN`6S7{x9}+e@uK(z;}_R$vJjuD2&# z&2y2^xbu^9Giifxw94dtiJy>2QSx3DFlYMh{`SK8DCcf(S?y3pHdOQMby}>K{mS|c zreCs@a=)EPAjP#}^-{>EAt-wxK3xSs+Ee~zP@R#-E6euMewNE8n6=FEpMG z?jL*{k-LHdO%a{n;JY6urxSdluo-HL=gAF>F8r^lgBVU(qr>gaQ?M+q39LK$uuv_Z z>L0$$leA{;MlV+HBqJ;b1H%eeD|Od38Q0C0-^YpIljp)HIuq;7LA;(O)^S zxE-oHJs4PahPkn54Xkw)WlHXxCD6JU9P_fNdH#qcV_0F(AHV;zues*b|C>s`cuI?N zyzLj=x%(2`HtD}L&JdeTMXz&O;>ZT4Mo~r1s8DcQkov^CX$LEBZL{(xRT#+nru>T3 ziCJT?J)4hcg7xi4a)dvu_-sJcwNiZl<6!@pnW`x7GH=aE{8eag{h)Vyfj14gGEINBv9utBn5S<6>ydppb3rcV`K2-zeTmKQrv$GaaOob9{oVZK6$( z|D*`BRxM=sh>@(4WLAAg_Jt6$6%8o`i6Ny#?yQbO3^sS9w9C06Ti0Dl*<430^O21I zYX;%u5cemsF;d-{B9a_bGT*JXU)giyWOjjWgPwIMz7m!++yQ8IB3~wP0l3g{Ojx}I;|S+_b*r)NtIwoIxJ<4x_-GeW%SCw`L<#`o2YRC( z-sJ_(GjViB%SxGnmM!RF^v-Kw{u-hOdVuiPzVQj&xtbnuY#)%nny2Ida%}CoSOb{fT<_uvP`K=7%*P+M{1lOF|ZNh z?H@z4W96aYoz$7ETAWwu@1xPEjhaOysO+^@jxt&9kQauI~W8njO9^R9{_gibDxXpYwHb8oIJIkMFm-(e2zK zdy_laMuF@%-frkV`mvMm|I=s^*j+_T)os(xjP=4!a&)tZ8GZO%{5{RX zZ)88dmEr3PuUib!IawR#o@x0$ueL_;EFL92fX*ev6;}iN^KGxca&szhMRmbcg|vHt z1_-j1K1bTwy=e5{z1rJr<0ZK5h^jMQYw;FAmEet*g?0`FrFMhw%cwW8J7F3=naJDn`sy-k=Qk-S#Tx=j3*0T?L3bMRAu!wIJjTSCOC!b( zYi0~M4!nctof0fc;zdj642ha(7MI`JJ0>w&uEKo+145hM+cnrW>8%^cbz-vZr(0%v z)vHza^?7=~Kp_W{K2^YtperPV{dKi{dwOMC%p<2More zp6v<6_S#eJlbnjpPdvdUfB(4rTl3>GhOFNT|H zu+k?jh?d4G`5V}m=^RURm1%CSgJ{_}s?w~`%?GVxZ|+A!{bQWoKPSSvX*)g^B#9D4 z=p<9~I%%_&INWNrjnmK~v1k3>Io|UvSShbWK&ypW@-m;9W@g<;3X;nlD^i>6ZWS96 zOi?e>D^nzKYQ5fLm@n5aD{tdnT#xq9Ap+&1zxPEMc7K)oc&!A%c&*4bK(jev$wHI9 zfL@~(C{EGmS+t2fpUpC_kJ?rJh!Y$k9ECdy%LZJ|_rE!HYfwNak=CIi)wlEJ@L*aa ztM3iN_B#|&kLw5S7HBn>$i}ig|Bc>M4vc;3R(T*kry3dB@THA^Lh41U-5%yl3r=Br zrB!F>v-T7dX5+uWNbA6U4%O2%Yjb$laiEpo3&rh(SiuEb%nx}NgJ z(@1-X2HE}TQ>N>{gqLsUh)H1G-!@k8^s{dE6=Dy81e{5Q1Rl`cIKc)FmOW(7bZy!9 z`NrM-Kylg>G~ba`k3InWVS4=r@1_R6>;CiDGK_DpilEF)IW;{9{HXX6IY^K(mfdNE zG7*Yi_C0&i(wiWw`hyo{^iVn|#10)`pPJK@;y3>iQ+{0hvDv}yXK?)B6DTDpID5T~ zu&UT$-K5KJCP3TstS%!mbjM4h1JJ>~YRK{IBhLI$tX@fEKz z9zcKVs8RIje7gdKr}LZaG#3bZ3AGDPBxy>ABx;Dozfv z#^~TAiD*l4y!VW1eNFFUK}p&cjC3DAZ@j!GJ-9#9R&!{}o2~ZpC6D z?v>iuV~fWx)@1OZH-TxBZOiVAb9uzvY7gmwqDQTUAEz3$kiS5Fjo(vadGyx`0z(y{ zEHNRyML0}a9FyyZ9Xzabtx4S=kLm`5oMf7Z-TnP}aLP1lU#q5~e21Rdk2+CBI>&F7 zlkH@5-OLrLAj*gcFO5pReuaeb@ND>{35AWgEk}dAM(#pHk$v zFh_^`TmsU%=jz~-k&zE^@q5t_$z^Wz>m2!LQ8cxoEYhhSXn?{bAyu`3APc~9orJbP zf@0RP0To!x5@5GH%KfiF<#PI|MHeZPmF}TYmWE<`{_W3 zImWJ!0+L&qmZM=a2~G<&QN?LK&~~thxAU$(fHTId%!_7E0#0UWDPBE@_x)N&-IZT( zBP6*PAjnPrEqhKrLLGqxrb{0tHx}*>XVs%L*~>mGZwp+1+Xic*m_@F^@!_#n9VpAA zU}_ktwhGw)u*^5m&l^LgN8s07Qj6hDGt8aYt*hj+HU%9-vHQi*={HMhCJfsS;$}HD zjohbvGCTP(?Ax=O8cInrwmW29qAt-HA;V6Y^?*HlOND;yj%7ge5qQqHA+RqGtr@)! 
z&5J+JgVltO@^1UZ4CerD2Wq0%0O?;<-1J6ZR(RI__r(xPkTFya^L(U#*oVpiQCUHuRqApR=9F+qdh`rR?iPaPB2KFx` z0EbK?wIP@Ge=^UD2M8i6#dzGQrcPd=o@4C%=dpy-+{?Ktop8|SFo(4PWc>ARzg;P3 zn=ro@du{h%W!|GeG378ETyt7n0*e{n#wEDU>z}?#>wWtJXDKc`1 zW|)^P&Ze|3F1U2bTZrzDKYx#C8s2^WRk{zZ@zK5{nW%WMOO+wtB0?sm8}S>p6XGD= z8h>t2M9+Gr?;(7uG1hhA+)|yLuYI~qmT;XIWfCHWaXG*Xli{N0L_kL*pt;OXlCG3U zT}bAn%iP}i(1j#=iQOqVu~yxEz8@&K0O0kCH6g%EROiFg;Mm^NqyRSu3PwDOV$BTg=c4!H$KA>*>2S0F z-{vF0ikyo&z6LtmFxiHc2y?mnjR07F`F#!8?e4;T;PZ%9^30BAi+@>fkgqm8g2*{(f$YLaoWB>q|%A#9`=7uS;y z&TWk*V<1%h!$WT7_z-(RQ#LFW?;KS$hLv2@OBQ#wr#;f19PMezO+n5QTP1P&H~5KL z@x1KUt$4(xov?M@ftrhPnmiE(7R2HjW1NW~*w#`Ml)Je;s&QIjs&x|Q%w}K>9hCZY zy}5AnTbvL4X)Kk%Q4j}3hhum*eJTt&9}K9m3Az(4*iDu+_e@cIa!EhRkx!E}VV{wd z>RZ7zKsfd~71b63eYc?p73vxBc#TV7 z4RJrU_Gv;rxpb5uW6bmEq(CX%@XVf;j5xw&RnYG4P1q33H%j|B54aKcOFP67L3?T% z>P8qGJ`4HV601JSsx~5iv5CH~9`S-oB+(3mlA#u>4>wBIT((FeL^0Y{26`&6E~fSF z&q*gQA1`Xe>4+M((^{$0@ozABc7HDB?cwI=<9y%LjR?-5ep=c-lxA9*8Fxg5ewCTj zpjtaSrm$^7YA%@y%XJkKZ}a?U$EBOA5?%A{jEJQ_CfvvJc_zMJz-jjzN_)oCSHDx> zhpDk>o@6AWbk9+z#tjitX-!WYxgE(T`O>`R0~s=r7|--Tes?=Sl+1HkYN*J*Y>jt} zc>PX{!dbyBfEBJylm~n7)_^5jOS?4Zfh>Tm#ltnh%&jC-8`E718(~rns(ZmPY9qCyo(PHc_o;Wm0 z1jljB0FL_GjnPCrL9V!OEk%KGFXKWy3iH^YAD6b^tz6_>Qx6KjrEcg60(kb`k8esS z41PJaO)e`%){H~&hc4HEoOiO%O-)xEAge%$WB37#j#)2hHY!aOS{u3|-lTgCg#WMy zzlQepwZDHY>qS2i%%yOrWids3S@o-iiW_}zQzCYn2^!m(l-79fhkRzB;`T<%RR*V@ zeYsM@o|Ao^G6U@+=oKJYf!{|ZyNqSf{tp$nm3gC~KsqHjlWQI@!A(7N4)U|;eq z>H-b}KZJJMaOm6?IAm?Rnh&V**C1NcZIr(tLPYa zHa0@l?RK5!e4{w%C{gsOMSzf3QT$VT+jwv0D8q?9`2uE)pbn9iY;gc4S2}xHnvk_W z^3u$Lr8vubSMlv=@^r{9vF`iE8PA2G+c|n}lJgISj)D~Sy8gOb#~b35(UAef>J}{L z_lR{}W`;r#>MO7QR*->MdaB`o#l$(%sZ-4ni%49O*#?cf za@BPO!v!@^)GNrbKalJ3L5q=KOMa&xy2^9{59WGNCyzFjX@t=RPFc^x?d zfIx&P>Vue6{kaJW2ZQ(k9df^D@;Y>w6ld+Mhi2XR>NHvoj5zgCBvYffI(x&DPs17XgVu;pMjU`bearZ)M)##kTg z;&g8YCrn{!p1V>m)2%ULX|7&`sQ}FxzRuuN;6x63KZ^SRbzc8VUOf(i|7|T(Hiz@$-8{ zs)*NXmMXuq>h2ls)g$^}EqRQhKA~0m7K-`KlHzR5aT?QD7{{IH7!RfQoI2Qvl#ISJ z9A7Qhr&Tj3kVr!m?6=*rrqG9I<)ca(g_}penZTgUged|-Qk5{G_-BGZkN&;aYv$&e z{ac}mUMa>enk7iUKCeyiDD(^#?ZQ`nUo%;SyAY6Lg4SOwwtci8__8lFIy z0f1eP5fcNe=|hH1iFVYaKUIFBAV#cUa7#JIt0Vg8;Z#Ilc%hS|HLUz?!$bp-{H8nz za(gObCf>!2OhNdjwz;NO<1!^;rnvV(Vv*g|^Ej2BoL-SrEyoG*6ztnkU zj%}x^+^V$y&dC`G#1zKxd?{j1@cR69@BB>SKfD1Hl4w3Y#gK21(0pw(KdHPm;*?+pPmzCEkdxsP2Z=eg}i&`@_rXRDtOAs|aXGzip zyVYhO$~^wlkvJQWSR*$%H(Cg)A1yN{W?ulc{%uU5OmUO+vqswG|Lt*IY_A`JB3R}s z3BQsWdg_)NGBN=_;1s(sF>vlQT)gP&o!aK}%uZJsMTemnoK(xsl)vOJXur!bF%mJ$Vb1pWPv)tH&r!++Taf4 z@OZM|O;A6puBsUGx)#w>Mh$#B_{VR+-y}d}!D^@atRv-{wkqy$q91E-Lpx@D|8E@) zVI{?B5y{`{R*6iO&SxY+I!xS>Vt<#77bJXY`h(}AH>>$Op>Aga(PTsieZIAc|A9{R zpRt_waoe5XDTLZl%dq@y+!Fh9H`Pt+4@CXhtno*S5D}-`6fWm^Yx)tHESr%;K&!7@ zmX8wlg@~r5luU*sU7Z+r{^z-JB<3D}{2T3A&+37(%dFa9h>#DSz9a#Oxj1_(FV+JCIKM$NhYPwuoPACVoUL zSDeyPJcJw8!KmzXykAJAT7g{Vng1M0eDm;vf|Cg0(PBs-(g3!}Cw9&3T%f%Z;^8ns zNCofDkFxtG=A_qHDh#v#L(~7R8Y)^Oc=mgZx}43Om!_G#8bXkge@Sm)LE!r5Hy3bP zr=8JStweMU>$V)uzYnF6hZIdRPejDB;6YXz*i|Y0+UaB=FjN`z+i zB_w-=mbCnEm7b}KQwE#+sn14zhGaipB^FJm+O1y15&tg<{SFl5Q|Mu;_f7m&Bp$XL?!e0viyPdn3FC@|8G{YV}mHSJ2 z|IqdSe2||n%v5tgtq{+Jef;;``PZ>Zq!MGfIsH<8BLD3sL=v7jVxy+*th-VFd3P?{ z!+##~3Nta=YX7tA$A5nOU*0)xUZiMEwxvHb{TEa6pAWJQMm+ho1+~6^`ah7wL%xgS z;Gl7m>;HKlv*AQLGvtz^^e_7RUrbu{g>kxbr2ksuzdKFDv3BkgErmDohpgNG zL=piPjW+FXD2VXLcww=bdZQHgQHf-#qVPo62ZRegodY^ZEzrHc<&%4Li zIBTuxx#nDR?X$z>WyRpXV15Au1A~_k7ghuVg9rmX%R@tgt{JUCQ($0l66Qie@)ANq z`0@_6CgxVgU|{0m$*NFlN<*mGnu-!;{?Otu?J$Z3QZRWUz&Of9XbJqh-Z-G}DeVYl zvkMZqvxusQmJ^L%`8Q8>fTu5c>zz zZa$Q60fHQOV8poF(*8ek5U;?&E)_ezt3ZQIp&eJ4ev?dgqwIxbOvCS0BpFxP0bRcD 
zP$tVLLO^Y=fyJ3~XH#Q=ouQVPe!Hq8h`px?TO)?30f6B!_6m3*$${B_?KQl~aH^x;1Q?0|8#o;`rh!rz5wUv>1E#l0 z3VQ%7gBF2VO>tDMBkQ%%uV%>?rHECd#*mK`@j;w@ zUu{B3N#%nmg~(ot8la`nXwm_xXqh#ACNdJ4%c5wJxct8DrWfanV;bDg$++~O}cgvlOMVKRFn;h4JIB|utn7Cu}mnkYHNgK*RROVZx5n?I2P zaGQSgT@YW`8UIMfed;~)9`n9T^&o|~^n)|{L|`{%7Q&;b%!KSF10dN%3=87t-A4PB z@hyO`OO6Ms|EKF9ASkOrjZTA_* zc3*APvA%zJF05$STe6H zzGYjHm6oUEHeZt%Baj8{AD5L@BvvegM3!dgxC+Ku91E&`#HUy7ICdP?gRTGUZ0t?Z zd#R~gggNDQLAQjYK1H7o(C&^QzZ#@`mT8Cx`AJf8JrvBn?c2L!Za~RJ;c97j;`~yQ zP4yL(Y9rQ*8&4IBAY88Skea4o#pX2r8{r_G_?2WZ`e>> zRoIkuAJbRm;Ow{PVEvh$PV;nIJ2l9DL^xi-<>D1R$!HL-oJU<`!{}+BEH}YUuE5su z!Giqk=)rsaiQ^!PdT1jzJyQJD^=Q=q!~CDsVCMZTsxhoSbN!Li!!v^n{R3kMsRh8< z%o#zlLUmIR^#B@? z)St>`IKQ#11?6(lrcTVzp5bu1z>0zy7^Fr&-}JGoVW-3x>oZ#Ms>CQ8II9uOfB85l zQ$w2%Z663{|75yFYxh}fYlt0>uIpfDiu_<@YTfn{=7m zw}7ojGg~lYGuNH+sT7*3nbSX!UZ9vNon5MOR4vK`+MOffWAa`KeMM zMj-f)9ON0~TgY?B4xAEPB)lbDEV>H=LlI8m$f#kZ-nrqro)5Y)YXpxZyTwsSzk;f0 zI}4`uR|8^ZO15P~r?taCh7tBv_64_O`?@P{l-1rLql6*qL>baznP*X~CWIsT0F`H%QCXjgXZg9NQiXR#UHO7WllsGo+M;xQ zy5)?X<3NhRi6vF7wyo#7dy;$j`v>epgg*$M5u6bgu)||I zVp?LbaJYWRlhKjID@D&3o6at+sGqh?wK&4*M{S@FrTu1L(>9~EN+nE%ObxA+UO->y zs~oGG7+8*y!E3cRU8>1hYg@fuZERU{UU1FX>{x7{dn*i0gxdJozmq|>#?khH=NiE3{9Z59}+# zOZ&saJ>Bib!H21z6u(VZrC;&R*#PW-E!GB(*|$O1RpiBQytjB+$EcLBJm}9UI}_;x9JnFDA|c744WTEYWTC2|d|}lQgE3}d zhq1R2KTzDcBY@v<8YquR7otWE<{e+i9;MURw!Smda5DVnd{&53ilQ!ZCgim5?qxx_ z=v(fWG|1fH$7ZvUS$%VRc?@{eB}F3Zllzr^%l?N)-==#>Sadt8S3BG`K`>E;cumGd zT!px)@UTEyrc};76Eo#EXM^SS;?NDLt~8SbN^wI$;AD;h?z2~?#3T2a*wM-KKbbXrfpPH}HB>u9bTO&}ScAJJ^CtH<9Itn;rUAe|8Uj`H z^358L#8mW_&hs;;U8gKd?fH5Ng(Y&MNUD*#yTW@`MxOZ5hQ)7CR}4WoG%g*!b7o+mmIGg){Zm>v>7jUxSs1NPlKgX+Y z2ZYD@t0JAWE}Cr~S0}cLo#r$wRN89GZ`Qj_@W#u=wY8ZWpS_ygVLF7{3cO1Xrs1Xd zQelVbzBem>SMk#7*LB(Qe7Gp)QQry`-UE&rm^;MlN!oG`a$Nm^;H zF`}W_aewz4<78Ylrfjt>`?pR_M;T?&FXO&hF z2r@C*GOa5YU$>yH5Sto>ZOnhwm6feZu4>fZT{R|IoBH0q)%dgy-AA0|xQM$rok=gc z|5`8Z@JT(Y{046pn4jnRFuR7s7yLFmeK_50jOZxWr&Fz3^YD2Zm+pzDI&$gSnrdpQ{KbM|MUSZGW# zHWoWA&$=t&i?HiTRAVa{Sx%da#dPWY%vAA2Rye!kHTBDBjidhoLe2x9?_t12@P6bv zS|+W#Hs@p6LKW0X`${J5iCf!wv-%M_xsPNKwOI}iUI`5s_U2)ve?=Po+_KLZ-KCMmo zB!1l26yGwu+3%r5lHJMiWaFMkAD%sW4`1+16ih(A48Kpl{Jd}Jx%uQ9^6Fmd>)<;9 zyA+w1I--NoxqRiZdeZ3s=nrLky#d!j<7@hAbKtr0V9HnEZf=k@J&p z18W!$GvV5Zdrk!`@YC2Z#O59X*rB=I#UwbGbHwwHVq{@&R%L?Aoa$%l5@cI2mmsjX zHlQDRv)B?Kl%|*2P1R;DQ4s9-h=Y-taN_z_-f?O(JoM7W>@XPvtUv{fcPJzTk_`dg zdl@pZ9lq-d;_gY2LfDPccc@%mD)F8$|n*ER9nE)hCwWifKvNbTKb+fhujRphbcH;y+S{pm*UvS-1J6?B zT4}oHklET^=5b*oW?|y-&adXazqX4Bgt8xCDNjOm38PsIB zek@z3pQ6zYN-`|Tp3g_cb&gNbR8?)$V=N0=m6}a9PfCrp(9C}&B8^-8kLGf%3Fyrx z@m+7m*ji62TgPSCw}ME5u^5fQPUgx|PZp|ju-NVNhFaVPJTLB-Oz{>PELEEhvOW8Y ztyX|$0e&!HGWa#0{%u446@krht_vqIJnwoe27{oJ)fUIZ036p<`|~iO7`vVjct^|1 z7L&xAEAXJ+F}96W>*_y{HksCq;c`oj1oD5;k=QN9{M$(;U4U*U6-}#*Bh)(~2speOXD{SiBFBhE~cZX9w@aXhicNZIcLy4Sj6Fd*i z**u;hn<*k_+RS|h1Dxz26S`a?=Em#bePp!XFP9Yz?nJX?zku-c)P$2|FTPz3;4Sgi z9h2pF)dp?+_25lifGqnKz;#Yp)!5GIcy=h7X`KDMs`K69<7ML`pcMIRmha;g#qH9X z>tWTA$!Dd658~ylDTnK3lxYoQ_ia=)J^$TLh&;EGz_jbfomeOug9Hlu%hrnyM>GOx9p|liEnt_M0dVFpR_~A=m3)E$xT9i_POq;0=d+?a!P6gq%kjN2n>kNi?!(aFzin}zl%qWVd3}<6*%f}>3`E4v*tQ?QX+mzz zy3hj!uEjr;Z3?;%AnPN_F0a_MTHk+3OJRtw_5cevU|VyVImI?7pdXq9$HagL;irIz z<-Qg-mJ0!&b`#}RK@&YEt7lX~K5%ML2u$e%fFcc45zYIMngWTl z4_pT^CTc!KqvJXwy>7wJM5~WP4A{EP*OAIuVU3C7OJ>B7Y-zqG$dp_iXBPG^l zdR-5#+?*`8Vn8^H@H-ySG%gyPgVa%VQg^yPN8qQ+lA&#%C86Ud$LfT_E%8R{W?G7lV|qQef|(Iu0E((nQ*mdhx;`Aj&g~!XFEP-JnCswB z*uYT+j(LOzst$1hsf85jM&b}-c@#dJhegBKhVbXEypMIy3z-z-X$dlCs)~BWYb{OA zFjDiqWK{$aK@?4jE1Qnf&X0Fb^m;QkLqSztZ|A`mm1tBo$F9O`u_qW4M#@;QL>f}< zP;7@VuB%>@5vG-;ngB4BxCI^0v8kedJl|K+iCykQ!=SDhk7y)2j 
z4lg-reF$iTJ&+$pn$dy>kOmD@k70eta$UEt>UjR;dC~r`ie=k|6wn_bDY_9_25tjM zBa5^AiRtQ^(9A78{>i*Jii0_J4QECcK>82 zMjhnp=bLdZx=YD}eb*&pG`<;WrsPWr9XdMFEz$?>qk_aX;y{|MWM;(o5xP!lcUGqz&JkLwXS z7Huk=OkJCny|!JevZLQ$Zw{w^zHLX+ofpnXXVG%FYz3o1M*UIel%#wAuFeBAo0>n0 zr0clcMmZ$8=A~lWh@&puS5-Go2Dn1Xigjd13In=+A{21{) zsK=40UEBTr==Vq$!pqHTvQiP~XS8`)8G*i)t2OCx!rCgS5D2-bV;lYxn^ol*D z_E=h+y?nExNX%r$2a1!PQvtJ1QZO3B9Zj!Y>NC;(jCBQ+L&IZD+RUZEGYgm;HQp~$ zl$iPS>RUb_H?vNVWA5AXfzF2eE^7S=90v8vU3V|O+$@PRC51&BAl$`MPe5 zRkJZ?8CYT1>;rNv*{nvxilD9uz7Oo<;NRb5e3T3IRtbo})47I4+EktE!iE67&{1wm z4N4j31iT9hjdUH4+w|4nk-Lm!CQbu8PHRWLY^6KTt21?gqHa-KEQ(p4NsjmZv{?udrd!X{f@|-_q~TW|ibBFjW)_*be6x8}dDVm1w;xPR#Q@c7Z^7e%m4z^sA9?)T#V)Knu z(TVmHeei!qesR%ctd9YD(YHPt8D{7gr}T9khDcyy?tP)_7)kqm-kz1c)ZLt_^Xn)H zrHzMOL3s_U(pK04+N2gx%Gj2e(9swd0&&O?0ISdS#`1lpl6?^D(lkfTP=i_%fDts|3 zj_&0^n3WO_0C6pMz4x;3>A1oh4y7Zm`)+(5jX`v&@{<5oam5bl0ZJodsRFt8c6(8n+7gG5;L2Rqg$0A;^ur&4Z=#M^+TcHuLrNw?>Lz%W!m>Yu; zpJnJb4NC$x;MrE=HXk2=&IMi~q$YMf+Mg0Vk@}VgJ?8bNZqD4?0rXFXQ=A~0!%Xk4 zDKPPX@~;*M`i9&7Fr0fU+H*pj+T2g0&oDFcw$A4Hs6M20HmZfGX<0zeF0HI5XUyE1~&!^>EBKGOPeZdgYR5(^`6r095`4G}^A^3Kewt`VPzOC`fqtE$H5OB#p&LWk|1QKv?{D z5-yJiK+H*9mWHaXJZ8%@-8jV#&+DgR&}De+m_=Cy>MH{|>_XhHpV$n|@VMNIpNQYt z3i`YLpmb85{UDL@Zqa1!XFGV>@`Y?P`Chbpyx{CX%#RzN0SjW|334k{fD9Ay7y zcW;_}-DhC{H8Y&Bp!shkUJpnfui(Se^f|(21^IW6`%`sA6)%#oMUK5BB}UwDr$#hu zr|1j)f>G;1F|q#9-t%U*AW&kQ|8fJXExuaQ15xC#k{O%T30yxzC+%AJ_*Fz4o)rpbbR7K>)Nh7*5ALN?;f=rj&Y65vlv!C2ZJ+%dpqpZK68?mx&TmARdj{|=;GWX?LwnA4lOf974NW8>U;<49*i>+RE}v7? z<=IsqmD|0UXnI=)@@o!&Ac#M zJsGppIGiRw-X6hS@1=TsB`upF1)36f{%wHnufYOXe@;nsbJ-;vC=(E@3tSY_T0-El zP=h31Y09yj1m zudp!gM9S#L)s__&IMSKTF3|^z!!69K)0D$7j|)9v6VpqNR`8Bz+4>%SBZHIeCpJ zuBEtiZHU7pyFR6nL7v(!&a%XJxmGIV`^m?mW#oZhQ$~HpwPywZ^pO&%{tz$iNn7)` zy%nu1oRJ)TI3X^%9R1mA=vgKHD96u+B1BKlya;~5#J8CW#u~>o@pVa6TDh9X%a4ob zuwihA8bIQ;#Z)=GUa)-b(r>81S@sFALczSEBK}CU%=~7ujIWUFfQUAZ1n=`;zoZB6 zf~%6v-rOXz>o;=_C^+;uK5hG)dT=S)7Nf{lL?X+D=_9_Nrb*9%qv^6n2;rW zSG|_7Ku~!kQ;?Ic6elbA3dKV&5h=kET;q~fd(>+ODs;k@sBCUzxo01x(;!8pJqDO?J=4`I!!em( zC_ZL>$*jsRwdQJt^Rq_*9^WVD=~-UDtE{3?5@9fnq&|DNFi)aAmUMZ~!;6c66gCac zkN|fqk5}kZ1o|O`E0IC=Kz<8Eex(sOt8iiw^VyT>+;2XI-NDnB!6w8YgzV@>BD!({ zFSD#80Fb5zS4qaSVK2OsX_*nizM&v~l$+}V;fg1N3O`@Mn4a#TI72CD&cVxFN>Zys za-~KzJ6Rrone9wtO|Ck`3}h6AD=1t4@>OpZsOp)<}zA$(~@ zn|GDWvd-g;Xd|L#5tR`NmT~7`&3XQSE;n+8{9O@t>tyVq3tinPIq%Fe)6{&7aGNIy zQFmc~`U4yc?Bhc_$Ib=}3^(iV{^i-)XQ$KtZ;+cZ>FGzw>FMd7!%-AWj+u=V#KtDF z>FH0PHYF1il?Nry2d&sbZSw`6tGa`1!wU_}Xp{H6kM1b4u?UAeSnZN@Y0@zYS^J3f zALFhaHx1?mFRKT=}dEUQD={`?8KOAVg zof_hCdEVQ{dTM3f<{^5UIi5S_c#at!@O?a)9(6qS(KSBMI;M{>tF{a)&ljv{eoP`f zx8#uVUWEsiU6tv+S=_H`pF5=fnTVEfi;vVQ@E$0Yb^ax)JLs^_l+)7g+EAuVqifTd zvk4$GyU2RoCCgcR*@WU_r@Hur_t>>`N1*HT(7M6WPY`9}+PVVoZj`d$e&Ky_+75;5 zwt4FDMDnn=@w(5pcedWC`|x{V>^jy(>;48gwORPW6LoD8;c8vZYa7M-dPL{GR^}Y* z<8gp5!~1@zSjM&fCr*XT%>dtf4ad2C>IzcwcFvNRVJmaeLm@BJn#X}1?)j8l?2$%94JLti^uWt0=jP*Fg z*G6^@5~FZe5GffLEYrFBwe)fUQZ-vA$Zs+Yac|Pbur?n1S(da;G!`-nJrB4xmb^AU z;hC+}$JST5yCJ>_a6C@M`plAVnDV_WDcEYK;Cl?2%Xu!NW^%1x#r$07ApUFrV~q0& zuYINk?)*xIBkmoH2{p%yOpwJ+*Bh$5&z9Pyfe$OTolAFlx&d(-!~=t&ytjGk?&MD1 z2`|^{;=l{91FnhV@OmGRA6gy^;eqUD`3rkj`z2ip@BK2Ajf{H>CKF^CI?HMJC!&WJ zIp60QA>1V*^HJlCkM{>G_E(UyH;68IZ2G(h9v=4GPWKw<<&V)qo$lA zmld|&mMQzBY%N2-nQgsKG)*5sTjE$1tvPg{dHj%G$aA7Tv8)AheM=jcx=a?-Kz9_e zMjK~!wDujPoab5Uivyku_ooKWz6Y!H6HC}{Er%j>%kwuSVm!y3RMV~Nca<2B-YA-Q z=znE4ogiE+=Q8D5S(-;uP8m7eq;vZNyL?Sui}}#*IO=?XV=yITb>X@3(zvU_q5JW? 
z5bG{uNfY)A@2+d~KH>Xb#{ts(qJiVMV>am3)*@N3WJ`=)5K(!Z8TZH2L{5W5X>JXK zn0NN`Z1MQ;O(5Q@o=cVitEkz?bBGsGAH(&1Q>XX+x+zGe_E=geU^4tv#)IRqQg$s( z57zo+NXyYPwOD(wEZg;uyInHAmX_{sm6O$y$=!l@XNIdv!wy`@M=$Rt?z0y{@kR1h zpB0M#x_~tf22s>R1)+CZ`iDXJk(hS=g=lMVa5I(m2FqLh1oY&?Xm+y>rr&)pS7?VJ zw8{RKyJ5})(efE6W<9Ug8&(hAQ17+M8AP)WaEwo-7PP-VbbffNjGyQ{{C1ofZ*4&q zfTMX3CgW7N;Igrg)U5x!xBLo{q@7lK7yMR45YNNT6Ff1ROI^w>Bnqw7c_E$-zREQWhEa$x6 z7BjuZxx$WF>fB^FUu^osfV&Jy>D21We)O|-)X=Vp>OAhz`7U)R;bCKP>IuwG^sD70 zxvRqxQeiL9S?P6ib}ASc80n|78K^|S^zwK&CNcfkf;;0WAVb z)mKo(^=Cjgm12Rl&7qb8zFdziYys$uoa=}+z36n&ys(kJmsfP`@Yb4dN zQ@-57>hg{Nh0h$aZ+;Xy#1)FDG!eMjKnjfJc_4M_rUV<$WPCB4?-y~nWDa3? z-kJgVkjRkd1)U7AuUi=Wd3&xosgUEseWG+ty_Y1j{sY5o_uil`@aU$(1t`lV!=(%>H^D~km1^?I=)hG{r7nCo3 zBIROE3^^zbC+hLg7Jx`%7-^BpV+rBnF=)vj=CY0%_bnYFQ7l3orW_;c4k_&$$*G@j zEOlQrGDSyg4w%Nb30kR*mkk`aUR&v^|aai##Uj&S`z zB$`35=OT_F``}J_*~mQLF(0knj1@TCOj4mImMr?mqxD=WafdzxE+*_}f~~3_=_c;$ zCRCNWofhl~KAWaXXLWXUGtU9XMEcwwD2r9(v#7b^4Vj}@<>{iXLq~M%(bz7jGu{Pc zYCEIm@WQ~35hAbOaigWWR)6VfluFwxykas*RaMo=bf$qw9#FIYNo@#Cc=8EN|J<}D zd_b6=8ap7RuOrB=uC}P|zP9ULALl3J`ZNFFtuKP1wE#Iun zXmVCX^Qz)HQU=fItZz0}7}o?-VFOd@$U}P;{xMcw{kY>6mo*r!kIwsbYASuyGuN^g zllH;SFSj4Ptn*8SV-dA;1-;aCqa@)%*L_ineg-nztg{!kUOQM5NxkmQQ8HgNE27a+ z8!fOzxU1d}%7#sV!kNg1aIO^X^EQsupC;NGxE9V}my26_t7EE_3IQ?NYWUO$LZ9LS!8b&eS0nM6pF{{w)LCr2dg9; z+2I8iwRn5tfdWINZq}DXnkOUBWNz(tcynagbpwq&KJ7~VsUCZc@i!~4*v-KI`lM1S# z#D}s=NLAbTu9Nl#5i16orbr5D7;8~U65f>sY0X-zYZELum9o2*sJgf?0ju;P58HQX zday{>P||)y^SldWlP5d50Y`Cpzto+)xK!l;&eru#vdVPvzSPp3a9n-V!*PqiI5n^k zTKuM#G3#Zo(`;;_7dgYA-a=;@Yz{ncC=5+v$RKuHXT~X$Ja@}7E9FhhBJ;i{dHQhe zYcj5-q3fM9*F{ zrglEU6cabz-Exd)k=aU$Y*6rsq(TnUN?X*P+3%0#HR|>N<#WcenKrt-QkUdEp7}pe z8?D+?$?`F4)Q*?;5=Jgi(`fjNeB-U40tOBAZdSSVlLNwW=7c_j3op_f)=;5KFjiR< zE5_O1SdK!qv8}5oxa%{p9v_)LH#~*p8{5NP@vg(zZ$TGd6={19*TI?Q-nP)1EJM)U zig1uKSEgnYUl`4y+yiag(Ou3`ij(X{224JEDQg*er0Zep*Avx`T=X=)-RX^nE*>KD ztp5GCNP=~n_wF9ilC`h5hxTXde1KsbC(8R+VP7P>4z6H8Bfy)^@EvuG`5M9Qyb$N( zmh9$9s#Orb&a#f?J5lGB-bw-URsZM(I<8|k2>#^mj}l2wTkz{c_;2+W2qrA{&`}KU zr@a$UOpOlQ6dX@tsl3F#Au}#y2JT)vc{jahs3OU#l@m%K^-7ogD7^F|V zhyVoq!NCNcO_rVsXKdo~@mdp`3rmS>A6 zM1S^HsdL|BtO*OQBU;|2|1M4FJpjGO4g7)p;l<~RXwB7XDLr4)Qd%!a=(&^(1V7&s zrL>XGBvCW3?GwhM&r3xC3R={RWPXa1g@ZGn+&}flfP``)#^wDo~h2aKO zN&~kVR@Y4R1{XCY)Cp(&KpJFF-J^P-^6f}Rn9vb&Un3h;(y+1Q5b-&{C$-NjW^`}N z=sC82=~t{#IEBb)zAjWeb5qD6)U~0o9t@oQ+M{5(Lzn<|r12O-$W9Q9umT353){6m zMe(ihYjcf3CApmSrZWHcz+<%N(dPY z5ym+TaN5uBXswHVn1D~BF$go`g=UV~xDmPcO!uc@F)WK-7-X^dC-W})c-$P~a9+dD zy%NONLgKqPTYBZL+5v6pzISvghQl4M$@x_T5|;tV?h&;KJh)d;J_Ent$kM_O0`W#e zPVK|I_Ps68o}OU(40N9J9?Aul+1bDNpk!C)xeIf?Nj{NZlnpBa;^&d@`~0*U@b8`~ zg2AGrl}+KE7NaonwTAT*z9X1xB?`thc*u`W??8rn;MRLI?5$n`G_C4^Sj|R&5>nM)teI9P?eg*M@F)WTX zp~d?67CE@K7KIiH2;;{z=4VVcuehDX%M6%kR+F#5e1jd0Fm_F2Dz+5{cKpKbc!$o2 z8h}G87S?vSW^C69y+p}jP zRQlyyCRCZP9BP?;^-c}$F)oZ z;;qNU_v%i~uWhcqwA;$pVXx}pz@W7m`ow6|b6!}P#&uec0Tbk2k&E?T4=s!h3JXPO zSN8qtDqo`z4dECjFLFRTT8(en#?+A~(|!59+F{Jqe#Q=m>D7~|SQgZm*Mp|Vgur3T z_z40;{q$8=%n|f}O=v8^q@c*^^J;d-0f+Pu{K{?_AfofIfnh%5u3J}4+TLr*+xXZ7 zjYhwrgIMY?HdQ;v@?+R@b#`sP$=-TfflORubp|1OOw^9VATpQ9NFsGTl~>#g5Epyl z_FFIa>Kt^-$)FzUmQm%wB*uq~M5}QECYF?>C5gfQ(>3SkVAs6*<~k3>Q^(Hny(OEC zXcz*)y=KkE?4K}%OqDU1Sq4jJ3kQw75X%~&OnLnU4&q4}`%!UI>%DLepMyyVUHUlj zs5KFiW&ndE-6HXQfbx$N4dYY((R1cy@{_dNRV;|(7O`4Q&VwwN?KUQg8z#d}sziz8 zm;RTOcx=*XSLwM$FB);uT>{$8Ja^4goRJ`}B!;M)Yv`s_ml4_#_X(>9&n+9Ol>v4? 
zC7sdehO~n_cfRr_ZCJS|GQ%14t;W+x-&0JY~Z!-CiB+X6D{ zb5MCuMIi~SvuhyC8P^d(7-2PT5gujIPgEy|P_GePZO=(ae*o*XIpJhDMeIGcM8cQt>?n6SD!q`uQW?iz*pdxzn#8Lm zlXg9=c&8&xk+re|H(ssLaaW!=83~93u$q$W=uD}y&^g}%M{KeW z1N#%BagaP0Q*oEj3>WY4MB}90jVyJ8_>(gA<5-_|KZph6mDkhoo`-=)l4zdTD<+2W zgKDGOEyjFfILm`ts+2y|Hpi)Q)M36}dWN}+EGhbFdyDeM0=;^$OkWxpx;YPTV-)kF zhuNi%egN~~I7W%us9efT^V+Ug8dqGuF!YG#Y#A&)#}I~L5(UVy=wU20SP>jMYT|)k@j>Jw!h{oP z0PsSq`40o@Nx|>6KQbl@NTRiESLw7=YK{u)DVG zB1J^hfBV#?{K(-T?#S$#qZ-Fw9b~lggt5?7ZoZVVRMpK66G7Xrq#>mBcDE@$(PuzK zOZ)s3vBMQrQ3)bEGMj^yIN-=D06Vc35 z6)rTL%wwBG6$!+;Q-d7y#h75M0DS;a0v9W=t@a#OQ!Zbt;&5gXSyQjIy7cXgC&>X- z*!sIhoFhkXYAwDARo;t$yn2a{)x45U>vdfhb<8Fu8{@vy$`;oG-^h*0pk9DkzKO`z zMhqT~?17d^@u|u8qWu;gBxssBgjH$-TeAuKQ|XR_R*@cIV}&gT#v@I1vzdCQg>bKy z{$9TH@6x%b3^lg;5)3f=Sqr0&E^7PXHDgwEkH*_9?pOnRg4JkWJ5lXn0CBwDi)k=u zl!dVYBtgCQbg0U2xKAijCR@hgV=DOPd~dv{+7tNj(tJZ7Y7$e8S!jKEqGjpbu2N3iXFr8!L2KI zi>E(hvALBowd*{6vV~hyTa%DB@x3^l@Vnt99~^?npgBrvxgi2!wy8s2E4 z$bb5maobQVNj#M_bXaXtYq&mYO~82F2=WJqXj?8j*KQy_{*^7Iy2KcUxM+-05-ozM z=@|0G;F$R*EX#&k_mpomM${%(tBg4{9KEK{$au7>M%mX9qsT^T=_9@f0x;tP-$YJx zg|6zVQOjIayc(b^?KUaf*PbsOs(n)yXO(Q&4PaDghy95TH-{olGJFzV(q73?L4O0a zsXPC2q=1JtyQ#-pyh!xEOvB2vM^D%~H8!y>Mxl^8R$JW=yx!kQDX`if-Kza-f3)jV zBitiBw|0)8a-eu9JlKDaJ`$!!0YPhK^o9TxZ`lI_Wv64)*8gl9tsm-^Bl>O&`^lCiz?xcl3vRPcTno!y+<@(-%zp)^--&`F)`>vA;Z zdi(e8I3=!GDcDq3e7JM_c17ry>-}#)#HeNfk|M84En}B2#%Pi;M))O~jiy`OBLU|= z_M(Cj&c!!WZRcGV(M|P9HJ&lor2SJBwhs|>@RO>8QO9w~DD8n^tSyKo#i4xDfu`3_ zWhGd_dE5lu?d(+wKOEO3XxcrXl^mCYq9wNL>XfH%4APJ!S`Y4|YPObD*x@W$6w&98 zFE6{I=HjOP#J~97(0+j(t<_xR`UNMHi`=WvKKK2be+0isE@3q>gnkRv4r1!4%?fWn zWB>I~!qD8PMt{IS?Es|)?s$~&%Frn+m7@KaC=XD$s8hpQUH$l*c7CQe~=`Hm_ctEnq^^6 z??-CtwL4b%AL7dHwB0-{m^(jyYmB_NG-ch@N0gLDcg3aEgTbSp?J-5@Cq5<`#VP(ycq zH_s#SzVGv#?_B5a!*#*jGjs2I?Y;Jj--=C;vZBAm) zxCwkxTlqE?3F+3frI?tqjF=cm*#Tl^X=93nBpnnLjjoPALgKq7?;(xx5eJz*mFFoR z@@M~E(J(^^8frg(JR?!%cZtme`UK+Q-(KZV-OA7R%|-kEOyc`<@JETKIz(99>Mp)g z!xOHPp70aL)$z6CgEg?(YS#fek^`F-RhzE{($9P5au1Jdh+k}A-qsiP^<_!9fn-x5 z5bIAsNce#*Chm-7ZH;OuaJ-OYZ)dIUOiN`3%R~SvMchB)fWwfs!S<(HbA)sevIx24 z_S^SGG&5eL&*SdN;7s|wP)wX zJ#eE8lMnyVbs|jJb@r+A_JR6se3OsMTX zY!)EqfBx=3KN-|K`k~^7S_0MP%%fuy$FvM>&{lAQk)@_TmFBz1t~jH7C%$|ELETiM zTTgDCxi@?rrtijW?o2e&@i6HvzN42K-Tv_20anxLWdDnkdnM-Wr>X^k>DuwkA8siR zbSHL$L1qd^k}?8H1}_={dk1>^hw}tl?Mva*c@+}9`FZZ zx{K5*v)w~(7L!qCpnf~6j9g5@;MbnQV8Nq~eu7FK&@B?3!m=PVfOSU{lG4L=i}uH3 zZFFuw^G5j?H1r=T>t0U?3{l}f-mH6+7%?nqLt2V87C`=E%0BLiTmnDKz1C*anRoVjFVJ6PzTkOrQXc9? zQ%x~>cP&7v{{2SII>G*peYO4gefYl0t6M)}e#>IetYcRA-|-jp*KBYW6@DK1RrWDW zC4PFKWm8m>tkI1PfeqY^yzuaFzwq~}<>AtnZ&bhhdj93TY>jNPv=ucLUVNl>Iat@nk# z;Zj*CWMA|%Z)f4C%c~DF=tr3{3^KH8RBA}7XJwaWr)I}yFRNpJHB=>6Zp@1PdXT*_ zcx%vVuseH4MMYhVA-+95KVvmDDRZ~~+haA7fKQT)spM(SzupcG4v-HR4@7^}%1KsV zOHE&(b-F9~y4LTd|I6r8_qW}*hi?D)P{K)KHlDEY)Hvg9^jnd1?R1rykQQ@=w;2Uy zm1bFHMf1kY#h>12mJb=_k?X=s-kSX~tL&QXVoGv1nZIGHJtoR8$`7v6Jj&h;49O3v z3lTTZ>DEbnU{IMKZ5L-(u}VMjR4_X!AW6DFf30{D>n-2w=AX~hIl9^3?v(7@*q%GA z+Tq`kx+8xl?T#k#V-f*wY7&k+YPUP?jNJ(c-u@u^;ns(xV2bk{Z9j2(=b;BJlM?CS3-i-&#lWhP_xCmbY%GC!B2 zlM9=3>B!fK(P1fSDzY+j)(gwk*5uXP(X1JAT7I^yG1N6wU`E=d7-PU!WxUHnDiwb_ zUXUBxcF^XAt=35CXzll+Z!;qrV-uxxV@D&bWsvVah-Zr+`{613p0K6+)8#GSCpK~- ziXsNGepvR7{N!zOk9XvWXt`~iZ*1uFqui~E$(46%vu&`AYiuWQnbJH=P`vo(L$C zn?LF0wYU6T0a8^qQP$|~fA;>FrM)!<8Q%Ru`q@7HnTqL(u2fJYekJ|(ix|t{v~S>s z?^DmG2B&(cY#!dC;-kt6!+MAKiJ`ca0 z<#?1^o|}r7916<=+UJmlp8_!Vc=!$-A1xuxWFmymr|nn{tC-M)=}E=M!jDz!7I?Ju z+>0W=i0mGfz2y(@l$rb_&}qC?x^B2xvp;wI)UPTGf|l?$!gDGJE36%p;RkakF`?b? 
zxiX>!qFkm0gLd6YPvT-Gcby}L#<@2gM_T&@!*${19Nu#!8r(VUnl*alRh1>mnf4OT zVXmjJK_Yr0nfH~@^1P7)!9&l!y+msS)Xv~2F9z&Y(UkEyZMK7Nva&;X_*D6zariKD zCy_LPdu2Fq;?bmA*+7w#Ppp-Wp$;tLqk_WRht26t*LWhkTJc*;SD(~~U zvb;;_%#>O~J1LiUtq{y3N6yJ^;GWaK6ZlfX^3q)k%3)<}XvZ=-HZ4WP62^9 zTMO>K-(fvcTuA;Zl=ae~k$yU>LCrXGOEI)qX7p|M=cGkz@bKxn&G^U|yrIWA37gMn z;}z^p5qWM%KhXqhQu(aM;=bwp+&rVAR$M-xp`c6G#^`s&% za|_CSYuR-WPt)BcxN!ApIUgC%_?hmZ$4XaaSsY}q#?cKbgvY%(>pg$ehuuucCA@mp zbUd)ZySug3*9^7?Gt|ZT*q#t>Bl@)GhhzL1=M;-k9+}kB(<4=n1Z=l%ASpwTL=t#L zz)Bd84;0v*{ubUa!4`U;h&14G11AbwWStACRPzBd^K|knFCu>xoElPJ1cG1&OeOcd zEPh*JQ(QNq)z}!)g;XDfvR>k3V^i#6vtrF0z9NP+>OgmN_(Fvjqdxz^7*6Kzqn>%} z*FHZ@dd{Lr&UjyM+$O6Rh6^J{_>qsN3HcKMp!c1rwv3sA0ulpoje&%UOoD_4TpHDd=;Q#(fsh|`^SF;&0^nD){-jz~y^H0Qs_GOE<;!1JM& z>e^1)3i5o$5L=e#CJ-Z27B^e_^XDK5xbXp(wx&+cL2kA-c8+{*f>eLq!3SKQf6Ph+ z`s)@aYe6b)1!a&J#K9EA#qyYijY{Yi2m}&vFfrp(6_@-o9C#;4W#Qyx&&SH@>gvkk z%Eh5%<6!0BU{I+|<$7!P4Hz5@H8Bf9`W5 zh_jO*71cSR%b&mJY3gQqMajZdiiW%sKEKBe9D$?rZ(E*mbL)T z02)Hap^@_3^WM-;@te zGq3hOj1EwV&7lv0*LU+bDw?6&6xxK(*@?pwvcrv5%E2710{oR!i!PYQGV_GTa-OWT zbgy}`cLo+~^*0NiU$-t50+bDiht$p{2k&(Wb=w^=-}(I-dlaXj?zGab3Q?MpHtr5j z^gxtA?@>|~sH96;_NF|2N*>yMHIW7|pl$mFxVTgof)dB?n*WJOC(WSBB}*}pZ)qSa zBJQ!}EBQcF&3yf=8|Zh3)1lcKawXQ#ZrmW~^5Duy5yCM~w?wtVVa`DxuF!^Az1hxK zd9+%EKu&g_Cq6+ye6S9Go1GL?t@3$q_enWa=#vP)tTxD}UAVz{C4nNt=OR1y8ekIYIGH&>dn{yoq@=Y2pp_wa_;U5jym~~z3 zw5J#9cZ{M}YNG@e8oGv26%rlBXW97`OJ(6B64LJ{7XQ&X*9d#g3d%W3Extw1m>*RG)>|$Ox7?glD zn<^p{e|5p@Ssb=1%hq60bK&icoPyf+?~EslnaM+q*upktK1*4d+r47gbLBiZ5a}j( zK@2~xPb<0W!k`v-2PQ%_sf1fd#Hg989M5gLLcL#i3ZU$#d8rsJ$9yxdv4xanW#0j7 zCn^Si^Ea~l5OhHjr0fLDLZU2<=E5i|8=+)ScmdmFYNY^=7O$QSmost6*2-3IV`&YK zjGPVaiL{VWQYv$u(ah5w^qH)dmXj;8?&c@EoA{!Ic=xUY__aokrsdx1#O|2S@eE}g zhv~aY*R_1QWUpV)rpZLu7dfKXqaw?LOkHziG$ys zH?EZJg9{9*H2GatzPYmSSPymjOzt%t-vV)QY0SrsOsw$lu1+vU*mDIOnED3wAA5jY z=_!l!CZ3=ztRa&!PsM)b%Z?IJWvZAQqT@UHmGnby#wo>;KneMZOb0XCj5 zf%3crjs>w`!`52wBO7zI)7Eio+5DJ7iJ+h$cdlr5_BObhB5(64SHK8y?Xw>!T?xGX zb`ZQd^F(LOq`jn-Ha)uv2}s>ZX(t?Z{HiHZn=h=f$*ww@S&uff)wmVnQU>2SVEPJM zshS3REYkSY9KKqeBDvTeUy*yaw9k6TukeOQ45oGotY+wJaj_(P!jN$>*4#-GZ_>_q zRHyAXnQgXKfgQNW$_V}hGMQ^wGwPEZmC*I#q@A%~8_an!zgOl6?q}g@ePiI@hYV z6hn>_ce?n%YYKG@nwgK6_<3W(C)t6@*uY@G+!D0IALq^(g|0@Xvqn@ z^8nC!mR9Xro?3A)efrwC6&+`G4FBhts&mb@GH;T-Ac3aI5 zv!$(UZ8k76uQOf#_!DEa={KynkXmgv;~016rGYk>^Yftn@5Zg8GOmuvK}Wk|ki9t8 zr(QBAFHa8L^Co6aPrXred&h~sJt@A5dtOduRA%HPEDd6Gd9eHVc(q6wTH=tLi zzkUaPwg=E(_Zf=gfvlFe#CsOh?CKWmETbR2jOgrH#(D^*n)D*z2 zqVCqTfcD@};dPnnZ!QgEC&HUQAJPqUob18YLUW8;Lkx4etD(aNUK`DmfP#%cJ9QI( zvRm|mj%tr=Dl@g}xy2eW%#^1-0B;?)(c)XsUToTsIliv(wO3)Ofv%?eFN=-&+YTX-;G z&nSUgWFEznubFkv_b{XyV;d+_O)@x7pOyF?gf}pt&tOBx*N$j@Z1o7Q39os+;0SU9 zQ__an8SQunIl{d6-18hHCif;idxRiDo+H>1&-Q4>(Qn`XT_QWO&#FiB<``V4{l)GW z?#=2I@o#^}xs!<*waq% zDr>4@26$EC(vb&}0h<+8q$z$8-mz2v&VdTr41Ys0M_5N!*?B&posMQPSW=RF2|z*K zd4jBoq69TFt9Hk62++H;a&H5 zW9RwOQv4ZVrb4`CE_#R1&VZVW0N|u}WpNXw^^319z~+1bH7~_O|7?f~gZ=3Nd@P7R zB9eLGtM>P_iPZ{tQb!*X8U6+-x6L}86byjJWE+wmRFm>8d%Z*4wxh$2??Xe&;PHaE z5ZS){@G6M|=n8kwOH>YA@IXKeaKk{zhyW+3u5k|wNCmoqq#~4Qrn_F-d9&B7H(>D^ zF|@}&lnQYJd+%th5t}DWIrAm*4s{B+aTSD%qV0H|>_Er7i>(H;Q_S?QFjSxsFmsja za``)dFgTS~y%t@yPLbJxV#B};_s+0_=w1hFU1uK)Fm=Gw?a)1Tgd#%T0`?@Jv>Wiu z@d*hwJv^RNlT4~7hdYQZGpS1YIo00eEUx4e3<;{qy)V5w=7~{)_s*=<-nH(15nV-3 zf6IUid9{VGssm*1C)f)fL4#E|npY{QX@31GWoDKE)e2eoflkS^WLtcVp&aK7ElrU}|HIHU zvHD1AcI^JgK}`T|nNy13;Qyi|eB$Jz+KX_3Tc~9!k{^u*^GwFfRJ(5hDmVs}hi%Pw z9rZiIFx6Zqi`m=e(#Zb87V)YS0e8s{`@ZZm;n#px&&wEA zV)xiv>5_N&lg0s)U1Q&G zwzD)?tM`7pAgkYQK3dCRZ4L zCTl!-&HVGY5o<%D$iTDpM(h*JCLk&tO$Nb=EhAik*rOs~`w(w~Hh&i}=BdCBdmf%v 
z`(kU9z-GzqXr9x3(*$O_YqtsL!9f8*WFV}_JhV=|$G~-BPC-GTKQoPct_%Q(gYq+e3is2?MUFK)auVqZ)XG>&?X7C(;a5MUD_z9sZs!T(RU(ag3=CZrB;pgLFCW z&OeQ)4(@~92^^D0+U)R-=1bgh<^iPT@F7tUca3z6 zTGZRrp|e6wNpLVeEG%qBXscy=&a*tj5{M4@&-6Q@7~!GmDdN?Otpd{hJf$)?@eT|v zT?w8*oGu^XOLhP}9Pa8en1kuyC`K-p8C$TFy zl10!eY=U{fHZiL7Ku}oN6HC5t7Tg)*4m=~#0jxS)%x>G|xdoS7g5PHvi=XR=_Uz(| zIqDR%=ptgiL(09~YIMjA#+Vfnt#~-u6n>(G=kDzu$!`c_+rZF{nb5^5Z3s?2zt ziRWtg|rse9LALABy!CECSE<_XVV^F zqT~>NB%ej07EY%e;NF|P$m42B(qAW29dBJ*H&SE{=gpOwk7K3vLU^6U8NgepT!SdU zbUlK52~e+!X={if_IYX+=e27iP;uRUf%Y>wgj%tdL9LKOt}o{>*fxt6uEB_E`9_OE#HEE@^=j0ATQ z;h33w?}FLx-7BIGm^mVg19RA4X?#AO15ivZs_dKj%QhhA{B$ExOfS=tX@zW@k)I@m2y6aZ;o6^i+)zB&5IZt)8 zS_`QKlW?_f72$mSj?x#pQR1>Pc6Jiumd7ed%M?yQ*_}3-)5dnFeEA84S2j@k4C`B9%@KAD2_BX{+ zkdPzfdD`5zzC@PffcrBPA~b$w`trZ?Y4Zv*K{Nn*S2d8M#Tra0TssVYL?C=?Rs8bF z7(9S0nkuDgdVLWwh<^p_E`~w*eax|Aj&#?xN!_}aHHm>`OUvk{^XDEjAHc6Vrmsu{ zJBLoWWR-8Gsa?zo$uAHONT929NPN4<|B39M=e1xZ4+>$Z@|JW(jJ0}Yc28+ypS(z` zxdHPi6;p0ab-Wt?6BiiIYc28la=bQ{Hun9&-c&IJ-zG6J@pE#<9}g~)b${JNLoY&A zOI<>Yb&((pH1Zo5P`4c`RRBUc`o4ewNp^a*E9WbMD3Jq4<<10D)ud4<%ATj8c zkEQaTv@T`^@ali?^|kN*7HOO$z$!F48U$XYE-%^scMt-S1ybo!ZK51^E;(?qh(A$^ z-}-q)B57Gv{eK>R!2}g5AipY07;br`NdI-^pLw3H(JAEz;1R4sxlDgas3|Hg= zI=RV4$2nfM!NYi#weJL~qh^dYkO!&!pFi_CL|U?#H?IzXm3YJX+onD+&I++yuXBi$ z0|*$qjNk#q<>LGTiPk*?H&>IH_CjzsS5A{nTx!s#zqxeq=NK|+?W8)W^U2)};KZr< zb(Qb}*omqGY{o~wMT#paa6zrsk(!%4%ovXcfBIF1<~kk3HJ9yJre9(g^)TPYzBSGD zk}5Fj2|ch`bj__@*F-{F!fd>Cc5)xWhAMQ zuNu-l`pvsu#1Vb_?o0B0{0|+l@oY*;m`8B}LDhlM2FfM7fGrh0uYNe~e)w?tNE|;D z9Zhn#y``hvhIO4O@|uT=`LwK$y0a;JE<*LcW}iv`XuhtGZ40g`c=5fxWv0H}>F@91Ki)Tkxp6`+4Pd*;Gt7Pozh z&Bb*Z5XAtb?%4v;AlKN7^HU1gdLg!iYlYByx{oD?1rCtIvNhYLW9#ts#-y;y;_#NU zJ-aB#k!n+uV#+nHi$?%*ZK10$f0=g&;n*YZeS+pqth=IC!=Aau2x1C%XJ@F<%Dl7Q zjnglIVDI*BU&O|LJ*fh)j&PKztKbd^MF@E>DX3l7$!})}!B2pDHXr4G#%v!wJ?c%P z73KKQhywL0>Fz61d~T6GMVU}sO$?b}J!4I|C6`-ue$scBbftS!so_k>!ON5;;k$N7 z)6$Q-Z%(h0lQ<&3T>;_ox_3<;f6_do=#HyMNv=Ms>v5*3h7rd3p9y+NeW`u7ww_0J zSISK(aN*kh|6p4|_9$W6M73XK+>-HjroHiwuhMnCBI|&mMTf=UUOj?Jx<0-FOf1Y3 zUU*kret<5{zl@GhO9gXcs2vwfZ!|CSAB@ii7%y$WJAJiv81F>p+xh24Hg%N`?@DGi z%Ql=*1QzX*zk}>ru{ESA{l~@w!Y)L5ll6)RMtb$}m72ty@p6;7fxb^-sjCH_N+sY` z=6*~s-oZk;dwn!U96&s!?-%=}IEsA6G|4YlNXmmqiVxEz`k#4!>eek|s68OXWjB9y zg+oL@s^3cFDUmUs8!N|Mc~9VONy@D{T(5((t|ELf>8D zh3;2j{ygk_?Zw4b&8&Bv%C`e9)fuo5byz?>JN?2R1BKHql%({IDBc`XZGKcn%(Zx@ zW8#ikrm=>-Pd?Y@3-SdF+l@dOtt;^iqyJd4%3c8#rYQY^&W{P9UA0=rZszi0vblqC z`^v8PNfQFbp)E&hVYTx7W^3w8e!f?th!c2T$qK7|)pVt)_NpjJ zH~OP^3tDA%a6iP$J;ppcLY?MUj3_eNdnO-neTxDufzj+W$MV-)igmx7L&2$>_EUc< zObC*`f}yJZL{($^t7q4#-UP<=eS6o5e9429VU$?LwMg?0>*iYFUY@rMn2`CYTUKj=G(jb0k{`ftmuKK9<4wk<%?bIFLhhLKM_`L852~n@v zV3Eh?xulFi6P7Dh(J_5wPp_!XVpQ6%6@lR_QCG)?raN)cI2qtBWc}Z%3e{vp)(U>c znEL2nhKy&;@cn4$qb}Q%Ym@Ce?;s)3isHROp7(S>-m+-)%gSM6=#|fhsi(S{rrR06 zLw|@Ug`HmKlKVM#&v210RUpvTZ(HUCp)WtWs49#MA~)MrTkx4fhK55O`E_~ZFkO!C zvi@~Y=r;{mOx6&Ry-QAEhzE-thJK+|(hWKAr@~F%^$x)i-D-*b+%(xL#>t(2m2d{( zG@s)xw->S3%4kz4i)uO2v7T6v`&i7=jEvvmN!l^=j@NV#h-0Liq95_RzFMz;yvU)_ z&X?lgDMC$)X_@TICQcrIGaJdPT&f7@-MO{cnC9TRG(Ua~XdEvWsn@35-|&3!BMg}h zk$qxoce^u^{@NUs3xPSx4P6Hq4e}tzjc4vI-Xvx!Z)nNPOF9T(W=3BbvkI?Q;{F`>GA}vMb-yQPE8vU0a>exwl`*<>I{J@J}hw7F2F-rhrkkIJg-zq&UfzI& zq5=XW{1#yt{gsd3_yf8)r|jH+IwgQ|RD6c(z0h*gxkADQcH^2icR*ZWd>|x(DMQ@QAVv046X2Fk$$8Ep(Q2 zX4IQmD+)PG0L|zMa*2R)0YMAc*~GcFL>WxC8bi}=n+K4k_HYsorj{3xb{YFD9dq_i z8mK5`$ z#)>oHKfRLONPk~wME(cjuFu86H0=5dt2&t|U{RiE^C$e8V)Yo~> zD!4)Va6P1)+jusc`w_}RI+N=} z=3S!qek<`RNsjPKz2S`Nwe*PWdbfDBpP+w;eScG<)*h|$?<_-mR#Ro882Ern3F340 z8q8y7iD0-2zQAF6`b%Z=Gr=&65i4KBJ)cnl-j31_!}K`U&12X(xDD5QuYIK~@2KsM z_g`patIJqgy3_sY;wsXcjE>3vqhA# zSlvIoVtP 
zkMQ6f3!twHn#ETJ5dir>>i#J}$|n^!o`Z`P6ZTmqLbzndY*lMMfn_rz|FFMAT43tY zg!)FJx$Fyqoz9r#63d=cDur~_5o$s1&zX2LGw%kHB(F(7=;L`lLlJxO%E*d;^QD*I zbY4>T2Ga8+Ja!4(kV*aDHlwAs)@6A>6|@#nowXdr4!=$B=vr$_H7)9;LF1G}Bei(b zrJU52a-b;rjH#e(eE~w*D`UH)!C3u(`I;*d0iB2G?uGVvm%0m5MFlcsDM9#jqnJBw z5p*2`0j%wPKwgSpCpjTO-@>scmA5^?r*u|G)ne?M&2NUxeKnttMP6{SDQ^zjpbVCY z$Qk`r9t3GZn%yD;is5(3t>~V39wnd^ zr|gW7xjps~cgaLoi0hVX3$gjnIfpso9-yN~qr^%h=k6ioO>eUIu8!b({SEh0<|P@( zsHQ1Pt^TCgkeE{AHd~O}UbXH!YqzZF?uE?qyP0h&rTVpfwNLCmY1(#F^w-H7Rfp*C z&v&UX(&&hJmS~<>5o{q|WBrH7)&a4^v2FQP0IB>6B`YfnXvYh$Jw3EOf&g7ir+q*@ zAydfva5%_S4Jcv-lbl+=saXKJ_T;I~fOfWiE~vhm_7;`Fo(IRA;TBH?Po4JXU7CrX zu?_(oEgo~dYgMOttPJ|hV+{F@S`|w?r)p35vB?JUlNHrI9hI>%b=wG8F^QaP-bDCb?HL0&h0VH=+4Wy>w*!jyr~q#78#;~e7GhO^(o>g>^@ zLFJ7u!|0+-UcS@K{>|0XnU33FcnHJbJ!n==iCk?F*%#R(MeFgEs6(?;-6QWc?j7fm zSY`w?R>w{urc4sXs!vzengdupXPvT7OT*<0;J5qeQ)t(`GCKocSS5*)EH86OsdzC! z17F>2=PNX<w-O9nS=XM&~$tHoLCUnj<_oF!x%8Vy)KR`FKey(aR{Y zCxxr-rE}ERxQB84K!oZP8_V!Y+fk)|?{CZ;u2!wBG*!{_7x|pM^yvXgg9C4f3@z5Y z)_`^_Jbe5UXxh{p9^-!u{ys|tIMTZ)ic&D_ycnomfw@`*H$7A~C zAH|xyv0m6YE?y1wjQ60mjz!RCY#wGaoyT&~lngiL)F^UW-YB|1bN(cDNO`&y}tj4YW$jsBhZdGExc>uQX{ z1~>xb^v{>sJJ2p_sojJX-DDlmKU{Szuc`Sd_!%A|W3mdUQK5SEXY|FcOn5%gAax~Y z`t@3_CN^M}_lek~F=KAxW=?J$S9po%F2m+ml{|(Y~7wi&o z!phYv>rVDc2AYt43ctujYo48+AZDxFhppb+A~=So?KJll-wuu`)W(9Y6=%9lSt>Z& zwYJa*O`Iv7YQT#jU#$r8$hQxGqY^~7T9b>Hn#|2cnG^1i>djf$DIaE1b`*J3=;w`f z2z=$zltr6sY_hzjV6OlyBDBzS`!bOso>9Lvn1e7_1JtSEO-AhjL<*?Gnuf2QcK{~( z^R(@2^M0}$%?qVt{WUdk`z(?bF)C_aX8qd-tjd~na%^rp;i?^-&?hRW2kqjCP1>Cz z-OM$h*BT3vEq4p7xiMK}UG;0yT}PHbzueB8KJ`rEDYj?FWMj4gFYm$(+w~O7)Ol1) zru8P~f`NWGnCERr$zI?>lOW-Dv$H0NKBosHk1akP?ks0gFF#HG?qp(8v2*ZYF0Rz6 zKRHA2vlQcwg~pl^{I&OV^NXTlt1%(ZnE4V$Ywb0cY~}f$<#67U02tn?cXvlvu4{o_ z-gYRgc&WdNz~>ayoIELL@u0_`Mqy1!_bW@6^>R2%mj*5U0}JE6t6Ucn%KuIi@1Ay* zOP*vXh3y&8$=oUI(EN_T`3D-&aN8cxfW`3O0e3{^Ih;b*#B+!KFn$@o!kW*Ulb86c z`(-}0Vcp>lt8S%bsL$75K^r8$YNGhKvKG%ftTrDX4EpuxKGq^Oe%sOB%lQ1~&O7Tq z+SVFB4^H=uo56_{qB?e=?X=C-A9omEamEeaRo~mbEb##VU>Go_*he%}A{PPxT1ScJ zfeZm1_hxi3cu(>gC5VPb;@l)8daX@Vm7O4Io;Bb1oZ4{P={n{*^jNjhVcin6TNrzq zND_Vg+7fo^l0=w$l&9#u``S`;r(;4U_xG4RPuC|RBx z+cjZXDXsl^pVu}>({94rwg0mj%^gHvMm@-qK#NEtlN5)iVPdA zOCRNW@}<|s&u)DR8;i?xuvDNbPnlLlOOJm?ZzwAczB5kO-x~bMb5%Y|UWaDln21*` zd)5x`+g@GnXg0fMZ1Tfa&U_a3`~0UwpFeJ^Kw87JU$IR2s#0x)iN3>hn0S z{$SksC7g<`8f#?%l{F-xQoBFemSsIZDQ{^BFXy(rXhKfCi{Y|k82hwAg%)!}x~eJ7 z#3m7MdnDs}Qba{eT#{u}woX2AN7+v=a`ik>VoxHUBHbvZ483bTbs`{}^XL}w3+MiY zFH;uq1NuuBfo`sWK-`CqEq_s)>Tor5Jr76FF50=&#-kT}0(uVIZZ!28#OhEe=oJ27 zP$|-+bx(l4fYoyje3q0ojDF=|*ZINp@Oj@`(N+ufnzYrLh1(Sg-6&%ooWG(@Cv!G! 
z#%!W8vvS09y_ok|*{l^28(|_(W_nZV?}n zjRc(@_j~-d*q!LEeH6?yiH``kP>TFJqegR-G@74wOTiIdWU0l_`$>Ml{8Oo~|-?&6Ss% z;2|T`FwZB0qr^VqKuFfH%mKfc0iZg87Jwtht?@#=$lQK^^vwmKd)}>}SYM!1)S}c| z?e55bV28I3w0dnN!<*YMbH^*S&JG+(Wp%3BlWQx>Wa(P0mnRz!jfQS9PgF}O>5s^1 zypQ*nJ-m0fHpv=wdEF}X?=of-sb}eDkwM+A866~)R3Ijz}b6JzX2sES(d=v=FFmyRu>a``FQs(+PGLY&=NQ>Y`aNzc9OpYmN2f+n5J%eRUcNTtA?1pTCu!%pes`Yd3tgp9moJ~3#nwRA2 zae+?W>e2N_+o?7#5oq*#__ACtDo>q*jB%~N{XhEwvO@dS-_P~(*p2_-bix%z&&b%3 zWIfj&#bxpn2WZifM~tb#`R`Hs7JoQjbb3gJ>C!H8;y{L>i8m&P%BsV2MJaW;h0%3`D!KJ7E<(!Y`bl!w41*h021_u`(;v%WM5 zkF}aTV6kLR?CWb;g@g=m+`RoQVhx_;ncwLd(u?^azD5<+3C?NZVpu%&GY}<`W|!hN zb*3Qal{gCVH`hrtQta@FKS*oJ?Hl+)YAUFtML#ENDIGdwMewES>;xZv6>f<%0SySD z!(SfS|JnIKYQNX$*h@g<{~doFi;!tOU@}zhIOAlnb#k~|P-a~Zcju%Nqh<4IFF15RAau5HduO)_WjA?|uu?z2wfA>o z>cK>B*=s(o{1=4=qkEo$6klqNi>p0d8BcRh->ppugH8y`@Iza3+T^$wa=@DOg8g}) zQKN4rP3jBk4hC0-3p8_03ti|3l3(vRZaE>rLH^^g#AEA$9?l)$0O2xl0IPSj#2PUY zOELk#m{R%8leG6NszydekzIUz8IDZMVm?Rv=&`?8La`UWWD~TDZ#QwT@TJ^lT&fU;*3kSs>w+3hsDW!D(8)&X8@50- zjB*t6*{ZAbgs(;??-@2h---FjW)Vm2p}YfMl-mxo1D72k;nsoki6Z|4pWha72}7J# z=+3sOleVP(5;oUrRXA0^?isrxVtN1Y1y$r3&`;hd792a*&yW~IEk#TL)$UZp2%teLdKo;6+Ve{G|^gF@oQ1(qWc2lLo4qJw7Kb5$T4v)Af5>5@H}&VM#3rf zXYVmS`evP+PI|75_yGM1-#xiXCeiPZe2$0I>X;|FoB2wnk)l#b)mulI%AV~jxDOSC zrH|6fRxVQThRaHIqXyo)d^D0D9uAP2L-~38qShD2_{^mW=(XS5UESN5X+5&8ca2>CTOMf^^rU>ur37@C>4nT z%yG(nSoB94{E)C#0GFSCWVciY8UFTTRq))jpx-Tat=n4~1p01vsHK4<(l<1qb$MZb zj8!%OyKQh+okrh->nwPEorx(G`1_cOChX9$%U^i}g^zdP0?XQ%lHzwR>f-6@m zm(_uDTQ+&_09G176W}kUM@&S|K2p<^$uRU9aTsCUx;XR#Y^z_)d9O*4Tu;-VtUA!^ z@##~BlF&huVm+(&w|XIjLxmG?SYi2-o}CJ;)_Y`Rxp5kddJA{mMweAeHY!lAp+~nu zBG5Rj5b4kV0>BzV^!{GWi&O=wiEo%Vqp$Myipsv?$C$7B$o}j|npAycf;*6fM&La@ z+FL85)BGY!VBmAIQwg*oYh)@WF2pi%K1K5LLaR0`IJdS{*k2gfyT5Gl@$`+@lKx!$ ze5_t;p?}4wCl3ImL)Ro>d|_OjkR$9lvB6w(=Cq0lkG9w}9i3Lkb;@8bt61ue9e^{; zl829jDE&~-I(^W$hHRifkiBE2R##v8-C@Jts#564J$7D~W&Ueacy4NqZqCb+ci%

3ek9@;PTKJW%k40y~0^AwwPk??=}{F$161He{^e9pI$p9N7bpm^RT?_Y`1 z8_e_;12SP~-qrS`&pMLCJT{AxBXQsZ_+mY(Z#ZP82#tPD>y)~eNh;{2)dYm<(PVvI zWnQ-Wz1XGBz%FFM&dzp$-F-!70qYog%w=*>+gJ9j2Q1t04c;5uA=uG!Y_M=L9}#!$vu zMw>gC%4j?F($|RcBgSSl92QXxCi9~BxbtzX?{yMgsuxHEFhw^|WqAIiHE;}3BIuNc zfL`IxnE15&9X;Et6M8`SlX-%pnb}Wlh>WigglRMGt=toGpz~tLZk^89jc0%Lb?f#p zo!T7xP9N)9Dhc38ZSGK+rXY?&EM|+Fb*pB4$sIcQG#-DJvereQ z)g(?97j)KuG*|3%uzF}E#$NhWKO#V}+|)e&)OY;w2!|^iGYyVT^2#QE0qNfrpa4>k zKW0~@P&V#>HD@mCCfML!{w3aJd9VQfRbI6^snDgd)nfv(^=&91;OuMh?XpvcsYNTz z_o@m9-Y-MRl%O%h9fMyzXsYbIULu}cVEc23X9(=Wmg>|+;Zoec9!TV_Y;vCW@N#f) zti%Avel6Bs)&b|H2`GCS2_70fkb5LuRCExelxZEWVK@8Bj@ij&>?X})2RH03Am}II z@r+thml+a^PO=nC8SoIb{D|F?Prhk0nNqH3@Jax0;xcy(G{H~+gUELW)5D2AtX`>lLy9yeJUVGL8sT zG=+DC(Hi^$tD2l{r*qVus1D&fM!6vn4uk)PtFM5na(mvEL!*RrgCN~XiNsMrTDp-C zX^;--BhoD(2#6r1bV|cPN7e_yOwL+#ez5Xo;~x-GtbOxLvHn? z^?&qQNNfP&C2nw2UcNhA?kcsYhf2=g5l3d;qyH|cU{{9td|cEC&udzNO9MWq3SJ z=l7`iVWbUvjmzWE)|KwB8h}mhEH}$c6|mvh3aXovWqn;5Al9r@hrc}5>o>flWMiW; zvcMh}Pq2DpiS9oi6an!XTQdZgQk2r0#^w_tiK31#a}g={*0C00Ilr!ru#*et%(=QYrpyN?Y@ZM$4di zr9btK^Xk&;`*M(= zzyE#5O0E25(4J+cHA&Zlw&NtU|6G4Rcw@4kVXyvrU3wICC-ZObRXssdVTJb}r#fJT z`QN|n1BmtrG{WHb$-02lJeI+@b1+NS%=$?}ekdTSLC!$LmO)1Oizol1_1B1b5hqbe zNABRmO8cx+8Y=x_M!#Qv8F9IP(f9v9QYStM3GmbI>CwgyfvN5moo>~tr;qB6zwVA0 z6{~^q4?3LJKsVPkP?A0_1xTC60yp3ASZV+Bbuu#Fd!)A+cKW7DZztNcZ*y(y`Dkz% zJ<#5>Ei|uWCo7kE`k#V7A=aQKG|cGDZWOqDUmQOZ(n&8eOw%XBJt^!#yD_C#PP5w7xi2$?8D7Uzj#4tQWuKN4U zgtG8^A2{4ap(7*91x?o9u@69zS$fM0JUiYgTjO>hPs(Ha-PeB$2U1YcSk~y?%!TYI z9Ssdk#drrMd8pC(9%MXv-hOi`7^ES=w4Sm+Ji~*)B8MTKqd!p8EqMFNUMm$BJ?D%ut=a=*s8)tzj7&uLI=+6W@lJlgZ^f!pPe(F=JVUmd1Zj_cBDy`e(Afec>S|GXO?ID%xTk@^== zF2oH1CIBpf7PTRL`-W^lOB~%O0Q||Y#OW5k0N|{@=k^{mC{A^UI#*h@a4#n&B5Qrb z+$-CAbqf!N*ca^sY7>wl&+uc#-Ll57`o?y?QXk;j@6_dE+6Yqha2s3?pnQ5FV$zk? zvbQS2FDjk*c1)h6^QQE)?Q6on&jk94LA+2Jhtx=3OVmDDKDvRj8L1Yw~y4wKWI2{bDm+R*wlk&2Cw5zr;NRwSqk!;+KR5 zwTl%C%VV`gCrwq7Wvti!w#Z}AtE2E-m(F?qz|l zv-cfzeg*&nMd6~EQ^dl0qu*q>-o;<6f2icrsCheu@c<^l^&{)Ldg6QeKLehia$i$G z$d@5pzhouvw{P~$%{H$5DJqkpdmXjoRt6T+6H{8@kE{)2m^%l ziu-c6{Q@AC(ZtNwf|Mho!^gi!smz%R?McWe4@RNN6uxBTa@9)d6Dt|>I*1vV@Kv1% zz?`!ldWaoR9QlQ2x=HVh%X@oJ?R9JY;0@f8#-v+D4;}v_MqfmO<=WJ}%5X8*2<%o_ zS=n#UzrFbO>^S$M-4FBVA3(8m)La1c8lItT&2TJYZRE`Wy0a&FZnUB~9G(?wy;{3r zKS=gTB#pP)A9?zGGDuJg8;JwS+=te_HL3m8?->>DhA#)AIErkPnNqzR`OJy`R3(+& z%hBkbYop??(AIagGrYUa(su0<(t&<82uVnXf6=)339BOL=2$N86(3}b7JUaK1*6@{ z&~O!$$twHFPfve|z)TpYIHJ`my(xekOy+ZE<_la|EZDQ@@Yi|`K1~>|Rz<+g2GKv4 z2y!Vl0v0wAGyb#{x+U%TO&u!B^t1}jxu)3uP&Z1dW`n8E~@Jfm$h)CH=+iD}*BLHI_K2A=$ z{jiglm&X|{`3*t;7aGXb2=~f$`rtu|0b%mLL#a+}Qyas|OkFbbrTw0_5q zklgGRz8RLBNW%$1`%$Q=hwRIje)dzAeB2IH5kK7i( zfRd89?IDm@?2BLY%(vzs58q(^?mn)h9B;^0>r9AJLNou@q8uLs44d$|Ir+uSo-<9k z)$R7n&@F*~Lx4bCGvThDf{E>qJ44~KBq-Ym`b7j5*}?vqxpjBciF}0fQhkpSln*+I zJo9jW!NW0qd6eNv5M&q(yuo>K@h}EnFx=>5^6cd8D=vL=p^fpH^3&a7zYkvf>(VuE z1kbIBkJCX|;L+QYdPV1v<2|OQh4D_cWfoMRKKuAuKCkQFHK7*<>lrXBUwnyYG>uP> z{~Ur@E_rO~Iis`umVIzAC!qq+INRw_3K#ibKTC=2&eO=`k5XRvbcrqB2__rN9^r}t zxfWBq%;b~T|4MZP3?Sg&cy6I{`P_`g^1Y$}K^%f{IP<%n2`^v10~38VYS%vgh>wX; zN#(A&oY!DxUL#;KLFv=v@MfI0_@x5N;`Y5g7E`N_K3K|S50$vGmx!@S#CFoIf z5=D@*vse7601&b{t*uf&efEDs$48^l`;Rfc^4Z18FqD{{U5`wv)vF0mLddq{%2 z)UTHi_bkaK!W%7#`7#2b$pPFLW4uIq4!jU>>$~jdP8=e zKfD!c%&2B!G=_E#VM#Vx>zYsN zy?HN^`p!TRs$VBt?&62sv;i>U3v!KVQAYn8-e~kAeyslGk{Z5}9@qqbha$1iU0b5-ew{6z-D)RfY+k9x-gtC>L zCZeL=WnY`zjU1Boz7@UHzTFe}0oiP7b$Ar=Wf&DM_P4sDBfYF7mIH+f_g?NhR05ha zLsg%rodPu@(Ecl_5h55T*xRuA{0S$R||0dZ^0y(C%sc;UEJkF+n|gW*++3* z5XQp7JIo%ub@Fc4+AzOty=L=w+)6~9Bm?i$J$awoeub3U#aTx@>x3`KaVWnz8^0Zx 
z@KhD?lq$k|li=~C?|HKGe_aJv5OE+T`Kfu2UoBEamE6;K1Jid)y!YM6ee;8;>ACCdgOQHzd&fS7R^#Hu@d`8(#R=k*9?UA3-Ou%ok)U}ng!fy9NL`Q zg$H>kg%sUv+pQeHDl z@q;PUx@(bg?HwH*D`{Tq{a|8FUYVJk?A+drkL}f@GsCn$K;rh@M-~i*_pn{DW)1c!zjox?-^zzh4moCZ@iI+&d)`!ikb6--XJ~ z>Y|+QND^Of`KNmNKfS*pY6vEUSBs$Z(*O1K?KO(MrNItv$-xS%gLSkMzrA%-(XeGZ zs)IL%ayM}AFE*epHN0SEpjYxFx+;xEyd{l(lb9o&{(Z=4_C9mSK+10hS$jfy#0xY+ z*&Ac=N{x3~s%~Io-!$p^x-Mo|R;T{(&l}wN*SCB51~ZJSG#DJV^@M%L9C2v5@sgOc zc}lA)Rpq+Iz8>S)iaom;Lt-YF5@JQ=KCyY}so!(@kt|t$>_JOByoqLgvID{Q# zf_;M`MHvN0#UDnTegz$e8wvMr?)E@OXN7|w(kmlgyg)<4#54t=1aS0mgMH9wokcJ` zByD8#3OehRo0e*uBcV2|OS3`knn~)(lh`;(amd{eFwn)Fbea#_sftpI81$C@*^9e>ZbAZ>$O~j+>^{4bq}`{Zr1R)u|N{N$A%WaVVfcDq&fm8kjOaZB)d_wV90IJ^XJIj*{^UA)&e z4*?)y3>ChaowqdbJ`DZ$t49~N zgdSEIjXcEfakD$LNUU%K3_B|;YYJ31n}Mzg0g6W+o}Qf>V6+pCjScq$ZFi~>xn!@uQ(`#9q7$_Qn8JUj9N51U$R{+(Z$^^ z{#+&d3hGuKwV+_~`A#V^5l~Tu13J&N`8oRM&!6D{ZEg)JOJsm}mSau0lKpQf{#^t< z4ZGIL;q5;vdj0?^QAi-hoHy9Ke*Jn$RaFO|G@b*rrr}$hs#V5x{#VA9h>AEFWA#dm z0)sH}TJ~4}SqiH$Gq_sH7kC-f#jgq@z@#qYxzDkXJl8-D43wIq*m^7|Ds+Cc5>CXo zmZR9iU!FRNoVdYN51KoBCd#fvjQ`MA#W!IrtRtLSGoMS=rTJ?e#C^r!eGhk^SY2HH z0c>e$$>}=<-wr6y(yp(sA1vn=^V*K`>gejeR#hG&Ils;4V)%pmqWn8e((M73R$AWl zt4B@U5=7hK+Q^-OilwB+LwhN4MR=XweE)}gOrt-(6PFODJ^Z&-@i&n0`djUY!i#MS z#5SmQKmoHo@uH~jFjN%)gq(EOo%{IYgk)GvFc|EEe=T~&fKqQ$@8tdNGT%-1nS!vM ztjXy3=UbB~a-k)5->CQAc;&tOX$@pL$)r?K2A>+8g|b~i$EZYVUzId2+N&*feeF@h z^M{9SdIe+V57VX5Jh?6_C)W&=9Z_J?9U4`T5^ z%KM`?U4wJ1J9Q|*}4 zyui!`+*-kcuyo{rB5u~l<|kg4=Yo_S>{B3~Xr0pe)+wXCjElQYO&vjT+guhw&&94) z5CTeBxQ|t}o`O9(MZ+_wX6m*U!!&ZYK-<6ilFay<(GsUzH*5v}QEQZ+qzV)ADxHf} zL0(Z7?f39+O@gqGA00eQ(QG@lG&GNfhDR~6DGLsds81WNvk5gs6yL=_8mQFa0xinVxAgl=zdEBOgj4d(EMR!h1KBH++5ZGZRr@XN{;{M zT~Ws++E!*}Q_CZf_xgwxE>`C(#lgJRIgLL@Te6-sLmyDAtcGn?v5j20mYn{@@*X<$My3w5v`Rz8EG&ECvp zm_=etYI0C_N3n5qMLS%djbJhWabOX!#rpSfGm{w8Ph=ezq+YnP8xCMbXg!bz|Qi6Y|a`G+dap+ABfAtkj|o zzHsc#_!lt>sPH`jqg+d z2k#elfw%*$SIHgwAi6l!yfEH7}@o88rFQxVgC@m4EwCF}=v3k4yE?)E3`9$gpy{=@jtf0%WqwZf#`!Gh-8HC#liAZS-h8adxw z>?*6n;cu&Jl>Asgt{xm2i7S43V0wqFKIwXIB#fB19$EN4_`qrH-spE7Q@t~-P0pDo z2lBcBx>(1XzZ_E=C;}605+p%;|JQfQt7v}>2b^ZJtGopEW3#;z>L9f9DxefQdt&VPGC3&Hgy?i>3i{k?$7VD9wb-(D}ZuUk~Q1+3*<#!+}6aEAE~eWOn<0*(F@ z5TI^z9)Nbdmd;L`%m!bIXCx# z&~FxK5bvp}5y~fWM+ROcdk3by$=k61ko@a}LeXXj<4PB%(4Vd0^uM@L%& z6&prQw*=3Ef15iyEnu&ngZ2Q9x_;Y>Sg%vy@9|v0>zKfHS@3xR*Un_hc6dy3PTO0S zVv~-gmA)JcWcL_tL&^}mA?!5AmUQLsjvy7$$wPz&5oT2opiHWRdwYwXrz+RA`bU5Y zehQ4$53H)Hs&RjFl6k(1Bv-R+b%QGy{aO~XYfCon$Qt%awf*D^&k1isB(B{JKVzW% zeDcAt6Z1q5ki!GHPkBe@-N)v2L?u(B5A_L(|HQqdD$rI9^ zflC}tTw)2FAD8FB*fYqqm#V6epNs~UeUvm}$qPH&;O!oUK z^w`Q5^|yl(`c9qE92^5@OG-=jX5P)3^OgLMoB0fYpf?_Y??nP&hD9ymOYL*`JC^K; ztFtrs7Qrz+PhVcda489KI$R6!V}BM(v|Pz1)sghDzy8R> z%gYpK#XNS0w6CfY7X#Q8ar1xkVZn5PRXe)~jobO9E{L_<)0QB{MAd-~Tp~}wMEi;4 z06EWZkDiEz1S!ogFZ9ubFSZPAT6#DAAy>=xug}k<@#Oyy#H^{HCp9@HcVU>oDKkUq zalMD&xpgn<(sOa)0`=T0i^g1qXHNQ+)`1}VYMz`l>J=yz#!-R4h3PH*67Xmfmuh2v z3|FB~*5)bfozW#lPh*QoeibFxwLf+5Ifnhm!oZ*e)U;>f_-e=)m?}PCx;U$1UPSqLsWOgieZO_zz8dH)Owiqnd!tr_?C+{7b=YrWU0!CDh&8ckk+>Uw zSMODS85FwKn!`hsnnIEPJGZBiKtc<9NO{ydg;7m)L#< zM2Cjn`|^^h^l=*wpq;(KMvA)&%E`qUhr@i}IB+toeRyaO}2TMq+c!F21w z389pef$)ug0S8J5h(@GncN4h0|_HKA46copEakVO|?tr z=LF(;)+- zL;n~n4lo;TJ_@45^?OdqT6mwrjX{c>q-NZ+kXcRcMrUWl9-h`+{rLl1$8%Q;G6EZ` z)ng$v{f7pDIwR@M0rHC)O=^2_2cdssc{miy9|tAVK!?~^4*@>*)A?tLk&$seP)EB^ zFA(I5DIZk}048HCK{{Cl{hB1Zuc|LEbK5{(p(PTQLt5=4fj2mEj7w$p2FAxRq25UW z@R>YeHCAw2D?%B?hwfTv0d&&+WhHjn$c#Q;Ee)&S z2h(0-cbbn55B1V9>J(92uS$t~znCKNnt!OcPW9sP!2QB;)^s$4M3j*<$O7v%hLy3q zO{o5^B;?{ZEiM+(!2@qBio%!g z+}(WDA<>C?A3x8kK1`Pgoi&CnCiWG^+UvNI!XK>8bdylfxQrjHM!?a|M_<>6cqe!Q 
z3=F51E;a=E4^j+!dwWj(kJmt@MBB`0)DrMtb#PD5AQ#DQ_M?ZsfLw}TNY@pm177@7 zk)6zF<;>gs2lp&4s1gVQ!|}nfTtSJIKEEB@4FQIbn|@8=18Mv{FblcFbJrRT4eg0U z=-a<9%vu8fo?R}hdny5!KQTW$yfQlcRAuUx2g<+jneM*@!JNiBHs@gNBaAW<1E^|{ z@1(=$#2e2}j}Cwk3B+cLk3QxdmzQlPl@;^6^1-hF-PU%B*F)I0Q(MrDNarmI`s10n zn55LOEA~RD!=K*$Wtn(Cf+cS!weW$~=&J@jKR+>G_&Oxcj#0sdk3lmD2&%=e_g?+H zB-oO)=*^CpvrZbQ99&+^-rZ=aO=_~DYW=>;%xmA}Xn+-Y{|8`LYR(y!#X?(BslVQQ z34lYPH)1xBBQq9+)wypxcq{JXaCYhw&tpVOCG=Fv*f@RccQnlh>-Cma)I;`F?37gXDXnjk5VM)FKQkxk+DRt4u^=!tJ(IMf;rq`wGeN%j zN&2WFzaUms7e~JLA$^z_#+Tve4B+>x=7Jq2pu_zUVs=LXf z#&_|zPao;X)y&p>FoJ)Ln)F)75TvPlKBvJ&y=K=}X+naA{%U%%>YGIU>V=)?q=Wq< z6<%Du1ulpnBdPY>Wv_Kz2?{6U`SkNA4d`T)2E#avDs&A0@^|nXXt!v^;RDCIaBpf` z>!AS2-g|+pfr+e|P(aE*6%Y2am+DmFTw94k+!Fv%^U((-^SZxQ`&rr_abTcQ9e#eO zwn-0w!|f|Jl4xyje+T4Oqa`M&fPzWdM;E<+vGKea$gz~z%^in?Q+GZSj%!A)o(Fc8 zzOPYZb;h@TI+pBlnM1jsDteDB;vhi{Ziu!}Sm-}2(DhCx&bow@u3Gi-{HA*V*Fz#9 zWlIk&-8>dJT&Q4J3m<4w>o2!Z2JLwS+}sN^qbgD2`u9<0u|)^6kj*`$9|eco^Wz0n zkyl`Kr?}1ECCRqbbS3JKtbI_wr57+%o;`Zy+^UZmucg&8O4)#YGdIU%XNzOhc_FCY zKcC6FcsHm6G!dM?3o{gQT2Yb9(BXN151!dS1-gZsfns?sH47x6ZJ?oOsL$nNEO59l zki~Z&-k4iQtsg29>?GsS3nBEtMvuQikeCz^+O_xjPi2RzO+?tKVW%az{yhX4F;*QN zrZ(nX57OzGdLG#?U6%*+0kGi-%WnlBgr)n7pnqy<>94fWP8D{96%`dZ@iu}DUS7SX z56Cc4ZTjDaoQ*op@HAYBLO%|SXK5VmMDg64ze4i|uFd4LGR_a3W?49A3ni{yrVP-z z;8lkrn2Yfk7i6=C1j25sN%gEYQc`Fj$qfb{@cQy>dRJAj3%G}PHeN4~PuKd;@r-cu z%+nU-eSJ`-NzCf+FFVg?HD=fpRlloP7jK6P zU4z46^D*nIb+N1VLM)0)BVj9a@7->WIh?}~$i=4{f_E9Z>t=fgGLZl*%SycnItl0l z2QRzmKuH1cUby=D`T^fM?t|qu!+P@M$&@o2Q{?BnoaUdxDYnL0jky8WOp#RUDZ%iV zurFU%pyJtw)Ebw_lgkJKt(ZzEICAV1tb%dSyYQjS%+B_MHdNc6wImRGGaXDfC#@H$ zDg(LIH0d+QuI7UTK-0wL| zAsn6OaO-5!e0&3VuqVlsX6qPX?N{;3WPz9saGwmpfsv3eHugqdTuB z9LqNv4ZKu(u0w|ELF!m0oOn%%uwtYb`O%Xre5EeiP57bSdk+xXg&nyJmBU)i;#s53Gy=hC4p7fkev4J@qB(OQpQ{)+s@r*R^*Z6iP61;eL_mhI`IE z8r`S)WL&Sht7PvM-sIKR;Ik!U{1kgau z0v%X@PL?C6Izy!cGBeFp%Tg0!-UuYfMoLb;xoU7v2bMmfqRJtBC69uRE!DyF7`Pq} zCHem8dGlv~%GA#$0>7=L+&DqtcL+@exLR==rp(czbw^)*c>+$M> zEf1Hm0qQQtAyxfTtqW>mq55Tqjh>Q$WX%vckO|`*T^XM6CTwTG5LaMf zd^}U?J$c6q4D*0$t%8?33y!$vMa zs#PSdIL@wRZ;P@eH8-tbqT_e)GBxe%L%pxNCnJ;@_qX2t4-umC0t$(IUsrEJA+ZCd zU2NC}D(Yb?BgL6dzPtnyB8klbg;xo5iIeDmU59_fH{zRTB4kTd_RY0YY{K=oDYe}8 zvP$8IgRCb7aipt_6n_Zf4lClxZhwShxV^q`ZN?5@iE$r(#et3otK<~O!92jnG6-mB zBB7)mw0W``)>q%6?9hVz24oq=E{KR19+tkRcocm5Ch9dJtC;nu9BgJZLS4jDgQ~CY ztgi|{z6*z38CdtMZaFjbgv>-bO}=6n*T}C#K48S0l@_72u;|iH?|FiX47-dII3%)7 zbWE7Du?JX!xXH~K3Os=yToh(2V$V8AMYMg(4 zYHwet8QDVnyCj^F^kTm;_?x;>)znS)omfUIy~l2e^cem}TrdK1*@ z@u%a$P?5N#tn5>wk#8s{QBsv-7hGTk@Ky;}S&xJR;uD%sxVarX{TK>T59U(kgQB`G zxE6q-{||sdzfQnr2{i2?Kqu2YF+mGubfCrYsv27F1z-cW{e(8~kS^HugHy@g_Q#U6 z@;Y>qK6xT>+v+s^U6e4cy6^vS%XV{z=$r?IUk+Atx-U*4Eh_0nC^@ zrQq;P8bfLWjKba$?$BLMNeuo=v#UYaPvFx<;er^k(%j^ zUifqQ;JEG_{|R&{|NiD4t&RCII8IVHNL?_Pxh4aS9OcL-Gub*xcK8#EO6iFE>fqm6 zWOPc;sJVYO@+3a`(!_d*IV8oWtlcou2D>z+1qR^cYa(tQ0U}1H0(p9J)DON{2?0DH z?S>37jF|0lKl7?6NZ6#Hd^S#dRWh@a34}}6XDGi|Wd20Ki2JZ@McgR--W=nTgn7+m zt}D6L{gFq(nY-bzx@=b<=5c*Xj;*A!^#0LzlD>?m(xYQ~zZ}us&xzwJ=$zVMiMwdL z_CS${mk0qqw)My`M3b}cI|#D?IY=3%4=L7mJOx|jNkC9ey8pH*P|xgycV_6&lNDrO zNKtftHmjs++h}SH>i!H`>jVz-G#ayF)^r&t5ZDSkVPo>VV31@V8Tl!lOhX5>5C*@s zqrY;7op0DzIJo<5aw#_ijJU)AVdQs00z^!~TKR3CV;Q#q5#RDX`F0HFK2>v?`jizy zmdyvg8P>3k_~H|gBsgWou$}=IMELx&e>{J>190!XlHa#Y?d%fPgeAZO*IYG-_V715 z>Y*29z9*iM@taudl2#b!nYye6`v+o!gJpu+vkq)wfpHUL*v6k;U^@lpJbV)dzDD#~ z`PV2BFbaE^guPg(`VIH~dQY}%G28o20N15)(L1$a7LQ{(66G6pQCjL|el6#jQU&2! 
z9iEaF_2w+dxGOYYR!Ko-mL0_-Q>pOltoLOJq2vh^F`oT~N>v~`2H*66BE)TrrZeC( z$pvkb*pEIv09zD9f>m>L#i&>U&5KQY<2Op-^!kkDD2}cO`d6tb7g@9g`x8v*)YbX7 zG%{Y?E(KKIBdUXCoBZ$v=h>Itwh~~<*yGU(t%ogS{dLc9JvMvOa=ohq&X=AO*t|y< zwfi0rI;eaA>RNUX(j$P-_fvB-N>?He^ff7s?^J>*z%D9~UvYySxfn2thYS6=BlE~> zMSw=3obbTB*!dG z9YM6xACD<-0pUjv9n5~~Z}4)roou9e`t&J5c*RHSA^F0eCUxL^pk4*#sZhF8Q50>N zyWZj@xWbtu$SKZ=RkSj3)muuQ?_QL9(&0O9*P8XEiQ`q1B4$JIP1aPut+uDp{O62= z2mq)j$m2W*kZI_TO-=htO=Um>0G6+>FHlT!Sc8cKzw~sdLBQaKF?8%!3?0p*?|2Fo zM}fpBSz%-pABS8MUe$iT+?t86WJuvbq5g*iGuf-HEIg;!OC!afI&;aXgB$QUSS+#y z6W9K`a~K4}`6QczAUF5<(G)n`mXnQQ;Csq}pqgL;G=cARVxQ?ckIVpDgkw?Hl7}&Q z^?v%%jjeuI`MfYvzyyTU`&d$PS3iaY>bD(ozTLG7oskYFen-4hcT+O+-ZoEptgjID zuo*kj6xzRgS2Wghub^+=DETu=QZ37trZ>#mlA@!02i|Hp)R7Ap|__!$r=a>&8yEA)Z zKB{WvSY(y5(!(icgEdAO0qT|@d8SgMWJTtO5FLJek3in?{ZpTudzm|bgS0Io?V*L> zI-IhLGkpeEE@uHufv>`O{FzU@;naeOjy@eNaq6F+pO21Rl7AVxaNf(&uT2uk>edkt zFbbD!?33E&4{50HjA;d8jfZ)L08hs7+c0(foydGxMip_)zJsHN?A5Vr7l#WO4m{b} zW8Zp6xa4n$4z#B)fWn$VP*BVFQPoo#YMRLX=;*1bsb71+zeH0)#D9NL@SlvVe?q&F zi|?~dIOWdBLt%EZ8sF%QO7iJcvO)W9v*WjpUd8Y>KFyA#Wj(8}TcCOL%P7Q?&^5u7 z!bqd`1VKRp=Q*tFiEnPi80~QO5$W8i{M+$IE?!HSai32T#B1sn_XcEGVvlTmy`7o{ zD!eE{gbF2WfN=rxu*9O57NU>A$JG{x^2vZs9^>xay05eC9&v9azB1!^`zqup#Lc8t zm8M!;?GF}zhQ0ZVgXeX}(URobdbEQhYr|X7`ztNE8V5?(sORxHERHO++TRXZZ2OB3 zl%cX&RTEGKhG1gfdX+?Cq%N-{0)zg7H3=KX@>5=; z61)aJbF99QS%uEA)YfZGf7*tt3p>YD=a&canu?{RY9|&-AqG0+P(LkKUZUXGWWw$U z$Eyq8?+|!8L%J|d$i4dqVub|ifN<&x3FyEG*cLEK*?z!J2i*ULk*M3{@r1Y)MF>`a z{~UhJ6tYv@5ln>sd>SGUQ@Iq*2}CUz#29W&G{k}m8NT?z6{vsx%TWis{!6ew?gi}n z=is4?h@K8O`Cq7^xzLlBxF7)K@Xn~o7bqKBWaxZeK(kS<-vk=D$K<0}=YOaY3inpK zRgiUN@FuN6_=9|Hi}_z@21n5Zc3qq|{n!*b+T=#YU@Q_jknw@ytTXJa+WoZRGcswIGtWNFE zxgZ`iT^Ip|O+E&_vQ7lP+jCz4f2gHFMG>O?&7p*k*dxbZr-ECQ8AU+;53kYs?QD-2 zSkQG@P*v+4qmq>yd}oqPxasK5rzp+&KV-}l8)8X;ZbMDaVE#jp`Ire4mm5QO?#x9d z8$_xD#x_#FP*zs%0KNgzp-5`$PHj_!@bFUbqyu0~euW?VE=M*Jw9N&L2KITeGGymi zsWx1DtM*Jj6_kSQiE@UPu&oxJ=#2}tF&guXr=-LgdGZenT|*vV9{I@}v4)Tt=JX?sqRHb5~)`ynaxhDcE~ zkohw5d}Vj~6z;vrz*A2sgfWvK%x+7Bo<<5X(YQgAtXzw9-AXk@g&qFu`zpxyFb3^o zi4n#rOZ)I)gqaB!`YA+8=8(wB4b9xwAzqj2=)!e$c7t~h_-;D5^4qoO+&w=di%_P} zpjrlA2sr#3CB7$~U`a8ptgMo5nc;{Zt=?{Pi$Hj^;o(v}03%~8>>mJE%6S}4dhgA4 zTRF`vCU{(a7$#2;6b18SZ`*SmkO?R;>Tp1n668s>RB9bbs7$g(i2f4>O{Jli;s8+l zE*q?0SidV|qD5PeZO%SP*IYD>&ZiD6YwbrKh^u@V=zX0-p zlCw`rY)Qs}>6B(utDozug0wAgi9H(Oir$2HMtE@nUXQiv8{Ty9@XSzvWmiROC3-UQ zDbw*}9W}&74R4^=Vppwk8`mzA-#_8PLEMAREEdOAD0fO1M}Q`)bb#R?Z6j!Gi~zn% zZR-v25L=S17;0BoiUJw8O<kFuTr~n?L=yDB9-$ED9W@5_;#4*GxFkoTvdnE^u2=al{6fnju zS1K2qWHDxT?K=tC(v?wqs)ur!yWaVqq7(8D5OMsn??ex0(K4W3+cm;g0MXJ=hh9;B z!%a@_w$5K#9=^E2vI-hcbf0{!W4nu5Ck55V7vNTKUqCnY78CbEOfI0>z=$Lv!)Nm( zAS##u9d444eOjL97kGVMP;*AF21r~e_kZ5L*K9+4KsKu#WMc`1Yph*=$G zTCsts5o2p>i(JHo6-cV_fldJZBmV|7AHmxy+-Mc#H<bUG$3 z3Z7F1J_G}s=tP+e{BS1VaSx6*nx@y*3YW#r>_kzwbOjF5{&$t5(Fp}U2$NBU0${z3 zSIqG6y>7d*q3oLn=R09c3Ba||Y>(c#*D;{KbBBUP#03e))2kL4+yMjpuAH8pVib=g zk$|YLXg3Vy0T5Dw{DUB3bJ4>9EBwAw9x2$S_bLUf?8H3mP1^cK7cyyU2tcnfe%^yN z?z1%gBhPD<7QJbp=oCQhE#a|A-}#CwrR(+bL>s)mwj;U_LUfmV%UkdNnZ%E6mG=Hd z8-rwCPg_48BKu@&ZNc`SQx}{y@N1u8gNfonP=t*P(G05ikW*=Uu39C+hJX+l{pTMX z94t!I2+CZ{)}bc(X8)^rXeHr{!KN9c}UeFF`dv*u<=DbTt5zXlpJduG%x5UWA_nVlE($}7GVr+N7B zA(%_Vk}PPKyjwC>4+x7i;Xd2i)~L%>OCwx7nV_^Cb;HKn6RfVazjo{sF6tU}!;QLz zI%x6WCVw8aag1(x)l-SqLpw5OyTipBk;XQrZ&vu$D{K+y91myaY8?3UjVJ?Zo1To3 z2d*|G{UCSRM=1)DkxWUuapoQV))|CY$jj(JWamoP!1>NTGF~6AAs6%Hx3#k?b)0?R zvi$AKq8+bRAmDu!l6O_;?NP43=U7+@yVXbw=)le+DcTJG`)Ci-a6A5R#D+xpzIfU8 zQ?)5ag8kL5n(T^`{e!mjJJoFHg>4^Q`{5OKTHO0YyY>5?t*D^hgumW{XKC0_k*0v@ zMqM2hCHZPz@9Aod^7QJH#1%gE4aG!QVn^@z7wy~2Bb+EyDdT?jim}_SXFjW>MU!Fo 
zZ0j)^4Y}W|j^PXBAKo)6kCPT5+mSN3y!*P&KC;C(V=2ocE3ES#uhx_>uRS{CKMCg!39DR?n>~8^Gt{-`5`>K_)~D zHwH?r#gXE--ff=(ukv|pPQeTc8`Z%9olGiiTVQfH@&!_IN3tF~H!K`ZR2xuwv+?gP z5IXxA(Y%T6XM9$gGUmxl%DwvY1IEI`C9Hi0ys@aIm4Nq0$3v`lN{*d&w$ZuW){MwD z+wlD|AMVcRY?>vCqSm2vAKW1e@!y$n7K({~8T8#OJmW>x59cD*(P`Et^Unjg7Poi~ zWs>JP57`BoFlUJ3)TU_#q>^n8|-?o@m{c7NV<(_@Tqo>eTPT70sOV{F| zQ$lJm&hZ~ZMTTRluQA2?JCZyuHb%Q2ZngU~(!3KR=_GE;ew3LeQdB_$u(O@iDDC-i z{tPv6{7IO323Za%!ofW)c2I3e`1*FD;qhZHT?6e(8n{dO+Cp^jLLHL+~wpOp{Y@F|CIkopZPqE+?3-XP(}J1;ygWcQGE2IzP!x|5-oA(xs0 z)8s7&vasJ}X3lHyGcZMF&smw%PtY5&k;)@};eb{v)7~0_5N%1WnS=2Mn`k(5>Ql!F ztlyS?6J=FmZ|!!h@$RiLD?b{unWMBF_mxW<^IqxG5f%}BP2zXI6Fn{Uo%Bz*U&z>Y zV)0}LVzw%4qluJ9lPgF>fOex-cX24jGUOY_w9f&2sUMAn6%&iVTOtfyZJa_9f}Zp2avJ1-S^*K1YI zSZdt8Vwz)X7Ih%n=5Z1D;yD3mcqtQX2VuEQYQ+2Zto%>M$pItfq(@mQoUfBLq$UW6 zS+lG$k&Z-u=ruokHN4pG+#qs!VRTUbsRKQp=|oPCSvNInv=%a zTb_+|dJ`wTbS?93w_Jb4Ti?2RT@`!#U5rt$M99zJ`!2zABi3rh$2?XjLj~o@ZHQ*N zr^qlR<$>rYb*x>%KV&+Zdw(!>rFGc-K6Bx1aO-%R6=<7TvXtm6?6LhSwZ&i@*=OhGCYU7NDA3-x^}9P_VneL)b?mP9 zPgjP6CWi+?cbHRjr&WPIE^vXYxZ=EnI;Q(+Ssl%9mUP2cKoIpyiP8kabz=7B|nVA__S+Q6irovAS zcT4Rj#euk6KOP2(M*`!k1^3Zjs;OnSQQ0#{>a~_W+Z}Ll7W_K%3TKHs#RLOpnrq)O zF5Qv!dZo>Abw8tKk5w)Xqc^e9Un>PM?`$Yj95r;icMyH1&DO)wushG!axjaUTQl=E321N0VpMrvXy&oa@Zpcsz_7L`|b|&6_|2%(de@ zETq1$8n@E7Mr0N;KK%!~6`!K&dY7*R$E5e-XdYpwTyqb{fTFKz(CEaX?%fejSA<8Y zrJb#J+4#gr*6SY(#ed|Na_$@4zcrp^$`E4k?Wf!b|DWC2V)vQ4Gn!a3W)g51a&cNe zRo}?KpPMD=@|>PqFxh@yImZ2UyLlXS3%8CN`C;d}C2RLhiR$%2PBEeJA7r?#7P9VI zQr5ZMbw!`U(ZXLJ&&XkJ{eFQJ6mB=RO1u5Msoa4WL^Vm6IAyxEGo_ zm4`no-HrX@NiE}ggxWsUp2Xk2Y7+0D?pg7_JXQ&~;7dTjtxOSqii$5&)w@%}hQsdK zn>>u$ay8K-eD=H2CZYNw;l|@Um~VxEB@v~fen=+#B3Qy{<&#M|abSAW4U)9tokFqC zrkluMu2Wd=*PF^6Bxw{e(X*zKI>T2HI*A>vqrEm`E^sNO6ih~%{*fLQ$Q;CT1Kck%V5T3JF-0(rX2o8xul6UquD7@aRs+d!5re!v9< ztwtALHo$EEc(i}Im1+(OIA2wL1cWBPNi!N;cnaT2MjYz2{ylQ?p!W7@*yA^#I;2$t zjT1ieyYqJU@K^*r^gOlAO+Phl^w-m;h6%*qvfw!y@_~8i^9{qGa$I(+EYSv3unbn?;`3`)!y)d-S&@cIP*n5>V=oz zG@)JD>A`-K9AdKsuWG%%B?S*7)SaGw8lvT0XF`_!r!h@mTj~L7?+hL?bR(`_Jvbn^wpgD&IaS)Fz)VV#bIyXJDcP8<}PGxyvVcp z3Gg;(gda?Mek5!3Z3j`5sP(uCmtarWN@nWRbx__&rT3hqlo$! 
zDV}Lh80Fmlsme`Eddi%HJKrHXyfDjZfr+uHZrvuL_d2tZoot7NUL*&%R-5o|wE#H1 z#f)+V;Pfuhy#a>)VkJg-VPhi@AU}IBs*>#N(&xDzET!AEaxrAEL$gfF`qng{i7Z(a zU5k=-qE6iN&A$;zH(x1URXg)gwVF#0e z4eKw!ZABe>UDfbTg$E^L(UJ7Wp7&kW--m6!<{OZD(ulRK3?~ zG`#Wk!rD1fSK78I4^$!Eh&f-Yz>^ETLJsoWV{MO$ZUok22ovc=miyil(yuyrwKE=l z)67ylQ# zA`8jQt1G2I#jH(SuC=D*6Lc8q?6a94>NEXqetgEy>iur5eTGt@G@?eU0INl^ghu9f zUmi*gO`WgMUhvjlVmc6D-v&7Vrt1dpa%?aD%$>-t$8?H%pWJ zrqJwKPT zwS{WcpZ%UvUR*j^>H2Dhyj0P@LBvPqn8S~Ic^nV-eb7{Rg%W>PN54KY5cD=Y!2J;E zlU68iwPIGrkj>KhD+g6s>!qhM`WPu&{R5YRPQR(WkNS#J>e8=Lj^(}-f}fB5;66>g z&Z{I($R#6B7{i6rWCQoxxEXi6JlQKz^I_VJhLsu`iq2F|91uwDiGuzB#$QYG{i&5W7BI2^7bCkz5p)|TqMi6fT1lugw%;=l>y zDdxE2KcC3=O;|aP8}?F)*)@NIl*DY0y^~ZgIMr-5P|TT=K*EQpyDeDz*2vWln9vdv zkrUyJ31f+yhXu9f+!;;4MRD|TdUVAkQ~bHKo`r+10lVhpv`)`uvlP`d+5JPBuqf2_ zb+Zwlj8SojbRb1^o2Fq&4XUG911XvP;WtS7ai4!J1#)abE<7zYl#Fk@KGQHkJ zTp8p$HU|f@xl>1W#Jo6SZbKZQsf~)oOC1)e2TJq5er>I2;%X5hXeeN}HbK-f6t#!F z`$oL{BHr#%;x~83d_zCsMzw@S4)enZ9=YU6r=%u!#C zmB9D{qkQ)u<72t{&hEpQ&gEilC9taa6C)cNbe0HSx0^Xs=(DVm;9cHx<>fun;dK4U z+igw;6?k8XOMpm|+5Fig0rdU?$lt$z9|Vl>v%g$0fOJ_aNWWG`xdjF)Mh5^AVXL!Z z9&!pt^iU1LaX~4?$8@_cPvda&6tneDgb{*lS&;Uwk!zi;mp+v~pQ_$U1hL18(@ZE~ zAh8PycM=+hmuELh(ZOaXkur~2Z*0+p!={5kYIjgyOF22aHC3a>0DN&{b0$x(fj``i zB&q1-VL-q|#}jR{1WFw^FG;=H5+k$EK3Jdn&c!{|b>_3L$wQkOtfdHY0*R3hfP0w~K&&;mz3 zKK8wWzkSwtf9Xt#0*xa3la1HhmZPXNB4JutaWnWJElt-t_-8y+nAM(k2O+;Im7bk} zVOTfNnOK`IiZk0~UW)(e>MtUJaw#gYt!qV=@Mm&9_v@A3(~?OkW;e$63!jv?Qq$0bPUdGX?M6H^aiDJcWi)P=!ZZK+En^c8EEeCcoh0~i`1Fth@rgh^RYPaj6&%tFS! zKHdHTW`)1KOymUM?nmA}KCEGjwYMfzceRqj0OOPaTEH8Ki-<-`d0%lt%!wSz;y<() z=iGe4oMG#duI;-TLte7Ut{)du+W#mqXC)_4Tx3trNl_v%u^%$A_bnlUSqaXA2utg) z)nV6$v%^T590beW70sl@j2niqel6(HepX$ z+TK{x%{OwF5-KCZhvB22SCjkybPW+*Yc?^We5e?IJt=-8W3bb_Gaue5MgG;)a{>ImD#wnHjQq(5&P73=xazw0ov!J0y94xpbyXjI>oR`7QiewJ?lhg z@gS)QA(dTPm$Cj}P(p{?FVra}v{X^k#ba_N8q2zh#-XoWU7fu@ZM5mdt}+rx`kb&w z)VEtwxM;|slM~>^HAQ_!Nih$1TQ$q|gU8Cpd;n5`^&bKuyyzUan+Kp%s3{<2QgM^11jHeI$iNW&o>b=|+6^-Ms5(o)Yj zW!4j7{7bxkPq^%nshppiZmq8YmTk!G8{cjAfk)%1F~{mil%#HN{7VD*FC)JsX9@#Q z34wK0bw!j}mzf^7ddg_t3_EJW@v{mB+*gFjAu;4B*>IPcsIV95=;KP)zG6APP0FHe z3Q6j^-$IGm44UXsD%+HB9}-I(ro4H3|B!y*bEYC(3J1Xw*S$!!#lSY#y_-rvikY2) z>HW{|CGhJU<}2sosEV!L0mZDTQ+n{95&rK@nqsUPS6Y%6p5wm<44nIg2V(8?pWv`h z-_F6qJAb=6I7w6oIVg^1b@RPy6_r7GGsR?P>oRwF>EkmbUY}>S)H`?Kc_&dTe^^0F z^g%H$tr&+dy0xiyHw)RLWRLWNzmaWXA71T&P(v-*65~T$$r9oXY%dGF_r@ydv<|9y zp|N&%Yr9{L+&os#MGh?sf)gi#mucpr_SV&BH_#dOK93O7P^A$uENV*LO@V+UCe-e4 zt?>UxwKkU{ePuTYN4Wn$mTS+t6r`c`NAhQh&%b6zx!Hk1M|#mzPwxE?cc{q+LP3=U ziNXgX>$@EwUi~Jg)WeOlsVvEMx_G;7rXZIoBtaZVN!!Op9MIepF+WWH9<@8fnP(r< zHldcigpUSw-tI&vh2KjC>*-d%l}hy!Q$=^=Q_HJu*RGPW>AslzgHx_+N7O6LC=86q zj1{&IS~R>tb@)7!pnANW#g1F!1!4R&gC z%h$dg9*%jqyQ(JVp_e{?yX3Y1=G^M?WVT(A>|!NZFQInfRa!`A@cxL}=32Pc8}ZNx zTYQYWMy9YHZj0Ar-Kz~QbzBBg`QLEy?>b#u6p*&fl&smskg13SiMW=fJRjq#4g3nq zy$`$DaJwnn4o=aKC&T|?nd3XCb0^^AHcZqXIDp=M4t)`Z40LcrsdK!v2Iz7_IYgr~ zFCg3zm68(bal8h#UcZjsBA&(tTp&6e{zDfLi4@IkQRB-j-5ql$nTN^+LE3JkwfjU6t!s~sU<#w@z6xx zhRi{{DQ<}Nxsk>%>gvEeBwe&x_FNiVu6U3ZhDDfvKwS{E!?&>M*AM>~!_8G6%6)$( z*L=^yFHJLiI3lD{h|f+evPqN1M`2uhRo<%3{g;IB6b@Uq37#=?TBZB=KzQ-(Qdb1r zwml=5ppdv?23!`0QfrERBH3_B*x|lx%JP;qJ9{O|FpC2hd%$3x)U1$#@1q3tHdvkSPAQCRnT;%gej-3T;M{&Y})_=(Yj&V8x z1S>-SaSkyJrd+TFhNg^Hi(0b^=rmQ}xhG@*IQzGwumt%#7=vc7s*;_FM(R5==<@GC z4PF%E!93&{pJj_%TeAQS>i1yw?K>DQf`I-jque>xF~i%YRQj8=t!I#HeANW?CP-UQ z(B$}CPs7PR+G!N2-LWlc1J5gk5t!^o{O7wpy`a9U{eO@S_4Yr#L?ZkU;Moude0|J6 z-O&6HI>AeE=01w;$ue3nB_B@0W7!!}LBtq+>#+Ae&it_%7mnTIv|VTWJx= zI6Rp5%h{TfLd9T9-Bvs%`m^9_NJ|p~oFYa{0AekG;IXxDqTHXbYOzFP z#~nGy`~&WI3Z`D6B>|0x!p||OAl@EjdIkX-l6WvgMMe1?mXH8BNYQxlqpCU3hl@*& 
z(++I@rtlW$_XR-3yc(MCdG6U~t3qWLFs#@p<*rs=nPKS_`jyCf^=q~GWP?^p?PD}E z#tElahL5$l(%oQM&=B7{d>p>x;!iTmo-6k~UYny>h=gp>`7G|YC3{X~T!8rDRTWs! zd8U2j`!X!6>@KsfsAW_PTbSBgBdV z*s0Kx2vTBfH>5h~yHJg&C&U@`!fJkZ{6%3*189WiHEQ`S_@McJ`!WE;_Z$rC0XnVr z!EZ0r=O~T&(gNI#Ln=g_h;R@pZ`bXf-`wXftz?(K_Dpbcn*zLL&EkmXMwDH(OS?9d z^Ml5gX!j*5w9pf3#Q}+sE_&UUSneWjj%N9OasvR4?e*&01E%$r>TlI?uChClSG_7B z3h@%p4{v08+v4D^|qo3`4|kugmzESIt~pDrI^| zOqxUSkdK$AK#GbVkf~|=V#S-ypfE3Xed@Xkp%Lm4yt)^l#)#@ieL}I<+WO6-Y8WS^ z7J_~=WVizB920$SEe3uq7yzVqJ)Yi_8F#Eq0OUS|d~msa_58y~F$(jy$JY0e)mSqW zl{?ZZ_yFIQR0H}Q!`f#4)?F!>u=lq>23#@@3x8O(6e_gdAbbVO# zZ}>YiVzxphm`oY$@_K00mL0XThgE;!PZ7gxpeL#rw&rNbhTS0r61c_*Jr$8oz3(|f z{angWo}>RAx~bg1*9LKUHWZgTFHBm4U+W?A*})_M({FY&TJqS-FnB<@4Jn%*! z+y{nVePVa-8lXiU>4Ndp3;;XfciAEUV3Ch~$$WsUsC(g5=Q%RSG5;`BM0~~@T>Z?x zx*VQla`R9fvj?(Q=tBO8(zrErBCF>IQ4hxCubTVE+ZkNS@u0sz1`fB8N=Fz1WP(BB z&krBJ2l$$A&p)9bEfs=s8fcJs1!BsuWts&ADGW&wZzMNj6#mJnH8{fUX*$!+MKMs%wU}`NrtXf4_2B8PDlkF$WZq0D1B=nuDz>kJNOW| zj~=hWN3hSK?BVIk;MXyO&IufZTTT?p4jEJ}>_KCJSX)AYt0HQ9F; zg&mu%(PsQZU;io-GzafgP2&LAj70b7Sf5ieh@7r(b)IWI-}Ok%*wo? zV=JkK`2OnVbL1dOp3ZL+kW)VApq$VhSXBb_O8#2Uu2d#PKEguczGkC<1{#jmG&an( zUczM0c;N1}_z&=S%r5UH*??`2Ap(4(`9v`};4c3H7$}SZN~yHU)H8j3o92V^DQ8;i1^|_a^YESK7XUdBi)ppBjI!u2E#o>Y&>p$uVc2Q7?qXc2ky09m&?1mpsZCJ zA#yE)kPSfzD{97zy9JKHkd+iIB?%b7|j~)9jGMYJwOqwBryBq+Rf}I#Q6M?O3`V z08`3Gp%+gTlM=MC<3`9qAjU-m&B=nj51`C#IZlsm79Z3uEhm5e?s~d3U@bWg3PIFt zrv3BvxA#ENJ#pgFqt0+n*=I1%ReVj9r?Kory#7(ekNN$U%KdY|Cda=2xQ~Fx@`k3O z&y!d-n%F7qIa&$LFkE+3XTO;RcwF}!u)zmb7A5}r6nwz_M*@*rU0k^LrV0@Qu~+@y z=nGTGAkb+P*A@O}+Ok(xJSOEc3H!hY*%wDTGu>KZO#@C|@jN(w>y!KfbRjjev1KOb zgfA1y6!dr0R|v$OBA>Or^r*6Z*!n`LPL;zv{N~Cv{nv$sCw&zb{meh~kp#~$!12{* z^k@nV*e)8xumNP3{{VVCz+hUSDR#S2jW=zfWCNBlMfzU`+kp3|Ec4DpcHT9w8&_J_ z-sY+AURoR+p|@^t&6Te+{le4(JTbX;dN=&6=gmc+{X6&Nh5creS7~&=?OYa_rP|pT z;c@T8$5sLX@|T@Ok7|f6{dVS1yw%jN;D>hT7EdY%pKr)wN+CUu3cpfCkRr%pI?{9_ z95R{E)0C1za+%YUl5(Cv3=TwQVGa(%8peHr(J+CdBa2G?5iUZE9}_8l)jNARJ8u<} zD0H57oO0g99&OeyOfTM0$EOk&IIEd$TV2dW8aZBOStFx?HnNx*q*Liw?Yb}YP5P9H z>1t~$Uu|(Xj5ve|*RC7hYw)wDn9q-mM^SHP>}QdWcW zd9v58496Gl|B#gX(*wTt^Q)+EK}R*-(_{1xcbHOcw8FZpT9FD76ljh1AUz$o5D(Dh)B z$DpJqWmo5xjN{V^GBXSsC^rf7_0Um|Kg~*ixl=+~ryIBG{rbt$IT=5rs*G=D9L|7u zLMyySc`gf~A=y;aMocs&CI73)e0?kZ?H`y3$yTc0Y(Dzx`w?Z6OWUjZw;L$&vv#n1 z&yq`$kl=B=R(-Pzzb0FLqf$k>1>Dh! 
z#>rVHBke{|!cucdkD~+q)|baT4Nq9=8q#y-ir8$d%qndxyQ=rEf2KZRGvMuH3L23O z&~WsI4W~~3;?jRRzO~!NwO56|{h06eY#|;KWo)(o{2ktyOVQZw5GyUA8>bwz=Kchu z)r;3a?fpE7my>cc=wgA&?^<7#H#a`6S2W&JKfPbaE}eOGzkp|6*svs0xv9D|=%STt zeKZ^Hb>%Wb{pmAO-T-D;;n-%tndJ@UoA61ySG||!3DY{vM-`N&DFsDi2d^=iW?ZiE zmC~!imcr?#;$aS|_=x|W)67edcREMy#rRHzyN?{xi-W-$8R@Kn!MFD_T2ORr2!y!`m&CsmNq{QZ z1V&|zMRbFTmsZnP&+AWbjtE}q;~iR!Q>x8~!mDPIuex`&HGJmX&(-?0%U_&Ys+Y#V zH|b2G3$sqivAvCeH5$`gTUha1ZC#`)^j(hccB&W5e zzSTpp8|U?<*JnA^XCX~K9LucuBnh~=0ycLa8&+WvJJDQgmr-L8UG<4km$K}%#{DMM ze&;bfXSw%>huK!Fr0OjSEVtv|8C_l9Pd%A6_?4_m28F~l%Q<+mLnT!yF58O+{V@||YEzh|OvzwGVoD+i!^hlRb`Zegkkm;*b=xOcu znJhlF-4pS?snoC)%DBXE+ZyAGU)3_D7r~v+u&o*I^<0Qn5lAgk-&UB|soL({*WzZP z&FT%h2#u)~euj3}8{}RQTyNAvY?3lx)!jMkay_QK$8E>Jy>ph78f$R9d;hHaoP~Jh z=!p~N-6iFGrq!E?UPqG(xYvvJp%lInGqGNIuL>PeG3F&j(p_7x8U1hb=k4vTkJEQ{ zkAUKhBQgJ1d+8qDebrQ^)ln)=UEfCHdDPxpEPY5jiH%*H=I%(e9#W!OVX_|(IX4Hn zn&oUwnMSehkKt^Ebx&4y0!VFX$st?a47D{~epqEYqC>^Eam%&{qzn7&p#dD|NcFnS zv5JfnJ05Qqvpeht^WYPr$}}Ewt;tWEQp1Vy_e@VKwWVyrOMFACG#gN2k4A!Al;3IZ zrow7#2WH_-ZVxtdikl{a;WHRw8*y&&YYB#T0@*XD23PgSEC@Ka7(j=v$9(@4rW6T_GM;1Eyo+-Rr!VD+PEY1RWa#Jquz48 zqx+@;uR2e*K9&yiGBFFx#6ZmMN@Qzt?L2O2lX=NWp*1;7w~uFS^7z=`9d8(q%$lTk zuN!v$p?bV8O{aD1)!ciV_$NySsr4@-!r?Gm`s%W~QuDf9__~+S2BlvUtdaM&XE9Tw z_RnU)$znXmbOrpQ`hY0wZFcDX6#gMcGFE~5en@hDzx%P$oTpD_sMQE10XOb^>9y5# z*?P?$>7;y2+V;PT8i7b(D}Sumj;T*$zo7Mg+*-_hf_oQM7~X67 z8aQ+NaPbHU)k^y|TQB^ffx= s)1^U9F~As>!g?JU*GXc!!y3*Gl;F2Da5s+3e@omPLH+XT3E$ zSyhMDR?OehL}6zWqmhgS`1)*;Xyf_r$L#?E<@8HUWw6!_C%E%<)lTxtG<^{}GT1bj zhArRQr|cfQW8;a1J%udH4rF@4dNScw$9L}6G0b=4xtOF<8frEL?=Vq-Sw&KU)wmp5 z(m}#G@PsNoLTF83zASN=8=OyR6UjAr{*v{~r<*f+$iszAMC&Cp*;y6qqu-U($MUq= z^lA&G*hPE=$PIgSPLsKKb`RFRb>V{IKs;(AkD|6aSjH{_ABFi!lSJ3?G%-E zb*>Ap{xrzDHq-T;z*zpBmm`zU>LnORdndp7POLvoVQiZG%e&QD?v?hn;KG~h{X%BC zC4&l$5_p?V!`}LOX3Tb4ZyvQlg5-^#m`*mXvY+vn%em_ptMHk%(y3+MfdWldV>|ui zJDsbiCD|VoHiZfvzO4H?He#SMTlPdf#Efj zBX_fxEScg|G5$YlZ7oZZKkf$*`l$z=-pY5x;T`5GrN?PE*mn1f7HVkB5Z^IocI^}E z(CL0wu1L2_JTKiEWk^4qj-wF1J<@G>W)Y>w+a4RywK-{c+dpk_yO*!~v+_Hsq^YiB zw&YEEd;d5oWnpQ|Vwsb)>8vJLw{C-Tvr$Kbvo;^gu|gf`x_B?ct-3F~kLfGWZ-0Lq z@;psUF4y&~mAv^)o2y>Y(zLs6;JxE{yV{7Qy~m@B9vwJPFHv~-LoP9qbJjDwDYYESCTMZdS2&GczL?msXejCt^Gvk zMDfW&$Uff>c%brbPcQm0#l(2S%2%iH{lePu{bDB#vRzUQP4`5c>!cIOn=!?F%QnsP zH)tjE^V-}T6OCskiAg&mb$o;IaPov9Td-rh%v9AIFELy%(JCzRr-XUfzf9Rzc^|Lu zaz1OhHpqkv&kOa~kXE<YahlR>O;$8kq=AKIo2Y#h8SmwzYOVwHM-@jK-H+-!>w^?cKg#pScU)J#k55Gt+5 z>3(v1;>~ry^?OZtbAinZ_8?$y9sMqa0R*XE;y-;k2$Ta=n!nZ*F#zp5;_Rn30uqMD zjhWM)2MK>#-j6lD#-x83#PRoxX@Os5=lC8L^ghjdvnFO#5G1QsS9o_p~Fuk>n$t?i~3GJS529l%lbEL-C!%&{KpC4N4!uuWHh189I- zmG`b_!Bg*2qjk|jN0awE@w_%6BZ_XyV|OiRXHKe`u+jJd?@%9`#xyUR0H3YPCjH!| zyTv8x+Pb2vE0Llw6VUszZ2CGOd6PmXMv4Db*3 zCGqs&E<66Al5G`wOZ51~YqW%qPy47&Joq9c|-ni3-ZVR{?T4OM z<4^VQb+zZ^y;{AmvyCivwYa;BlkL)hE~hd;-!fw3^K6_V4tM6F#5m%5jeo&|P9SDdy6HvXQAN5@R<9+ozwC;V}EMLDs7++@8 zOKib&KV=XntAEkpsa%LzOw?ceo8E&%zUDb}gpF?EBOnPY83HJo=USo}0WR*hgoH=x zg<3<)?F+5`BlA)eBBMc>#xGF`UQ$ivt9xFwT8!oE78_#P69ZQt+q~h|HzWqrv8MOw z%eSvc5~*HJu6;AQez&QuU#ubU5_4N!jdt4)`=Lc?pbJ$}Dsta|+Ih~0Qk ztdX2p_s0j8PzDDw2-j$lTko~~9PXa<4xjj_PG!G7Yj;ce?VGpu+bnW6$DW=vuqTz7 zZzQKr7Zl}4oD%r(ADr0cSGUi9hZ14kE8+^Kxk4qrj+}Jq?pV_<-;>*xjw{c8QxF`{ z_PHPT@37C<+25GlxFwyLghm&t-)?^S(Ia*+zQ#%C8ESC0Tx7w_vlizOTn)>R6$?F_ zVV^ZBip3u4JAYXfvnxal*N##zhuO&wKCA?_1+a->)F*X=mg@6r6KmB~} zP!!$me$3rQC!SBcZk8R8|663xr4^@v@$8*|)PUki>ANmGm(Afe{l}+(^gIttZBr05 zRJ@?W-QT%1asI3Kr%Cjz>%bS=_xsdw^)&OSnet=H692}d)K7JP3XSH=))N{?VAl66 zHQLutpB4Oa+Y>2vS|1+W$Pcc4x~J#`Y6Ja4Rov^DZ(Wc4Zr57Q&aYhl?8dEn>{e$? 
zBn}G-xl+bVS&tQ7ln`p4MVE^RR5jR4aFB;8ysG%D&M7~du-$8U{m3fLE2LfB=~thq znZ|X8wP2y81|IArF})?Wn#07l_pn=)Z|hs#%yh)%dtg$KJYX>)NZfV&Jb|V&A2Dc| ze$*rO1vP5Vo5PONo8y5xk5(8VMUeY`lASc9C`uJsA>Lhke8#i|zpd0ZX+b<4seH?s zoGiH5nHK2wCn4DBVTlg&8YMZr;Z!={h1j(8zhY|j7?FVkb%dJ0X%M5I z?9Usd`A(Lf(Y?ceRyCp+ zlmv=Zre1AM=^$E|pH1Wx6f1#cZVQd~%!la$)lxbd!?UBrCf|6y@A(o{!TE2h>Tl7$ z;%%lVfl|pTh`#kik&(bM?=h5mo!j!*!E|$#QilO%i3bQaxjh`&Hm1SdKNUl~_gAWs zU)Qj8WPYQGvPfmZwd!>Fi<^*l|5_CxX&yQ9 zrT+rl3WyvIL?A%lM-fo5EB@z+tL+T>QUthW2!&mUL4>@=iQoFK&gUU&LYQ9P=gO$@ zob1L%jYX%w#T^7h8D%<)xxE(om!kB?$wj8w<(9gc(Kol=Nt zV8|#YHI`Rsx%gHNM!2?VZ_t{7FB8GT}usi$(y=Ae) zgu*t`^UjIs=7Qw7kLc(J=WO>NKdH}NYB_^IF6ax^J5HUeKwlWjQlTfHT95-b5V80f zNQi(G{Bq&qa5*?Qc=-1Rl+&Xg_8#l;?nMvi2kiO-fzUB1Ta65<%Kc*c{+>j=MC;&l z4LJUB4j=|s0C`((kURey`rF^OWUSw)!8*1vN8Z^$RjJ);I0tCilo1x71qVKz&%CE+&eZ`VGK=pWm z0vOMnF-0IE0+PI};aYFvujOzd0umC-_^cm*92xEOC0`pg*e3jH79(>=u9O0AiyQ^N zms1C95vcdf{h_6RYWyaMIfjgA?(v5s9N=lb`7=&cS=gnVX)8GI;CrJ^@uWxv;@cU{bGh!3~QEmaSEjEksl9AzvLavoqfD!Y5>ZFhn-AC%p{p?C6 zn8P}rMzUU{@+Ck6uTkdsS8TpR^DHOQ02KbzI?1g3Exdy+n+6y_P?MDn94|3ommgla zKM=+ag1q*a7v}%&-+wl-V*r;a*Mx{hV#|U!Poe(M>LJcOL>LQjQpslNWwZ-)h-heN z(8dPd1KQYDqj|6OIUU!rxj;)oTF`SNt|R%5vY|&$PAGol_vMk`#X?Noy(T}$YL1eH z56ZTPH*Nxai6QHUzv=l9d`s4zIvv*YU1jYT{N4}ifm{Fm5vfBj+J>;++R)i6r}$%0 zfZMBBNBUb0qYuEPm7e5T%mj@BpenST>E7t^DQ!!)v>rsac#-N`=CqF zKRs;r-36T7m-k|-Q?dDtFK_q+Mg>~8opNW!!Phdghx{Y=+tbi!zkIn^@sHjl5KVmM z=pLwy#gM4981uhBR+GETI*+*%QAM@Aj+CsQQNj$MUhqX!_{#Dc1UxJI`%veyK)b?p zkN62Lw1NLwP4_Hur6BhRGE|9wHQMhX3ZQ}r1L2b@s1^Tco~kf&0q%ntFytwVoW{0? zlY4DSr9g%If6XStAJ{tvYKP6gUuOXqa-K#>sCb~=1PP5pY+B3hVO5~nkBo#=Bg#Mn zO3DBJYx_`8`RaH{cVFUOz)M8A@dqW;0ok0XP|D?A02RejJ=(*7GK-Os(I{wmrHc9K z1JYd;KyQ2Gz!(32X69Zle)HprIx5E3hzR8}tPhgjWknXRe`eWwvwG>reAQamFM z*|Y?fU!rL}G-%rgY93+VZABHlo-9?^AAr!7jH>kAh`ta1zGk*wB-6(S7KZVrP>t`t zbUPFSP(cgyyB*OF?0JE zmL)P!k!_o9U3~x}9p}7RZ^G-3tRn=#7GHTIu3@LxY1*jX2BLdbvU}+eq;s=^SFA>D z-4n{!Y}CnFxaSu}I<5Cj0-=r58Sk*|k9;O@x6+98crmAke@?GVI;e&6b;jREQEP0L zB(ab=WNOn8AO!>G=zTaf%!x?BdH@It^8o!eItB({A4?{9{CIO{nc5wk=U4$TY(zxQyn8+_5 zw{2pKZ%%H$OGG}LOBx$uwIS7EUY+V4!^{nd#9`nB$E=jJ+zV8G87f9bIiO<&0DSQ; z!!BefJX!@l7(W3oVIF|J-eErUS;Td>PYgCXJ!EVGy5#=ia_D4%%7c^2L|SOHlp;cT z1x#_QdMk4=f$WYdB$6OuB;xmCQ?DrOEA_i_`O=WM{eiFgb%>z#%2vCov^gq)4tV!( zXcqSn*3pE|CXGEev*Sn0O%#fBYgGWVYd`4|+UNHWI?%{Km^7f2ZWm$In_-);QoIo( z8UDgp9C`v#B+-QHg0I%o1sa-C8+UN4xCo(RSH_@=dI1Nh`LAF&KcuQtwd;RXX|+-q zz(E39q>`XrO7|oM)D@9|6St#Z7#SY^3k;SShEiP+8_h-Vm_AArf0zLd=v`P}IaU7#B0!ZNXn z`fSSv1a% zU$5&=3gSbz7CGuCV1q*QW*`%N)e%lU0x0MTh4$-52;UCULpQJkY5-)zSyKg2vESjv zFJ^{jv3VQ&`{3{V#bL;6B{L>Ap$%{D3-YPoS}uAt9inSS2R0$%u>b4I{bfL^f4;Fv z>y8wx)Li(Hk9oV8NJJrK2g6Qb@)Ww3u8T32A&bJ2}1i5-y>F+jl z`mxSqdgYxeFIb>&L$Y;Mu-!dTZ9Y$MiX|r<a}R zr4E_^MX!jhOk@s0M@~6Y5%1i*q+dlo#-EVBGJU5oe*hAU7hnrI=xy_)!>Qm<#;(K9Ir!rvhDlzij zX%nx!4T&U?S#>2yH9p~hPl76JE7d%LEFDhv1IUf#12u&tF~7!tbA^9*j4+7G>rs>` zz5#dIgM>w)JXm?TU3uEfA@sSOhsn}D-ZWqBXJwDkcVi@+PHphK{cI^x&4-Als^eD0 zwbB5|f3+HKFbZa9J0cPh{_oVSDFG{do=Qps3JvE(&%Byg!{%}yi`l6e9SOu89ghJ= zxb^w#>8z0b_ZQ7QP>*5iBbP4HFtVWAULk<#Zbr#a41p7ISo!$Bk{`4|<3ar4h9;B> z4yO-DO7ywr?a^VbDvfT=_fIWHnk6!g8BpCD!JP-vUr74{0zTl$bEsc@&XbR0ny&Y# zfH>Fod20~_p%(0aY0!J|AXLD(GEG`Z z!EY}Pr1ZvtfQfvTf#ClIJK&fARG&OQ{(-v|_&@*F$^3T3Rj8q;f!KY>* zeO|Pl=A|?=G^F#>I#G|dkzqO;^ZyKJp9sp{?fUfTVgf0JYTh(z8iA6^UOJ^Ts5by( z@E-xK79pR{k0}uok$@6v0cZto^(J!Z047ub0*~BOr+_}f9f%|8vb$7Kj@1`4&@ zK3C3|kDfK(=_mZGAqMZnAQxa#cFpb4uftQME=TKC#oAT8`8Og0dwkoCa%j6kBhT58 z5jv5n$It62K%1MX&E;QHUI>DMK#<`+1}(h=PVQYGV+N`%0Jo$L#BUHQ|FA+XtP3M6 zO?gC>vVW{>r0duEuY$4Sx2MtA?VI$6n$p+G|XR#5AnQI01;~t*vJa9`~m*QNWC2&74dulbpeTpXQdu3 
z%p>!iBsdz>0w{f_iW9}`R89{#Nqo%c&wko(NpgYyGpG>3YGO#>n>3e~mpWBTbxgu6 z0K`X&n@<0f4IGq6!DDWA zsPB60r$p9ggLzKnOwdUF-<_Ka1L5Ys1#M=%Ms2HotI$bis}gCtzYto8aRUSbUXZ&l zI*xq}1Wsy@*aj^;Zj)|;IiK@6EJ}_Rus`Y10Nlh>$~$w5kY{V$Rf&Ye7_w_JR(qDk z(6l68<@KT?BjuC*tfaVZeTVudOC_?c$k0bBLo1vuH%Moj%21VKJUTxf!ergvT+_f* z>z=QkyvlluRIdoUOLZUr@~L0|kaE@lJfp9{AyDbKDlclZ<|2mh@DaiQA^+VQypGX8 zzj^~1&<*K#LU{oaKBxkHb7yKBkE3mHylbpx7n&=>~HNYg{j?^Y>G9RLG*_j+-CVvEwF77$a z*G}VG>yti~s!d6I4T~fLWJs@}qY*%{@B}l#`v#4lM>TvOs~lhL35g0kU!o-X42~pQ zy3*xA2OoYh>FZ_uotp1Oetj|g$hK|T1BnB?Q6Ex@>H=4x0k94y zK+A1yS&0pl=a6)nOU3SD)4(3Q@%A~{0YR14Q#;bzT3$=2%n!UQ+rdbQ(8<(NSiT0e znlko=!&i_V=Gbs~5Ebed%!3h&7kdCe_%PHW?_W zyNBq3|7^7_EmhP&Q%Y?RM6^sGvX~&1Yh9Ob!GQi{57Lq{-Dc*eZ34qpqC0z4tN1pF z4pe>#@TKuhqd2G{KQx0-x)3CX+dy9G|1tI!P*rYQ|FATO;3ieNI|T)#Te<|K1w=|f zx=TVDY3c6n4sioYH_|2D-SA(VbMAZZ``+`7@r}V??1#aA)>?DTHS;&u90%QO5`N$% z!H&p`%DbLhr<<#uY_*C`h2<}&iYAI)uE^4F{u&pXJI-ZpYI|TWXabljfA2O64!3b} zknyq_2zQi=!gSz!if731@kju!p9^ZX3PGj#REv8ftdZM=5ecvOyFL2c5MOJuui|7p z)T$svSBbMAOGEMOT}%r>Zg|Nb4x)Zdj4 zMi$y$Z1#U^#=qb4A24EVO3?5K){)K>w7+u!J!xLJoqw7@6e*nUj zYLW!+J?nizDV~-F56i@Z@HGHyWdpP7;_?!|ihlP95llOUO$Hk9zb6gWkU|9neAI{F zw^($}-he|3bemm*!ow$uG%80cjR#;-Rz?k|`d=<1g#Ssg`nE*q;*9;0m}Gr_4vQ%+z(3(+-LJ}B^J`ZkPH)AHKhYlW(I<2ex^aL{8BJN_u-fS z1EGPQU%er9+ZWbPSkhI%0rN8DkZAfpYX$Or7jVa{yMMrA@5mT|~9~9Uu zdx4b_f2CRebL547*r3i10T^IV&}J-Dv(~aWEse}#x<(G%_Q`lbwmwu1{zYd?Vedb` zchA`id|>PP6ICR9h4c|ETP{ApGCSb{mwC4r1D^P>y?q1Q+gG;Gzk=ESxxu?2x<995 z>|X}~dkeCKg&>^Cppp~fu>CuIe}5mwQ#t_6Id@_!g!tbBGr}Ee1B1(tGr+Yh zX5a>FSQ7o=2=19n1u*FpQuqz-1?0c%s(Yi!5k`uzN)cJq=%zLh$Q=VgM_0USP;Z2X zkFRzpJecVU+uLwwvWNdRvi|-A5O(a;OLu|~9z3|agOl_M|5vAp?PMe;9E~Ub0~Or3 zweTIY>E}x=ZA-hmyW(2f=)t_Wj*faWv&k9EyA9idYp1E4QaB0mABKNArp|s??H_Z@ z;W}Shb!ZQzF1=LA+ix zG1KwBC50>zZ~N=lkv)L_?;i(+2O>?g^AwLDh@8}9T0g>{vVR6`*-K6JOlE{pg#sME z68hVqiAR-xbdZ4-O zwNTqQ;yVm0^Ciq-(*K{!FU0;@ytv#EEP1v!Ps_!{Rag_=N%RdyycXxDul|u1Ixt$q z)gt-8!}Xy7;@LvOrr`kbnjN3={I4&3BU|9Njt;H_Ve=rP<&P8J(6Xge# z{QR3&MjVoW3!5y_6d@^&=p%k~5x4aaMR>%d&^C}-%6B?gLNs?WZmd*G2#@hQ-$)J9 zFnwo&j41Q~QQJr8%evpTL&oVT3Z(OBOA^Vgt&0c_wPf`t;>vX7K&hlbb0K~)iVwf& z&Y*OhD1Un(hd01w%^l;0k<0GgnB<}B0AeD@*%ipxhvD} zD^B-sSzU)|qvwS2fAPczRFiTK(Eu(4!b!4)EX4j4ewmb%6kLuEvM&1z-ep0K!}$i9 zW@thI>N1!oU}-{a42atr0ko(*YV}6e(rNP|r|{?}fj` zmi)zN)TO-*H#BeqqAO-f$##AjO^^>vG@#=Q8r=9p_Szrg=IlO4 zOyPHx09mV?QoYt;(DsTBWV2Z~9A)1ifk`r?r9Zr%WVl!J!vly-kz@D}OctVJ4T}6! 
[... base85-encoded binary patch data omitted ...]
z4GeDfh`Yjjq+?>*Dy1Kkw|a)-ZiG(=lnU)Z7e}_W8y(I&tBlOID$fb4C#-=p^c$bq z8hbo}a0u5bBn4oKSf|lpHaYDeb;&C#_Ua8m8LoGM2GLFsC;7N0?1bgsropz!>9XpB zN#tU#7$W_5l`-fA!{6!|FQVtx@tI^`6M%AT&@p-i216{N2hqTiK$h9Q^LxzkV_tm$ zYd<^9e0xTE6-t{XcDqD#2-#f_ZktK`Jxh<(pSUncVVJhn$&;fxi_UL9pEXd4IJloE zrB9ndnsj6^TytY2)PI?#Q6R?%{aw=C}=?XZCE1x1d5Mzs;LR5 zC>7H<(+)MQIR2<|+$(6y1AKiCo+HE?uwJy*gUtc%?p^X1&$RrWZD~eN1B;uO70eam z@H^fA!UsGjGhr!4`tFikg#2Bk ze6D8*swkogewxug;?-d9fW%fdO}vm-CaCGarZtj*sFefmA%Z1w@;oJwbfWF}LFTBx z@4=h`YHl|K=_8!Pc(XD29gy*SMfSG#s; zDSPh`+PssmC)S_&`T2RY2k4p;_FGa_A%;mpHPdX~u0muM!i*pDkH2trf2u1Q1g-@T z+i=Y_SBCVj3cy*}0yb20$Y&219M9Z+(*z3d9iBefx)&fm#QaLU5_oQCch~!);LuaJ z!t(*_y0;rrGjj{*KWvG_Q7Lo4k4T#w!Vx?GNh}XtoM0C+LhfHMp-v1W?JR5%W)5gB z8zhIW)3&$Io=$D99)jT-i3v;v zu8?GNdSXNToxr|)zw*IW#qmGRjoTb*B6hCR_Z@(6%x_iYWC}qcZYYmFO&IoMtG^^* z+Gs3MyWn+RT$d@E<^ve45^@~Y@e}KfI|+~7j);V=wAmhC->*|w+*(?FLOAYu=U2g= zROJYtYU6Sa7eQi1?@95{{3GNt&U{zBR8@?Zg%*R z+~mFFXBql0Kch3`3R=VNlb88>Yik*Ax1Gh0c2te)t=N5m`Bd1I<vU$X=PD z)7%N;!qP#&WVy72$NODdaS5pRb1Q_aLVdF`jZ!ejZeD0|{na&$HG zUkd41z3Gr$uGQe(vhLOxSsla+lnc8O&pTj@Vl10N9-Bu@$(n*w<dx4z$r>7@qTo6&Fe(>6n>HH!>kpRcBPdBYIOZ1r12qCeM06^ zanBdv5`ccHWihN~5^^S1yXVmtb9^_wG^T#k$D%67pVzO^-gxhXocn`O zH|;C9ne`}3Vz%~?6t_pgipsg_%4$7riqOF9oy`=ov2w*Qo3%yIfM;kXwJf7sMVS#For!)?E+`sdO6 zUZ`bQ@tKYXZ9s+T#naq6lO2N?v}Q^bZ_hx7o)3yocws4UF77b;+rS`IB7fRibjndj z>d*Qe;Z@cY9%8nm{nrb|N-8--;$~ky!647>$fxQDRlW3z&KQ0+=biL3+8Tsn2!BTd zA>+DKufnw&ifK7{()L~W@hAR88)AlHVF)vITc2z~t)RaJ$`E<-0r{6|(Hec(C-1t4 zOb95S#kgMY>=RB0=aqu`8<;SoU@lY6tb3^YpWs(CGt>^H4wZPdt%g<4?^z@s4cffz z?XPj?f36O_lP^EfmkzG+_@GrgmV1=IcsIKKm8Q>ygC5$^I8n`QR>4TNycay0o(XXfvbKfvuQC%n34~w^VxhTu?p^939_ zMelTv*Z0zFSC((;B~n~fK+VT}d-2x?#+T0y24mhNou>>I8MJ|S_YX>M*(S=TSUL#t ziakH^WJg=V4(n4_WaApNP|vT?7Db157b$yqp-7A~Oubg^Id}N>R0_Gt;Fs(7{myqr zTV?Oe6$Nqc1Ov~~vBHqm2--!See!*tjnME~$llc#Oi8eCpR3Pc?+;!5yLSXO9ysJp znLhgpgcN=#TWz28K&0E9SwaI!{p5tCiKSIn(5wC*h~uifn9T>x9_;J1#tFriVbNjo zuEwOviSZ*Z6OJhT`mx}gPZ@*6! z+CTk3|8c)vpp(9QVB!TUkEU+bR!f-=_8i50UE}UyCo*IwR&16|X>f6x10a%j;LS>a z1()iy0V6|qxg5S3os68GB+w3yJIF&1A$C(;Qb=q&WueOXV{YlX8+Z1-_ktx!L7qHQygc2Q`N2lu^_*RDNl z#$@cPFW@m=AN8gEm{l3a6Q&~4&14iq2d2ZhSg)XqNH zapZ#z14?rnW8_{>2(gm zd}e`|oqW+CNImigl$v-kbS-dq@FQJY`vdvItINP;vjVkv1@RY{9r?*|Yvl}RRxtL_ zmm;=+zcm~lvI$Q#q)bR&OVZWxL(MaJL_LkVdSx?8ZzUJVpqbH;)==11-do{6LG(ik zT@Q2J7QG~BG`hy%D1s%GfKz@v3vTUdjA;!oveA*xTD$dh^w7FR0UYuRl%j3qZM1ED z7aoMM^uKw>8PQQJBrV3-rc4%r7r&jpp{!GXZA8n$_ zJxisFQp{EeqTv>@toNO<$KsW%+Tk+eL)WnUPfpT-XWG}(uH%OK!?5GGPc@HfFf)il z&_gCZMU5t!akMoKs4jOZMCi?8UkTAL?tsJd;>pr8pRvW%QPBAQH#nqJLqV#@)Z*Ov zf@4qqfLRxsdU^dzn%i)j(~Q_x{ow@7EnP9pRoGj4)DK!T&pUQ;>sLx#mV@RQZo_Xd z%Gh-54=8bvy`ykaTe#kqg^Ha)#S4_=`3e2CBFTLA&8qhOM~g2KE3L;TJF~A~6BVP? 
z%49_$VhPS}w$k93-j5F^G>ywPovd`i<^x4wo0xUnd#9{YbkVwYIxm%3bXI;fi^k>+YF%*Tb+@aRU%RQv^Ze*bqfnAgQ6b~!2>-H(su<1O$(^E_`;3|o;u7Fy#=yz<@$1h* z^Kw1Rpg>FqkU?$6Pn2G=lwMy;O&1Y{v1G7vosQE4 zMAsod#CsDcGUtB1FuTvI;m@McV>8F)AQI*<|8(%VvY@HMcQ4lz@VTRlra|v6A5nM7vk$$?M>S-a;no7lL0n9+ZZ4q~%#oM$ z_JQzubQtE3U2vFTgaUQvh76i%rTZ|m%Ag{>t)YB9gf+G zye4klwAL5z&A=f8;l?)Eh8cXpA*wnU#fC?#UM$X-SMRiw5lU zjV0D6TAd6hXIQ^vM_(s8Gm9cZ^Bgm+De*2MQOT_*A3O#&g?jFtyoIs!e@YKykj=DF&h(Pklo+TUW4Mrfpm8i*!v7zF%yq`B44Q$~|y z5_QOW*5PL~xFOXG9^d!|jR%{>b#9+fR-O$#uW#gypo-CZA|15}b_xMv!bZBt)aIDV z-$3#psyK?cN$fhmK-MlzV`;O{uKnI;_bc%}$`!hKo;{_TxKrOrTS-=^ z6#bby;`(}~xsqaCrRVHwJg)IaFf7`=NAAUD?4uSjI}w>%V>Qc)LNKV~aI0hU_u2*zrWj^nj6IrK;X^`!Z;s~jD+)Rnl_+c zd5uMe#b~k`FJGJ5*o+G&B!g)&dgMFlpDS^obq*Xo6rg;Ha_otjl@ ze!4G|@+%rxP9auy6xVm~IAkxV7da>N7_wCHxBpY#wC5DJA_-vjFRm!LyNyO zmG!{EuA{eaY4&C-ZJvaY(j6v;70gwwbiKHwux# zx!Zb>8|yel!o?40ikUadyWIBJ>5_h}5beX6u?RV=RY3rN^VVGq#^R%5E1;8-gShv&b5SPDaXV<2%vwT*Ru-UC{|XAn$V- zS*moHI?4FkK^@4om@iYCo8%(BBX#R0k39V5RgS(!=!d)`4r#Zz;ikK- zN}u%`w4}a7a}C$05?c5^5yGcA!y2}5%(uP<0I3(+EmT=A(Lidnu4ilcqt6&h&#j6V zMv|$yEb|$hw<^H;M$x{xOCwMVLA3E6Q$wsLEg;W6;FXwP$^qx~g^J1HvvmWtcCidM z;2(YV3^IqDn^F;hgZbb*fDTL0=#@Jx1b*XOPhY*OQKlh9>ctPf6-2mkLACkzva@w{ zk)9g@&fiGSGFycOh8QjU$yAL8`2ad2@^<8_q z43i30{seF>N;DrRW~=|o0vHMNV+#{R_gp27yd3nAR=UUym0H}a0FWQ)eL_K@HcDAd z5w$1KZLPK=_ZQwWhP8`K6z6-%mY)xzFqeK~fEI}F7JoZO&1kvv^iHYYm5rT9+Z&gp z4S?KwCHeb1KsxxpoP%GH3${UtE^RyP127&wwS*c&Y&VLQ;z#(8NcA$~byLPTQ;Gu( zHTh;x{IJxbF7_bxaBODqW<(`I!mZ-;QQ>{uVVS#C9x`|k+hFVmjIT076P}PNhN==1 zxjk-V26eaH6A3N&I(m{P2Hw^ipFKUX&*Rw~llIJ~ zqK_j@K(^{uRZ|+0 z&;ynICKaD)8L!%aTo+((;FsfhTYxup{f9aKbSAS?w`-D{d(=7vhs^TlI%-`HPDo7x zXmQL;1pJoVb!YvUO?SB9h$v*%(7&zzkyf4SyN!vdW!!T`?d5+nTa^XB3 zk>lFVBy>cvj)j~8P!rQ)f!p(42dLfc*@agXPwXhSKY<7IPZ8Z?gPKPvkNT~h^43}J z2zgB*mg&jKF1FS*JSi)=$9!63%4hr-#kM6FraHXu)|Im;p&MdJ%U8H5# zy!_sW!;z6`_k3esno&ZZNleKV;rV)`|ML z-|vl@g6X{`yifMhsSz3#OjgRT5M61C@7Bse?g8(1G1n;p$RnMcclNxx))#fxBC6(? 
z&UM&4F2F%L+BrnhT@3KPJ z1)LaDsmA%_t$`&FK@ z`QpfcOTeV}U11~eUFZdt8?{CI$_$P7pP29EUwk;WNrjQPVwjLj zu7+yb`X#`QZpKO_>ah2$$m;r$6u3xfq66^z@7dR(Dzu6@`ZHr5_*SvJjg6ho3lKD{ zv`sQ^VQ+g}hfsCE)6!R0$s70FxPCDSqepTR4%|Uf>0YyQ)xWkMtq;nUZ$@`wYX*B# z7+f^&^in^%R=)RH6({~;xII2xpznU`mz(PyHYXw1F#UjV?O}4C-d~xi4UazFd=TGQ zdk-g2vtVUGTx8?%;$fN?!oHa*njU7&i;dGrOe#0RNKQ6p*7k2U>COSPE3duB>_e%t zOUr{S)ac#F-NeII9P^6)7z>93H3?f8JKpGiw+@#NJacRDv2{f=GpfU7mB4NwP`aNiD>ESsK=8@@vujo6hlmY#*zo@t+(l=5fi`GJH_P&F?VB@lOhSsB%hdztPk82{=VLEH7Jq0{`bFi93AgR0- z(lK{Z$nS+M|KI#t?Rk;a!vYgl`=N_?9Iu*K!%(q}viGCr=ilZ(Pq2UeCPsKFY(JQv zSUe}SgK%LxN}Z6t`yIZEDzz=bnvm~7N(ol&1%u^3UG8zhtk?(+mDfg6Zy=+nhO3?B zMW{Y5?%e$cu!p0XJf%{`O~|M{p#2L?krj6Wb2DB#Koc@K)G7oJ(@_;VizXZql}s{X z$_zl*R9X$M_$#%1P5N6(mWuZC4Zhp5LT0D#n|}oyrrlfPH$J9_=*E1bukp{cB{>D| z>i~5B`r{7;k~Yp+0+N1U^M;tnVr}g;c5BGq&<*^KJ=OHRbXYf~L$bU3hF zk_h~wTC-CraCH2#Sy>VOfpAj(;^(z<4PS2)V}LWfO7goS;&H`|LwegVwHg}GNz|=i zyw1D-I%Ib4S$VSA)F^e^a1vN4Ix@dinbtbb2_u?Wi_dh4x%Y2Hjf`D;rt9{GrYJzF zGaQP7dag<+^&YVONpErC$TQSeyGE@zO5=j*%DiOC5RAM5N&Z{m2Wdeu;E-Z+tKD?m zt-sgHX}oiDx7UbW7zSC+A`h6Yg@8j2zuf(b)U| zC9TDp-@a6E2iVZM>K#vcQ<=ktXhM=S_N}%fBSzA1+!g7f6&!{YRJKq9jx%ej@oF z!A5GvK()RrA^0p*_eShdKxxIX|4c#Eya4&oj~Y%~-;(|&TO)VpFa^hY5KiRhA6*J; z#31D>()wsIF)-()@SkQzm*S-bKOtFXJKI(MU}Q(i$TeoVV$_Gwl6!KjC?8TfCP_P; z$9vf$Y}hoFsC)EoIuPbXI){tVm>!iyT5eB7R#kW<&*7JffNWl+B$yDISIEZG2^7_jAL z3{iRfKEFPN(M?8k>OwWm-(Yj6{L{Dc#*noKb+N%Q`D=-8ch^01EQH(jJO<>!n4jQn z>2E85N#$gF&g(2)%kEcdF>+tFp$68o=(_*}*;fT-q_E?MA|Cu+@|`5q)bk@mdR!8= zBEyJ>*Xwg8=vu*+fsR2QP0cz?ny|r|jJyzr8Z+F7jree$Cqf&Q$lsBeC%!Q|+8;$rt}&xO^VkcFti8~aPUdhDfdeeUn!P0&uT zu${;~O=_jzpq9r4zW-bht&!z~a6;06fME_Hsoa4xy1l7>v3sKzXj5+MHn#s>Z2oRi z5g%mnA7N9()EUFG9U+Ukw`y8JQ1i3`yi4A?*E|=ng$X$A%kbCqeDN3hvwt;e^8{bB zBQVB`87dMnr$d`5Y!%)5FLgtF%F+K`olYDLx-i7Uxus_F6&GFlq7Ng1mqQhf1=IkZ zRea`rvjZ9jYnE@^E<9aHbv7$%JEnJ+5UT1Us(Ukm8z2_ z87q%l{(V+u%Os&mFC^m-0w2X`Hy@5#wE!B;;4g~7!4Gf0vc+}XXFranj3aD`Yv;3P z0jgb}{Kn$$XQcPibp7};eBuoqn%% z8PmM9EKcR8eNts%I@=fkj4tsvqpqHz&A7p2@6#LQ_xr9b+7ixM>4^*3B`Q@% z+76igJc+4S$g;b9_cLhUXpL_*yHWx=zF75`K@Z5v^heEJirzs;OZ_}-unHA;`+j<% zUZw1H2%=9vd!*nu2{%0iNSLQFy{H~Dfc@5n;JtM4hS=EO@jzYy0^8SGtrAs5CX;dL zsm1j>M&yyuqY*8&@Tknsgph~*k##Oa z{{zM4(G*}W(d~J2cibTT+}Y(~>x!ZhY3oqed11579sN63S@Lvk`us*OLU*Jj9CNz*6rac9lJefI!*dhYJT@!~H0+}>tf5b|#BY3;Yqu2al-`xGG4seQd_(p>-eKkbz^or+aui{~5N@klP%F%7&rx|gyO5aCUvhcb573YhieTloQ74zcsfSfmU8VIRehpO*tL+J zT;hbtm`XzV<;7`H%HJvGST1}PqzbGMs_XwE{x2uJhu^QTRNso+Fb%SFPqfi$x>7^% zbBo1%zm(eI_=6Is?qj(i{5!~4I*n^UK==Gm`#G|s&r6%9e+Tex6*#Toc=id5QZ{g&mw?WlM@m%}?lo5JdFfIGK#d49+Fe^{v)Pe8Xa}`)89!oLbgIkn%HQ+Ro@d8r znwJZzy<;(}<2s@s44J#3Z5=VhK00$F(QpDhM9v5d0QNq7 z-t3*Dx~u4Q0JI~-X0E>kYpwIHkKZzBvi->9lK5OW2VHEIZmM47t8M_~+jQ`l>-(Gz z*qyIjW08j3b~^q+$iHiB^$~~$#0o&Cq&+6_-^zIBge zHUq?@!D6J<=o+f$iZ1(^iuv>+V%C|DdHZX^eg10#4g1H%(cEHkba_cI#G?iLA1jDuerAM(+mXKXb+HLmC- z{lYhI$tkF2<}#zFVnUnrf|^3!wgkCQq-1=xZjo+1d3Y_kjQj&o#vKRE%_1sUxG)>3vX> znCm-cZn>ml8^J#W3)5m}F`(y3q4Zh>o})lP1FFJnk4*~$JUOIN4_;D@jwlBB`Z&zx z=eDJ3H{Wd84g}0HttFcKaOW7hB?HC@c`-%0abhSi6|gS+Wk$Cyh)~<7IYgQt2_jC# z^0-*6yElkUZYW&j4g#3bbR*H$@-D_d>I5eQ*ZJD2o0)wD8TzY(B2mst`vI6%o@WPtrGVcwk+MgR$88%Fw{0cyC+?04LTb|Y8LQg`R(8ea zIZpkHFzqOg$F~VL2`)C;U!Y%~;{`yS;5oyA&hbxE=^@F<77Ema7V90L!A0NO#`UC& z$;jBef(yFbtDkCFLq-K_R`%B{GF-;=H!D+S(y)m~Rl(c4_pEC5zv!-u zUHXkY`2Eh1c0JVQ%!=|aUuCf)=NGjz;x&Ef4FInXzz;ZS8noS?3X9S3w10W}5>UAC z*Ee_p$o4z5;(gN2`&z}oF(!{OlzLDyK(MjiML}~BqK0m)y3X}3O#%z~VJ9(!UDn!W z!&$Md;lr)OuWQQRw*#OHBlo8Oxn!+5A8th=cWl)!&#ut+IS#D)eJ(cY?p1utz&Z-T zVfCB;ajz-bNCNPHbhYUC8zt?C5!nB~LbaGP%4`=97nhUC6-womL(lXE^BWBTvqa6M 
zz+oq4UZapaATZan=Q8CP!08{Y37~Dwnm04jWdSRa6N)T07Azp>3EM4EePye_b1T_#?E#SXYCw&y(O$Trz znx$DGqC?tJY7j!kPczmx_3rEH%dv$>@;QZ`v-$*G1n7}$-e5j!3&cMru1#GwPjy*< zH-bKQp~zUO-V~kEaG2|H8HCqb9t5Qj;>Ott&@Ixiz7?yk^;g*`PjA~%fF~>s|I6g0 zzQCrL!rq`mihY#_ zxb>p>-T7E44^BddPE*T2zyABO#D}-Xd{-_w$3f2aW+>0JP?Eo{I=#~$(dk816zs%G z7u%^zsyj=7ltei(&(<@SGBqxLuKsv@dyx?}ELLVB3wdj&AKErwbjIO0hn=z#_~8t3 z|7E_A#85%iO7X5xoON;^g5k)#vpQ>}kwiD)9f%)@vj6mx0U}1De_NJSCMq<3$hko+ zX!9OXYZr-Ye8WtUJh5vK10BGcSKk8yMq9;#aQa)XKY9t#YIsa;M8h;`>L0_ubffRM zISoH)w3gcT@3By7tO8)tec9Vs_BfE!NuqhhRFVs&mrU2{?T#Pfzr<%k zP~TAq9i^KqR8&b$yI-#y-sJztP=d#4_m(abbd`&Zrb-U{szj#%jfxqzS%o(qAJ7T3 znrymG=nEYc^cq_P@PIu}QNW~5plD@|Q2=+%z)19Zk}TfD#&-%Z5~PcEM7pCx-uLw! z8;VM57~=g*%(6?nu-w2G|77fTl#-P*8VM_B0S=>p?b-^qy2FlYm$`JK1WA!rn9?%) zIDMpv5PMii? z4GYR5!N{M%rqF`!r$yZW=!Zt4+A(@k{U=k9h||UebO{?!&+P(jG6UH0K&!S+*;{n# z5gvScdc-VA-O~NiR1z4FQ~*&F98HcN8q(16(&GjM9eFSh8wFl%p4Z>d%$U|ZzNS3k zk16t+?a)69BQ-Tb!B@6l&Sq;$AMT~^v-iZgF}IE9Z~@rfJ8&-VBam_ddVCWp@pL!x zD`z>OTk*E>MT^67B_a~D3IY&QMyU%e_d9o%&e;NWF zr#`@06A8onwVtz%lROzKNcO;dxMtT<+xapsTjDfL{%Z>D<>OR;rQ`k+(ACbcH1fkO zxj**g#w1zkEJ3*F^Oj}ncQUcY*yYK-jh=( zf`_FGJ6DfHdZzHlHsu=o#A!8jucTEY9sj3$@=4RTe~kT|?%+ZyD?_3V6N#Ot%P>}D zv)<@_9iD#<-ZpOS^O4EJa^etolW_iT~2Hhb6Yeycq zW+tArd=YRz6LmljlrZyxyb0KLK|S? z>(IxwhC4*^UP#E@~^jQhol zPPI7rv3W#Am>B`>mi1f36}htWsDnNXg|k4mk;5y+1@U0-1*I_($}fCC2fYckqkfSG z?ocauy82`F$Xr?O&-bugVOUA;tzRua=FOy$awswJ9c=AsXnxW#aX2jsp`f1VI&^k_ zKmaj>uZioI+T#7+7D`r6!O+xkF~K(}qHTa}QJVM z>`VxUR*-+P5=aW-BpG62;r`AIo*Yebb@jmL$C1bk4qlb7^7+Ew2|_P4-C4^1w(rm?0Osg)t5}8g7su zUEE7CSaDjxUI~0c9q1%?9X-*oOyBi1$Wr!R;!#>QvNubCyj=7eni^T(yYvaZvb6OR zWb_@Pgki)C?++(LAsW<%MODd;Vh)Bz;YumMGrSGR$AxQvt-@W^j{8hR+6PESa;n&I zx@YrTMg8?>JzkYcVgW#RI_1cC=$;*d%mmynzWCDrqBTnC4@Mxm9+ot46eOPXNLgwe zk`7%uHGS>3J3(&S9VIkNHEaleweoP9h{{Ul#MNP#~ve`z>4uHSn4 ziLwd~HGa5u#i|&+_!q*hHLDdnpX_AaPJq_`Lv)ItR^+j~dMj^jk_v`nzWbUpdM}*$ z@2=|z1cH+B^UAjFBHzhRs>WHML5Rvcxd82q^T*oXq{d_WedWN3DBN~uW&D}YOnMK{1N;Ra&Epf>OnSJm(Q;zD z_7otQjr}WCP5eizFfZqN3>guY^`qw2T>noEF1=Wr#4vO;SSH%4YD+c)6;hn!)%qzv zZk=(F_et#iy>bt#wHs{O{9VlL#@W1;U-Cfw(5$56mg?Pm{?P@!AY~PD2PGjO;dD%T zH0~p0ObT}Qs0>Xin7Cs*8@7Fc2w=@}x4uQ&`yPwK&IpGP%|nw%;Qs2prd*xCQY~D7 zH} z0)VM0rl`v$b1NQ0vaW{xvZ+~dXxctOI|=Nh(Z|DJXXC|J9n5k`@w=_}?lSywKmsnv z3vo`59~t09^Zb9m>tNx@NgU~)M0$@G71R`X-XboV+UBe*I(!j=!?M6XQ)@m*vjq;l zea5j-eVqev>x0Vb*AtmX#*ti^Qo%&=;NV-Uz~WFQAt#1hpSFayJQ}+zu(8(nOA~*V z`8eeMnF0J*|ubF@x1(>)g0QX>NPZR{cUE^Z!oE_T;^; zDZ4o_9iluH`y7R?7fdmgrAVD1X`Y=eaIN2h?RPZ}th(x~+w>M(KLG@`A&ksCnNred zmghBDr*X9Qx0kXbdPsF5a5^9Ue5}Z`qB*Txx$FhZf;A?spj$3YO@Xq1x^^t{nVMeg z-cBTPe@KVJjely8h3p0Z;KD?;ko`z#ZmM(rL#aV%!hT}PTkB@W=U0sNoCu4e-S`Y+ zkkRsPwC%`C>EXa)hYsTqfZybByI*e{p_A;huAz*77^o*<(j1aCSmvKBlHa|k4|!|x z-r4kH5KtT-_4V`7npyH9e3!i5IKC`9nNR(R3~`h#i?sS})eErWj`5BW1F#W&zx2u8 z11>V0bGbrZs#|uSPgcIqb3lJXN&-u1r6e9b+!AxS25E#;{I@druNSd;pBDeJWYxb| zEcn%QynBm%IeDyCg@wwE>5wvkKS}FaMMFC_yQU1{zX~cL<;Wazc{>XKj>BhPz+3BAu=2@;{QdB`9{hG= zr-#?JfX}wx%G{@`2vb}rYbvGG+dV3vZ;cT~%2b7v-mt2uSmn^gLDR`TPgIa)ohYrq z4(B3?(kTNh*B|e4L#=PwsSto}Asi>j{bN{{2GnzzHOL~?72}B#<+5I~qx42x@QixaIccgMk~9t!!_@4OD-5v+lEEorobAGRbUiU&t}O zHO{LdePfLD0PszW`Z!nW0c9)Vfff&jqYlm=yi)ecNZ}j?v+Z&Py#s*8IF)iuwzb;p6zQ?f#&q=mtS|$hX+&G2^pLX=50J_lfF#7?t0#@4p#%CO8okDCWwp{aH#!Z_?R+#V6OpZo6YH`U8KqlHbX^26EEO*K{Mb^#t1;GUR3SZ)nOKUGO| z-787A3epKX6H?a8_%HDufA&kr!GZVwmuqwakYze-Xh)Y8_AFfB_-nwNwFH#@%B&q9 z*<6EgG|)zZD^VF&7m9qAbuC3-`BkGz9+x zmQ|we*y+NO5igR$>&DF2$f+_ZrJfMk>8D7%!k8oLQDm*5dC_!K8@9X_Q5yXh!n@4& zUN6@6{rpsD>?Zp%ek`jMviG=6c*-G+^ch(g4eUaP698`jyj4qo`};p{|8vwTfZ99X zH=0}-+JWB7nBhV0|TB1zq{( 
zK&mu6a)#<-!@BS*qFpXoiFg4j!-~6nC_O2SZ<_Ip1e^uCPfeSPHi~?9P-%DJKj_~A zEQQx7xU@IXReQQl`Vn}9t@8RETf()gm`aMe+W1wD`MNQWjZo&37{A(|<+VFI+T`02 z*}4$a$fny7a0?~jy|V{h!vPb4JOFPOu}W9=0Nu(6tDPKvxm(W2+8$wz{wg+^v@qW< zCm9>!Yjh4ms#)WkvbU)>NM9R`P@0?#pSknNJZo3$3On?m2Q_NMVH% zJ}LaNMAs9TY@|vdo$3|F9)KL0rE`| znVBkpS5<6#Xa$HoAh#R&L$LwC*7&1Ylf3J{?y3zj{uFT}Gj00PO&)3E<$fnn3nkm0 zmHrEvmX>lyZ>MMI4Omd8UaL(RbQhF2gkoz#9pwiB=j>Vy2H2ZaR9vB4`Je@<8=;{B z5bxsjSWgF7z&%jw1w3~efKWo{Rl7MgyOJp|*2NBlW3)l$R0nu+MBUAk)$2MeO9XHa z=3UMMiV)Y0E#7)MqAZuKJafP`@qv|qQ*nZSY8A6TUwC1^m1M%c#LaKWEt7rD?bcXV zzO0r5!qtHS_yRBYh#Jmn&nh&Su^!=kjKgQwYDK+1&yA;zA{vIuEW#F8007LV6quIX zHRG#_%+n9Pn;%%TJWQ7n>+}b#4+z83U#+b5PspB#@hkF@`9?!F$vuiQF+m4mXK*u2 zb=$Oe3XmK-R~duhwR_DqFq<>6sx5d#$7(+mR zlfYJ2WM^FjW0qK2^|#*>1HZ2mRw?T5$n(!@E}z3(xIO>$`}VajvMRr7Pb^;(B}JSX zQ@>4-{S@BHi1GP0b>{W_=_G@)lMSM|T`9@c=r8Q}gH4sTZE}LG*VDKmul9H{8zbc% zk*`Z)FXDX=S`Y;YswCESA05>OoAaA?jpvPSZ zRRuIk>cMg5H5u6En(FPKy8l$vzyqxwmD^FQvcw~fZtHMD>R=_*S4TMN7E;@VC-Jsi zH!Vep=E3Z|`*ZT$O$_@`fLMHAtoyA+f%9yJAgKOYL6_^-`-y;Yz(A^_`j@@(-M@gv z9T^Dy)I6-F$}us8Wzgo6>FPrG5$_)iZgc8irHboNHT3@Qz47dD>dW1nD*$KH{qy*V zlHzaHEzk^PE1b4-7djVub3aS*#$B-n(QoB*Q>w~R*~i^K1WAWXocxkw*=wCRW%80s zQ~YWo5-vFw34}waH_s?mqxjj*Nz3^FzRJLmeg!W)r}>embqYam8XQI5sp9u6jn4?4 z8j}Dd;K!`!G4(&E00Ft=tfdDZE)yMi9(Z8JUX|7)(7&?MPww(+RJMjGTFyTSpJ_-P zloUD|d$ZKZziRVhT_S){D58=}?*aor2ke%x00F|KrjqS|gP7yb zUG?@8Qq(K(X5waq{Px`=&&|~Tz9~4KUns!KJE#t$yjP9zk7ez35T9{P4#552I`$GO zwJIVgZDt;@*xDJfzIO7M-h<$WkF(-uV{()!|MNNk1i+?->5Gn#-55Y)QuzK1oF=G2>g}glPb|c$4Fh|bvb_B0m9rr10lCm zA;ZFdbSqJpRilP z^b_9U_V$P${fgMG*(SLW3w@OB88H!5V`VAj!JFwBQu%$42*-4x3RFsnAZChjHGa!y z8ir~&FW1RHJtt23*O#Ldi5z!!P!QiTNRapA0jw&>ylsJj+Ie#<@K;G%hRqG5lThaK z9iR?SAVO{4f9 zxu=s~Gf`qZ(?u`t`&R@62S)?t^d-u5LM>+*0ah40aQIM=#m25jSIoW$!fTb;+4-2d(*)1{E<4iu|HacE-@#g%;B*$?u=jb|XDeXRLGIz{xddH zz?GWJ1diwzmiHFh1g+{C^p&Y0!-9vlpe(igOko0$(yv;8&7U=~6a*KF!hK zcHHr>3M!}F5F;-9{O5S}%|w~RVKz2kiX@K3p(%r(JQu!^x~QsP*NB%#8ck~I2e);i zAh5N2d~(R=?<(?%Wm&L~h~b)B9#&D6J0v1Lx9cx3K%zc>z`Dww!3BriYxxBClPjSXv3mcOEY_uVt9iVWn7cddU;%L zTglJbN(t6|qoa%(6`Hw>J^$D_s*95YKQJCR`R&oU#m*^>-nNL>yAHrXjm2pD(_Uat zPrW1$wywS1L1|ARudvH~*n%=sp=2c>`d%4HfvUJJLIxZiJTL8c`9}CPW(323_Vi3l zU;0dt0MDEJJbYQf>6>ND8)&jTxx4NCQmLG8HPKxF~Nw|5g1wyvO0zn zB{OwqGJ9?yI0mlU-j2eC08i9OpkUh<^NE(^`vNdg81bl_%JIJHw0QiTYJ|Ny7Abvs zE&K8ovbF)Xz85zWfasmZ>@{fbf8wUa2L*jn@+W$xD31 zUHO3HADo#q(M-6tozeC{Wa+lLmWGXNV5Zd;IJD6hoN9#eePL^0Fk7iy!Ud3D73`JD z^u0!fBM!-{4BEnONyQ=KkYg>YyZ)CKKD9}%q*7D;V}{qleH{<0MR3+tcqtE&9O$pV zFj%mgUz$Dm7!VULt|c0^g+IL%a4g6R=mjw$hq#^k~JAB*WH0RA`nj5xI8Lh=#Q0lZ-rg8xhA zrxfqsJS6Qy;pP{fhgSTE9{(u`Achz~cBq=i#NH0*Z;K#h57C0s1`%Y>fMb($ z+}UK+vF`OWL}gjnbZy^wuSNgzZ>Rr$@BhRjmhf8cBahgq@ORdB9z1;PSn!KF{kB-o zt$WK?d9IljSglXiD&7>-b8^aQ{yPa)x|q&J$$eJ|aMkdmXs-muMUDN79oXaM5$P6^ zIT5Hm=r{+zhK>V`zeDP}xAsa-)bickKD83Acd#s4wb_%r-BXEmLXVU)NWwF=4!`!I z#mt(;dX!Tq-xY30yhAHT&ym0W7D5h3)b)5C`K`BN)1SSr5e>tmC*XR6ZeuFtEUA;f zm-}r!J4L_LRrG|#wg!eYIHXta7n>v}V0Zt*J93{B!h1UA7f`DB^J8hlhhzCAKXI1mnBkOi=1 zBr?+SIOJt4di{jAxfip(NEO7s)s}t5LY^qZ_e;Eh@m+@3Wh1LTo%fkhwoW&VcUYNU zT#843MyhcLv6JEE;?N{$3svYebBDg$XNo`W5?cp$c(FkIf47MH>jg%|ZuR6eEv3Mq zPxsh}_aRaJZI(7M+yZh(MX6edzdL2ZG=HF7BjSH+9!-$dMBs_n&jLY#d}s>6l)FEW zE8?9YQ>XZb{G!{mJ)+*^bMyV1>dsE6_sW~@+b#1rg`t=Vbnp}MR(dG$(5$A9_5hbG zPYoZ+^_S`{TXTRj^=<#qN8Th>ypvkQ#TjD$~Xf{^rA67OoVnT~mm0wFh zKjNEWeP5ExDO}{J`pS2b5m#ZcInakyS66fnRr&Wn<5^gQ~9JbFswC#CRFZ!;q#~5SU?|v$&9OF=0mcNG2j>&Jn^qo5DvM+7v@E_h`=+VOXhv5JA0w4?@y7c8JT?IR}4Y(*8pa;@ZYQ4EFt4pC` z-+;J_#TOL2?h61MG1g;m?pMZ8b;!r=@n|~^1gz5l-F!ov2W0~EZ-tUCgQY5JOU1zw~ zWuit+s7`sEdxWB{^71$Yc4t{&j2xM1fgmrO#1yj?IW8we)Hsf~m^^8^NVIsWJK#8m 
zBtD7&I*4>s8-S19F{>G2*o?3PGc8KU;SQs0d8XR9H}*SiY9YOQ-)gbTz6Y&=Ab(Mf z*mZ-4?VyAQ^!K0r+b-dNWeXX2?mIy>%-J0qDRF;SDl%Mee&cvDUEzmoWs$#+0I04c z>C?zN>mku|`XWF^l{KuNIQTRYC@`oZFFln2UH=OpDCbKFpKULX_7jg@q(tThkW`dv ztgteN$WR{QA3t0ii2FiM2dQpG`zy>?=^?u>QaI%Z*KPOf1Nl7Fr73l$h_-4GKfV9n z{J6_?SOo!&K{HvNOi}x4x`MQ$--uO#2QrJQ@q)B0H`a8~4DQ0O_t#HB_3cSNKQgsgvm?Q+b z&Z-%q@F3BxY=SHt;WzRTE%;qf4_G$ixN+Zutpt1C`fsN5>5mAvu6ofH@WLRP5$$iX zYRK8am&BBI#G{*(`ZlD75YZ6r1W2Zr+2^9>xjg!>;pvb9?fESR(h za*xaTKkoy6oc!1A;>Twg>6MN2rme3 z_AYR+;`gxi;Zco9d~9BtHF|K7vKL~T^bW!wIM^&3>_F{a=k5|^@?y0=T=v(|Nt&K3 zw7viF0=>Mj;KIc?P|ru8D+RUn;0!Q%=BhaiSe64A)HUbuQVwyXQP2lj+LMU|5=953 zm#EPc)V#nyE&^Sr<96hf*)Q>Xaz!$4GZjc{CtMIcNod>uw-ZZyoF|XJc2B$D(RYzR zZrY(dZIxe5t<=MD9(nqw4gKx`XF62d~W;W946VP_L^D97t3Awd&?s<#j}z*^Zi(y#E`dyiV@7r z$Z}i(@3H^!16)R`DR7SNt*igSA4W4heh8VvvIT9Nv~+ut@i1JsAUA0@Wq42C$I|L# z&c?@U@0QY7T9K;=Y+6(&4 zCX1=ZSGso;l5EVfUMT`1g1C4pLS3NALmB#jG_3Y5VlW6r^E%8L#a@@$1}#qpZN=k8 z$~{Y2rbTo;59eOYHBXN`$MNqIX>AK+?2$lVwN7BMSyh$mX$p(kloYQ9Ru&^1o1JmyCqD?*fel zTZx|I(KpvQ_;*kY4EBRsiHTb5bl<*A(bdwniCl@IdQPCu&aK3bat83v^XopPar)AB zb`)C1aM((m(|dRiS9I|rtbwI^Z?Eu6o9Nn-QTtBM9V7IYk#gZHsNf}zOQ&J4=B~K# zT+}sP{jRQDGwC$zgT;5<-y@0wg4GR-Esh$vE$KUFZCM=dYP7hMl$dUh66M zeLv3=>}|(=q?zJ$x>w)ovqy=iVmW_`Fp0II4pXyhJFpcP;4GI?QZ@Dm^8JUiKOScV zpJcngzsrQ$hvs?u3bLPj@Qb;@~0hU!|-u_{z!|$zj|c0SFSHy8KRak zeNek>`Oc7UC%%N~;kc9K;wii*62QHhnj;Qgc=<`QY`1XRu-11TO8xiv6!UxN5U_s> zIjQantbFA(Sj*x9N!9^BsQKZnbeq7=m)2FSld~2vBWz=BtOq ze8)N(?r1b|im~sEaEF6%IiTaa={4Do|5+rk#5i6gM+E}waA-`pJ?-?5Ex-Cre2&eH zzj}gxTW3E})v#7ivd*RS@ymRtmW=c)?5j{=m8=&POnIaG0H!0cFPlFl_C3^8Sd;~% z1=|IuLA8>M?4#&yPw4y4J_`ydfxX~$t)lvbM}O&(u%F)H(MqS-Hv0w(jJslcEy;VG zALul{R{$`lX{@EnWUr9H@OTVVb@Ktr{FP$9z?xtAk1_NtE-_nIKB_xJhlNY&n=U zsdD8z*c0R%{wKhcy(bNKeq{BfJ?S2Bl}0}^A7_0Ho>f7Nej{HRxZ9O=#Au)TPDy>?BkM4aV-?2X(F-qP#GT_#w)gAa)xBXLa(CSO?%KVAwYzXzSwHbezgk;7S z564J|NKBKe`=@w$o9+=UPy-V1va`J3H^_H}+jXn8Lk&@yIvN@+rluolQj1>}Zf)c8 zhBxcD_5G@dd8GH+;Xt?~pE{^dERJh7Bi;c|<0CB&A6~7B@Ehg36LiYljl0+uzBh-JfZK4$W>3T5;>1J%3?lKlvvp{sS6sXHPL z)fkUxQL}GWwoWNG_AkC_xoP7bU0^&%kclzohbvRw>u_y5!P1QZv&jIF17okFO=?; z#-JJde)7L%{)syJxoVkue-ZzRq;pV*oUL2<2%9AuA<-P=#;aNH(@-80&cQ8y-f6_5 z`l_Odk4V;*GBNi|!fKEZA?nQGiLMGMw@BnV+p|v5#E%2yu|?P>lZvS{JTPA+V1|5U z+cqKtlGyl6Z#CIRtJBFa`bXcpzXZ#DwE!`O>6+Tvj^5Nw)AihT#hl#v5+9CRq8>lP zw``=;f*^e}ccprN|8P=sGM6P!t)};f85!LqvFjk|`+5dk^Q*5XJuig3Zz;AL&f2=C z2o!~=x5U85d=#xiTkG4K+am()4KlX}{xU*99d~Vai4Z?)XJIOqPphp_|NS}EdeLKx z^=6M*n~xeF{Vq;&-bvz=U$W$I&QAjwFDKZH+IBsTa*K=nAi?pY1wdap=)9EpknD8A z`>%HOpl|b%3==Y|^hIIh%8EIy_>haa-gJpsiC8UpE@iDh-58fq(`Yg@yg5$I(foo) z)26KENx9V^*Td;@)kn#TbBh5j8|c?59s>UVY4Z*i`=o(wZ=YbQ(w*fi)%m{ontWPO zA6mBYOMt6OOC5A-u>r$#Z^-wybLh~$vLr<2Y58e87lY1`Vad${^*q}-t1=6#Zt&sNUHi{)BYKN(R;H8-vU9E_9qcUj4~GxNVUEA! 
zzZoJR4Y2ypnB8B@pu1(c0zw6(fhAoKJ&h7=SlpzxKX^H%bgOk{byJCEpvIqV&oTI= z&>q9^z|p5tE49h^_hoR0hwY$xvu@AHsfT^ih=a0QZ>}|hbmbNwbT<2CDD7(0D&a(N zO5feaGQK>V0NI)ra*Mb%UO&=VjMt_;M5(-ug0^}Crr;{KnUSp#F}iQkX2e+*O*G2 z84js645`gEsY5LCiMWs7=GKmb{WSt<=iZu^4bp^=<|X<&*u0UI6%#_M%+x(V+7X+MrCilM{JDig+hs8wwL4ZdAV?* z6LUHC;Twux&jDq)`_|fjhN9A6z%Hh-XTBxx#5ct9J;&t-TAWPpTcOI;23iP^I*f7s z(U$gyRrr29vvSTNNx{@1_^Fy}`-6;d=OgMeiq zW_;_0OT%!7Q$e=hjp6x-rUYc^J6f39+sF1Uub zrX7>35%4b}NanH!u0t|^4Q6@Lz`A$l&L-5-tj$R-6Y_e1#RK*tV>%b=U6XnhCSzg# zX|WjIc=t=JcT%sCJd%TXE(QYg(PQs0bu%AP%wJ3OG*ZX+d>ryGzZN!ZcAHxxmiC`M z|6nFV%}aIi9J3TxFI38muW3qj{G73qNbt&0BPi2rC6|!P!0Z@Kt&wCjCf`~Dd@e~w zNWXg(d#THB^i>!6cCv@l;u(HcSI)q@3JhC!{nV1tR!wT0M(m5Sl^Q-mPvfkp3F75- z_Nv#C%#X96D}{Y!20mba2`#eFWO0rF4C(FtQqpY;8B_yu02wd%sgJ!H+rE$4=ILh{ zyy8FS;`pAq&e2jrAb$3^>ge~5e9MGWdnM6*rNz?O2>SY)>4|_$)DF3cUHW_MDXRm> z)j^@#YFxedzWS}7m{I9UC4N-G9sfpV8o#ZK`Gaxbz~~!wJ*l}fNQj)J6MMN5xjocv z%El`A%q;8Y%RAvLlopGV2{r|qwbE8m&s;8}!lV|P?S`-MB1%2HBrdZuQ^*bQtr!l3 zNhi|?roxEz?-hS5+ufRXR!Ar_(84UYIA+#3amu)|tZa!RY)PadM${0t!AsCOi7a}n zP4PN193tiPCT0E5Q()H?v5Ks&{xEK}|J*q+Xp3jBPlJ)o-DvaP=Yn~in(Tc={kZyR zufmSSCWTqGqCn`>o#eZ;lE}h60B%hDbT7oot|{QdFW);uRe$ObY<62&_YVrg^DIAm zIQVYcr=j7-S>H>`?O}jJ zzwZ={G%P31``u8JSPb`G86h7$tbmxjz2l}2!=YSKVoHp9(UYWFZ!s%9GWz&GvIKGrnfBY?bhPAIP}14FElIO4c7ikXHm2^R3-LHS3$x(gVv1~_tavO%Dt88QF`;& zVTZY*UtjCnuBNWY)hTswyPC4wBN%Rp;eS0eo-mh{SB)l#{q+E6R#w-Aa1N(T+F-qW4CD8A9z`llT;1>w7fbSs{ZO zGs1lWWQQoyu!mNtq_c#M h2t@$i7UtFRE82fW7l6n7qpbiHghF>s^qBf+!9{Peh zIpeQ|)=5|1;W!dbf^5n`Jf!R+hdx$A@MD%CmU%pH-yK2)HN!9ryVAYY?e{hc?c}T4 z@$=1GlKJ*k;rhxt#jp=zA3{@GAq<|_MmJSYuS8Xk2Z`}06t`a+I8TbAe7tifBT13u zI**>5b>Jt)&EbiB32@=8ML^%6w*Nc?e9uq#M_nJ~f9y7hx(q`p{fPUFHXt7F&J8M3 z`)kn6-;=F?+bKWOz%*iM?QX#F_hDf;>EE20#6DQ-N7MciHv>Zz{iOb+ZSI$g$@x8s zi<)W^o?{NSidznyh!-e1nPEtJas8*&1kX&v&^co|(7x?ZpsIu(*y1u-jaq;|%=lPr zFZLl>@EYvz6$SbqIaePiD8%0}y}HuY-EX_S6?;$hfu-#`exC29r7I1m=d=jjmc41E z z&6e8$pFp=TlNo>8m)*&WEsOirSY1u8-{ohxUb#T3MrqX(la#^I8zyDb zA|m1i8cxxr(W<&)q!;_)uPFOU+sR;c1QEL)9>qmIy0$g~Sk-Z6X7IvxL|8LS;1~^2 z7uz2r-z4hz_@?Cc@EJ?%3vZg&`?IID{8n7_!zj$WR3DPSSo?5>25>AqszMz-#PJ89 zZ+XC%i{;t%QCr)}=BQbQ#T~BT+^OI7!@9(_^kT_mOo4v>PmXMoK=bokdKcdVbrpBv zwUN2(=~oB~)X@tAlff=GC=#&8_Nxn0ZWSQZe$vUx{Zmz=<_gf;?M#vUez1&qU})E0 zrr}9L$KHTwmeq~O4(ZB1TML>g&Ha8*RJAJ-`{);HAAVeBoL*O#B!mfbnkWtM+WjY= z;`eOfov-nKhPKlK+PS@rpUFk>S=0UmJy9pp!j9}D-{h0#b|t6Er%U+<&LgO|hg?E4 z>l9!2Dlhkv!*F?$^e3Pu6gog0Khi~34o$9e7}PoL*@-Yjm~Ma{*#BmJ*WV}Qo96Bk zYNkHh+;DU`+9GasYcJH6SA&)UCY)`pPncpJQL{-dD~?42DfAC|GfHhmnG6zpKyZ=k zx4jqVlx_DPVsnwuIO>Yvu7@2@5xEn%a6omIAv73`SNC7SV7A%vNq37c#>Gz-dy4qK zvyv9^3DSnln%-tjRBK18mEIxmA@asc;IE-9rev2XXcpKd=O2D)#Xd0HdsJwYEc6I( zbF3<3VE9x$@q#Kx zK6V5r+RFtbHWsDruG;2C@R)kw@R8fN7Nhf}?jqWsUkIM$BQJjE1<0KwQ!9Vy~w`08YZt6UX#;$eewQ(Rs zg40Y2%$hHX>%xr8hH7N$b!)WW}+EzTPC&lKce8sn^|14=GV&&M&`3!dfpIvZ)BRwy?~^nI4C!WddiiZ z4}!WXFN|Kn*L+;-F?#ll!9H2(t4AGFbvE5Z4=T~#r93V{2-=bmYarvs^&8&mjyD9E zFuW;VRo>nOch4!y>j{_WrA59csP7$>WT;Nf2W?0o|$P@m~IZNlv2iw?v z!5pJEZD4Y|C|y06r!FwY?n@&B7%@URp=uBaZE?a5PLJm<($%9C>+VjA=lP*WYTumQ z{CmRX;R=?(BD1W8iFCbzlKa^Co{GqD4SB_?LBXuERu~@<;PE)5^mM$GG@?p9Bv%Ph z1Edz_S=OU7Dyh|o5eZYKWSk9nbY<^ib~9{n-044rzDYTWlBE^R%Wp~`5(f#iz$!t0 z5^LnAUGUt1M!Rmkhiv*;cEFD+78xGuUp57GKW&4T)%bfc^^xC#9)jF#XLt_7L-aj+ zUC434H$R{C-c-K)cB`17#ptRbt6(4WPS6YajfL54rjyhlq3#Tp5Bp#bKOonA!~e{m zJU%R}yX!FI=WxNDYN@%)Is-M?nIzV@j)`xGuz=|RUEyJw;V zHYX1Nre;?%+O>=9(Fano+^5*|!@xt!W7cjY zxY3@!eu+qJAt?pG^k;fGGD__4X`!Zl!!8QAE)FIc%B0z8L?Jz|W0|9}GN+us z7r1Z?a-MOoggrrA@W|zQ-&^Kf((87)e0qedGKms&%WhYFjP_+|F(>iiw3e9qgw&SC zj7w69oy%Lc%{Vtyz8Sc zM*E}t8b3vaZ-}PlTn{*(UUxe$Fvi9qdwnqYkyXDizc!}h 
zEm3QH#_E$zf>kEbJ3KDi>~=Mei1PElQvAb@1OvmT{A$h)8_tlFoo79(5ZlNV@e3hT zptE6(BYZA?A|Y#EcB9W)7NqPVefP~kk6>rRkG-O<@+nQmnjR=oPxOE~9j>sdd;} z2=&majP6F{M4BZa0@>(mP0S?ZPo#QM$axRkbwSkyxc%RJ8G9hb+pu=xEqaaC3oc8( zfeY>8k+3lf4w|88>WO_VZE@-Y8?CMnBQ}-}hC*?*Wrb)L%hr0+=dKCQ1n55IB!jE& zJzM>c)4o$Oy%YB^tHq6;_A#Xq%ms-30ktaZVz5!7* zY18u8!lxN?ay3KozV++8BFlJtYQ!^Cak>!0P7vSM=QLckKdlBz$4yq1R?TWXGe$E%KJ>)`W7+P@lWS2(D*`g+_YfDR*B9S|OGkoRvvNP3My4eB8LioAR>tXz`uG^7r=E-%`u9 zxtGy(7AdC0Ju2*Qj=XUKm|EcV3)ebhR+I-jm(%4Lp3>Ofk`{-&cOv--I)%e`V>Qwu zgX<;ELxY{}9JxZ<-|nh&2;QT!S^iq*wx&l?-?(G0%;2l z?-|kyu8Q6K>X&U_It6~}|Ln4VuLqqXg#y^m_f{LmeRNfwt!I1K)I1t%x{IH$pgN#2 zqYOllxNgJ9&JSmSfDhL_IV9y4|403x)Gvs3>%{@Xs5QkI z;I+UDPYL3*LsX70enO;{YRljj;zoA*Quhp78LPyd?L@}0frsV!S7jKNlJoG!Ku+qloJg@k?kC4TRoaw9a%9k@<_QlQT3V5hYR}fH(6{E*A7FO2*8T)X?u;xr^ z&`V)=c$JUM!l>>h&%1i*r_fs(*br~&Ui!m17&a3n126_LVAK%_o#Zz&Mz69CFdWMD zBTKvda4X~W5d)^tRNdIkSvJ-O=QFkh(mqY(lT~{v2rgm_uX8t&fFKlhV0#u>$@m6qbbl}w?2d#yr(PA z&CuT;4nkIHOT4TuFyTYJ#1uNgr%XsCqWs+qv9(q|#T-D_SU9{2i;MX1FjO4bcU&rJy?1g5*l3Ku){VDHj)W z*NqX0>KKCL$_Rw`U>wkf$*x9|)PV=-5W_bd3bjDY&3-)>lHtjlq*AD z0PfxEtAQQ%wUk?IR(aU7P6FPpxl+8}dTIjw+D0Ri#$XFapQ&%^GV?EBTAi3Ek{o@u z`QkzuTa}wy%c2Wro8D*W>5D%lLFkbz?>j=2{j$iHB2Wc`%ql-bpY(D1LORL=AcBC( z`5h5FeI18?$KE~sj^tJuniI=;dFp3Z`-%k z7I}kEzNmYSkLT!4v3X=7-Mdb(E;t}SWA)4*^LrR}2jA{l+jx$q)ps`Ax$Hd@xCM;+ zy}?wUtvJ%pGx;PSM_~Eot5imJQI}4@C%<-_KfV!nP+EMcGGUG)0^y6muIuj z{5$YoFhm*!fEKM{lO633DZJ$T=r7qT-d~*x_95G87qSsO!p)-OjlF=1qqjcB`WWD} z@M0f$L2kleHsJz+Q@$0^U+^WJf@*Pi&o)rrHv8GbP|aLdpvUoHCepl1xS7-vvr%)i zu1vARo6k2{24A}hokqN^0$Idiq3Wl9N6-~JcS=4PoE6-g9c+!zhPmXn>Bx`A-5-wy z3MY;?v2zY@^z}pBAiaZ#=D)R2CeQ!>ta-Lq{ARl$eEfRc55zYBxd#6{Y zy<^RqoS(WUyK8d)3H?36MzDv>w5P|Epl{QO50s!H&=lLig@+mQhyQ*@j8sDM-VY6H z2BwLgFaCZ)jM!1q4%Xk1AGFXnw4wc6$#7YmPqlSQelMVIuulQWK#3nx3I!2z3P+x9 zoWqZJ^St;=Os!U1cETQJ0C%eZ-0hhQH7LZ} z&oWKaDpcF*-rgoSw~$lXPRdF!ny zcG{cvufuKb0Pf62VkKnGl{RT-t~-Rk7-!D;p9}iLQQVuG05T=9r}Za;eYeq9H(a+5 zX4#w~)KOIYf>v(EMtb{Pfo;6FhIDs*Q--YJt5cr;afd^n#e(AXap{6t3^h!M{}VhF zb;klSyv+8i92_O_HF3$%A;3G5x$K`rT43DKcgD|^Tqx(FF!J_}Zs)gFVy}`2y~UzK zv)OgmKN=pZ!-l5;J=%?s?u>H*`2g!ihOHnECI>4kalR;28aVRBH9*7bfa%))zxUD7 zWhQTe9(#ZvAzr zEF0LC45%zQ06G~stk(KMVR&6&iB+zwF|d92m6^84<+$nVyEd6+;)a|P9Fs?}s}`ED z4ZqXfx@w;G;h+@4o9xs-_6}0XcL%=z7bMT0?fh>=%$dXWV%5B(PUsjir;}u`lVSmL zW!RgfEI2cB{ijRp4OQ(XZ}m1x4tbx;?!L<$F2wrkp#GjXb~}qq<@MfStWmNio^&#m z`04xpWYZ)8i1}^qOEEA8#-k>zL0Y6YFHa=|5sg?ORI0ePNj`ASak!r^W(90O_h(Z| zgo&m{unDLd54ap9m6Uv<_ZJB`Qxm!G5PJd9Jei86_*1GIFia!NhliN|4qO{xfL&sa zkd_M)cTFsTQFF)H@W*fU99gVmo%A}aS2X8Z;-XO2^!ezR@Xl({$e8gQfhCr(P>^Bx~@mvwN z+%FOyg(AlC%~&g135Mi)O9xhedgE)(G#_mGUTzPrQxb)5mqq54&Cy#jrNmF|83sPXp=bORDD7iYbFq8bQAJ#y zFV+B^fVjkY2+j8BqDq?kGg#S*T2vNX@U}_WQn7SFIUp@D(W;C)Ao2;k9i@P`Pd4%E z96`axn{CX)#)6k0xbhC*7ciTX8!-)hiS5pkUy|JZ{3K5cD%TrV88)Fgyk*wuQ7RHg z!V~1DPh@YJ0pUOWxA;^OlW@UO4wiJe5fL)iJuufDH$^p^+dP5(;9eZQ5CD;knWEZ+ zO#IJ}+zctJE#j%>yh7Vt2@Y1CeujP>_FKB9p{|y-o}uM7gK)*giJ;~tfra+X;3wF9 zc?p;uT7~_Xo6|&l{4}lZlY4UTNZ~D?X_Z$b+#|@DF3-FH?T6j!=Tu3bOERi%tSWvCgm)^l^{^LUPVPF3>&XnFH-#)cVK~ zGY-wZqN3N+EIyW-H|Ga4#mF~*n#JKC9mfY`L>U6r{s?vAq~XSg&L7tS#b^Eel>P>) znTzn*!n`%oa4O8(FD!h86J8tOV9T(u_t_z_w_ zfZFzr5H;3s2g^_o(m5E;>)5_WMc8uVC{DLZm24M+T_udJAe1gnlFt>#L8tJZd~UcP z0=Ckfyp@wx*6H1*FiX{PAv7f{KHFpi$a*oqISGL^tZvi z@4;llW@%BW;3u*ZWm_K52fe-)wHxfkNGP51$wmcKDTL)m%Ta zn?fE0fO5dV#)bgP!2P?D3{P0$^z}GD74pq=TdaxeVzA^y^oG*I&QA66?V}Ro^+ziPt}pC>Mww1{B3RJKrOYT)L+)m?c8=% z(8kQO@?>&vO46{YIO)5vVB>BDg0Ph_q^o4JNF4T150!KVKL)xM&5dv*hj)<+b{KMg zXz)3CTA4APKOShdBPhHl&o;)@!~3f81m?ETfL3sjn$O{n_67nY90TuAxA*B{R&~+0atFDp9_vDR1zMrO@J#> zZ(xygHTk2uNXjJIk$IDx_v#@|V0F|xRZO{D|0}o)1 
zEdzM?pLEK?-hh(&q8Z89#-ve%UpF@2#0Im{Vn|b}420(WP?;4VG#*9X0TD-ZXVz*9 z*VA#uJs=~fw+30{f|vU!!<(*;dlIX;71Q*G*%=W}jv=aHu(PgBQ1pQLt6<8<)5TK7 zEI4jrl*v6euT>S}SdKLZq4WAm9rfi_LV-t{($PZ4J-nr|D zEe49go2O4~zJtDNK@k`4ayzLXEDU>SfT}U6#tm`Pu}litSC%|x24@-Z4LZX1w~1cT-TO%D zE4BkG+gXP!V0|!zHftWd0Z8ej@nn>oRGUR`8AM{>?gIKxd`-W$#{yU8&I&GxZB+H{ zJBNr>~*X< z>I>~~b#Ae6zo)ndF3L}oX2&~N7O<~NtUI`TM3z92Ugp^B)?yqXpHewq>I4G0e-eq0 z6a!aMt>J3em5Y2%R>Fs-qG0w!O8CYUsmU6gSKm~W z(KVefis*Z_e-aQzGVtpoDI&`AC5i* zZ5b#rF?eIh?&Fct5J&xyxdl4XXq+%t!*<+u2~BI+gM=s+7#;$O#-H7O3*T3Xg*585 zXAW)uQnjw1>EcjNGtzyWP;{)k*ZXY;t5BP^nD-k8?dq^JkStuVJfR^WB}&e0Z$CD% z%Kv@9!2g~3#n^N)C(pi!Y@R~Xif5zTTtM@lK17&0=-h+p2TeSV#rL+r;@iC+ zQa;=`5B{U{LeFs8XL7apV2k%dBn5NiD*%xyhaD-_7wz39W%1iQfk2jy%7xXTRQ;1q zHZzT^KRR;o?=5=YlZl3T2k6?8R?iRvIefhi=o2(hz0~U$Hd^%4D=BIFe5eq=-^ay> z<`lRl;C*nF;#S%Cf~bUU48I>Q!GSI?0|?U7Gn@Z3qVZnQ{5~EA5Fct-f6(=M^}+8Y zc!U;QMk5>qQ8u^{;&z5%XW!~cBNd1L^a8*Zf`||Y;NWcoB|Q4IJIOiVpT?<9<3>tF zT)lN_z%YbN@*1CgeoKt>9dABjO3@xZ8GE&NwI#u>=K>(N?ZCcqn4z+ArHBA>`!`uy z_!ADzDvgLog8BO6e62h#xy3C`sAO1~))qu)u~-sR05nrh48={XQDN->x<*kjb6N)+ zT&<$;=pgf#qG~Cqfmq@OEI%!RD(7_+fdR~Fh+ktrIg-5^?-7&c4 z`KAT(`)%sQ(=T_)*PT(?alV?7n>@W!JcGpQ+Bs6?7uEsa*tv$r{MtyOZ3h~V|NP#tdt|x-d_a6A2)fbqu)oa>(k2d!ipi|m;EUpes-<) z#EnUE$}=1Pte3z*nkDS3p4`)gbw6t17`Vb;#{pbEr=r!Eq1Ky3kY{AH)iL0bS?n8l zZF2=9<10GdD<2A|uXu{YO$Y#+V`CpKcUv`HCMjc@*Q+l`m-*lvZ`XS zS*Q76Wx}~Aw5!9Ss<-3-0wAd*y&(aH5Sw+M7~6NZWj%dRu8hPt$=Tm{9P}1`yXRHH zpynmpwN5I>wO#D2zOt3q<=HJ|&G%aM33GmpVs9(Gif8qcDGYrYTu{80AXm&YfhIy{ zY)~MGpGzpxVlcU!N4w5mB0h^(aV-&u%k*nJ3Q%SH(AA?#=jZIutzZbPUq&^kqgDtH z#C3Ueqn=;xZxUuyc1NLq+D3lC(t@alPT&cYW>{MPp~trF+P&+n8n9s{tx`&1I1&(BgE>I1+W)!B-iyHbt}~Y84Dvb zb|gT`t^Zh~7W|)Fp=@DfVPgIN;tJIBZNTII( zZco$5Z?-Qdz|K>toGqYa1Wl6BUv#ukO@OH0(M!xCn=T(&IMK|3{p)5t63nLi|yhNk{DMi0}h($B9?}X1iJ??^O+Aex* zbOpb;Uj_P!X~gg4{7!~-`s^-AHQ6GI$`_jT1$3LJei8LZFOP|oq?cT0MQCut>U11C zv%>&dd>8!@g7_FOuhpfjbqT*p?4B46Ta~y^G`%YMB#GiYXJD&x`6q zEFUbUD8EsOSQ!6t&K(GE{b+hKyRk*$U|eUm!nI(r;YI6eXT8K!LjDyqTp2JxY~AG8 zje=!x)_xPjvNMs|E?L|)e)c9*FouilGPXqGTxhUpPY zO>sbd+~$A;z`44t^|h|-2^J>Qmt1+cxeT`TbA|Bfg{9&zHh0GE@sGGU%!{w*ZPj_L zC?%3?nQPTFi8PHZ>|j5D{(l2(2aa}GIxcHkIKSk4u{EL4Ymq=L95b$a(=5E#4&QzfRwdK%BOEu6^PC*u2ihTt|s0(dP4-mTQ?nHlxC7 z`L;FBVp(~aD8g2rAcM4e5FA|AI5{Tgw?-B1;)JCUZ?$Rnj`}0{*rSwZK^AT!>wcdx zHodf)x}#3PsO{C*8*fEYx$xs@fz6*(>EH;0c;_0axm0a#K#Goj4-KR2tYhg&c|J`M zYyW@s(b&f-eN2g`C>F0y^LCXNE-oxpD}HEB2j9y8#vW%6K>K?x0FVFYNAiQfliY5Y z=VLgH8oR4lTvR<>ib+4NPgW1^oe!Y5?Ml7r=aV-c1kJoaD4uWyk~eKx5vI4V3`8$w z5zQ6Ku!fl%pD@BB^yaJ3f(OkMWhHVcb&n2E2JHD{Kfn;X5ckczg?hexwBhjz7<>MbAi82AS5fB%Lv-OdZOZZZ7sPwmw02w&88 zPCqOFmAxeu2!ar`0@}v;XSN7S8oF~)cuMsdWV;`-xdAbx*)gf2AqcC@;+KE^W6ny7 zE^2%BBIDn})RGbX#uTtUNhO&!72i#BN?Duzlt zXtUWw&C(_R$?sNZ_D!w9EiD4}=)iJ3K5}=L90^Ss|02{t@&2!P=2?*eR zD~_{e^0WP+ZMjHn?1ih9Wm62J7CLFaw1v62iQv(?k&l{qT zamVoq&6`O-!<&}3`9LR(Y0cRQN{a`Zo}jGq4i zs$(QqWo~^3I4f|P$Q(lcGrH5ffwx2gniq(_|5d*E;aXORkc(eL2J`b4rx&wtA*h$q zbOz*O(YX;Pp*H)QC-sGjR$B_~Zd_JX4(eoo;04jc2dzwLa|}vowH{&!A#HR{>Fd>8 zT6KD9dM^aW33u@W;uR*-3=~%;Tzy9sepM(BJ!rDRtWM=#Tt$e2v}RViZL*Aw{xH_O@q8@zAbdgx7se=*Jjhr51Q zC>L=I+1?T_&!~NSZ~N%PErnm@I(PL$4M31i4iPG4vMJ*acUm|hDFea=U8*x!l7GqN#3``5O~&t;O3Pw1PVB4|rkQ>Ur<(dd#c(FOGA5 zdw&!Z8+}igX2^xa_c^NOt2}9Dn~ssNq*ZC_$9+P^;Uvt}+CdWw!&%`r2gj43K15g! 
zh={p!CeZYO9-#gjA^1^-Zgc;MvW*<+ntVt*IYSYDFYOXk=G?si)&3&+he1ADJv~01 z5;bn7Xs>Vq;J)fm+V!#?-{cHKs9M&ouk`md6QHTa{~o}f|B}Zws?^lhW~KXIPRSdc zJ6oG!X_fcUNG(-7;Ar_$zoIJjvhdN)=LhvUzOE7yU=N?>sPQua2e^+$EnX@hE&(U> zJEEQ{zQawv$sYL?Fod_Wc?F>LmF~8;(=L+nP!)2mM9fzfsR{}bGq1r~cbDQ|cJrtCZ z=dT0@)O_?ga?NKK1K=XSWWRF6!{6~}nm*h3T4?~B+r|9#{TjPu5X|KTM-*9mddI*o8OzS&YV<+2KvspWBhq#04qy2gZPnFN9IdTjnq{FJ zH>wKq7MLNh= zqhe%`0rS2lsLG^;sBv7b7Tzs`oYh*w>a7JgthK+xas4RcvrM9~!n z%NoaiEvonx?jx4EWv$lT`NPt7^!{vOW2>g#H%=?Q)#5)+CaFiUe#@*+YmtIIJWYGL zTM3$)PJH=e7G8I_4$RE`7h89Fr=bo60%>J+nbO*x`W|3@xI1wN`HuC%&DVCf#>?o9 zV?pfO)75xT-*j-bW!Y}{syRaG82zS!2oP};r*R1E(cj7k_63$b;RtrIIJ}(p+5~HR zC!-+jk+f;b^xWknByP6#M*Wj#xB(ym+|1GiuP7VP6v%OYPiVpTXY%Z zKd}b)%co45oh@5I3f46{{mLi*WD7JTlJZArNzxQFBj#pOz%WntWpb_yNmR3QC7klq z==$rZc*o6W)Y%~YjjP71?}duHVa`uMX)l=HP6j2-tV>?|phr}HqXmN1&A)ad4f39e zi&~ck@qhQa6KZ^0j45V)PpgX?XtP6a(INX?H$680WS0J+1V=}0S#I~Hf&Cy@qUB48 zUnS+R5wJI6Q%c_w-@GMxQ2hEE-#?IeeTWR;rr%^pxqJJZ%>5`5#jO;%F3R!i;o+w{13r3yvQJ8X1C&v>nA`R224&QAGs#77PVwK$sDl99`J;R= z+JS@&d$oqW8bdw}vGVT7?|)C9h@iKI>c$)+0IBF?BScZ7*OCP!>j6(Y0puPL&RiC( zUc}9?-Kr0t(-oi?+!V`*kr&AqcNJyT5_`KrDg{N2z~ORJ_KAmXsRPXfH`4WrB`*ShPfQ9(j6MO0(Qo!He{Z970>@B^7sqy7^gWN^Ptd6R z3j0=P%iy>0ojdE#ygF<+s85rfYQF$PJO+d)8PIqA09{}XngTDXj~rvjc+|gf9$l(- zwogmT7dv*#Qs44{C_GN2p#OnuUWrltL(nGqFls7&=2K*$ONARHWLMlX^X!Ell@gKG z(FR-I;H}7$thg;n(F^j)F~-+n8>b= zRk>sAKRGWX;nVg7+xi~r*sv%9mVH10EW2|y>T^@T{T=jW7j~N8R-XCCx~Vsqrm{zt zZ>9H8r*dI!SE}3Ywq*_OU3q$gSlV=PmNinI7zpi;eWW+q1sbY;ikCI&9(i>coS2Hv z@=V_saUhnw6aiyo6wch!`>mozbGPk-yix~0T3)SNiE)-(X;loU zCpd=_Ne5lRh?Q(5%2ijQUNjuyreu&dqNF;!g-4QIxbg=@CX_Z$(k7M4`7hsUsD=?R z%L+w10o?Rkow5GjNo{jnhEO&vSkeBPo1k&N1Hc7UQZZvcjUUC(aZ`YqTel zYbY&UwoDLQZ6p;Fx$(?mMPPlI=yf-92a4-%nFPyXva#QU;Ij5L&%oC8;|rSd26LWWt&7Jd8q$ZwZh|Bd8Tp133SZ-m&#u#eAvxHqF5CC26d$pO1fk9R)^ zmEa54LBb$LPK$Cn%`6QyZzH4JR5^n*@I235)FKx-c1P zVT75a5;gwXNGCYi$AgOj(vL~Z>!N}vfy0Yv%1n*TuqjR5928YvBPGxQ3}RsB#!)U8 zE$@>o($BR4cI%IW86Og*FX@C5o_Qn`4FM}ru24Useb1jLb1Hrb@U?+R=GXkQO1@$8 zGVlP_S6MZGsx{0i4B5s8M?A2KP%gj0__N_D;Vn7)SVJl!=7?H#Z$ZlaO)tK+fz7T; znAL)@*Y`DI@p8d>hBNK5EANot{TD2P!L&P2YYP>^@x{>}YR|lT^hjF+Iq|M+<25Lb2Si8 zOqL1eT1RW7%K~PFIbMh!0(yPnH)RGaADDFD$GwY21XT~KJ;OT0&Qf5kn?#h|xL2vn zlpiix*dYp0N6po|AR)mx`{5KD3uDPV_02NHLQY(d08rzJbT8@QRB*v`3`&SwZF`x#A<%AcN2}{xo0U2!z+bxQ-A5lo$H4Fc# zXhtjCgOtIQXYOBaVuP6;=)Gd3JMiU*j7pf8#$ml;6#<33A}xQ7-5C)oD$q*5hAwOf z6afY~fqC->Shn&o_Ii$+3tAwTCZi2iIu zge3z+!%n#T!N!Bp}(yD5WA!XhY^e!V+@=&zOu zv0Oa@2i~~z5A1ZjAtGH+Q>afg{Lbm9Ur^x&%C$^b;YCNTo%gnBK}_A4C!ZWOnLChhNL@4R(L?~?OqW=oj@(N?hqn|F(#I%dpM zdb*kGsN9w4JfWJt{3aW~Z2FbQMP7dAN#ge-;ra*xz?^4`S0A&h|J-Mw&=v8QPj6Y` zvGuoTt3hG?se;3D?T$(Ksj_TZAhg&%D4*)oS15_7iVpbPH3^b~>b@!_9a~scvfv&@ z7MLyfK(Y44&>~tb=BpjzsjJku3f5U-2pcUi!>rAe$QTmmpxx50LUolIPG1$c&;@BWB4x(_F;pR9 z|G?vY0H^Y16o}+B$^5%u{;2|*_Zc?&mk+-+sI!zS=yv;&^u;3KdE>^sw~Ulw?T_$&4Uf|n1!`O6eaaYh88HSuf}QFVj>;0mpA5_RW+n{=>`sH z?MQcmP=4Gj@}(aV9P?`4r1g-Sr?JRC31EN>%rN}@z=7td%WqIa<9{pyI2)prXR1{` zpP9M3>Bfg$|LOw+ztO`3BnexnYi?S1+;5^Lt1w35aeeC(sLd7JO-93-ZAj!b`;f_T zm-PiemHzJpltLtK=Id1Oyd*jVMNl)qmgmxgmg5SM61@Np*&@LcU`stYqgT;T)iZ&e z;!GHKxt-_T)lSrdxdKP#j#eY9T(3sWL4);58V_vQ1t>pfR(s<&qKo=-y}Rt2T9FIy z)NijUGPpA^4PAqAMa9*<%81*?(WnCNsqrBvf)24hcyt2myRzmZZNnt&e@bXzMdDsO z5PDyg+MT5AUI1(wN!ft?kw_&qA+arQf(MP^k&PVz=JM-SsT7|o+Tt>4SwRkg|0+}k zer=_WI1`BKha+LVb92vCfkDPUaTyjmjK4XsBWqa5C4Oc($t;jd@p0g0wxt$#<(QsR zUzUK-qa+p3RLe%{oO_5VxgR_E7@^_1U}#xL^;mb98~4)d&Z)H1qoh_4%sd*KLpm`V z@Gu|q5#P!Wjl=Xx5{_g-bpDDZ&iKz78%_O`+SHs$+kv`q4=4TaX5zgr=cdFhIY82V z5o7zoA>(@gPS}-u8775#V5`dP!@NuBIR(0M1Q1sONkMGPDpOK$HAAR?WN`{CGXI2Q 
zeAx%9_-65(R@V!39eieEZQh|XpKR8+bYcSoA6@#X{DfM-Ze*o~LASdDbLh8)RS7={ zwmJy2$_gt|K!}qX^(#-dN$JU#NEw&uq*&Q(I6vvC@v{sf z3BGmy4H}g`ejUzuv3C=B=8dbK{sa^*ZtiNG(LAtg-3-K-1X)3B!WMmB0a>r4e`$9+ z0UP6JqtH<4hhVY~2R|;W5N-m1r13=HZyTP~_3{C8=jEu(mR-I{kj8~u;suq_a3>Jo zr9Es;O%YzdUEomG7^Z_biWMgUGVn=YMd$7{HmZ=v8 z(#;fGqc~e7 zq=Ysqc&^_(>NR`{^2SQ-DXidjd(^i5R}P+V#EBdq9ALge3B8%Z<>z7= z8Oj06=||sAW`|^xHS5Mb%;?*%c7ZE@vmyh(AX;*>p!L$UcR&Ii<@0N_(j^{ zwEzab)_*W4kx<^EEa2yxPJ#IC9L*qjQLdJ#wS=GizbyPN2akV1h3^n&x!2r9$B=`R zK~f708#K2vs?znp|7mX`R9>Zyq7|Ncy(G2Kj^o>Cd* zaBu1nf|=KrBxl3s1zefRH={Un4?+*d0cf!q=f_ny)Mps}VssToi(haCkq5zrjmlJG zIjtA&JbJ*V7Z`gnZI1Twn*@Z$G~2mk-&Iig9B@friWS8*IFEq&TlN$!+kX=p)#m{T zK~UVuU(RF|gLefhj`KH(EaF-Z>)61Kz22l``G+;F&URtEF2q`u#*l){Ipd{=$63eFD#qv9@2>ttSX8Tnj=UGHDK&lG#Kdf2B4aeH2BTu?(qW#z4+VbrJf(w zU#(K;8|*8|8Vnd|IpBGS^?DaArrB4+d}4pQ)<(2PkS&$OJ!Hd6+i2*a97tZ zmrh(>rzjr~l9QFIteIC_D^gA&oYQ^idenS{^_pa36H>TgRLFY-(V89hHo|-72X(AKfFhp*z zl&Oi6aByfS(R}SUE6pg5;3*MPf$QcK3FyVD$qO~>?mwN@uQp7&3(&asymZu+kjx*$ z8p@&`i>1Vs7i-%j1lRv6$EP<e_Mg<^`gLEOwU3C3X_~ z0PIRKSXP8Z?e|>VQC&*J28>T--Mk<797WnNP^=ilSvcEOD7YWnrN=w0#*HOd#@Q1X z%nP1Cye>0c|6(?!8n&W}c4i3EGMvYO_1n*EC6@EMhjYz2!`8tpavH(syp*^@Cmb&! zp)WxDR#gOARAr$+(Y{{8SJ%f_MNvYT!zt+naTeX`;AdXorI zh}$y5spW<33&{?$vNP?Zyw-9Odd3OXYLf)Az1T9e#6tKplsZ+j5qSl3I;tOjH^#R; zFeTt3siHGnXtGgW7j(0;H2P(wbaT7V-02n*Dxcs>?TKZ-wFEx}IuE$u>^k56YrKgz zC#rrxLFyb?L8&nIO`hwJ2%}hK{h6V}w6$ets08MZZs>`#O6EbR>2p)Z);d#;J5V#^ zmuHm~wED~e;c~iu+Irr;C2^&A=_fn>wg_$?%X#YLa45Z*2{Fe@BA!U0@rL>E@w-wl zZ&1RGU4q16fyi=2P&mg-joe6+rxwEOW&%j(Fwt_2&?GcjeMD=}mI^^~%Wb%Bu6X;6 zVz{?9?B)l%_0YUd!-3}YlLHaTE!9!#*D0N2yJYw9K@Em&S=ss%3s<4mxoRV+hI#3Rqg8IsA>SJZ zWhi{1=r&~fh$+(HW==x~y&!dIBXB^$HW5&=LE8EHP(UNHfh;Mq)upiN#4DjTsql(w z6IbYVPwT=a$2HfB_)W^Hb(N@?#=HBD>7xk*Rh-fw0N(=}qpI^d`zV7iyOj}^a_V&e zONokM$15Dgaj_g#J&t&BiwX|uQ{}RSWhYl3_D3AXx?*8eiG-by114S$DsTOF3dl>+ zsV(V8A>z9t*6*w`b!REWT_;p=pMlC*Faalav$f8~zJ@7`0cmJA?UtG={(ZD1(X_6<)}2 z7XIKHE!4nrY~8NDQoU=qbFlYWKkE^KHVIco9VvMUngXR7_yq=u#ZUX{ivV z=@96Quy4_3D4sV94=BqNH*c$J21Uq`==mN0Kug-PE}JdP57iX+5mYTjr9=ABx%@d!byJLh^xwdnq5+Oz;JYw{8D>#nLk7d-&}2+G@fjM z8W;j!LpfTRYJtjy+lYz1jNsQqw4>mm2m)~+ksUv91t-LE_S!i=hdrRr7Iik7tU^Bz zES41o4_gOAV@jp)Q(Y8B-)3;My~&($%cZ!m>Xa*j9`qWQIr5?LpiG%TTfE;iX?5-N zoYycSsZkIU-i{Kin;ovlm@r?RXw^~vMfaMuTA3$qWpPur@c{VnE#@k=?Dfe@+oESn zJ8jLWd~8~()@ki1kk&!+s0b0y``J+I;@qXRJTwLVa;{J$2$FEw7^t@1FjA(dL=3n~P{m;Uv8kh5r5HTK=pnfg>Ck2va6&!sIO2sObk$Thr*aw6baK5-x zmwIx81+K5=*$!X$JoxC=NaLuArY{oQ*)AQkmTOI`%H=LyD0M|JQyjeFUUUH0ptxZ( zFVQX-Q`C}G3)qQ~^Hu^hAzbpn`5zIapR=;)555XFR#>4QPd1OzF(JE^S5(D;4I%qQ zo3F+w_iP5xg4Z6lT=e0K!B)L)T^-0l+M))vkq0(xZOkd?tfMU-D}zFI1}eS-vEOxw zqL_Z6PcU5Lf|3xV5h>9wGF1G}rQb^bxD=awvPsP35HGFts14P#utoiU^RsNW97T$3 zEkzx%IkURlfSQi6r6L0sOT+0Vjprdzq}zD+?-QlN&rP@fFa0@USc~aM@y}Eb7N3J3 zitJZQ8am%jMg2GS6S{i5oBn(R{=tF9_fsK?R~H`Vcx~R%Wxmw4R1&z-4h;W)9;Mje zJ^Gi!xP)(@hDKA6k z^8fcE`$*9bicchhSe$75WkO>pVaG-cs|1`0-h2WyQBSoy)K4U3TA`0z*%MG7d!=p` z8lx+ZAh4KP!u&A#SG^6j>!<$7@S)H71-XJ{;&=65wS%W@k)&?>!%Hw}Nu75^_09eb zFHQW?&(OXZQ;sJ@61pqwCA>W66&&jUk_{A_HOK;CA6U)@FITXQZ&@Idu+RX4)D4L--J#KWP~+9y4+F>>Ix3h$W}XePTCy<-*z&<)$d* z*w2?d7Ux!!$v6$7SZJ4CmdJRUy>~jkbbcrg@$%giC+ej)jfW*0lTu16>D*J^X)$_ykhHj8b*QsI zh8(6SERQ%>>;*VWeL4?;0*(6LSKfAxuP+|(;md}DA4$zAu8$O#DOR=|uIht~o0h%= zVYCvP9d{pBx^*7a<8AY3Y_m~U-{lk|;|4rF4&lDosOS373!+IfGt=q$o_jk!t5V4# z@JNrkahtT2>6Nn8UZv)08ggZIqj5BLE4JZ(Dzy6K$v?ska$0ZA&Q+`DNiH5mX6Ejr zRc?EPI0OtP$_HhF8o(Y9Fp{30J=>BEtWG{3O(xnDd&RWXwSD>>4SM*1u~M8H&X-P) z;$1gPL^mbetvZa}L;Zw2=0@;TLf_0)gsl>f8iuj&U=a?m3jq zpbWDQulZj;h5u6^hBU4Lg@|EYZO(vA@5^V<==${xN=I=|LD8nIe=zBYpYYktF&o<;f278Bk4A%!jR~QkP+W| 
zBbtnl>(b;tWx<9NM9g?#h0`rdd}1YLp%jUaF2qmAzsS_+3E^Lx?MvPlS ztB#pg^)N`Y7DV5E&4;dsOjlSOt$rM2vpU||T(&bCZRZ>ZCrRT=4bBt@(KNQh1`S@z z&=qFSCi>iyH(lFErv&#O2`Md9TnoRH)|b1IiuHH1HqHK~n&5Rp%DRVq!`TPBmX)d{9xI>p5}+}7(f(FQ zskp6G1It?IFTo}7cqPnxQOD$GCzT?p#v#ZHq`B++b;l z8r23ALo!{H4m^A|mM9+=XbkgrT_e4e&6=7_l3b7BvDpw%@(7GuL>qa4a?HGCWqF$5 z?A39y&d*|<`D#If|JjTFC!gJ**wRrEWb=~wk#jQOll~_!`X7E_oEA{wYhuGj ze~?L->O#f!O>;$*)2b#s?g7|1j=U}vb?!0J!VAPzx(H}#GBp2yIrH{v-s?3ZCKMW& zA5)HQO-+oNJd9K?)rTKAWqyyh{bkY#8=CY|!MP`gGw%pIOvqqa;p-FM2b#Xx@rZ9v zPxI3F%`VcI+^S_uBCRN7&Zf3%rJ&m93{5CyQysQ&6mzm@U3Ka5t(a?r9mA&9`s!s) z*f-5HJh{utc!B)Q;pdjCrz8U>RX<^lIHG38sRuc?id2)hyURXC6q}$7{xHP(;K~ed z>7J^KqC4w{?cXfiM?UPDz1K!L`m!z7w#WOt1Za8El1_ZX`Zv5ggwZ?Wl{w6R5eT}y zq9z7J4{l9EQu=GGDTP<~v~xLTa=s9jFn*$|l)^N`DUH@Nm&e84fo5=5$>pb=MrR`i zonbTQ$0t;i$d9_x^wGTr5%slK9&Q~4H?dcA|A6e7%+ABMVZ)wlA>UOA{4BfXyqxHg z4Zr&b`s0V0=IU1onVN2c3{H%?K{uS$u^m8*J zP8~DN(>Ui;1w}%ZmkfMG^))#!iPTAc*(UZuF;;5h$-ns${-E@DaB|n`!zOLc01)nc zCP}cUv$TxjE`Orj#OSeN`EApMVftzG(qc2Zk-N_i+h0In_yg|#_6#LOIBj_SbcWJL z<0Qa_-qJ1Y;qbqk_1fV>9=yT8_2*8sbb%!k=E+B7y--Frz*d|4=S)0?r=jPj6O7@KLLv$PLr#$gthMP#JBe_n<-)2W1Y&Pd}3 zo?THD={9_;E@@fu0AI^#6-?nPO?+Uu1D5pywH|%p<+g3=e%enq>2?@}(ojB{RA8kmcH}(YF zl_RM>#WeX*=kZBnGrR{~@(+~#!BSe7AN;~ufx0JQW7t6bnp10&BtCL8Iix20k*zaM zimGJf-YU&?uZt_yg^VGeHJ`f2J7$Z`QT7~F+YRP^1?!TzSHJRgdM&thHydBs^-q2| zQsf0)7JWuhVJ{_AR}g0XiHtQY5XAHXW#t<@$g)pr`KxA$Gt zyf7D^dae9z3QFbXjvQ|d(;gneWyHLR?`4J{@8-MYspMTDvnAfQ}`DOzH(5>L{(gc)=RNb7OGdt&4Y z?TvaM(1y{oTHm2Uy*&ShWqN8Lsal<6wFS#D@ejV+ulR2N_kDU_h;1_SE0i|hT=}&m z>1e81xdhdVkLsMUQL+PpCu9)*$lf0P@R**Nm&9iW&kuLi?THg*pIUc#OO17S&H0O> zxrpmdXg*_ro=1e1ooBhOum`{F<6XB?rHOu3nxke{j~sbD_uTK=O5N+7HIlV6`yexS z2cP6|I~t6hg{H*qiKuW8v@dr2I|Cr|%`5VWs8DjSZjkZ1|HFmTvGcoFE(2cn`2BrH zL>Ung3A1GEX0Y%)d+JV-I>r6WXN?XZKTP?8HihIWuJGt7(Im+{dV4`n^j`tKG-Wx@ zOp?Tp;Pwto7&+UPHR@#l+VcyR{^ETHFV{NnOM6#3!kKgQ5~KcpOmJe8zAXp(9pcs7 z*BSszjeDQh(fS%hO%MU%XVAUQcUpCz!}yB9w`|tH%Mp%q{PA8+f0a&sH>wL!?lJmf z2GLdcIxNdnLw%*;7+U+g65)%*)0G*VQx$2UOFQi22J*XMZ`-vjhZV}Lx`vOz&gn3N zOm+g8BVqD~ItVi=GB@$6zikS8LO0)Nsd&4q9oe`YGg)HN{@Se^07v@+J};eLDi_ma zYDZ(7(X*wS(Zt8A?zbz>6s7?1bES@cdd`C~I9>`}Hqj)8y-1<(!kpEoJW54B_{v=O zWdU=5S%j6#veD(fkDp)zmMafMs9)_06sbyfF9+(DxF7MSDGZq(697o?+@-+#>-k`1>FA^8NK7Yzjqo|^T;4=080Ji++JA*-hW9c`a%P6y8|LrHz(%q5>G6l%C z{;DxWs`2q3=5zVh{BDJP-pA!rj$eIB&EKO}tl(Jmg?h;z`f)iICfm=~5Nw|wO`F-s zVPu5~0lR9lfQSd|k?VdFU)E~;2%FU{DK1je%VzGj8k0GzJCCPO_hY)QcQu*i!@}IRoQvk{BzfrjGca^0xdf5 zVs34|#+`PKXTCqbRMXhOz11BZDKfSrsgT!fI521;8?KqiC$6T1{f3oKaNGmC80R2+ zAX!m-S9^fpFW8uA1cp(1@mr|p%lGzeI|p{)A4QGdjpo5H_Gi1y0Vg@ z-R}$(H|F#8h&(L24H4PcUT59hvUOe}`+fkEG9?ioqCmCtbPS#Z?uIb%XCVi`6;}S$PfF z%pAnrp=Hm=#-zk?6DRKIxi1KULwE`?V*VY7iUHQZ9y`L)n}2LBY(a zoTYY`z!L@ZzLH4;&Z@7Vi98{5AUQq#P{GYGL|N&<$dp5}Hc&8Z?q7CnPCr^OG2Zas zBC-L{$sJNxVVw{}Ux3L<7G1HOdnix5qQhGxs@Y#`5sq?k?tM(>cO<^$`G zeP=5t*nrvp#m*=1`E4NLcjA?JM7oK2Gf%85Emai-E3PA8L&X?jU9>>pwGog!H1VL*!sjPpD$AJ_H_(fvjIO>cQad zfQA>1Hxpb&doQ&$&N{d%SVf?YTFtF4}`q0DoxEWlpPb@2g}j&Q$c<_&Oy=UZqsvV;H14% z_g$9LUDZG-_dLx@>8mwdV7X|yc+`WMVJj}oOkd8e(xIUJ_cO1hRH8WaYzp?^jGxXf z_2x_Id&$whiQo0_LBB?v&i-cC!_?A1fjURQ^=~IQiKA2NKLUUk+$}v3rsS1%m0#fa zyyH!*aPj#YS=;f1B%Zmop*(+T#8uo_m558!y*CPX|tl)C+=3NJKSosSy4*%DQh0g=FMZWShuMFUP4KL`t@M|njg3DW$^<$i+I2n->8gcqCZw;pP`Rypv^=bII z{PTsk3exi*GI{|Xa3dzt&~JTCSA1Lv zr=$y#`(x<*LYpd*hfaH0^~ie#HY+$XBO92h$PO8#0+Kh^?-IY~5BMC)=tKeEJ(L1j zn)!)?FpbMWsVSnt;EvC|h|!w+0Fo2)Y0>aJ^y>QVjSLTJgJZf>A0ps{WIc1WxAOQl zmdrv(zRWo-cpiRRgzu(?(pSg!xe~(_?9~Zqn-gZcXf&NCIr+Wx#gnzu3ULBr_4iH@ zlT`M1Y3F6;yQH_2o-tH@piHZPi8-uK?>C5vBX`&*ef~Z-<~Ozea+?9_<(>1KFBVxo 
zY8527F6YAnpj>D7NG421aJkk91utHPubFZ|r?onPM87jV(?>7u7eD-$jFzeWGI+Q( z5>%aiDQ=HWQ3Avj9)H`MA8}CJtxY@rVD`{aW4vbyJ%xYMv)RWS(sUjS+LP8>nS<9B zW*R6c!{*3=6jg%pf9!A$bkk}S#Rola>S@Q0-MZHJ()bl>xRtYd{wAV7oSu+-_O{rM zIg0RyKfk_H<)ecwb*d&=Udcs&Z$Ejo801Qfd$p+w$zfJ0exV3}=xp)Cj^c2*aj>ew zq^EYF{N9jRdzt^KpURWz?zbQ}#D&`(0`-!V)fQ7xVro@Sy0G;<>xVOt zr=Vt_Bs!(<%j(Rvi)~UO(?`Uy$4JmhB1UV%^IEa3pH8tTg4#4xzl(|l=WWuW2oY|r z)n`kW73U6piOmTIN>!%%H9|^rCRq9X@{WUBs}X6DlANA+&fdgWJE~>;=6LH{nJRH8 zeb%1y%VaMSC6RlDacvhcNY1}=WZkSH95mCnZ9gF$g+bj60>TwL$lu7p(1-a%%W}6? z!Qs_$&n8Y>1J>JIV6P<&OL-~c`vgVKeYmFXuHe6&-1jWT!F-G)0;uWRw5R#xp6cXLN4Q% z5*!$j|1;NE>BJcHr*?#n^wnFMdUNfY=UsecUZSJpHH&5g0c^#_WG)VNy*UDQuiimW z8|gGbNqpc4|5Ra^J%z*^|FGiaOJVic_$##jg7#c@`ZZmy-s(F&aBV|I=%dZEPhF0{ zS8^|{?-T0xr+$*Qzf&OSWStF?8P~HN^$e#dk%}M zh>rfN+_ti_*gmJ+;?SuQ%FWF zC?ShU%#%nRyke^#-6+<1gG|2xR5Lylw)uHqUp2_P)>MQ_#VkXyUb_exRfgy)Fj;(K)} z#IsETp)|p_Ec4D~1lK%1^b{bym3KD!l=+tx%I7`s%{KU_%xHi+8?80E~Wt1f8tx2nW3G&QbTe$e@L!S z6WXcoFyfdvkv{|Om$|QB0Ypo4Bw4al*Yz(hSYDVy4(8mQKy*BhXzR|zQ(r&vRp^24 zKP@8dbnA97Aq6Tb+w!WlBSqPALUA@#UaMmllO?8l;IJZQUydItoE>lakivSrc|teG zE??cId@A03u=0L{YvsJUSxN0Y3`H@RAFCR~h)ia;x{Wro|Cws>3+0Rjp-N1~)hsz+ zLX~iEYzLFtzon`YX7dTH32ypwxj<>I*iIniZN1spq|USGQeQ108X*SZb9OjuCh==j z;rM1bgqj>jK*UY0?T!G4bcz&-8NEma2sg)>I+nuH|G;+X64#J&Hv!s;#+)yHD6i@e zEhKH$t!lw03k~=qr0+_)-v=XIk!jN;e0M*ox4!^G_?61v-sc~cm)~k=MbCZM*6v>PR&Ouv2)K<fmP=;RiMaDK>8- zQMdCPVwIJH^`n_LM^8ZRqpqhjz=hBr4{kuwt8f<3%>2L?u8NX#t$s6tcbA;aW1Zfe z^fevGEem;mOSxiw4iOEtzTENM3)`&;-|kw^XB67={g%bvLERBIU%F~x&x(?aX>%%9 zSjTtW0gJt^9pP@QHTVe-H0K z%;=@}2BtHMYEssQRtIhL`-k$ihMiLA3Y=_c>E$>LxoS>D}y`xhcGQk))M~%?=iw!tn#{I%U_)Df7 z=@744%XUqRzMcOChlsOf6W5FBQVyV<-*U}~vpFW@?Wk5%OEE&|OGuC@vyo}MUdqL1 zhP{Hj-UE`i#vbTw{*x%C%wGL1P>)vpv{paI^qS%JhP`@UuE$6p*-m5mi;n|mXveUb z37ox^Yu>lkzp|;lCL_<0>Y@Zgi%ZkeE^Ke)c1bjYhWvb5WDC_9c7(Gy5L^q7L| z>WtI~eA`H}>ibNwO>55Bn@MkZnuRrb1Kw( z6~VjLHasuk0<||BVNo!C+Gky@yEngonyEstS@;F-Fk{}a75rxEz=}kRXandijpeSt zb&8!FIca23=czJYSKDN#jjIWm&7_S{XlC>)bb0g-3A1S*Yg$KaBj!;wacML;5R&TF zF@`mvsH7Cl6xGm0-2>?{Nfy1Xt1JoAIPC=@05OL;(>6S>b!qox#YQ_q2}Z$gawWl)r-(1a+3 zxKQ?x>VV|sQWsD#R16h{b0Hl^m%Zm!t(l98hJ&8PYcCQxQ1`bX`Cj%!WO&|(wYLRL zNp6ElIp)U-xm^V}4(h_!5}GVGXMa3`6{fx9)l;nPnCbX_J9#$-H_1chba)WjV?AZLPP{6P3QhC{)-79Y@AdcpnG0-^QSbsx1UHUztf z^vc_{NRB6E8%Tj$6F^Via*LaD*-dYNhrxhtEWWsFiai7D@&|NU-=?lHg1puZv= z6lS#r9b{jM>CO+kOl6dPvEmgy{pPjXO3TpKaL`vADOGnV+B0XLsL>ZL@8#K+p8j_- zv$wi)ruz(Ow-6{Ew>W`3y`cAMGUrPy|JzRmuJ4-iOu7n8s*p!w=YQqG~)aZyjcGB8d09Ao@*a0!vG9%Jk^wRUk|To?S@_eqU|Tiqn8{<`iy zgg%OKO5o0#5xR5q5EvnCPi;@@p76T*xBkE@zD46TnK+cGh9QkIl-mrnUfu~h!V}7J z3BeQJmq*m7ZKDu`$?1$La?qk>R}^*O8b3q1A$`(sahe>607DUoUoo(^_9D?=D{-QB ziJW6O2QazNrCsZuSW=k_Jp@5*_Ed}4^%&7or$&6VpV|f~J!#$_6CzyX^Twt+z2*Ap z_3j$0T|HkNRDfP(Z2`(%5Sf>jg|DYK~1e)GupZ2DM| zPP=pt_TCNt^n+TBGR8Gm-WA0&3qeuId&=?mvX^horyB#~)(-8rMHX*S3b@0x`BcTf zi;U@dNB@3%QX{1V=47ar;R^YEIW31I(u7kV!mVX~t{3Sf~lw>wE!gJ19F+J{M z{n=BtQxpYkMUT@c0iwbuFcpw@mZDOH{7^3W+wQi9^W65=uLBN@Zde%CwZkMpkA5lF z{yVjfTyCdnP6kWrz1Jgh9yF#^FfE2zWrghJdDO-AHZy$A9z(8Mf%Fn;fW>{_7Eb8Z z_s}_s%YD{Ypxc+N^+wAsoN8?FoUyb9DnKCj{7Y_L7%sp5<;tm)M0x)+8`nJahU&yp zy9?&Wl{JVl*Sgn%XB%SNh(uTQ`}`j`=(?Qola;)-j=HN@{fOz+5W_B|sR#(qrJ9O% z4fvW`wotgsW}a6lfLgq!Cj5cP-gZFQw6`FxP=FEC?;bwSFIYXxJCH!Q?yQZH{Zet= z@4~Zi_xGLAfCG498)WW;lbtv22&L3gx@9swIl}*Q873kCFzVLA-ISh$=&HuM}=-#>;Nssg_jGJWW%xy%ag$&aVsnZLF#SN!lu>i%c2D7wu<;Wh^=O_5{_HEmV0~Z^k&C<;J%>o5F$j zL}@@0nI9c8@ALL4J3jXuj-Lf~YUf-tSe-!*wA&#TdP{DYSWlz7Ww`OVce6`A*PlY~ z5y`hCb;YkjMJZXzK4$L?79Wr|hryz~iHM)MahMY&tWv=u@7pwEqaT=9;^CLRX4{9o 
zLMcu}J78-MdPX4WMrk%l_m0uNVdv~e4_)j_t-0H1N9kw{YxShSWBdLL1eVWVwvFUV7F+ZX~>xe2=^SAL)?1jMHANgO_a}0dcGuPHX;|gOF5g+&- z$4!AA{(zje(}m!a=9ryE>J0&9ar@SQ)pG#iWmsjz-N9!J$+HaPHZ|E?7v6@M4d2w) zGm1!bDUie7!qL-hbfqhm{Xo8q$K>|vG7du=j4FQBz1e`|-)whGv(cffQ*A28(7*Fv zqXV*IA+anzg^0zF)o7OA08L_xLxcnyqxX58dPQP;fOlY^p3D!H=cuY)Y>CBW1qOxlXF#0+`gmL6u`TC!5O+W1svw3m=Z9J#u`2iFYzx?&I3e=yB zI#Ts9rByzDoI{SqmW zVlyFtxqPPOaLhBhSkIInpd$WoF0@PPb`*Bwy^&TLsgJi%RFI2s0n9XCM1&-m#Ql>V z!pHqgsYhzA89#YWb;D%<^vI0V2>#TBg}R?|PkVfS1+v}X8+--KJ2K)%F%t8G$>5O2 z)RssslsANf@4U`_m*tQGRdmJizSL{@Sqaer+(sEb#JhY^7U%>NxGN7W3p3*P=*i|6 zbZ5~mP`978nW9jH`aHcJ^5{j?DXRL>sa1Ok25wcG;ia;^uvu7C0KV@Q_xmKkzG|;{ z6W__W_(xP8J>h)SKyAD3wI%6@ozzbTWp7o0hG0TI@N!@HSufAw^rJ<@1{!nNYm-R= zg&2f$tFK?sfj2lg(T)n-)N|h10ko4Y-FA-Jv{t|E75uU-uX~&OcA44-VsdxEU~2xJ zJ~U8IKyvaE^Bpre{9$;zlYAKR&GY4M)^X#UFpQb4J@t zVnG>V%H05aeyJHvT~j{@%J{)6$W~yCf|+iSL6#8s^KEQve3Qy;#nk=Y;rW!`hP1mA z&Qn%{>W2V*|M1B=7R`{qT4?>~lJ&)e+HK}E1&dGD=wA)-YD*Tm@DG0-PT03u_+p(` zPi(5A^+f$kFVDsbV=bl`Z<{Lg?PBe2z#26v6aRPJ5Gt_${=t*@isP#f^^DMObJJEw zlje@p4LgO=@r7epif}7)(D%9{{n}B*VyJl|7zEn=SHN4u`yYP~`#}A@C4u7dE6Fnu zY-^ePXLxS2?E&Mb7H(6wT;3I2W9$~Q73MSI*}W;ae9Li<0)PnY*y5%p{+`JG>S$h! z&4~R*k;5POVvrRQpOdmPX?F5wrCriJGph6AagWFxLp7OYtLkEMTM zxT3_7aVCcApGk5eJ$$mL|NWsFs~Ponj`|nx85>9jUa+B!z^V7>XSRu8(k5VEh6Dq# z{$JbBg$Twb5q{0o;Ps5id5rPYy_82&9zU6|v)a5vPe9f;^7F0;`HoMQYR1Zm5XLxs zZ=D-s+{2pXo;dzPoa3{AXVSQJlhQPB5)pVj_sd(P5550i-XdZ!!MUpfHP!V^?(IbX z7|b#-Y(tFCKsl0IcniQII5#B52NhLlF=|~q=~dq{Pc@ncE+rIc_6{4+b76j>LlG_K z-pAe%*p^_)in%5~;N75B%QAAhax3JE-}kr%+> zP@BOyMX7N%6IQNz49K-@Nhpf!YO?;P{Y0q`;#*}GWbHKm(x^?lP(Gvl_Owk&ccYXfa7+}V} z?*qlLzVN+WEZq`l5guRH-#zd+WoLd^T1Y@iG1jE=lxA96bf@CL3?cd1%Q8JX8!J+q zH2LtRb@M~FMdo!cdFG+R@C#iAOyzG9lAw}`J25r@MpFe2uIoDwZj}L_Kkz1?tiIa2 z>x{Ib!d0cgEb6u zr{bJ@nMC9d?mGZ-@Gk5xFiZp-p#L;XoUky~^LH#t6nb{OAyHw}!uw|7<3ho;`yV6e zJG#ocL2E@whh94C+Ww|f5U8!=cr`m(9YLPpeief|$wbp*+3G=lIey`QJSln@$KqD{ z-jbt`H{;ZS(td>tk~(ocQ5&&GcTY}BgaWz>yYSB(S6xW1?ev`I-ZKB5iH|lUP|YOG z;bV#&FM|;%d9pr>9NcN@|BAfeOxj0WZ)E#V#Fb|kEYm7?JZ~1WA6bGn-NH#t>U$#Y zmrEgHPq%;X|KLf~qzZ)Jua|>p+fowOXODw=VJ6&O!qg&I@5rwWgRI(=LVSeu`oE3G zzjxMX-sVFHj>XCsxWaVa8!f6ghg1!J#rR4*p+{Z741RTi^PpEy&52;nC@a#>9I2gN z_a`x{Cr?m4ozMJU!E&2!8nNjYHVOEm`6@Ng?PZJhbBe18xsXki_W=djtVl7g0m+q) zR%hY_2Ti@s093ddcluS~Mo%Hngg>nO;o!}R-N|epi4Q7W2um$`I?_^I?3?5|nT}fBzPfka#AOUxE*tDS9(q!EJXz$g%_n{ zzMOmRi8E@Hge~nGzWUp(bos*SU~NwqTG!CMVtM)Uh&^`J+RNeD?V!TM zj`>}4BT=g*>zswCN$mG9Ym#f)96(Dy0z)|2G|tMO&F6lc6L$-nV+vYrW_6ZI7~a`Q zE+fkz8Ar;^S#!y^h4LNvQqb=0V#?4}nr&CB<2qvXLwY;Kol9VCz@;e~mh+ll0Bp;( zZUO2>YHK%9BIUVFOw<>=^RJvt3&8JK5mSP(Y@GzXJEkFdRjHzacD|}VT?o0#|<&0)+m-3wu6$a z7$!3gW1TCSa!7>1E9*QMhf-~lnTQ-_SZ2b!&t!XlvueM-pLRYypZ>r9{GR`PKlgLp z*L_`EH6fctSj8X{=1z6^KYFTgH=vx=CG8Z=kp@Y;V;GobqATFsT%6HEj(2~7GW;UB zX80hiHJ%mqp+m1H)(5zUym>3hUS&lcb{JLqT!z2UxYeSk7}G%L;!uCe`!$A{wX~g~ z&KlPpVv90(?V-quegx&5uH1^UG@$&A66Y^|6xqP+oT$IRXA!DS#<;`*!BYkJqp$Wq z7MMu_AaYDxZo4~p|9oIKx^P*OUzHij|GmHfnl3>N}y^;47U zRjLbYV0`}*zuXil{m;fnlSB(|T>Ca)wEiFsQZ?Rrc!=$c52<$k22{i%3q?5R7Fk!o zL1nq2avMGwNl(Vy@1+io(YCIn>{-p0HxW3z&R8oV#8T5jF!3BrN>z|!z7p|C$%D5O ziP_bA@Mk`f(PWzD>=6U)RpO|?!S1OCBZV07?Mi~ENNkz}g&Gx)`}VZzmH~NzzwIrI z?6sj%2yIU6;Hb-bQOsA>7=!uqtHJ2lN!XeB3rK{MVSzXZ3jjKQ67*LyMD&e4Qvpy8 zK~b=abH+juvDKax5OumX*C!qD^@m1KA|_3Q(;bvu7H3L$Eo$s`*t`dD3M2XU`xEav zC75Rg>1doSveJf!qq6cC1;_H(*!8|Bdn=~h53672u!W!Z&j|A8#+G-KqVpL)Pq%p{ zp&Hr^Yfr^_UAxnRMi1I#88TYs)wUP3K2g7 zglx`&T%ObfiUHUci-7|-cLuqaai;T@61>osD{2t8&<-s$g)wwDX38gw8RHqzZoz9& zEV$z1&{_{JMXG^bc;;>Hf+cut$}p$kofdl@XaxW2Zy``(L@M5#>!Ba%oU1Ohv2(v4 z+dg$At8A2xU?iqndYr60X3K@yjMdyF=cg|KRhWav@!U(zPeVpGMvn@@cgO(h1@JJt 
zBD~pv&Q=&JRIX{3@0_86(Y#Y9hA0>mPBfouZgFgi>6D+REjBV#nll{p?40XmO0c<*$@ z>mId{Fk)IGDvXm_K6rKg;ufm+fgIfW8e%=t8JYg{nhvNvd+SvA-5%o&HCWynnWr~X z*8|kTbmatvD&|!3>Pcw_7WAVzjRntx{cxk@jb5rJ_#MC{-H>9Sx5xKX1NR>AihK|R z>=9vBiv`+Io46*RbAGVeH5|?h6hA?Eb4~}!eaSowYMW}w^ZOxC?(OLBLX9N=m9YGZ z7YeC_p)bu?j8wh&pa;Y8zbFqem+6?i3ak6#m%Hu>L_Tw8`|^Yur+>DGjzlHd5QFv9 zYc8x2=U(5E;bzfTN@Fd>*^V_-BdV$2Oh`YJRK@I@06XuhH#aArqpBYh0VVeoF#oPx zYGV1?yI%@+6bi}^b})fUsZ!rCl9pC1!Z~batje~vk^0|L$pPA=H-}^VZJmd_ z_@xq~V6A{Xn_aAKIG5=;|Lj6x^A}p8??orft)e^7W`+H5*@tD$>7I8rcFSh4$F)|Z zre(OQ(uQ3bG}a;C^9{O)ABYc#2$E(M5&wocu@SSXIuIdt%nRdJbYOS_x}ksNV$~eT z6M2L`1>aJ*W4OemD!$`AYY6N8^tQJFxPE8=slZB2khpv>IzHpJo?!DhlOH#4kdyjH zFP}g8SCoJGz)k5)WagAlqdf&(hFKtzzbQ?8=o*m^rOJZmLDu}syQBH}8p6-lJ!$OO zFhFSHJuy^merQyQ5jg!;l7(6HU^t$T7BVG15nX|-QhQxot`9>$2#542X8=i;JZ8}^!{CtRtcdzY)aLBE`E~rFlH;o^v?4V>};_NH%VeO zP2!G!MYVH2PN!5B7Np~N$~LatYI?B*6fc0H&M#gP`8RcqCbl>)`;~~Gl0tk_tY}a& zVK=eX?#wWI^4&O86&1X{JRx0Bn)scY%a=?AxWJo<#f|M!EK{ zQ)!nwlg=?0YbbvX4D|+%q|<(fE_|t#3KN2FOZCQ@njH(@$Ejb5_;%`8v!1hg=rd=Z zh(3g$qzq7t{BfVo`&}rN!R`)*!$YZi*}(6(K92AEY|+b(W#pXQ_l2oG1$!u#}IhJ`0`+Jb^Xj zb1?v9OxPPM5Nn{ZSw1js3gsyq8x7cn8l)))cfOZ!97i>h;=3TyAa(6rExZxsG2lhm z3rbt$`AZZl_T`uI5YRSIp0gUMwT-=X!s@=)#nc=Dg!Dzikvce<2NMemP#z$Vn~=^v zB2HQQykYA`B03DNqQ^8#7!AMa4KhytrEP0c%PS*yJPr}<0!6z!eh9~InBj6}SW*iA zrY(zrBn}v`n`}gn3#T9H@_B9ToMgA=__8B0+m-%Pc;3GPD*1yY?g(j`FTdeSCzT~a ztXLrHGy_#-zO{hkIh;}pp0Cxh86Q=o!01RQOso2&wFj@tqA}4WYdrYz%)r(4WRcZB zmhevjE_5!xJ3AY|qoUDoWk3d_Wxh3HJ#GaV_Li@|s^HEOJzw* z^?BB?3Tv_}V9H&;p}*t@@426J7{H^(eTDY|>he0YNQn&CV!pEN52>TS5+2B=$f@z;Z26REfl%vJTG*KOSH zZOC1{R>VptoY?k5ZnfEuip6)co40IeP+i!r^iv%mZG9#J%@U8cG+i-xO7PDb9f-dr z$2rk~)(x_E{{VfUHrs3ww_?6#BZ&Z1N6m13S*CwnP92wY=qTOeDnFpxj9(3WnaUwa zKG`3SWof!WySuEzn5{(5+xu#U%$_^YYyq0t@@yE5k>y=i8CmW3?|*Uae$wn~W6AB|%J;lq;jpNz{S&Z{;}4cOar+P?f!Gr04ga(Ji}bfZr2MlK=z{#;mofMz-Zo#? W@A}3t`E6f;pOv|-**#O=i~j{8Q%T4G literal 0 HcmV?d00001 diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..32c3a33a --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,160 @@ +pytorch-widedeep +================ + +*A flexible package to combine tabular data with text and images using wide and deep models* + +Below there is an introduction to the architectures one can build using +``pytorch-widedeep``. If you prefer to learn about the utilities and +components go straight to corresponding sections in the Documentation. + +Documentation +------------- + +.. toctree:: + :maxdepth: 1 + + Installation + Quick Start + Utilities + Preprocessing + Model Components + Bayesian Models + Metrics + Losses + Dataloaders + Callbacks + The Trainer + Tab2Vec + Examples + + +Introduction +------------ +``pytorch-widedeep`` is based on Google's `Wide and Deep Algorithm +`_, adjusted for multi-modal datasets + + +In general terms, ``pytorch-widedeep`` is a package to use deep learning with +tabular and multimodal data. In particular, is intended to facilitate the +combination of text and images with corresponding tabular data using wide and +deep models. With that in mind there are a number of architectures that can +be implemented with just a few lines of code. The main components of those +architectures are shown in the Figure below: + +.. image:: figures/widedeep_arch.png + :width: 700px + :align: center + +The dashed boxes in the figure represent optional, overall components, and the +dashed lines indicate the corresponding connections, depending on whether or +not certain components are present. 
For example, the dashed blue arrows
+indicate that the ``deeptabular``, ``deeptext`` and ``deepimage`` components
+are connected directly to the output neuron or neurons (depending on whether
+we are performing a binary classification or regression, or a multi-class
+classification) if the optional ``deephead`` is not present. The components
+within the faded-pink rectangle are concatenated.
+
+Note that it is not possible to illustrate every possible architecture and
+component available in ``pytorch-widedeep`` in one Figure. Therefore, for
+more details on possible architectures (and more), please read this
+documentation, or see the `Examples
+`_ folders
+in the repo.
+
+In math terms, and following the notation in the `paper
+`_, the expression for the architecture
+without a ``deephead`` component can be formulated as:
+
+
+.. image:: figures/architecture_1_math.png
+   :width: 600px
+   :align: center
+
+
+Where *'W'* are the weight matrices applied to the wide model and to the
+final activations of the deep models, :math:`a` are these final activations,
+and :math:`\phi(x)` are the cross product transformations of the original
+features *'x'*. In case you are wondering what *"cross product
+transformations"* are, here is a quote taken directly from the paper: *"For
+binary features, a cross-product transformation (e.g., “AND(gender=female,
+language=en)”) is 1 if and only if the constituent features (“gender=female”
+and “language=en”) are all 1, and 0 otherwise".* Finally,
+:math:`\sigma(\cdot)` is the activation function.
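+
+Written out (an approximate, plain-LaTeX rendering of the figure above; the
+image remains the authoritative form and may include additional superscripts
+denoting the last layer of each deep component), the expression reads:
+
+.. math::
+
+   pred = \sigma \big( W_{wide}^{T} [x, \phi(x)] + W_{deeptabular}^{T} a_{deeptabular} + W_{deeptext}^{T} a_{deeptext} + W_{deepimage}^{T} a_{deepimage} + b \big)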
**TabPerceiver**: adaptation of the Perceiver for tabular data. Details on +the Perceiver can be found in `Perceiver: General Perception with Iterative +Attention `_ + +And probabilistic DL models for tabular data based on +`Weight Uncertainty in Neural Networks `_: + +9. **BayesianWide**: Probabilistic adaptation of the `Wide` model. + +10. **BayesianTabMlp**: Probabilistic adaptation of the `TabMlp` model + +Note that while there are scientific publications for the TabTransformer, +SAINT and FT-Transformer, the TabFasfFormer and TabPerceiver are our own +adaptation of those algorithms for tabular data. For details on these models +and their options please see the examples in the Examples folder and the +documentation. + +Finally, it is perfectly possible to use custom models as long as the the +custom models have an attribute called ``output_dim`` with the size of the +last layer of activations, so that ``WideDeep`` can be constructed. Again, +examples on how to use custom components can be found in the Examples +folder. + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`search` diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 00000000..c63af2fd --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,44 @@ +Installation +************ +This section explains how to install ``pytorch-widedeep``. + +For the latest stable release, execute: + +.. code:: + + pip install pytorch-widedeep + +For the bleeding-edge version, execute: + +.. code:: + + pip install git+https://github.com/jrzaurin/pytorch-widedeep.git + + +For developer install + +.. code:: + + # Clone the repository + git clone https://github.com/jrzaurin/pytorch-widedeep + cd pytorch-widedeep + + # Install in dev mode + pip install -e . + +Dependencies +------------ +* pandas +* numpy +* scipy +* scikit-learn +* gensim +* spacy +* opencv-contrib-python +* imutils +* tqdm +* torch +* torchvision +* einops +* wrapt +* torchmetrics diff --git a/docs/losses.rst b/docs/losses.rst new file mode 100644 index 00000000..6ce8e769 --- /dev/null +++ b/docs/losses.rst @@ -0,0 +1,65 @@ +Losses +====== + +``pytorch-widedeep`` accepts a number of losses and objectives that can be +passed to the ``Trainer`` class via the parameter ``objective`` +(see ``pytorch-widedeep.training.Trainer``). For most cases the loss function +that ``pytorch-widedeep`` will use internally is already implemented in +Pytorch. + +In addition, ``pytorch-widedeep`` implements a series of "custom" loss +functions. These are described below for completion since, as mentioned +before, they are used internally by the ``Trainer``. Of course, onen could +always use them on their own and can be imported as: + +.. code-block:: python + + from pytorch_widedeep.losses import FocalLoss + +.. note:: Losses in this module expect the predictions and ground truth to have the + same dimensions for regression and binary classification problems + :math:`(N_{samples}, 1)`. In the case of multiclass classification problems + the ground truth is expected to be a 1D tensor with the corresponding + classes. See Examples below + +.. autoclass:: pytorch_widedeep.losses.MSELoss + :members: + +.. autoclass:: pytorch_widedeep.losses.MSLELoss + :members: + +.. autoclass:: pytorch_widedeep.losses.RMSELoss + :members: + +.. autoclass:: pytorch_widedeep.losses.RMSLELoss + :members: + +.. autoclass:: pytorch_widedeep.losses.QuantileLoss + :members: + +.. autoclass:: pytorch_widedeep.losses.FocalLoss + :members: + +.. 
autoclass:: pytorch_widedeep.losses.BayesianSELoss + :members: + +.. autoclass:: pytorch_widedeep.losses.TweedieLoss + :members: + +.. autoclass:: pytorch_widedeep.losses.ZILNLoss + :members: + +.. autoclass:: pytorch_widedeep.losses.L1Loss + :members: + +.. autoclass:: pytorch_widedeep.losses.FocalR_L1Loss + :members: + +.. autoclass:: pytorch_widedeep.losses.FocalR_MSELoss + :members: + +.. autoclass:: pytorch_widedeep.losses.FocalR_RMSELoss + :members: + +.. autoclass:: pytorch_widedeep.losses.HuberLoss + :members: diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..922152e9 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/metrics.rst b/docs/metrics.rst new file mode 100644 index 00000000..0d0e84c4 --- /dev/null +++ b/docs/metrics.rst @@ -0,0 +1,51 @@ +Metrics +======= + +.. note:: Metrics in this module expect the predictions and ground truth to have the + same dimensions for regression and binary classification problems: :math:`(N_{samples}, 1)`. + In the case of multiclass classification problems the ground truth is expected to be + a 1D tensor with the corresponding classes. See Examples below + +We have added the possibility of using the metrics available at the +`torchmetrics `_ library. +Note that this library is still in its early versions and therefore this +option should be used with caution. To use ``torchmetrics`` simply import +them and use them as any of the ``pytorch-widedeep`` metrics described +below. + +.. code-block:: python + + from torchmetrics import Accuracy, Precision + + accuracy = Accuracy(average=None, num_classes=2) + precision = Precision(average='micro', num_classes=2) + + trainer = Trainer(model, objective="binary", metrics=[accuracy, precision]) + +A functioning example for ``pytorch-widedeep`` using ``torchmetrics`` can be +found in the `Examples folder `_. + + +.. autoclass:: pytorch_widedeep.metrics.Accuracy + :members: + :undoc-members: + +.. autoclass:: pytorch_widedeep.metrics.Precision + :members: + :undoc-members: + +.. autoclass:: pytorch_widedeep.metrics.Recall + :members: + :undoc-members: + +.. autoclass:: pytorch_widedeep.metrics.FBetaScore + :members: + :undoc-members: + +.. autoclass:: pytorch_widedeep.metrics.F1Score + :members: + :undoc-members: + +.. 
autoclass:: pytorch_widedeep.metrics.R2Score + :members: + :undoc-members: diff --git a/docs/model_components.rst b/docs/model_components.rst new file mode 100644 index 00000000..00a3a1ba --- /dev/null +++ b/docs/model_components.rst @@ -0,0 +1,71 @@ +The ``models`` module +====================== + +This module contains the models that can be used as the four main components +that will comprise a Wide and Deep model (``wide``, ``deeptabular``, +``deeptext``, ``deepimage``), as well as the ``WideDeep`` "constructor" +class. Note that each of the four components can be used independently. + +.. autoclass:: pytorch_widedeep.models.tabular.linear.wide.Wide + :exclude-members: forward + :members: + +.. autoclass:: pytorch_widedeep.models.tabular.mlp.tab_mlp.TabMlp + :exclude-members: forward + :members: + +.. autoclass:: pytorch_widedeep.models.tabular.mlp.context_attention_mlp.ContextAttentionMLP + :exclude-members: forward + :members: + +.. autoclass:: pytorch_widedeep.models.tabular.mlp.self_attention_mlp.SelfAttentionMLP + :exclude-members: forward + :members: + +.. autoclass:: pytorch_widedeep.models.tabular.resnet.tab_resnet.TabResnet + :exclude-members: forward + :members: + +.. autoclass:: pytorch_widedeep.models.tabular.tabnet.tab_net.TabNet + :exclude-members: forward + :members: + +.. autoclass:: pytorch_widedeep.models.tabular.transformers.tab_transformer.TabTransformer + :exclude-members: forward + :members: + +.. autoclass:: pytorch_widedeep.models.tabular.transformers.saint.SAINT + :exclude-members: forward + :members: + +.. autoclass:: pytorch_widedeep.models.tabular.transformers.ft_transformer.FTTransformer + :exclude-members: forward + :members: + +.. autoclass:: pytorch_widedeep.models.tabular.transformers.tab_perceiver.TabPerceiver + :exclude-members: forward + :members: + +.. autoclass:: pytorch_widedeep.models.tabular.transformers.tab_fastformer.TabFastFormer + :exclude-members: forward + :members: + +.. autoclass:: pytorch_widedeep.models.text.attentive_rnn.BasicRNN + :exclude-members: forward + :members: + +.. autoclass:: pytorch_widedeep.models.text.attentive_rnn.AttentiveRNN + :exclude-members: forward + :members: + +.. autoclass:: pytorch_widedeep.models.text.stacked_attentive_rnn.StackedAttentiveRNN + :exclude-members: forward + :members: + +.. autoclass:: pytorch_widedeep.models.image.vision.Vision + :exclude-members: forward + :members: + +.. autoclass:: pytorch_widedeep.models.wide_deep.WideDeep + :exclude-members: forward + :members: diff --git a/docs/preprocessing.rst b/docs/preprocessing.rst new file mode 100644 index 00000000..a068db8c --- /dev/null +++ b/docs/preprocessing.rst @@ -0,0 +1,22 @@ +The ``preprocessing`` module +============================ + +This module contains the classes that are used to prepare the data before +being passed to the models. There is one Preprocessor per data mode or +model component: ``wide``, ``deeptabular``, ``deepimage`` and ``deeptext``. + +.. autoclass:: pytorch_widedeep.preprocessing.WidePreprocessor + :members: + :undoc-members: + +.. autoclass:: pytorch_widedeep.preprocessing.TabPreprocessor + :members: + :undoc-members: + +.. autoclass:: pytorch_widedeep.preprocessing.TextPreprocessor + :members: + :undoc-members: + +.. 
autoclass:: pytorch_widedeep.preprocessing.ImagePreprocessor + :members: + :undoc-members: diff --git a/docs/quick_start.rst b/docs/quick_start.rst new file mode 100644 index 00000000..e21d618e --- /dev/null +++ b/docs/quick_start.rst @@ -0,0 +1,134 @@ +Quick Start +*********** + +This is an example of a binary classification with the `adult census +`__ +dataset using a combination of a wide and deep model (in this case a so called +``deeptabular`` model) with defaults settings. + + +Read and split the dataset +-------------------------- + +.. code-block:: python + + import pandas as pd + import numpy as np + from sklearn.model_selection import train_test_split + from pytorch_widedeep.datasets import load_adult + + df = load_adult(as_frame=True) + df["income_label"] = (df["income"].apply(lambda x: ">50K" in x)).astype(int) + df.drop("income", axis=1, inplace=True) + df_train, df_test = train_test_split(df, test_size=0.2, stratify=df.income_label) + + + +Prepare the wide and deep columns +--------------------------------- + +.. code-block:: python + + from pytorch_widedeep import Trainer + from pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor + from pytorch_widedeep.models import Wide, TabMlp, WideDeep + from pytorch_widedeep.metrics import Accuracy + + # Define the 'column set up' + wide_cols = [ + "education", + "relationship", + "workclass", + "occupation", + "native-country", + "gender", + ] + crossed_cols = [("education", "occupation"), ("native-country", "occupation")] + + cat_embed_cols = [ + "workclass", + "education", + "marital-status", + "occupation", + "relationship", + "race", + "gender", + "capital-gain", + "capital-loss", + "native-country", + ] + continuous_cols = ["age", "hours-per-week"] + target = "income_label" + target = df_train[target].values + +Preprocessing and model components definition +--------------------------------------------- + +.. code-block:: python + + wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) + X_wide = wide_preprocessor.fit_transform(df_train) + + tab_preprocessor = TabPreprocessor( + cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols # type: ignore[arg-type] + ) + X_tab = tab_preprocessor.fit_transform(df_train) + + # build the model + wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) + tab_mlp = TabMlp( + column_idx=tab_preprocessor.column_idx, + cat_embed_input=tab_preprocessor.cat_embed_input, + continuous_cols=continuous_cols, + ) + model = WideDeep(wide=wide, deeptabular=tab_mlp) + + +Fit and predict +------------------------------- + +.. code-block:: python + + # train and validate + trainer = Trainer(model, objective="binary", metrics=[Accuracy]) + trainer.fit( + X_wide=X_wide, + X_tab=X_tab, + target=target, + n_epochs=5, + batch_size=256, + ) + + # predict on test + X_wide_te = wide_preprocessor.transform(df_test) + X_tab_te = tab_preprocessor.transform(df_test) + preds = trainer.predict(X_wide=X_wide_te, X_tab=X_tab_te) + + +Save and load +------------------------------- + +.. code-block:: python + + # Option 1: this will also save training history and lr history if the + # LRHistory callback is used + trainer.save(path="model_weights", save_state_dict=True) + + # Option 2: save as any other torch model + torch.save(model.state_dict(), "model_weights/wd_model.pt") + + # From here in advance, Option 1 or 2 are the same. I assume the user has + # prepared the data and defined the new model components: + # 1. 
Build the model
+    model_new = WideDeep(wide=wide, deeptabular=tab_mlp)
+    model_new.load_state_dict(torch.load("model_weights/wd_model.pt"))
+
+    # 2. Instantiate the trainer
+    trainer_new = Trainer(model_new, objective="binary")
+
+    # 3. Either start the fit or directly predict
+    preds = trainer_new.predict(X_wide=X_wide, X_tab=X_tab)
+
+Of course, one can do **much more**. See the Examples folder in the repo, this
+documentation or the companion posts for a better understanding of the content
+of the package and its functionalities.
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 00000000..03877a58
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,20 @@
+sphinx
+sphinx_rtd_theme
+recommonmark
+sphinx-markdown-tables
+sphinx-copybutton
+sphinx-autodoc-typehints
+pandas
+numpy
+scipy
+scikit-learn
+gensim
+spacy
+opencv-contrib-python
+imutils
+tqdm
+torch
+torchvision
+einops
+wrapt
+torchmetrics
diff --git a/docs/tab2vec.rst b/docs/tab2vec.rst
new file mode 100644
index 00000000..8daf940c
--- /dev/null
+++ b/docs/tab2vec.rst
@@ -0,0 +1,7 @@
+Tab2Vec
+=======
+
+.. autoclass:: pytorch_widedeep.tab2vec.Tab2Vec
+    :members:
+    :undoc-members:
+
diff --git a/docs/trainer.rst b/docs/trainer.rst
new file mode 100644
index 00000000..808e2882
--- /dev/null
+++ b/docs/trainer.rst
@@ -0,0 +1,23 @@
+Training multimodal Deep Learning Models
+========================================
+
+Here is the documentation for the ``Trainer`` class, which will do all the heavy lifting.
+
+Trainer is also available from ``pytorch_widedeep`` directly, for example, one could do:
+
+.. code-block:: python
+
+    from pytorch_widedeep.training import Trainer
+
+
+or also:
+
+.. code-block:: python
+
+    from pytorch_widedeep import Trainer
+
+
+.. autoclass:: pytorch_widedeep.training.Trainer
+    :exclude-members: forward
+    :members:
+    :undoc-members:
diff --git a/docs/utils/deeptabular_utils.rst b/docs/utils/deeptabular_utils.rst
new file mode 100644
index 00000000..25f8ba4e
--- /dev/null
+++ b/docs/utils/deeptabular_utils.rst
@@ -0,0 +1,6 @@
+deeptabular utils
+=================
+
+.. autoclass:: pytorch_widedeep.utils.deeptabular_utils.LabelEncoder
+    :members:
+    :undoc-members:
diff --git a/docs/utils/fastai_transforms.rst b/docs/utils/fastai_transforms.rst
new file mode 100644
index 00000000..f18d60b0
--- /dev/null
+++ b/docs/utils/fastai_transforms.rst
@@ -0,0 +1,19 @@
+Fastai transforms
+=================
+
+I have directly copied and pasted part of the ``transforms.py`` module from
+the ``fastai`` library. The reason to do such a thing is because
+``pytorch_widedeep`` only needs the ``Tokenizer`` and the ``Vocab`` classes
+there. This way I avoid extra dependencies. Credit for all the code in the
+``fastai_transforms`` module in this ``pytorch-widedeep`` package goes to
+Jeremy Howard and the `fastai` team. I only include the documentation here for
+completeness, but I strongly advise the user to read the ``fastai``
+`documentation `_.
+
+.. autoclass:: pytorch_widedeep.utils.fastai_transforms.Tokenizer
+    :members:
+    :undoc-members:
+
+..
autoclass:: pytorch_widedeep.utils.fastai_transforms.Vocab
+    :members:
+    :undoc-members:
diff --git a/docs/utils/image_utils.rst b/docs/utils/image_utils.rst
new file mode 100644
index 00000000..aedb1f61
--- /dev/null
+++ b/docs/utils/image_utils.rst
@@ -0,0 +1,20 @@
+Image utils
+===========
+:class:`SimplePreprocessor
+` and
+:class:`AspectAwarePreprocessor
+` are directly
+taken from the great series of books `Deep Learning for Computer Vision
+`_
+by `Adrian `_. Therefore, all credit for the
+code in the ``image_utils`` module goes to `Adrian Rosebrock
+`_.
+
+.. autoclass:: pytorch_widedeep.utils.image_utils.AspectAwarePreprocessor
+    :members:
+    :undoc-members:
+
+.. autoclass:: pytorch_widedeep.utils.image_utils.SimplePreprocessor
+    :members:
+    :undoc-members:
+
diff --git a/docs/utils/index.rst b/docs/utils/index.rst
new file mode 100644
index 00000000..72df9e03
--- /dev/null
+++ b/docs/utils/index.rst
@@ -0,0 +1,23 @@
+The ``utils`` module
+====================
+
+These are a series of utilities that might be useful for a number of
+preprocessing tasks, even when not directly related to ``pytorch-widedeep``. All
+the classes and functions discussed here are available directly from the
+``utils`` module. For example, the ``LabelEncoder`` within the
+``deeptabular_utils`` submodule can be imported as:
+
+.. code-block:: python
+
+    from pytorch_widedeep.utils import LabelEncoder
+
+
+Objects
+-------
+
+.. toctree::
+
+    deeptabular_utils
+    fastai_transforms
+    image_utils
+    text_utils
\ No newline at end of file
diff --git a/docs/utils/text_utils.rst b/docs/utils/text_utils.rst
new file mode 100644
index 00000000..1ad57478
--- /dev/null
+++ b/docs/utils/text_utils.rst
@@ -0,0 +1,15 @@
+Text utils
+=================
+Collection of helper functions that facilitate processing text.
+
+.. autofunction:: pytorch_widedeep.utils.text_utils.simple_preprocess
+    :noindex:
+
+.. autofunction:: pytorch_widedeep.utils.text_utils.get_texts
+    :noindex:
+
+.. autofunction:: pytorch_widedeep.utils.text_utils.pad_sequences
+    :noindex:
+
+.. autofunction:: pytorch_widedeep.utils.text_utils.build_embeddings_matrix
+    :noindex:
\ No newline at end of file
diff --git a/mkdocs/site/pytorch-widedeep/bayesian_models.html b/mkdocs/site/pytorch-widedeep/bayesian_models.html
index 8acada8f..173cd46f 100644
--- a/mkdocs/site/pytorch-widedeep/bayesian_models.html
+++ b/mkdocs/site/pytorch-widedeep/bayesian_models.html
@@ -2321,9 +2321,9 @@

prior_pi=self.prior_pi, posterior_mu_init=self.posterior_mu_init, posterior_rho_init=self.posterior_rho_init, - use_bias=False - if self.use_cont_bias is None - else self.use_cont_bias, + use_bias=( + False if self.use_cont_bias is None else self.use_cont_bias + ), activation_fn=self.cont_embed_activation, ) self.cont_out_dim = len(self.continuous_cols) * self.cont_embed_dim diff --git a/mkdocs/site/pytorch-widedeep/callbacks.html b/mkdocs/site/pytorch-widedeep/callbacks.html index a40c7e27..8e6547ff 100644 --- a/mkdocs/site/pytorch-widedeep/callbacks.html +++ b/mkdocs/site/pytorch-widedeep/callbacks.html @@ -1649,9 +1649,9 @@

Source code in pytorch_widedeep/callbacks.py -
268
-269
-270
def __init__(self, n_epochs: int):
+                  
269
+270
+271
def __init__(self, n_epochs: int):
     super(LRHistory, self).__init__()
     self.n_epochs = n_epochs
 
@@ -1861,8 +1861,7 @@

Source code in pytorch_widedeep/callbacks.py -
400
-401
+                  
401
 402
 403
 404
@@ -1923,7 +1922,8 @@ 

459 460 461 -462

def __init__(
+462
+463
def __init__(
     self,
     filepath: Optional[str] = None,
     monitor: str = "val_loss",
@@ -2167,8 +2167,7 @@ 

Source code in pytorch_widedeep/callbacks.py -
592
-593
+                  
593
 594
 595
 596
@@ -2212,7 +2211,8 @@ 

634 635 636 -637

def __init__(
+637
+638
def __init__(
     self,
     monitor: str = "val_loss",
     min_delta: float = 0.0,
diff --git a/mkdocs/site/pytorch-widedeep/model_components.html b/mkdocs/site/pytorch-widedeep/model_components.html
index a9b0dbf8..17ad9e12 100644
--- a/mkdocs/site/pytorch-widedeep/model_components.html
+++ b/mkdocs/site/pytorch-widedeep/model_components.html
@@ -2275,7 +2275,7 @@ 

>>> cat_embed_input = [(u, i, j) for u, i, j in zip(colnames[:4], [4] * 4, [8] * 4)] >>> column_idx = {k: v for v, k in enumerate(colnames)} >>> model = TabMlp(mlp_hidden_dims=[8, 4], column_idx=column_idx, cat_embed_input=cat_embed_input, -... continuous_cols=["e"], +... continuous_cols=["e"]) >>> out = model(X_tab)

@@ -3204,7 +3204,9 @@

265 266 267 -268

def __init__(
+268
+269
+270
def __init__(
     self,
     column_idx: Dict[str, int],
     *,
@@ -3285,17 +3287,19 @@ 

if self.mlp_hidden_dims is not None: self.mlp = MLP( d_hidden=[self.blocks_dims[-1]] + self.mlp_hidden_dims, - activation="relu" - if self.mlp_activation is None - else self.mlp_activation, + activation=( + "relu" if self.mlp_activation is None else self.mlp_activation + ), dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout, batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm, - batchnorm_last=False - if self.mlp_batchnorm_last is None - else self.mlp_batchnorm_last, - linear_first=True - if self.mlp_linear_first is None - else self.mlp_linear_first, + batchnorm_last=( + False + if self.mlp_batchnorm_last is None + else self.mlp_batchnorm_last + ), + linear_first=( + True if self.mlp_linear_first is None else self.mlp_linear_first + ), ) else: self.mlp = None @@ -3549,9 +3553,7 @@

Source code in pytorch_widedeep/models/tabular/resnet/tab_resnet.py -
366
-367
-368
+                  
368
 369
 370
 371
@@ -3609,7 +3611,11 @@ 

423 424 425 -426

def __init__(
+426
+427
+428
+429
+430
def __init__(
     self,
     embed_dim: int,
     blocks_dims: List[int] = [100, 100, 200],
@@ -3645,17 +3651,19 @@ 

if self.mlp_hidden_dims is not None: self.mlp = MLP( d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim, - activation="relu" - if self.mlp_activation is None - else self.mlp_activation, + activation=( + "relu" if self.mlp_activation is None else self.mlp_activation + ), dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout, batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm, - batchnorm_last=False - if self.mlp_batchnorm_last is None - else self.mlp_batchnorm_last, - linear_first=True - if self.mlp_linear_first is None - else self.mlp_linear_first, + batchnorm_last=( + False + if self.mlp_batchnorm_last is None + else self.mlp_batchnorm_last + ), + linear_first=( + True if self.mlp_linear_first is None else self.mlp_linear_first + ), ) self.decoder = DenseResnet( self.mlp_hidden_dims[-1], @@ -6504,7 +6512,9 @@

325 326 327 -328

def __init__(
+328
+329
+330
def __init__(
     self,
     column_idx: Dict[str, int],
     *,
@@ -6618,17 +6628,19 @@ 

if self.mlp_hidden_dims is not None: self.mlp = MLP( d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim, - activation="relu" - if self.mlp_activation is None - else self.mlp_activation, + activation=( + "relu" if self.mlp_activation is None else self.mlp_activation + ), dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout, batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm, - batchnorm_last=False - if self.mlp_batchnorm_last is None - else self.mlp_batchnorm_last, - linear_first=False - if self.mlp_linear_first is None - else self.mlp_linear_first, + batchnorm_last=( + False + if self.mlp_batchnorm_last is None + else self.mlp_batchnorm_last + ), + linear_first=( + False if self.mlp_linear_first is None else self.mlp_linear_first + ), ) else: self.mlp = None @@ -7320,7 +7332,9 @@

302 303 304 -305

def __init__(
+305
+306
+307
def __init__(
     self,
     column_idx: Dict[str, int],
     *,
@@ -7428,17 +7442,19 @@ 

if self.mlp_hidden_dims is not None: self.mlp = MLP( d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim, - activation="relu" - if self.mlp_activation is None - else self.mlp_activation, + activation=( + "relu" if self.mlp_activation is None else self.mlp_activation + ), dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout, batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm, - batchnorm_last=False - if self.mlp_batchnorm_last is None - else self.mlp_batchnorm_last, - linear_first=False - if self.mlp_linear_first is None - else self.mlp_linear_first, + batchnorm_last=( + False + if self.mlp_batchnorm_last is None + else self.mlp_batchnorm_last + ), + linear_first=( + False if self.mlp_linear_first is None else self.mlp_linear_first + ), ) else: self.mlp = None @@ -8172,7 +8188,9 @@

321 322 323 -324

def __init__(
+324
+325
+326
def __init__(
     self,
     column_idx: Dict[str, int],
     *,
@@ -8289,17 +8307,19 @@ 

if self.mlp_hidden_dims is not None: self.mlp = MLP( d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim, - activation="relu" - if self.mlp_activation is None - else self.mlp_activation, + activation=( + "relu" if self.mlp_activation is None else self.mlp_activation + ), dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout, batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm, - batchnorm_last=False - if self.mlp_batchnorm_last is None - else self.mlp_batchnorm_last, - linear_first=False - if self.mlp_linear_first is None - else self.mlp_linear_first, + batchnorm_last=( + False + if self.mlp_batchnorm_last is None + else self.mlp_batchnorm_last + ), + linear_first=( + False if self.mlp_linear_first is None else self.mlp_linear_first + ), ) else: self.mlp = None @@ -9068,7 +9088,9 @@

335 336 337 -338

def __init__(
+338
+339
+340
def __init__(
     self,
     column_idx: Dict[str, int],
     *,
@@ -9177,17 +9199,19 @@ 

if self.mlp_hidden_dims is not None: self.mlp = MLP( d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim, - activation="relu" - if self.mlp_activation is None - else self.mlp_activation, + activation=( + "relu" if self.mlp_activation is None else self.mlp_activation + ), dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout, batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm, - batchnorm_last=False - if self.mlp_batchnorm_last is None - else self.mlp_batchnorm_last, - linear_first=False - if self.mlp_linear_first is None - else self.mlp_linear_first, + batchnorm_last=( + False + if self.mlp_batchnorm_last is None + else self.mlp_batchnorm_last + ), + linear_first=( + False if self.mlp_linear_first is None else self.mlp_linear_first + ), ) else: self.mlp = None @@ -9948,7 +9972,9 @@

338 339 340 -341

def __init__(
+341
+342
+343
def __init__(
     self,
     column_idx: Dict[str, int],
     *,
@@ -10076,17 +10102,19 @@ 

if self.mlp_hidden_dims is not None: self.mlp = MLP( d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim, - activation="relu" - if self.mlp_activation is None - else self.mlp_activation, + activation=( + "relu" if self.mlp_activation is None else self.mlp_activation + ), dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout, batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm, - batchnorm_last=False - if self.mlp_batchnorm_last is None - else self.mlp_batchnorm_last, - linear_first=False - if self.mlp_linear_first is None - else self.mlp_linear_first, + batchnorm_last=( + False + if self.mlp_batchnorm_last is None + else self.mlp_batchnorm_last + ), + linear_first=( + False if self.mlp_linear_first is None else self.mlp_linear_first + ), ) else: self.mlp = None @@ -12005,9 +12033,9 @@

if with_pos_encoding: if pos_encoder is not None: - self.pos_encoder: Union[ - nn.Module, nn.Identity, PositionalEncoding - ] = pos_encoder + self.pos_encoder: Union[nn.Module, nn.Identity, PositionalEncoding] = ( + pos_encoder + ) else: self.pos_encoder = PositionalEncoding( input_dim, pos_encoding_dropout, seq_length @@ -13306,8 +13334,8 @@

diff --git a/mkdocs/site/pytorch-widedeep/preprocessing.html b/mkdocs/site/pytorch-widedeep/preprocessing.html index 241503af..7ff5fcc2 100644 --- a/mkdocs/site/pytorch-widedeep/preprocessing.html +++ b/mkdocs/site/pytorch-widedeep/preprocessing.html @@ -1764,15 +1764,15 @@

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py -
66
-67
+                  
67
 68
 69
 70
 71
 72
 73
-74
def __init__(
+74
+75
def __init__(
     self, wide_cols: List[str], crossed_cols: Optional[List[Tuple[str, str]]] = None
 ):
     super(WidePreprocessor, self).__init__()
@@ -1842,8 +1842,7 @@ 

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py -
 76
- 77
+            
 77
  78
  79
  80
@@ -1868,7 +1867,8 @@ 

99 100 101 -102

def fit(self, df: pd.DataFrame) -> "WidePreprocessor":
+102
+103
def fit(self, df: pd.DataFrame) -> "WidePreprocessor":
     r"""Fits the Preprocessor and creates required attributes
 
     Parameters
@@ -1946,8 +1946,7 @@ 

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py -
104
-105
+            
105
 106
 107
 108
@@ -1967,7 +1966,10 @@ 

122 123 124 -125

def transform(self, df: pd.DataFrame) -> np.ndarray:
+125
+126
+127
+128
def transform(self, df: pd.DataFrame) -> np.ndarray:
     r"""
     Parameters
     ----------
@@ -1984,9 +1986,11 @@ 

encoded = np.zeros([len(df_wide), len(self.wide_crossed_cols)]) for col_i, col in enumerate(self.wide_crossed_cols): encoded[:, col_i] = df_wide[col].apply( - lambda x: self.encoding_dict[col + "_" + str(x)] - if col + "_" + str(x) in self.encoding_dict - else 0 + lambda x: ( + self.encoding_dict[col + "_" + str(x)] + if col + "_" + str(x) in self.encoding_dict + else 0 + ) ) return encoded.astype("int64")

@@ -2043,10 +2047,7 @@

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py -
130
-131
-132
-133
+            
133
 134
 135
 136
@@ -2063,7 +2064,15 @@ 

147 148 149 -150

def inverse_transform(self, encoded: np.ndarray) -> pd.DataFrame:
+150
+151
+152
+153
+154
+155
+156
+157
+158
def inverse_transform(self, encoded: np.ndarray) -> pd.DataFrame:
     r"""Takes as input the output from the `transform` method and it will
     return the original values.
 
@@ -2079,7 +2088,12 @@ 

Pandas dataframe with the original values """ decoded = pd.DataFrame(encoded, columns=self.wide_crossed_cols) - decoded = decoded.map(lambda x: self.inverse_encoding_dict[x]) + + if pd.__version__ >= "2.1.0": + decoded = decoded.map(lambda x: self.inverse_encoding_dict[x]) + else: + decoded = decoded.applymap(lambda x: self.inverse_encoding_dict[x]) + for col in decoded.columns: rm_str = "".join([col, "_"]) decoded[col] = decoded[col].apply(lambda x: x.replace(rm_str, "")) @@ -2136,20 +2150,20 @@

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py -
152
-153
-154
-155
-156
-157
-158
-159
-160
+            
160
 161
 162
 163
 164
-165
def fit_transform(self, df: pd.DataFrame) -> np.ndarray:
+165
+166
+167
+168
+169
+170
+171
+172
+173
def fit_transform(self, df: pd.DataFrame) -> np.ndarray:
     """Combines `fit` and `transform`
 
     Parameters
@@ -2495,8 +2509,8 @@ 

>>> cont_cols = ['age'] >>> deep_preprocessor = TabPreprocessor(cat_embed_cols=cat_embed_cols, continuous_cols=cont_cols) >>> X_tab = deep_preprocessor.fit_transform(df) ->>> deep_preprocessor.embed_dim -{'color': 5, 'size': 5} +>>> deep_preprocessor.cat_embed_cols +[('color', 5), ('size', 5)] >>> deep_preprocessor.column_idx {'color': 0, 'size': 1, 'age': 2} >>> cont_df = pd.DataFrame({"col1": np.random.rand(10), "col2": np.random.rand(10) + 1}) @@ -2758,7 +2772,9 @@

376 377 378 -379

def fit(self, df: pd.DataFrame) -> BasePreprocessor:  # noqa: C901
+379
+380
+381
def fit(self, df: pd.DataFrame) -> BasePreprocessor:  # noqa: C901
     """Fits the Preprocessor and creates required attributes
 
     Parameters
@@ -2778,7 +2794,9 @@ 

# Categorical embeddings logic if self.cat_embed_cols is not None or self.quantization_setup is not None: - self.cat_embed_input: List[Tuple[str, int] | Tuple[str, int, int]] = [] + self.cat_embed_input: List[Union[Tuple[str, int], Tuple[str, int, int]]] = ( + [] + ) if self.cat_embed_cols is not None: df_cat, cat_embed_dim = self._prepare_categorical(df_adj) @@ -2882,9 +2900,7 @@

Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py -
381
-382
-383
+            
383
 384
 385
 386
@@ -2925,7 +2941,9 @@ 

421 422 423 -424

def transform(self, df: pd.DataFrame) -> np.ndarray:  # noqa: C901
+424
+425
+426
def transform(self, df: pd.DataFrame) -> np.ndarray:  # noqa: C901
     """Returns the processed `dataframe` as a np.ndarray
 
     Parameters
@@ -3022,9 +3040,7 @@ 

Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py -
429
-430
-431
+            
431
 432
 433
 434
@@ -3061,7 +3077,9 @@ 

465 466 467 -468

def inverse_transform(self, encoded: np.ndarray) -> pd.DataFrame:  # noqa: C901
+468
+469
+470
def inverse_transform(self, encoded: np.ndarray) -> pd.DataFrame:  # noqa: C901
     r"""Takes as input the output from the `transform` method and it will
     return the original values.
 
@@ -3153,9 +3171,7 @@ 

Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py -
470
-471
-472
+            
472
 473
 474
 475
@@ -3166,7 +3182,9 @@ 

480 481 482 -483

def fit_transform(self, df: pd.DataFrame) -> np.ndarray:
+483
+484
+485
def fit_transform(self, df: pd.DataFrame) -> np.ndarray:
     """Combines `fit` and `transform`
 
     Parameters
@@ -4553,19 +4571,19 @@ 

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py -
246
-247
-248
-249
-250
-251
-252
-253
-254
+                  
254
 255
 256
 257
-258
def __init__(
+258
+259
+260
+261
+262
+263
+264
+265
+266
def __init__(
     self,
     wide_cols: List[str],
     n_chunks: int,
@@ -4639,15 +4657,7 @@ 

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py -
260
-261
-262
-263
-264
-265
-266
-267
-268
+            
268
 269
 270
 271
@@ -4674,7 +4684,15 @@ 

292 293 294 -295

def partial_fit(self, chunk: pd.DataFrame) -> "ChunkWidePreprocessor":
+295
+296
+297
+298
+299
+300
+301
+302
+303
def partial_fit(self, chunk: pd.DataFrame) -> "ChunkWidePreprocessor":
     r"""Fits the Preprocessor and creates required attributes
 
     Parameters
@@ -4736,12 +4754,12 @@ 

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py -
297
-298
-299
-300
-301
-302
def fit(self, df: pd.DataFrame) -> "ChunkWidePreprocessor":
+            
305
+306
+307
+308
+309
+310
def fit(self, df: pd.DataFrame) -> "ChunkWidePreprocessor":
     """
     Runs `partial_fit`. This is just to override the fit method in the base
     class. This class is not designed or thought to run fit
@@ -5054,23 +5072,15 @@ 

... n_chunks=1, cat_embed_cols=cat_embed_cols, continuous_cols=cont_cols ... ) >>> X_tab = tab_preprocessor.fit_transform(chunk_df) ->>> tab_preprocessor.embed_dim -{'cat_col': 4} +>>> tab_preprocessor.cat_embed_cols +[('cat_col', 4)] >>> tab_preprocessor.column_idx {'cat_col': 0, 'cont_col': 1}

Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py -
817
-818
-819
-820
-821
-822
-823
-824
-825
+                  
825
 826
 827
 828
@@ -5108,7 +5118,15 @@ 

860 861 862 -863

@alias("with_attention", ["for_transformer"])
+863
+864
+865
+866
+867
+868
+869
+870
+871
@alias("with_attention", ["for_transformer"])
 @alias("cat_embed_cols", ["embed_cols"])
 @alias("scale", ["scale_cont_cols"])
 @alias("cols_and_bins", ["quantization_setup"])
@@ -5474,8 +5492,8 @@ 

diff --git a/mkdocs/site/pytorch-widedeep/self_supervised_pretraining.html b/mkdocs/site/pytorch-widedeep/self_supervised_pretraining.html index 813016bc..d1fefb9e 100644 --- a/mkdocs/site/pytorch-widedeep/self_supervised_pretraining.html +++ b/mkdocs/site/pytorch-widedeep/self_supervised_pretraining.html @@ -3067,8 +3067,8 @@

Javier Rodriguez Zaurin, - Not Committed Yet + Javier, + Javier Rodriguez Zaurin diff --git a/mkdocs/site/pytorch-widedeep/tab2vec.html b/mkdocs/site/pytorch-widedeep/tab2vec.html index 7bb29148..9fcb9299 100644 --- a/mkdocs/site/pytorch-widedeep/tab2vec.html +++ b/mkdocs/site/pytorch-widedeep/tab2vec.html @@ -1689,7 +1689,7 @@

>>> # ...train the model... >>> >>> # vectorise the dataframe ->>> t2v = Tab2Vec(model, tab_preprocessor) +>>> t2v = Tab2Vec(tab_preprocessor, model) >>> X_vec = t2v.transform(df_t2v)

diff --git a/mkdocs/site/pytorch-widedeep/utils/fastai_transforms.html b/mkdocs/site/pytorch-widedeep/utils/fastai_transforms.html index 2c85fea2..a2f52fc2 100644 --- a/mkdocs/site/pytorch-widedeep/utils/fastai_transforms.html +++ b/mkdocs/site/pytorch-widedeep/utils/fastai_transforms.html @@ -2541,8 +2541,8 @@

diff --git a/mkdocs/site/search/search_index.json b/mkdocs/site/search/search_index.json index ee3e9973..da2f9b65 100644 --- a/mkdocs/site/search/search_index.json +++ b/mkdocs/site/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"index.html","title":"Home","text":""},{"location":"index.html#pytorch-widedeep","title":"pytorch-widedeep","text":"

A flexible package for multimodal-deep-learning to combine tabular data with text and images using Wide and Deep models in Pytorch

Documentation: https://pytorch-widedeep.readthedocs.io

Companion posts and tutorials: infinitoml

Experiments and comparison with LightGBM: TabularDL vs LightGBM

Slack: if you want to contribute or just want to chat with us, join slack

The content of this document is organized as follows:

  • pytorch-widedeep
    • Introduction
    • The deeptabular component
    • Acknowledgments
    • License
    • Cite
      • BibTex
      • APA
"},{"location":"index.html#introduction","title":"Introduction","text":"

pytorch-widedeep is based on Google's Wide and Deep Algorithm, adjusted for multi-modal datasets

In general terms, pytorch-widedeep is a package to use deep learning with tabular data. In particular, it is intended to facilitate the combination of text and images with corresponding tabular data using wide and deep models. With that in mind, there are a number of architectures that can be implemented with just a few lines of code. The main components of those architectures are shown in the Figure below:

The dashed boxes in the figure represent optional, overall components, and the dashed lines/arrows indicate the corresponding connections, depending on whether or not certain components are present. For example, the dashed, blue-lines indicate that the deeptabular, deeptext and deepimage components are connected directly to the output neuron or neurons (depending on whether we are performing a binary classification or regression, or a multi-class classification) if the optional deephead is not present. Finally, the components within the faded-pink rectangle are concatenated.

Note that it is not possible to illustrate the number of possible architectures and components available in pytorch-widedeep in one Figure. Therefore, for more details on possible architectures (and more) please, read this documentation, or see the Examples folder in the repo.

In math terms, and following the notation in the paper, the expression for the architecture without a deephead component can be formulated as:

\\[ pred = \\sigma(W^{T}_{wide}[x,\\phi(x)] + W^{T}_{deeptabular}a^{l_f}_{deeptabular} + W^{T}_{deeptext}a^{l_f}_{deeptext} + W^{T}_{deepimage}a^{l_f}_{deepimage} + b) \\]

Where \u03c3 is the sigmoid function, 'W' are the weight matrices applied to the wide model and to the final activations of the deep models, 'a' are these final activations, \u03c6(x) are the cross product transformations of the original features 'x', and 'b' is the bias term. In case you are wondering what \"cross product transformations\" are, here is a quote taken directly from the paper: \"For binary features, a cross-product transformation (e.g., \u201cAND(gender=female, language=en)\u201d) is 1 if and only if the constituent features (\u201cgender=female\u201d and \u201clanguage=en\u201d) are all 1, and 0 otherwise\".
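As a minimal, illustrative sketch (assuming two categorical features as in the quote), such a cross-product transformation can be computed by hand with pandas; within the library, the WidePreprocessor builds these features automatically from its crossed_cols argument:

import pandas as pd

# two categorical features, as in the quoted example from the paper
df = pd.DataFrame(
    {
        "gender": ["female", "male", "female"],
        "language": ["en", "en", "es"],
    }
)

# AND(gender=female, language=en) is 1 only when both constituent
# binary features are 1, and 0 otherwise
df["gender_AND_language"] = (
    (df["gender"] == "female") & (df["language"] == "en")
).astype(int)

print(df["gender_AND_language"].tolist())  # [1, 0, 0]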

While if there is a deephead component, the previous expression turns into:

\\[ pred = \\sigma(W^{T}_{wide}[x,\\phi(x)] + W^{T}_{deephead}a^{l_f}_{deephead} + b) \\]

It is perfectly possible to use custom models (and not necessarily those in the library) as long as the custom models have an attribute called output_dim with the size of the last layer of activations, so that WideDeep can be constructed. Examples of how to use custom components can be found in the Examples folder.
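For illustration only, here is a minimal sketch of such a custom component (the class name and sizes below are made up; the one hard requirement, per the paragraph above, is the output_dim attribute):

import torch
from torch import nn
from pytorch_widedeep.models import WideDeep

class MyTabularNet(nn.Module):
    """Hypothetical custom 'deeptabular' component for a purely continuous input."""

    def __init__(self, n_features: int = 2, hidden_dim: int = 16):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(n_features, hidden_dim), nn.ReLU())
        # WideDeep reads this attribute to size the layer(s) it adds on top
        self.output_dim = hidden_dim

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        return self.encoder(X)

model = WideDeep(deeptabular=MyTabularNet(n_features=2))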

"},{"location":"index.html#the-deeptabular-component","title":"The deeptabular component","text":"

It is important to emphasize that each individual component, wide, deeptabular, deeptext and deepimage, can be used independently and in isolation. For example, one could use only wide, which is simply a linear model. In fact, one of the most interesting functionalities in pytorch-widedeep would be the use of the deeptabular component on its own, i.e. what one might normally refer to as Deep Learning for Tabular Data (see the short sketch at the end of this section). Currently, pytorch-widedeep offers the following different models for that component:

  1. Wide: a simple linear model where the nonlinearities are captured via cross-product transformations, as explained before.
  2. TabMlp: a simple MLP that receives embeddings representing the categorical features, concatenated with the continuous features, which can also be embedded.
  3. TabResnet: similar to the previous model but the embeddings are passed through a series of ResNet blocks built with dense layers.
  4. TabNet: details on TabNet can be found in TabNet: Attentive Interpretable Tabular Learning

Two simpler attention based models that we call:

  1. ContextAttentionMLP: MLP with an attention mechanism \"on top\" that is based on Hierarchical Attention Networks for Document Classification
  2. SelfAttentionMLP: MLP with an attention mechanism that is a simplified version of a transformer block that we refer to as \"query-key self-attention\".

The Tabformer family, i.e. Transformers for Tabular data:

  1. TabTransformer: details on the TabTransformer can be found in TabTransformer: Tabular Data Modeling Using Contextual Embeddings. Note that this is an 'enhanced' implementation that allows for many options that can be set up via the TabTransformer params.
  2. SAINT: Details on SAINT can be found in SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training.
  3. FT-Transformer: details on the FT-Transformer can be found in Revisiting Deep Learning Models for Tabular Data.
  4. TabFastFormer: adaptation of the FastFormer for tabular data. Details on the FastFormer can be found in FastFormers: Highly Efficient Transformer Models for Natural Language Understanding
  5. TabPerceiver: adaptation of the Perceiver for tabular data. Details on the Perceiver can be found in Perceiver: General Perception with Iterative Attention

And probabilistic DL models for tabular data based on Weight Uncertainty in Neural Networks:

  1. BayesianWide: Probabilistic adaptation of the Wide model.
  2. BayesianTabMlp: Probabilistic adaptation of the TabMlp model

Note that while there are scientific publications for the TabTransformer, SAINT and FT-Transformer, the TabFastFormer and TabPerceiver are our own adaptations of those algorithms for tabular data.

In addition, Self-Supervised pre-training can be used for all deeptabular models, with the exception of the TabPerceiver. Self-Supervised pre-training can be used via two methods or routines which we refer to as: encoder-decoder method and contrastive-denoising method. Please see the documentation and the examples for details on this functionality, and all other options in the library.
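As the short sketch promised above, here is an illustrative, self-contained example of the deeptabular component used on its own; it simply adapts the Quick Start snippet, wrapping a TabMlp in WideDeep with no wide, text or image components (column choices and epoch count are arbitrary):

from sklearn.model_selection import train_test_split
from pytorch_widedeep import Trainer
from pytorch_widedeep.preprocessing import TabPreprocessor
from pytorch_widedeep.models import TabMlp, WideDeep
from pytorch_widedeep.metrics import Accuracy
from pytorch_widedeep.datasets import load_adult

df = load_adult(as_frame=True)
df["income_label"] = (df["income"].apply(lambda x: ">50K" in x)).astype(int)
df.drop("income", axis=1, inplace=True)
df_train, df_test = train_test_split(df, test_size=0.2, stratify=df.income_label)

cat_embed_cols = ["workclass", "education", "occupation", "relationship", "race", "gender"]
continuous_cols = ["age", "hours-per-week"]

tab_preprocessor = TabPreprocessor(cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols)
X_tab = tab_preprocessor.fit_transform(df_train)

tab_mlp = TabMlp(
    column_idx=tab_preprocessor.column_idx,
    cat_embed_input=tab_preprocessor.cat_embed_input,
    continuous_cols=continuous_cols,
)
# no wide, deeptext or deepimage components: deep learning for tabular data only
model = WideDeep(deeptabular=tab_mlp)

trainer = Trainer(model, objective="binary", metrics=[Accuracy])
trainer.fit(X_tab=X_tab, target=df_train["income_label"].values, n_epochs=1, batch_size=256)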

"},{"location":"index.html#acknowledgments","title":"Acknowledgments","text":"

This library borrows from a series of other libraries, so I think it is just fair to mention them here in the README (specific mentions are also included in the code).

The Callbacks and Initializers structure and code are inspired by the torchsample library, which is itself partially inspired by Keras.

The TextProcessor class in this library uses fastai's Tokenizer and Vocab. The code at utils.fastai_transforms is a minor adaptation of their code so it functions within this library. In my experience, their Tokenizer is the best in class.

The ImageProcessor class in this library uses code from the fantastic Deep Learning for Computer Vision (DL4CV) book by Adrian Rosebrock.

"},{"location":"index.html#license","title":"License","text":"

This work is dual-licensed under Apache 2.0 and MIT (or any later version). You can choose either of them if you use this work.

SPDX-License-Identifier: Apache-2.0 AND MIT

"},{"location":"index.html#cite","title":"Cite","text":""},{"location":"index.html#bibtex","title":"BibTex","text":"
@article{Zaurin_pytorch-widedeep_A_flexible_2023,\nauthor = {Zaurin, Javier Rodriguez and Mulinka, Pavol},\ndoi = {10.21105/joss.05027},\njournal = {Journal of Open Source Software},\nmonth = jun,\nnumber = {86},\npages = {5027},\ntitle = {{pytorch-widedeep: A flexible package for multimodal deep learning}},\nurl = {https://joss.theoj.org/papers/10.21105/joss.05027},\nvolume = {8},\nyear = {2023}\n}\n
"},{"location":"index.html#apa","title":"APA","text":"
Zaurin, J. R., & Mulinka, P. (2023). pytorch-widedeep: A flexible package for\nmultimodal deep learning. Journal of Open Source Software, 8(86), 5027.\nhttps://doi.org/10.21105/joss.05027\n
"},{"location":"contributing.html","title":"Contributing","text":"

Pytorch-widedeep is being developed and used by many active community members. Your help is very valuable to make it better for everyone.

  • Check for the Roadmap or Open an issue to report problems or recommend new features and submit a draft pull request, which will be changed to a pull request after initial review
  • Contribute to the tests to make it more reliable.
  • Contribute to the documentation to make it clearer for everyone.
  • Contribute to the examples to share your experience with other users.
  • Join the discussion on slack
"},{"location":"installation.html","title":"Installation","text":"

This section explains how to install pytorch-widedeep.

For the latest stable release, execute:

pip install pytorch-widedeep\n

For the bleeding-edge version, execute:

pip install git+https://github.com/jrzaurin/pytorch-widedeep.git\n

For developer install

# Clone the repository\ngit clone https://github.com/jrzaurin/pytorch-widedeep\ncd pytorch-widedeep\n\n# Install in dev mode\npip install -e .\n
"},{"location":"installation.html#dependencies","title":"Dependencies","text":"
  • pandas>=1.3.5
  • numpy>=1.21.6
  • scipy>=1.7.3
  • scikit-learn>=1.0.2
  • gensim
  • spacy
  • opencv-contrib-python
  • imutils
  • tqdm
  • torch
  • torchvision
  • einops
  • wrapt
  • torchmetrics
  • pyarrow
  • fastparquet>=0.8.1
"},{"location":"quick_start.html","title":"Quick Start","text":"

This is an example of a binary classification with the adult census dataset using a combination of a wide and deep model (in this case a so-called deeptabular model) with default settings.

import numpy as np\nimport torch\nfrom sklearn.model_selection import train_test_split\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor\nfrom pytorch_widedeep.models import Wide, TabMlp, WideDeep\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep.datasets import load_adult\n\n\ndf = load_adult(as_frame=True)\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\ndf_train, df_test = train_test_split(df, test_size=0.2, stratify=df.income_label)\n\n# Define the 'column set up'\nwide_cols = [\n    \"education\",\n    \"relationship\",\n    \"workclass\",\n    \"occupation\",\n    \"native-country\",\n    \"gender\",\n]\ncrossed_cols = [(\"education\", \"occupation\"), (\"native-country\", \"occupation\")]\n\ncat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital-status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital-gain\",\n    \"capital-loss\",\n    \"native-country\",\n]\ncontinuous_cols = [\"age\", \"hours-per-week\"]\ntarget = \"income_label\"\ntarget = df_train[target].values\n\n# prepare the data\nwide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_wide = wide_preprocessor.fit_transform(df_train)\n\ntab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols  # type: ignore[arg-type]\n)\nX_tab = tab_preprocessor.fit_transform(df_train)\n\n# build the model\nwide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=continuous_cols,\n)\nmodel = WideDeep(wide=wide, deeptabular=tab_mlp)\n\n# train and validate\ntrainer = Trainer(model, objective=\"binary\", metrics=[Accuracy])\ntrainer.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    target=target,\n    n_epochs=5,\n    batch_size=256,\n)\n\n# predict on test\nX_wide_te = wide_preprocessor.transform(df_test)\nX_tab_te = tab_preprocessor.transform(df_test)\npreds = trainer.predict(X_wide=X_wide_te, X_tab=X_tab_te)\n\n# Save and load\n\n# Option 1: this will also save training history and lr history if the\n# LRHistory callback is used\ntrainer.save(path=\"model_weights\", save_state_dict=True)\n\n# Option 2: save as any other torch model\ntorch.save(model.state_dict(), \"model_weights/wd_model.pt\")\n\n# From here in advance, Option 1 or 2 are the same. I assume the user has\n# prepared the data and defined the new model components:\n# 1. Build the model\nmodel_new = WideDeep(wide=wide, deeptabular=tab_mlp)\nmodel_new.load_state_dict(torch.load(\"model_weights/wd_model.pt\"))\n\n# 2. Instantiate the trainer\ntrainer_new = Trainer(model_new, objective=\"binary\")\n\n# 3. Either start the fit or directly predict\npreds = trainer_new.predict(X_wide=X_wide, X_tab=X_tab)\n
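A small, optional addition to the snippet above: since predict returns the predicted classes for a binary objective, the test predictions can be sanity-checked with scikit-learn (this check is illustrative and not part of the original quick start):

from sklearn.metrics import accuracy_score

# `preds` and `df_test` are the objects created in the snippet above
print(accuracy_score(df_test["income_label"], preds))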
"},{"location":"examples/01_preprocessors_and_utils.html","title":"01_preprocessors_and_utils","text":"

For example

In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport pytorch_widedeep as wd\n\nfrom pytorch_widedeep.datasets import load_adult\nfrom pytorch_widedeep.preprocessing import WidePreprocessor\n
import numpy as np import pandas as pd import pytorch_widedeep as wd from pytorch_widedeep.datasets import load_adult from pytorch_widedeep.preprocessing import WidePreprocessor
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n
In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\ndf.head()\n
df = load_adult(as_frame=True) df.head() Out[2]: age workclass fnlwgt education educational-num marital-status occupation relationship race gender capital-gain capital-loss hours-per-week native-country income 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States <=50K 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States <=50K 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States >50K 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States >50K 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States <=50K In\u00a0[3]: Copied!
wide_cols = [\n    \"education\",\n    \"relationship\",\n    \"workclass\",\n    \"occupation\",\n    \"native-country\",\n    \"gender\",\n]\ncrossed_cols = [(\"education\", \"occupation\"), (\"native-country\", \"occupation\")]\n
wide_cols = [ \"education\", \"relationship\", \"workclass\", \"occupation\", \"native-country\", \"gender\", ] crossed_cols = [(\"education\", \"occupation\"), (\"native-country\", \"occupation\")] In\u00a0[4]: Copied!
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_wide = wide_preprocessor.fit_transform(df)\n# From here on, any new observation can be prepared by simply running `.transform`\n# new_X_wide = wide_preprocessor.transform(new_df)\n
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_wide = wide_preprocessor.fit_transform(df) # From here on, any new observation can be prepared by simply running `.transform` # new_X_wide = wide_preprocessor.transform(new_df) In\u00a0[5]: Copied!
X_wide\n
X_wide Out[5]:
array([[  1,  17,  23, ...,  89,  91, 316],\n       [  2,  18,  23, ...,  89,  92, 317],\n       [  3,  18,  24, ...,  89,  93, 318],\n       ...,\n       [  2,  20,  23, ...,  90, 103, 323],\n       [  2,  17,  23, ...,  89, 103, 323],\n       [  2,  21,  29, ...,  90, 115, 324]])

Note that the label encoding starts from 1. This is because it is convenient to leave 0 for padding, i.e. unknown categories. Let's take for example the first entry

In\u00a0[6]: Copied!
X_wide[0]\n
X_wide[0] Out[6]:
array([  1,  17,  23,  32,  47,  89,  91, 316])
In\u00a0[7]: Copied!
wide_preprocessor.inverse_transform(X_wide[:1])\n
wide_preprocessor.inverse_transform(X_wide[:1]) Out[7]: education relationship workclass occupation native-country gender education_occupation native-country_occupation 0 11th Own-child Private Machine-op-inspct United-States Male 11th-Machine-op-inspct United-States-Machine-op-inspct

As we can see, wide_preprocessor numerically encodes the wide_cols and the crossed_cols, which can be recovered using the method inverse_transform.
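One further illustrative check (not in the original notebook): since 0 is reserved for padding/unknown, transforming a row with a category that was never seen during fit should encode that column, and the crossed column built from it, as 0. 'Freedonia' below is a made-up country used purely for illustration:

new_df = pd.DataFrame(
    [
        {
            "education": "11th",
            "relationship": "Own-child",
            "workclass": "Private",
            "occupation": "Machine-op-inspct",
            "native-country": "Freedonia",  # not present in the training data
            "gender": "Male",
        }
    ]
)

# the unseen 'native-country' value and the 'native-country_occupation'
# cross it participates in should both be encoded as 0
wide_preprocessor.transform(new_df)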

In\u00a0[8]: Copied!
from pytorch_widedeep.preprocessing import TabPreprocessor\n
from pytorch_widedeep.preprocessing import TabPreprocessor In\u00a0[9]: Copied!
# cat_embed_cols = [(column_name, embed_dim), ...]\ncat_embed_cols = [\n    (\"education\", 10),\n    (\"relationship\", 8),\n    (\"workclass\", 10),\n    (\"occupation\", 10),\n    (\"native-country\", 10),\n]\ncontinuous_cols = [\"age\", \"hours-per-week\"]\n
# cat_embed_cols = [(column_name, embed_dim), ...] cat_embed_cols = [ (\"education\", 10), (\"relationship\", 8), (\"workclass\", 10), (\"occupation\", 10), (\"native-country\", 10), ] continuous_cols = [\"age\", \"hours-per-week\"] In\u00a0[10]: Copied!
tab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols,\n    continuous_cols=continuous_cols,\n    cols_to_scale=[\"age\"],  # or scale=True or cols_to_scale=continuous_cols\n)\nX_tab = tab_preprocessor.fit_transform(df)\n# From here on, any new observation can be prepared by simply running `.transform`\n# new_X_deep = deep_preprocessor.transform(new_df)\n
tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols, cols_to_scale=[\"age\"], # or scale=True or cols_to_scale=continuous_cols ) X_tab = tab_preprocessor.fit_transform(df) # From here on, any new observation can be prepared by simply running `.transform` # new_X_deep = deep_preprocessor.transform(new_df) In\u00a0[11]: Copied!
X_tab\n
X_tab Out[11]:
array([[ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00, ...,\n         1.00000000e+00, -9.95128932e-01,  4.00000000e+01],\n       [ 2.00000000e+00,  2.00000000e+00,  1.00000000e+00, ...,\n         1.00000000e+00, -4.69415091e-02,  5.00000000e+01],\n       [ 3.00000000e+00,  2.00000000e+00,  2.00000000e+00, ...,\n         1.00000000e+00, -7.76316450e-01,  4.00000000e+01],\n       ...,\n       [ 2.00000000e+00,  4.00000000e+00,  1.00000000e+00, ...,\n         1.00000000e+00,  1.41180837e+00,  4.00000000e+01],\n       [ 2.00000000e+00,  1.00000000e+00,  1.00000000e+00, ...,\n         1.00000000e+00, -1.21394141e+00,  2.00000000e+01],\n       [ 2.00000000e+00,  5.00000000e+00,  7.00000000e+00, ...,\n         1.00000000e+00,  9.74183408e-01,  4.00000000e+01]])

Note that the label encoding starts from 1. This is because it is convenient to leave 0 for padding, i.e. unknown categories. Let's take for example the first entry

In\u00a0[12]: Copied!
X_tab[0]\n
X_tab[0] Out[12]:
array([ 1.        ,  1.        ,  1.        ,  1.        ,  1.        ,\n       -0.99512893, 40.        ])
In\u00a0[13]: Copied!
tab_preprocessor.inverse_transform(X_tab[:1])\n
tab_preprocessor.inverse_transform(X_tab[:1]) Out[13]: education relationship workclass occupation native-country age hours-per-week 0 11th Own-child Private Machine-op-inspct United-States 25.0 40.0

The TabPreprocessor has a series of useful attributes that can later be used when instantiating the different tabular models, such as, for example, the column indexes (used internally in the models to slice the tensors) or the categorical embeddings set-up

In\u00a0[14]: Copied!
tab_preprocessor.column_idx\n
tab_preprocessor.column_idx Out[14]:
{'education': 0,\n 'relationship': 1,\n 'workclass': 2,\n 'occupation': 3,\n 'native-country': 4,\n 'age': 5,\n 'hours-per-week': 6}
In\u00a0[15]: Copied!
# column name, num unique, embedding dim\ntab_preprocessor.cat_embed_input\n
# column name, num unique, embedding dim tab_preprocessor.cat_embed_input Out[15]:
[('education', 16, 10),\n ('relationship', 6, 8),\n ('workclass', 9, 10),\n ('occupation', 15, 10),\n ('native-country', 42, 10)]

As I mentioned, there is more one can do, such as, for example, quantizing (or bucketizing) the continuous cols. For this we could use the quantization_setup param. This parameter accepts a number of different inputs and uses pd.cut under the hood to quantize the continuous cols. For more info, please read the docs. Let's use it here to quantize \"age\" and \"hours-per-week\" into 4 and 5 \"buckets\" respectively

In\u00a0[16]: Copied!
quantization_setup = {\n    \"age\": 4,\n    \"hours-per-week\": 5,\n}  # you can also pass a list of floats with the boundaries if you wanted\nquant_tab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols,\n    continuous_cols=continuous_cols,\n    quantization_setup=quantization_setup,\n)\nqX_tab = quant_tab_preprocessor.fit_transform(df)\n
quantization_setup = { \"age\": 4, \"hours-per-week\": 5, } # you can also pass a list of floats with the boundaries if you wanted quant_tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols, quantization_setup=quantization_setup, ) qX_tab = quant_tab_preprocessor.fit_transform(df)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[17]: Copied!
qX_tab\n
qX_tab Out[17]:
array([[1, 1, 1, ..., 1, 1, 2],\n       [2, 2, 1, ..., 1, 2, 3],\n       [3, 2, 2, ..., 1, 1, 2],\n       ...,\n       [2, 4, 1, ..., 1, 3, 2],\n       [2, 1, 1, ..., 1, 1, 1],\n       [2, 5, 7, ..., 1, 2, 2]])

Note that the continuous columns that have been bucketised into quantiles are treated like any other categorical column

In\u00a0[18]: Copied!
quant_tab_preprocessor.cat_embed_input\n
quant_tab_preprocessor.cat_embed_input Out[18]:
[('education', 16, 10),\n ('relationship', 6, 8),\n ('workclass', 9, 10),\n ('occupation', 15, 10),\n ('native-country', 42, 10),\n ('age', 4, 4),\n ('hours-per-week', 5, 4)]

The column 'age' now has 4 categories, which will be encoded using embeddings of 4 dims. Note that, as with any other categorical column, the categorical \"counter\" starts at 1. This is because any incoming value that is lower/higher than the lowest/highest value seen in the train (or already seen) dataset will be encoded as 0.

In\u00a0[19]: Copied!
np.unique(qX_tab[:, quant_tab_preprocessor.column_idx[\"age\"]])\n
np.unique(qX_tab[:, quant_tab_preprocessor.column_idx[\"age\"]]) Out[19]:
array([1, 2, 3, 4])

Finally, if we now wanted to inverse_transform the transformed array back into the original dataframe, we could still do it, but the continuous, bucketized columns will be transformed back to the mid point of their quantile/bucket range

In\u00a0[20]: Copied!
df_decoded = quant_tab_preprocessor.inverse_transform(qX_tab)\n
df_decoded = quant_tab_preprocessor.inverse_transform(qX_tab)
Note that quantized cols will be turned into the mid point of the corresponding bin\n
In\u00a0[21]: Copied!
df.head(2)\n
df.head(2) Out[21]: age workclass fnlwgt education educational-num marital-status occupation relationship race gender capital-gain capital-loss hours-per-week native-country income 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States <=50K 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States <=50K In\u00a0[22]: Copied!
df_decoded.head(2)\n
df_decoded.head(2) Out[22]: education relationship workclass occupation native-country age hours-per-week 0 11th Own-child Private Machine-op-inspct United-States 26.0885 30.4 1 HS-grad Husband Private Farming-fishing United-States 44.3750 50.0

There is one final comment to make regarding the inverse_transform functionality. As we mentioned before, the encoding 0 is reserved for values that fall outside the range covered by the data used to run the fit method. For example

In\u00a0[23]: Copied!
df.age.min(), df.age.max()\n
df.age.min(), df.age.max() Out[23]:
(17, 90)

All future age values outside that range will be encoded as 0 and decoded as NaN

In\u00a0[24]: Copied!
tmp_df = df.head(1).copy()\ntmp_df.loc[:, \"age\"] = 5\n
tmp_df = df.head(1).copy() tmp_df.loc[:, \"age\"] = 5 In\u00a0[25]: Copied!
tmp_df\n
tmp_df Out[25]: age workclass fnlwgt education educational-num marital-status occupation relationship race gender capital-gain capital-loss hours-per-week native-country income 0 5 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States <=50K In\u00a0[26]: Copied!
# quant_tab_preprocessor has already been fitted with a data that has an age range between 17 and 90\ntmp_qX_tab = quant_tab_preprocessor.transform(tmp_df)\n
# quant_tab_preprocessor has already been fitted with a data that has an age range between 17 and 90 tmp_qX_tab = quant_tab_preprocessor.transform(tmp_df) In\u00a0[27]: Copied!
tmp_qX_tab\n
tmp_qX_tab Out[27]:
array([[1, 1, 1, 1, 1, 0, 2]])
In\u00a0[28]: Copied!
quant_tab_preprocessor.inverse_transform(tmp_qX_tab)\n
quant_tab_preprocessor.inverse_transform(tmp_qX_tab)
Note that quantized cols will be turned into the mid point of the corresponding bin\n
Out[28]: education relationship workclass occupation native-country age hours-per-week 0 11th Own-child Private Machine-op-inspct United-States NaN 30.4 In\u00a0[29]: Copied!
from pytorch_widedeep.preprocessing import TextPreprocessor\n
from pytorch_widedeep.preprocessing import TextPreprocessor In\u00a0[30]: Copied!
# The airbnb dataset, which you could get from here:\n# http://insideairbnb.com/get-the-data.html, is too big to be included in\n# our datasets module (when including images). Therefore, go there,\n# download it, and use the download_images.py script to get the images\n# and the airbnb_data_processing.py to process the data. We'll find\n# better datasets in the future ;). Note that here we are only using a\n# small sample to illustrate the use, so PLEASE ignore the results, just\n# focus on usage\ndf = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\")\n
# The airbnb dataset, which you could get from here: # http://insideairbnb.com/get-the-data.html, is too big to be included in # our datasets module (when including images). Therefore, go there, # download it, and use the download_images.py script to get the images # and the airbnb_data_processing.py to process the data. We'll find # better datasets in the future ;). Note that here we are only using a # small sample to illustrate the use, so PLEASE ignore the results, just # focus on usage df = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\") In\u00a0[31]: Copied!
texts = df.description.tolist()\ntexts[:2]\n
texts = df.description.tolist() texts[:2] Out[31]:
[\"My bright double bedroom with a large window has a relaxed feeling! It comfortably fits one or two and is centrally located just two blocks from Finsbury Park. Enjoy great restaurants in the area and easy access to easy transport tubes, trains and buses. Babies and children of all ages are welcome. Hello Everyone, I'm offering my lovely double bedroom in Finsbury Park area (zone 2) for let in a shared apartment.  You will share the apartment with me and it is fully furnished with a self catering kitchen. Two people can easily sleep well as the room has a queen size bed. I also have a travel cot for a baby for guest with small children.  I will require a deposit up front as a security gesture on both our parts and will be given back to you when you return the keys.  I trust anyone who will be responding to this add would treat my home with care and respect .  Best Wishes  Alina Guest will have access to the self catering kitchen and bathroom. There is the flat is equipped wifi internet,\",\n \"Lots of windows and light.  St Luke's Gardens are at the end of the block, and the river not too far the other way. Ten minutes walk if you go slowly. Buses to everywhere round the corner and shops, restaurants, pubs, the cinema and Waitrose . Bright Chelsea Apartment  This is a bright one bedroom ground floor apartment in an interesting listed building. There is one double bedroom and a living room/kitchen The apartment has a full  bathroom and the kitchen is fully equipped. Two wardrobes are available exclusively for guests and bedside tables and two long drawers. This sunny convenient compact flat is just around the corner from the Waitrose supermarket and all sorts of shops, cinemas, restaurants and pubs.  This is a lovely part of London. There is a fun farmers market in the King's Road at the weekend.  Buses to everywhere are just round the corner, and two underground stations are within ten minutes walk. There is a very nice pub round by St. Luke's gardens, 4 mins slow walk, the \"]
In\u00a0[32]: Copied!
text_preprocessor = TextPreprocessor(text_col=\"description\")\nX_text = text_preprocessor.fit_transform(df)\n# From here on, any new observation can be prepared by simply running `.transform`\n# new_X_text = text_preprocessor.transform(new_df)\n
text_preprocessor = TextPreprocessor(text_col=\"description\") X_text = text_preprocessor.fit_transform(df) # From here on, any new observation can be prepared by simply running `.transform` # new_X_text = text_preprocessor.transform(new_df)
The vocabulary contains 2192 tokens\n
In\u00a0[33]: Copied!
print(X_text[0])\n
print(X_text[0])
[  29   48   37  367  818   17  910   17  177   15  122  349   53  879\n 1174  126  393   40  911    0   23  228   71  819    9   53   55 1380\n  225   11   18  308   18 1564   10  755    0  942  239   53   55    0\n   11   36 1013  277 1974   70   62   15 1475    9  943    5  251    5\n    0    5    0    5  177   53   37   75   11   10  294  726   32    9\n   42    5   25   12   10   22   12  136  100  145]\n
In\u00a0[34]: Copied!
from pytorch_widedeep.preprocessing import ImagePreprocessor\n
from pytorch_widedeep.preprocessing import ImagePreprocessor In\u00a0[35]: Copied!
image_preprocessor = wd.preprocessing.ImagePreprocessor(\n    img_col=\"id\", img_path=\"../tmp_data/airbnb/property_picture/\"\n)\nX_images = image_preprocessor.fit_transform(df)\n# From here on, any new observation can be prepared by simply running `.transform`\n# new_X_images = image_preprocessor.transform(new_df)\n
image_preprocessor = wd.preprocessing.ImagePreprocessor( img_col=\"id\", img_path=\"../tmp_data/airbnb/property_picture/\" ) X_images = image_preprocessor.fit_transform(df) # From here on, any new observation can be prepared by simply running `.transform` # new_X_images = image_preprocessor.transform(new_df)
Reading Images from ../tmp_data/airbnb/property_picture/\nResizing\n
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1001/1001 [00:01<00:00, 667.89it/s]\n
Computing normalisation metrics\n
In\u00a0[36]: Copied!
X_images[0].shape\n
X_images[0].shape Out[36]:
(224, 224, 3)
"},{"location":"examples/01_preprocessors_and_utils.html#processors-and-utils","title":"Processors and Utils\u00b6","text":"

Description of the main tools and utilities that one needs to prepare the data for a WideDeep model constructor.

"},{"location":"examples/01_preprocessors_and_utils.html#the-preprocessing-module","title":"The preprocessing module\u00b6","text":"

There are 4 preprocessors, corresponding to 4 main components of the WideDeep model. These are

  • WidePreprocessor
  • TabPreprocessor
  • TextPreprocessor
  • ImagePreprocessor

Behind the scenes, these preprocessors use a series of helper functions and classes that live in the utils module. If you are interested, please have a look at the documentation
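For orientation, here is a minimal sketch of using one of those helpers directly. This is illustrative only: the LabelEncoder class and its columns_to_encode param are assumed from the utils section of the docs, and the toy dataframe is made up.

import pandas as pd

from pytorch_widedeep.utils import LabelEncoder

# made-up toy dataframe, just to illustrate the call
toy_df = pd.DataFrame({"education": ["11th", "HS-grad", "11th"], "age": [25, 38, 28]})

encoder = LabelEncoder(columns_to_encode=["education"])
# 'education' is mapped to integers starting at 1 (0 is reserved for unseen values)
toy_df_enc = encoder.fit_transform(toy_df)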

"},{"location":"examples/01_preprocessors_and_utils.html#1-widepreprocessor","title":"1. WidePreprocessor\u00b6","text":"

The wide component of the model is a linear model that, in principle, could be implemented as a linear layer receiving the result of one-hot encoding the categorical columns. However, this is not memory efficient. Therefore, we implement the linear layer as an Embedding layer plus a bias. I will explain this in a bit more detail later.

With that in mind, WidePreprocessor simply encodes the categories numerically so that they are the indexes of the lookup table that is an Embedding layer.

"},{"location":"examples/01_preprocessors_and_utils.html#2-tabpreprocessor","title":"2. TabPreprocessor\u00b6","text":"

The TabPreprocessor has a lot of different functionalities. Let's explore some of them in detail. In its basic use, the TabPreprocessor simply label encodes the categorical columns and normalises the numerical ones (unless otherwise specified).

"},{"location":"examples/01_preprocessors_and_utils.html#3-textpreprocessor","title":"3. TextPreprocessor\u00b6","text":"

This preprocessor returns the tokenised, padded sequences that will be directly fed to the stack of LSTMs.

"},{"location":"examples/01_preprocessors_and_utils.html#4-imagepreprocessor","title":"4. ImagePreprocessor\u00b6","text":"

ImagePreprocessor simply resizes the images, taking the aspect ratio into account.

"},{"location":"examples/02_model_components.html","title":"02_model_components","text":"In\u00a0[1]: Copied!
import torch\nimport pandas as pd\nimport numpy as np\n\nfrom torch import nn\n
import torch import pandas as pd import numpy as np from torch import nn In\u00a0[2]: Copied!
df = pd.DataFrame({\"color\": [\"r\", \"b\", \"g\"], \"size\": [\"s\", \"n\", \"l\"]})\ndf.head()\n
df = pd.DataFrame({\"color\": [\"r\", \"b\", \"g\"], \"size\": [\"s\", \"n\", \"l\"]}) df.head() Out[2]: color size 0 r s 1 b n 2 g l

One-hot encoded, the first observation would be

In\u00a0[3]: Copied!
obs_0_oh = (np.array([1.0, 0.0, 0.0, 1.0, 0.0, 0.0])).astype(\"float32\")\n
obs_0_oh = (np.array([1.0, 0.0, 0.0, 1.0, 0.0, 0.0])).astype(\"float32\")

If we simply numerically encode (label encode, or le) the values:

In\u00a0[4]: Copied!
obs_0_le = (np.array([0, 3])).astype(\"int64\")\n
obs_0_le = (np.array([0, 3])).astype(\"int64\")

Note that in the actual implementation of the package we start from 1, saving 0 for padding, i.e. unseen values.

Now, let's see if the two implementations are equivalent

In\u00a0[5]: Copied!
# we have 6 different values. Let's assume we are performing a regression, so pred_dim = 1\nlin = nn.Linear(6, 1)\n
# we have 6 different values. Let's assume we are performing a regression, so pred_dim = 1 lin = nn.Linear(6, 1) In\u00a0[6]: Copied!
emb = nn.Embedding(6, 1)\nemb.weight = nn.Parameter(lin.weight.reshape_as(emb.weight))\n
emb = nn.Embedding(6, 1) emb.weight = nn.Parameter(lin.weight.reshape_as(emb.weight)) In\u00a0[7]: Copied!
lin(torch.tensor(obs_0_oh))\n
lin(torch.tensor(obs_0_oh)) Out[7]:
tensor([-0.5181], grad_fn=<ViewBackward0>)
In\u00a0[8]: Copied!
emb(torch.tensor(obs_0_le)).sum() + lin.bias\n
emb(torch.tensor(obs_0_le)).sum() + lin.bias Out[8]:
tensor([-0.5181], grad_fn=<AddBackward0>)

And this is precisely how the linear model Wide is implemented

In\u00a0[9]: Copied!
from pytorch_widedeep.models import Wide\n
from pytorch_widedeep.models import Wide
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n
In\u00a0[10]: Copied!
# ?Wide\n
# ?Wide In\u00a0[11]: Copied!
wide = Wide(input_dim=10, pred_dim=1)\nwide\n
wide = Wide(input_dim=10, pred_dim=1) wide Out[11]:
Wide(\n  (wide_linear): Embedding(11, 1, padding_idx=0)\n)

Note that even though the input dim is 10, the Embedding layer has 11 weights. Again, this is because we save 0 for padding, which is used for unseen values during the encoding process.

As I mentioned, deeptabular has enough complexity on its own and it will be described in a separate notebook. Let's then jump to deeptext.

In\u00a0[12]: Copied!
from pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.models import TabMlp\n
from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.models import TabMlp In\u00a0[13]: Copied!
data = {\n    \"cat1\": np.random.choice([\"A\", \"B\", \"C\"], size=20),\n    \"cat2\": np.random.choice([\"X\", \"Y\"], size=20),\n    \"cont1\": np.random.rand(20),\n    \"cont2\": np.random.rand(20),\n}\n\ndf = pd.DataFrame(data)\n
data = { \"cat1\": np.random.choice([\"A\", \"B\", \"C\"], size=20), \"cat2\": np.random.choice([\"X\", \"Y\"], size=20), \"cont1\": np.random.rand(20), \"cont2\": np.random.rand(20), } df = pd.DataFrame(data) In\u00a0[14]: Copied!
df.head()\n
df.head() Out[14]: cat1 cat2 cont1 cont2 0 A Y 0.789347 0.561789 1 C X 0.050822 0.061538 2 A Y 0.863784 0.241967 3 C X 0.917848 0.644658 4 C Y 0.042328 0.417303 In\u00a0[15]: Copied!
# see the docs for details on all params/options\ntab_preprocessor = TabPreprocessor(\n    cat_embed_cols=[\"cat1\", \"cat2\"],\n    continuous_cols=[\"cont1\", \"cont2\"],\n    embedding_rule=\"fastai\",\n)\n
# see the docs for details on all params/options tab_preprocessor = TabPreprocessor( cat_embed_cols=[\"cat1\", \"cat2\"], continuous_cols=[\"cont1\", \"cont2\"], embedding_rule=\"fastai\", ) In\u00a0[16]: Copied!
X_tab = tab_preprocessor.fit_transform(df)\n
X_tab = tab_preprocessor.fit_transform(df)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[17]: Copied!
# toy example just to build a model.\ntabmlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=tab_preprocessor.continuous_cols,\n    embed_continuous_method=\"standard\",\n    cont_embed_dim=4,\n    mlp_hidden_dims=[8, 4],\n    mlp_linear_first=True,\n)\ntabmlp\n
# toy example just to build a model. tabmlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=tab_preprocessor.continuous_cols, embed_continuous_method=\"standard\", cont_embed_dim=4, mlp_hidden_dims=[8, 4], mlp_linear_first=True, ) tabmlp Out[17]:
TabMlp(\n  (cat_embed): DiffSizeCatEmbeddings(\n    (embed_layers): ModuleDict(\n      (emb_layer_cat1): Embedding(4, 3, padding_idx=0)\n      (emb_layer_cat2): Embedding(3, 2, padding_idx=0)\n    )\n    (embedding_dropout): Dropout(p=0.0, inplace=False)\n  )\n  (cont_norm): Identity()\n  (cont_embed): ContEmbeddings(\n    INFO: [ContLinear = weight(n_cont_cols, embed_dim) + bias(n_cont_cols, embed_dim)]\n    (linear): ContLinear(n_cont_cols=2, embed_dim=4, embed_dropout=0.0)\n    (dropout): Dropout(p=0.0, inplace=False)\n  )\n  (encoder): MLP(\n    (mlp): Sequential(\n      (dense_layer_0): Sequential(\n        (0): Linear(in_features=13, out_features=8, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n      (dense_layer_1): Sequential(\n        (0): Linear(in_features=8, out_features=4, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n    )\n  )\n)

Let's describe the model a bit: first we have what we call a DiffSizeCatEmbeddings, where categorical columns with different numbers of unique categories will be encoded with embeddings of different dimensions. Then the continuous columns will not be normalised (the normalisation layer is just the identity) and they will be embedded via a \"standard\" method, using a so-called ContLinear layer. This layer displays some INFO that tells us what it is (ContLinear = weight(n_cont_cols, embed_dim) + bias(n_cont_cols, embed_dim)). There are two other options available to embed the continuous cols, based on the paper On Embeddings for Numerical Features in Tabular Deep Learning. These are piecewise and periodic, both available via the embed_continuous_method param, which can take the values \"standard\", \"piecewise\" and \"periodic\". The embedded categorical and continuous columns will then be concatenated ($3 + 2 + (4 * 2) = 13$ input dims) and passed to an MLP.
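If one wanted the \"periodic\" embeddings instead, a sketch could look like the following. It assumes TabMlp accepts the same continuous-embedding params that are listed later in this notebook for TabTransformer (n_frequencies, sigma and share_last_layer, which the \"periodic\" method needs); please check the docs for the exact signature.

# sketch only: same preprocessor as above, periodic continuous embeddings
tabmlp_periodic = TabMlp(
    column_idx=tab_preprocessor.column_idx,
    cat_embed_input=tab_preprocessor.cat_embed_input,
    continuous_cols=tab_preprocessor.continuous_cols,
    embed_continuous_method="periodic",  # instead of "standard"
    cont_embed_dim=4,
    n_frequencies=8,  # assumed param: number of frequencies of the periodic encoding
    sigma=0.02,  # assumed param: std used to initialise the frequencies
    share_last_layer=False,  # assumed param: whether cont cols share the last linear layer
    mlp_hidden_dims=[8, 4],
)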

In\u00a0[18]: Copied!
from pytorch_widedeep.models import BasicRNN\n
from pytorch_widedeep.models import BasicRNN In\u00a0[19]: Copied!
basic_rnn = BasicRNN(vocab_size=4, hidden_dim=4, n_layers=1, padding_idx=0, embed_dim=4)\n
basic_rnn = BasicRNN(vocab_size=4, hidden_dim=4, n_layers=1, padding_idx=0, embed_dim=4)
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/torch/nn/modules/rnn.py:82: UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.1 and num_layers=1\n  warnings.warn(\"dropout option adds dropout after all but last \"\n
In\u00a0[20]: Copied!
basic_rnn\n
basic_rnn Out[20]:
BasicRNN(\n  (word_embed): Embedding(4, 4, padding_idx=0)\n  (rnn): LSTM(4, 4, batch_first=True, dropout=0.1)\n  (rnn_mlp): Identity()\n)

You could, if you wanted, add a Fully Connected Head (FC-Head) on top of it
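For example, a sketch (head_hidden_dims is assumed to be the relevant param of the deeptext models; see the docs for the full signature):

# sketch: BasicRNN with a small FC head on top of the last RNN hidden state
basic_rnn_with_head = BasicRNN(
    vocab_size=4,
    embed_dim=4,
    hidden_dim=4,
    n_layers=1,
    padding_idx=0,
    head_hidden_dims=[8, 4],  # two dense layers plugged on top of the RNN output
)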

In\u00a0[21]: Copied!
from pytorch_widedeep.models import Vision\n
from pytorch_widedeep.models import Vision In\u00a0[22]: Copied!
resnet = Vision(pretrained_model_setup=\"resnet18\", n_trainable=0)\n
resnet = Vision(pretrained_model_setup=\"resnet18\", n_trainable=0) In\u00a0[23]: Copied!
resnet\n
resnet Out[23]:
Vision(\n  (features): Sequential(\n    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n    (2): ReLU(inplace=True)\n    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n    (4): Sequential(\n      (0): BasicBlock(\n        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n      (1): BasicBlock(\n        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n    )\n    (5): Sequential(\n      (0): BasicBlock(\n        (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (downsample): Sequential(\n          (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)\n          (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        )\n      )\n      (1): BasicBlock(\n        (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n    )\n    (6): Sequential(\n      (0): BasicBlock(\n        (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (downsample): Sequential(\n          (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)\n          (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        )\n      )\n      (1): BasicBlock(\n        (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, 
affine=True, track_running_stats=True)\n      )\n    )\n    (7): Sequential(\n      (0): BasicBlock(\n        (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (downsample): Sequential(\n          (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n          (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        )\n      )\n      (1): BasicBlock(\n        (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n    )\n    (8): AdaptiveAvgPool2d(output_size=(1, 1))\n  )\n)
"},{"location":"examples/02_model_components.html#model-components","title":"Model Components\u00b6","text":"

The main components of a WideDeep (i.e. multimodal) model are tabular data, text and images, which are fed into the model via the so-called wide, deeptabular, deeptext and deepimage model components
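For orientation, here is a minimal, illustrative sketch of how the four components built in this notebook could be combined; the individual components and the WideDeep collector class are described in the sections below.

from pytorch_widedeep.models import WideDeep

# components as built elsewhere in this notebook (wide, tabmlp, basic_rnn, resnet)
model = WideDeep(
    wide=wide,  # linear model over the encoded/crossed columns
    deeptabular=tabmlp,  # e.g. a TabMlp
    deeptext=basic_rnn,  # e.g. a BasicRNN
    deepimage=resnet,  # e.g. a Vision model
)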

"},{"location":"examples/02_model_components.html#1-wide","title":"1. wide\u00b6","text":"

The wide component is a Linear layer \"plugged\" into the output neuron(s). Here, the non-linearities are captured via crossed columns. Crossed columns are, quoting directly the paper: \"For binary features, a cross-product transformation (e.g., \u201cAND(gender=female, language=en)\u201d) is 1 if and only if the constituent features (\u201cgender=female\u201d and \u201clanguage=en\u201d) are all 1, and 0 otherwise\".

The only particularity of our implementation is that we have implemented the linear layer via an Embedding layer plus a bias. While the implementations are equivalent, the latter is faster and far more memory efficient, since we do not need to one-hot encode the categorical features.

Let's assume we have the following dataset:

"},{"location":"examples/02_model_components.html#2-deeptabular","title":"2. deeptabular\u00b6","text":"

The deeptabular model alone is what would normally be referred to as Deep Learning for tabular data. As mentioned a number of times throughout the library, each component can be used independently. Therefore, if you wanted to use any of the models below on its own, it is perfectly possible. There are just a couple of simple requirements that will be covered in a later notebook.

At the time of writing, there are a number of models available in pytorch-widedeep to do DL for tabular data. These are:

  1. TabMlp
  2. ContextAttentionMLP
  3. SelfAttentionMLP
  4. TabResnet
  5. Tabnet
  6. TabTransformer
  7. FTTransformer
  8. SAINT
  9. TabFastFormer
  10. TabPerceiver

Let's have a look at one of them. For more information on each of these models, please have a look at the documentation

"},{"location":"examples/02_model_components.html#3-deeptext","title":"3. deeptext\u00b6","text":"

At the time of writing, pytorch-widedeep offers three models that can be passed to WideDeep as the deeptext component. These are:

  1. BasicRNN
  2. AttentiveRNN
  3. StackedAttentiveRNN

For details on each of these models, please have a look at the documentation of the package.

We will soon integrate with Hugging Face, but let me insist: it is perfectly possible to use custom models for each component; please have a look at the corresponding notebook. In general, simply build them and pass them as the corresponding parameters. Note that the custom models MUST return a last layer of activations (i.e. not the final prediction) so that these activations are collected by WideDeep and combined accordingly. In addition, the models MUST also contain an attribute output_dim with the size of this last layer of activations, as in the sketch below.
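A minimal sketch of such a custom deeptext component (plain PyTorch, not part of the library) that satisfies the contract described above:

import torch
from torch import nn


class MyDeepText(nn.Module):
    def __init__(self, vocab_size: int, embed_dim: int = 16, hidden_dim: int = 8):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.rnn = nn.GRU(embed_dim, hidden_dim, batch_first=True)
        # required: WideDeep reads output_dim to size the final prediction layer
        self.output_dim = hidden_dim

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        _, h = self.rnn(self.embed(X))
        # return the last layer of activations, NOT the final prediction
        return h[-1]  # shape: (batch_size, hidden_dim)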

Let's have a look at the BasicRNN model

"},{"location":"examples/02_model_components.html#4-deepimage","title":"4. deepimage\u00b6","text":"

At the time of writing pytorch-widedeep is integrated with torchvision via the Vision class. This means that it is possible to use a variant of any of the following architectures:

  1. resnet
  2. shufflenet
  3. resnext
  4. wide_resnet
  5. regnet
  6. densenet
  7. mobilenet
  8. mnasnet
  9. efficientnet
  10. squeezenet

The user can choose which layers will be trainable. Alternatively, if none of these architectures is useful, one could use a simple, fully trained CNN (please see the package documentation) or pass a custom model.
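For example, a sketch where the last few layer groups are left trainable (n_trainable is the same param used with value 0 in the Vision cell above; exact behaviour is described in the docs):

# sketch: fine-tune the last 4 layer groups of the pretrained resnet18
resnet_finetune = Vision(pretrained_model_setup="resnet18", n_trainable=4)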

Let's have a look

"},{"location":"examples/03_binary_classification_with_defaults.html","title":"03_binary_classification_with_defaults","text":"In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport torch\n\nfrom pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor\nfrom pytorch_widedeep.training import Trainer\nfrom pytorch_widedeep.models import Wide, TabMlp, WideDeep\nfrom pytorch_widedeep.metrics import Accuracy, Precision\nfrom pytorch_widedeep.datasets import load_adult\n
import numpy as np import pandas as pd import torch from pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor from pytorch_widedeep.training import Trainer from pytorch_widedeep.models import Wide, TabMlp, WideDeep from pytorch_widedeep.metrics import Accuracy, Precision from pytorch_widedeep.datasets import load_adult
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n
In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\ndf.head()\n
df = load_adult(as_frame=True) df.head() Out[2]: age workclass fnlwgt education educational-num marital-status occupation relationship race gender capital-gain capital-loss hours-per-week native-country income 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States <=50K 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States <=50K 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States >50K 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States >50K 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States <=50K In\u00a0[3]: Copied!
# For convenience, we'll replace '-' with '_'\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\n# binary target\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\ndf.head()\n
# For convenience, we'll replace '-' with '_' df.columns = [c.replace(\"-\", \"_\") for c in df.columns] # binary target df[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop(\"income\", axis=1, inplace=True) df.head() Out[3]: age workclass fnlwgt education educational_num marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country income_label 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States 0 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States 0 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States 1 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States 1 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States 0 In\u00a0[4]: Copied!
df.drop([\"fnlwgt\", \"educational_num\"], axis=1, inplace=True)\n
df.drop([\"fnlwgt\", \"educational_num\"], axis=1, inplace=True) In\u00a0[5]: Copied!
# Define wide, crossed and deep tabular columns\nwide_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"native_country\",\n]\ncrossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")]\n
# Define wide, crossed and deep tabular columns wide_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"native_country\", ] crossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")] In\u00a0[6]: Copied!
cat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital_gain\",\n    \"capital_loss\",\n    \"native_country\",\n]\ncontinuous_cols = [\"age\", \"hours_per_week\"]\n
cat_embed_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"capital_gain\", \"capital_loss\", \"native_country\", ] continuous_cols = [\"age\", \"hours_per_week\"] In\u00a0[7]: Copied!
# TARGET\ntarget_col = \"income_label\"\ntarget = df[target_col].values\n
# TARGET target_col = \"income_label\" target = df[target_col].values

Let's see what the preprocessors do

In\u00a0[8]: Copied!
# wide\nwide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_wide = wide_preprocessor.fit_transform(df)\n
# wide wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_wide = wide_preprocessor.fit_transform(df) In\u00a0[9]: Copied!
# # wide_preprocessor has an attribute called encoding_dict with the encoding dictionary\n# wide_preprocessor.encoding_dict\n
# # wide_preprocessor has an attribute called encoding_dict with the encoding dictionary # wide_preprocessor.encoding_dict In\u00a0[10]: Copied!
# deeptabular\ntab_preprocessor = TabPreprocessor(\n    embed_cols=cat_embed_cols,\n    continuous_cols=continuous_cols,\n    cols_to_scale=continuous_cols,\n)\nX_tab = tab_preprocessor.fit_transform(df)\n
# deeptabular tab_preprocessor = TabPreprocessor( embed_cols=cat_embed_cols, continuous_cols=continuous_cols, cols_to_scale=continuous_cols, ) X_tab = tab_preprocessor.fit_transform(df) In\u00a0[11]: Copied!
# check the docs to understand the useful attributes that the tab_preprocessor has. For example,\n# as well as an encoding dictionary, tab_preprocessor has an attribute called cat_embed_input\n# that specifies the categorical columns that will be represented as embeddings, the number\n# of different categories per feature, and the dimension of the embeddings as defined by some\n# of the internal rules of thumb that the preprocessor has (have a look at the docs)\ntab_preprocessor.cat_embed_input\n
# check the docs to understand the useful attributes that the tab_preprocessor has. For example, # as well as an encoding dictionary, tab_preprocessor has an attribute called cat_embed_input # that specifies the categorical columns that will be represented as embeddings, the number # of different categories per feature, and the dimension of the embeddings as defined by some # of the internal rules of thumb that the preprocessor has (have a look at the docs) tab_preprocessor.cat_embed_input Out[11]:
[('workclass', 9, 5),\n ('education', 16, 8),\n ('marital_status', 7, 5),\n ('occupation', 15, 7),\n ('relationship', 6, 4),\n ('race', 5, 4),\n ('gender', 2, 2),\n ('capital_gain', 123, 24),\n ('capital_loss', 99, 21),\n ('native_country', 42, 13)]
In\u00a0[12]: Copied!
print(X_wide)\nprint(X_wide.shape)\n
print(X_wide) print(X_wide.shape)
[[  1  10  26 ...  61 103 328]\n [  1  11  27 ...  61 104 329]\n [  2  12  27 ...  61 105 330]\n ...\n [  1  11  28 ...  61 115 335]\n [  1  11  26 ...  61 115 335]\n [  7  11  27 ...  61 127 336]]\n(48842, 10)\n
In\u00a0[13]: Copied!
print(X_tab)\nprint(X_tab.shape)\n
print(X_tab) print(X_tab.shape)
[[ 1.          1.          1.         ...  1.         -0.99512893\n  -0.03408696]\n [ 1.          2.          2.         ...  1.         -0.04694151\n   0.77292975]\n [ 2.          3.          2.         ...  1.         -0.77631645\n  -0.03408696]\n ...\n [ 1.          2.          3.         ...  1.          1.41180837\n  -0.03408696]\n [ 1.          2.          1.         ...  1.         -1.21394141\n  -1.64812038]\n [ 7.          2.          2.         ...  1.          0.97418341\n  -0.03408696]]\n(48842, 12)\n
In\u00a0[14]: Copied!
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    mlp_hidden_dims=[400, 200],\n    mlp_dropout=0.5,\n    mlp_activation=\"leaky_relu\",\n)\n
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, mlp_hidden_dims=[400, 200], mlp_dropout=0.5, mlp_activation=\"leaky_relu\", )

Let's first find out how a linear model performs

In\u00a0[15]: Copied!
wide\n
wide Out[15]:
Wide(\n  (wide_linear): Embedding(809, 1, padding_idx=0)\n)

Before being passed to the Trainer, the models need to be \"constructed\" with the WideDeep constructor class. For the particular case of the wide/linear model, not much really happens

In\u00a0[16]: Copied!
lin_model = WideDeep(wide=wide)\n
lin_model = WideDeep(wide=wide) In\u00a0[17]: Copied!
lin_model\n
lin_model Out[17]:
WideDeep(\n  (wide): Wide(\n    (wide_linear): Embedding(809, 1, padding_idx=0)\n  )\n)
In\u00a0[18]: Copied!
lin_trainer = Trainer(\n    model=lin_model,\n    objective=\"binary\",\n    optimizers=torch.optim.AdamW(lin_model.parameters(), lr=0.01),\n    metrics=[Accuracy, Precision],\n)\n
lin_trainer = Trainer( model=lin_model, objective=\"binary\", optimizers=torch.optim.AdamW(lin_model.parameters(), lr=0.01), metrics=[Accuracy, Precision], ) In\u00a0[19]: Copied!
lin_trainer.fit(X_wide=X_wide, target=target, n_epochs=4, batch_size=128, val_split=0.2)\n
lin_trainer.fit(X_wide=X_wide, target=target, n_epochs=4, batch_size=128, val_split=0.2)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:02<00:00, 109.04it/s, loss=0.426, metrics={'acc': 0.7983, 'prec': 0.6152}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 102.46it/s, loss=0.366, metrics={'acc': 0.832, 'prec': 0.6916}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:02<00:00, 130.27it/s, loss=0.364, metrics={'acc': 0.8305, 'prec': 0.6933}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 150.46it/s, loss=0.361, metrics={'acc': 0.8357, 'prec': 0.6982}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:02<00:00, 133.19it/s, loss=0.359, metrics={'acc': 0.8329, 'prec': 0.6994}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 145.75it/s, loss=0.361, metrics={'acc': 0.836, 'prec': 0.7009}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:02<00:00, 130.91it/s, loss=0.358, metrics={'acc': 0.8333, 'prec': 0.7005}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 155.08it/s, loss=0.361, metrics={'acc': 0.8364, 'prec': 0.702}]\n

Bear in mind that wide is a linear model where the non-linearities are captured via the crossed columns. For the crossed columns to be effective, one needs proper business knowledge. There is no magic formula to produce them.

Let's have a look at the tabular model by itself

In\u00a0[20]: Copied!
tab_model = WideDeep(deeptabular=tab_mlp)\n
tab_model = WideDeep(deeptabular=tab_mlp) In\u00a0[21]: Copied!
tab_model\n
tab_model Out[21]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cat_embed): DiffSizeCatEmbeddings(\n        (embed_layers): ModuleDict(\n          (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n          (emb_layer_education): Embedding(17, 8, padding_idx=0)\n          (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n          (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n          (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n          (emb_layer_race): Embedding(6, 4, padding_idx=0)\n          (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n          (emb_layer_capital_gain): Embedding(124, 24, padding_idx=0)\n          (emb_layer_capital_loss): Embedding(100, 21, padding_idx=0)\n          (emb_layer_native_country): Embedding(43, 13, padding_idx=0)\n        )\n        (embedding_dropout): Dropout(p=0.1, inplace=False)\n      )\n      (cont_norm): Identity()\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=95, out_features=400, bias=True)\n            (1): LeakyReLU(negative_slope=0.01, inplace=True)\n            (2): Dropout(p=0.5, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=400, out_features=200, bias=True)\n            (1): LeakyReLU(negative_slope=0.01, inplace=True)\n            (2): Dropout(p=0.5, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=200, out_features=1, bias=True)\n  )\n)

You can see how the WideDeep class has added a final prediction layer that collects the activations from the last layer of the model and plugs them into the output neuron. If this were a multiclass classification problem, the prediction dimension (i.e. the size of that final layer) would need to be specified via the pred_dim param when instantiating the WideDeep class, as we will see later and as sketched below.
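For example, a sketch for a hypothetical 3-class target (the dataset used here is binary, so this cell is illustrative only):

# sketch: pred_dim sets the size of the final prediction layer
multiclass_model = WideDeep(deeptabular=tab_mlp, pred_dim=3)
multiclass_trainer = Trainer(
    model=multiclass_model,
    objective="multiclass",
    metrics=[Accuracy],
)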

In\u00a0[22]: Copied!
tab_trainer = Trainer(\n    model=tab_model,\n    objective=\"binary\",\n    optimizers=torch.optim.AdamW(tab_model.parameters(), lr=0.001),\n    metrics=[Accuracy, Precision],\n)\n
tab_trainer = Trainer( model=tab_model, objective=\"binary\", optimizers=torch.optim.AdamW(tab_model.parameters(), lr=0.001), metrics=[Accuracy, Precision], ) In\u00a0[23]: Copied!
tab_trainer.fit(X_tab=X_tab, target=target, n_epochs=4, batch_size=128, val_split=0.2)\n
tab_trainer.fit(X_tab=X_tab, target=target, n_epochs=4, batch_size=128, val_split=0.2)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:03<00:00, 97.00it/s, loss=0.37, metrics={'acc': 0.8267, 'prec': 0.7037}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 134.91it/s, loss=0.313, metrics={'acc': 0.8588, 'prec': 0.7577}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:03<00:00, 86.86it/s, loss=0.319, metrics={'acc': 0.8514, 'prec': 0.761}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:01<00:00, 73.13it/s, loss=0.296, metrics={'acc': 0.8675, 'prec': 0.7685}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:03<00:00, 79.07it/s, loss=0.305, metrics={'acc': 0.8574, 'prec': 0.7646}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 130.11it/s, loss=0.289, metrics={'acc': 0.8696, 'prec': 0.7765}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:03<00:00, 87.39it/s, loss=0.296, metrics={'acc': 0.8622, 'prec': 0.7769}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 90.63it/s, loss=0.285, metrics={'acc': 0.8697, 'prec': 0.7741}]\n

The best result I ever obtained with LightGBM on this dataset is 0.8782...so we are pretty close.

Let's combine the wide and tab_mlp components and see if it helps

In\u00a0[24]: Copied!
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    mlp_hidden_dims=[400, 200],\n    mlp_dropout=0.5,\n    mlp_activation=\"leaky_relu\",\n)\nwd_model = WideDeep(wide=wide, deeptabular=tab_mlp)\n
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, mlp_hidden_dims=[400, 200], mlp_dropout=0.5, mlp_activation=\"leaky_relu\", ) wd_model = WideDeep(wide=wide, deeptabular=tab_mlp) In\u00a0[25]: Copied!
wd_trainer = Trainer(\n    model=wd_model,\n    objective=\"binary\",\n    optimizers=torch.optim.AdamW(wd_model.parameters(), lr=0.001),\n    metrics=[Accuracy, Precision],\n)\n
wd_trainer = Trainer( model=wd_model, objective=\"binary\", optimizers=torch.optim.AdamW(wd_model.parameters(), lr=0.001), metrics=[Accuracy, Precision], ) In\u00a0[26]: Copied!
wd_trainer.fit(\n    X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=4, batch_size=128, val_split=0.2\n)\n
wd_trainer.fit( X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=4, batch_size=128, val_split=0.2 )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:03<00:00, 77.48it/s, loss=0.418, metrics={'acc': 0.8047, 'prec': 0.6154}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 110.51it/s, loss=0.321, metrics={'acc': 0.8521, 'prec': 0.7059}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:03<00:00, 82.70it/s, loss=0.333, metrics={'acc': 0.8428, 'prec': 0.7141}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 112.52it/s, loss=0.299, metrics={'acc': 0.866, 'prec': 0.7447}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:04<00:00, 74.34it/s, loss=0.312, metrics={'acc': 0.8533, 'prec': 0.7404}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 89.86it/s, loss=0.29, metrics={'acc': 0.8683, 'prec': 0.7496}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:04<00:00, 65.32it/s, loss=0.301, metrics={'acc': 0.8591, 'prec': 0.7542}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 86.81it/s, loss=0.286, metrics={'acc': 0.8712, 'prec': 0.7552}]\n

For this particular case, the combination of both did not lead to better results than using just the tab_mlp model, at least when training for only 4 epochs.

Note that we have used a TabMlp model, but we could use any other model in the library with the same syntax

In\u00a0[27]: Copied!
from pytorch_widedeep.models import TabTransformer\n
from pytorch_widedeep.models import TabTransformer

The parameters for the TabTransformer are the following:

column_idx: Dict[str, int],\ncat_embed_input: Optional[List[Tuple[str, int]]] = None,\ncat_embed_dropout: Optional[float] = None,\nuse_cat_bias: Optional[bool] = None,\ncat_embed_activation: Optional[str] = None,\nshared_embed: Optional[bool] = None,\nadd_shared_embed: Optional[bool] = None,\nfrac_shared_embed: Optional[float] = None,\ncontinuous_cols: Optional[List[str]] = None,\ncont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\nembed_continuous: Optional[bool] = None,\nembed_continuous_method: Optional[Literal[\"standard\", \"piecewise\", \"periodic\"]] = None,\ncont_embed_dropout: Optional[float] = None,\ncont_embed_activation: Optional[str] = None,\nquantization_setup: Optional[Dict[str, List[float]]] = None,\nn_frequencies: Optional[int] = None,\nsigma: Optional[float] = None,\nshare_last_layer: Optional[bool] = None,\nfull_embed_dropout: Optional[bool] = None,\ninput_dim: int = 32,\nn_heads: int = 8,\nuse_qkv_bias: bool = False,\nn_blocks: int = 4,\nattn_dropout: float = 0.2,\nff_dropout: float = 0.1,\nff_factor: int = 4,\ntransformer_activation: str = \"gelu\",\nuse_linear_attention: bool = False,\nuse_flash_attention: bool = False,\nmlp_hidden_dims: Optional[List[int]] = None,\nmlp_activation: str = \"relu\",\nmlp_dropout: float = 0.1,\nmlp_batchnorm: bool = False,\nmlp_batchnorm_last: bool = False,\nmlp_linear_first: bool = True,\n

Please see the documentation for details on each one of them. For now, let's see how one could use a TabTransformer model in a few lines of code

In\u00a0[28]: Copied!
tab_transformer = TabTransformer(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    embed_continuous_method=\"standard\",\n    cont_norm_layer=\"layernorm\",\n    cont_embed_dropout=0.2,\n    cont_embed_activation=\"leaky_relu\",\n    n_heads=4,\n    ff_dropout=0.2,\n    mlp_dropout=0.5,\n    mlp_activation=\"leaky_relu\",\n    mlp_linear_first=True,\n)\n
tab_transformer = TabTransformer( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, embed_continuous_method=\"standard\", cont_norm_layer=\"layernorm\", cont_embed_dropout=0.2, cont_embed_activation=\"leaky_relu\", n_heads=4, ff_dropout=0.2, mlp_dropout=0.5, mlp_activation=\"leaky_relu\", mlp_linear_first=True, ) In\u00a0[29]: Copied!
tab_model = WideDeep(deeptabular=tab_transformer)\n
tab_model = WideDeep(deeptabular=tab_transformer) In\u00a0[30]: Copied!
tab_model\n
tab_model Out[30]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabTransformer(\n      (cat_embed): SameSizeCatEmbeddings(\n        (embed): Embedding(325, 32, padding_idx=0)\n        (dropout): Dropout(p=0.1, inplace=False)\n      )\n      (cont_norm): LayerNorm((2,), eps=1e-05, elementwise_affine=True)\n      (cont_embed): ContEmbeddings(\n        INFO: [ContLinear = weight(n_cont_cols, embed_dim) + bias(n_cont_cols, embed_dim)]\n        (linear): ContLinear(n_cont_cols=2, embed_dim=32, embed_dropout=0.2)\n        (activation_fn): LeakyReLU(negative_slope=0.01, inplace=True)\n        (dropout): Dropout(p=0.2, inplace=False)\n      )\n      (encoder): Sequential(\n        (transformer_block0): TransformerEncoder(\n          (attn): MultiHeadedAttention(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (q_proj): Linear(in_features=32, out_features=32, bias=False)\n            (kv_proj): Linear(in_features=32, out_features=64, bias=False)\n            (out_proj): Linear(in_features=32, out_features=32, bias=False)\n          )\n          (ff): FeedForward(\n            (w_1): Linear(in_features=32, out_features=128, bias=True)\n            (w_2): Linear(in_features=128, out_features=32, bias=True)\n            (dropout): Dropout(p=0.2, inplace=False)\n            (activation): GELU(approximate='none')\n          )\n          (attn_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n          (ff_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n        )\n        (transformer_block1): TransformerEncoder(\n          (attn): MultiHeadedAttention(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (q_proj): Linear(in_features=32, out_features=32, bias=False)\n            (kv_proj): Linear(in_features=32, out_features=64, bias=False)\n            (out_proj): Linear(in_features=32, out_features=32, bias=False)\n          )\n          (ff): FeedForward(\n            (w_1): Linear(in_features=32, out_features=128, bias=True)\n            (w_2): Linear(in_features=128, out_features=32, bias=True)\n            (dropout): Dropout(p=0.2, inplace=False)\n            (activation): GELU(approximate='none')\n          )\n          (attn_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n          (ff_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n        )\n        (transformer_block2): TransformerEncoder(\n          (attn): MultiHeadedAttention(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (q_proj): Linear(in_features=32, out_features=32, bias=False)\n            (kv_proj): Linear(in_features=32, out_features=64, bias=False)\n            (out_proj): Linear(in_features=32, out_features=32, bias=False)\n          )\n          (ff): FeedForward(\n            (w_1): Linear(in_features=32, out_features=128, bias=True)\n            (w_2): Linear(in_features=128, out_features=32, bias=True)\n            (dropout): Dropout(p=0.2, inplace=False)\n            (activation): GELU(approximate='none')\n          )\n          (attn_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, 
elementwise_affine=True)\n          )\n          (ff_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n        )\n        (transformer_block3): TransformerEncoder(\n          (attn): MultiHeadedAttention(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (q_proj): Linear(in_features=32, out_features=32, bias=False)\n            (kv_proj): Linear(in_features=32, out_features=64, bias=False)\n            (out_proj): Linear(in_features=32, out_features=32, bias=False)\n          )\n          (ff): FeedForward(\n            (w_1): Linear(in_features=32, out_features=128, bias=True)\n            (w_2): Linear(in_features=128, out_features=32, bias=True)\n            (dropout): Dropout(p=0.2, inplace=False)\n            (activation): GELU(approximate='none')\n          )\n          (attn_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n          (ff_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=384, out_features=1, bias=True)\n  )\n)
In\u00a0[31]: Copied!
tab_trainer = Trainer(\n    model=tab_model,\n    objective=\"binary\",\n    optimizers=torch.optim.AdamW(tab_model.parameters(), lr=0.001),\n    metrics=[Accuracy, Precision],\n)\n
tab_trainer = Trainer( model=tab_model, objective=\"binary\", optimizers=torch.optim.AdamW(tab_model.parameters(), lr=0.001), metrics=[Accuracy, Precision], ) In\u00a0[32]: Copied!
tab_trainer.fit(X_tab=X_tab, target=target, n_epochs=1, batch_size=128, val_split=0.2)\n
tab_trainer.fit(X_tab=X_tab, target=target, n_epochs=1, batch_size=128, val_split=0.2)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:11<00:00, 27.57it/s, loss=0.359, metrics={'acc': 0.8334, 'prec': 0.7082}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:01<00:00, 57.89it/s, loss=0.33, metrics={'acc': 0.8536, 'prec': 0.7152}]\n
"},{"location":"examples/03_binary_classification_with_defaults.html#simple-binary-classification-with-defaults","title":"Simple Binary Classification with defaults\u00b6","text":"

In this notebook we will train a Wide and Deep model and, on its own, a \"Deep\" model using the well-known adult dataset

"},{"location":"examples/03_binary_classification_with_defaults.html#preparing-the-data","title":"Preparing the data\u00b6","text":""},{"location":"examples/03_binary_classification_with_defaults.html#defining-the-model","title":"Defining the model\u00b6","text":""},{"location":"examples/04_regression_with_images_and_text.html","title":"04_regression_with_images_and_text","text":"In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport os\nimport torch\nfrom torchvision.transforms import ToTensor, Normalize\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import (\n    WidePreprocessor,\n    TabPreprocessor,\n    TextPreprocessor,\n    ImagePreprocessor,\n)\nfrom pytorch_widedeep.models import (\n    Wide,\n    TabMlp,\n    Vision,\n    BasicRNN,\n    WideDeep,\n)\nfrom pytorch_widedeep.losses import RMSELoss\nfrom pytorch_widedeep.initializers import *\nfrom pytorch_widedeep.callbacks import *\n
import numpy as np import pandas as pd import os import torch from torchvision.transforms import ToTensor, Normalize from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import ( WidePreprocessor, TabPreprocessor, TextPreprocessor, ImagePreprocessor, ) from pytorch_widedeep.models import ( Wide, TabMlp, Vision, BasicRNN, WideDeep, ) from pytorch_widedeep.losses import RMSELoss from pytorch_widedeep.initializers import * from pytorch_widedeep.callbacks import *
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n
In\u00a0[2]: Copied!
df = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\")\ndf.head()\n
df = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\") df.head() Out[2]: id host_id description host_listings_count host_identity_verified neighbourhood_cleansed latitude longitude is_location_exact property_type ... amenity_wide_entrance amenity_wide_entrance_for_guests amenity_wide_entryway amenity_wide_hallways amenity_wifi amenity_window_guards amenity_wine_cooler security_deposit extra_people yield 0 13913.jpg 54730 My bright double bedroom with a large window h... 4.0 f Islington 51.56802 -0.11121 t apartment ... 1 0 0 0 1 0 0 100.0 15.0 12.00 1 15400.jpg 60302 Lots of windows and light. St Luke's Gardens ... 1.0 t Kensington and Chelsea 51.48796 -0.16898 t apartment ... 0 0 0 0 1 0 0 150.0 0.0 109.50 2 17402.jpg 67564 Open from June 2018 after a 3-year break, we a... 19.0 t Westminster 51.52098 -0.14002 t apartment ... 0 0 0 0 1 0 0 350.0 10.0 149.65 3 24328.jpg 41759 Artist house, bright high ceiling rooms, priva... 2.0 t Wandsworth 51.47298 -0.16376 t other ... 0 0 0 0 1 0 0 250.0 0.0 215.60 4 25023.jpg 102813 Large, all comforts, 2-bed flat; first floor; ... 1.0 f Wandsworth 51.44687 -0.21874 t apartment ... 0 0 0 0 1 0 0 250.0 11.0 79.35

5 rows \u00d7 223 columns

In\u00a0[3]: Copied!
# There are a number of columns that are already binary. Therefore, no need to one hot encode them\ncrossed_cols = [(\"property_type\", \"room_type\")]\nalready_dummies = [c for c in df.columns if \"amenity\" in c] + [\"has_house_rules\"]\nwide_cols = [\n    \"is_location_exact\",\n    \"property_type\",\n    \"room_type\",\n    \"host_gender\",\n    \"instant_bookable\",\n] + already_dummies\n\ncat_embed_cols = [(c, 16) for c in df.columns if \"catg\" in c] + [\n    (\"neighbourhood_cleansed\", 64),\n    (\"cancellation_policy\", 16),\n]\ncontinuous_cols = [\"latitude\", \"longitude\", \"security_deposit\", \"extra_people\"]\n\n# text and image colnames\ntext_col = \"description\"\nimg_col = \"id\"\n\n# path to pretrained word embeddings and the images\nword_vectors_path = \"../tmp_data/glove.6B/glove.6B.100d.txt\"\nimg_path = \"../tmp_data/airbnb/property_picture\"\n\n# target\ntarget_col = \"yield\"\n
# There are a number of columns that are already binary. Therefore, no need to one hot encode them crossed_cols = [(\"property_type\", \"room_type\")] already_dummies = [c for c in df.columns if \"amenity\" in c] + [\"has_house_rules\"] wide_cols = [ \"is_location_exact\", \"property_type\", \"room_type\", \"host_gender\", \"instant_bookable\", ] + already_dummies cat_embed_cols = [(c, 16) for c in df.columns if \"catg\" in c] + [ (\"neighbourhood_cleansed\", 64), (\"cancellation_policy\", 16), ] continuous_cols = [\"latitude\", \"longitude\", \"security_deposit\", \"extra_people\"] # text and image colnames text_col = \"description\" img_col = \"id\" # path to pretrained word embeddings and the images word_vectors_path = \"../tmp_data/glove.6B/glove.6B.100d.txt\" img_path = \"../tmp_data/airbnb/property_picture\" # target target_col = \"yield\" In\u00a0[4]: Copied!
target = df[target_col].values\n
target = df[target_col].values In\u00a0[5]: Copied!
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_wide = wide_preprocessor.fit_transform(df)\n
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_wide = wide_preprocessor.fit_transform(df) In\u00a0[6]: Copied!
tab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols,\n    continuous_cols=continuous_cols,\n)\nX_tab = tab_preprocessor.fit_transform(df)\n
tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols, ) X_tab = tab_preprocessor.fit_transform(df)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[7]: Copied!
text_preprocessor = TextPreprocessor(\n    word_vectors_path=word_vectors_path, text_col=text_col\n)\nX_text = text_preprocessor.fit_transform(df)\n
text_preprocessor = TextPreprocessor( word_vectors_path=word_vectors_path, text_col=text_col ) X_text = text_preprocessor.fit_transform(df)
The vocabulary contains 2192 tokens\nIndexing word vectors...\nLoaded 400000 word vectors\nPreparing embeddings matrix...\n2175 words in the vocabulary had ../tmp_data/glove.6B/glove.6B.100d.txt vectors and appear more than 5 times\n
In\u00a0[8]: Copied!
image_processor = ImagePreprocessor(img_col=img_col, img_path=img_path)\nX_images = image_processor.fit_transform(df)\n
image_processor = ImagePreprocessor(img_col=img_col, img_path=img_path) X_images = image_processor.fit_transform(df)
Reading Images from ../tmp_data/airbnb/property_picture\nResizing\n
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1001/1001 [00:01<00:00, 638.00it/s]\n
Computing normalisation metrics\n
In\u00a0[9]: Copied!
# Linear model\nwide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\n\n# DeepDense: 2 Dense layers\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=continuous_cols,\n    mlp_hidden_dims=[128, 64],\n    mlp_dropout=0.1,\n)\n\n# DeepText: a stack of 2 LSTMs\nbasic_rnn = BasicRNN(\n    vocab_size=len(text_preprocessor.vocab.itos),\n    embed_matrix=text_preprocessor.embedding_matrix,\n    n_layers=2,\n    hidden_dim=64,\n    rnn_dropout=0.5,\n)\n\n# Pretrained Resnet 18\nresnet = Vision(pretrained_model_setup=\"resnet18\", n_trainable=4)\n
# Linear model wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) # DeepDense: 2 Dense layers tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=continuous_cols, mlp_hidden_dims=[128, 64], mlp_dropout=0.1, ) # DeepText: a stack of 2 LSTMs basic_rnn = BasicRNN( vocab_size=len(text_preprocessor.vocab.itos), embed_matrix=text_preprocessor.embedding_matrix, n_layers=2, hidden_dim=64, rnn_dropout=0.5, ) # Pretrained Resnet 18 resnet = Vision(pretrained_model_setup=\"resnet18\", n_trainable=4)

Combine them all with the \"collector\" class WideDeep

In\u00a0[10]: Copied!
model = WideDeep(\n    wide=wide,\n    deeptabular=tab_mlp,\n    deeptext=basic_rnn,\n    deepimage=resnet,\n    head_hidden_dims=[256, 128],\n)\n
model = WideDeep( wide=wide, deeptabular=tab_mlp, deeptext=basic_rnn, deepimage=resnet, head_hidden_dims=[256, 128], ) In\u00a0[11]: Copied!
trainer = Trainer(model, objective=\"rmse\")\n
trainer = Trainer(model, objective=\"rmse\") In\u00a0[12]: Copied!
trainer.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    X_text=X_text,\n    X_img=X_images,\n    target=target,\n    n_epochs=1,\n    batch_size=32,\n    val_split=0.2,\n)\n
trainer.fit( X_wide=X_wide, X_tab=X_tab, X_text=X_text, X_img=X_images, target=target, n_epochs=1, batch_size=32, val_split=0.2, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 25/25 [00:19<00:00,  1.28it/s, loss=115]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 7/7 [00:04<00:00,  1.62it/s, loss=94.1]\n

Both the Text and Image components allow FC heads of their own (have a look at the documentation).
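As a hedged sketch (not run here), and assuming both components accept a head_hidden_dims argument as described in the documentation, an FC head could be attached to each component like this:

# hypothetical variants of the components defined above, each with its own FC head
basic_rnn_with_head = BasicRNN(
    vocab_size=len(text_preprocessor.vocab.itos),
    embed_matrix=text_preprocessor.embedding_matrix,
    n_layers=2,
    hidden_dim=64,
    rnn_dropout=0.5,
    head_hidden_dims=[32, 16],
)
resnet_with_head = Vision(
    pretrained_model_setup="resnet18",
    n_trainable=4,
    head_hidden_dims=[64, 32],
)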

Now let's go \"kaggle crazy\". Let's use different optimizers, initializers and schedulers for the different components. Moreover, let's use different learning rates for different parameter groups within the deeptabular component.

In\u00a0[13]: Copied!
deep_params = []\nfor childname, child in model.named_children():\n    if childname == \"deeptabular\":\n        for n, p in child.named_parameters():\n            if \"embed_layer\" in n:\n                deep_params.append({\"params\": p, \"lr\": 1e-4})\n            else:\n                deep_params.append({\"params\": p, \"lr\": 1e-3})\n
deep_params = [] for childname, child in model.named_children(): if childname == \"deeptabular\": for n, p in child.named_parameters(): if \"embed_layer\" in n: deep_params.append({\"params\": p, \"lr\": 1e-4}) else: deep_params.append({\"params\": p, \"lr\": 1e-3}) In\u00a0[14]: Copied!
wide_opt = torch.optim.Adam(model.wide.parameters(), lr=0.03)\ndeep_opt = torch.optim.Adam(deep_params)\ntext_opt = torch.optim.AdamW(model.deeptext.parameters())\nimg_opt = torch.optim.AdamW(model.deepimage.parameters())\nhead_opt = torch.optim.Adam(model.deephead.parameters())\n
wide_opt = torch.optim.Adam(model.wide.parameters(), lr=0.03) deep_opt = torch.optim.Adam(deep_params) text_opt = torch.optim.AdamW(model.deeptext.parameters()) img_opt = torch.optim.AdamW(model.deepimage.parameters()) head_opt = torch.optim.Adam(model.deephead.parameters()) In\u00a0[15]: Copied!
wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=5)\ndeep_sch = torch.optim.lr_scheduler.MultiStepLR(deep_opt, milestones=[3, 8])\ntext_sch = torch.optim.lr_scheduler.StepLR(text_opt, step_size=5)\nimg_sch = torch.optim.lr_scheduler.MultiStepLR(img_opt, milestones=[3, 8])\nhead_sch = torch.optim.lr_scheduler.StepLR(head_opt, step_size=5)\n
wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=5) deep_sch = torch.optim.lr_scheduler.MultiStepLR(deep_opt, milestones=[3, 8]) text_sch = torch.optim.lr_scheduler.StepLR(text_opt, step_size=5) img_sch = torch.optim.lr_scheduler.MultiStepLR(img_opt, milestones=[3, 8]) head_sch = torch.optim.lr_scheduler.StepLR(head_opt, step_size=5) In\u00a0[16]: Copied!
# remember, one optimizer per model component; for lr_schedulers and initializers this is not necessary\noptimizers = {\n    \"wide\": wide_opt,\n    \"deeptabular\": deep_opt,\n    \"deeptext\": text_opt,\n    \"deepimage\": img_opt,\n    \"deephead\": head_opt,\n}\nschedulers = {\n    \"wide\": wide_sch,\n    \"deeptabular\": deep_sch,\n    \"deeptext\": text_sch,\n    \"deepimage\": img_sch,\n    \"deephead\": head_sch,\n}\n\n# Now...we have used pretrained word embeddings, so you do not want to\n# initialise these embeddings. However, you might still want to initialise the\n# other layers in the DeepText component. No problem, you can do that with the\n# parameter pattern and your knowledge of regular expressions. Here we are\n# telling the KaimingNormal initializer NOT to touch the parameters whose\n# name contains the string word_embed.\ninitializers = {\n    \"wide\": KaimingNormal,\n    \"deeptabular\": KaimingNormal,\n    \"deeptext\": KaimingNormal(pattern=r\"^(?!.*word_embed).*$\"),\n    \"deepimage\": KaimingNormal,\n}\n\nmean = [0.406, 0.456, 0.485]  # BGR\nstd = [0.225, 0.224, 0.229]  # BGR\ntransforms = [ToTensor, Normalize(mean=mean, std=std)]\ncallbacks = [\n    LRHistory(n_epochs=10),\n    EarlyStopping,\n    ModelCheckpoint(filepath=\"model_weights/wd_out\"),\n]\n
# remember, one optimizer per model component; for lr_schedulers and initializers this is not necessary optimizers = { \"wide\": wide_opt, \"deeptabular\": deep_opt, \"deeptext\": text_opt, \"deepimage\": img_opt, \"deephead\": head_opt, } schedulers = { \"wide\": wide_sch, \"deeptabular\": deep_sch, \"deeptext\": text_sch, \"deepimage\": img_sch, \"deephead\": head_sch, } # Now...we have used pretrained word embeddings, so you do not want to # initialise these embeddings. However, you might still want to initialise the # other layers in the DeepText component. No problem, you can do that with the # parameter pattern and your knowledge of regular expressions. Here we are # telling the KaimingNormal initializer NOT to touch the parameters whose # name contains the string word_embed. initializers = { \"wide\": KaimingNormal, \"deeptabular\": KaimingNormal, \"deeptext\": KaimingNormal(pattern=r\"^(?!.*word_embed).*$\"), \"deepimage\": KaimingNormal, } mean = [0.406, 0.456, 0.485] # BGR std = [0.225, 0.224, 0.229] # BGR transforms = [ToTensor, Normalize(mean=mean, std=std)] callbacks = [ LRHistory(n_epochs=10), EarlyStopping, ModelCheckpoint(filepath=\"model_weights/wd_out\"), ] In\u00a0[17]: Copied!
trainer = Trainer(\n    model,\n    objective=\"rmse\",\n    initializers=initializers,\n    optimizers=optimizers,\n    lr_schedulers=schedulers,\n    callbacks=callbacks,\n    transforms=transforms,\n)\n
trainer = Trainer( model, objective=\"rmse\", initializers=initializers, optimizers=optimizers, lr_schedulers=schedulers, callbacks=callbacks, transforms=transforms, )
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/initializers.py:34: UserWarning: No initializer found for deephead\n  warnings.warn(\n
In\u00a0[18]: Copied!
trainer.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    X_text=X_text,\n    X_img=X_images,\n    target=target,\n    n_epochs=1,\n    batch_size=32,\n    val_split=0.2,\n)\n
trainer.fit( X_wide=X_wide, X_tab=X_tab, X_text=X_text, X_img=X_images, target=target, n_epochs=1, batch_size=32, val_split=0.2, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 25/25 [00:19<00:00,  1.25it/s, loss=101]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 7/7 [00:04<00:00,  1.62it/s, loss=90.6]\n
Model weights after training corresponds to the those of the final epoch which might not be the best performing weights. Use the 'ModelCheckpoint' Callback to restore the best epoch weights.\n

We have only run one epoch, but let's check that the LRHistory callback records the lr values for each parameter group.

In\u00a0[19]: Copied!
trainer.lr_history\n
trainer.lr_history Out[19]:
{'lr_wide_0': [0.03, 0.03],\n 'lr_deeptabular_0': [0.0001, 0.0001],\n 'lr_deeptabular_1': [0.0001, 0.0001],\n 'lr_deeptabular_2': [0.0001, 0.0001],\n 'lr_deeptabular_3': [0.0001, 0.0001],\n 'lr_deeptabular_4': [0.0001, 0.0001],\n 'lr_deeptabular_5': [0.0001, 0.0001],\n 'lr_deeptabular_6': [0.0001, 0.0001],\n 'lr_deeptabular_7': [0.0001, 0.0001],\n 'lr_deeptabular_8': [0.0001, 0.0001],\n 'lr_deeptabular_9': [0.001, 0.001],\n 'lr_deeptabular_10': [0.001, 0.001],\n 'lr_deeptabular_11': [0.001, 0.001],\n 'lr_deeptabular_12': [0.001, 0.001],\n 'lr_deeptext_0': [0.001, 0.001],\n 'lr_deepimage_0': [0.001, 0.001],\n 'lr_deephead_0': [0.001, 0.001]}
"},{"location":"examples/04_regression_with_images_and_text.html#regression-with-images-and-text","title":"Regression with Images and Text\u00b6","text":"

In this notebook we will go through a series of examples on how to combine all Wide & Deep components.

To that aim I will use the Airbnb listings dataset for London, which you can download from here. I use this dataset simply because it contains tabular data, images and text.

I have taken a sample of 1000 listings to keep the data tractable in this notebook. Also, I have preprocessed the data and prepared it for this exercise. All preprocessing steps can be found in the notebook airbnb_data_preprocessing.ipynb in this examples folder.

"},{"location":"examples/04_regression_with_images_and_text.html#regression-with-the-defaults","title":"Regression with the defaults\u00b6","text":"

The set up

"},{"location":"examples/04_regression_with_images_and_text.html#prepare-the-data","title":"Prepare the data\u00b6","text":"

I will focus here on how to prepare the data and run the model. Check notebooks 1 and 2 to see what's going on behind the scenes.

Preparing the data is rather simple

"},{"location":"examples/04_regression_with_images_and_text.html#build-the-model-components","title":"Build the model components\u00b6","text":""},{"location":"examples/04_regression_with_images_and_text.html#build-the-trainer-and-fit","title":"Build the trainer and fit\u00b6","text":""},{"location":"examples/05_save_and_load_model_and_artifacts.html","title":"05_save_and_load_model_and_artifacts","text":"In\u00a0[1]: Copied!
import pickle\nimport numpy as np\nimport pandas as pd\nimport torch\nimport shutil\n\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.training import Trainer\nfrom pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint, LRHistory\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep.datasets import load_adult\nfrom sklearn.model_selection import train_test_split\n
import pickle import numpy as np import pandas as pd import torch import shutil from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.training import Trainer from pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint, LRHistory from pytorch_widedeep.models import TabMlp, WideDeep from pytorch_widedeep.metrics import Accuracy from pytorch_widedeep.datasets import load_adult from sklearn.model_selection import train_test_split
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n
In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\ndf.head()\n
df = load_adult(as_frame=True) df.head() Out[2]: age workclass fnlwgt education educational-num marital-status occupation relationship race gender capital-gain capital-loss hours-per-week native-country income 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States <=50K 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States <=50K 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States >50K 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States >50K 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States <=50K In\u00a0[3]: Copied!
# For convenience, we'll replace '-' with '_'\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\n# binary target\ndf[\"target\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\ndf.head()\n
# For convenience, we'll replace '-' with '_' df.columns = [c.replace(\"-\", \"_\") for c in df.columns] # binary target df[\"target\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop(\"income\", axis=1, inplace=True) df.head() Out[3]: age workclass fnlwgt education educational_num marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country target 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States 0 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States 0 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States 1 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States 1 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States 0 In\u00a0[4]: Copied!
train, valid = train_test_split(df, test_size=0.2, stratify=df.target)\n# the test data will be used later as if it were \"fresh\", new data coming after some time...\nvalid, test = train_test_split(valid, test_size=0.5, stratify=valid.target)\n
train, valid = train_test_split(df, test_size=0.2, stratify=df.target) # the test data will be used later as if it were \"fresh\", new data coming after some time... valid, test = train_test_split(valid, test_size=0.5, stratify=valid.target) In\u00a0[5]: Copied!
print(f\"train shape: {train.shape}\")\nprint(f\"valid shape: {valid.shape}\")\nprint(f\"test shape: {test.shape}\")\n
print(f\"train shape: {train.shape}\") print(f\"valid shape: {valid.shape}\") print(f\"test shape: {test.shape}\")
train shape: (39073, 15)\nvalid shape: (4884, 15)\ntest shape: (4885, 15)\n
In\u00a0[6]: Copied!
cat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital_gain\",\n    \"capital_loss\",\n    \"native_country\",\n]\ncontinuous_cols = [\"age\", \"hours_per_week\"]\n
cat_embed_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"capital_gain\", \"capital_loss\", \"native_country\", ] continuous_cols = [\"age\", \"hours_per_week\"] In\u00a0[7]: Copied!
tab_preprocessor = TabPreprocessor(\n    embed_cols=cat_embed_cols,\n    continuous_cols=continuous_cols,\n)\nX_tab_train = tab_preprocessor.fit_transform(train)\ny_train = train.target.values\nX_tab_valid = tab_preprocessor.transform(valid)\ny_valid = valid.target.values\n
tab_preprocessor = TabPreprocessor( embed_cols=cat_embed_cols, continuous_cols=continuous_cols, ) X_tab_train = tab_preprocessor.fit_transform(train) y_train = train.target.values X_tab_valid = tab_preprocessor.transform(valid) y_valid = valid.target.values
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[8]: Copied!
tab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    cont_norm_layer=\"layernorm\",\n    embed_continuous_method=\"standard\",\n    cont_embed_dim=8,\n    mlp_hidden_dims=[64, 32],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nmodel = WideDeep(deeptabular=tab_mlp)\n
tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, cont_norm_layer=\"layernorm\", embed_continuous_method=\"standard\", cont_embed_dim=8, mlp_hidden_dims=[64, 32], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) model = WideDeep(deeptabular=tab_mlp) In\u00a0[9]: Copied!
model\n
model Out[9]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cat_embed): DiffSizeCatEmbeddings(\n        (embed_layers): ModuleDict(\n          (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n          (emb_layer_education): Embedding(17, 8, padding_idx=0)\n          (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n          (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n          (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n          (emb_layer_race): Embedding(6, 4, padding_idx=0)\n          (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n          (emb_layer_capital_gain): Embedding(122, 23, padding_idx=0)\n          (emb_layer_capital_loss): Embedding(97, 21, padding_idx=0)\n          (emb_layer_native_country): Embedding(43, 13, padding_idx=0)\n        )\n        (embedding_dropout): Dropout(p=0.1, inplace=False)\n      )\n      (cont_norm): LayerNorm((2,), eps=1e-05, elementwise_affine=True)\n      (cont_embed): ContEmbeddings(\n        INFO: [ContLinear = weight(n_cont_cols, embed_dim) + bias(n_cont_cols, embed_dim)]\n        (linear): ContLinear(n_cont_cols=2, embed_dim=8, embed_dropout=0.0)\n        (dropout): Dropout(p=0.0, inplace=False)\n      )\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=108, out_features=64, bias=True)\n            (1): LeakyReLU(negative_slope=0.01, inplace=True)\n            (2): Dropout(p=0.2, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=64, out_features=32, bias=True)\n            (1): LeakyReLU(negative_slope=0.01, inplace=True)\n            (2): Dropout(p=0.2, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=32, out_features=1, bias=True)\n  )\n)
In\u00a0[10]: Copied!
early_stopping = EarlyStopping()\nmodel_checkpoint = ModelCheckpoint(\n    filepath=\"tmp_dir/adult_tabmlp_model\",\n    save_best_only=True,\n    verbose=1,\n    max_save=1,\n)\n\ntrainer = Trainer(\n    model,\n    objective=\"binary\",\n    callbacks=[early_stopping, model_checkpoint],\n    metrics=[Accuracy],\n)\n\ntrainer.fit(\n    X_train={\"X_tab\": X_tab_train, \"target\": y_train},\n    X_val={\"X_tab\": X_tab_valid, \"target\": y_valid},\n    n_epochs=4,\n    batch_size=256,\n)\n
early_stopping = EarlyStopping() model_checkpoint = ModelCheckpoint( filepath=\"tmp_dir/adult_tabmlp_model\", save_best_only=True, verbose=1, max_save=1, ) trainer = Trainer( model, objective=\"binary\", callbacks=[early_stopping, model_checkpoint], metrics=[Accuracy], ) trainer.fit( X_train={\"X_tab\": X_tab_train, \"target\": y_train}, X_val={\"X_tab\": X_tab_valid, \"target\": y_valid}, n_epochs=4, batch_size=256, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 76.25it/s, loss=0.452, metrics={'acc': 0.7867}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 125.36it/s, loss=0.335, metrics={'acc': 0.8532}]\n
\nEpoch 1: val_loss improved from inf to 0.33532 Saving model to tmp_dir/adult_tabmlp_model_1.p\n
epoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 76.98it/s, loss=0.355, metrics={'acc': 0.8401}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 106.51it/s, loss=0.303, metrics={'acc': 0.8665}]\n
\nEpoch 2: val_loss improved from 0.33532 to 0.30273 Saving model to tmp_dir/adult_tabmlp_model_2.p\n
epoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 82.71it/s, loss=0.332, metrics={'acc': 0.849}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 107.80it/s, loss=0.288, metrics={'acc': 0.8757}]\n
\nEpoch 3: val_loss improved from 0.30273 to 0.28791 Saving model to tmp_dir/adult_tabmlp_model_3.p\n
epoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 79.02it/s, loss=0.32, metrics={'acc': 0.8541}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 127.07it/s, loss=0.282, metrics={'acc': 0.8763}]
\nEpoch 4: val_loss improved from 0.28791 to 0.28238 Saving model to tmp_dir/adult_tabmlp_model_4.p\nModel weights restored to best epoch: 4\n
\n
In\u00a0[11]: Copied!
torch.save(model, \"tmp_dir/model_saved_option_1.pt\")\n
torch.save(model, \"tmp_dir/model_saved_option_1.pt\") In\u00a0[12]: Copied!
torch.save(model.state_dict(), \"tmp_dir/model_state_dict_saved_option_1.pt\")\n
torch.save(model.state_dict(), \"tmp_dir/model_state_dict_saved_option_1.pt\") In\u00a0[13]: Copied!
trainer.save(path=\"tmp_dir/\", model_filename=\"model_saved_option_2.pt\")\n
trainer.save(path=\"tmp_dir/\", model_filename=\"model_saved_option_2.pt\")

or the state dict

In\u00a0[14]: Copied!
trainer.save(\n    path=\"tmp_dir/\",\n    model_filename=\"model_state_dict_saved_option_2.pt\",\n    save_state_dict=True,\n)\n
trainer.save( path=\"tmp_dir/\", model_filename=\"model_state_dict_saved_option_2.pt\", save_state_dict=True, ) In\u00a0[15]: Copied!
%%bash\n\nls tmp_dir/\n
%%bash ls tmp_dir/
adult_tabmlp_model_4.p\nhistory\nmodel_saved_option_1.pt\nmodel_saved_option_2.pt\nmodel_state_dict_saved_option_1.pt\nmodel_state_dict_saved_option_2.pt\n
In\u00a0[16]: Copied!
%%bash\n\nls tmp_dir/history/\n
%%bash ls tmp_dir/history/
train_eval_history.json\n

Note that since we have used the ModelCheckpoint Callback with max_save=1, adult_tabmlp_model_4.p is the model state dict at the best epoch (here, epoch 4), i.e. the same as model_state_dict_saved_option_1.pt or model_state_dict_saved_option_2.pt.
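As a quick, hedged sanity check (a sketch assuming both files store plain state dicts), one could verify that the checkpointed weights match the ones saved via the trainer:

checkpoint_sd = torch.load("tmp_dir/adult_tabmlp_model_4.p")
trainer_sd = torch.load("tmp_dir/model_state_dict_saved_option_2.pt")
# both should contain exactly the same tensors
assert checkpoint_sd.keys() == trainer_sd.keys()
assert all(torch.equal(checkpoint_sd[k], trainer_sd[k]) for k in checkpoint_sd)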

In\u00a0[17]: Copied!
with open(\"tmp_dir/tab_preproc.pkl\", \"wb\") as dp:\n    pickle.dump(tab_preprocessor, dp)\n
with open(\"tmp_dir/tab_preproc.pkl\", \"wb\") as dp: pickle.dump(tab_preprocessor, dp) In\u00a0[18]: Copied!
with open(\"tmp_dir/eary_stop.pkl\", \"wb\") as es:\n    pickle.dump(early_stopping, es)\n
with open(\"tmp_dir/eary_stop.pkl\", \"wb\") as es: pickle.dump(early_stopping, es) In\u00a0[19]: Copied!
%%bash\n\nls tmp_dir/\n
%%bash ls tmp_dir/
adult_tabmlp_model_4.p\neary_stop.pkl\nhistory\nmodel_saved_option_1.pt\nmodel_saved_option_2.pt\nmodel_state_dict_saved_option_1.pt\nmodel_state_dict_saved_option_2.pt\ntab_preproc.pkl\n

And that is pretty much all you need to resume training or to predict directly. Let's see.

In\u00a0[20]: Copied!
test.head()\n
test.head() Out[20]: age workclass fnlwgt education educational_num marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country target 10103 43 Private 198282 HS-grad 9 Married-civ-spouse Craft-repair Husband White Male 0 0 40 United-States 1 31799 20 Private 228686 11th 7 Married-civ-spouse Other-service Husband White Male 0 0 40 United-States 0 19971 26 Private 291968 HS-grad 9 Married-civ-spouse Transport-moving Husband White Male 0 0 44 United-States 0 3039 48 Private 175958 Bachelors 13 Divorced Prof-specialty Not-in-family White Male 0 0 30 United-States 0 20725 18 Private 232024 11th 7 Never-married Machine-op-inspct Own-child White Male 0 0 55 United-States 0 In\u00a0[21]: Copied!
with open(\"tmp_dir/tab_preproc.pkl\", \"rb\") as tp:\n    tab_preprocessor_new = pickle.load(tp)\n
with open(\"tmp_dir/tab_preproc.pkl\", \"rb\") as tp: tab_preprocessor_new = pickle.load(tp) In\u00a0[22]: Copied!
X_test_tab = tab_preprocessor_new.transform(test)\ny_test = test.target\n
X_test_tab = tab_preprocessor_new.transform(test) y_test = test.target In\u00a0[23]: Copied!
tab_mlp_new = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    cont_norm_layer=\"layernorm\",\n    embed_continuous_method=\"standard\",\n    cont_embed_dim=8,\n    mlp_hidden_dims=[64, 32],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nnew_model = WideDeep(deeptabular=tab_mlp_new)\n
tab_mlp_new = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, cont_norm_layer=\"layernorm\", embed_continuous_method=\"standard\", cont_embed_dim=8, mlp_hidden_dims=[64, 32], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) new_model = WideDeep(deeptabular=tab_mlp_new) In\u00a0[24]: Copied!
new_model.load_state_dict(torch.load(\"tmp_dir/model_state_dict_saved_option_2.pt\"))\n
new_model.load_state_dict(torch.load(\"tmp_dir/model_state_dict_saved_option_2.pt\")) Out[24]:
<All keys matched successfully>
In\u00a0[25]: Copied!
trainer = Trainer(\n    new_model,\n    objective=\"binary\",\n)\n
trainer = Trainer( new_model, objective=\"binary\", ) In\u00a0[26]: Copied!
preds = trainer.predict(X_tab=X_test_tab, batch_size=32)\n
preds = trainer.predict(X_tab=X_test_tab, batch_size=32)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:00<00:00, 309.83it/s]\n
In\u00a0[27]: Copied!
from sklearn.metrics import accuracy_score\n
from sklearn.metrics import accuracy_score In\u00a0[28]: Copied!
accuracy_score(y_test, preds)\n
accuracy_score(y_test, preds) Out[28]:
0.8595701125895598
In\u00a0[29]: Copied!
shutil.rmtree(\"tmp_dir/\")\n
shutil.rmtree(\"tmp_dir/\")"},{"location":"examples/05_save_and_load_model_and_artifacts.html#save-and-load-model-and-artifacts","title":"Save and load model and artifacts\u00b6","text":"

In this notebook I will show the different options to save and load a model, as well as some additional objects produced during training.

On a given day, you train a model...

"},{"location":"examples/05_save_and_load_model_and_artifacts.html#save-model-option-1","title":"Save model: option 1\u00b6","text":"

save (and load) a model as you would do with any other torch model
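For completeness, a minimal sketch (not run in this notebook) of the load counterpart, assuming the model classes are importable in the loading environment as standard torch pickling requires:

# option 1 saved the full, pickled model
loaded_model = torch.load("tmp_dir/model_saved_option_1.pt")
# for the state-dict variant, build an identical model first and then load the weights:
# model.load_state_dict(torch.load("tmp_dir/model_state_dict_saved_option_1.pt"))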

"},{"location":"examples/05_save_and_load_model_and_artifacts.html#save-model-option-2","title":"Save model: option 2\u00b6","text":"

use the trainer. The trainer will also save the training history and the learning rate history (if learning rate schedulers are used)
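As a small, hedged sketch (assuming the history is stored as plain JSON, as the file listing above suggests), the saved training history can be inspected with the standard library:

import json

with open("tmp_dir/history/train_eval_history.json") as f:
    history = json.load(f)
# losses and metrics recorded per epoch
print(history)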

"},{"location":"examples/05_save_and_load_model_and_artifacts.html#save-preprocessors-and-callbacks","title":"Save preprocessors and callbacks\u00b6","text":"

...just pickle them

"},{"location":"examples/05_save_and_load_model_and_artifacts.html#run-new-experiment-prepare-new-dataset-load-model-and-predict","title":"Run New experiment: prepare new dataset, load model, and predict\u00b6","text":""},{"location":"examples/06_finetune_and_warmup.html","title":"06_finetune_and_warmup","text":"In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport torch\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor\nfrom pytorch_widedeep.models import Wide, TabMlp, TabResnet, WideDeep\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep.datasets import load_adult\n
import numpy as np import pandas as pd import torch from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor from pytorch_widedeep.models import Wide, TabMlp, TabResnet, WideDeep from pytorch_widedeep.metrics import Accuracy from pytorch_widedeep.datasets import load_adult
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n
In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\n# For convenience, we'll replace '-' with '_'\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\n# binary target\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\ndf.head()\n
df = load_adult(as_frame=True) # For convenience, we'll replace '-' with '_' df.columns = [c.replace(\"-\", \"_\") for c in df.columns] # binary target df[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop(\"income\", axis=1, inplace=True) df.head() Out[2]: age workclass fnlwgt education educational_num marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country income_label 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States 0 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States 0 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States 1 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States 1 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States 0 In\u00a0[3]: Copied!
# Define wide, crossed and deep tabular columns\nwide_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"native_country\",\n]\ncrossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")]\ncat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital_gain\",\n    \"capital_loss\",\n    \"native_country\",\n]\ncontinuous_cols = [\"age\", \"hours_per_week\"]\ntarget_col = \"income_label\"\ntarget = df[target_col].values\n
# Define wide, crossed and deep tabular columns wide_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"native_country\", ] crossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")] cat_embed_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"capital_gain\", \"capital_loss\", \"native_country\", ] continuous_cols = [\"age\", \"hours_per_week\"] target_col = \"income_label\" target = df[target_col].values In\u00a0[4]: Copied!
# TARGET\ntarget = df[target_col].values\n\n# WIDE\nwide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_wide = wide_preprocessor.fit_transform(df)\n\n# DEEP\ntab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols\n)\nX_tab = tab_preprocessor.fit_transform(df)\n
# TARGET target = df[target_col].values # WIDE wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_wide = wide_preprocessor.fit_transform(df) # DEEP tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols ) X_tab = tab_preprocessor.fit_transform(df)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[5]: Copied!
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    embed_continuous_method=\"standard\",\n    cont_embed_dim=8,\n    mlp_hidden_dims=[64, 32],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nmodel = WideDeep(wide=wide, deeptabular=tab_mlp)\n
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, embed_continuous_method=\"standard\", cont_embed_dim=8, mlp_hidden_dims=[64, 32], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) model = WideDeep(wide=wide, deeptabular=tab_mlp) In\u00a0[6]: Copied!
trainer = Trainer(\n    model,\n    objective=\"binary\",\n    optimizers=torch.optim.Adam(model.parameters(), lr=0.01),\n    metrics=[Accuracy],\n)\n
trainer = Trainer( model, objective=\"binary\", optimizers=torch.optim.Adam(model.parameters(), lr=0.01), metrics=[Accuracy], ) In\u00a0[7]: Copied!
trainer.fit(\n    X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=2, val_split=0.2, batch_size=256\n)\n
trainer.fit( X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=2, val_split=0.2, batch_size=256 )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 74.26it/s, loss=0.399, metrics={'acc': 0.8163}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:00<00:00, 91.03it/s, loss=0.296, metrics={'acc': 0.8677}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 81.31it/s, loss=0.3, metrics={'acc': 0.8614}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:00<00:00, 106.45it/s, loss=0.285, metrics={'acc': 0.8721}]\n
In\u00a0[8]: Copied!
trainer.save(path=\"models_dir/\", save_state_dict=True, model_filename=\"model_1.pt\")\n
trainer.save(path=\"models_dir/\", save_state_dict=True, model_filename=\"model_1.pt\")

Now time goes by... and we want to fine-tune the model on another, new dataset (for example, a dataset that is identical to the one used to train the previous model but for another country).

Here I will use the same dataset just for illustration purposes, but the flow would be identical for that new dataset.

In\u00a0[9]: Copied!
wide_1 = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_mlp_1 = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    embed_continuous_method=\"standard\",\n    cont_embed_dim=8,\n    mlp_hidden_dims=[64, 32],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nmodel_1 = WideDeep(wide=wide_1, deeptabular=tab_mlp_1)\n
wide_1 = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_mlp_1 = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, embed_continuous_method=\"standard\", cont_embed_dim=8, mlp_hidden_dims=[64, 32], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) model_1 = WideDeep(wide=wide_1, deeptabular=tab_mlp_1) In\u00a0[10]: Copied!
model_1.load_state_dict(torch.load(\"models_dir/model_1.pt\"))\n
model_1.load_state_dict(torch.load(\"models_dir/model_1.pt\")) Out[10]:
<All keys matched successfully>
In\u00a0[11]: Copied!
trainer_1 = Trainer(model_1, objective=\"binary\", metrics=[Accuracy])\n
trainer_1 = Trainer(model_1, objective=\"binary\", metrics=[Accuracy]) In\u00a0[12]: Copied!
trainer_1.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    target=target,\n    n_epochs=2,\n    batch_size=256,\n    finetune=True,\n    finetune_epochs=2,\n)\n
trainer_1.fit( X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=2, batch_size=256, finetune=True, finetune_epochs=2, )
Training wide for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 191/191 [00:01<00:00, 97.37it/s, loss=0.39, metrics={'acc': 0.8152}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 191/191 [00:01<00:00, 104.04it/s, loss=0.359, metrics={'acc': 0.824}]\n
Training deeptabular for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 191/191 [00:02<00:00, 83.83it/s, loss=0.297, metrics={'acc': 0.8365}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 191/191 [00:02<00:00, 82.78it/s, loss=0.283, metrics={'acc': 0.8445}]\n
Fine-tuning (or warmup) of individual components completed. Training the whole model for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 191/191 [00:02<00:00, 72.84it/s, loss=0.281, metrics={'acc': 0.8716}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 191/191 [00:02<00:00, 77.46it/s, loss=0.273, metrics={'acc': 0.8744}]\n

Note that, as I describe above, in scenario 2, we can just use this to warm up the model components before their joint training begins:

In\u00a0[13]: Copied!
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    embed_continuous_method=\"standard\",\n    cont_embed_dim=8,\n    mlp_hidden_dims=[64, 32],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nmodel = WideDeep(wide=wide, deeptabular=tab_mlp)\n
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, embed_continuous_method=\"standard\", cont_embed_dim=8, mlp_hidden_dims=[64, 32], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) model = WideDeep(wide=wide, deeptabular=tab_mlp) In\u00a0[14]: Copied!
trainer_2 = Trainer(model, objective=\"binary\", metrics=[Accuracy])\n
trainer_2 = Trainer(model, objective=\"binary\", metrics=[Accuracy]) In\u00a0[15]: Copied!
trainer_2.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    target=target,\n    val_split=0.1,\n    warmup=True,\n    warmup_epochs=2,\n    n_epochs=2,\n    batch_size=256,\n)\n
trainer_2.fit( X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.1, warmup=True, warmup_epochs=2, n_epochs=2, batch_size=256, )
Training wide for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:01<00:00, 102.49it/s, loss=0.52, metrics={'acc': 0.7519}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:01<00:00, 98.15it/s, loss=0.381, metrics={'acc': 0.7891}]\n
Training deeptabular for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 82.97it/s, loss=0.356, metrics={'acc': 0.8043}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 80.27it/s, loss=0.295, metrics={'acc': 0.8195}]\n
Fine-tuning (or warmup) of individual components completed. Training the whole model for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 77.27it/s, loss=0.291, metrics={'acc': 0.8667}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 89.57it/s, loss=0.289, metrics={'acc': 0.8665}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 72.69it/s, loss=0.283, metrics={'acc': 0.8693}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 91.81it/s, loss=0.284, metrics={'acc': 0.869}]\n

We need to explicitly indicate

  1. That we want to fine-tune

  2. The components that we want to individually fine-tune

  3. In case of gradual fine-tuning, the routine (\"felbo\" or \"howard\")

  4. The layers we want to fine-tune.

For example

In\u00a0[16]: Copied!
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_resnet = TabResnet(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    blocks_dims=[200, 200, 200],\n)\nmodel = WideDeep(wide=wide, deeptabular=tab_resnet)\n
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_resnet = TabResnet( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, blocks_dims=[200, 200, 200], ) model = WideDeep(wide=wide, deeptabular=tab_resnet) In\u00a0[17]: Copied!
model\n
model Out[17]:
WideDeep(\n  (wide): Wide(\n    (wide_linear): Embedding(809, 1, padding_idx=0)\n  )\n  (deeptabular): Sequential(\n    (0): TabResnet(\n      (cat_embed): DiffSizeCatEmbeddings(\n        (embed_layers): ModuleDict(\n          (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n          (emb_layer_education): Embedding(17, 8, padding_idx=0)\n          (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n          (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n          (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n          (emb_layer_race): Embedding(6, 4, padding_idx=0)\n          (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n          (emb_layer_capital_gain): Embedding(124, 24, padding_idx=0)\n          (emb_layer_capital_loss): Embedding(100, 21, padding_idx=0)\n          (emb_layer_native_country): Embedding(43, 13, padding_idx=0)\n        )\n        (embedding_dropout): Dropout(p=0.1, inplace=False)\n      )\n      (cont_norm): Identity()\n      (encoder): DenseResnet(\n        (dense_resnet): Sequential(\n          (lin_inp): Linear(in_features=95, out_features=200, bias=False)\n          (bn_inp): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          (block_0): BasicBlock(\n            (lin1): Linear(in_features=200, out_features=200, bias=False)\n            (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n            (dp): Dropout(p=0.1, inplace=False)\n            (lin2): Linear(in_features=200, out_features=200, bias=False)\n            (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          )\n          (block_1): BasicBlock(\n            (lin1): Linear(in_features=200, out_features=200, bias=False)\n            (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n            (dp): Dropout(p=0.1, inplace=False)\n            (lin2): Linear(in_features=200, out_features=200, bias=False)\n            (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=200, out_features=1, bias=True)\n  )\n)

Let's first train as usual.

In\u00a0[18]: Copied!
trainer_3 = Trainer(model, objective=\"binary\", metrics=[Accuracy])\n
trainer_3 = Trainer(model, objective=\"binary\", metrics=[Accuracy]) In\u00a0[19]: Copied!
trainer_3.fit(\n    X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.1, n_epochs=2, batch_size=256\n)\n
trainer_3.fit( X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.1, n_epochs=2, batch_size=256 )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:03<00:00, 54.23it/s, loss=0.382, metrics={'acc': 0.8239}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 84.72it/s, loss=0.331, metrics={'acc': 0.8526}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:03<00:00, 54.35it/s, loss=0.33, metrics={'acc': 0.8465}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 68.15it/s, loss=0.312, metrics={'acc': 0.8604}]\n
In\u00a0[20]: Copied!
trainer_3.save(path=\"models_dir\", save_state_dict=True, model_filename=\"model_3.pt\")\n
trainer_3.save(path=\"models_dir\", save_state_dict=True, model_filename=\"model_3.pt\")

Now we are going to fine-tune the model components, and in the case of the deeptabular component, we will fine-tune the resnet-blocks and the linear layer but NOT the embeddings.

For this, we need to access the model component's children: deeptabular $\\rightarrow$ tab_resnet $\\rightarrow$ dense_resnet $\\rightarrow$ blocks

In\u00a0[21]: Copied!
wide_3 = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_resnet_3 = TabResnet(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    blocks_dims=[200, 200, 200],\n)\nmodel_3 = WideDeep(wide=wide_3, deeptabular=tab_resnet_3)\n
wide_3 = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_resnet_3 = TabResnet( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, blocks_dims=[200, 200, 200], ) model_3 = WideDeep(wide=wide_3, deeptabular=tab_resnet_3) In\u00a0[22]: Copied!
model_3.load_state_dict(torch.load(\"models_dir/model_3.pt\"))\n
model_3.load_state_dict(torch.load(\"models_dir/model_3.pt\")) Out[22]:
<All keys matched successfully>
In\u00a0[23]: Copied!
model_3\n
model_3 Out[23]:
WideDeep(\n  (wide): Wide(\n    (wide_linear): Embedding(809, 1, padding_idx=0)\n  )\n  (deeptabular): Sequential(\n    (0): TabResnet(\n      (cat_embed): DiffSizeCatEmbeddings(\n        (embed_layers): ModuleDict(\n          (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n          (emb_layer_education): Embedding(17, 8, padding_idx=0)\n          (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n          (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n          (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n          (emb_layer_race): Embedding(6, 4, padding_idx=0)\n          (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n          (emb_layer_capital_gain): Embedding(124, 24, padding_idx=0)\n          (emb_layer_capital_loss): Embedding(100, 21, padding_idx=0)\n          (emb_layer_native_country): Embedding(43, 13, padding_idx=0)\n        )\n        (embedding_dropout): Dropout(p=0.1, inplace=False)\n      )\n      (cont_norm): Identity()\n      (encoder): DenseResnet(\n        (dense_resnet): Sequential(\n          (lin_inp): Linear(in_features=95, out_features=200, bias=False)\n          (bn_inp): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          (block_0): BasicBlock(\n            (lin1): Linear(in_features=200, out_features=200, bias=False)\n            (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n            (dp): Dropout(p=0.1, inplace=False)\n            (lin2): Linear(in_features=200, out_features=200, bias=False)\n            (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          )\n          (block_1): BasicBlock(\n            (lin1): Linear(in_features=200, out_features=200, bias=False)\n            (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n            (dp): Dropout(p=0.1, inplace=False)\n            (lin2): Linear(in_features=200, out_features=200, bias=False)\n            (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=200, out_features=1, bias=True)\n  )\n)
In\u00a0[24]: Copied!
tab_lin_layer = list(model_3.deeptabular.children())[1]\n
tab_lin_layer = list(model_3.deeptabular.children())[1] In\u00a0[25]: Copied!
tab_lin_layer\n
tab_lin_layer Out[25]:
Linear(in_features=200, out_features=1, bias=True)
In\u00a0[26]: Copied!
tab_deep_layers = []\nfor n1, c1 in model_3.deeptabular.named_children():\n    if (\n        n1 == \"0\"\n    ):  # 0 is the model component and 1 is always the prediction layer added by the `WideDeep` class\n        for n2, c2 in c1.named_children():\n            if n2 == \"encoder\":  # TabResnet\n                for _, c3 in c2.named_children():\n                    for n4, c4 in c3.named_children():  # dense_resnet\n                        if \"block\" in n4:\n                            tab_deep_layers.append((n4, c4))\n
tab_deep_layers = [] for n1, c1 in model_3.deeptabular.named_children(): if ( n1 == \"0\" ): # 0 is the model component and 1 is always the prediction layer added by the `WideDeep` class for n2, c2 in c1.named_children(): if n2 == \"encoder\": # TabResnet for _, c3 in c2.named_children(): for n4, c4 in c3.named_children(): # dense_resnet if \"block\" in n4: tab_deep_layers.append((n4, c4)) In\u00a0[27]: Copied!
tab_deep_layers\n
tab_deep_layers Out[27]:
[('block_0',\n  BasicBlock(\n    (lin1): Linear(in_features=200, out_features=200, bias=False)\n    (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n    (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n    (dp): Dropout(p=0.1, inplace=False)\n    (lin2): Linear(in_features=200, out_features=200, bias=False)\n    (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n  )),\n ('block_1',\n  BasicBlock(\n    (lin1): Linear(in_features=200, out_features=200, bias=False)\n    (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n    (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n    (dp): Dropout(p=0.1, inplace=False)\n    (lin2): Linear(in_features=200, out_features=200, bias=False)\n    (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n  ))]

Now remember, we need to pass ONLY the layers (before, I included their names just for clarity), and they must be in WARM-UP ORDER, therefore:

In\u00a0[28]: Copied!
tab_deep_layers = [el[1] for el in tab_deep_layers][::-1]\n
tab_deep_layers = [el[1] for el in tab_deep_layers][::-1] In\u00a0[29]: Copied!
tab_layers = [tab_lin_layer] + tab_deep_layers[::-1]\n
tab_layers = [tab_lin_layer] + tab_deep_layers[::-1] In\u00a0[30]: Copied!
tab_layers\n
tab_layers Out[30]:
[Linear(in_features=200, out_features=1, bias=True),\n BasicBlock(\n   (lin1): Linear(in_features=200, out_features=200, bias=False)\n   (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n   (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n   (dp): Dropout(p=0.1, inplace=False)\n   (lin2): Linear(in_features=200, out_features=200, bias=False)\n   (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n ),\n BasicBlock(\n   (lin1): Linear(in_features=200, out_features=200, bias=False)\n   (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n   (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n   (dp): Dropout(p=0.1, inplace=False)\n   (lin2): Linear(in_features=200, out_features=200, bias=False)\n   (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n )]

And now simply

In\u00a0[31]: Copied!
trainer_4 = Trainer(model_3, objective=\"binary\", metrics=[Accuracy])\n
trainer_4 = Trainer(model_3, objective=\"binary\", metrics=[Accuracy]) In\u00a0[32]: Copied!
trainer_4.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    target=target,\n    val_split=0.1,\n    finetune=True,\n    finetune_epochs=2,\n    deeptabular_gradual=True,\n    deeptabular_layers=tab_layers,\n    deeptabular_max_lr=0.01,\n    n_epochs=2,\n    batch_size=256,\n)\n
trainer_4.fit( X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.1, finetune=True, finetune_epochs=2, deeptabular_gradual=True, deeptabular_layers=tab_layers, deeptabular_max_lr=0.01, n_epochs=2, batch_size=256, )
Training wide for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:01<00:00, 95.17it/s, loss=0.504, metrics={'acc': 0.7523}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:01<00:00, 99.83it/s, loss=0.384, metrics={'acc': 0.789}]\n
Training deeptabular, layer 1 of 3\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 72.31it/s, loss=0.317, metrics={'acc': 0.8098}]\n
Training deeptabular, layer 2 of 3\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 65.97it/s, loss=0.312, metrics={'acc': 0.8214}]\n
Training deeptabular, layer 3 of 3\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 63.92it/s, loss=0.306, metrics={'acc': 0.8284}]\n
Fine-tuning (or warmup) of individual components completed. Training the whole model for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:03<00:00, 57.26it/s, loss=0.292, metrics={'acc': 0.8664}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 84.56it/s, loss=0.292, metrics={'acc': 0.8696}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:03<00:00, 53.61it/s, loss=0.282, metrics={'acc': 0.8693}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 80.59it/s, loss=0.289, metrics={'acc': 0.8719}]\n

Finally, there is one more use case I would like to consider: the case where we train only one component and we just want to fine-tune and then stop, since there is no joint training. This is as simple as

In\u00a0[33]: Copied!
tab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    mlp_hidden_dims=[64, 32],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nmodel = WideDeep(deeptabular=tab_mlp)\n
tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, mlp_hidden_dims=[64, 32], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) model = WideDeep(deeptabular=tab_mlp) In\u00a0[34]: Copied!
trainer_5 = Trainer(\n    model,\n    objective=\"binary\",\n    optimizers=torch.optim.Adam(model.parameters(), lr=0.01),\n    metrics=[Accuracy],\n)\n
trainer_5 = Trainer( model, objective=\"binary\", optimizers=torch.optim.Adam(model.parameters(), lr=0.01), metrics=[Accuracy], ) In\u00a0[35]: Copied!
trainer_5.fit(\n    X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.1, n_epochs=1, batch_size=256\n)\n
trainer_5.fit( X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.1, n_epochs=1, batch_size=256 )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 73.69it/s, loss=0.365, metrics={'acc': 0.8331}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 92.56it/s, loss=0.299, metrics={'acc': 0.8673}]\n
In\u00a0[36]: Copied!
trainer_5.save(path=\"models_dir\", save_state_dict=True, model_filename=\"model_5.pt\")\n
trainer_5.save(path=\"models_dir\", save_state_dict=True, model_filename=\"model_5.pt\") In\u00a0[37]: Copied!
tab_mlp_5 = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    mlp_hidden_dims=[64, 32],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nmodel_5 = WideDeep(deeptabular=tab_mlp_5)\n
tab_mlp_5 = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, mlp_hidden_dims=[64, 32], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) model_5 = WideDeep(deeptabular=tab_mlp_5) In\u00a0[38]: Copied!
model_5.load_state_dict(torch.load(\"models_dir/model_5.pt\"))\n
model_5.load_state_dict(torch.load(\"models_dir/model_5.pt\")) Out[38]:
<All keys matched successfully>

...time goes by...

In\u00a0[39]: Copied!
trainer_6 = Trainer(\n    model_5,\n    objective=\"binary\",\n    optimizers=torch.optim.Adam(model_5.parameters(), lr=0.01),\n    metrics=[Accuracy],\n)\n
trainer_6 = Trainer( model_5, objective=\"binary\", optimizers=torch.optim.Adam(model_5.parameters(), lr=0.01), metrics=[Accuracy], ) In\u00a0[40]: Copied!
trainer_6.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    target=target,\n    val_split=0.1,\n    finetune=True,\n    finetune_epochs=2,\n    finetune_max_lr=0.01,\n    stop_after_finetuning=True,\n    batch_size=256,\n)\n
trainer_6.fit( X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.1, finetune=True, finetune_epochs=2, finetune_max_lr=0.01, stop_after_finetuning=True, batch_size=256, )
Training deeptabular for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 73.86it/s, loss=0.298, metrics={'acc': 0.8652}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 75.45it/s, loss=0.286, metrics={'acc': 0.8669}]\n
Fine-tuning (or warmup) of individual components completed. Training the whole model for 1 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 76.29it/s, loss=0.282, metrics={'acc': 0.8698}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 84.93it/s, loss=0.281, metrics={'acc': 0.8749}]\n
In\u00a0[42]: Copied!
import shutil\n\nshutil.rmtree(\"models_dir/\")\nshutil.rmtree(\"model_weights/\")\n
import shutil shutil.rmtree(\"models_dir/\") shutil.rmtree(\"model_weights/\") In\u00a0[\u00a0]: Copied!
\n
"},{"location":"examples/06_finetune_and_warmup.html#the-finetunewarm-up-option","title":"The FineTune/Warm Up option\u00b6","text":"

Let's place ourselves in two possible scenarios.

  1. Let's assume we have trained a model and we want to transfer what it has learned (you know... transfer learning) to another dataset, or we have simply received new data and do not want to start training each component from scratch. We just want to load the pre-trained weights and fine-tune.

  2. We just want to \"warm up\" individual model components before their joint training begins.

This can be done with the finetune set of parameters. There are 3 fine-tuning routines:

  1. Fine-tune all trainable layers at once with a triangular one-cycle learning rate (referred to as slanted triangular learning rates in Howard & Ruder, 2018)
  2. Gradual fine-tuning inspired by the work of Felbo et al., 2017
  3. Gradual fine-tuning based on the work of Howard & Ruder 2018

Currently fine-tuning is only supported without a fully connected head, i.e. if deephead=None. In addition, the Felbo and Howard routines apply, of course, only to the deeptabular, deeptext and deepimage models. The wide component can also be fine-tuned, but only in an \"all at once\" mode.

"},{"location":"examples/06_finetune_and_warmup.html#fine-tune-or-warm-up-all-at-once","title":"Fine-tune or warm-up all at once\u00b6","text":"

Here, the model components will be trained for finetune_epochs using a triangular one-cycle learning rate (slanted triangular learning rate) ranging from finetune_max_lr/10 to finetune_max_lr (default is 0.01). 10% of the training steps are used to increase the learning rate which then decreases for the remaining 90%.

Here all trainable layers are fine-tuned.
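In terms of the fit call, a minimal sketch of this all-at-once option (assuming a model and trainer built as in the cells above, and using only the finetune arguments that appear in this notebook) would be:

trainer.fit(
    X_wide=X_wide,
    X_tab=X_tab,
    target=target,
    finetune=True,  # activate the fine-tune/warm-up phase
    finetune_epochs=2,  # epochs used to fine-tune each component individually
    finetune_max_lr=0.01,  # peak of the triangular one-cycle schedule
    n_epochs=2,
    batch_size=256,
)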

Let's have a look at one example.

"},{"location":"examples/06_finetune_and_warmup.html#fine-tune-gradually-the-felbo-and-the-howard-routines","title":"Fine-tune Gradually: The \"felbo\" and the \"howard\" routines\u00b6","text":"

The Felbo routine can be illustrated as follows:

Figure 1. The figure can be described as follows: fine-tune (or train) the last layer for one epoch using a one cycle triangular learning rate. Then fine-tune the next deeper layer for one epoch, with a learning rate that is a factor of 2.5 lower than the previous learning rate (the 2.5 factor is fixed) while freezing the already warmed up layer(s). Repeat until all individual layers are warmed. Then warm one last epoch with all warmed layers trainable. The vanishing color gradient in the figure attempts to illustrate the decreasing learning rate.

Note that this is not identical to the fine-tuning routine described in Felbo et al., 2017, which is why I used the word 'inspired'.

The Howard routine can be illustrated as follows:

Figure 2. The figure can be described as follows: fine-tune (or train) the last layer for one epoch using a one cycle triangular learning rate. Then fine-tune the next deeper layer for one epoch, with a learning rate that is a factor of 2.5 lower than the previous learning rate (the 2.5 factor is fixed) while keeping the already warmed up layer(s) trainable. Repeat. The vanishing color gradient in the figure attempts to illustrate the decreasing learning rate.

Note that I write \"fine-tune (or train) the last layer for one epoch [...]\". However, in practice the user will have to specify the order of the layers to be fine-tuned. This is another reason why I wrote that the fine-tune routines I have implemented are inspired by the work of Felbo and Howard and not identical to their implementations.

The felbo and howard routines can be accessed via the fine-tune set of parameters.
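For instance, a gradual fine-tune of the deeptabular component uses the component-specific arguments shown earlier in this notebook. A minimal sketch (assuming tab_layers is the list of layers built above, in the order in which they should be fine-tuned):

trainer.fit(
    X_wide=X_wide,
    X_tab=X_tab,
    target=target,
    finetune=True,
    finetune_epochs=2,
    deeptabular_gradual=True,  # fine-tune the deeptabular layers one by one
    deeptabular_layers=tab_layers,  # layers listed in fine-tune (warm-up) order
    deeptabular_max_lr=0.01,  # peak learning rate used for the gradual routine
    n_epochs=2,
    batch_size=256,
)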

"},{"location":"examples/07_custom_components.html","title":"07_custom_components","text":"In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport os\nimport torch\n\nfrom torch import Tensor, nn\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import (\n    WidePreprocessor,\n    TabPreprocessor,\n    TextPreprocessor,\n    ImagePreprocessor,\n)\nfrom pytorch_widedeep.models import (\n    Wide,\n    TabMlp,\n    Vision,\n    BasicRNN,\n    WideDeep,\n)\nfrom pytorch_widedeep.losses import RMSELoss\nfrom pytorch_widedeep.initializers import *\nfrom pytorch_widedeep.callbacks import *\nfrom pytorch_widedeep.datasets import load_adult\n
import numpy as np import pandas as pd import os import torch from torch import Tensor, nn from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import ( WidePreprocessor, TabPreprocessor, TextPreprocessor, ImagePreprocessor, ) from pytorch_widedeep.models import ( Wide, TabMlp, Vision, BasicRNN, WideDeep, ) from pytorch_widedeep.losses import RMSELoss from pytorch_widedeep.initializers import * from pytorch_widedeep.callbacks import * from pytorch_widedeep.datasets import load_adult
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n
In\u00a0[2]: Copied!
df = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\")\ndf.head()\n
df = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\") df.head() Out[2]: id host_id description host_listings_count host_identity_verified neighbourhood_cleansed latitude longitude is_location_exact property_type ... amenity_wide_entrance amenity_wide_entrance_for_guests amenity_wide_entryway amenity_wide_hallways amenity_wifi amenity_window_guards amenity_wine_cooler security_deposit extra_people yield 0 13913.jpg 54730 My bright double bedroom with a large window h... 4.0 f Islington 51.56802 -0.11121 t apartment ... 1 0 0 0 1 0 0 100.0 15.0 12.00 1 15400.jpg 60302 Lots of windows and light. St Luke's Gardens ... 1.0 t Kensington and Chelsea 51.48796 -0.16898 t apartment ... 0 0 0 0 1 0 0 150.0 0.0 109.50 2 17402.jpg 67564 Open from June 2018 after a 3-year break, we a... 19.0 t Westminster 51.52098 -0.14002 t apartment ... 0 0 0 0 1 0 0 350.0 10.0 149.65 3 24328.jpg 41759 Artist house, bright high ceiling rooms, priva... 2.0 t Wandsworth 51.47298 -0.16376 t other ... 0 0 0 0 1 0 0 250.0 0.0 215.60 4 25023.jpg 102813 Large, all comforts, 2-bed flat; first floor; ... 1.0 f Wandsworth 51.44687 -0.21874 t apartment ... 0 0 0 0 1 0 0 250.0 11.0 79.35

5 rows \u00d7 223 columns

In\u00a0[3]: Copied!
# There are a number of columns that are already binary. Therefore, no need to one hot encode them\ncrossed_cols = [(\"property_type\", \"room_type\")]\nalready_dummies = [c for c in df.columns if \"amenity\" in c] + [\"has_house_rules\"]\nwide_cols = [\n    \"is_location_exact\",\n    \"property_type\",\n    \"room_type\",\n    \"host_gender\",\n    \"instant_bookable\",\n] + already_dummies\n\ncat_embed_cols = [(c, 16) for c in df.columns if \"catg\" in c] + [\n    (\"neighbourhood_cleansed\", 64),\n    (\"cancellation_policy\", 16),\n]\ncontinuous_cols = [\"latitude\", \"longitude\", \"security_deposit\", \"extra_people\"]\n# it does not make sense to standarised Latitude and Longitude\nalready_standard = [\"latitude\", \"longitude\"]\n\n# text and image colnames\ntext_col = \"description\"\nimg_col = \"id\"\n\n# path to pretrained word embeddings and the images\nword_vectors_path = \"../tmp_data/glove.6B/glove.6B.100d.txt\"\nimg_path = \"../tmp_data/airbnb/property_picture\"\n\n# target\ntarget_col = \"yield\"\n
# There are a number of columns that are already binary. Therefore, no need to one hot encode them crossed_cols = [(\"property_type\", \"room_type\")] already_dummies = [c for c in df.columns if \"amenity\" in c] + [\"has_house_rules\"] wide_cols = [ \"is_location_exact\", \"property_type\", \"room_type\", \"host_gender\", \"instant_bookable\", ] + already_dummies cat_embed_cols = [(c, 16) for c in df.columns if \"catg\" in c] + [ (\"neighbourhood_cleansed\", 64), (\"cancellation_policy\", 16), ] continuous_cols = [\"latitude\", \"longitude\", \"security_deposit\", \"extra_people\"] # it does not make sense to standarised Latitude and Longitude already_standard = [\"latitude\", \"longitude\"] # text and image colnames text_col = \"description\" img_col = \"id\" # path to pretrained word embeddings and the images word_vectors_path = \"../tmp_data/glove.6B/glove.6B.100d.txt\" img_path = \"../tmp_data/airbnb/property_picture\" # target target_col = \"yield\" In\u00a0[4]: Copied!
target = df[target_col].values\n
target = df[target_col].values In\u00a0[5]: Copied!
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_wide = wide_preprocessor.fit_transform(df)\n\ntab_preprocessor = TabPreprocessor(\n    embed_cols=cat_embed_cols, continuous_cols=continuous_cols\n)\nX_tab = tab_preprocessor.fit_transform(df)\n\ntext_preprocessor = TextPreprocessor(\n    word_vectors_path=word_vectors_path, text_col=text_col\n)\nX_text = text_preprocessor.fit_transform(df)\n\nimage_processor = ImagePreprocessor(img_col=img_col, img_path=img_path)\nX_images = image_processor.fit_transform(df)\n
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_wide = wide_preprocessor.fit_transform(df) tab_preprocessor = TabPreprocessor( embed_cols=cat_embed_cols, continuous_cols=continuous_cols ) X_tab = tab_preprocessor.fit_transform(df) text_preprocessor = TextPreprocessor( word_vectors_path=word_vectors_path, text_col=text_col ) X_text = text_preprocessor.fit_transform(df) image_processor = ImagePreprocessor(img_col=img_col, img_path=img_path) X_images = image_processor.fit_transform(df)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
The vocabulary contains 2192 tokens\nIndexing word vectors...\nLoaded 400000 word vectors\nPreparing embeddings matrix...\n2175 words in the vocabulary had ../tmp_data/glove.6B/glove.6B.100d.txt vectors and appear more than 5 times\nReading Images from ../tmp_data/airbnb/property_picture\nResizing\n
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1001/1001 [00:02<00:00, 497.80it/s]\n
Computing normalisation metrics\n

Now we are ready to build a wide and deep model. Three of the four components we will use are included in this package, and they will be combined with a custom deeptext component. Then the fit process will run with a custom loss function.

Let's have a look

In\u00a0[6]: Copied!
# Linear model\nwide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\n\n# DeepDense: 2 Dense layers\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    mlp_hidden_dims=[128, 64],\n    mlp_dropout=0.1,\n)\n\n# Pretrained Resnet 18\nresnet = Vision(pretrained_model_name=\"resnet18\", n_trainable=0)\n
# Linear model wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) # DeepDense: 2 Dense layers tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, mlp_hidden_dims=[128, 64], mlp_dropout=0.1, ) # Pretrained Resnet 18 resnet = Vision(pretrained_model_name=\"resnet18\", n_trainable=0) In\u00a0[7]: Copied!
class MyDeepText(nn.Module):\n    def __init__(self, vocab_size, padding_idx=1, embed_dim=100, hidden_dim=64):\n        super(MyDeepText, self).__init__()\n\n        # word/token embeddings\n        self.word_embed = nn.Embedding(vocab_size, embed_dim, padding_idx=padding_idx)\n\n        # stack of RNNs\n        self.rnn = nn.GRU(\n            embed_dim,\n            hidden_dim,\n            num_layers=2,\n            bidirectional=True,\n            batch_first=True,\n        )\n\n        # Remember, this MUST be defined. If not, WideDeep will throw an error\n        self.output_dim = hidden_dim * 2\n\n    def forward(self, X):\n        embed = self.word_embed(X.long())\n        o, h = self.rnn(embed)\n        return torch.cat((h[-2], h[-1]), dim=1)\n
class MyDeepText(nn.Module): def __init__(self, vocab_size, padding_idx=1, embed_dim=100, hidden_dim=64): super(MyDeepText, self).__init__() # word/token embeddings self.word_embed = nn.Embedding(vocab_size, embed_dim, padding_idx=padding_idx) # stack of RNNs self.rnn = nn.GRU( embed_dim, hidden_dim, num_layers=2, bidirectional=True, batch_first=True, ) # Remember, this MUST be defined. If not, WideDeep will throw an error self.output_dim = hidden_dim * 2 def forward(self, X): embed = self.word_embed(X.long()) o, h = self.rnn(embed) return torch.cat((h[-2], h[-1]), dim=1) In\u00a0[8]: Copied!
mydeeptext = MyDeepText(vocab_size=len(text_preprocessor.vocab.itos))\n
mydeeptext = MyDeepText(vocab_size=len(text_preprocessor.vocab.itos)) In\u00a0[9]: Copied!
model = WideDeep(wide=wide, deeptabular=tab_mlp, deeptext=mydeeptext, deepimage=resnet)\n
model = WideDeep(wide=wide, deeptabular=tab_mlp, deeptext=mydeeptext, deepimage=resnet) In\u00a0[10]: Copied!
class RMSELoss(nn.Module):\n    def __init__(self):\n        \"\"\"root mean squared error\"\"\"\n        super().__init__()\n        self.mse = nn.MSELoss()\n\n    def forward(self, input: Tensor, target: Tensor) -> Tensor:\n        return torch.sqrt(self.mse(input, target))\n
class RMSELoss(nn.Module): def __init__(self): \"\"\"root mean squared error\"\"\" super().__init__() self.mse = nn.MSELoss() def forward(self, input: Tensor, target: Tensor) -> Tensor: return torch.sqrt(self.mse(input, target))

And now we just instantiate the Trainer as usual. Needless to say, this runs with 1000 random observations, so loss and metric values are meaningless. This is just an example.

In\u00a0[11]: Copied!
trainer = Trainer(model, objective=\"regression\", custom_loss_function=RMSELoss())\n
trainer = Trainer(model, objective=\"regression\", custom_loss_function=RMSELoss()) In\u00a0[12]: Copied!
trainer.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    X_text=X_text,\n    X_img=X_images,\n    target=target,\n    n_epochs=1,\n    batch_size=32,\n    val_split=0.2,\n)\n
trainer.fit( X_wide=X_wide, X_tab=X_tab, X_text=X_text, X_img=X_images, target=target, n_epochs=1, batch_size=32, val_split=0.2, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 25/25 [00:23<00:00,  1.07it/s, loss=126]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 7/7 [00:05<00:00,  1.24it/s, loss=97.4]\n

In addition to model components and loss functions, we can also use custom callbacks or custom metrics. The former need to be of type Callback and the latter need to be of type Metric. See:

pytorch-widedeep.callbacks\n

and

pytorch-widedeep.metrics\n

For this example, let me use the adult dataset. Again, we first prepare the data as usual.

In\u00a0[13]: Copied!
df = load_adult(as_frame=True)\ndf.head()\n
df = load_adult(as_frame=True) df.head() Out[13]: age workclass fnlwgt education educational-num marital-status occupation relationship race gender capital-gain capital-loss hours-per-week native-country income 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States <=50K 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States <=50K 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States >50K 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States >50K 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States <=50K In\u00a0[14]: Copied!
# For convenience, we'll replace '-' with '_'\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\n# binary target\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\ndf.head()\n
# For convenience, we'll replace '-' with '_' df.columns = [c.replace(\"-\", \"_\") for c in df.columns] # binary target df[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop(\"income\", axis=1, inplace=True) df.head() Out[14]: age workclass fnlwgt education educational_num marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country income_label 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States 0 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States 0 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States 1 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States 1 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States 0 In\u00a0[15]: Copied!
# Define wide, crossed and deep tabular columns\nwide_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"native_country\",\n]\ncrossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")]\ncat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital_gain\",\n    \"capital_loss\",\n    \"native_country\",\n]\ncontinuous_cols = [\"age\", \"hours_per_week\"]\ntarget_col = \"income_label\"\ntarget = df[target_col].values\n
# Define wide, crossed and deep tabular columns wide_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"native_country\", ] crossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")] cat_embed_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"capital_gain\", \"capital_loss\", \"native_country\", ] continuous_cols = [\"age\", \"hours_per_week\"] target_col = \"income_label\" target = df[target_col].values In\u00a0[16]: Copied!
# wide\nwide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_wide = wide_preprocessor.fit_transform(df)\n\n# deeptabular\ntab_preprocessor = TabPreprocessor(\n    embed_cols=cat_embed_cols, continuous_cols=continuous_cols\n)\nX_tab = tab_preprocessor.fit_transform(df)\n
# wide wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_wide = wide_preprocessor.fit_transform(df) # deeptabular tab_preprocessor = TabPreprocessor( embed_cols=cat_embed_cols, continuous_cols=continuous_cols ) X_tab = tab_preprocessor.fit_transform(df)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[17]: Copied!
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=continuous_cols,\n    mlp_hidden_dims=[128, 64],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nmodel = WideDeep(wide=wide, deeptabular=tab_mlp)\n
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=continuous_cols, mlp_hidden_dims=[128, 64], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) model = WideDeep(wide=wide, deeptabular=tab_mlp) In\u00a0[18]: Copied!
from pytorch_widedeep.metrics import Metric\n
from pytorch_widedeep.metrics import Metric In\u00a0[19]: Copied!
class Accuracy(Metric):\n    def __init__(self, top_k: int = 1):\n        super(Accuracy, self).__init__()\n\n        self.top_k = top_k\n        self.correct_count = 0\n        self.total_count = 0\n\n        # \u00a0metric name needs to be defined\n        self._name = \"acc\"\n\n    def reset(self):\n        self.correct_count = 0\n        self.total_count = 0\n\n    def __call__(self, y_pred: Tensor, y_true: Tensor) -> np.ndarray:\n        num_classes = y_pred.size(1)\n\n        if num_classes == 1:\n            y_pred = y_pred.round()\n            y_true = y_true\n        elif num_classes > 1:\n            y_pred = y_pred.topk(self.top_k, 1)[1]\n            y_true = y_true.view(-1, 1).expand_as(y_pred)\n\n        self.correct_count += y_pred.eq(y_true).sum().item()\n        self.total_count += len(y_pred)\n        accuracy = float(self.correct_count) / float(self.total_count)\n        return np.array(accuracy)\n
class Accuracy(Metric): def __init__(self, top_k: int = 1): super(Accuracy, self).__init__() self.top_k = top_k self.correct_count = 0 self.total_count = 0 # \u00a0metric name needs to be defined self._name = \"acc\" def reset(self): self.correct_count = 0 self.total_count = 0 def __call__(self, y_pred: Tensor, y_true: Tensor) -> np.ndarray: num_classes = y_pred.size(1) if num_classes == 1: y_pred = y_pred.round() y_true = y_true elif num_classes > 1: y_pred = y_pred.topk(self.top_k, 1)[1] y_true = y_true.view(-1, 1).expand_as(y_pred) self.correct_count += y_pred.eq(y_true).sum().item() self.total_count += len(y_pred) accuracy = float(self.correct_count) / float(self.total_count) return np.array(accuracy) In\u00a0[20]: Copied!
# have a look at the class\nfrom pytorch_widedeep.callbacks import Callback\n
# have a look at the class from pytorch_widedeep.callbacks import Callback In\u00a0[21]: Copied!
class SillyCallback(Callback):\n    def on_train_begin(self, logs=None):\n        # recordings will be the trainer object attributes\n        self.trainer.silly_callback = {}\n\n        self.trainer.silly_callback[\"beginning\"] = []\n        self.trainer.silly_callback[\"end\"] = []\n\n    def on_epoch_begin(self, epoch, logs=None):\n        self.trainer.silly_callback[\"beginning\"].append(epoch + 1)\n\n    def on_epoch_end(self, epoch, logs=None, metric=None):\n        self.trainer.silly_callback[\"end\"].append(epoch + 1)\n
class SillyCallback(Callback): def on_train_begin(self, logs=None): # recordings will be the trainer object attributes self.trainer.silly_callback = {} self.trainer.silly_callback[\"beginning\"] = [] self.trainer.silly_callback[\"end\"] = [] def on_epoch_begin(self, epoch, logs=None): self.trainer.silly_callback[\"beginning\"].append(epoch + 1) def on_epoch_end(self, epoch, logs=None, metric=None): self.trainer.silly_callback[\"end\"].append(epoch + 1)

and now, as usual:

In\u00a0[22]: Copied!
trainer = Trainer(\n    model, objective=\"binary\", metrics=[Accuracy], callbacks=[SillyCallback]\n)\n
trainer = Trainer( model, objective=\"binary\", metrics=[Accuracy], callbacks=[SillyCallback] ) In\u00a0[23]: Copied!
trainer.fit(\n    X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=5, batch_size=64, val_split=0.2\n)\n
trainer.fit( X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=5, batch_size=64, val_split=0.2 )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 611/611 [00:06<00:00, 94.39it/s, loss=0.411, metrics={'acc': 0.814}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 121.91it/s, loss=0.327, metrics={'acc': 0.8449}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 611/611 [00:07<00:00, 85.39it/s, loss=0.324, metrics={'acc': 0.8495}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 88.68it/s, loss=0.298, metrics={'acc': 0.8612}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 611/611 [00:08<00:00, 74.35it/s, loss=0.302, metrics={'acc': 0.8593}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 100.51it/s, loss=0.29, metrics={'acc': 0.8665}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 611/611 [00:08<00:00, 73.83it/s, loss=0.292, metrics={'acc': 0.8637}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 105.98it/s, loss=0.286, metrics={'acc': 0.8695}]\nepoch 5: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 611/611 [00:08<00:00, 72.15it/s, loss=0.286, metrics={'acc': 0.866}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 92.27it/s, loss=0.284, metrics={'acc': 0.8698}]\n
In\u00a0[24]: Copied!
trainer.silly_callback\n
trainer.silly_callback Out[24]:
{'beginning': [1, 2, 3, 4, 5], 'end': [1, 2, 3, 4, 5]}
"},{"location":"examples/07_custom_components.html#custom-components","title":"Custom components\u00b6","text":"

As I mentioned earlier in the example notebooks, and also in the README, it is possible to customise almost every component in pytorch-widedeep.

Let's now go through a couple of simple examples to illustrate how that could be done.

First, let's load and process the data \"as usual\". We will start with a regression problem using the Airbnb dataset.
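A minimal sketch of that first step is shown below. The file path and column names are placeholders (the processed Airbnb data used in the examples has many more columns); only the preprocess-then-fit pattern matters here.

import pandas as pd

from pytorch_widedeep.preprocessing import TabPreprocessor

# hypothetical local copy of the processed Airbnb listings data
df = pd.read_csv(\"data/airbnb/airbnb_sample.csv\")

# illustrative column choices; the real example uses a larger set
tab_preprocessor = TabPreprocessor(
    embed_cols=[\"room_type\", \"neighbourhood_cleansed\"],
    continuous_cols=[\"accommodates\", \"minimum_nights\"],
)
X_tab = tab_preprocessor.fit_transform(df)
target = df[\"yield\"].values  # placeholder regression target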

"},{"location":"examples/07_custom_components.html#custom-deeptext","title":"Custom deeptext\u00b6","text":"

Standard PyTorch model
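A minimal sketch of such a component is shown below. It assumes the usual contract for custom components: the module returns the activations of its last layer (no prediction head) and exposes an output_dim attribute so that WideDeep can add the head itself.

import torch
from torch import nn


class BasicRNN(nn.Module):
    # hypothetical minimal deeptext component: token embeddings + a GRU
    def __init__(self, vocab_size: int, embed_dim: int = 32, hidden_dim: int = 64):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.rnn = nn.GRU(embed_dim, hidden_dim, batch_first=True)
        # used by WideDeep to size the prediction head it adds on top
        self.output_dim = hidden_dim

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        embeddings = self.embed(X.long())
        _, h = self.rnn(embeddings)
        return h[-1]  # (batch_size, hidden_dim)


# model = WideDeep(deeptabular=deeptabular, deeptext=BasicRNN(vocab_size=...))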

"},{"location":"examples/07_custom_components.html#custom-loss-function","title":"Custom loss function\u00b6","text":"

Loss functions must simply inherit from PyTorch's nn.Module. For example, let's say we want to use RMSE (note that this is already available in the package, but I will pass it here as a custom loss for illustration purposes).
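A minimal sketch, assuming the loss is then handed to the Trainer through its custom_loss_function argument:

import torch
from torch import nn


class RMSELoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()

    def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        # root mean squared error is just the square root of the MSE
        return torch.sqrt(self.mse(input, target))


# trainer = Trainer(model, objective=\"regression\", custom_loss_function=RMSELoss())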

"},{"location":"examples/07_custom_components.html#custom-metric","title":"Custom metric\u00b6","text":"

Let's say we want to use our own accuracy metric (again, this is already available in the package, but I will pass it here as a custom metric for illustration purposes).

This could be done as:
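Here is a minimal sketch, assuming the Metric base-class interface used by the library's own metrics (a _name attribute plus reset and __call__ methods):

import torch

from pytorch_widedeep.metrics import Metric


class BinaryAccuracy(Metric):
    def __init__(self):
        super().__init__()
        self.correct_count = 0.0
        self.total_count = 0.0
        self._name = \"acc\"  # name that shows up in the history/progress bar

    def reset(self):
        self.correct_count = 0.0
        self.total_count = 0.0

    def __call__(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> float:
        y_pred_cat = (y_pred > 0.5).float().view(-1)
        self.correct_count += (y_pred_cat == y_true.view(-1)).sum().item()
        self.total_count += y_true.size(0)
        return self.correct_count / self.total_count


# trainer = Trainer(model, objective=\"binary\", metrics=[BinaryAccuracy()])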

"},{"location":"examples/07_custom_components.html#custom-callback","title":"Custom Callback\u00b6","text":"

Let's code a callback that records the current epoch at the beginning and the end of each epoch (silly, but, you know, this is just an example).
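Something along these lines would produce the trainer.silly_callback output shown earlier (a sketch; the hook signatures follow the Callback base class, which is also used in the RayTune integration example below):

from typing import Dict, Optional

from pytorch_widedeep.callbacks import Callback


class SillyCallback(Callback):
    def on_train_begin(self, logs: Optional[Dict] = None):
        # callbacks can reach the Trainer through self.trainer
        self.trainer.silly_callback = {\"beginning\": [], \"end\": []}

    def on_epoch_begin(self, epoch: int, logs: Optional[Dict] = None):
        self.trainer.silly_callback[\"beginning\"].append(epoch + 1)

    def on_epoch_end(
        self, epoch: int, logs: Optional[Dict] = None, metric: Optional[float] = None
    ):
        self.trainer.silly_callback[\"end\"].append(epoch + 1)


# trainer = Trainer(model, objective=\"binary\", callbacks=[SillyCallback])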

"},{"location":"examples/08_custom_dataLoader_imbalanced_dataset.html","title":"08_custom_dataLoader_imbalanced_dataset","text":"
  • In this notebook we will use the highly imbalanced Protein Homology Dataset from KDD Cup 2004
* The first element of each line is a BLOCK ID that denotes to which native sequence this example belongs. There is a unique BLOCK ID for each native sequence. BLOCK IDs are integers running from 1 to 303 (one for each native sequence, i.e. for each query). BLOCK IDs were assigned before the blocks were split into the train and test sets, so they do not run consecutively in either file.\n* The second element of each line is an EXAMPLE ID that uniquely describes the example. You will need this EXAMPLE ID and the BLOCK ID when you submit results.\n* The third element is the class of the example. Proteins that are homologous to the native sequence are denoted by 1, non-homologous proteins (i.e. decoys) by 0. Test examples have a \"?\" in this position.\n* All following elements are feature values. There are 74 feature values in each line. The features describe the match (e.g. the score of a sequence alignment) between the native protein sequence and the sequence that is tested for homology.\n
In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport torch\nfrom torch.optim import SGD, lr_scheduler\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom pytorch_widedeep.dataloaders import DataLoaderImbalanced, DataLoaderDefault\nfrom torchmetrics import F1Score as F1_torchmetrics\nfrom torchmetrics import Accuracy as Accuracy_torchmetrics\nfrom torchmetrics import Precision as Precision_torchmetrics\nfrom torchmetrics import Recall as Recall_torchmetrics\nfrom pytorch_widedeep.metrics import Accuracy, Recall, Precision, F1Score, R2Score\nfrom pytorch_widedeep.initializers import XavierNormal\nfrom pytorch_widedeep.datasets import load_bio_kdd04\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import classification_report\n\nimport time\nimport datetime\n\nimport warnings\n\nwarnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n\n# increase displayed columns in jupyter notebook\npd.set_option(\"display.max_columns\", 200)\npd.set_option(\"display.max_rows\", 300)\n
import numpy as np import pandas as pd import torch from torch.optim import SGD, lr_scheduler from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.models import TabMlp, WideDeep from pytorch_widedeep.dataloaders import DataLoaderImbalanced, DataLoaderDefault from torchmetrics import F1Score as F1_torchmetrics from torchmetrics import Accuracy as Accuracy_torchmetrics from torchmetrics import Precision as Precision_torchmetrics from torchmetrics import Recall as Recall_torchmetrics from pytorch_widedeep.metrics import Accuracy, Recall, Precision, F1Score, R2Score from pytorch_widedeep.initializers import XavierNormal from pytorch_widedeep.datasets import load_bio_kdd04 from sklearn.model_selection import train_test_split from sklearn.metrics import classification_report import time import datetime import warnings warnings.filterwarnings(\"ignore\", category=DeprecationWarning) # increase displayed columns in jupyter notebook pd.set_option(\"display.max_columns\", 200) pd.set_option(\"display.max_rows\", 300)
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n
In\u00a0[2]: Copied!
df = load_bio_kdd04(as_frame=True)\n# drop columns we won't need in this example\ndf.drop(columns=[\"EXAMPLE_ID\", \"BLOCK_ID\"], inplace=True)\n\ndf_train, df_valid = train_test_split(\n    df, test_size=0.2, stratify=df[\"target\"], random_state=1\n)\ndf_valid, df_test = train_test_split(\n    df_valid, test_size=0.5, stratify=df_valid[\"target\"], random_state=1\n)\n\ncontinuous_cols = df.drop(columns=[\"target\"]).columns.values.tolist()\n
df = load_bio_kdd04(as_frame=True) # drop columns we won't need in this example df.drop(columns=[\"EXAMPLE_ID\", \"BLOCK_ID\"], inplace=True) df_train, df_valid = train_test_split( df, test_size=0.2, stratify=df[\"target\"], random_state=1 ) df_valid, df_test = train_test_split( df_valid, test_size=0.5, stratify=df_valid[\"target\"], random_state=1 ) continuous_cols = df.drop(columns=[\"target\"]).columns.values.tolist() In\u00a0[3]: Copied!
# deeptabular\ntab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True)\nX_tab_train = tab_preprocessor.fit_transform(df_train)\nX_tab_valid = tab_preprocessor.transform(df_valid)\nX_tab_test = tab_preprocessor.transform(df_test)\n\n# target\ny_train = df_train[\"target\"].values\ny_valid = df_valid[\"target\"].values\ny_test = df_test[\"target\"].values\n
# deeptabular tab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True) X_tab_train = tab_preprocessor.fit_transform(df_train) X_tab_valid = tab_preprocessor.transform(df_valid) X_tab_test = tab_preprocessor.transform(df_test) # target y_train = df_train[\"target\"].values y_valid = df_valid[\"target\"].values y_test = df_test[\"target\"].values In\u00a0[4]: Copied!
# Define the model\ninput_layer = len(tab_preprocessor.continuous_cols)\noutput_layer = 1\nhidden_layers = np.linspace(\n    input_layer * 2, output_layer, 5, endpoint=False, dtype=int\n).tolist()\n\ndeeptabular = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    continuous_cols=tab_preprocessor.continuous_cols,\n    mlp_hidden_dims=hidden_layers,\n)\nmodel = WideDeep(deeptabular=deeptabular)\nmodel\n
# Define the model input_layer = len(tab_preprocessor.continuous_cols) output_layer = 1 hidden_layers = np.linspace( input_layer * 2, output_layer, 5, endpoint=False, dtype=int ).tolist() deeptabular = TabMlp( column_idx=tab_preprocessor.column_idx, continuous_cols=tab_preprocessor.continuous_cols, mlp_hidden_dims=hidden_layers, ) model = WideDeep(deeptabular=deeptabular) model Out[4]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cont_norm): Identity()\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=74, out_features=148, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=148, out_features=118, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_2): Sequential(\n            (0): Linear(in_features=118, out_features=89, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_3): Sequential(\n            (0): Linear(in_features=89, out_features=59, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_4): Sequential(\n            (0): Linear(in_features=59, out_features=30, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=30, out_features=1, bias=True)\n  )\n)
In\u00a0[5]: Copied!
# Metrics from pytorch-widedeep\naccuracy = Accuracy(top_k=2)\nprecision = Precision(average=False)\n\n# # Metrics from torchmetrics\n# accuracy = Accuracy_torchmetrics(average=None, num_classes=1)\n# precision = Precision_torchmetrics(average=\"micro\", num_classes=1)\n\n# Optimizers\ndeep_opt = SGD(model.deeptabular.parameters(), lr=0.1)\n\n# LR Scheduler\ndeep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)\n\ntrainer = Trainer(\n    model,\n    objective=\"binary\",\n    lr_schedulers={\"deeptabular\": deep_sch},\n    initializers={\"deeptabular\": XavierNormal},\n    optimizers={\"deeptabular\": deep_opt},\n    metrics=[accuracy, precision],\n    verbose=1,\n)\n
# Metrics from pytorch-widedeep accuracy = Accuracy(top_k=2) precision = Precision(average=False) # # Metrics from torchmetrics # accuracy = Accuracy_torchmetrics(average=None, num_classes=1) # precision = Precision_torchmetrics(average=\"micro\", num_classes=1) # Optimizers deep_opt = SGD(model.deeptabular.parameters(), lr=0.1) # LR Scheduler deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3) trainer = Trainer( model, objective=\"binary\", lr_schedulers={\"deeptabular\": deep_sch}, initializers={\"deeptabular\": XavierNormal}, optimizers={\"deeptabular\": deep_opt}, metrics=[accuracy, precision], verbose=1, ) In\u00a0[6]: Copied!
start = time.time()\ntrainer.fit(\n    X_train={\"X_tab\": X_tab_train, \"target\": y_train},\n    X_val={\"X_tab\": X_tab_valid, \"target\": y_valid},\n    n_epochs=1,\n    batch_size=32,\n    custom_dataloader=DataLoaderImbalanced,\n    oversample_mul=5,\n)\nprint(\n    \"Training time[s]: {}\".format(\n        datetime.timedelta(seconds=round(time.time() - start))\n    )\n)\n\npd.DataFrame(trainer.history)\n\ndf_pred = trainer.predict(X_tab=X_tab_test)\nprint(classification_report(df_test[\"target\"].to_list(), df_pred))\nprint(\"Actual predicted values:\\n{}\".format(np.unique(df_pred, return_counts=True)))\n
start = time.time() trainer.fit( X_train={\"X_tab\": X_tab_train, \"target\": y_train}, X_val={\"X_tab\": X_tab_valid, \"target\": y_valid}, n_epochs=1, batch_size=32, custom_dataloader=DataLoaderImbalanced, oversample_mul=5, ) print( \"Training time[s]: {}\".format( datetime.timedelta(seconds=round(time.time() - start)) ) ) pd.DataFrame(trainer.history) df_pred = trainer.predict(X_tab=X_tab_test) print(classification_report(df_test[\"target\"].to_list(), df_pred)) print(\"Actual predicted values:\\n{}\".format(np.unique(df_pred, return_counts=True)))
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 325/325 [00:02<00:00, 153.99it/s, loss=0.163, metrics={'acc': 0.9363, 'prec': [0.9358]}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 456/456 [00:02<00:00, 205.93it/s, loss=0.1, metrics={'acc': 0.9501, 'prec': [0.1447]}]\n
Training time[s]: 0:00:04\n
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 456/456 [00:01<00:00, 368.16it/s]\n
              precision    recall  f1-score   support\n\n           0       1.00      0.95      0.97     14446\n           1       0.15      0.95      0.25       130\n\n    accuracy                           0.95     14576\n   macro avg       0.57      0.95      0.61     14576\nweighted avg       0.99      0.95      0.97     14576\n\nActual predicted values:\n(array([0, 1]), array([13736,   840]))\n
"},{"location":"examples/08_custom_dataLoader_imbalanced_dataset.html#custom-dataloader-for-imbalanced-dataset","title":"Custom DataLoader for Imbalanced dataset\u00b6","text":""},{"location":"examples/08_custom_dataLoader_imbalanced_dataset.html#initial-imports","title":"Initial imports\u00b6","text":""},{"location":"examples/09_extracting_embeddings.html","title":"09_extracting_embeddings","text":"In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport torch\n\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.training import Trainer\nfrom pytorch_widedeep.models import FTTransformer, WideDeep\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep import Tab2Vec\nfrom pytorch_widedeep.datasets import load_adult\n
import numpy as np import pandas as pd import torch from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.training import Trainer from pytorch_widedeep.models import FTTransformer, WideDeep from pytorch_widedeep.metrics import Accuracy from pytorch_widedeep import Tab2Vec from pytorch_widedeep.datasets import load_adult In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\ndf.head()\n
df = load_adult(as_frame=True) df.head() Out[2]: age workclass fnlwgt education educational-num marital-status occupation relationship race gender capital-gain capital-loss hours-per-week native-country income 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States <=50K 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States <=50K 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States >50K 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States >50K 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States <=50K In\u00a0[3]: Copied!
# For convenience, we'll replace '-' with '_'\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\n# binary target\ndf[\"target\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop([\"income\", \"educational_num\"], axis=1, inplace=True)\n\ndf.head()\n
# For convenience, we'll replace '-' with '_' df.columns = [c.replace(\"-\", \"_\") for c in df.columns] # binary target df[\"target\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop([\"income\", \"educational_num\"], axis=1, inplace=True) df.head() Out[3]: age workclass fnlwgt education marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country target 0 25 Private 226802 11th Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States 0 1 38 Private 89814 HS-grad Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States 0 2 28 Local-gov 336951 Assoc-acdm Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States 1 3 44 Private 160323 Some-college Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States 1 4 18 ? 103497 Some-college Never-married ? Own-child White Female 0 0 30 United-States 0 In\u00a0[4]: Copied!
cat_cols, cont_cols = [], []\nfor col in df.columns:\n    # 50 is just a random number I choose here for this example\n    if df[col].dtype == \"O\" or df[col].nunique() < 50 and col != \"target\":\n        cat_cols.append(col)\n    elif col != \"target\":\n        cont_cols.append(col)\ntarget_col = \"target\"\n
cat_cols, cont_cols = [], [] for col in df.columns: # 50 is just a random number I choose here for this example if df[col].dtype == \"O\" or df[col].nunique() < 50 and col != \"target\": cat_cols.append(col) elif col != \"target\": cont_cols.append(col) target_col = \"target\" In\u00a0[5]: Copied!
target = df[target_col].values\n\ntab_preprocessor = TabPreprocessor(\n    embed_cols=cat_cols, continuous_cols=cont_cols, for_transformer=True\n)\nX_tab = tab_preprocessor.fit_transform(df)\n
target = df[target_col].values tab_preprocessor = TabPreprocessor( embed_cols=cat_cols, continuous_cols=cont_cols, for_transformer=True ) X_tab = tab_preprocessor.fit_transform(df)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[6]: Copied!
ft_transformer = FTTransformer(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=tab_preprocessor.continuous_cols,\n    embed_continuous_method=\"standard\",\n    n_blocks=2,\n    n_heads=4,\n    input_dim=16,\n)\n
ft_transformer = FTTransformer( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=tab_preprocessor.continuous_cols, embed_continuous_method=\"standard\", n_blocks=2, n_heads=4, input_dim=16, )
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/utils/general_utils.py:12: DeprecationWarning: The 'embed_continuous' parameter is deprecated and will be removed in the next release. Please use 'embed_continuous_method' instead See the documentation for more details.\n  return func(*args, **kwargs)\n
In\u00a0[7]: Copied!
model = WideDeep(deeptabular=ft_transformer)\ntrainer = Trainer(model, objective=\"binary\", metrics=[Accuracy])\ntrainer.fit(X_tab=X_tab, target=target, n_epochs=1, batch_size=256, val_split=0.2)\n
model = WideDeep(deeptabular=ft_transformer) trainer = Trainer(model, objective=\"binary\", metrics=[Accuracy]) trainer.fit(X_tab=X_tab, target=target, n_epochs=1, batch_size=256, val_split=0.2)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:03<00:00, 41.47it/s, loss=221, metrics={'acc': 0.686}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:00<00:00, 86.24it/s, loss=9.28, metrics={'acc': 0.76}]\n
In\u00a0[8]: Copied!
t2v = Tab2Vec(model=model, tab_preprocessor=tab_preprocessor)\n
t2v = Tab2Vec(model=model, tab_preprocessor=tab_preprocessor) In\u00a0[9]: Copied!
# assuming it is a test set with the target col\nX_vec, y = t2v.transform(df.sample(100), target_col=\"target\")\n
# assuming it is a test set with the target col X_vec, y = t2v.transform(df.sample(100), target_col=\"target\") In\u00a0[10]: Copied!
# X vec is the dataframe turned into the embeddings\nX_vec.shape\n
# X vec is the dataframe turned into the embeddings X_vec.shape Out[10]:
(100, 208)

208 = input_dim (16) * n_cols (13)
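In other words, Tab2Vec returns one input_dim-sized embedding per tabular column and concatenates them, so the second dimension of X_vec can be checked directly (a quick sanity check, assuming the 13 feature columns left after dropping the target):

input_dim, n_cols = 16, 13  # FTTransformer input_dim, number of feature columns
assert input_dim * n_cols == 208  # equivalently, X_vec.shape[1]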

In\u00a0[11]: Copied!
# ...or if we don't have target col\nX_vec = t2v.transform(df.sample(100))\n
# ...or if we don't have target col X_vec = t2v.transform(df.sample(100))"},{"location":"examples/09_extracting_embeddings.html#extracting-embeddings","title":"Extracting embeddings\u00b6","text":"

This notebook is a simple guide to extracting learned feature embeddings using Tab2Vec.

"},{"location":"examples/10_3rd_party_integration-RayTune_WnB.html","title":"10_3rd_party_integration-RayTune_WnB","text":"In\u00a0[11]: Copied!
from typing import Optional, Dict\nimport os\n\nimport numpy as np\nimport pandas as pd\nimport torch\nimport wandb\nfrom torch.optim import SGD, lr_scheduler\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom torchmetrics import F1Score as F1_torchmetrics\nfrom torchmetrics import Accuracy as Accuracy_torchmetrics\nfrom torchmetrics import Precision as Precision_torchmetrics\nfrom torchmetrics import Recall as Recall_torchmetrics\nfrom pytorch_widedeep.metrics import Accuracy, Recall, Precision, F1Score, R2Score\nfrom pytorch_widedeep.initializers import XavierNormal\nfrom pytorch_widedeep.callbacks import (\n    EarlyStopping,\n    ModelCheckpoint,\n    Callback,\n)\nfrom pytorch_widedeep.datasets import load_bio_kdd04\n\nfrom sklearn.model_selection import train_test_split\nimport warnings\n\nwarnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n\nfrom ray import tune\nfrom ray.tune.schedulers import AsyncHyperBandScheduler\nfrom ray.tune import JupyterNotebookReporter\nfrom ray.air.integrations.wandb import WandbLoggerCallback\n\n# from ray.tune.integration.wandb import wandb_mixin\n\nimport tracemalloc\n\ntracemalloc.start()\n\n# increase displayed columns in jupyter notebook\npd.set_option(\"display.max_columns\", 200)\npd.set_option(\"display.max_rows\", 300)\n
from typing import Optional, Dict import os import numpy as np import pandas as pd import torch import wandb from torch.optim import SGD, lr_scheduler from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.models import TabMlp, WideDeep from torchmetrics import F1Score as F1_torchmetrics from torchmetrics import Accuracy as Accuracy_torchmetrics from torchmetrics import Precision as Precision_torchmetrics from torchmetrics import Recall as Recall_torchmetrics from pytorch_widedeep.metrics import Accuracy, Recall, Precision, F1Score, R2Score from pytorch_widedeep.initializers import XavierNormal from pytorch_widedeep.callbacks import ( EarlyStopping, ModelCheckpoint, Callback, ) from pytorch_widedeep.datasets import load_bio_kdd04 from sklearn.model_selection import train_test_split import warnings warnings.filterwarnings(\"ignore\", category=DeprecationWarning) from ray import tune from ray.tune.schedulers import AsyncHyperBandScheduler from ray.tune import JupyterNotebookReporter from ray.air.integrations.wandb import WandbLoggerCallback # from ray.tune.integration.wandb import wandb_mixin import tracemalloc tracemalloc.start() # increase displayed columns in jupyter notebook pd.set_option(\"display.max_columns\", 200) pd.set_option(\"display.max_rows\", 300) In\u00a0[12]: Copied!
class RayTuneReporter(Callback):\n    r\"\"\"Callback that allows reporting history and lr_history values to RayTune\n    during Hyperparameter tuning\n\n    Callbacks are passed as input parameters to the ``Trainer`` class. See\n    :class:`pytorch_widedeep.trainer.Trainer`\n\n    For examples see the examples folder at:\n\n        .. code-block:: bash\n\n            /examples/12_HyperParameter_tuning_w_RayTune.ipynb\n    \"\"\"\n\n    def on_epoch_end(\n        self, epoch: int, logs: Optional[Dict] = None, metric: Optional[float] = None\n    ):\n        report_dict = {}\n        for k, v in self.trainer.history.items():\n            report_dict.update({k: v[-1]})\n        if hasattr(self.trainer, \"lr_history\"):\n            for k, v in self.trainer.lr_history.items():\n                report_dict.update({k: v[-1]})\n        tune.report(report_dict)\n\n\nclass WnBReportBest(Callback):\n    r\"\"\"Callback that allows reporting best performance of a run to WnB\n    during Hyperparameter tuning. It is an adjusted pytorch_widedeep.callbacks.ModelCheckpoint\n    with added WnB and removed checkpoint saving.\n\n    Callbacks are passed as input parameters to the ``Trainer`` class.\n\n    Parameters\n    ----------\n    wb: obj\n        Weights&Biases API interface to report single best result usable for\n        comparisson of multiple paramater combinations by, for example,\n        `parallel coordinates\n        <https://docs.wandb.ai/ref/app/features/panels/parallel-coordinates>`_.\n        E.g W&B summary report `wandb.run.summary[\"best\"]`.\n    monitor: str, default=\"loss\"\n        quantity to monitor. Typically `'val_loss'` or metric name\n        (e.g. `'val_acc'`)\n    mode: str, default=\"auto\"\n        If ``save_best_only=True``, the decision to overwrite the current save\n        file is made based on either the maximization or the minimization of\n        the monitored quantity. For `'acc'`, this should be `'max'`, for\n        `'loss'` this should be `'min'`, etc. 
In `'auto'` mode, the\n        direction is automatically inferred from the name of the monitored\n        quantity.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        wb: object,\n        monitor: str = \"val_loss\",\n        mode: str = \"auto\",\n    ):\n        super(WnBReportBest, self).__init__()\n\n        self.monitor = monitor\n        self.mode = mode\n        self.wb = wb\n\n        if self.mode not in [\"auto\", \"min\", \"max\"]:\n            warnings.warn(\n                \"WnBReportBest mode %s is unknown, \"\n                \"fallback to auto mode.\" % (self.mode),\n                RuntimeWarning,\n            )\n            self.mode = \"auto\"\n        if self.mode == \"min\":\n            self.monitor_op = np.less\n            self.best = np.Inf\n        elif self.mode == \"max\":\n            self.monitor_op = np.greater  # type: ignore[assignment]\n            self.best = -np.Inf\n        else:\n            if self._is_metric(self.monitor):\n                self.monitor_op = np.greater  # type: ignore[assignment]\n                self.best = -np.Inf\n            else:\n                self.monitor_op = np.less\n                self.best = np.Inf\n\n    def on_epoch_end(  # noqa: C901\n        self, epoch: int, logs: Optional[Dict] = None, metric: Optional[float] = None\n    ):\n        logs = logs or {}\n        current = logs.get(self.monitor)\n        if current is not None:\n            if self.monitor_op(current, self.best):\n                self.wb.run.summary[\"best\"] = current  # type: ignore[attr-defined]\n                self.best = current\n                self.best_epoch = epoch\n\n    @staticmethod\n    def _is_metric(monitor: str):\n        \"copied from pytorch_widedeep.callbacks\"\n        if any([s in monitor for s in [\"acc\", \"prec\", \"rec\", \"fscore\", \"f1\", \"f2\"]]):\n            return True\n        else:\n            return False\n
class RayTuneReporter(Callback): r\"\"\"Callback that allows reporting history and lr_history values to RayTune during Hyperparameter tuning Callbacks are passed as input parameters to the ``Trainer`` class. See :class:`pytorch_widedeep.trainer.Trainer` For examples see the examples folder at: .. code-block:: bash /examples/12_HyperParameter_tuning_w_RayTune.ipynb \"\"\" def on_epoch_end( self, epoch: int, logs: Optional[Dict] = None, metric: Optional[float] = None ): report_dict = {} for k, v in self.trainer.history.items(): report_dict.update({k: v[-1]}) if hasattr(self.trainer, \"lr_history\"): for k, v in self.trainer.lr_history.items(): report_dict.update({k: v[-1]}) tune.report(report_dict) class WnBReportBest(Callback): r\"\"\"Callback that allows reporting best performance of a run to WnB during Hyperparameter tuning. It is an adjusted pytorch_widedeep.callbacks.ModelCheckpoint with added WnB and removed checkpoint saving. Callbacks are passed as input parameters to the ``Trainer`` class. Parameters ---------- wb: obj Weights&Biases API interface to report single best result usable for comparisson of multiple paramater combinations by, for example, `parallel coordinates `_. E.g W&B summary report `wandb.run.summary[\"best\"]`. monitor: str, default=\"loss\" quantity to monitor. Typically `'val_loss'` or metric name (e.g. `'val_acc'`) mode: str, default=\"auto\" If ``save_best_only=True``, the decision to overwrite the current save file is made based on either the maximization or the minimization of the monitored quantity. For `'acc'`, this should be `'max'`, for `'loss'` this should be `'min'`, etc. In `'auto'` mode, the direction is automatically inferred from the name of the monitored quantity. \"\"\" def __init__( self, wb: object, monitor: str = \"val_loss\", mode: str = \"auto\", ): super(WnBReportBest, self).__init__() self.monitor = monitor self.mode = mode self.wb = wb if self.mode not in [\"auto\", \"min\", \"max\"]: warnings.warn( \"WnBReportBest mode %s is unknown, \" \"fallback to auto mode.\" % (self.mode), RuntimeWarning, ) self.mode = \"auto\" if self.mode == \"min\": self.monitor_op = np.less self.best = np.Inf elif self.mode == \"max\": self.monitor_op = np.greater # type: ignore[assignment] self.best = -np.Inf else: if self._is_metric(self.monitor): self.monitor_op = np.greater # type: ignore[assignment] self.best = -np.Inf else: self.monitor_op = np.less self.best = np.Inf def on_epoch_end( # noqa: C901 self, epoch: int, logs: Optional[Dict] = None, metric: Optional[float] = None ): logs = logs or {} current = logs.get(self.monitor) if current is not None: if self.monitor_op(current, self.best): self.wb.run.summary[\"best\"] = current # type: ignore[attr-defined] self.best = current self.best_epoch = epoch @staticmethod def _is_metric(monitor: str): \"copied from pytorch_widedeep.callbacks\" if any([s in monitor for s in [\"acc\", \"prec\", \"rec\", \"fscore\", \"f1\", \"f2\"]]): return True else: return False In\u00a0[13]: Copied!
df = load_bio_kdd04(as_frame=True)\ndf.head()\n
df = load_bio_kdd04(as_frame=True) df.head() Out[13]: EXAMPLE_ID BLOCK_ID target 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 0 279 261532 0 52.0 32.69 0.30 2.5 20.0 1256.8 -0.89 0.33 11.0 -55.0 267.2 0.52 0.05 -2.36 49.6 252.0 0.43 1.16 -2.06 -33.0 -123.2 1.60 -0.49 -6.06 65.0 296.1 -0.28 -0.26 -3.83 -22.6 -170.0 3.06 -1.05 -3.29 22.9 286.3 0.12 2.58 4.08 -33.0 -178.9 1.88 0.53 -7.0 -44.0 1987.0 -5.41 0.95 -4.0 -57.0 722.9 -3.26 -0.55 -7.5 125.5 1547.2 -0.36 1.12 9.0 -37.0 72.5 0.47 0.74 -11.0 -8.0 1595.1 -1.64 2.83 -2.0 -50.0 445.2 -0.35 0.26 0.76 1 279 261533 0 58.0 33.33 0.00 16.5 9.5 608.1 0.50 0.07 20.5 -52.5 521.6 -1.08 0.58 -0.02 -3.2 103.6 -0.95 0.23 -2.87 -25.9 -52.2 -0.21 0.87 -1.81 10.4 62.0 -0.28 -0.04 1.48 -17.6 -198.3 3.43 2.84 5.87 -16.9 72.6 -0.31 2.79 2.71 -33.5 -11.6 -1.11 4.01 5.0 -57.0 666.3 1.13 4.38 5.0 -64.0 39.3 1.07 -0.16 32.5 100.0 1893.7 -2.80 -0.22 2.5 -28.5 45.0 0.58 0.41 -19.0 -6.0 762.9 0.29 0.82 -3.0 -35.0 140.3 1.16 0.39 0.73 2 279 261534 0 77.0 27.27 -0.91 6.0 58.5 1623.6 -1.40 0.02 -6.5 -48.0 621.0 -1.20 0.14 -0.20 73.6 609.1 -0.44 -0.58 -0.04 -23.0 -27.4 -0.72 -1.04 -1.09 91.1 635.6 -0.88 0.24 0.59 -18.7 -7.2 -0.60 -2.82 -0.71 52.4 504.1 0.89 -0.67 -9.30 -20.8 -25.7 -0.77 -0.85 0.0 -20.0 2259.0 -0.94 1.15 -4.0 -44.0 -22.7 0.94 -0.98 -19.0 105.0 1267.9 1.03 1.27 11.0 -39.5 82.3 0.47 -0.19 -10.0 7.0 1491.8 0.32 -1.29 0.0 -34.0 658.2 -0.76 0.26 0.24 3 279 261535 0 41.0 27.91 -0.35 3.0 46.0 1921.6 -1.36 -0.47 -32.0 -51.5 560.9 -0.29 -0.10 -1.11 124.3 791.6 0.00 0.39 -1.85 -21.7 -44.9 -0.21 0.02 0.89 133.9 797.8 -0.08 1.06 -0.26 -16.4 -74.1 0.97 -0.80 -0.41 66.9 955.3 -1.90 1.28 -6.65 -28.1 47.5 -1.91 1.42 1.0 -30.0 1846.7 0.76 1.10 -4.0 -52.0 -53.9 1.71 -0.22 -12.0 97.5 1969.8 -1.70 0.16 -1.0 -32.5 255.9 -0.46 1.57 10.0 6.0 2047.7 -0.98 1.53 0.0 -49.0 554.2 -0.83 0.39 0.73 4 279 261536 0 50.0 28.00 -1.32 -9.0 12.0 464.8 0.88 0.19 8.0 -51.5 98.1 1.09 -0.33 -2.16 -3.9 102.7 0.39 -1.22 -3.39 -15.2 -42.2 -1.18 -1.11 -3.55 8.9 141.3 -0.16 -0.43 -4.15 -12.9 -13.4 -1.32 -0.98 -3.69 8.8 136.1 -0.30 4.13 1.89 -13.0 -18.7 -1.37 -0.93 0.0 -1.0 810.1 -2.29 6.72 1.0 -23.0 -29.7 0.58 -1.10 -18.5 33.5 206.8 1.84 -0.13 4.0 -29.0 30.1 0.80 -0.24 5.0 -14.0 479.5 0.68 -0.59 2.0 -36.0 -6.9 2.02 0.14 -0.23 In\u00a0[14]: Copied!
# imbalance of the classes\ndf[\"target\"].value_counts()\n
# imbalance of the classes df[\"target\"].value_counts() Out[14]:
target\n0    144455\n1      1296\nName: count, dtype: int64
In\u00a0[15]: Copied!
# drop columns we won't need in this example\ndf.drop(columns=[\"EXAMPLE_ID\", \"BLOCK_ID\"], inplace=True)\n
# drop columns we won't need in this example df.drop(columns=[\"EXAMPLE_ID\", \"BLOCK_ID\"], inplace=True) In\u00a0[16]: Copied!
df_train, df_valid = train_test_split(\n    df, test_size=0.2, stratify=df[\"target\"], random_state=1\n)\ndf_valid, df_test = train_test_split(\n    df_valid, test_size=0.5, stratify=df_valid[\"target\"], random_state=1\n)\n
df_train, df_valid = train_test_split( df, test_size=0.2, stratify=df[\"target\"], random_state=1 ) df_valid, df_test = train_test_split( df_valid, test_size=0.5, stratify=df_valid[\"target\"], random_state=1 ) In\u00a0[17]: Copied!
continuous_cols = df.drop(columns=[\"target\"]).columns.values.tolist()\n
continuous_cols = df.drop(columns=[\"target\"]).columns.values.tolist() In\u00a0[18]: Copied!
# deeptabular\ntab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True)\nX_tab_train = tab_preprocessor.fit_transform(df_train)\nX_tab_valid = tab_preprocessor.transform(df_valid)\nX_tab_test = tab_preprocessor.transform(df_test)\n\n# target\ny_train = df_train[\"target\"].values\ny_valid = df_valid[\"target\"].values\ny_test = df_test[\"target\"].values\n
# deeptabular tab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True) X_tab_train = tab_preprocessor.fit_transform(df_train) X_tab_valid = tab_preprocessor.transform(df_valid) X_tab_test = tab_preprocessor.transform(df_test) # target y_train = df_train[\"target\"].values y_valid = df_valid[\"target\"].values y_test = df_test[\"target\"].values In\u00a0[19]: Copied!
input_layer = len(tab_preprocessor.continuous_cols)\noutput_layer = 1\nhidden_layers = np.linspace(\n    input_layer * 2, output_layer, 5, endpoint=False, dtype=int\n).tolist()\n
input_layer = len(tab_preprocessor.continuous_cols) output_layer = 1 hidden_layers = np.linspace( input_layer * 2, output_layer, 5, endpoint=False, dtype=int ).tolist() In\u00a0[20]: Copied!
deeptabular = TabMlp(\n    mlp_hidden_dims=hidden_layers,\n    column_idx=tab_preprocessor.column_idx,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\nmodel = WideDeep(deeptabular=deeptabular)\nmodel\n
deeptabular = TabMlp( mlp_hidden_dims=hidden_layers, column_idx=tab_preprocessor.column_idx, continuous_cols=tab_preprocessor.continuous_cols, ) model = WideDeep(deeptabular=deeptabular) model Out[20]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cont_norm): Identity()\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=74, out_features=148, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=148, out_features=118, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_2): Sequential(\n            (0): Linear(in_features=118, out_features=89, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_3): Sequential(\n            (0): Linear(in_features=89, out_features=59, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_4): Sequential(\n            (0): Linear(in_features=59, out_features=30, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=30, out_features=1, bias=True)\n  )\n)
In\u00a0[24]: Copied!
# Metrics from torchmetrics\naccuracy = Accuracy_torchmetrics(average=None, num_classes=1, task=\"binary\")\nprecision = Precision_torchmetrics(average=\"micro\", num_classes=1, task=\"binary\")\nf1 = F1_torchmetrics(average=None, num_classes=1, task=\"binary\")\nrecall = Recall_torchmetrics(average=None, num_classes=1, task=\"binary\")\n
# Metrics from torchmetrics accuracy = Accuracy_torchmetrics(average=None, num_classes=1, task=\"binary\") precision = Precision_torchmetrics(average=\"micro\", num_classes=1, task=\"binary\") f1 = F1_torchmetrics(average=None, num_classes=1, task=\"binary\") recall = Recall_torchmetrics(average=None, num_classes=1, task=\"binary\")

Note:

The following cells include usage of both the RayTuneReporter and WnBReportBest callbacks. If you want to use just RayTuneReporter, remove the following:

  • wandb from config
  • WandbLoggerCallback
  • WnBReportBest
  • @wandb_mixin decorator

We do not see a strong reason to use WnB without RayTune for a single parameter combination run, but it is possible:

  • option01: define parameters in config with only a single value, e.g. tune.grid_search([1000]) (a single-value RayTune run)
  • option02: define a WnB callback that reports the current validation/training loss, metrics, etc. at the end of each batch, i.e. do not report to WnB at on_epoch_end as in WnBReportBest but at on_batch_end (see pytorch_widedeep.callbacks.Callback and the sketch below)
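As an illustration of option02, a hypothetical callback could look like the sketch below (it assumes an active wandb run and the on_batch_end hook of the Callback base class):

from typing import Dict, Optional

from pytorch_widedeep.callbacks import Callback


class WnBBatchLogger(Callback):
    # hypothetical: push the running loss/metrics to W&B after every batch
    # instead of reporting a single best value per run
    def __init__(self, wb):
        super().__init__()
        self.wb = wb  # the wandb module or an active run

    def on_batch_end(self, batch: int, logs: Optional[Dict] = None):
        self.wb.log(logs or {})


# trainer = Trainer(model, ..., callbacks=[WnBBatchLogger(wb=wandb)])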
In\u00a0[26]: Copied!
config = {\n    \"batch_size\": tune.grid_search([1000, 5000]),\n    \"wandb\": {\n        \"project\": \"test\",\n        # \"api_key_file\": os.getcwd() + \"/wandb_api.key\",\n        \"api_key\": \"WNB_API_KEY\",\n    },\n}\n\n# Optimizers\ndeep_opt = SGD(model.deeptabular.parameters(), lr=0.1)\n# LR Scheduler\ndeep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)\n\n\n@wandb_mixin\ndef training_function(config, X_train, X_val):\n    early_stopping = EarlyStopping()\n    model_checkpoint = ModelCheckpoint(save_best_only=True)\n    # Hyperparameters\n    batch_size = config[\"batch_size\"]\n    trainer = Trainer(\n        model,\n        objective=\"binary_focal_loss\",\n        callbacks=[\n            RayTuneReporter,\n            WnBReportBest(wb=wandb),\n            early_stopping,\n            model_checkpoint,\n        ],\n        lr_schedulers={\"deeptabular\": deep_sch},\n        initializers={\"deeptabular\": XavierNormal},\n        optimizers={\"deeptabular\": deep_opt},\n        metrics=[accuracy, precision, recall, f1],\n        verbose=0,\n    )\n\n    trainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=batch_size)\n\n\nX_train = {\"X_tab\": X_tab_train, \"target\": y_train}\nX_val = {\"X_tab\": X_tab_valid, \"target\": y_valid}\n\nasha_scheduler = AsyncHyperBandScheduler(\n    time_attr=\"training_iteration\",\n    metric=\"_metric/val_loss\",\n    mode=\"min\",\n    max_t=100,\n    grace_period=10,\n    reduction_factor=3,\n    brackets=1,\n)\n\nanalysis = tune.run(\n    tune.with_parameters(training_function, X_train=X_train, X_val=X_val),\n    resources_per_trial={\"cpu\": 1, \"gpu\": 0},\n    progress_reporter=JupyterNotebookReporter(overwrite=True),\n    scheduler=asha_scheduler,\n    config=config,\n    callbacks=[\n        WandbLoggerCallback(\n            project=config[\"wandb\"][\"project\"],\n            # api_key_file=config[\"wandb\"][\"api_key_file\"],\n            api_key=config[\"wandb\"][\"api_key\"],\n            log_config=True,\n        )\n    ],\n)\n
config = { \"batch_size\": tune.grid_search([1000, 5000]), \"wandb\": { \"project\": \"test\", # \"api_key_file\": os.getcwd() + \"/wandb_api.key\", \"api_key\": \"WNB_API_KEY\", }, } # Optimizers deep_opt = SGD(model.deeptabular.parameters(), lr=0.1) # LR Scheduler deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3) @wandb_mixin def training_function(config, X_train, X_val): early_stopping = EarlyStopping() model_checkpoint = ModelCheckpoint(save_best_only=True) # Hyperparameters batch_size = config[\"batch_size\"] trainer = Trainer( model, objective=\"binary_focal_loss\", callbacks=[ RayTuneReporter, WnBReportBest(wb=wandb), early_stopping, model_checkpoint, ], lr_schedulers={\"deeptabular\": deep_sch}, initializers={\"deeptabular\": XavierNormal}, optimizers={\"deeptabular\": deep_opt}, metrics=[accuracy, precision, recall, f1], verbose=0, ) trainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=batch_size) X_train = {\"X_tab\": X_tab_train, \"target\": y_train} X_val = {\"X_tab\": X_tab_valid, \"target\": y_valid} asha_scheduler = AsyncHyperBandScheduler( time_attr=\"training_iteration\", metric=\"_metric/val_loss\", mode=\"min\", max_t=100, grace_period=10, reduction_factor=3, brackets=1, ) analysis = tune.run( tune.with_parameters(training_function, X_train=X_train, X_val=X_val), resources_per_trial={\"cpu\": 1, \"gpu\": 0}, progress_reporter=JupyterNotebookReporter(overwrite=True), scheduler=asha_scheduler, config=config, callbacks=[ WandbLoggerCallback( project=config[\"wandb\"][\"project\"], # api_key_file=config[\"wandb\"][\"api_key_file\"], api_key=config[\"wandb\"][\"api_key\"], log_config=True, ) ], )
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/lib/python3.10/tempfile.py:860: ResourceWarning: Implicitly cleaning up <TemporaryDirectory '/var/folders/_2/lrjn1qn54c758tdtktr1bvkc0000gn/T/tmp60pfyl1kwandb'>\n  _warnings.warn(warn_message, ResourceWarning)\n/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/lib/python3.10/tempfile.py:860: ResourceWarning: Implicitly cleaning up <TemporaryDirectory '/var/folders/_2/lrjn1qn54c758tdtktr1bvkc0000gn/T/tmpnjv2rg1wwandb-artifacts'>\n  _warnings.warn(warn_message, ResourceWarning)\n/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/lib/python3.10/tempfile.py:860: ResourceWarning: Implicitly cleaning up <TemporaryDirectory '/var/folders/_2/lrjn1qn54c758tdtktr1bvkc0000gn/T/tmpgebu5k1kwandb-media'>\n  _warnings.warn(warn_message, ResourceWarning)\n/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/lib/python3.10/tempfile.py:860: ResourceWarning: Implicitly cleaning up <TemporaryDirectory '/var/folders/_2/lrjn1qn54c758tdtktr1bvkc0000gn/T/tmpxy9y2yriwandb-media'>\n  _warnings.warn(warn_message, ResourceWarning)\n
In\u00a0[14]: Copied!
analysis.results\n
analysis.results Out[14]:
{'fc9a8_00000': {'_metric': {'train_loss': 0.006297602537127896,\n   'train_Accuracy': 0.9925042986869812,\n   'train_Precision': 0.9939393997192383,\n   'train_Recall': 0.15814851224422455,\n   'train_F1Score': 0.2728785574436188,\n   'val_loss': 0.005045663565397263,\n   'val_Accuracy': 0.9946483969688416,\n   'val_Precision': 1.0,\n   'val_Recall': 0.39534884691238403,\n   'val_F1Score': 0.5666667222976685},\n  'time_this_iter_s': 2.388202428817749,\n  'done': True,\n  'timesteps_total': None,\n  'episodes_total': None,\n  'training_iteration': 5,\n  'trial_id': 'fc9a8_00000',\n  'experiment_id': 'baad1d4f3d924b48b9ece1b9f26c80cc',\n  'date': '2022-07-31_14-06-51',\n  'timestamp': 1659276411,\n  'time_total_s': 12.656474113464355,\n  'pid': 1813,\n  'hostname': 'jupyter-5uperpalo',\n  'node_ip': '10.32.44.172',\n  'config': {'batch_size': 1000},\n  'time_since_restore': 12.656474113464355,\n  'timesteps_since_restore': 0,\n  'iterations_since_restore': 5,\n  'warmup_time': 0.8006253242492676,\n  'experiment_tag': '0_batch_size=1000'},\n 'fc9a8_00001': {'_metric': {'train_loss': 0.02519632239515583,\n   'train_Accuracy': 0.9910891652107239,\n   'train_Precision': 0.25,\n   'train_Recall': 0.0009643201483413577,\n   'train_F1Score': 0.0019212296465411782,\n   'val_loss': 0.02578434906899929,\n   'val_Accuracy': 0.9911492466926575,\n   'val_Precision': 0.0,\n   'val_Recall': 0.0,\n   'val_F1Score': 0.0},\n  'time_this_iter_s': 4.113586902618408,\n  'done': True,\n  'timesteps_total': None,\n  'episodes_total': None,\n  'training_iteration': 5,\n  'trial_id': 'fc9a8_00001',\n  'experiment_id': 'f2e54a6a5780429fbf0db0746853347e',\n  'date': '2022-07-31_14-06-56',\n  'timestamp': 1659276416,\n  'time_total_s': 12.926990509033203,\n  'pid': 1962,\n  'hostname': 'jupyter-5uperpalo',\n  'node_ip': '10.32.44.172',\n  'config': {'batch_size': 5000},\n  'time_since_restore': 12.926990509033203,\n  'timesteps_since_restore': 0,\n  'iterations_since_restore': 5,\n  'warmup_time': 0.9253025054931641,\n  'experiment_tag': '1_batch_size=5000'}}

Using Weights & Biases logging you can create parallel coordinates graphs that map parameter combinations to the best (lowest) loss achieved during the training of the networks.

Local visualization of RayTune results using TensorBoard:

In\u00a0[23]: Copied!
%load_ext tensorboard\n%tensorboard --logdir ~/ray_results\n
%load_ext tensorboard %tensorboard --logdir ~/ray_results"},{"location":"examples/10_3rd_party_integration-RayTune_WnB.html#3rd-party-integration-raytune-weights-biases","title":"3rd party integration - RayTune, Weights & Biases\u00b6","text":"

This notebook provides a guideline for integrating external library functions into the model training process through Callback objects, a popular pattern of passing objects as arguments to other objects.

[DISCLAIMER]

We show the integration of RayTune (a hyperparameter tuning framework) and Weights & Biases (an experiment tracking and versioning solution for ML projects) in the pytorch_widedeep model training process. We did not include RayTuneReporter and WnBReportBest in the library code to avoid adding dependencies on libraries that are not directly involved in model design and training.

"},{"location":"examples/10_3rd_party_integration-RayTune_WnB.html#initial-imports","title":"Initial imports\u00b6","text":""},{"location":"examples/10_3rd_party_integration-RayTune_WnB.html#preparing-the-data","title":"Preparing the data\u00b6","text":""},{"location":"examples/10_3rd_party_integration-RayTune_WnB.html#define-the-model","title":"Define the model\u00b6","text":""},{"location":"examples/11_auc_multiclass.html","title":"11_auc_multiclass","text":"In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nfrom torch.optim import SGD, lr_scheduler\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom torchmetrics import AUROC\nfrom pytorch_widedeep.initializers import XavierNormal\nfrom pytorch_widedeep.datasets import load_ecoli\nfrom pytorch_widedeep.utils import LabelEncoder\n\nfrom sklearn.model_selection import train_test_split\n\n# increase displayed columns in jupyter notebook\npd.set_option(\"display.max_columns\", 200)\npd.set_option(\"display.max_rows\", 300)\n
import numpy as np import pandas as pd from torch.optim import SGD, lr_scheduler from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.models import TabMlp, WideDeep from torchmetrics import AUROC from pytorch_widedeep.initializers import XavierNormal from pytorch_widedeep.datasets import load_ecoli from pytorch_widedeep.utils import LabelEncoder from sklearn.model_selection import train_test_split # increase displayed columns in jupyter notebook pd.set_option(\"display.max_columns\", 200) pd.set_option(\"display.max_rows\", 300) In\u00a0[2]: Copied!
df = load_ecoli(as_frame=True)\ndf.head()\n
df = load_ecoli(as_frame=True) df.head() Out[2]: SequenceName mcg gvh lip chg aac alm1 alm2 class 0 AAT_ECOLI 0.49 0.29 0.48 0.5 0.56 0.24 0.35 cp 1 ACEA_ECOLI 0.07 0.40 0.48 0.5 0.54 0.35 0.44 cp 2 ACEK_ECOLI 0.56 0.40 0.48 0.5 0.49 0.37 0.46 cp 3 ACKA_ECOLI 0.59 0.49 0.48 0.5 0.52 0.45 0.36 cp 4 ADI_ECOLI 0.23 0.32 0.48 0.5 0.55 0.25 0.35 cp In\u00a0[3]: Copied!
# imbalance of the classes\ndf[\"class\"].value_counts()\n
# imbalance of the classes df[\"class\"].value_counts() Out[3]:
class\ncp     143\nim      77\npp      52\nimU     35\nom      20\nomL      5\nimS      2\nimL      2\nName: count, dtype: int64
In\u00a0[4]: Copied!
df = df.loc[~df[\"class\"].isin([\"omL\", \"imS\", \"imL\"])]\ndf.reset_index(inplace=True, drop=True)\n
df = df.loc[~df[\"class\"].isin([\"omL\", \"imS\", \"imL\"])] df.reset_index(inplace=True, drop=True) In\u00a0[5]: Copied!
encoder = LabelEncoder([\"class\"])\ndf_enc = encoder.fit_transform(df)\ndf_enc[\"class\"] = df_enc[\"class\"] - 1\n
encoder = LabelEncoder([\"class\"]) df_enc = encoder.fit_transform(df) df_enc[\"class\"] = df_enc[\"class\"] - 1 In\u00a0[6]: Copied!
# drop columns we won't need in this example\ndf_enc = df_enc.drop(columns=[\"SequenceName\"])\n
# drop columns we won't need in this example df_enc = df_enc.drop(columns=[\"SequenceName\"]) In\u00a0[7]: Copied!
df_train, df_valid = train_test_split(\n    df_enc, test_size=0.2, stratify=df_enc[\"class\"], random_state=1\n)\ndf_valid, df_test = train_test_split(\n    df_valid, test_size=0.5, stratify=df_valid[\"class\"], random_state=1\n)\n
df_train, df_valid = train_test_split( df_enc, test_size=0.2, stratify=df_enc[\"class\"], random_state=1 ) df_valid, df_test = train_test_split( df_valid, test_size=0.5, stratify=df_valid[\"class\"], random_state=1 ) In\u00a0[8]: Copied!
continuous_cols = df_enc.drop(columns=[\"class\"]).columns.values.tolist()\n
continuous_cols = df_enc.drop(columns=[\"class\"]).columns.values.tolist() In\u00a0[9]: Copied!
# deeptabular\ntab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True)\nX_tab_train = tab_preprocessor.fit_transform(df_train)\nX_tab_valid = tab_preprocessor.transform(df_valid)\nX_tab_test = tab_preprocessor.transform(df_test)\n\n# target\ny_train = df_train[\"class\"].values\ny_valid = df_valid[\"class\"].values\ny_test = df_test[\"class\"].values\n\nX_train = {\"X_tab\": X_tab_train, \"target\": y_train}\nX_val = {\"X_tab\": X_tab_valid, \"target\": y_valid}\n
# deeptabular tab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True) X_tab_train = tab_preprocessor.fit_transform(df_train) X_tab_valid = tab_preprocessor.transform(df_valid) X_tab_test = tab_preprocessor.transform(df_test) # target y_train = df_train[\"class\"].values y_valid = df_valid[\"class\"].values y_test = df_test[\"class\"].values X_train = {\"X_tab\": X_tab_train, \"target\": y_train} X_val = {\"X_tab\": X_tab_valid, \"target\": y_valid}
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:295: DeprecationWarning: 'scale' and 'already_standard' will be deprecated in the next release. Please use 'cols_to_scale' instead\n  self._check_inputs(cat_embed_cols)\n
In\u00a0[10]: Copied!
deeptabular = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\nmodel = WideDeep(deeptabular=deeptabular, pred_dim=df_enc[\"class\"].nunique())\nmodel\n
deeptabular = TabMlp( column_idx=tab_preprocessor.column_idx, continuous_cols=tab_preprocessor.continuous_cols, ) model = WideDeep(deeptabular=deeptabular, pred_dim=df_enc[\"class\"].nunique()) model Out[10]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cont_norm): Identity()\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=7, out_features=200, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=200, out_features=100, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=100, out_features=5, bias=True)\n  )\n)
In\u00a0[11]: Copied!
auroc = AUROC(num_classes=df_enc[\"class\"].nunique(), task=\"multiclass\")\n
auroc = AUROC(num_classes=df_enc[\"class\"].nunique(), task=\"multiclass\") In\u00a0[12]: Copied!
# Optimizers\ndeep_opt = SGD(model.deeptabular.parameters(), lr=0.1)\n# LR Scheduler\ndeep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)\n# Hyperparameters\ntrainer = Trainer(\n    model,\n    objective=\"multiclass_focal_loss\",\n    lr_schedulers={\"deeptabular\": deep_sch},\n    initializers={\"deeptabular\": XavierNormal},\n    optimizers={\"deeptabular\": deep_opt},\n    metrics=[auroc],\n)\n\ntrainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50)\n
# Optimizers deep_opt = SGD(model.deeptabular.parameters(), lr=0.1) # LR Scheduler deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3) # Hyperparameters trainer = Trainer( model, objective=\"multiclass_focal_loss\", lr_schedulers={\"deeptabular\": deep_sch}, initializers={\"deeptabular\": XavierNormal}, optimizers={\"deeptabular\": deep_opt}, metrics=[auroc], ) trainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 6/6 [00:00<00:00, 54.59it/s, loss=0.109, metrics={'MulticlassAUROC': 0.314}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1/1 [00:00<00:00, 98.35it/s, loss=0.105, metrics={'MulticlassAUROC': 0.2558}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 6/6 [00:00<00:00, 91.55it/s, loss=0.105, metrics={'MulticlassAUROC': 0.3546}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1/1 [00:00<00:00, 111.68it/s, loss=0.101, metrics={'MulticlassAUROC': 0.2737}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 6/6 [00:00<00:00, 62.55it/s, loss=0.1, metrics={'MulticlassAUROC': 0.3795}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1/1 [00:00<00:00, 108.51it/s, loss=0.0966, metrics={'MulticlassAUROC': 0.3053}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 6/6 [00:00<00:00, 99.35it/s, loss=0.0965, metrics={'MulticlassAUROC': 0.3809}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1/1 [00:00<00:00, 117.73it/s, loss=0.0962, metrics={'MulticlassAUROC': 0.3089}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 6/6 [00:00<00:00, 110.56it/s, loss=0.0967, 
metrics={'MulticlassAUROC': 0.3509}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1/1 [00:00<00:00, 127.35it/s, loss=0.0958, metrics={'MulticlassAUROC': 0.3089}]\n
In\u00a0[\u00a0]: Copied!
\n
"},{"location":"examples/11_auc_multiclass.html#auc-multiclass-computation","title":"AUC multiclass computation\u00b6","text":""},{"location":"examples/11_auc_multiclass.html#initial-imports","title":"Initial imports\u00b6","text":""},{"location":"examples/11_auc_multiclass.html#preparing-the-data","title":"Preparing the data\u00b6","text":""},{"location":"examples/11_auc_multiclass.html#define-the-model","title":"Define the model\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html","title":"12_ZILNLoss_origkeras_vs_pytorch_widedeep","text":"In\u00a0[1]: Copied!
# @title Copyright 2019 The Lifetime Value Authors.\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     https://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ============================================================================\n
# @title Copyright 2019 The Lifetime Value Authors. # Licensed under the Apache License, Version 2.0 (the \"License\"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an \"AS IS\" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ Run in Google Colab View source on GitHub In\u00a0[3]: Copied!
import os\n\nimport numpy as np\nimport pandas as pd\nfrom scipy import stats\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nimport tensorflow as tf\nimport tensorflow_probability as tfp\nfrom typing import Sequence\n\n# install and import ltv\n!pip install -q git+https://github.com/google/lifetime_value\nimport lifetime_value as ltv\n
import os import numpy as np import pandas as pd from scipy import stats import matplotlib.pyplot as plt import seaborn as sns import tensorflow as tf import tensorflow_probability as tfp from typing import Sequence # install and import ltv !pip install -q git+https://github.com/google/lifetime_value import lifetime_value as ltv In\u00a0[\u00a0]: Copied!
tfd = tfp.distributions\n%config InlineBackend.figure_format='retina'\nsns.set_style(\"whitegrid\")\n
tfd = tfp.distributions %config InlineBackend.figure_format='retina' sns.set_style(\"whitegrid\") In\u00a0[\u00a0]: Copied!
MODEL = \"dnn\"\nLOSS = \"ziln\"  # @param { isTemplate: true, type: 'string'} ['mse', 'ziln']\nLEARNING_RATE = 0.001  # @param { isTemplate: true}\nVERSION = 0  # @param { isTemplate: true, type: 'integer'}\nOUTPUT_CSV_FOLDER = \"/tmp/lifetime-value/kdd_cup_98/result\"  # @param { isTemplate: true, type: 'string'}\n
MODEL = \"dnn\" LOSS = \"ziln\" # @param { isTemplate: true, type: 'string'} ['mse', 'ziln'] LEARNING_RATE = 0.001 # @param { isTemplate: true} VERSION = 0 # @param { isTemplate: true, type: 'integer'} OUTPUT_CSV_FOLDER = \"/tmp/lifetime-value/kdd_cup_98/result\" # @param { isTemplate: true, type: 'string'}

Download kdd_cup_98 data to /tmp/lifetime-value/kdd_cup_98

In\u00a0[\u00a0]: Copied!
%%bash\nmkdir -p /tmp/lifetime-value/kdd_cup_98\nwget https://kdd.ics.uci.edu/databases/kddcup98/epsilon_mirror/cup98lrn.zip -P /tmp/lifetime-value/kdd_cup_98/\nwget https://kdd.ics.uci.edu/databases/kddcup98/epsilon_mirror/cup98val.zip -P /tmp/lifetime-value/kdd_cup_98/\nwget https://kdd.ics.uci.edu/databases/kddcup98/epsilon_mirror/valtargt.txt -P /tmp/lifetime-value/kdd_cup_98/\ncd /tmp/lifetime-value/kdd_cup_98/\nunzip cup98lrn.zip\nunzip cup98val.zip\n
%%bash mkdir -p /tmp/lifetime-value/kdd_cup_98 wget https://kdd.ics.uci.edu/databases/kddcup98/epsilon_mirror/cup98lrn.zip -P /tmp/lifetime-value/kdd_cup_98/ wget https://kdd.ics.uci.edu/databases/kddcup98/epsilon_mirror/cup98val.zip -P /tmp/lifetime-value/kdd_cup_98/ wget https://kdd.ics.uci.edu/databases/kddcup98/epsilon_mirror/valtargt.txt -P /tmp/lifetime-value/kdd_cup_98/ cd /tmp/lifetime-value/kdd_cup_98/ unzip cup98lrn.zip unzip cup98val.zip In\u00a0[\u00a0]: Copied!
df_train = pd.read_csv(\"/tmp/lifetime-value/kdd_cup_98/cup98LRN.txt\")\nnum_train = df_train.shape[0]\ndf_eval = pd.read_csv(\"/tmp/lifetime-value/kdd_cup_98/cup98VAL.txt\")\ndf_eval_target = pd.read_csv(\"/tmp/lifetime-value/kdd_cup_98/valtargt.txt\")\ndf_eval = df_eval.merge(df_eval_target, on=\"CONTROLN\")\n
df_train = pd.read_csv(\"/tmp/lifetime-value/kdd_cup_98/cup98LRN.txt\") num_train = df_train.shape[0] df_eval = pd.read_csv(\"/tmp/lifetime-value/kdd_cup_98/cup98VAL.txt\") df_eval_target = pd.read_csv(\"/tmp/lifetime-value/kdd_cup_98/valtargt.txt\") df_eval = df_eval.merge(df_eval_target, on=\"CONTROLN\") In\u00a0[\u00a0]: Copied!
df = pd.concat([df_train, df_eval], axis=0, sort=True)\n
df = pd.concat([df_train, df_eval], axis=0, sort=True) In\u00a0[\u00a0]: Copied!
y = df[\"TARGET_D\"][:num_train]\n
y = df[\"TARGET_D\"][:num_train] In\u00a0[\u00a0]: Copied!
def plot_hist_log_scale(y):\n    max_val = y.max() + 1.0\n    ax = pd.Series(y).hist(\n        figsize=(8, 5), bins=10 ** np.linspace(0.0, np.log10(max_val), 20)\n    )\n\n    plt.xlabel(\"Donation ($)\")\n    plt.ylabel(\"Count\")\n    # plt.title('Histogram of LTV')\n    plt.xticks(rotation=\"horizontal\")\n    plt.legend(loc=\"upper left\")\n    ax.set_xscale(\"log\")\n    ax.grid(False)\n    # Hide the right and top spines\n    ax.spines[\"right\"].set_visible(False)\n    ax.spines[\"top\"].set_visible(False)\n    # Only show ticks on the left and bottom spines\n    ax.yaxis.set_ticks_position(\"left\")\n    ax.xaxis.set_ticks_position(\"bottom\")\n    plt.show()\n\n    fig = ax.get_figure()\n    output_file = tf.io.gfile.GFile(\n        \"/tmp/lifetime-value/kdd_cup_98/histogram_kdd98_log_scale.pdf\", \"wb\"\n    )\n    fig.savefig(output_file, bbox_inches=\"tight\", format=\"pdf\")\n
def plot_hist_log_scale(y): max_val = y.max() + 1.0 ax = pd.Series(y).hist( figsize=(8, 5), bins=10 ** np.linspace(0.0, np.log10(max_val), 20) ) plt.xlabel(\"Donation ($)\") plt.ylabel(\"Count\") # plt.title('Histogram of LTV') plt.xticks(rotation=\"horizontal\") plt.legend(loc=\"upper left\") ax.set_xscale(\"log\") ax.grid(False) # Hide the right and top spines ax.spines[\"right\"].set_visible(False) ax.spines[\"top\"].set_visible(False) # Only show ticks on the left and bottom spines ax.yaxis.set_ticks_position(\"left\") ax.xaxis.set_ticks_position(\"bottom\") plt.show() fig = ax.get_figure() output_file = tf.io.gfile.GFile( \"/tmp/lifetime-value/kdd_cup_98/histogram_kdd98_log_scale.pdf\", \"wb\" ) fig.savefig(output_file, bbox_inches=\"tight\", format=\"pdf\") In\u00a0[\u00a0]: Copied!
plot_hist_log_scale(y[y > 0])\n
plot_hist_log_scale(y[y > 0]) In\u00a0[\u00a0]: Copied!
VOCAB_FEATURES = [\n    \"ODATEDW\",  # date of donor's first gift (YYMM)\n    \"OSOURCE\",  # donor acquisition mailing list\n    \"TCODE\",  # donor title code\n    \"STATE\",\n    \"ZIP\",\n    \"DOMAIN\",  # urbanicity level and socio-economic status of the neighborhood\n    \"CLUSTER\",  # socio-economic status\n    \"GENDER\",\n    \"MAXADATE\",  # date of the most recent promotion received\n    \"MINRDATE\",\n    \"LASTDATE\",\n    \"FISTDATE\",\n    \"RFA_2A\",\n]\n
VOCAB_FEATURES = [ \"ODATEDW\", # date of donor's first gift (YYMM) \"OSOURCE\", # donor acquisition mailing list \"TCODE\", # donor title code \"STATE\", \"ZIP\", \"DOMAIN\", # urbanicity level and socio-economic status of the neighborhood \"CLUSTER\", # socio-economic status \"GENDER\", \"MAXADATE\", # date of the most recent promotion received \"MINRDATE\", \"LASTDATE\", \"FISTDATE\", \"RFA_2A\", ] In\u00a0[\u00a0]: Copied!
df[\"ODATEDW\"] = df[\"ODATEDW\"].astype(\"str\")\ndf[\"TCODE\"] = df[\"TCODE\"].apply(lambda x: \"{:03d}\".format(x // 1000 if x > 1000 else x))\ndf[\"ZIP\"] = df[\"ZIP\"].str.slice(0, 5)\ndf[\"MAXADATE\"] = df[\"MAXADATE\"].astype(\"str\")\ndf[\"MINRDATE\"] = df[\"MINRDATE\"].astype(\"str\")\ndf[\"LASTDATE\"] = df[\"LASTDATE\"].astype(\"str\")\ndf[\"FISTDATE\"] = df[\"FISTDATE\"].astype(\"str\")\n
df[\"ODATEDW\"] = df[\"ODATEDW\"].astype(\"str\") df[\"TCODE\"] = df[\"TCODE\"].apply(lambda x: \"{:03d}\".format(x // 1000 if x > 1000 else x)) df[\"ZIP\"] = df[\"ZIP\"].str.slice(0, 5) df[\"MAXADATE\"] = df[\"MAXADATE\"].astype(\"str\") df[\"MINRDATE\"] = df[\"MINRDATE\"].astype(\"str\") df[\"LASTDATE\"] = df[\"LASTDATE\"].astype(\"str\") df[\"FISTDATE\"] = df[\"FISTDATE\"].astype(\"str\") In\u00a0[\u00a0]: Copied!
def label_encoding(y, frequency_threshold=100):\n    value_counts = pd.value_counts(y)\n    categories = value_counts[value_counts >= frequency_threshold].index.to_numpy()\n    # 0 indicates the unknown category.\n    return pd.Categorical(y, categories=categories).codes + 1\n
def label_encoding(y, frequency_threshold=100): value_counts = pd.value_counts(y) categories = value_counts[value_counts >= frequency_threshold].index.to_numpy() # 0 indicates the unknown category. return pd.Categorical(y, categories=categories).codes + 1 In\u00a0[\u00a0]: Copied!
for key in VOCAB_FEATURES:\n    df[key] = label_encoding(df[key])\n
for key in VOCAB_FEATURES: df[key] = label_encoding(df[key]) In\u00a0[\u00a0]: Copied!
MAIL_ORDER_RESPONSES = [\n    \"MBCRAFT\",\n    \"MBGARDEN\",\n    \"MBBOOKS\",\n    \"MBCOLECT\",\n    \"MAGFAML\",\n    \"MAGFEM\",\n    \"MAGMALE\",\n    \"PUBGARDN\",\n    \"PUBCULIN\",\n    \"PUBHLTH\",\n    \"PUBDOITY\",\n    \"PUBNEWFN\",\n    \"PUBPHOTO\",\n    \"PUBOPP\",\n    \"RFA_2F\",\n]\n
MAIL_ORDER_RESPONSES = [ \"MBCRAFT\", \"MBGARDEN\", \"MBBOOKS\", \"MBCOLECT\", \"MAGFAML\", \"MAGFEM\", \"MAGMALE\", \"PUBGARDN\", \"PUBCULIN\", \"PUBHLTH\", \"PUBDOITY\", \"PUBNEWFN\", \"PUBPHOTO\", \"PUBOPP\", \"RFA_2F\", ] In\u00a0[\u00a0]: Copied!
INDICATOR_FEATURES = [\n    \"AGE\",  # age decile, 0 indicates unknown\n    \"NUMCHLD\",\n    \"INCOME\",\n    \"WEALTH1\",\n    \"HIT\",\n] + MAIL_ORDER_RESPONSES\n
INDICATOR_FEATURES = [ \"AGE\", # age decile, 0 indicates unknown \"NUMCHLD\", \"INCOME\", \"WEALTH1\", \"HIT\", ] + MAIL_ORDER_RESPONSES In\u00a0[\u00a0]: Copied!
df[\"AGE\"] = pd.qcut(df[\"AGE\"].values, 10).codes + 1\ndf[\"NUMCHLD\"] = df[\"NUMCHLD\"].apply(lambda x: 0 if np.isnan(x) else int(x))\ndf[\"INCOME\"] = df[\"INCOME\"].apply(lambda x: 0 if np.isnan(x) else int(x))\ndf[\"WEALTH1\"] = df[\"WEALTH1\"].apply(lambda x: 0 if np.isnan(x) else int(x) + 1)\ndf[\"HIT\"] = pd.qcut(df[\"HIT\"].values, q=50, duplicates=\"drop\").codes\n\nfor col in MAIL_ORDER_RESPONSES:\n    df[col] = pd.qcut(df[col].values, q=20, duplicates=\"drop\").codes + 1\n
df[\"AGE\"] = pd.qcut(df[\"AGE\"].values, 10).codes + 1 df[\"NUMCHLD\"] = df[\"NUMCHLD\"].apply(lambda x: 0 if np.isnan(x) else int(x)) df[\"INCOME\"] = df[\"INCOME\"].apply(lambda x: 0 if np.isnan(x) else int(x)) df[\"WEALTH1\"] = df[\"WEALTH1\"].apply(lambda x: 0 if np.isnan(x) else int(x) + 1) df[\"HIT\"] = pd.qcut(df[\"HIT\"].values, q=50, duplicates=\"drop\").codes for col in MAIL_ORDER_RESPONSES: df[col] = pd.qcut(df[col].values, q=20, duplicates=\"drop\").codes + 1 In\u00a0[\u00a0]: Copied!
NUMERIC_FEATURES = [\n    # binary\n    \"MAILCODE\",  # bad address\n    \"NOEXCH\",  # do not exchange\n    \"RECINHSE\",  # donor has given to PVA's in house program\n    \"RECP3\",  # donor has given to PVA's P3 program\n    \"RECPGVG\",  # planned giving record\n    \"RECSWEEP\",  # sweepstakes record\n    \"HOMEOWNR\",  # home owner\n    \"CHILD03\",\n    \"CHILD07\",\n    \"CHILD12\",\n    \"CHILD18\",\n    # continuous\n    \"CARDPROM\",\n    \"NUMPROM\",\n    \"CARDPM12\",\n    \"NUMPRM12\",\n    \"RAMNTALL\",\n    \"NGIFTALL\",\n    \"MINRAMNT\",\n    \"MAXRAMNT\",\n    \"LASTGIFT\",\n    \"AVGGIFT\",\n]\n
NUMERIC_FEATURES = [ # binary \"MAILCODE\", # bad address \"NOEXCH\", # do not exchange \"RECINHSE\", # donor has given to PVA's in house program \"RECP3\", # donor has given to PVA's P3 program \"RECPGVG\", # planned giving record \"RECSWEEP\", # sweepstakes record \"HOMEOWNR\", # home owner \"CHILD03\", \"CHILD07\", \"CHILD12\", \"CHILD18\", # continuous \"CARDPROM\", \"NUMPROM\", \"CARDPM12\", \"NUMPRM12\", \"RAMNTALL\", \"NGIFTALL\", \"MINRAMNT\", \"MAXRAMNT\", \"LASTGIFT\", \"AVGGIFT\", ] In\u00a0[\u00a0]: Copied!
df[\"MAILCODE\"] = (df[\"MAILCODE\"] == \"B\").astype(\"float32\")\ndf[\"PVASTATE\"] = df[\"PVASTATE\"].isin([\"P\", \"E\"]).astype(\"float32\")\ndf[\"NOEXCH\"] = df[\"NOEXCH\"].isin([\"X\", \"1\"]).astype(\"float32\")\ndf[\"RECINHSE\"] = (df[\"RECINHSE\"] == \"X\").astype(\"float32\")\ndf[\"RECP3\"] = (df[\"RECP3\"] == \"X\").astype(\"float32\")\ndf[\"RECPGVG\"] = (df[\"RECPGVG\"] == \"X\").astype(\"float32\")\ndf[\"RECSWEEP\"] = (df[\"RECSWEEP\"] == \"X\").astype(\"float32\")\ndf[\"HOMEOWNR\"] = (df[\"HOMEOWNR\"] == \"H\").astype(\"float32\")\ndf[\"CHILD03\"] = df[\"CHILD03\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\")\ndf[\"CHILD07\"] = df[\"CHILD07\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\")\ndf[\"CHILD12\"] = df[\"CHILD12\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\")\ndf[\"CHILD18\"] = df[\"CHILD18\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\")\n\ndf[\"CARDPROM\"] = df[\"CARDPROM\"] / 100\ndf[\"NUMPROM\"] = df[\"NUMPROM\"] / 100\ndf[\"CARDPM12\"] = df[\"CARDPM12\"] / 100\ndf[\"NUMPRM12\"] = df[\"NUMPRM12\"] / 100\ndf[\"RAMNTALL\"] = np.log1p(df[\"RAMNTALL\"])\ndf[\"NGIFTALL\"] = np.log1p(df[\"NGIFTALL\"])\ndf[\"MINRAMNT\"] = np.log1p(df[\"MINRAMNT\"])\ndf[\"MAXRAMNT\"] = np.log1p(df[\"MAXRAMNT\"])\ndf[\"LASTGIFT\"] = np.log1p(df[\"LASTGIFT\"])\ndf[\"AVGGIFT\"] = np.log1p(df[\"AVGGIFT\"])\n
df[\"MAILCODE\"] = (df[\"MAILCODE\"] == \"B\").astype(\"float32\") df[\"PVASTATE\"] = df[\"PVASTATE\"].isin([\"P\", \"E\"]).astype(\"float32\") df[\"NOEXCH\"] = df[\"NOEXCH\"].isin([\"X\", \"1\"]).astype(\"float32\") df[\"RECINHSE\"] = (df[\"RECINHSE\"] == \"X\").astype(\"float32\") df[\"RECP3\"] = (df[\"RECP3\"] == \"X\").astype(\"float32\") df[\"RECPGVG\"] = (df[\"RECPGVG\"] == \"X\").astype(\"float32\") df[\"RECSWEEP\"] = (df[\"RECSWEEP\"] == \"X\").astype(\"float32\") df[\"HOMEOWNR\"] = (df[\"HOMEOWNR\"] == \"H\").astype(\"float32\") df[\"CHILD03\"] = df[\"CHILD03\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\") df[\"CHILD07\"] = df[\"CHILD07\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\") df[\"CHILD12\"] = df[\"CHILD12\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\") df[\"CHILD18\"] = df[\"CHILD18\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\") df[\"CARDPROM\"] = df[\"CARDPROM\"] / 100 df[\"NUMPROM\"] = df[\"NUMPROM\"] / 100 df[\"CARDPM12\"] = df[\"CARDPM12\"] / 100 df[\"NUMPRM12\"] = df[\"NUMPRM12\"] / 100 df[\"RAMNTALL\"] = np.log1p(df[\"RAMNTALL\"]) df[\"NGIFTALL\"] = np.log1p(df[\"NGIFTALL\"]) df[\"MINRAMNT\"] = np.log1p(df[\"MINRAMNT\"]) df[\"MAXRAMNT\"] = np.log1p(df[\"MAXRAMNT\"]) df[\"LASTGIFT\"] = np.log1p(df[\"LASTGIFT\"]) df[\"AVGGIFT\"] = np.log1p(df[\"AVGGIFT\"]) In\u00a0[\u00a0]: Copied!
CATEGORICAL_FEATURES = VOCAB_FEATURES + INDICATOR_FEATURES\nALL_FEATURES = CATEGORICAL_FEATURES + NUMERIC_FEATURES\n
CATEGORICAL_FEATURES = VOCAB_FEATURES + INDICATOR_FEATURES ALL_FEATURES = CATEGORICAL_FEATURES + NUMERIC_FEATURES In\u00a0[\u00a0]: Copied!
def dnn_split(df):\n    df_train = df.iloc[:num_train]\n    df_eval = df.iloc[num_train:]\n\n    def feature_dict(df):\n        features = {k: v.values for k, v in dict(df[CATEGORICAL_FEATURES]).items()}\n        features[\"numeric\"] = df[NUMERIC_FEATURES].astype(\"float32\").values\n        return features\n\n    x_train, y_train = (\n        feature_dict(df_train),\n        df_train[\"TARGET_D\"].astype(\"float32\").values,\n    )\n    x_eval, y_eval = feature_dict(df_eval), df_eval[\"TARGET_D\"].astype(\"float32\").values\n\n    return x_train, x_eval, y_train, y_eval\n
def dnn_split(df): df_train = df.iloc[:num_train] df_eval = df.iloc[num_train:] def feature_dict(df): features = {k: v.values for k, v in dict(df[CATEGORICAL_FEATURES]).items()} features[\"numeric\"] = df[NUMERIC_FEATURES].astype(\"float32\").values return features x_train, y_train = ( feature_dict(df_train), df_train[\"TARGET_D\"].astype(\"float32\").values, ) x_eval, y_eval = feature_dict(df_eval), df_eval[\"TARGET_D\"].astype(\"float32\").values return x_train, x_eval, y_train, y_eval In\u00a0[\u00a0]: Copied!
def embedding_dim(x):\n    return int(x**0.25) + 1\n\n\ndef embedding_layer(vocab_size):\n    return tf.keras.Sequential(\n        [\n            tf.keras.layers.Embedding(\n                input_dim=vocab_size,\n                output_dim=embedding_dim(vocab_size),\n                input_length=1,\n            ),\n            tf.keras.layers.Flatten(),\n        ]\n    )\n\n\ndef dnn_model(output_units):\n    numeric_input = tf.keras.layers.Input(\n        shape=(len(NUMERIC_FEATURES),), name=\"numeric\"\n    )\n\n    embedding_inputs = [\n        tf.keras.layers.Input(shape=(1,), name=key, dtype=np.int64)\n        for key in CATEGORICAL_FEATURES\n    ]\n\n    embedding_outputs = [\n        embedding_layer(vocab_size=df[key].max() + 1)(input)\n        for key, input in zip(CATEGORICAL_FEATURES, embedding_inputs)\n    ]\n\n    deep_input = tf.keras.layers.concatenate([numeric_input] + embedding_outputs)\n    deep_model = tf.keras.Sequential(\n        [\n            tf.keras.layers.Dense(128, activation=\"relu\"),\n            tf.keras.layers.Dense(128, activation=\"relu\"),\n            tf.keras.layers.Dense(64, activation=\"relu\"),\n            tf.keras.layers.Dense(64, activation=\"relu\"),\n            tf.keras.layers.Dense(units=output_units),\n        ]\n    )\n    return tf.keras.Model(\n        inputs=[numeric_input] + embedding_inputs, outputs=deep_model(deep_input)\n    )\n
def embedding_dim(x): return int(x**0.25) + 1 def embedding_layer(vocab_size): return tf.keras.Sequential( [ tf.keras.layers.Embedding( input_dim=vocab_size, output_dim=embedding_dim(vocab_size), input_length=1, ), tf.keras.layers.Flatten(), ] ) def dnn_model(output_units): numeric_input = tf.keras.layers.Input( shape=(len(NUMERIC_FEATURES),), name=\"numeric\" ) embedding_inputs = [ tf.keras.layers.Input(shape=(1,), name=key, dtype=np.int64) for key in CATEGORICAL_FEATURES ] embedding_outputs = [ embedding_layer(vocab_size=df[key].max() + 1)(input) for key, input in zip(CATEGORICAL_FEATURES, embedding_inputs) ] deep_input = tf.keras.layers.concatenate([numeric_input] + embedding_outputs) deep_model = tf.keras.Sequential( [ tf.keras.layers.Dense(128, activation=\"relu\"), tf.keras.layers.Dense(128, activation=\"relu\"), tf.keras.layers.Dense(64, activation=\"relu\"), tf.keras.layers.Dense(64, activation=\"relu\"), tf.keras.layers.Dense(units=output_units), ] ) return tf.keras.Model( inputs=[numeric_input] + embedding_inputs, outputs=deep_model(deep_input) ) In\u00a0[\u00a0]: Copied!
if LOSS == \"mse\":\n    loss = tf.keras.losses.MeanSquaredError()\n    output_units = 1\n\nif LOSS == \"ziln\":\n    loss = ltv.zero_inflated_lognormal_loss\n    output_units = 3\n
if LOSS == \"mse\": loss = tf.keras.losses.MeanSquaredError() output_units = 1 if LOSS == \"ziln\": loss = ltv.zero_inflated_lognormal_loss output_units = 3 In\u00a0[\u00a0]: Copied!
x_train, x_eval, y_train, y_eval = dnn_split(df)\nmodel = dnn_model(output_units)\n
x_train, x_eval, y_train, y_eval = dnn_split(df) model = dnn_model(output_units) In\u00a0[\u00a0]: Copied!
model.compile(optimizer=tf.keras.optimizers.Nadam(lr=LEARNING_RATE), loss=loss)\n
model.compile(optimizer=tf.keras.optimizers.Nadam(lr=LEARNING_RATE), loss=loss) In\u00a0[\u00a0]: Copied!
callbacks = [\n    tf.keras.callbacks.ReduceLROnPlateau(monitor=\"val_loss\", min_lr=1e-6),\n    tf.keras.callbacks.EarlyStopping(monitor=\"val_loss\", patience=10),\n]\n
callbacks = [ tf.keras.callbacks.ReduceLROnPlateau(monitor=\"val_loss\", min_lr=1e-6), tf.keras.callbacks.EarlyStopping(monitor=\"val_loss\", patience=10), ] In\u00a0[\u00a0]: Copied!
history = model.fit(\n    x=x_train,\n    y=y_train,\n    batch_size=2048,\n    epochs=200,\n    verbose=2,\n    callbacks=callbacks,\n    validation_data=(x_eval, y_eval),\n).history\n
history = model.fit( x=x_train, y=y_train, batch_size=2048, epochs=200, verbose=2, callbacks=callbacks, validation_data=(x_eval, y_eval), ).history In\u00a0[\u00a0]: Copied!
pd.DataFrame(history)[[\"loss\", \"val_loss\"]].plot();\n
pd.DataFrame(history)[[\"loss\", \"val_loss\"]].plot(); In\u00a0[\u00a0]: Copied!
if LOSS == \"mse\":\n    y_pred = model.predict(x=x_eval, batch_size=1024).flatten()\n\nif LOSS == \"ziln\":\n    logits = model.predict(x=x_eval, batch_size=1024)\n    y_pred = ltv.zero_inflated_lognormal_pred(logits).numpy().flatten()\n
if LOSS == \"mse\": y_pred = model.predict(x=x_eval, batch_size=1024).flatten() if LOSS == \"ziln\": logits = model.predict(x=x_eval, batch_size=1024) y_pred = ltv.zero_inflated_lognormal_pred(logits).numpy().flatten() In\u00a0[\u00a0]: Copied!
from pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.training import Trainer\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom torch.optim.lr_scheduler import ReduceLROnPlateau\nfrom pytorch_widedeep.callbacks import EarlyStopping\nfrom torch.optim import NAdam\n
from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.training import Trainer from pytorch_widedeep.models import TabMlp, WideDeep from torch.optim.lr_scheduler import ReduceLROnPlateau from pytorch_widedeep.callbacks import EarlyStopping from torch.optim import NAdam In\u00a0[\u00a0]: Copied!
# CATEGORICAL_FEATURES\nNUMERICAL_FEATURES = [\"num\" + str(i) for i in range(21)]\nx_train_pyt_num = pd.DataFrame(columns=NUMERICAL_FEATURES, data=x_train[\"numeric\"])\nx_train_pyt_cat = pd.DataFrame(\n    {key: value for key, value in x_train.items() if key not in [\"numeric\"]}\n)\n\nx_eval_pyt_num = pd.DataFrame(columns=NUMERICAL_FEATURES, data=x_eval[\"numeric\"])\nx_eval_pyt_cat = pd.DataFrame(\n    {key: value for key, value in x_eval.items() if key not in [\"numeric\"]}\n)\n
# CATEGORICAL_FEATURES NUMERICAL_FEATURES = [\"num\" + str(i) for i in range(21)] x_train_pyt_num = pd.DataFrame(columns=NUMERICAL_FEATURES, data=x_train[\"numeric\"]) x_train_pyt_cat = pd.DataFrame( {key: value for key, value in x_train.items() if key not in [\"numeric\"]} ) x_eval_pyt_num = pd.DataFrame(columns=NUMERICAL_FEATURES, data=x_eval[\"numeric\"]) x_eval_pyt_cat = pd.DataFrame( {key: value for key, value in x_eval.items() if key not in [\"numeric\"]} ) In\u00a0[\u00a0]: Copied!
x_train_pyt = pd.concat([x_train_pyt_num, x_train_pyt_cat], axis=1)\nx_eval_pyt = pd.concat([x_eval_pyt_num, x_eval_pyt_cat], axis=1)\n
x_train_pyt = pd.concat([x_train_pyt_num, x_train_pyt_cat], axis=1) x_eval_pyt = pd.concat([x_eval_pyt_num, x_eval_pyt_cat], axis=1) In\u00a0[\u00a0]: Copied!
embed_input = [\n    (u, int(x_train_pyt[u].nunique() ** 0.25) + 1) for u in CATEGORICAL_FEATURES\n]\n
embed_input = [ (u, int(x_train_pyt[u].nunique() ** 0.25) + 1) for u in CATEGORICAL_FEATURES ] In\u00a0[\u00a0]: Copied!
# deeptabular\ntab_preprocessor = TabPreprocessor(\n    embed_cols=embed_input,\n    continuous_cols=NUMERICAL_FEATURES,\n    shared_embed=False,\n    scale=False,\n)\nX_tab_train = tab_preprocessor.fit_transform(x_train_pyt)\nX_tab_valid = tab_preprocessor.transform(x_eval_pyt)\nX_tab_test = tab_preprocessor.transform(x_eval_pyt)\n\n# target\ny_train = y_train\ny_valid = y_eval\ny_test = y_train\n\nX_train = {\"X_tab\": X_tab_train, \"target\": y_train}\nX_val = {\"X_tab\": X_tab_valid, \"target\": y_valid}\nX_test = {\"X_tab\": X_tab_test}\n\ndeeptabular = TabMlp(\n    mlp_hidden_dims=[128, 128, 64, 64],\n    column_idx=tab_preprocessor.column_idx,\n    embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\n\nmodel = WideDeep(deeptabular=deeptabular, pred_dim=3)\n\ndeep_opt = NAdam(model.deeptabular.parameters(), lr=LEARNING_RATE)\ncallbacks = [EarlyStopping()]\ndeep_sch = ReduceLROnPlateau(deep_opt, min_lr=1e-6)\n\nobjective = \"ziln\"\n\ntrainer = Trainer(\n    model,\n    callbacks=callbacks,\n    lr_schedulers={\"deeptabular\": deep_sch},\n    objective=objective,\n    optimizers={\"deeptabular\": deep_opt},\n)\n\ntrainer.fit(\n    X_train=X_train,\n    X_val=X_val,\n    n_epochs=200,\n    batch_size=2048,\n)\n\ny_pred_pytorch = trainer.predict(X_test=X_test)\n
# deeptabular tab_preprocessor = TabPreprocessor( embed_cols=embed_input, continuous_cols=NUMERICAL_FEATURES, shared_embed=False, scale=False, ) X_tab_train = tab_preprocessor.fit_transform(x_train_pyt) X_tab_valid = tab_preprocessor.transform(x_eval_pyt) X_tab_test = tab_preprocessor.transform(x_eval_pyt) # target y_train = y_train y_valid = y_eval y_test = y_train X_train = {\"X_tab\": X_tab_train, \"target\": y_train} X_val = {\"X_tab\": X_tab_valid, \"target\": y_valid} X_test = {\"X_tab\": X_tab_test} deeptabular = TabMlp( mlp_hidden_dims=[128, 128, 64, 64], column_idx=tab_preprocessor.column_idx, embed_input=tab_preprocessor.cat_embed_input, continuous_cols=tab_preprocessor.continuous_cols, ) model = WideDeep(deeptabular=deeptabular, pred_dim=3) deep_opt = NAdam(model.deeptabular.parameters(), lr=LEARNING_RATE) callbacks = [EarlyStopping()] deep_sch = ReduceLROnPlateau(deep_opt, min_lr=1e-6) objective = \"ziln\" trainer = Trainer( model, callbacks=callbacks, lr_schedulers={\"deeptabular\": deep_sch}, objective=objective, optimizers={\"deeptabular\": deep_opt}, ) trainer.fit( X_train=X_train, X_val=X_val, n_epochs=200, batch_size=2048, ) y_pred_pytorch = trainer.predict(X_test=X_test) In\u00a0[\u00a0]: Copied!
pd.DataFrame(trainer.history)[[\"train_loss\", \"val_loss\"]].plot();\n
pd.DataFrame(trainer.history)[[\"train_loss\", \"val_loss\"]].plot(); In\u00a0[\u00a0]: Copied!
from sklearn.metrics import mean_squared_error\n\nmean_squared_error(y_pred, y_pred_pytorch)\n
from sklearn.metrics import mean_squared_error mean_squared_error(y_pred, y_pred_pytorch) In\u00a0[\u00a0]: Copied!
unit_costs = [0.4, 0.5, 0.6, 0.68, 0.7, 0.8, 0.9, 1.0]\n
unit_costs = [0.4, 0.5, 0.6, 0.68, 0.7, 0.8, 0.9, 1.0] In\u00a0[\u00a0]: Copied!
num_mailed = [np.sum(y_pred > v) for v in unit_costs]\nnum_mailed\n
num_mailed = [np.sum(y_pred > v) for v in unit_costs] num_mailed In\u00a0[\u00a0]: Copied!
baseline_total_profit = np.sum(y_eval - 0.68)\nbaseline_total_profit\n
baseline_total_profit = np.sum(y_eval - 0.68) baseline_total_profit In\u00a0[\u00a0]: Copied!
total_profits = [np.sum(y_eval[y_pred > v] - v) for v in unit_costs]\ntotal_profits\n
total_profits = [np.sum(y_eval[y_pred > v] - v) for v in unit_costs] total_profits In\u00a0[\u00a0]: Copied!
gain = pd.DataFrame(\n    {\n        \"lorenz\": ltv.cumulative_true(y_eval, y_eval),\n        \"baseline\": ltv.cumulative_true(y_eval, x_eval[\"numeric\"][:, 19]),\n        \"model\": ltv.cumulative_true(y_eval, y_pred),\n    }\n)\n
gain = pd.DataFrame( { \"lorenz\": ltv.cumulative_true(y_eval, y_eval), \"baseline\": ltv.cumulative_true(y_eval, x_eval[\"numeric\"][:, 19]), \"model\": ltv.cumulative_true(y_eval, y_pred), } ) In\u00a0[\u00a0]: Copied!
num_customers = np.float32(gain.shape[0])\ngain[\"cumulative_customer\"] = (np.arange(num_customers) + 1.0) / num_customers\n
num_customers = np.float32(gain.shape[0]) gain[\"cumulative_customer\"] = (np.arange(num_customers) + 1.0) / num_customers In\u00a0[\u00a0]: Copied!
ax = gain[\n    [\n        \"cumulative_customer\",\n        \"lorenz\",\n        \"baseline\",\n        \"model\",\n    ]\n].plot(x=\"cumulative_customer\", figsize=(8, 5), legend=True)\n\nax.legend([\"Groundtruth\", \"Baseline\", \"Model\"], loc=\"lower right\")\n\nax.set_xlabel(\"Cumulative Fraction of Customers\")\nax.set_xticks(np.arange(0, 1.1, 0.1))\nax.set_xlim((0, 1.0))\n\nax.set_ylabel(\"Cumulative Fraction of Total Lifetime Value\")\nax.set_yticks(np.arange(0, 1.1, 0.1))\nax.set_ylim((0, 1.05))\nax.set_title(\"Gain Chart\");\n
ax = gain[ [ \"cumulative_customer\", \"lorenz\", \"baseline\", \"model\", ] ].plot(x=\"cumulative_customer\", figsize=(8, 5), legend=True) ax.legend([\"Groundtruth\", \"Baseline\", \"Model\"], loc=\"lower right\") ax.set_xlabel(\"Cumulative Fraction of Customers\") ax.set_xticks(np.arange(0, 1.1, 0.1)) ax.set_xlim((0, 1.0)) ax.set_ylabel(\"Cumulative Fraction of Total Lifetime Value\") ax.set_yticks(np.arange(0, 1.1, 0.1)) ax.set_ylim((0, 1.05)) ax.set_title(\"Gain Chart\"); In\u00a0[\u00a0]: Copied!
gini = ltv.gini_from_gain(gain[[\"lorenz\", \"baseline\", \"model\"]])\ngini\n
gini = ltv.gini_from_gain(gain[[\"lorenz\", \"baseline\", \"model\"]]) gini In\u00a0[\u00a0]: Copied!
df_decile = ltv.decile_stats(y_eval, y_pred)\ndf_decile\n
df_decile = ltv.decile_stats(y_eval, y_pred) df_decile In\u00a0[\u00a0]: Copied!
ax = df_decile[[\"label_mean\", \"pred_mean\"]].plot.bar(rot=0)\n\nax.set_title(\"Decile Chart\")\nax.set_xlabel(\"Prediction bucket\")\nax.set_ylabel(\"Average bucket value\")\nax.legend([\"Label\", \"Prediction\"], loc=\"upper left\");\n
ax = df_decile[[\"label_mean\", \"pred_mean\"]].plot.bar(rot=0) ax.set_title(\"Decile Chart\") ax.set_xlabel(\"Prediction bucket\") ax.set_ylabel(\"Average bucket value\") ax.legend([\"Label\", \"Prediction\"], loc=\"upper left\"); In\u00a0[\u00a0]: Copied!
def spearmanr(x1: Sequence[float], x2: Sequence[float]) -> float:\n    \"\"\"Calculates spearmanr rank correlation coefficient.\n\n    See https://docs.scipy.org/doc/scipy/reference/stats.html.\n\n    Args:\n      x1: 1D array_like.\n      x2: 1D array_like.\n\n    Returns:\n      correlation: float.\n    \"\"\"\n    return stats.spearmanr(x1, x2, nan_policy=\"raise\")[0]\n\n\nspearman_corr = spearmanr(y_eval, y_pred)\nspearman_corr\n
def spearmanr(x1: Sequence[float], x2: Sequence[float]) -> float: \"\"\"Calculates spearmanr rank correlation coefficient. See https://docs.scipy.org/doc/scipy/reference/stats.html. Args: x1: 1D array_like. x2: 1D array_like. Returns: correlation: float. \"\"\" return stats.spearmanr(x1, x2, nan_policy=\"raise\")[0] spearman_corr = spearmanr(y_eval, y_pred) spearman_corr In\u00a0[\u00a0]: Copied!
df_metrics = pd.DataFrame(\n    {\n        \"model\": MODEL,\n        \"loss_function\": LOSS,\n        \"train_loss\": history[\"loss\"][-1],\n        \"eval_loss\": history[\"val_loss\"][-1],\n        \"label_positive\": np.mean(y_eval > 0),\n        \"label_mean\": y_eval.mean(),\n        \"pred_mean\": y_pred.mean(),\n        \"decile_mape\": df_decile[\"decile_mape\"].mean(),\n        \"baseline_gini\": gini[\"normalized\"][1],\n        \"gini\": gini[\"normalized\"][2],\n        \"spearman_corr\": spearman_corr,\n    },\n    index=[VERSION],\n)\n
df_metrics = pd.DataFrame( { \"model\": MODEL, \"loss_function\": LOSS, \"train_loss\": history[\"loss\"][-1], \"eval_loss\": history[\"val_loss\"][-1], \"label_positive\": np.mean(y_eval > 0), \"label_mean\": y_eval.mean(), \"pred_mean\": y_pred.mean(), \"decile_mape\": df_decile[\"decile_mape\"].mean(), \"baseline_gini\": gini[\"normalized\"][1], \"gini\": gini[\"normalized\"][2], \"spearman_corr\": spearman_corr, }, index=[VERSION], ) In\u00a0[\u00a0]: Copied!
for unit_cost, total_profit in zip(unit_costs, total_profits):\n    df_metrics[\"total_profit_{:02d}\".format(int(unit_cost * 100))] = total_profit\n
for unit_cost, total_profit in zip(unit_costs, total_profits): df_metrics[\"total_profit_{:02d}\".format(int(unit_cost * 100))] = total_profit In\u00a0[\u00a0]: Copied!
df_metrics.T\n
df_metrics.T In\u00a0[\u00a0]: Copied!
output_path = OUTPUT_CSV_FOLDER\n
output_path = OUTPUT_CSV_FOLDER In\u00a0[\u00a0]: Copied!
if not os.path.isdir(output_path):\n    os.makedirs(output_path)\n
if not os.path.isdir(output_path): os.makedirs(output_path) In\u00a0[\u00a0]: Copied!
output_file = os.path.join(\n    output_path, \"{}_regression_{}_{}.csv\".format(MODEL, LOSS, VERSION)\n)\n
output_file = os.path.join( output_path, \"{}_regression_{}_{}.csv\".format(MODEL, LOSS, VERSION) ) In\u00a0[\u00a0]: Copied!
df_metrics.to_csv(output_file, index=False)\n
df_metrics.to_csv(output_file, index=False)"},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#zilnloss","title":"ZILNLoss\u00b6","text":"

[DISCLAIMER]

The purpose of this notebook is to check whether the ZILNLoss originally implemented in Keras gives the same results as the pytorch-widedeep implementation.
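For reference, the zero-inflated lognormal (ZILN) loss being compared is, roughly, the negative log-likelihood of a two-part model: a Bernoulli gate deciding whether the value is zero, and a lognormal for its magnitude when it is positive. With the three raw model outputs $(a_1, a_2, a_3)$ mapped to $p = \sigma(a_1)$, $\mu = a_2$, $\sigma = \operatorname{softplus}(a_3)$ (the parameterisation used by the original lifetime-value code; the pytorch-widedeep version is assumed here to follow the same convention), the loss for a label $y$ is

$$
\mathcal{L}_{\mathrm{ZILN}}(y) = -\,\mathbb{1}\{y = 0\}\,\log(1 - p)\;-\;\mathbb{1}\{y > 0\}\Big(\log p + \log \mathrm{LogNormal}(y;\,\mu,\,\sigma)\Big)
$$

This is also why both the Keras model and the pytorch-widedeep model below use three output units when LOSS == "ziln".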

"},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#kdd-cup-98-ltv-prediction","title":"KDD Cup 98 LTV Prediction\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#configs","title":"Configs\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#load-data","title":"Load data\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#label-distribution","title":"Label distribution\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#preprocess-features","title":"Preprocess features\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#vocab","title":"Vocab\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#indicator","title":"Indicator\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#numeric","title":"Numeric\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#all","title":"All\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#traineval-split","title":"Train/eval split\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#model","title":"Model\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#loss","title":"Loss\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#train","title":"Train\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#eval","title":"Eval\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#pytorch-widedeep-approach","title":"Pytorch-widedeep approach\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#appendix","title":"Appendix\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#total-profit","title":"Total Profit\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#gini-coefficient","title":"Gini Coefficient\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#calibration","title":"Calibration\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#rank-correlation","title":"Rank Correlation\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#all-metrics-together","title":"All metrics together\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#save","title":"Save\u00b6","text":""},{"location":"examples/13_model_uncertainty_prediction.html","title":"13_model_uncertainty_prediction","text":"
  • In this notebook we will use the highly imbalanced Protein Homology Dataset from KDD Cup 2004
* The first element of each line is a BLOCK ID that denotes to which native sequence this example belongs. There is a unique BLOCK ID for each native sequence. BLOCK IDs are integers running from 1 to 303 (one for each native sequence, i.e. for each query). BLOCK IDs were assigned before the blocks were split into the train and test sets, so they do not run consecutively in either file.\n* The second element of each line is an EXAMPLE ID that uniquely describes the example. You will need this EXAMPLE ID and the BLOCK ID when you submit results.\n* The third element is the class of the example. Proteins that are homologous to the native sequence are denoted by 1, non-homologous proteins (i.e. decoys) by 0. Test examples have a \"?\" in this position.\n* All following elements are feature values. There are 74 feature values in each line. The features describe the match (e.g. the score of a sequence alignment) between the native protein sequence and the sequence that is tested for homology.\n
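A quick sanity check on that field layout, as a minimal sketch: it assumes only the load_bio_kdd04 loader used below, which (per the dataframe preview further down) returns the two ID columns, the binary target and the 74 match features.

from pytorch_widedeep.datasets import load_bio_kdd04

df = load_bio_kdd04(as_frame=True)
# everything except the two IDs and the target should be the 74 feature values per example
feature_cols = [c for c in df.columns if c not in ("EXAMPLE_ID", "BLOCK_ID", "target")]
print(len(feature_cols))               # expected: 74
print(sorted(df["target"].unique()))   # expected: [0, 1] (decoys vs. homologous proteins)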
In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport torch\nfrom torch.optim import SGD, lr_scheduler\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom pytorch_widedeep.dataloaders import DataLoaderImbalanced, DataLoaderDefault\nfrom torchmetrics import F1Score as F1_torchmetrics\nfrom torchmetrics import Accuracy as Accuracy_torchmetrics\nfrom torchmetrics import Precision as Precision_torchmetrics\nfrom torchmetrics import Recall as Recall_torchmetrics\nfrom pytorch_widedeep.metrics import Accuracy, Recall, Precision, F1Score, R2Score\nfrom pytorch_widedeep.initializers import XavierNormal\nfrom pytorch_widedeep.datasets import load_bio_kdd04\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import classification_report\n\nimport time\nimport datetime\n\nimport warnings\n\nwarnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n\n# increase displayed columns in jupyter notebook\npd.set_option(\"display.max_columns\", 200)\npd.set_option(\"display.max_rows\", 300)\n
import numpy as np import pandas as pd import torch from torch.optim import SGD, lr_scheduler from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.models import TabMlp, WideDeep from pytorch_widedeep.dataloaders import DataLoaderImbalanced, DataLoaderDefault from torchmetrics import F1Score as F1_torchmetrics from torchmetrics import Accuracy as Accuracy_torchmetrics from torchmetrics import Precision as Precision_torchmetrics from torchmetrics import Recall as Recall_torchmetrics from pytorch_widedeep.metrics import Accuracy, Recall, Precision, F1Score, R2Score from pytorch_widedeep.initializers import XavierNormal from pytorch_widedeep.datasets import load_bio_kdd04 from sklearn.model_selection import train_test_split from sklearn.metrics import classification_report import time import datetime import warnings warnings.filterwarnings(\"ignore\", category=DeprecationWarning) # increase displayed columns in jupyter notebook pd.set_option(\"display.max_columns\", 200) pd.set_option(\"display.max_rows\", 300) In\u00a0[2]: Copied!
df = load_bio_kdd04(as_frame=True)\ndf.head()\n
df = load_bio_kdd04(as_frame=True) df.head() Out[2]: EXAMPLE_ID BLOCK_ID target 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 0 279 261532 0 52.0 32.69 0.30 2.5 20.0 1256.8 -0.89 0.33 11.0 -55.0 267.2 0.52 0.05 -2.36 49.6 252.0 0.43 1.16 -2.06 -33.0 -123.2 1.60 -0.49 -6.06 65.0 296.1 -0.28 -0.26 -3.83 -22.6 -170.0 3.06 -1.05 -3.29 22.9 286.3 0.12 2.58 4.08 -33.0 -178.9 1.88 0.53 -7.0 -44.0 1987.0 -5.41 0.95 -4.0 -57.0 722.9 -3.26 -0.55 -7.5 125.5 1547.2 -0.36 1.12 9.0 -37.0 72.5 0.47 0.74 -11.0 -8.0 1595.1 -1.64 2.83 -2.0 -50.0 445.2 -0.35 0.26 0.76 1 279 261533 0 58.0 33.33 0.00 16.5 9.5 608.1 0.50 0.07 20.5 -52.5 521.6 -1.08 0.58 -0.02 -3.2 103.6 -0.95 0.23 -2.87 -25.9 -52.2 -0.21 0.87 -1.81 10.4 62.0 -0.28 -0.04 1.48 -17.6 -198.3 3.43 2.84 5.87 -16.9 72.6 -0.31 2.79 2.71 -33.5 -11.6 -1.11 4.01 5.0 -57.0 666.3 1.13 4.38 5.0 -64.0 39.3 1.07 -0.16 32.5 100.0 1893.7 -2.80 -0.22 2.5 -28.5 45.0 0.58 0.41 -19.0 -6.0 762.9 0.29 0.82 -3.0 -35.0 140.3 1.16 0.39 0.73 2 279 261534 0 77.0 27.27 -0.91 6.0 58.5 1623.6 -1.40 0.02 -6.5 -48.0 621.0 -1.20 0.14 -0.20 73.6 609.1 -0.44 -0.58 -0.04 -23.0 -27.4 -0.72 -1.04 -1.09 91.1 635.6 -0.88 0.24 0.59 -18.7 -7.2 -0.60 -2.82 -0.71 52.4 504.1 0.89 -0.67 -9.30 -20.8 -25.7 -0.77 -0.85 0.0 -20.0 2259.0 -0.94 1.15 -4.0 -44.0 -22.7 0.94 -0.98 -19.0 105.0 1267.9 1.03 1.27 11.0 -39.5 82.3 0.47 -0.19 -10.0 7.0 1491.8 0.32 -1.29 0.0 -34.0 658.2 -0.76 0.26 0.24 3 279 261535 0 41.0 27.91 -0.35 3.0 46.0 1921.6 -1.36 -0.47 -32.0 -51.5 560.9 -0.29 -0.10 -1.11 124.3 791.6 0.00 0.39 -1.85 -21.7 -44.9 -0.21 0.02 0.89 133.9 797.8 -0.08 1.06 -0.26 -16.4 -74.1 0.97 -0.80 -0.41 66.9 955.3 -1.90 1.28 -6.65 -28.1 47.5 -1.91 1.42 1.0 -30.0 1846.7 0.76 1.10 -4.0 -52.0 -53.9 1.71 -0.22 -12.0 97.5 1969.8 -1.70 0.16 -1.0 -32.5 255.9 -0.46 1.57 10.0 6.0 2047.7 -0.98 1.53 0.0 -49.0 554.2 -0.83 0.39 0.73 4 279 261536 0 50.0 28.00 -1.32 -9.0 12.0 464.8 0.88 0.19 8.0 -51.5 98.1 1.09 -0.33 -2.16 -3.9 102.7 0.39 -1.22 -3.39 -15.2 -42.2 -1.18 -1.11 -3.55 8.9 141.3 -0.16 -0.43 -4.15 -12.9 -13.4 -1.32 -0.98 -3.69 8.8 136.1 -0.30 4.13 1.89 -13.0 -18.7 -1.37 -0.93 0.0 -1.0 810.1 -2.29 6.72 1.0 -23.0 -29.7 0.58 -1.10 -18.5 33.5 206.8 1.84 -0.13 4.0 -29.0 30.1 0.80 -0.24 5.0 -14.0 479.5 0.68 -0.59 2.0 -36.0 -6.9 2.02 0.14 -0.23 In\u00a0[3]: Copied!
# imbalance of the classes\ndf[\"target\"].value_counts()\n
# imbalance of the classes df[\"target\"].value_counts() Out[3]:
target\n0    144455\n1      1296\nName: count, dtype: int64
In\u00a0[4]: Copied!
# drop columns we won't need in this example\ndf.drop(columns=[\"EXAMPLE_ID\", \"BLOCK_ID\"], inplace=True)\n
# drop columns we won't need in this example df.drop(columns=[\"EXAMPLE_ID\", \"BLOCK_ID\"], inplace=True) In\u00a0[5]: Copied!
df_train, df_valid = train_test_split(\n    df, test_size=0.2, stratify=df[\"target\"], random_state=1\n)\ndf_valid, df_test = train_test_split(\n    df_valid, test_size=0.5, stratify=df_valid[\"target\"], random_state=1\n)\n
df_train, df_valid = train_test_split( df, test_size=0.2, stratify=df[\"target\"], random_state=1 ) df_valid, df_test = train_test_split( df_valid, test_size=0.5, stratify=df_valid[\"target\"], random_state=1 ) In\u00a0[6]: Copied!
continuous_cols = df.drop(columns=[\"target\"]).columns.values.tolist()\n
continuous_cols = df.drop(columns=[\"target\"]).columns.values.tolist() In\u00a0[7]: Copied!
# deeptabular\ntab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True)\nX_tab_train = tab_preprocessor.fit_transform(df_train)\nX_tab_valid = tab_preprocessor.transform(df_valid)\nX_tab_test = tab_preprocessor.transform(df_test)\n\n# target\ny_train = df_train[\"target\"].values\ny_valid = df_valid[\"target\"].values\ny_test = df_test[\"target\"].values\n
# deeptabular tab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True) X_tab_train = tab_preprocessor.fit_transform(df_train) X_tab_valid = tab_preprocessor.transform(df_valid) X_tab_test = tab_preprocessor.transform(df_test) # target y_train = df_train[\"target\"].values y_valid = df_valid[\"target\"].values y_test = df_test[\"target\"].values In\u00a0[8]: Copied!
input_layer = len(tab_preprocessor.continuous_cols)\noutput_layer = 1\nhidden_layers = np.linspace(\n    input_layer * 2, output_layer, 5, endpoint=False, dtype=int\n).tolist()\n
input_layer = len(tab_preprocessor.continuous_cols) output_layer = 1 hidden_layers = np.linspace( input_layer * 2, output_layer, 5, endpoint=False, dtype=int ).tolist() In\u00a0[9]: Copied!
deeptabular = TabMlp(\n    mlp_hidden_dims=hidden_layers,\n    column_idx=tab_preprocessor.column_idx,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\nmodel = WideDeep(deeptabular=deeptabular, pred_dim=1)\nmodel\n
deeptabular = TabMlp( mlp_hidden_dims=hidden_layers, column_idx=tab_preprocessor.column_idx, continuous_cols=tab_preprocessor.continuous_cols, ) model = WideDeep(deeptabular=deeptabular, pred_dim=1) model Out[9]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cont_norm): Identity()\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=74, out_features=148, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=148, out_features=118, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_2): Sequential(\n            (0): Linear(in_features=118, out_features=89, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_3): Sequential(\n            (0): Linear(in_features=89, out_features=59, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_4): Sequential(\n            (0): Linear(in_features=59, out_features=30, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=30, out_features=1, bias=True)\n  )\n)
In\u00a0[10]: Copied!
# # Metrics from torchmetrics\n# accuracy = Accuracy_torchmetrics(average=None, num_classes=1)\n# precision = Precision_torchmetrics(average=\"micro\", num_classes=1)\n# f1 = F1_torchmetrics(average=None, num_classes=1)\n# recall = Recall_torchmetrics(average=None, num_classes=1)\n
# # Metrics from torchmetrics # accuracy = Accuracy_torchmetrics(average=None, num_classes=1) # precision = Precision_torchmetrics(average=\"micro\", num_classes=1) # f1 = F1_torchmetrics(average=None, num_classes=1) # recall = Recall_torchmetrics(average=None, num_classes=1) In\u00a0[11]: Copied!
# Metrics from pytorch-widedeep\naccuracy = Accuracy(top_k=2)\nprecision = Precision(average=False)\nrecall = Recall(average=True)\nf1 = F1Score(average=False)\n
# Metrics from pytorch-widedeep accuracy = Accuracy(top_k=2) precision = Precision(average=False) recall = Recall(average=True) f1 = F1Score(average=False) In\u00a0[12]: Copied!
# Optimizers\ndeep_opt = SGD(model.deeptabular.parameters(), lr=0.1)\n# LR Scheduler\ndeep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)\n\ntrainer = Trainer(\n    model,\n    objective=\"binary\",\n    lr_schedulers={\"deeptabular\": deep_sch},\n    initializers={\"deeptabular\": XavierNormal},\n    optimizers={\"deeptabular\": deep_opt},\n    metrics=[accuracy, precision, recall, f1],\n    verbose=1,\n)\n
# Optimizers deep_opt = SGD(model.deeptabular.parameters(), lr=0.1) # LR Scheduler deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3) trainer = Trainer( model, objective=\"binary\", lr_schedulers={\"deeptabular\": deep_sch}, initializers={\"deeptabular\": XavierNormal}, optimizers={\"deeptabular\": deep_opt}, metrics=[accuracy, precision, recall, f1], verbose=1, ) In\u00a0[13]: Copied!
start = time.time()\ntrainer.fit(\n    X_train={\"X_tab\": X_tab_train, \"target\": y_train},\n    X_val={\"X_tab\": X_tab_valid, \"target\": y_valid},\n    n_epochs=3,\n    batch_size=50,\n    custom_dataloader=DataLoaderImbalanced,\n    oversample_mul=5,\n)\nprint(\n    \"Training time[s]: {}\".format(\n        datetime.timedelta(seconds=round(time.time() - start))\n    )\n)\n
start = time.time() trainer.fit( X_train={\"X_tab\": X_tab_train, \"target\": y_train}, X_val={\"X_tab\": X_tab_valid, \"target\": y_valid}, n_epochs=3, batch_size=50, custom_dataloader=DataLoaderImbalanced, oversample_mul=5, ) print( \"Training time[s]: {}\".format( datetime.timedelta(seconds=round(time.time() - start)) ) )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 208/208 [00:01<00:00, 130.75it/s, loss=0.187, metrics={'acc': 0.9214, 'prec': [0.9149], 'rec': 0.9318, 'f1': [0.9233]}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 292/292 [00:01<00:00, 173.71it/s, loss=0.106, metrics={'acc': 0.9499, 'prec': [0.1435], 'rec': 0.938, 'f1': [0.249]}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 208/208 [00:01<00:00, 139.68it/s, loss=0.109, metrics={'acc': 0.9559, 'prec': [0.9537], 'rec': 0.9572, 'f1': [0.9554]}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 292/292 [00:01<00:00, 177.53it/s, loss=0.0888, metrics={'acc': 0.9602, 'prec': [0.1755], 'rec': 0.9457, 'f1': [0.2961]}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 208/208 [00:01<00:00, 141.63it/s, loss=0.08, metrics={'acc': 0.9706, 'prec': [0.9648], 'rec': 0.9766, 'f1': [0.9707]}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 292/292 [00:01<00:00, 165.36it/s, loss=0.0969, metrics={'acc': 0.9564, 'prec': [0.1636], 'rec': 0.9535, 'f1': [0.2792]}]
Training time[s]: 0:00:10\n
\n
In\u00a0[14]: Copied!
pd.DataFrame(trainer.history)\n
pd.DataFrame(trainer.history) Out[14]: train_loss train_acc train_prec train_rec train_f1 val_loss val_acc val_prec val_rec val_f1 0 0.186707 0.921408 [0.9149412512779236] 0.931801 [0.9232940673828125] 0.106023 0.949914 [0.14353498816490173] 0.937984 [0.24897116422653198] 1 0.109498 0.955931 [0.9536514282226562] 0.957193 [0.9554190039634705] 0.088787 0.960206 [0.17553956806659698] 0.945736 [0.29611650109291077] 2 0.079979 0.970588 [0.9648183584213257] 0.976582 [0.9706646203994751] 0.096858 0.956432 [0.1635638326406479] 0.953488 [0.279228150844574] In\u00a0[15]: Copied!
df_pred = trainer.predict(X_tab=X_tab_test)\nprint(classification_report(df_test[\"target\"].to_list(), df_pred))\nprint(\"Actual predicted values:\\n{}\".format(np.unique(df_pred, return_counts=True)))\n
df_pred = trainer.predict(X_tab=X_tab_test) print(classification_report(df_test[\"target\"].to_list(), df_pred)) print(\"Actual predicted values:\\n{}\".format(np.unique(df_pred, return_counts=True)))
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 292/292 [00:00<00:00, 346.55it/s]\n
              precision    recall  f1-score   support\n\n           0       1.00      0.96      0.98     14446\n           1       0.17      0.95      0.29       130\n\n    accuracy                           0.96     14576\n   macro avg       0.58      0.95      0.63     14576\nweighted avg       0.99      0.96      0.97     14576\n\nActual predicted values:\n(array([0, 1]), array([13845,   731]))\n
In\u00a0[16]: Copied!
df_pred_unc = trainer.predict_uncertainty(X_tab=X_tab_test, uncertainty_granularity=10)\nprint(classification_report(df_test[\"target\"].to_list(), df_pred))\nprint(\n    \"Actual predicted values:\\n{}\".format(\n        np.unique(df_pred_unc[:, -1], return_counts=True)\n    )\n)\n
df_pred_unc = trainer.predict_uncertainty(X_tab=X_tab_test, uncertainty_granularity=10) print(classification_report(df_test[\"target\"].to_list(), df_pred)) print( \"Actual predicted values:\\n{}\".format( np.unique(df_pred_unc[:, -1], return_counts=True) ) )
predict_UncertaintyIter: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 10/10 [00:03<00:00,  3.25it/s]
              precision    recall  f1-score   support\n\n           0       1.00      0.96      0.98     14446\n           1       0.17      0.95      0.29       130\n\n    accuracy                           0.96     14576\n   macro avg       0.58      0.95      0.63     14576\nweighted avg       0.99      0.96      0.97     14576\n\nActual predicted values:\n(array([0.]), array([14576]))\n
\n
In\u00a0[17]: Copied!
df_pred_unc\n
df_pred_unc Out[17]:
array([[9.98401165e-01, 1.59881881e-03, 0.00000000e+00],\n       [9.99941409e-01, 5.85634953e-05, 0.00000000e+00],\n       [9.97351170e-01, 2.64881272e-03, 0.00000000e+00],\n       ...,\n       [9.99494374e-01, 5.05603210e-04, 0.00000000e+00],\n       [9.99981642e-01, 1.83574630e-05, 0.00000000e+00],\n       [9.99996483e-01, 3.52600046e-06, 0.00000000e+00]])
"},{"location":"examples/13_model_uncertainty_prediction.html#model-uncertainty-prediction","title":"Model Uncertainty prediction\u00b6","text":"

Note:

This notebook extends the \"Custom DataLoader for Imbalanced dataset\" notebook

"},{"location":"examples/13_model_uncertainty_prediction.html#initial-imports","title":"Initial imports\u00b6","text":""},{"location":"examples/13_model_uncertainty_prediction.html#preparing-the-data","title":"Preparing the data\u00b6","text":""},{"location":"examples/13_model_uncertainty_prediction.html#define-the-model","title":"Define the model\u00b6","text":""},{"location":"examples/13_model_uncertainty_prediction.html#normal-prediction","title":"\"Normal\" prediction\u00b6","text":""},{"location":"examples/13_model_uncertainty_prediction.html#prediction-using-uncertainty","title":"Prediction using uncertainty\u00b6","text":""},{"location":"examples/14_bayesian_models.html","title":"14_bayesian_models","text":"In\u00a0[1]: Copied!
import numpy as np\nimport torch\nimport pandas as pd\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import accuracy_score\n\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep.datasets import load_adult\nfrom pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint\nfrom pytorch_widedeep.preprocessing import TabPreprocessor, WidePreprocessor\nfrom pytorch_widedeep.bayesian_models import BayesianWide, BayesianTabMlp\nfrom pytorch_widedeep.training.bayesian_trainer import BayesianTrainer\n
import numpy as np import torch import pandas as pd from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score from pytorch_widedeep.metrics import Accuracy from pytorch_widedeep.datasets import load_adult from pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint from pytorch_widedeep.preprocessing import TabPreprocessor, WidePreprocessor from pytorch_widedeep.bayesian_models import BayesianWide, BayesianTabMlp from pytorch_widedeep.training.bayesian_trainer import BayesianTrainer

The first few steps are ones we already know very well, the same as for any other model described in the other notebooks.

In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\ndf[\"age_buckets\"] = pd.cut(\n    df.age, bins=[16, 25, 30, 35, 40, 45, 50, 55, 60, 91], labels=np.arange(9)\n)\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\ndf.head()\n
df = load_adult(as_frame=True) df.columns = [c.replace(\"-\", \"_\") for c in df.columns] df[\"age_buckets\"] = pd.cut( df.age, bins=[16, 25, 30, 35, 40, 45, 50, 55, 60, 91], labels=np.arange(9) ) df[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop(\"income\", axis=1, inplace=True) df.head() Out[2]: age workclass fnlwgt education educational_num marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country age_buckets income_label 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States 0 0 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States 3 0 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States 1 1 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States 4 1 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States 0 0 In\u00a0[3]: Copied!
train, test = train_test_split(df, test_size=0.2, stratify=df.income_label)\n
train, test = train_test_split(df, test_size=0.2, stratify=df.income_label) In\u00a0[4]: Copied!
wide_cols = [\n    \"age_buckets\",\n    \"education\",\n    \"relationship\",\n    \"workclass\",\n    \"occupation\",\n    \"native_country\",\n    \"gender\",\n]\ncrossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")]\n\ncat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital_gain\",\n    \"capital_loss\",\n    \"native_country\",\n]\ncontinuous_cols = [\"age\", \"hours_per_week\"]\n\ntarget = train[\"income_label\"].values\n
wide_cols = [ \"age_buckets\", \"education\", \"relationship\", \"workclass\", \"occupation\", \"native_country\", \"gender\", ] crossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")] cat_embed_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"capital_gain\", \"capital_loss\", \"native_country\", ] continuous_cols = [\"age\", \"hours_per_week\"] target = train[\"income_label\"].values In\u00a0[5]: Copied!
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_tab = wide_preprocessor.fit_transform(train)\n
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_tab = wide_preprocessor.fit_transform(train) In\u00a0[6]: Copied!
model = BayesianWide(\n    input_dim=np.unique(X_tab).shape[0],\n    prior_sigma_1=1.0,\n    prior_sigma_2=0.002,\n    prior_pi=0.8,\n    posterior_mu_init=0,\n    posterior_rho_init=-7.0,\n    pred_dim=1,  # here the models are NOT passed to a WideDeep constructor class so the output dim MUST be specified\n)\n
model = BayesianWide( input_dim=np.unique(X_tab).shape[0], prior_sigma_1=1.0, prior_sigma_2=0.002, prior_pi=0.8, posterior_mu_init=0, posterior_rho_init=-7.0, pred_dim=1, # here the models are NOT passed to a WideDeep constructor class so the output dim MUST be specified ) In\u00a0[7]: Copied!
trainer = BayesianTrainer(\n    model,\n    objective=\"binary\",\n    optimizer=torch.optim.Adam(model.parameters(), lr=0.01),\n    metrics=[Accuracy],\n)\n
trainer = BayesianTrainer( model, objective=\"binary\", optimizer=torch.optim.Adam(model.parameters(), lr=0.01), metrics=[Accuracy], ) In\u00a0[8]: Copied!
trainer.fit(\n    X_tab=X_tab,\n    target=target,\n    val_split=0.2,\n    n_epochs=2,\n    batch_size=256,\n)\n
trainer.fit( X_tab=X_tab, target=target, val_split=0.2, n_epochs=2, batch_size=256, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 123/123 [00:00<00:00, 124.32it/s, loss=163, metrics={'acc': 0.7813}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 31/31 [00:00<00:00, 238.67it/s, loss=141, metrics={'acc': 0.8219}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 123/123 [00:00<00:00, 132.81it/s, loss=140, metrics={'acc': 0.8285}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 31/31 [00:00<00:00, 190.16it/s, loss=140, metrics={'acc': 0.8298}]\n
In\u00a0[9]: Copied!
tab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols\n)\nX_tab = tab_preprocessor.fit_transform(train)\n
tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols ) X_tab = tab_preprocessor.fit_transform(train)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[10]: Copied!
model = BayesianTabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=continuous_cols,\n    #     embed_continuous_method = \"standard\",\n    #     cont_embed_activation=\"leaky_relu\",\n    #     cont_embed_dim = 8,\n    mlp_hidden_dims=[128, 64],\n    prior_sigma_1=1.0,\n    prior_sigma_2=0.002,\n    prior_pi=0.8,\n    posterior_mu_init=0,\n    posterior_rho_init=-7.0,\n    pred_dim=1,\n)\n
model = BayesianTabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=continuous_cols, # embed_continuous_method = \"standard\", # cont_embed_activation=\"leaky_relu\", # cont_embed_dim = 8, mlp_hidden_dims=[128, 64], prior_sigma_1=1.0, prior_sigma_2=0.002, prior_pi=0.8, posterior_mu_init=0, posterior_rho_init=-7.0, pred_dim=1, ) In\u00a0[11]: Copied!
trainer = BayesianTrainer(\n    model,\n    objective=\"binary\",\n    optimizer=torch.optim.Adam(model.parameters(), lr=0.01),\n    metrics=[Accuracy],\n)\n
trainer = BayesianTrainer( model, objective=\"binary\", optimizer=torch.optim.Adam(model.parameters(), lr=0.01), metrics=[Accuracy], ) In\u00a0[12]: Copied!
trainer.fit(\n    X_tab=X_tab,\n    target=target,\n    val_split=0.2,\n    n_epochs=2,\n    batch_size=256,\n)\n
trainer.fit( X_tab=X_tab, target=target, val_split=0.2, n_epochs=2, batch_size=256, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 123/123 [00:04<00:00, 28.74it/s, loss=2e+3, metrics={'acc': 0.8007}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 31/31 [00:00<00:00, 136.89it/s, loss=1.75e+3, metrics={'acc': 0.8418}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 123/123 [00:04<00:00, 29.41it/s, loss=1.73e+3, metrics={'acc': 0.8596}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 31/31 [00:00<00:00, 143.87it/s, loss=1.71e+3, metrics={'acc': 0.8569}]\n

These models are powerful beyond their success metrics because they also give us a sense of the uncertainty in their predictions. Let's have a look

In\u00a0[13]: Copied!
X_tab_test = tab_preprocessor.transform(test)\n
X_tab_test = tab_preprocessor.transform(test) In\u00a0[14]: Copied!
preds = trainer.predict(X_tab_test, return_samples=True, n_samples=5)\n
preds = trainer.predict(X_tab_test, return_samples=True, n_samples=5)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:01<00:00, 33.92it/s]\n
In\u00a0[15]: Copied!
preds.shape\n
preds.shape Out[15]:
(5, 9769)

As we can see, the predictions have shape (5, 9769): one set of predictions for each of the times predict is run internally (i.e. the network is sampled and a prediction is produced, as defined by the parameter n_samples). This gives us an idea of how certain the model is about a given prediction.

Similarly, we could obtain the probabilities

In\u00a0[16]: Copied!
probs = trainer.predict_proba(X_tab_test, return_samples=True, n_samples=5)\n
probs = trainer.predict_proba(X_tab_test, return_samples=True, n_samples=5)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:01<00:00, 32.79it/s]\n
In\u00a0[17]: Copied!
probs.shape\n
probs.shape Out[17]:
(5, 9769, 2)

And we could see how the model performs each time we sample the network. A short sketch of how to aggregate these samples into a single prediction with an uncertainty estimate follows the accuracies below.

In\u00a0[18]: Copied!
for p in preds:\n    print(accuracy_score(p, test[\"income_label\"].values))\n
for p in preds: print(accuracy_score(p, test[\"income_label\"].values))
0.8559729757395844\n0.8564847988535162\n0.8567918927218753\n0.8562800696079435\n0.8558706111167981\n
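To turn the sampled outputs into a single point estimate with an associated uncertainty, one can simply aggregate over the sample dimension. A minimal sketch, assuming the probs array obtained above; the 0.5 threshold and the choice of the standard deviation as the uncertainty measure are illustrative, not part of the library:

import numpy as np

# mean probability of the positive class across the 5 sampled networks
mean_pos_prob = probs[:, :, 1].mean(axis=0)

# per-observation uncertainty: spread of that probability across samples
uncertainty = probs[:, :, 1].std(axis=0)

# point prediction from the averaged probabilities
point_pred = (mean_pos_prob > 0.5).astype(int)

# e.g. indices of the 10 observations where the sampled networks disagree the most
most_uncertain = np.argsort(-uncertainty)[:10]

Observations with a large spread across samples are those the Bayesian model is least confident about, which is precisely the information a point-estimate model cannot provide.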
In\u00a0[\u00a0]: Copied!
\n
"},{"location":"examples/14_bayesian_models.html#the-bayesian-models","title":"The Bayesian Models\u00b6","text":"

Perhaps one of the most interesting functionalities in the library is access to full Bayesian models, which are used in almost exactly the same way as any of the other models in the library.

Note however that the Bayesian models are ONLY available for tabular data and, at the moment, we do not support combining them to form a Wide and Deep model.

The implementation in this library is based on the publication: Weight Uncertainty in Neural Networks, by Blundell et al., 2015. Code-wise, our implementation is inspired by a number of sources:

  1. https://joshfeldman.net/WeightUncertainty/
  2. https://www.nitarshan.com/bayes-by-backprop/
  3. https://github.com/piEsposito/blitz-bayesian-deep-learning
  4. https://github.com/zackchase/mxnet-the-straight-dope/tree/master/chapter18_variational-methods-and-uncertainty

The two Bayesian models available in the library are:

  1. BayesianWide: a linear model where the non-linearities are captured via crossed columns
  2. BayesianTabMlp: a standard MLP that receives categorical embeddings and continuous columns (embedded or not), which are then passed through a series of dense layers. All parameters in the model are probabilistic (a rough sketch of how such probabilistic weights are sampled follows this list).
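To give an intuition for what the prior_sigma_1, prior_sigma_2, prior_pi and posterior_rho_init parameters used above control, here is a rough, illustrative sketch of how a single Bayesian layer samples its weights following Blundell et al., 2015. This is not the library's actual implementation, just the idea: every weight has a learnable mean mu and a learnable rho, which is mapped to a positive standard deviation via a softplus, and the prior is a scale mixture of two zero-mean Gaussians with standard deviations prior_sigma_1 and prior_sigma_2, mixed with probability prior_pi.

import torch
import torch.nn.functional as F

# variational (reparameterised) weight sampling -- illustrative only
mu = torch.zeros(10, 5)            # initialised at posterior_mu_init
rho = torch.full((10, 5), -7.0)    # initialised at posterior_rho_init
sigma = F.softplus(rho)            # std = log(1 + exp(rho)), tiny at initialisation
eps = torch.randn_like(mu)
weight = mu + sigma * eps          # a different weight matrix on every forward pass

# scale-mixture prior evaluated on the sampled weights
prior_pi, prior_sigma_1, prior_sigma_2 = 0.8, 1.0, 0.002
normal_1 = torch.distributions.Normal(0.0, prior_sigma_1)
normal_2 = torch.distributions.Normal(0.0, prior_sigma_2)
prior_log_prob = torch.log(
    prior_pi * normal_1.log_prob(weight).exp()
    + (1 - prior_pi) * normal_2.log_prob(weight).exp()
).sum()

During training the objective combines the usual task loss with a KL-style term between the learned posterior and this prior; at prediction time the n_samples parameter simply controls how many times this sampling (and hence the forward pass) is repeated.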
"},{"location":"examples/14_bayesian_models.html#1-bayesianwide","title":"1. BayesianWide\u00b6","text":""},{"location":"examples/14_bayesian_models.html#2-bayesiantabmlp","title":"2. BayesianTabMlp\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html","title":"15_DIR-LDS_and_FDS","text":"In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport torch\nfrom torch.optim import SGD, lr_scheduler\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom sklearn.metrics import mean_squared_error\nfrom pytorch_widedeep.initializers import XavierNormal\nfrom pytorch_widedeep.datasets import load_california_housing\n\nfrom sklearn.model_selection import train_test_split\n\nimport matplotlib.pyplot as plt\nfrom scipy.ndimage import convolve1d\nfrom scipy.ndimage import gaussian_filter1d\nfrom scipy.signal.windows import triang\nfrom pytorch_widedeep.utils.deeptabular_utils import get_kernel_window, find_bin\nfrom pytorch_widedeep.models import fds_layer\n\n# increase displayed columns in jupyter notebook\npd.set_option(\"display.max_columns\", 200)\npd.set_option(\"display.max_rows\", 300)\n
import numpy as np import pandas as pd import torch from torch.optim import SGD, lr_scheduler from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.models import TabMlp, WideDeep from sklearn.metrics import mean_squared_error from pytorch_widedeep.initializers import XavierNormal from pytorch_widedeep.datasets import load_california_housing from sklearn.model_selection import train_test_split import matplotlib.pyplot as plt from scipy.ndimage import convolve1d from scipy.ndimage import gaussian_filter1d from scipy.signal.windows import triang from pytorch_widedeep.utils.deeptabular_utils import get_kernel_window, find_bin from pytorch_widedeep.models import fds_layer # increase displayed columns in jupyter notebook pd.set_option(\"display.max_columns\", 200) pd.set_option(\"display.max_rows\", 300) In\u00a0[2]: Copied!
df = load_california_housing(as_frame=True)\ndf.head()\n
df = load_california_housing(as_frame=True) df.head() Out[2]: MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude Longitude MedHouseVal 0 8.3252 41.0 6.984127 1.023810 322.0 2.555556 37.88 -122.23 4.526 1 8.3014 21.0 6.238137 0.971880 2401.0 2.109842 37.86 -122.22 3.585 2 7.2574 52.0 8.288136 1.073446 496.0 2.802260 37.85 -122.24 3.521 3 5.6431 52.0 5.817352 1.073059 558.0 2.547945 37.85 -122.25 3.413 4 3.8462 52.0 6.281853 1.081081 565.0 2.181467 37.85 -122.25 3.422 In\u00a0[3]: Copied!
ks = 5\nsigma = 2\nhalf_ks = (ks - 1) // 2\nbase_kernel = [0.0] * half_ks + [1.0] + [0.0] * half_ks\nkernel_window = gaussian_filter1d(base_kernel, sigma=sigma)\nplt.plot(kernel_window)\n
ks = 5 sigma = 2 half_ks = (ks - 1) // 2 base_kernel = [0.0] * half_ks + [1.0] + [0.0] * half_ks kernel_window = gaussian_filter1d(base_kernel, sigma=sigma) plt.plot(kernel_window) Out[3]:
[<matplotlib.lines.Line2D at 0x16a0a43a0>]
In\u00a0[4]: Copied!
lds = True\nkernel = \"gaussian\"\nks = 5\nsigma = 2\nreweight = \"sqrt\"\nY = df[\"MedHouseVal\"].values\nlds_y_max = None\nlds_y_min = None\ngranularity = 100\n\nfig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1)\n\ny_max = max(Y) if lds_y_max is None else lds_y_max\ny_min = min(Y) if lds_y_min is None else lds_y_min\nbin_edges = np.linspace(y_min, y_max, num=granularity, endpoint=True)\nvalue_dict = dict(zip(bin_edges[:-1], np.histogram(Y, bin_edges)[0]))\n\nax1.set_title(\"Histogram of values in the Y\")\nax1.bar(\n    value_dict.keys(),\n    value_dict.values(),\n    width=list(value_dict.keys())[1] - list(value_dict.keys())[0],\n)\n\nif reweight:\n    value_dict = dict(zip(value_dict.keys(), np.sqrt(list(value_dict.values()))))\n\nif kernel is not None:\n    lds_kernel_window = get_kernel_window(kernel, ks, sigma)\n    smoothed_values = convolve1d(\n        list(value_dict.values()), weights=lds_kernel_window, mode=\"constant\"\n    )\n    weigths = sum(smoothed_values) / (len(smoothed_values) * smoothed_values)\nelse:\n    values = list(value_dict.values())\n    weigths = sum(values) / (len(values) * values)  # type: ignore[operator]\nvalue_dict = dict(zip(value_dict.keys(), weigths))\n\nleft_bin_edges = find_bin(bin_edges, Y)\nweights = np.array([value_dict[edge] for edge in left_bin_edges], dtype=\"float32\")\n\n\nax2.set_title(\"Bar plot with inverse-balanced weights for each bin from histogram\")\nax2.bar(\n    value_dict.keys(),\n    value_dict.values(),\n    width=list(value_dict.keys())[1] - list(value_dict.keys())[0],\n)\nfig.tight_layout()\n
lds = True kernel = \"gaussian\" ks = 5 sigma = 2 reweight = \"sqrt\" Y = df[\"MedHouseVal\"].values lds_y_max = None lds_y_min = None granularity = 100 fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1) y_max = max(Y) if lds_y_max is None else lds_y_max y_min = min(Y) if lds_y_min is None else lds_y_min bin_edges = np.linspace(y_min, y_max, num=granularity, endpoint=True) value_dict = dict(zip(bin_edges[:-1], np.histogram(Y, bin_edges)[0])) ax1.set_title(\"Histogram of values in the Y\") ax1.bar( value_dict.keys(), value_dict.values(), width=list(value_dict.keys())[1] - list(value_dict.keys())[0], ) if reweight: value_dict = dict(zip(value_dict.keys(), np.sqrt(list(value_dict.values())))) if kernel is not None: lds_kernel_window = get_kernel_window(kernel, ks, sigma) smoothed_values = convolve1d( list(value_dict.values()), weights=lds_kernel_window, mode=\"constant\" ) weigths = sum(smoothed_values) / (len(smoothed_values) * smoothed_values) else: values = list(value_dict.values()) weigths = sum(values) / (len(values) * values) # type: ignore[operator] value_dict = dict(zip(value_dict.keys(), weigths)) left_bin_edges = find_bin(bin_edges, Y) weights = np.array([value_dict[edge] for edge in left_bin_edges], dtype=\"float32\") ax2.set_title(\"Bar plot with inverse-balanced weights for each bin from histogram\") ax2.bar( value_dict.keys(), value_dict.values(), width=list(value_dict.keys())[1] - list(value_dict.keys())[0], ) fig.tight_layout() In\u00a0[5]: Copied!
features = torch.tensor(df.drop(columns=[\"MedHouseVal\"]).values)\nlabels = torch.tensor(np.vstack(df[\"MedHouseVal\"].values))\nFDS = fds_layer.FDSLayer(feature_dim=features.size(1))\n
features = torch.tensor(df.drop(columns=[\"MedHouseVal\"]).values) labels = torch.tensor(np.vstack(df[\"MedHouseVal\"].values)) FDS = fds_layer.FDSLayer(feature_dim=features.size(1)) In\u00a0[6]: Copied!
for epoch in range(3):\n    FDS.update_last_epoch_stats(epoch)\n    FDS.update_running_stats(torch.clone(features).detach(), labels, epoch)\n
for epoch in range(3): FDS.update_last_epoch_stats(epoch) FDS.update_running_stats(torch.clone(features).detach(), labels, epoch) In\u00a0[7]: Copied!
pd.DataFrame(FDS.running_mean_last_epoch.numpy()).iloc[:, 7].plot(\n    title=\"Running mean bin values for 'Longitude' feature\"\n);\n
pd.DataFrame(FDS.running_mean_last_epoch.numpy()).iloc[:, 7].plot( title=\"Running mean bin values for 'Longitude' feature\" ); In\u00a0[8]: Copied!
pd.DataFrame(FDS.smoothed_mean_last_epoch.numpy()).iloc[:, 7].plot(\n    title=\"Smoothed mean bin values for 'Longitude' feature\"\n);\n
pd.DataFrame(FDS.smoothed_mean_last_epoch.numpy()).iloc[:, 7].plot( title=\"Smoothed mean bin values for 'Longitude' feature\" ); In\u00a0[9]: Copied!
smoothed_features = FDS._smooth(torch.clone(features).detach(), labels, epoch).numpy()\nleft_bin_edges_indices = find_bin(\n    FDS.bin_edges, labels.squeeze(), ret_value=False\n).numpy()\ncontinuous_cols = df.drop(columns=[\"MedHouseVal\"]).columns.values.tolist()\n\ndf_w_bins = df.copy()\ndf_w_bins[\"MedHouseVal_bins\"] = left_bin_edges_indices\ndf_smoothed_w_bins = df_w_bins.copy()\ndf_smoothed_w_bins[continuous_cols] = smoothed_features\n
smoothed_features = FDS._smooth(torch.clone(features).detach(), labels, epoch).numpy() left_bin_edges_indices = find_bin( FDS.bin_edges, labels.squeeze(), ret_value=False ).numpy() continuous_cols = df.drop(columns=[\"MedHouseVal\"]).columns.values.tolist() df_w_bins = df.copy() df_w_bins[\"MedHouseVal_bins\"] = left_bin_edges_indices df_smoothed_w_bins = df_w_bins.copy() df_smoothed_w_bins[continuous_cols] = smoothed_features In\u00a0[10]: Copied!
df_w_bins[df_w_bins[\"MedHouseVal_bins\"] == 5][\"Longitude\"].plot(\n    title=\"Longitude feature values before calibration\"\n);\n
df_w_bins[df_w_bins[\"MedHouseVal_bins\"] == 5][\"Longitude\"].plot( title=\"Longitude feature values before calibration\" ); In\u00a0[11]: Copied!
df_smoothed_w_bins[df_w_bins[\"MedHouseVal_bins\"] == 5][\"Longitude\"].plot(\n    title=\"Longitude feature values after calibration\\n(only slight change in values)\"\n);\n
df_smoothed_w_bins[df_w_bins[\"MedHouseVal_bins\"] == 5][\"Longitude\"].plot( title=\"Longitude feature values after calibration\\n(only slight change in values)\" ); In\u00a0[12]: Copied!
df_train, df_valid = train_test_split(df, test_size=0.2, random_state=1)\ndf_valid, df_test = train_test_split(df_valid, test_size=0.5, random_state=1)\n
df_train, df_valid = train_test_split(df, test_size=0.2, random_state=1) df_valid, df_test = train_test_split(df_valid, test_size=0.5, random_state=1) In\u00a0[13]: Copied!
continuous_cols = df.drop(columns=[\"MedHouseVal\"]).columns.values.tolist()\n
continuous_cols = df.drop(columns=[\"MedHouseVal\"]).columns.values.tolist() In\u00a0[14]: Copied!
# deeptabular\ntab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True)\nX_tab_train = tab_preprocessor.fit_transform(df_train)\nX_tab_valid = tab_preprocessor.transform(df_valid)\nX_tab_test = tab_preprocessor.transform(df_test)\n\n# target\ny_train = df_train[\"MedHouseVal\"].values\ny_valid = df_valid[\"MedHouseVal\"].values\ny_test = df_test[\"MedHouseVal\"].values\n\nX_train = {\"X_tab\": X_tab_train, \"target\": y_train}\nX_val = {\"X_tab\": X_tab_valid, \"target\": y_valid}\n
# deeptabular tab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True) X_tab_train = tab_preprocessor.fit_transform(df_train) X_tab_valid = tab_preprocessor.transform(df_valid) X_tab_test = tab_preprocessor.transform(df_test) # target y_train = df_train[\"MedHouseVal\"].values y_valid = df_valid[\"MedHouseVal\"].values y_test = df_test[\"MedHouseVal\"].values X_train = {\"X_tab\": X_tab_train, \"target\": y_train} X_val = {\"X_tab\": X_tab_valid, \"target\": y_valid}
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:295: DeprecationWarning: 'scale' and 'already_standard' will be deprecated in the next release. Please use 'cols_to_scale' instead\n  self._check_inputs(cat_embed_cols)\n
In\u00a0[15]: Copied!
deeptabular = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\nmodel = WideDeep(deeptabular=deeptabular, with_fds=True)\nmodel\n
deeptabular = TabMlp( column_idx=tab_preprocessor.column_idx, continuous_cols=tab_preprocessor.continuous_cols, ) model = WideDeep(deeptabular=deeptabular, with_fds=True) model Out[15]:
WideDeep(\n  (deeptabular): TabMlp(\n    (cont_norm): Identity()\n    (encoder): MLP(\n      (mlp): Sequential(\n        (dense_layer_0): Sequential(\n          (0): Linear(in_features=8, out_features=200, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n        (dense_layer_1): Sequential(\n          (0): Linear(in_features=200, out_features=100, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n      )\n    )\n  )\n  (fds_layer): FDSLayer(\n    (pred_layer): Linear(in_features=100, out_features=1, bias=True)\n  )\n)
In\u00a0[16]: Copied!
# Optimizers\ndeep_opt = SGD(model.deeptabular.parameters(), lr=0.1)\n# LR Scheduler\ndeep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)\n# Hyperparameters\ntrainer = Trainer(\n    model,\n    objective=\"huber\",\n    lr_schedulers={\"deeptabular\": deep_sch},\n    initializers={\n        \"deeptabular\": XavierNormal,\n        \"fds_layer\": XavierNormal,\n        # \"FDS_dropout\": XavierNormal,\n        # \"pred_layer\": XavierNormal,\n    },\n    optimizers={\"deeptabular\": deep_opt},\n    metrics=[],\n    with_lds=True,\n    lds_kernel=\"gaussian\",\n    lds_ks=5,\n    lds_sigma=2,\n    lds_granularity=100,\n    lds_reweight=False,\n    lds_y_max=None,\n    lds_y_min=None,\n)\n\ntrainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50)\n\nprint(\n    f\"test RMSE: {mean_squared_error(y_test, trainer.predict(X_tab=X_tab_test), squared=False)}\"\n)\n
# Optimizers deep_opt = SGD(model.deeptabular.parameters(), lr=0.1) # LR Scheduler deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3) # Hyperparameters trainer = Trainer( model, objective=\"huber\", lr_schedulers={\"deeptabular\": deep_sch}, initializers={ \"deeptabular\": XavierNormal, \"fds_layer\": XavierNormal, # \"FDS_dropout\": XavierNormal, # \"pred_layer\": XavierNormal, }, optimizers={\"deeptabular\": deep_opt}, metrics=[], with_lds=True, lds_kernel=\"gaussian\", lds_ks=5, lds_sigma=2, lds_granularity=100, lds_reweight=False, lds_y_max=None, lds_y_min=None, ) trainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50) print( f\"test RMSE: {mean_squared_error(y_test, trainer.predict(X_tab=X_tab_test), squared=False)}\" )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 165.40it/s, loss=0.591, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 218.64it/s, loss=0.479, metrics={}]\nFDS update: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:00<00:00, 366.86it/s]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:01<00:00, 182.78it/s, loss=0.497, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 214.30it/s, loss=0.47, metrics={}]\nFDS update: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:00<00:00, 350.68it/s]\nepoch 3: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:04<00:00, 81.28it/s, loss=0.52, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 146.50it/s, loss=0.452, metrics={}]\nFDS update: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 122.88it/s]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:03<00:00, 99.77it/s, loss=0.508, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 157.48it/s, loss=0.45, metrics={}]\nFDS update: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 147.92it/s]\nepoch 5: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:03<00:00, 93.21it/s, loss=0.591, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 154.60it/s, loss=0.45, metrics={}]\nFDS update: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 145.73it/s]\npredict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 243.46it/s]
test RMSE: 0.7417540528440087\n
\n
In\u00a0[17]: Copied!
deeptabular = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\nmodel = WideDeep(deeptabular=deeptabular, fds=False)\n\n# Optimizers\ndeep_opt = SGD(model.deeptabular.parameters(), lr=0.1)\n# LR Scheduler\ndeep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)\n# Hyperparameters\ntrainer = Trainer(\n    model,\n    objective=\"huber\",\n    lr_schedulers={\"deeptabular\": deep_sch},\n    initializers={\"deeptabular\": XavierNormal},\n    optimizers={\"deeptabular\": deep_opt},\n    metrics=[],\n    with_lds=True,\n    lds_kernel=\"gaussian\",\n    lds_ks=5,\n    lds_sigma=2,\n    lds_granularity=100,\n    lds_reweight=False,\n    lds_y_max=None,\n    lds_y_min=None,\n)\n\ntrainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50)\n\nprint(\n    f\"test RMSE: {mean_squared_error(y_test, trainer.predict(X_tab=X_tab_test), squared=False)}\"\n)\n
deeptabular = TabMlp( column_idx=tab_preprocessor.column_idx, continuous_cols=tab_preprocessor.continuous_cols, ) model = WideDeep(deeptabular=deeptabular, fds=False) # Optimizers deep_opt = SGD(model.deeptabular.parameters(), lr=0.1) # LR Scheduler deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3) # Hyperparameters trainer = Trainer( model, objective=\"huber\", lr_schedulers={\"deeptabular\": deep_sch}, initializers={\"deeptabular\": XavierNormal}, optimizers={\"deeptabular\": deep_opt}, metrics=[], with_lds=True, lds_kernel=\"gaussian\", lds_ks=5, lds_sigma=2, lds_granularity=100, lds_reweight=False, lds_y_max=None, lds_y_min=None, ) trainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50) print( f\"test RMSE: {mean_squared_error(y_test, trainer.predict(X_tab=X_tab_test), squared=False)}\" )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 135.40it/s, loss=0.449, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 158.01it/s, loss=0.386, metrics={}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 137.79it/s, loss=0.377, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 156.84it/s, loss=0.399, metrics={}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 138.69it/s, loss=0.358, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 150.62it/s, loss=0.41, metrics={}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 131.96it/s, loss=0.339, metrics={}]\nvalid: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 146.01it/s, loss=0.321, metrics={}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 136.04it/s, loss=0.331, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 173.22it/s, loss=0.32, metrics={}]\npredict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 296.77it/s]\n
test RMSE: 0.6000006967500053\n
In\u00a0[18]: Copied!
deeptabular = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\nmodel = WideDeep(deeptabular=deeptabular, fds=False)\n\n# Optimizers\ndeep_opt = SGD(model.deeptabular.parameters(), lr=0.1)\n# LR Scheduler\ndeep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)\n# Hyperparameters\ntrainer = Trainer(\n    model,\n    objective=\"huber\",\n    lr_schedulers={\"deeptabular\": deep_sch},\n    initializers={\"deeptabular\": XavierNormal},\n    optimizers={\"deeptabular\": deep_opt},\n    metrics=[],\n    with_lds=False,\n)\n\ntrainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50)\n\nprint(\n    f\"test RMSE: {mean_squared_error(y_test, trainer.predict(X_tab=X_tab_test), squared=False)}\"\n)\n
deeptabular = TabMlp( column_idx=tab_preprocessor.column_idx, continuous_cols=tab_preprocessor.continuous_cols, ) model = WideDeep(deeptabular=deeptabular, fds=False) # Optimizers deep_opt = SGD(model.deeptabular.parameters(), lr=0.1) # LR Scheduler deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3) # Hyperparameters trainer = Trainer( model, objective=\"huber\", lr_schedulers={\"deeptabular\": deep_sch}, initializers={\"deeptabular\": XavierNormal}, optimizers={\"deeptabular\": deep_opt}, metrics=[], with_lds=False, ) trainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50) print( f\"test RMSE: {mean_squared_error(y_test, trainer.predict(X_tab=X_tab_test), squared=False)}\" )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 129.54it/s, loss=0.445, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 137.64it/s, loss=0.427, metrics={}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 135.98it/s, loss=0.374, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 148.50it/s, loss=0.389, metrics={}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 127.72it/s, loss=0.359, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 147.63it/s, loss=0.383, metrics={}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 136.54it/s, loss=0.339, metrics={}]\nvalid: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 146.55it/s, loss=0.323, metrics={}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 131.18it/s, loss=0.331, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 174.87it/s, loss=0.318, metrics={}]\npredict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 287.25it/s]\n
test RMSE: 0.6014019159826868\n
In\u00a0[\u00a0]: Copied!
\n
"},{"location":"examples/15_DIR-LDS_and_FDS.html#label-and-feature-distribution-smoothing-for-deep-imbalanced-regression","title":"Label and Feature Distribution Smoothing for Deep Imbalanced Regression\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html#initial-imports","title":"Initial imports\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html#load-dataset","title":"Load dataset\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html#effects-of-ks-and-sigma-paramaters-on-kernel-function","title":"Effects of ks and sigma paramaters on kernel function\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html#label-distribution-smoothing-visualization","title":"Label Distribution Smoothing - visualization\u00b6","text":"
  • visualization of pytorch_widedeep.training._wd_dataset.WideDeepDataset._prepare_weights(...)

A weight is assigned to each sample by the following procedure (a small, self-contained sketch of the inversion step follows this list):

  1. create a histogram from the label values, with number of bins = granularity
  2. [OPTIONAL] reweight the label frequencies by their square root
  3. [OPTIONAL] smooth the label frequencies by convolving the kernel-function window with the list of frequencies
  4. invert the values as n_samples / (n_classes * np.bincount(y))
  5. assign to each sample the weight of its closest bin
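As a minimal, self-contained sketch of the inversion step (step 4 above), using made-up bin indices rather than the library's internal code:

import numpy as np

# hypothetical bin index for each of 10 samples (as if produced by find_bin)
y_bins = np.array([0, 0, 0, 0, 1, 1, 1, 2, 2, 3])

counts = np.bincount(y_bins)                             # samples per bin: [4, 3, 2, 1]
weights_per_bin = len(y_bins) / (len(counts) * counts)   # n_samples / (n_classes * np.bincount(y))
sample_weights = weights_per_bin[y_bins]                 # weight assigned to each individual sample

print(weights_per_bin)  # [0.625, 0.833..., 1.25, 2.5]: rare bins get the largest weights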
"},{"location":"examples/15_DIR-LDS_and_FDS.html#feature-distribution-smoothing","title":"Feature Distribution Smoothing\u00b6","text":"

We use the dataset feature values in this example but, during the training process, the feature tensors are the output of the last layer before the FDS layer.

  • labels are np.vstack-ed to reflect the normal training scenario
"},{"location":"examples/15_DIR-LDS_and_FDS.html#data-preparation","title":"Data preparation\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html#model-with-lds-fds","title":"Model with LDS & FDS\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html#model-with-lds-only","title":"Model with LDS only\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html#model-without-fds-or-lds","title":"Model without FDS or LDS\u00b6","text":""},{"location":"examples/16_Self_Supervised_Pretraning_pt1.html","title":"16_Self-Supervised Pre-Training pt 1","text":"In\u00a0[1]: Copied!
import torch\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.model_selection import train_test_split\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep.datasets import load_adult\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.self_supervised_training import EncoderDecoderTrainer\n
import torch from sklearn.metrics import accuracy_score from sklearn.model_selection import train_test_split from pytorch_widedeep import Trainer from pytorch_widedeep.models import TabMlp, WideDeep from pytorch_widedeep.metrics import Accuracy from pytorch_widedeep.datasets import load_adult from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.self_supervised_training import EncoderDecoderTrainer In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\n
df = load_adult(as_frame=True) df.columns = [c.replace(\"-\", \"_\") for c in df.columns] df[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop(\"income\", axis=1, inplace=True) In\u00a0[3]: Copied!
# one could choose to use a validation set for early stopping, hyperparam\n# optimization, etc. This is just an example, so we simply use train/test\n# split\ndf_tr, df_te = train_test_split(df, test_size=0.2, stratify=df.income_label)\n
# one could choose to use a validation set for early stopping, hyperparam # optimization, etc. This is just an example, so we simply use train/test # split df_tr, df_te = train_test_split(df, test_size=0.2, stratify=df.income_label) In\u00a0[4]: Copied!
df_tr.head(2)\n
df_tr.head(2) Out[4]: age workclass fnlwgt education educational_num marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country income_label 9042 26 Local-gov 250551 HS-grad 9 Married-civ-spouse Craft-repair Own-child Black Male 0 0 40 United-States 0 25322 50 Private 34832 Bachelors 13 Married-civ-spouse Tech-support Husband White Male 15024 0 40 United-States 1 In\u00a0[5]: Copied!
# As always, we need to define which cols will be represented as embeddings\n# and which ones will be continuous features\ncat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital_gain\",\n    \"capital_loss\",\n    \"native_country\",\n]\ncontinuous_cols = [\"age\", \"hours_per_week\"]\ntarget_col = \"income_label\"\n
# As always, we need to define which cols will be represented as embeddings # and which ones will be continuous features cat_embed_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"capital_gain\", \"capital_loss\", \"native_country\", ] continuous_cols = [\"age\", \"hours_per_week\"] target_col = \"income_label\" In\u00a0[6]: Copied!
# We prepare the data to be passed to the model\ntab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols\n)\nX_tab = tab_preprocessor.fit_transform(df_tr)\ntarget = df_tr[target_col].values\n
# We prepare the data to be passed to the model tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols ) X_tab = tab_preprocessor.fit_transform(df_tr) target = df_tr[target_col].values
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[7]: Copied!
X_tab[:5]\n
X_tab[:5] Out[7]:
array([[ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 26, 40],\n       [ 2,  2,  1,  2,  2,  2,  1,  2,  1,  1, 50, 40],\n       [ 2,  1,  1,  3,  2,  2,  1,  1,  2,  1, 39, 46],\n       [ 2,  3,  2,  4,  1,  2,  2,  1,  1,  1, 17, 10],\n       [ 3,  4,  2,  1,  1,  2,  1,  1,  1,  1, 32, 20]])
In\u00a0[8]: Copied!
# We define a model that will act as the encoder in the encoder/decoder\n# architecture. This could be any of: TabMlp, TabResnet or TabNet\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\n
# We define a model that will act as the encoder in the encoder/decoder # architecture. This could be any of: TabMlp, TabResnet or TabNet tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=tab_preprocessor.continuous_cols, ) In\u00a0[9]: Copied!
tab_mlp\n
tab_mlp Out[9]:
TabMlp(\n  (cat_embed): DiffSizeCatEmbeddings(\n    (embed_layers): ModuleDict(\n      (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n      (emb_layer_education): Embedding(17, 8, padding_idx=0)\n      (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n      (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n      (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n      (emb_layer_race): Embedding(6, 4, padding_idx=0)\n      (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n      (emb_layer_capital_gain): Embedding(124, 24, padding_idx=0)\n      (emb_layer_capital_loss): Embedding(98, 21, padding_idx=0)\n      (emb_layer_native_country): Embedding(42, 13, padding_idx=0)\n    )\n    (embedding_dropout): Dropout(p=0.0, inplace=False)\n  )\n  (cont_norm): Identity()\n  (encoder): MLP(\n    (mlp): Sequential(\n      (dense_layer_0): Sequential(\n        (0): Linear(in_features=95, out_features=200, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n      (dense_layer_1): Sequential(\n        (0): Linear(in_features=200, out_features=100, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n    )\n  )\n)
In\u00a0[10]: Copied!
# If we do not pass a custom decoder, which is perfectly possible via the\n# decoder param,  the EncoderDecoderTrainer will automatically build a\n# decoder which will be the 'mirror' image of the encoder\nencoder_decoder_trainer = EncoderDecoderTrainer(encoder=tab_mlp)\n
# If we do not pass a custom decoder, which is perfectly possible via the # decoder param, the EncoderDecoderTrainer will automatically build a # decoder which will be the 'mirror' image of the encoder encoder_decoder_trainer = EncoderDecoderTrainer(encoder=tab_mlp) In\u00a0[11]: Copied!
# let's have a look at the encoder_decoder_model (aka ed_model)\nencoder_decoder_trainer.ed_model\n
# let's have a look at the encoder_decoder_model (aka ed_model) encoder_decoder_trainer.ed_model Out[11]:
EncoderDecoderModel(\n  (encoder): TabMlp(\n    (cat_embed): DiffSizeCatEmbeddings(\n      (embed_layers): ModuleDict(\n        (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n        (emb_layer_education): Embedding(17, 8, padding_idx=0)\n        (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n        (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n        (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n        (emb_layer_race): Embedding(6, 4, padding_idx=0)\n        (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n        (emb_layer_capital_gain): Embedding(124, 24, padding_idx=0)\n        (emb_layer_capital_loss): Embedding(98, 21, padding_idx=0)\n        (emb_layer_native_country): Embedding(42, 13, padding_idx=0)\n      )\n      (embedding_dropout): Dropout(p=0.0, inplace=False)\n    )\n    (cont_norm): Identity()\n    (encoder): MLP(\n      (mlp): Sequential(\n        (dense_layer_0): Sequential(\n          (0): Linear(in_features=95, out_features=200, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n        (dense_layer_1): Sequential(\n          (0): Linear(in_features=200, out_features=100, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n      )\n    )\n  )\n  (decoder): TabMlpDecoder(\n    (decoder): MLP(\n      (mlp): Sequential(\n        (dense_layer_0): Sequential(\n          (0): Linear(in_features=100, out_features=200, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n        (dense_layer_1): Sequential(\n          (0): Linear(in_features=200, out_features=95, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n      )\n    )\n  )\n  (masker): RandomObfuscator()\n)

Ignoring the masker, which just...well...masks, the ed_model consists of:

  1. An encoder model that is a TabMlp model, itself comprised of an embedding layer (or rather a collection of them, shown as cat_embed in the printout above) and an encoder (a simple MLP, referred to as encoder)
  2. A decoder which is just an \"inverted\" MLP (referred to as decoder). A custom decoder can also be passed explicitly via the decoder param, as in the sketch below.
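For reference, the following is a minimal, hypothetical sketch (not run in this notebook) of how one could pass such a custom decoder explicitly via the decoder param. It assumes that TabMlpDecoder accepts embed_dim and mlp_hidden_dims arguments and that TabMlp exposes the same cat_out_dim and cont_out_dim attributes used later for the resnet encoder; please check the docs for the exact signatures.

# hypothetical sketch: building the 'mirror' decoder by hand instead of
# letting the EncoderDecoderTrainer build it automatically
from pytorch_widedeep.models import TabMlpDecoder

custom_decoder = TabMlpDecoder(
    embed_dim=tab_mlp.cat_out_dim + tab_mlp.cont_out_dim,  # 95 in this example
    mlp_hidden_dims=[100, 200],  # chosen to mirror the default [200, 100] encoder
)
custom_ed_trainer = EncoderDecoderTrainer(encoder=tab_mlp, decoder=custom_decoder)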
In\u00a0[12]: Copied!
# And we just...pretrain\nencoder_decoder_trainer.pretrain(X_tab, n_epochs=5, batch_size=256)\n
# And we just...pretrain encoder_decoder_trainer.pretrain(X_tab, n_epochs=5, batch_size=256)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 82.90it/s, loss=4.07]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 89.87it/s, loss=3.09]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 92.86it/s, loss=2.53]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 91.24it/s, loss=2.09]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 91.38it/s, loss=1.78]\n

At this point we have two options: we can either save the model for later use or continue straight to the supervised training. The latter is rather simple; after running:

encoder_decoder_trainer.pretrain(X_tab, n_epochs=5, batch_size=256)\n

you just have to

model = WideDeep(deeptabular=tab_mlp)\ntrainer = Trainer(model=model, objective=\"binary\", metrics=[Accuracy])\n\ntrainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256)\n\n# And, you know...we get a test metric\nX_tab_te = tab_preprocessor.transform(df_te)\ntarget_te = df_te[target_col].values\n\npreds = trainer.predict(X_tab=X_tab_te)\ntest_acc = accuracy_score(target_te, preds)\n

Let's say that in any case, we are 'decent' scientists/people and we want to save the model:

In\u00a0[13]: Copied!
encoder_decoder_trainer.save(\n    path=\"pretrained_weights\", model_filename=\"encoder_decoder_model.pt\"\n)\n
encoder_decoder_trainer.save( path=\"pretrained_weights\", model_filename=\"encoder_decoder_model.pt\" )

some time has passed...

In\u00a0[14]: Copied!
encoder_decoder_model = torch.load(\"pretrained_weights/encoder_decoder_model.pt\")\n
encoder_decoder_model = torch.load(\"pretrained_weights/encoder_decoder_model.pt\")

Now, AND THIS IS IMPORTANT: we have loaded the encoder AND the decoder. To proceed with the supervised training we ONLY need the encoder.

In\u00a0[15]: Copied!
pretrained_encoder = encoder_decoder_model.encoder\n
pretrained_encoder = encoder_decoder_model.encoder In\u00a0[16]: Copied!
pretrained_encoder\n
pretrained_encoder Out[16]:
TabMlp(\n  (cat_embed): DiffSizeCatEmbeddings(\n    (embed_layers): ModuleDict(\n      (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n      (emb_layer_education): Embedding(17, 8, padding_idx=0)\n      (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n      (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n      (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n      (emb_layer_race): Embedding(6, 4, padding_idx=0)\n      (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n      (emb_layer_capital_gain): Embedding(124, 24, padding_idx=0)\n      (emb_layer_capital_loss): Embedding(98, 21, padding_idx=0)\n      (emb_layer_native_country): Embedding(42, 13, padding_idx=0)\n    )\n    (embedding_dropout): Dropout(p=0.0, inplace=False)\n  )\n  (cont_norm): Identity()\n  (encoder): MLP(\n    (mlp): Sequential(\n      (dense_layer_0): Sequential(\n        (0): Linear(in_features=95, out_features=200, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n      (dense_layer_1): Sequential(\n        (0): Linear(in_features=200, out_features=100, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n    )\n  )\n)
In\u00a0[17]: Copied!
# and as always, ANY supervised model in this library has to go through the WideDeep class:\nmodel = WideDeep(deeptabular=pretrained_encoder)\ntrainer = Trainer(model=model, objective=\"binary\", metrics=[Accuracy])\n\ntrainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256)\n\nX_tab_te = tab_preprocessor.transform(df_te)\ntarget_te = df_te[target_col].values\n\npreds = trainer.predict(X_tab=X_tab_te)\ntest_acc = accuracy_score(target_te, preds)\nprint(test_acc)\n
# and as always, ANY supervised model in this library has to go through the WideDeep class: model = WideDeep(deeptabular=pretrained_encoder) trainer = Trainer(model=model, objective=\"binary\", metrics=[Accuracy]) trainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256) X_tab_te = tab_preprocessor.transform(df_te) target_te = df_te[target_col].values preds = trainer.predict(X_tab=X_tab_te) test_acc = accuracy_score(target_te, preds) print(test_acc)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 88.04it/s, loss=0.374, metrics={'acc': 0.8253}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 85.63it/s, loss=0.324, metrics={'acc': 0.8491}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 87.56it/s, loss=0.301, metrics={'acc': 0.8608}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 73.38it/s, loss=0.29, metrics={'acc': 0.8655}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 78.68it/s, loss=0.284, metrics={'acc': 0.8686}]\npredict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:00<00:00, 173.02it/s]\n
0.8730678677449074\n

As we mentioned before, we can also use a TabResNet or TabNet model and a custom decoder. Let's have a look:

In\u00a0[18]: Copied!
from pytorch_widedeep.models import TabResnet as TabResnetEncoder, TabResnetDecoder\n
from pytorch_widedeep.models import TabResnet as TabResnetEncoder, TabResnetDecoder In\u00a0[19]: Copied!
resnet_encoder = TabResnetEncoder(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=continuous_cols,\n    blocks_dims=[200, 100, 100],\n)\n
resnet_encoder = TabResnetEncoder( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=continuous_cols, blocks_dims=[200, 100, 100], )

let's have a look at the model

In\u00a0[20]: Copied!
resnet_encoder\n
resnet_encoder Out[20]:
TabResnet(\n  (cat_embed): DiffSizeCatEmbeddings(\n    (embed_layers): ModuleDict(\n      (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n      (emb_layer_education): Embedding(17, 8, padding_idx=0)\n      (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n      (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n      (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n      (emb_layer_race): Embedding(6, 4, padding_idx=0)\n      (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n      (emb_layer_capital_gain): Embedding(124, 24, padding_idx=0)\n      (emb_layer_capital_loss): Embedding(98, 21, padding_idx=0)\n      (emb_layer_native_country): Embedding(42, 13, padding_idx=0)\n    )\n    (embedding_dropout): Dropout(p=0.0, inplace=False)\n  )\n  (cont_norm): Identity()\n  (encoder): DenseResnet(\n    (dense_resnet): Sequential(\n      (lin_inp): Linear(in_features=95, out_features=200, bias=False)\n      (bn_inp): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      (block_0): BasicBlock(\n        (resize): Sequential(\n          (0): Linear(in_features=200, out_features=100, bias=False)\n          (1): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        )\n        (lin1): Linear(in_features=200, out_features=100, bias=False)\n        (bn1): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n        (dp): Dropout(p=0.1, inplace=False)\n        (lin2): Linear(in_features=100, out_features=100, bias=False)\n        (bn2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n      (block_1): BasicBlock(\n        (lin1): Linear(in_features=100, out_features=100, bias=False)\n        (bn1): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n        (dp): Dropout(p=0.1, inplace=False)\n        (lin2): Linear(in_features=100, out_features=100, bias=False)\n        (bn2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n    )\n  )\n)

As we can see, the tensor we are trying to reconstruct, the embeddings, is of size 95 (the sum of resnet_encoder.cat_out_dim and resnet_encoder.cont_out_dim). With that information we can build our own decoder as:

In\u00a0[21]: Copied!
# for all possible params see the docs\nresnet_decoder = TabResnetDecoder(\n    embed_dim=resnet_encoder.cat_out_dim + resnet_encoder.cont_out_dim,\n    blocks_dims=[100, 100, 200],\n)\n
# for all possible params see the docs resnet_decoder = TabResnetDecoder( embed_dim=resnet_encoder.cat_out_dim + resnet_encoder.cont_out_dim, blocks_dims=[100, 100, 200], ) In\u00a0[22]: Copied!
resnet_decoder\n
resnet_decoder Out[22]:
TabResnetDecoder(\n  (decoder): DenseResnet(\n    (dense_resnet): Sequential(\n      (block_0): BasicBlock(\n        (lin1): Linear(in_features=100, out_features=100, bias=False)\n        (bn1): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n        (dp): Dropout(p=0.1, inplace=False)\n        (lin2): Linear(in_features=100, out_features=100, bias=False)\n        (bn2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n      (block_1): BasicBlock(\n        (resize): Sequential(\n          (0): Linear(in_features=100, out_features=200, bias=False)\n          (1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        )\n        (lin1): Linear(in_features=100, out_features=200, bias=False)\n        (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n        (dp): Dropout(p=0.1, inplace=False)\n        (lin2): Linear(in_features=200, out_features=200, bias=False)\n        (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n    )\n  )\n  (reconstruction_layer): Linear(in_features=200, out_features=95, bias=False)\n)

and now:

In\u00a0[23]: Copied!
ec_trainer = EncoderDecoderTrainer(\n    encoder=resnet_encoder,\n    decoder=resnet_decoder,\n    masked_prob=0.2,\n)\nec_trainer.pretrain(X_tab, n_epochs=5, batch_size=256)\n
ec_trainer = EncoderDecoderTrainer( encoder=resnet_encoder, decoder=resnet_decoder, masked_prob=0.2, ) ec_trainer.pretrain(X_tab, n_epochs=5, batch_size=256)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:03<00:00, 46.89it/s, loss=1.52]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:03<00:00, 46.78it/s, loss=0.81]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:03<00:00, 39.82it/s, loss=0.56]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:03<00:00, 46.73it/s, loss=0.417]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:03<00:00, 46.24it/s, loss=0.329]\n
In\u00a0[24]: Copied!
# and as always, ANY supervised model in this library has to go through the WideDeep class:\nmodel = WideDeep(deeptabular=resnet_encoder)\ntrainer = Trainer(model=model, objective=\"binary\", metrics=[Accuracy])\n\ntrainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256)\n\nX_tab_te = tab_preprocessor.transform(df_te)\ntarget_te = df_te[target_col].values\n\npreds = trainer.predict(X_tab=X_tab_te)\ntest_acc = accuracy_score(target_te, preds)\nprint(test_acc)\n
# and as always, ANY supervised model in this library has to go through the WideDeep class: model = WideDeep(deeptabular=resnet_encoder) trainer = Trainer(model=model, objective=\"binary\", metrics=[Accuracy]) trainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256) X_tab_te = tab_preprocessor.transform(df_te) target_te = df_te[target_col].values preds = trainer.predict(X_tab=X_tab_te) test_acc = accuracy_score(target_te, preds) print(test_acc)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 58.63it/s, loss=0.335, metrics={'acc': 0.8442}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 58.02it/s, loss=0.296, metrics={'acc': 0.864}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 55.91it/s, loss=0.283, metrics={'acc': 0.8687}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 55.00it/s, loss=0.276, metrics={'acc': 0.871}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 51.95it/s, loss=0.272, metrics={'acc': 0.8732}]\npredict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:00<00:00, 120.15it/s]\n
0.8725560446309756\n
In\u00a0[\u00a0]: Copied!
\n
"},{"location":"examples/16_Self_Supervised_Pretraning_pt1.html#self-supervised-pretraining-for-tabular-data","title":"Self Supervised Pretraining for Tabular Data\u00b6","text":"

We have implemented two Self Supervised Pre-training routines that allow the user to pre-train all tabular models in the library with the exception of the TabPerceiver (which is a special monster).

The two routines implemented are illustrated in the figures below. The 1st is from TabNet: Attentive Interpretable Tabular Learning and is designed for models that do not use transformer-based architectures, while the second is from SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training, and is designed for models that use transformer-based architectures.

Fig 1. Figure 2 in their paper. I have included the original caption in case it is useful, although the figure itself is pretty self-explanatory.

Fig 2. Figure 1 in their paper. Here the caption is necessary \ud83d\ude0f

It is beyond the scope of this notebook to explain those implementations in detail. Therefore, we strongly recommend that the user read the papers if this functionality is of interest to them.

One thing is worth noticing, however. As seen in Fig 1 (the TabNet paper's Fig 2), the masking of the input features happens in the feature space. However, the implementation in this library is inspired by that at the dreamquark-ai repo, which is itself inspired by the original implementation (by the way, at this point I will write it once again: all TabNet-related things in this library are inspired by, when not directly based on, the code in that repo, therefore ALL CREDIT TO THE GUYS AT dreamquark-ai).

In that implementation the masking happens in the embedding space, and it currently does not mask the entire embedding (i.e. the whole categorical feature). We decided to release it as is in this version and we will implement the exact process described in the paper in future releases.
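To make the distinction concrete, the snippet below is a purely illustrative sketch of masking in the embedding space. It is not the library's actual RandomObfuscator implementation (the masker shown in the ed_model above); it only shows the idea of dropping individual values of the embedded representation rather than whole input features.

import torch

# illustrative only: given an embedded batch of shape (batch_size, embed_dim),
# randomly zero out ~20% of the individual values. Note that whole embeddings
# (i.e. whole categorical features) are not necessarily masked together
embeddings = torch.randn(256, 95)
keep_mask = (torch.rand_like(embeddings) > 0.2).float()
masked_embeddings = embeddings * keep_mask
# the encoder/decoder pair is then trained to reconstruct `embeddings`
# from the encoding of `masked_embeddings`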

Having said all of the above, let's see how to use self supervision for tabular data with pytorch-widedeep. In this notebook we will concentrate on the first of the two approaches (the 'TabNet approach'). For details on the second approach please see 16_Self_Supervised_Pretraning_pt2.

"},{"location":"examples/16_Self_Supervised_Pretraning_pt1.html#self-supervision-for-non-transformer-based-models","title":"Self Supervision for non-transformer-based models..\u00b6","text":"

...or in general, for models where the embeddings can all have different dimensions. In this library, these are: TabMlp, TabResNet and TabNet

As shown in the figure, this is an encoder-decoder approach where we learn to predict values in the incoming data that have been masked. However, as I mentioned before, our implementation is a bit different, and the masking occurs in the embedding space.

Nonetheless, the code below illustrates how to use this encoder-decoder approach with pytorch-widedeep

"},{"location":"examples/16_Self_Supervised_Pretraning_pt2.html","title":"16_Self-Supervised Pre-Training pt 2","text":"In\u00a0[1]: Copied!
import torch\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.model_selection import train_test_split\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.models import WideDeep, FTTransformer\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep.datasets import load_adult\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.self_supervised_training import (\n    ContrastiveDenoisingTrainer,\n)\n
import torch from sklearn.metrics import accuracy_score from sklearn.model_selection import train_test_split from pytorch_widedeep import Trainer from pytorch_widedeep.models import WideDeep, FTTransformer from pytorch_widedeep.metrics import Accuracy from pytorch_widedeep.datasets import load_adult from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.self_supervised_training import ( ContrastiveDenoisingTrainer, ) In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\n\n# one could choose to use a validation set for early stopping, hyperparam\n# optimization, etc. This is just an example, so we simply use train/test\n# split\ndf_tr, df_te = train_test_split(df, test_size=0.2, stratify=df.income_label)\n\ncat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital_gain\",\n    \"capital_loss\",\n    \"native_country\",\n]\ncontinuous_cols = [\"age\", \"hours_per_week\"]\ntarget_col = \"income_label\"\n\ntab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols,\n    continuous_cols=continuous_cols,\n    with_attention=True,\n    with_cls_token=True,  # this is optional\n)\nX_tab = tab_preprocessor.fit_transform(df_tr)\ntarget = df_tr[target_col].values\n
df = load_adult(as_frame=True) df.columns = [c.replace(\"-\", \"_\") for c in df.columns] df[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop(\"income\", axis=1, inplace=True) # one could choose to use a validation set for early stopping, hyperparam # optimization, etc. This is just an example, so we simply use train/test # split df_tr, df_te = train_test_split(df, test_size=0.2, stratify=df.income_label) cat_embed_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"capital_gain\", \"capital_loss\", \"native_country\", ] continuous_cols = [\"age\", \"hours_per_week\"] target_col = \"income_label\" tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols, with_attention=True, with_cls_token=True, # this is optional ) X_tab = tab_preprocessor.fit_transform(df_tr) target = df_tr[target_col].values
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[3]: Copied!
ft_transformer = FTTransformer(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=tab_preprocessor.continuous_cols,\n    embed_continuous_method=\"standard\",\n    input_dim=32,\n    kv_compression_factor=0.5,\n    n_blocks=3,\n    n_heads=4,\n)\n
ft_transformer = FTTransformer( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=tab_preprocessor.continuous_cols, embed_continuous_method=\"standard\", input_dim=32, kv_compression_factor=0.5, n_blocks=3, n_heads=4, ) In\u00a0[4]: Copied!
# for a full list of the params for the ContrastiveDenoisingTrainer (which are many) please see the docs.\n# Note that using these params involves some knowledge of the routine and the architecture of the model used\ncontrastive_denoising_trainer = ContrastiveDenoisingTrainer(\n    model=ft_transformer,\n    preprocessor=tab_preprocessor,\n)\ncontrastive_denoising_trainer.pretrain(X_tab, n_epochs=5, batch_size=256)\n
# for a full list of the params for the ContrastiveDenoisingTrainer (which are many) please see the docs. # Note that using these params involves some knowledge of the routine and the architecture of the model used contrastive_denoising_trainer = ContrastiveDenoisingTrainer( model=ft_transformer, preprocessor=tab_preprocessor, ) contrastive_denoising_trainer.pretrain(X_tab, n_epochs=5, batch_size=256)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:13<00:00, 11.73it/s, loss=579]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:12<00:00, 12.56it/s, loss=143]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:12<00:00, 12.49it/s, loss=141]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:11<00:00, 12.77it/s, loss=138]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:11<00:00, 13.29it/s, loss=137]\n
In\u00a0[5]: Copied!
contrastive_denoising_trainer.save(\n    path=\"pretrained_weights\", model_filename=\"contrastive_denoising_model.pt\"\n)\n
contrastive_denoising_trainer.save( path=\"pretrained_weights\", model_filename=\"contrastive_denoising_model.pt\" )

some time has passed

In\u00a0[6]: Copied!
# some time has passed, we load the model with torch as usual:\ncontrastive_denoising_model = torch.load(\n    \"pretrained_weights/contrastive_denoising_model.pt\"\n)\n
# some time has passed, we load the model with torch as usual: contrastive_denoising_model = torch.load( \"pretrained_weights/contrastive_denoising_model.pt\" )

NOW, AND THIS IS IMPORTANT! We have loaded the entire contrastive denoising model. To proceed with the supervised training we ONLY need the attention-based model, which is the 'model' attribute of the trainer. Let's have a look:

In\u00a0[7]: Copied!
contrastive_denoising_model.model\n
contrastive_denoising_model.model Out[7]:
FTTransformer(\n  (cat_embed): SameSizeCatEmbeddings(\n    (embed): Embedding(323, 32, padding_idx=0)\n    (dropout): Dropout(p=0.0, inplace=False)\n  )\n  (cont_norm): Identity()\n  (cont_embed): ContEmbeddings(\n    INFO: [ContLinear = weight(n_cont_cols, embed_dim) + bias(n_cont_cols, embed_dim)]\n    (linear): ContLinear(n_cont_cols=2, embed_dim=32, embed_dropout=0.0)\n    (dropout): Dropout(p=0.0, inplace=False)\n  )\n  (encoder): Sequential(\n    (fttransformer_block0): FTTransformerEncoder(\n      (attn): LinearAttentionLinformer(\n        (dropout): Dropout(p=0.2, inplace=False)\n        (qkv_proj): Linear(in_features=32, out_features=96, bias=False)\n        (out_proj): Linear(in_features=32, out_features=32, bias=False)\n      )\n      (ff): FeedForward(\n        (w_1): Linear(in_features=32, out_features=84, bias=True)\n        (w_2): Linear(in_features=42, out_features=32, bias=True)\n        (dropout): Dropout(p=0.1, inplace=False)\n        (activation): REGLU()\n      )\n      (attn_normadd): NormAdd(\n        (dropout): Dropout(p=0.2, inplace=False)\n        (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n      )\n      (ff_normadd): NormAdd(\n        (dropout): Dropout(p=0.1, inplace=False)\n        (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n      )\n    )\n    (fttransformer_block1): FTTransformerEncoder(\n      (attn): LinearAttentionLinformer(\n        (dropout): Dropout(p=0.2, inplace=False)\n        (qkv_proj): Linear(in_features=32, out_features=96, bias=False)\n        (out_proj): Linear(in_features=32, out_features=32, bias=False)\n      )\n      (ff): FeedForward(\n        (w_1): Linear(in_features=32, out_features=84, bias=True)\n        (w_2): Linear(in_features=42, out_features=32, bias=True)\n        (dropout): Dropout(p=0.1, inplace=False)\n        (activation): REGLU()\n      )\n      (attn_normadd): NormAdd(\n        (dropout): Dropout(p=0.2, inplace=False)\n        (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n      )\n      (ff_normadd): NormAdd(\n        (dropout): Dropout(p=0.1, inplace=False)\n        (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n      )\n    )\n    (fttransformer_block2): FTTransformerEncoder(\n      (attn): LinearAttentionLinformer(\n        (dropout): Dropout(p=0.2, inplace=False)\n        (qkv_proj): Linear(in_features=32, out_features=96, bias=False)\n        (out_proj): Linear(in_features=32, out_features=32, bias=False)\n      )\n      (ff): FeedForward(\n        (w_1): Linear(in_features=32, out_features=84, bias=True)\n        (w_2): Linear(in_features=42, out_features=32, bias=True)\n        (dropout): Dropout(p=0.1, inplace=False)\n        (activation): REGLU()\n      )\n      (attn_normadd): NormAdd(\n        (dropout): Dropout(p=0.2, inplace=False)\n        (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n      )\n      (ff_normadd): NormAdd(\n        (dropout): Dropout(p=0.1, inplace=False)\n        (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n      )\n    )\n  )\n)
In\u00a0[8]: Copied!
pretrained_model = contrastive_denoising_model.model\n
pretrained_model = contrastive_denoising_model.model In\u00a0[9]: Copied!
# and as always, ANY supervised model in this library has to go through the WideDeep class:\nmodel = WideDeep(deeptabular=pretrained_model)\ntrainer = Trainer(model=model, objective=\"binary\", metrics=[Accuracy])\n\ntrainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256)\n\n# And, you know...we get a test metric\nX_tab_te = tab_preprocessor.transform(df_te)\ntarget_te = df_te[target_col].values\n\npreds = trainer.predict(X_tab=X_tab_te)\ntest_acc = accuracy_score(target_te, preds)\nprint(test_acc)\n
# and as always, ANY supervised model in this library has to go through the WideDeep class: model = WideDeep(deeptabular=pretrained_model) trainer = Trainer(model=model, objective=\"binary\", metrics=[Accuracy]) trainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256) # And, you know...we get a test metric X_tab_te = tab_preprocessor.transform(df_te) target_te = df_te[target_col].values preds = trainer.predict(X_tab=X_tab_te) test_acc = accuracy_score(target_te, preds) print(test_acc)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:05<00:00, 27.19it/s, loss=0.383, metrics={'acc': 0.8176}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:05<00:00, 26.08it/s, loss=0.325, metrics={'acc': 0.8502}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:05<00:00, 26.56it/s, loss=0.306, metrics={'acc': 0.8601}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:05<00:00, 27.41it/s, loss=0.295, metrics={'acc': 0.8641}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:06<00:00, 24.70it/s, loss=0.289, metrics={'acc': 0.8656}]\npredict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:00<00:00, 97.26it/s]
0.8695874705701709\n
\n
In\u00a0[\u00a0]: Copied!
\n
"},{"location":"examples/16_Self_Supervised_Pretraning_pt2.html#self-supervised-pretraining-for-tabular-data","title":"Self Supervised Pretraining for Tabular Data\u00b6","text":"

We have implemented two Self Supervised Pre-training routines that allow the user to pre-train all tabular models in the library with the exception of the TabPerceiver (which is a special monster).

The two routines implemented are illustrated in the figures below. The 1st is from TabNet: Attentive Interpretable Tabular Learning and is designed for models that do not use transformer-based architectures, while the second is from SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training, and is designed for models that use transformer-based architectures.

Fig 1. Figure 2 in their paper. I have included the original caption in case it is useful, although the figure itself is pretty self-explanatory.

Fig 2. Figure 1 in their paper. Here the caption is necessary \ud83d\ude0f

It is beyond the scope of this notebook to explain those implementations in detail. Therefore, we strongly recommend that the user read the papers if this functionality is of interest to them.

One thing is worth noticing, however. As seen in Fig 1 (the TabNet paper's Fig 2), the masking of the input features happens in the feature space. However, the implementation in this library is inspired by that at the dreamquark-ai repo, which is itself inspired by the original implementation (by the way, at this point I will write it once again: all TabNet-related things in this library are inspired by, when not directly based on, the code in that repo, therefore ALL CREDIT TO THE GUYS AT dreamquark-ai).

In that implementation the masking happens in the embedding space, and it currently does not mask the entire embedding (i.e. the whole categorical feature). We decided to release it as is in this version and we will implement the exact process described in the paper in future releases.

Having said all of the above, let's see how to use self supervision for tabular data with pytorch-widedeep. In this notebook we will concentrate on the second of the two approaches (the 'SAINT approach'). For details on the first approach (the 'TabNet' approach) please see 16_Self_Supervised_Pretraning_pt1.

"},{"location":"examples/16_Self_Supervised_Pretraning_pt2.html#self-supervision-transformer-based-models","title":"Self Supervision transformer-based models..\u00b6","text":"

...or in general, for models where the embeddings all have the same dimensions. In this library, these are:

  • TabTransformer
  • FTTransformer
  • SAINT
  • TabFastFormer

Note that there is one additional Transformer-based model, the TabPerceiver. However, this is a \"particular\" model and at the moment we do not support self supervision for it, although it will come.

Let's see an example using the FTTransformer.

"},{"location":"examples/17_Usign_a_hugging_face_model.html","title":"17_Using_a_huggingface_model","text":"In\u00a0[1]: Copied!
import numpy as np\nimport torch\nimport lightgbm as lgb\nfrom lightgbm import Dataset as lgbDataset\nfrom scipy.sparse import hstack, csr_matrix\nfrom sklearn.metrics import (\n    f1_score,\n    recall_score,\n    accuracy_score,\n    precision_score,\n    confusion_matrix,\n)\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.feature_extraction.text import TfidfVectorizer\n\nfrom torch import Tensor, nn\nfrom transformers import DistilBertModel, DistilBertTokenizer\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.models import TabMlp, BasicRNN, WideDeep\nfrom pytorch_widedeep.metrics import F1Score, Accuracy\nfrom pytorch_widedeep.utils import Tokenizer, LabelEncoder\nfrom pytorch_widedeep.preprocessing import TextPreprocessor, TabPreprocessor\nfrom pytorch_widedeep.datasets import load_womens_ecommerce\nfrom pytorch_widedeep.utils.fastai_transforms import (\n    fix_html,\n    spec_add_spaces,\n    rm_useless_spaces,\n)\n
import numpy as np import torch import lightgbm as lgb from lightgbm import Dataset as lgbDataset from scipy.sparse import hstack, csr_matrix from sklearn.metrics import ( f1_score, recall_score, accuracy_score, precision_score, confusion_matrix, ) from sklearn.model_selection import train_test_split from sklearn.feature_extraction.text import TfidfVectorizer from torch import Tensor, nn from transformers import DistilBertModel, DistilBertTokenizer from pytorch_widedeep import Trainer from pytorch_widedeep.models import TabMlp, BasicRNN, WideDeep from pytorch_widedeep.metrics import F1Score, Accuracy from pytorch_widedeep.utils import Tokenizer, LabelEncoder from pytorch_widedeep.preprocessing import TextPreprocessor, TabPreprocessor from pytorch_widedeep.datasets import load_womens_ecommerce from pytorch_widedeep.utils.fastai_transforms import ( fix_html, spec_add_spaces, rm_useless_spaces, )

Let's load the data and have a look:

In\u00a0[2]: Copied!
df = load_womens_ecommerce(as_frame=True)\n\ndf.columns = [c.replace(\" \", \"_\").lower() for c in df.columns]\n\n# classes from [0,num_class)\ndf[\"rating\"] = (df[\"rating\"] - 1).astype(\"int64\")\n\n# group reviews with 1 and 2 scores into one class\ndf.loc[df.rating == 0, \"rating\"] = 1\n\n# and back again to [0,num_class)\ndf[\"rating\"] = (df[\"rating\"] - 1).astype(\"int64\")\n\n# drop short reviews\ndf = df[~df.review_text.isna()]\ndf[\"review_length\"] = df.review_text.apply(lambda x: len(x.split(\" \")))\ndf = df[df.review_length >= 5]\ndf = df.drop(\"review_length\", axis=1).reset_index(drop=True)\n
df = load_womens_ecommerce(as_frame=True) df.columns = [c.replace(\" \", \"_\").lower() for c in df.columns] # classes from [0,num_class) df[\"rating\"] = (df[\"rating\"] - 1).astype(\"int64\") # group reviews with 1 and 2 scores into one class df.loc[df.rating == 0, \"rating\"] = 1 # and back again to [0,num_class) df[\"rating\"] = (df[\"rating\"] - 1).astype(\"int64\") # drop short reviews df = df[~df.review_text.isna()] df[\"review_length\"] = df.review_text.apply(lambda x: len(x.split(\" \"))) df = df[df.review_length >= 5] df = df.drop(\"review_length\", axis=1).reset_index(drop=True) In\u00a0[3]: Copied!
df.head()\n
df.head() Out[3]: clothing_id age title review_text rating recommended_ind positive_feedback_count division_name department_name class_name 0 767 33 None Absolutely wonderful - silky and sexy and comf... 2 1 0 Initmates Intimate Intimates 1 1080 34 None Love this dress! it's sooo pretty. i happene... 3 1 4 General Dresses Dresses 2 1077 60 Some major design flaws I had such high hopes for this dress and reall... 1 0 0 General Dresses Dresses 3 1049 50 My favorite buy! I love, love, love this jumpsuit. it's fun, fl... 3 1 0 General Petite Bottoms Pants 4 847 47 Flattering shirt This shirt is very flattering to all due to th... 3 1 6 General Tops Blouses

So, we will use the review_text column to predict the rating. Later on, we will try to combine it with some other columns (like division_name and age) to see if these help.

Let's first have a look at the distribution of ratings

In\u00a0[4]: Copied!
df.rating.value_counts()\n
df.rating.value_counts() Out[4]:
rating\n3    12515\n2     4904\n1     2820\n0     2369\nName: count, dtype: int64

This shows that we could perhaps have grouped the lower rating scores of 1, 2 and 3 into one class...but anyway, let's just move on with those 4 classes.
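Purely as a hypothetical aside (this is not used in the rest of the notebook), that grouping could be done with a remapping in the same spirit as the preprocessing above, here on a copy of the dataframe:

# hypothetical sketch: merge the two lowest classes (the original 1-3 scores)
# into a single class, leaving 3 classes instead of 4
df_3cls = df.copy()
df_3cls.loc[df_3cls.rating == 0, \"rating\"] = 1
df_3cls[\"rating\"] = (df_3cls[\"rating\"] - 1).astype(\"int64\")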

We are not going to carry out any hyperparameter optimization here, so we will only need a train and a test set (i.e. there is no need for a validation set for the example in this notebook).

In\u00a0[5]: Copied!
train, test = train_test_split(df, train_size=0.8, random_state=1, stratify=df.rating)\n
train, test = train_test_split(df, train_size=0.8, random_state=1, stratify=df.rating)

Let's see what we have to beat. What metrics would we obtain if we always predict the most common rating (3)?

In\u00a0[6]: Copied!
most_common_pred = [train.rating.value_counts().index[0]] * len(test)\n\nmost_common_acc = accuracy_score(test.rating, most_common_pred)\nmost_common_f1 = f1_score(test.rating, most_common_pred, average=\"weighted\")\n
most_common_pred = [train.rating.value_counts().index[0]] * len(test) most_common_acc = accuracy_score(test.rating, most_common_pred) most_common_f1 = f1_score(test.rating, most_common_pred, average=\"weighted\") In\u00a0[7]: Copied!
print(f\"Accuracy: {most_common_acc}. F1 Score: {most_common_f1}\")\n
print(f\"Accuracy: {most_common_acc}. F1 Score: {most_common_f1}\")
Accuracy: 0.553516143299425. F1 Score: 0.3944344218301668\n

ok, these are our \"baseline\" metrics.

Let's start by simply using tf-idf + lightGBM

In\u00a0[8]: Copied!
# ?Tokenizer\n
# ?Tokenizer In\u00a0[9]: Copied!
# this Tokenizer is part of our utils module but of course, any valid tokenizer can be used here.\n\n# When using notebooks there seems to be an issue related to multiprocessing (and sometimes tqdm)\n# that can only be solved by using only one CPU\ntok = Tokenizer(n_cpus=1)\ntok_reviews_tr = tok.process_all(train.review_text.tolist())\ntok_reviews_te = tok.process_all(test.review_text.tolist())\n
# this Tokenizer is part of our utils module but of course, any valid tokenizer can be used here. # When using notebooks there seems to be an issue related to multiprocessing (and sometimes tqdm) # that can only be solved by using only one CPU tok = Tokenizer(n_cpus=1) tok_reviews_tr = tok.process_all(train.review_text.tolist()) tok_reviews_te = tok.process_all(test.review_text.tolist()) In\u00a0[10]: Copied!
vectorizer = TfidfVectorizer(\n    max_features=5000, preprocessor=lambda x: x, tokenizer=lambda x: x, min_df=5\n)\n\nX_text_tr = vectorizer.fit_transform(tok_reviews_tr)\nX_text_te = vectorizer.transform(tok_reviews_te)\n
vectorizer = TfidfVectorizer( max_features=5000, preprocessor=lambda x: x, tokenizer=lambda x: x, min_df=5 ) X_text_tr = vectorizer.fit_transform(tok_reviews_tr) X_text_te = vectorizer.transform(tok_reviews_te)
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/sklearn/feature_extraction/text.py:525: UserWarning: The parameter 'token_pattern' will not be used since 'tokenizer' is not None'\n  warnings.warn(\n
In\u00a0[11]: Copied!
X_text_tr\n
X_text_tr Out[11]:
<18086x4566 sparse matrix of type '<class 'numpy.float64'>'\n\twith 884074 stored elements in Compressed Sparse Row format>

We now move our matrices to lightGBM Dataset format

In\u00a0[12]: Copied!
lgbtrain_text = lgbDataset(\n    X_text_tr,\n    train.rating.values,\n    free_raw_data=False,\n)\n\nlgbtest_text = lgbDataset(\n    X_text_te,\n    test.rating.values,\n    reference=lgbtrain_text,\n    free_raw_data=False,\n)\n
lgbtrain_text = lgbDataset( X_text_tr, train.rating.values, free_raw_data=False, ) lgbtest_text = lgbDataset( X_text_te, test.rating.values, reference=lgbtrain_text, free_raw_data=False, )

And off we go. By the way, as we run the next cell, we should appreciate how fast lightGBM runs. Yes, the input is a sparse matrix, but still, it trains on an 18086x4566 matrix in a matter of seconds.

In\u00a0[\u00a0]: Copied!
lgb_text_model = lgb.train(\n    {\"objective\": \"multiclass\", \"num_classes\": 4},\n    lgbtrain_text,\n    valid_sets=[lgbtest_text, lgbtrain_text],\n    valid_names=[\"test\", \"train\"],\n)\n
lgb_text_model = lgb.train( {\"objective\": \"multiclass\", \"num_classes\": 4}, lgbtrain_text, valid_sets=[lgbtest_text, lgbtrain_text], valid_names=[\"test\", \"train\"], ) In\u00a0[14]: Copied!
preds_text = lgb_text_model.predict(X_text_te)\npred_text_class = np.argmax(preds_text, 1)\n
preds_text = lgb_text_model.predict(X_text_te) pred_text_class = np.argmax(preds_text, 1) In\u00a0[15]: Copied!
acc_text = accuracy_score(lgbtest_text.label, pred_text_class)\nf1_text = f1_score(lgbtest_text.label, pred_text_class, average=\"weighted\")\ncm_text = confusion_matrix(lgbtest_text.label, pred_text_class)\n
acc_text = accuracy_score(lgbtest_text.label, pred_text_class) f1_text = f1_score(lgbtest_text.label, pred_text_class, average=\"weighted\") cm_text = confusion_matrix(lgbtest_text.label, pred_text_class) In\u00a0[16]: Copied!
print(f\"LightGBM Accuracy: {acc_text}. LightGBM F1 Score: {f1_text}\")\n
print(f\"LightGBM Accuracy: {acc_text}. LightGBM F1 Score: {f1_text}\")
LightGBM Accuracy: 0.6444051304732419. LightGBM F1 Score: 0.617154488246181\n
In\u00a0[17]: Copied!
print(f\"LightGBM Confusion Matrix: \\n {cm_text}\")\n
print(f\"LightGBM Confusion Matrix: \\n {cm_text}\")
LightGBM Confusion Matrix: \n [[ 199  135   61   79]\n [ 123  169  149  123]\n [  30   94  279  578]\n [  16   30  190 2267]]\n

Ok, so, with no hyperparameter optimization lightGBM gets an accuracy of 0.64 and an F1 score of 0.62. This is significantly better than always predicting the most popular rating.

Let's see if, in this implementation, some additional features like age or class_name are of any help

In\u00a0[18]: Copied!
tab_cols = [\n    \"age\",\n    \"division_name\",\n    \"department_name\",\n    \"class_name\",\n]\n\nfor tab_df in [train, test]:\n    for c in [\"division_name\", \"department_name\", \"class_name\"]:\n        tab_df[c] = tab_df[c].str.lower()\n        tab_df[c].fillna(\"missing\", inplace=True)\n
tab_cols = [ \"age\", \"division_name\", \"department_name\", \"class_name\", ] for tab_df in [train, test]: for c in [\"division_name\", \"department_name\", \"class_name\"]: tab_df[c] = tab_df[c].str.lower() tab_df[c].fillna(\"missing\", inplace=True) In\u00a0[19]: Copied!
# This is our LabelEncoder. A class that is designed to work with the models in this library but\n# can be used for general purposes\nle = LabelEncoder(columns_to_encode=[\"division_name\", \"department_name\", \"class_name\"])\ntrain_tab_le = le.fit_transform(train)\ntest_tab_le = le.transform(test)\n
# This is our LabelEncoder. A class that is designed to work with the models in this library but # can be used for general purposes le = LabelEncoder(columns_to_encode=[\"division_name\", \"department_name\", \"class_name\"]) train_tab_le = le.fit_transform(train) test_tab_le = le.transform(test) In\u00a0[20]: Copied!
train_tab_le.head()\n
train_tab_le.head() Out[20]: clothing_id age title review_text rating recommended_ind positive_feedback_count division_name department_name class_name 4541 836 35 None Bought this on sale in my reg size- 10. im 5'9... 2 1 2 1 1 1 18573 1022 25 Look like \"mom jeans\" Maybe i just have the wrong body type for thes... 1 0 0 2 2 2 1058 815 39 Ig brought me here Love the way this top layers under my jackets ... 2 1 0 1 1 1 12132 984 47 Runs small especially the arms I love this jacket. it's the prettiest and mos... 3 1 0 1 3 3 20756 1051 42 True red, true beauty. These pants are gorgeous--the fabric has a sat... 3 1 0 2 2 4

Let's, for example, have a look at the encodings for the categorical feature class_name

In\u00a0[21]: Copied!
le.encoding_dict[\"class_name\"]\n
le.encoding_dict[\"class_name\"] Out[21]:
{'blouses': 1,\n 'jeans': 2,\n 'jackets': 3,\n 'pants': 4,\n 'knits': 5,\n 'dresses': 6,\n 'skirts': 7,\n 'sweaters': 8,\n 'fine gauge': 9,\n 'legwear': 10,\n 'lounge': 11,\n 'shorts': 12,\n 'outerwear': 13,\n 'intimates': 14,\n 'swim': 15,\n 'trend': 16,\n 'sleep': 17,\n 'layering': 18,\n 'missing': 19,\n 'casual bottoms': 20,\n 'chemises': 21}
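If at some point you need to map the integer codes back to the original labels, inverting that dictionary is enough. A small sketch (the LabelEncoder stores one such dict per encoded column):

# invert the class_name mapping: integer code -> original label
inv_class_name = {code: label for label, code in le.encoding_dict["class_name"].items()}
inv_class_name[2]  # 'jeans'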
In\u00a0[22]: Copied!
# tabular training and test sets\nX_tab_tr = csr_matrix(train_tab_le[tab_cols].values)\nX_tab_te = csr_matrix(test_tab_le[tab_cols].values)\n\n# text + tabular training and test sets\nX_tab_text_tr = hstack((X_tab_tr, X_text_tr))\nX_tab_text_te = hstack((X_tab_te, X_text_te))\n
# tabular training and test sets X_tab_tr = csr_matrix(train_tab_le[tab_cols].values) X_tab_te = csr_matrix(test_tab_le[tab_cols].values) # text + tabular training and test sets X_tab_text_tr = hstack((X_tab_tr, X_text_tr)) X_tab_text_te = hstack((X_tab_te, X_text_te)) In\u00a0[23]: Copied!
X_tab_tr\n
X_tab_tr Out[23]:
<18086x4 sparse matrix of type '<class 'numpy.int64'>'\n\twith 72344 stored elements in Compressed Sparse Row format>
In\u00a0[24]: Copied!
X_tab_text_tr\n
X_tab_text_tr Out[24]:
<18086x4570 sparse matrix of type '<class 'numpy.float64'>'\n\twith 956418 stored elements in Compressed Sparse Row format>
In\u00a0[25]: Copied!
lgbtrain_tab_text = lgbDataset(\n    X_tab_text_tr,\n    train.rating.values,\n    categorical_feature=[0, 1, 2, 3],\n    free_raw_data=False,\n)\n\nlgbtest_tab_text = lgbDataset(\n    X_tab_text_te,\n    test.rating.values,\n    reference=lgbtrain_tab_text,\n    free_raw_data=False,\n)\n
lgbtrain_tab_text = lgbDataset( X_tab_text_tr, train.rating.values, categorical_feature=[0, 1, 2, 3], free_raw_data=False, ) lgbtest_tab_text = lgbDataset( X_tab_text_te, test.rating.values, reference=lgbtrain_tab_text, free_raw_data=False, ) In\u00a0[26]: Copied!
lgb_tab_text_model = lgb.train(\n    {\"objective\": \"multiclass\", \"num_classes\": 4},\n    lgbtrain_tab_text,\n    valid_sets=[lgbtest_tab_text, lgbtrain_tab_text],\n    valid_names=[\"test\", \"train\"],\n    verbose_eval=False,\n)\n
lgb_tab_text_model = lgb.train( {\"objective\": \"multiclass\", \"num_classes\": 4}, lgbtrain_tab_text, valid_sets=[lgbtest_tab_text, lgbtrain_tab_text], valid_names=[\"test\", \"train\"], verbose_eval=False, )
/opt/conda/envs/wd38/lib/python3.8/site-packages/lightgbm/basic.py:2065: UserWarning: Using categorical_feature in Dataset.\n  _log_warning('Using categorical_feature in Dataset.')\n/opt/conda/envs/wd38/lib/python3.8/site-packages/lightgbm/basic.py:2068: UserWarning: categorical_feature in Dataset is overridden.\nNew categorical_feature is [0, 1, 2, 3]\n  _log_warning('categorical_feature in Dataset is overridden.\\n'\n/opt/conda/envs/wd38/lib/python3.8/site-packages/lightgbm/engine.py:239: UserWarning: 'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n  _log_warning(\"'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. \"\n
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.138280 seconds.\nYou can set `force_col_wise=true` to remove the overhead.\n[LightGBM] [Info] Total Bins 143432\n[LightGBM] [Info] Number of data points in the train set: 18086, number of used features: 2289\n[LightGBM] [Info] Start training from score -2.255919\n[LightGBM] [Info] Start training from score -2.081545\n[LightGBM] [Info] Start training from score -1.528281\n[LightGBM] [Info] Start training from score -0.591354\n
/opt/conda/envs/wd38/lib/python3.8/site-packages/lightgbm/basic.py:1780: UserWarning: Overriding the parameters from Reference Dataset.\n  _log_warning('Overriding the parameters from Reference Dataset.')\n/opt/conda/envs/wd38/lib/python3.8/site-packages/lightgbm/basic.py:1513: UserWarning: categorical_column in param dict is overridden.\n  _log_warning(f'{cat_alias} in param dict is overridden.')\n
In\u00a0[27]: Copied!
preds_tab_text = lgb_tab_text_model.predict(X_tab_text_te)\npreds_tab_text_class = np.argmax(preds_tab_text, 1)\n\nacc_tab_text = accuracy_score(lgbtest_tab_text.label, preds_tab_text_class)\nf1_tab_text = f1_score(lgbtest_tab_text.label, preds_tab_text_class, average=\"weighted\")\ncm_tab_text = confusion_matrix(lgbtest_tab_text.label, preds_tab_text_class)\n
preds_tab_text = lgb_tab_text_model.predict(X_tab_text_te) preds_tab_text_class = np.argmax(preds_tab_text, 1) acc_tab_text = accuracy_score(lgbtest_tab_text.label, preds_tab_text_class) f1_tab_text = f1_score(lgbtest_tab_text.label, preds_tab_text_class, average=\"weighted\") cm_tab_text = confusion_matrix(lgbtest_tab_text.label, preds_tab_text_class) In\u00a0[28]: Copied!
print(\n    f\"LightGBM text + tabular Accuracy: {acc_tab_text}. LightGBM text + tabular F1 Score: {f1_tab_text}\"\n)\n
print( f\"LightGBM text + tabular Accuracy: {acc_tab_text}. LightGBM text + tabular F1 Score: {f1_tab_text}\" )
LightGBM text + tabular Accuracy: 0.6382131800088456. LightGBM text + tabular F1 Score: 0.6080251307242649\n
In\u00a0[29]: Copied!
print(f\"LightGBM text + tabular Confusion Matrix:\\n {cm_tab_text}\")\n
print(f\"LightGBM text + tabular Confusion Matrix:\\n {cm_tab_text}\")
LightGBM text + tabular Confusion Matrix:\n [[ 193  123   68   90]\n [ 123  146  157  138]\n [  37   90  272  582]\n [  16   37  175 2275]]\n

So, in this setup, the additional tabular columns do not help performance.
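One quick way to see why is to check how much gain LightGBM attributes to those four columns compared with the tf-idf features. A sketch using the Booster's gain-based importances (columns 0-3 are the tabular ones because of how the matrices were stacked above):

# split the gain-based importances into the tabular block (first 4 columns)
# and the tf-idf block (the remaining columns)
gain = lgb_tab_text_model.feature_importance(importance_type="gain")
tab_gain, text_gain = gain[:4].sum(), gain[4:].sum()
print(f"Share of total gain from the tabular columns: {tab_gain / (tab_gain + text_gain):.3f}")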

In\u00a0[30]: Copied!
text_preprocessor = TextPreprocessor(\n    text_col=\"review_text\", max_vocab=5000, min_freq=5, maxlen=90, n_cpus=1\n)\n\nwd_X_text_tr = text_preprocessor.fit_transform(train)\nwd_X_text_te = text_preprocessor.transform(test)\n
text_preprocessor = TextPreprocessor( text_col=\"review_text\", max_vocab=5000, min_freq=5, maxlen=90, n_cpus=1 ) wd_X_text_tr = text_preprocessor.fit_transform(train) wd_X_text_te = text_preprocessor.transform(test)
The vocabulary contains 4328 tokens\n
In\u00a0[31]: Copied!
basic_rnn = BasicRNN(\n    vocab_size=len(text_preprocessor.vocab.itos),\n    embed_dim=300,\n    hidden_dim=64,\n    n_layers=3,\n    rnn_dropout=0.2,\n    head_hidden_dims=[32],\n)\n\n\nwd_text_model = WideDeep(deeptext=basic_rnn, pred_dim=4)\n
basic_rnn = BasicRNN( vocab_size=len(text_preprocessor.vocab.itos), embed_dim=300, hidden_dim=64, n_layers=3, rnn_dropout=0.2, head_hidden_dims=[32], ) wd_text_model = WideDeep(deeptext=basic_rnn, pred_dim=4) In\u00a0[32]: Copied!
wd_text_model\n
wd_text_model Out[32]:
WideDeep(\n  (deeptext): Sequential(\n    (0): BasicRNN(\n      (word_embed): Embedding(4328, 300, padding_idx=1)\n      (rnn): LSTM(300, 64, num_layers=3, batch_first=True, dropout=0.2)\n      (rnn_mlp): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=64, out_features=32, bias=True)\n            (1): ReLU(inplace=True)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=32, out_features=4, bias=True)\n  )\n)
In\u00a0[33]: Copied!
text_trainer = Trainer(\n    wd_text_model,\n    objective=\"multiclass\",\n    metrics=[Accuracy, F1Score(average=True)],\n    num_workers=0,  # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work\n)\n
text_trainer = Trainer( wd_text_model, objective=\"multiclass\", metrics=[Accuracy, F1Score(average=True)], num_workers=0, # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work ) In\u00a0[34]: Copied!
text_trainer.fit(\n    X_text=wd_X_text_tr,\n    target=train.rating.values,\n    n_epochs=5,\n    batch_size=256,\n)\n
text_trainer.fit( X_text=wd_X_text_tr, target=train.rating.values, n_epochs=5, batch_size=256, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 52.39it/s, loss=1.16, metrics={'acc': 0.5349, 'f1': 0.2011}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 70.35it/s, loss=0.964, metrics={'acc': 0.5827, 'f1': 0.3005}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 70.33it/s, loss=0.845, metrics={'acc': 0.6252, 'f1': 0.4133}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 69.99it/s, loss=0.765, metrics={'acc': 0.6575, 'f1': 0.4875}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 69.55it/s, loss=0.709, metrics={'acc': 0.6879, 'f1': 0.5423}]\n
In\u00a0[35]: Copied!
wd_pred_text = text_trainer.predict_proba(X_text=wd_X_text_te)\nwd_pred_text_class = np.argmax(wd_pred_text, 1)\n
wd_pred_text = text_trainer.predict_proba(X_text=wd_X_text_te) wd_pred_text_class = np.argmax(wd_pred_text, 1)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 18/18 [00:00<00:00, 211.51it/s]\n
In\u00a0[36]: Copied!
wd_acc_text = accuracy_score(test.rating, wd_pred_text_class)\nwd_f1_text = f1_score(test.rating, wd_pred_text_class, average=\"weighted\")\nwd_cm_text = confusion_matrix(test.rating, wd_pred_text_class)\n
wd_acc_text = accuracy_score(test.rating, wd_pred_text_class) wd_f1_text = f1_score(test.rating, wd_pred_text_class, average=\"weighted\") wd_cm_text = confusion_matrix(test.rating, wd_pred_text_class) In\u00a0[37]: Copied!
print(f\"Basic RNN Accuracy: {wd_acc_text}. Basic RNN F1 Score: {wd_f1_text}\")\n
print(f\"Basic RNN Accuracy: {wd_acc_text}. Basic RNN F1 Score: {wd_f1_text}\")
Basic RNN Accuracy: 0.6076957098628926. Basic RNN F1 Score: 0.6017335854471788\n
In\u00a0[38]: Copied!
print(f\"Basic RNN Confusion Matrix:\\n {wd_cm_text}\")\n
print(f\"Basic RNN Confusion Matrix:\\n {wd_cm_text}\")
Basic RNN Confusion Matrix:\n [[ 327   76   62    9]\n [ 285  115  117   47]\n [ 131  122  315  413]\n [  42   69  401 1991]]\n

The performance is very similar to that of simply using tf-idf and LightGBM. Let's see if adding tabular features helps when using pytorch-widedeep

In\u00a0[39]: Copied!
# ?TabPreprocessor\n
# ?TabPreprocessor In\u00a0[40]: Copied!
tab_preprocessor = TabPreprocessor(cat_embed_cols=tab_cols)\n\nwd_X_tab_tr = tab_preprocessor.fit_transform(train)\nwd_X_tab_te = tab_preprocessor.transform(test)\n
tab_preprocessor = TabPreprocessor(cat_embed_cols=tab_cols) wd_X_tab_tr = tab_preprocessor.fit_transform(train) wd_X_tab_te = tab_preprocessor.transform(test) In\u00a0[41]: Copied!
# ?TabMlp\n
# ?TabMlp In\u00a0[42]: Copied!
tab_model = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    mlp_hidden_dims=[100, 50],\n)\n
tab_model = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, mlp_hidden_dims=[100, 50], ) In\u00a0[43]: Copied!
tab_model\n
tab_model Out[43]:
TabMlp(\n  (cat_and_cont_embed): DiffSizeCatAndContEmbeddings(\n    (cat_embed): DiffSizeCatEmbeddings(\n      (embed_layers): ModuleDict(\n        (emb_layer_age): Embedding(78, 18, padding_idx=0)\n        (emb_layer_division_name): Embedding(5, 3, padding_idx=0)\n        (emb_layer_department_name): Embedding(8, 5, padding_idx=0)\n        (emb_layer_class_name): Embedding(22, 9, padding_idx=0)\n      )\n      (embedding_dropout): Dropout(p=0.1, inplace=False)\n    )\n  )\n  (encoder): MLP(\n    (mlp): Sequential(\n      (dense_layer_0): Sequential(\n        (0): Dropout(p=0.1, inplace=False)\n        (1): Linear(in_features=35, out_features=100, bias=True)\n        (2): ReLU(inplace=True)\n      )\n      (dense_layer_1): Sequential(\n        (0): Dropout(p=0.1, inplace=False)\n        (1): Linear(in_features=100, out_features=50, bias=True)\n        (2): ReLU(inplace=True)\n      )\n    )\n  )\n)
In\u00a0[44]: Copied!
text_model = BasicRNN(\n    vocab_size=len(text_preprocessor.vocab.itos),\n    embed_dim=300,\n    hidden_dim=64,\n    n_layers=3,\n    rnn_dropout=0.2,\n    head_hidden_dims=[32],\n)\n
text_model = BasicRNN( vocab_size=len(text_preprocessor.vocab.itos), embed_dim=300, hidden_dim=64, n_layers=3, rnn_dropout=0.2, head_hidden_dims=[32], ) In\u00a0[45]: Copied!
wd_tab_and_text_model = WideDeep(deeptabular=tab_model, deeptext=text_model, pred_dim=4)\n
wd_tab_and_text_model = WideDeep(deeptabular=tab_model, deeptext=text_model, pred_dim=4) In\u00a0[46]: Copied!
wd_tab_and_text_model\n
wd_tab_and_text_model Out[46]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cat_and_cont_embed): DiffSizeCatAndContEmbeddings(\n        (cat_embed): DiffSizeCatEmbeddings(\n          (embed_layers): ModuleDict(\n            (emb_layer_age): Embedding(78, 18, padding_idx=0)\n            (emb_layer_division_name): Embedding(5, 3, padding_idx=0)\n            (emb_layer_department_name): Embedding(8, 5, padding_idx=0)\n            (emb_layer_class_name): Embedding(22, 9, padding_idx=0)\n          )\n          (embedding_dropout): Dropout(p=0.1, inplace=False)\n        )\n      )\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Dropout(p=0.1, inplace=False)\n            (1): Linear(in_features=35, out_features=100, bias=True)\n            (2): ReLU(inplace=True)\n          )\n          (dense_layer_1): Sequential(\n            (0): Dropout(p=0.1, inplace=False)\n            (1): Linear(in_features=100, out_features=50, bias=True)\n            (2): ReLU(inplace=True)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=50, out_features=4, bias=True)\n  )\n  (deeptext): Sequential(\n    (0): BasicRNN(\n      (word_embed): Embedding(4328, 300, padding_idx=1)\n      (rnn): LSTM(300, 64, num_layers=3, batch_first=True, dropout=0.2)\n      (rnn_mlp): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=64, out_features=32, bias=True)\n            (1): ReLU(inplace=True)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=32, out_features=4, bias=True)\n  )\n)
In\u00a0[47]: Copied!
tab_and_text_trainer = Trainer(\n    wd_tab_and_text_model,\n    objective=\"multiclass\",\n    metrics=[Accuracy, F1Score(average=True)],\n    num_workers=0,  # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work\n)\n
tab_and_text_trainer = Trainer( wd_tab_and_text_model, objective=\"multiclass\", metrics=[Accuracy, F1Score(average=True)], num_workers=0, # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work ) In\u00a0[48]: Copied!
tab_and_text_trainer.fit(\n    X_tab=wd_X_tab_tr,\n    X_text=wd_X_text_tr,\n    target=train.rating.values,\n    n_epochs=5,\n    batch_size=256,\n)\n
tab_and_text_trainer.fit( X_tab=wd_X_tab_tr, X_text=wd_X_text_tr, target=train.rating.values, n_epochs=5, batch_size=256, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 52.04it/s, loss=1.13, metrics={'acc': 0.538, 'f1': 0.1911}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 52.28it/s, loss=0.936, metrics={'acc': 0.5887, 'f1': 0.3507}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 52.26it/s, loss=0.825, metrics={'acc': 0.6394, 'f1': 0.4545}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 51.33it/s, loss=0.757, metrics={'acc': 0.6696, 'f1': 0.5214}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 50.39it/s, loss=0.702, metrics={'acc': 0.6963, 'f1': 0.5654}]\n
In\u00a0[49]: Copied!
wd_pred_tab_and_text = tab_and_text_trainer.predict_proba(\n    X_tab=wd_X_tab_te, X_text=wd_X_text_te\n)\nwd_pred_tab_and_text_class = np.argmax(wd_pred_tab_and_text, 1)\n
wd_pred_tab_and_text = tab_and_text_trainer.predict_proba( X_tab=wd_X_tab_te, X_text=wd_X_text_te ) wd_pred_tab_and_text_class = np.argmax(wd_pred_tab_and_text, 1)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 18/18 [00:00<00:00, 136.94it/s]\n
In\u00a0[50]: Copied!
wd_acc_tab_and_text = accuracy_score(test.rating, wd_pred_tab_and_text_class)\nwd_f1_tab_and_text = f1_score(\n    test.rating, wd_pred_tab_and_text_class, average=\"weighted\"\n)\nwd_cm_tab_and_text = confusion_matrix(test.rating, wd_pred_tab_and_text_class)\n
wd_acc_tab_and_text = accuracy_score(test.rating, wd_pred_tab_and_text_class) wd_f1_tab_and_text = f1_score( test.rating, wd_pred_tab_and_text_class, average=\"weighted\" ) wd_cm_tab_and_text = confusion_matrix(test.rating, wd_pred_tab_and_text_class) In\u00a0[51]: Copied!
print(\n    f\"Basic RNN + Tabular  Accuracy: {wd_acc_tab_and_text}. Basic RNN + TabularF1 Score: {wd_f1_tab_and_text}\"\n)\nprint(f\"Basic RNN + Tabular  Confusion Matrix:\\n {wd_cm_tab_and_text}\")\n
print( f\"Basic RNN + Tabular Accuracy: {wd_acc_tab_and_text}. Basic RNN + TabularF1 Score: {wd_f1_tab_and_text}\" ) print(f\"Basic RNN + Tabular Confusion Matrix:\\n {wd_cm_tab_and_text}\")
Basic RNN + Tabular  Accuracy: 0.6333480760725343. Basic RNN + TabularF1 Score: 0.6332310089593208\nBasic RNN + Tabular  Confusion Matrix:\n [[ 267  132   65   10]\n [ 198  168  159   39]\n [  57  113  410  401]\n [  12   58  414 2019]]\n

We are going to \"manually\" code the Tokenizer and the model and see how they can be used as part of the process along with the pytorch-widedeep library.

Tokenizer:

In\u00a0[52]: Copied!
class BertTokenizer(object):\n    def __init__(\n        self,\n        pretrained_tokenizer=\"distilbert-base-uncased\",\n        do_lower_case=True,\n        max_length=90,\n    ):\n        super(BertTokenizer, self).__init__()\n        self.pretrained_tokenizer = pretrained_tokenizer\n        self.do_lower_case = do_lower_case\n        self.max_length = max_length\n\n    def fit(self, texts):\n        self.tokenizer = DistilBertTokenizer.from_pretrained(\n            self.pretrained_tokenizer, do_lower_case=self.do_lower_case\n        )\n\n        return self\n\n    def transform(self, texts):\n        input_ids = []\n        for text in texts:\n            encoded_sent = self.tokenizer.encode_plus(\n                text=self._pre_rules(text),\n                add_special_tokens=True,\n                max_length=self.max_length,\n                padding=\"max_length\",\n                truncation=True,\n            )\n\n            input_ids.append(encoded_sent.get(\"input_ids\"))\n        return np.stack(input_ids)\n\n    def fit_transform(self, texts):\n        return self.fit(texts).transform(texts)\n\n    @staticmethod\n    def _pre_rules(text):\n        return fix_html(rm_useless_spaces(spec_add_spaces(text)))\n
class BertTokenizer(object): def __init__( self, pretrained_tokenizer=\"distilbert-base-uncased\", do_lower_case=True, max_length=90, ): super(BertTokenizer, self).__init__() self.pretrained_tokenizer = pretrained_tokenizer self.do_lower_case = do_lower_case self.max_length = max_length def fit(self, texts): self.tokenizer = DistilBertTokenizer.from_pretrained( self.pretrained_tokenizer, do_lower_case=self.do_lower_case ) return self def transform(self, texts): input_ids = [] for text in texts: encoded_sent = self.tokenizer.encode_plus( text=self._pre_rules(text), add_special_tokens=True, max_length=self.max_length, padding=\"max_length\", truncation=True, ) input_ids.append(encoded_sent.get(\"input_ids\")) return np.stack(input_ids) def fit_transform(self, texts): return self.fit(texts).transform(texts) @staticmethod def _pre_rules(text): return fix_html(rm_useless_spaces(spec_add_spaces(text)))

Model:

In\u00a0[53]: Copied!
class BertModel(nn.Module):\n    def __init__(\n        self,\n        model_name: str = \"distilbert-base-uncased\",\n        freeze_bert: bool = False,\n    ):\n        super(BertModel, self).__init__()\n\n        self.bert = DistilBertModel.from_pretrained(\n            model_name,\n        )\n\n        if freeze_bert:\n            for param in self.bert.parameters():\n                param.requires_grad = False\n\n    def forward(self, X_inp: Tensor) -> Tensor:\n        attn_mask = (X_inp != 0).type(torch.int8)\n        outputs = self.bert(input_ids=X_inp, attention_mask=attn_mask)\n        return outputs[0][:, 0, :]\n\n    @property\n    def output_dim(self) -> int:\n        # This is THE ONLY requirement for any model to work with pytorch-widedeep. Must\n        # have a 'output_dim' property so the WideDeep class knows the incoming dims\n        # from the custom model. in this case, I hardcoded it\n        return 768\n
class BertModel(nn.Module): def __init__( self, model_name: str = \"distilbert-base-uncased\", freeze_bert: bool = False, ): super(BertModel, self).__init__() self.bert = DistilBertModel.from_pretrained( model_name, ) if freeze_bert: for param in self.bert.parameters(): param.requires_grad = False def forward(self, X_inp: Tensor) -> Tensor: attn_mask = (X_inp != 0).type(torch.int8) outputs = self.bert(input_ids=X_inp, attention_mask=attn_mask) return outputs[0][:, 0, :] @property def output_dim(self) -> int: # This is THE ONLY requirement for any model to work with pytorch-widedeep. Must # have a 'output_dim' property so the WideDeep class knows the incoming dims # from the custom model. in this case, I hardcoded it return 768 In\u00a0[54]: Copied!
bert_tokenizer = BertTokenizer()\nX_bert_tr = bert_tokenizer.fit_transform(train[\"review_text\"].tolist())\nX_bert_te = bert_tokenizer.transform(test[\"review_text\"].tolist())\n
bert_tokenizer = BertTokenizer() X_bert_tr = bert_tokenizer.fit_transform(train[\"review_text\"].tolist()) X_bert_te = bert_tokenizer.transform(test[\"review_text\"].tolist())
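A quick sanity check, just as a sketch: after tokenization each review is a fixed-length sequence of 90 DistilBERT token ids, so both arrays should have 90 columns.

print(X_bert_tr.shape)  # expected: (18086, 90)
print(X_bert_te.shape)  # same number of columns, one row per test review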

As I have mentioned a number of times in the documentation and examples, pytorch-widedeep is designed for flexibility. For any of the data modes (tabular, text and images) there are components/models available in the library. However, the user can choose to use any model they want, with the only requirement that such a model must have an output_dim property.
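As a minimal, hypothetical illustration of that contract (the class below is made up for this example and is not part of the library), any nn.Module that exposes an output_dim property can be plugged in:

from torch import Tensor, nn


class TinyTextEncoder(nn.Module):
    # hypothetical custom component: the only hard requirement pytorch-widedeep
    # imposes is the output_dim property, used to size the layers built on top
    def __init__(self, vocab_size: int, embed_dim: int = 64):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=0)

    def forward(self, X: Tensor) -> Tensor:
        # mean-pool the token embeddings -> (batch_size, embed_dim)
        return self.embed(X).mean(dim=1)

    @property
    def output_dim(self) -> int:
        return self.embed.embedding_dim

Such a module could then be passed as deeptext to WideDeep in exactly the same way BertModel is used below.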

With that in mind, the BertModel class defined above can be used by pytorch-widedeep like any other of the internal components. In other words, simply pass it to the WideDeep class. In this case we are going to add an FC head as part of the classifier.

In\u00a0[55]: Copied!
bert_model = BertModel(freeze_bert=True)\nwd_bert_model = WideDeep(\n    deeptext=bert_model,\n    head_hidden_dims=[256, 128, 64],\n    pred_dim=4,\n)\n
bert_model = BertModel(freeze_bert=True) wd_bert_model = WideDeep( deeptext=bert_model, head_hidden_dims=[256, 128, 64], pred_dim=4, )
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight']\n- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n
In\u00a0[56]: Copied!
wd_bert_model\n
wd_bert_model Out[56]:
WideDeep(\n  (deeptext): BertModel(\n    (bert): DistilBertModel(\n      (embeddings): Embeddings(\n        (word_embeddings): Embedding(30522, 768, padding_idx=0)\n        (position_embeddings): Embedding(512, 768)\n        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n        (dropout): Dropout(p=0.1, inplace=False)\n      )\n      (transformer): Transformer(\n        (layer): ModuleList(\n          (0): TransformerBlock(\n            (attention): MultiHeadSelfAttention(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (q_lin): Linear(in_features=768, out_features=768, bias=True)\n              (k_lin): Linear(in_features=768, out_features=768, bias=True)\n              (v_lin): Linear(in_features=768, out_features=768, bias=True)\n              (out_lin): Linear(in_features=768, out_features=768, bias=True)\n            )\n            (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n            (ffn): FFN(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (lin1): Linear(in_features=768, out_features=3072, bias=True)\n              (lin2): Linear(in_features=3072, out_features=768, bias=True)\n              (activation): GELUActivation()\n            )\n            (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n          )\n          (1): TransformerBlock(\n            (attention): MultiHeadSelfAttention(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (q_lin): Linear(in_features=768, out_features=768, bias=True)\n              (k_lin): Linear(in_features=768, out_features=768, bias=True)\n              (v_lin): Linear(in_features=768, out_features=768, bias=True)\n              (out_lin): Linear(in_features=768, out_features=768, bias=True)\n            )\n            (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n            (ffn): FFN(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (lin1): Linear(in_features=768, out_features=3072, bias=True)\n              (lin2): Linear(in_features=3072, out_features=768, bias=True)\n              (activation): GELUActivation()\n            )\n            (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n          )\n          (2): TransformerBlock(\n            (attention): MultiHeadSelfAttention(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (q_lin): Linear(in_features=768, out_features=768, bias=True)\n              (k_lin): Linear(in_features=768, out_features=768, bias=True)\n              (v_lin): Linear(in_features=768, out_features=768, bias=True)\n              (out_lin): Linear(in_features=768, out_features=768, bias=True)\n            )\n            (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n            (ffn): FFN(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (lin1): Linear(in_features=768, out_features=3072, bias=True)\n              (lin2): Linear(in_features=3072, out_features=768, bias=True)\n              (activation): GELUActivation()\n            )\n            (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n          )\n          (3): TransformerBlock(\n            (attention): MultiHeadSelfAttention(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (q_lin): Linear(in_features=768, out_features=768, bias=True)\n              (k_lin): Linear(in_features=768, out_features=768, bias=True)\n      
        (v_lin): Linear(in_features=768, out_features=768, bias=True)\n              (out_lin): Linear(in_features=768, out_features=768, bias=True)\n            )\n            (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n            (ffn): FFN(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (lin1): Linear(in_features=768, out_features=3072, bias=True)\n              (lin2): Linear(in_features=3072, out_features=768, bias=True)\n              (activation): GELUActivation()\n            )\n            (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n          )\n          (4): TransformerBlock(\n            (attention): MultiHeadSelfAttention(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (q_lin): Linear(in_features=768, out_features=768, bias=True)\n              (k_lin): Linear(in_features=768, out_features=768, bias=True)\n              (v_lin): Linear(in_features=768, out_features=768, bias=True)\n              (out_lin): Linear(in_features=768, out_features=768, bias=True)\n            )\n            (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n            (ffn): FFN(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (lin1): Linear(in_features=768, out_features=3072, bias=True)\n              (lin2): Linear(in_features=3072, out_features=768, bias=True)\n              (activation): GELUActivation()\n            )\n            (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n          )\n          (5): TransformerBlock(\n            (attention): MultiHeadSelfAttention(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (q_lin): Linear(in_features=768, out_features=768, bias=True)\n              (k_lin): Linear(in_features=768, out_features=768, bias=True)\n              (v_lin): Linear(in_features=768, out_features=768, bias=True)\n              (out_lin): Linear(in_features=768, out_features=768, bias=True)\n            )\n            (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n            (ffn): FFN(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (lin1): Linear(in_features=768, out_features=3072, bias=True)\n              (lin2): Linear(in_features=3072, out_features=768, bias=True)\n              (activation): GELUActivation()\n            )\n            (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n          )\n        )\n      )\n    )\n  )\n  (deephead): Sequential(\n    (0): MLP(\n      (mlp): Sequential(\n        (dense_layer_0): Sequential(\n          (0): Linear(in_features=768, out_features=256, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n        (dense_layer_1): Sequential(\n          (0): Linear(in_features=256, out_features=128, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n        (dense_layer_2): Sequential(\n          (0): Linear(in_features=128, out_features=64, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n      )\n    )\n    (1): Linear(in_features=64, out_features=4, bias=True)\n  )\n)
In\u00a0[57]: Copied!
wd_bert_trainer = Trainer(\n    wd_bert_model,\n    objective=\"multiclass\",\n    metrics=[Accuracy, F1Score(average=True)],\n    num_workers=0,  # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work\n)\n\nwd_bert_trainer.fit(\n    X_text=X_bert_tr,\n    target=train.rating.values,\n    n_epochs=3,\n    batch_size=64,\n)\n
wd_bert_trainer = Trainer( wd_bert_model, objective=\"multiclass\", metrics=[Accuracy, F1Score(average=True)], num_workers=0, # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work ) wd_bert_trainer.fit( X_text=X_bert_tr, target=train.rating.values, n_epochs=3, batch_size=64, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 283/283 [00:14<00:00, 19.68it/s, loss=0.968, metrics={'acc': 0.5879, 'f1': 0.3591}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 283/283 [00:14<00:00, 19.63it/s, loss=0.884, metrics={'acc': 0.6178, 'f1': 0.4399}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 283/283 [00:14<00:00, 19.55it/s, loss=0.87, metrics={'acc': 0.6234, 'f1': 0.4527}]\n
In\u00a0[58]: Copied!
wd_bert_pred_text = wd_bert_trainer.predict_proba(X_text=X_bert_te)\nwd_bert_pred_text_class = np.argmax(wd_bert_pred_text, 1)\n
wd_bert_pred_text = wd_bert_trainer.predict_proba(X_text=X_bert_te) wd_bert_pred_text_class = np.argmax(wd_bert_pred_text, 1)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:03<00:00, 21.97it/s]\n
In\u00a0[59]: Copied!
wd_bert_acc = accuracy_score(test.rating, wd_bert_pred_text_class)\nwd_bert_f1 = f1_score(test.rating, wd_bert_pred_text_class, average=\"weighted\")\nwd_bert_cm = confusion_matrix(test.rating, wd_bert_pred_text_class)\n
wd_bert_acc = accuracy_score(test.rating, wd_bert_pred_text_class) wd_bert_f1 = f1_score(test.rating, wd_bert_pred_text_class, average=\"weighted\") wd_bert_cm = confusion_matrix(test.rating, wd_bert_pred_text_class) In\u00a0[60]: Copied!
print(f\"Distilbert Accuracy: {wd_bert_acc}. Distilbert F1 Score: {wd_bert_f1}\")\nprint(f\"Distilbert Confusion Matrix:\\n {wd_bert_cm}\")\n
print(f\"Distilbert Accuracy: {wd_bert_acc}. Distilbert F1 Score: {wd_bert_f1}\") print(f\"Distilbert Confusion Matrix:\\n {wd_bert_cm}\")
Distilbert Accuracy: 0.6326846528084918. Distilbert F1 Score: 0.5796652991272998\nDistilbert Confusion Matrix:\n [[ 287   75   22   90]\n [ 197  136   62  169]\n [  68  119  123  671]\n [  40   64   84 2315]]\n

Now, adding a tabular model follows the exact same process as the one described in section 2.

In\u00a0[61]: Copied!
tab_model = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    mlp_hidden_dims=[100, 50],\n)\n
tab_model = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, mlp_hidden_dims=[100, 50], ) In\u00a0[62]: Copied!
wd_tab_bert_model = WideDeep(\n    deeptabular=tab_model,\n    deeptext=bert_model,\n    head_hidden_dims=[256, 128, 64],\n    pred_dim=4,\n)\n
wd_tab_bert_model = WideDeep( deeptabular=tab_model, deeptext=bert_model, head_hidden_dims=[256, 128, 64], pred_dim=4, ) In\u00a0[63]: Copied!
wd_tab_bert_trainer = Trainer(\n    wd_tab_bert_model,\n    objective=\"multiclass\",\n    metrics=[Accuracy, F1Score(average=True)],\n    num_workers=0,  # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work\n)\n
wd_tab_bert_trainer = Trainer( wd_tab_bert_model, objective=\"multiclass\", metrics=[Accuracy, F1Score(average=True)], num_workers=0, # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work ) In\u00a0[64]: Copied!
wd_tab_bert_trainer.fit(\n    X_tab=wd_X_tab_tr,\n    X_text=X_bert_tr,\n    target=train.rating.values,\n    n_epochs=3,\n    batch_size=64,\n)\n
wd_tab_bert_trainer.fit( X_tab=wd_X_tab_tr, X_text=X_bert_tr, target=train.rating.values, n_epochs=3, batch_size=64, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 283/283 [00:15<00:00, 18.15it/s, loss=0.974, metrics={'acc': 0.5838, 'f1': 0.3404}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 283/283 [00:15<00:00, 18.38it/s, loss=0.885, metrics={'acc': 0.618, 'f1': 0.4378}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 283/283 [00:15<00:00, 18.40it/s, loss=0.868, metrics={'acc': 0.6252, 'f1': 0.4575}]\n
In\u00a0[65]: Copied!
wd_tab_bert_pred_text = wd_tab_bert_trainer.predict_proba(\n    X_tab=wd_X_tab_te, X_text=X_bert_te\n)\nwd_tab_bert_pred_text_class = np.argmax(wd_tab_bert_pred_text, 1)\n
wd_tab_bert_pred_text = wd_tab_bert_trainer.predict_proba( X_tab=wd_X_tab_te, X_text=X_bert_te ) wd_tab_bert_pred_text_class = np.argmax(wd_tab_bert_pred_text, 1)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:03<00:00, 21.32it/s]\n
In\u00a0[66]: Copied!
wd_tab_bert_acc = accuracy_score(test.rating, wd_tab_bert_pred_text_class)\nwd_tab_bert_f1 = f1_score(test.rating, wd_tab_bert_pred_text_class, average=\"weighted\")\nwd_tab_bert_cm = confusion_matrix(test.rating, wd_tab_bert_pred_text_class)\n
wd_tab_bert_acc = accuracy_score(test.rating, wd_tab_bert_pred_text_class) wd_tab_bert_f1 = f1_score(test.rating, wd_tab_bert_pred_text_class, average=\"weighted\") wd_tab_bert_cm = confusion_matrix(test.rating, wd_tab_bert_pred_text_class) In\u00a0[67]: Copied!
print(\n    f\"Distilbert + Tabular Accuracy: {wd_tab_bert_acc}. Distilbert+ Tabular F1 Score: {wd_tab_bert_f1}\"\n)\nprint(f\"Distilbert + Tabular Confusion Matrix:\\n {wd_tab_bert_cm}\")\n
print( f\"Distilbert + Tabular Accuracy: {wd_tab_bert_acc}. Distilbert+ Tabular F1 Score: {wd_tab_bert_f1}\" ) print(f\"Distilbert + Tabular Confusion Matrix:\\n {wd_tab_bert_cm}\")
Distilbert + Tabular Accuracy: 0.6242812914639541. Distilbert+ Tabular F1 Score: 0.5508351761564895\nDistilbert + Tabular Confusion Matrix:\n [[ 297   56   11  110]\n [ 229   91   38  206]\n [  86   90   71  734]\n [  49   48   42 2364]]\n
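Finally, a convenient way to compare everything at a glance is to collect the metrics computed above into a single table. A sketch (all of the variables already exist at this point):

import pandas as pd

summary = pd.DataFrame(
    {
        "model": [
            "tf-idf + LightGBM",
            "tf-idf + tabular + LightGBM",
            "Basic RNN",
            "Basic RNN + tabular",
            "Distilbert",
            "Distilbert + tabular",
        ],
        "accuracy": [acc_text, acc_tab_text, wd_acc_text, wd_acc_tab_and_text, wd_bert_acc, wd_tab_bert_acc],
        "f1_weighted": [f1_text, f1_tab_text, wd_f1_text, wd_f1_tab_and_text, wd_bert_f1, wd_tab_bert_f1],
    }
)
summary.sort_values("f1_weighted", ascending=False)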
"},{"location":"examples/17_Usign_a_hugging_face_model.html#using-a-hugginface-model","title":"Using a Hugginface model\u00b6","text":"

In this notebook we will show how to use an \"external\" Hugging Face model along with any other model in the library. In particular, we will show how to combine it with a tabular DL model.

Since we are here, we will also compare the performance of a few models on a text classification problem.

The notebook will go as follows:

  1. Text classification using tf-idf + LightGBM
  2. Text classification using a basic RNN
  3. Text classification using Distilbert

In all 3 cases we will add some tabular features to see if these help.

In general, I would not pay much attention to the results, since I have put no effort into getting the best possible ones (i.e. no hyperparameter optimization or trying different architectures, for example).

Let's go

"},{"location":"examples/17_Usign_a_hugging_face_model.html#1-text-classification-using-tf-idf-lightgbm","title":"1. Text classification using tf-idf + LightGBM\u00b6","text":""},{"location":"examples/17_Usign_a_hugging_face_model.html#2-text-classification-using-pytorch-widedeeps-built-in-models-a-basic-rnn","title":"2. Text classification using pytorch-widedeep's built-in models (a basic RNN)\u00b6","text":"

Moving on now to fully using pytorch-widedeep on this dataset, let's have a look at how one could use a simple RNN to predict the ratings with the library.

"},{"location":"examples/17_Usign_a_hugging_face_model.html#3-text-classification-using-a-hugginface-model-as-a-custom-model-in-pytorch-widedeeps","title":"3. Text classification using a Hugginface model as a custom model in pytorch-widedeep's\u00b6","text":""},{"location":"examples/18_feature_importance_via_attention_weights.html","title":"18_feature_importance_via_attention_weights","text":"In\u00a0[1]: Copied!
import torch\n\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import accuracy_score\n\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.models import TabTransformer, ContextAttentionMLP, WideDeep\nfrom pytorch_widedeep.callbacks import EarlyStopping\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep.datasets import load_adult\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\n
import torch import numpy as np from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score from pytorch_widedeep import Trainer from pytorch_widedeep.models import TabTransformer, ContextAttentionMLP, WideDeep from pytorch_widedeep.callbacks import EarlyStopping from pytorch_widedeep.metrics import Accuracy from pytorch_widedeep.datasets import load_adult from pytorch_widedeep.preprocessing import TabPreprocessor In\u00a0[2]: Copied!
# use_cuda = torch.cuda.is_available()\ndf = load_adult(as_frame=True)\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop([\"income\", \"fnlwgt\", \"educational_num\"], axis=1, inplace=True)\ntarget_colname = \"income_label\"\n
# use_cuda = torch.cuda.is_available() df = load_adult(as_frame=True) df.columns = [c.replace(\"-\", \"_\") for c in df.columns] df[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop([\"income\", \"fnlwgt\", \"educational_num\"], axis=1, inplace=True) target_colname = \"income_label\" In\u00a0[3]: Copied!
df.head()\n
df.head() Out[3]: age workclass education marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country income_label 0 25 Private 11th Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States 0 1 38 Private HS-grad Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States 0 2 28 Local-gov Assoc-acdm Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States 1 3 44 Private Some-college Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States 1 4 18 ? Some-college Never-married ? Own-child White Female 0 0 30 United-States 0 In\u00a0[4]: Copied!
cat_embed_cols = []\nfor col in df.columns:\n    if df[col].dtype == \"O\" or df[col].nunique() < 200 and col != target_colname:\n        cat_embed_cols.append(col)\n
cat_embed_cols = [] for col in df.columns: if df[col].dtype == \"O\" or df[col].nunique() < 200 and col != target_colname: cat_embed_cols.append(col) In\u00a0[5]: Copied!
# all cols will be categorical\nassert len(cat_embed_cols) == df.shape[1] - 1\n
# all cols will be categorical assert len(cat_embed_cols) == df.shape[1] - 1 In\u00a0[6]: Copied!
train, test = train_test_split(\n    df, test_size=0.1, random_state=1, stratify=df[[target_colname]]\n)\n
train, test = train_test_split( df, test_size=0.1, random_state=1, stratify=df[[target_colname]] ) In\u00a0[7]: Copied!
tab_preprocessor = TabPreprocessor(cat_embed_cols=cat_embed_cols, with_attention=True)\n
tab_preprocessor = TabPreprocessor(cat_embed_cols=cat_embed_cols, with_attention=True) In\u00a0[8]: Copied!
X_tab_train = tab_preprocessor.fit_transform(train)\nX_tab_test = tab_preprocessor.transform(test)\ntarget = train[target_colname].values\n
X_tab_train = tab_preprocessor.fit_transform(train) X_tab_test = tab_preprocessor.transform(test) target = train[target_colname].values In\u00a0[9]: Copied!
tab_transformer = TabTransformer(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    embed_continuous_method=\"standard\",\n    input_dim=8,\n    n_heads=2,\n    n_blocks=1,\n    attn_dropout=0.1,\n    transformer_activation=\"relu\",\n)\n
tab_transformer = TabTransformer( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, embed_continuous_method=\"standard\", input_dim=8, n_heads=2, n_blocks=1, attn_dropout=0.1, transformer_activation=\"relu\", ) In\u00a0[10]: Copied!
model = WideDeep(deeptabular=tab_transformer)\n
model = WideDeep(deeptabular=tab_transformer) In\u00a0[11]: Copied!
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0)\n
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0) In\u00a0[12]: Copied!
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(\n    optimizer,\n    threshold=0.001,\n    threshold_mode=\"abs\",\n    patience=10,\n)\n
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, threshold=0.001, threshold_mode=\"abs\", patience=10, ) In\u00a0[13]: Copied!
early_stopping = EarlyStopping(\n    min_delta=0.001, patience=30, restore_best_weights=True, verbose=True\n)\n
early_stopping = EarlyStopping( min_delta=0.001, patience=30, restore_best_weights=True, verbose=True ) In\u00a0[14]: Copied!
trainer = Trainer(\n    model,\n    objective=\"binary\",\n    optimizers=optimizer,\n    lr_schedulers=lr_scheduler,\n    reducelronplateau_criterion=\"loss\",\n    callbacks=[early_stopping],\n    metrics=[Accuracy],\n)\n
trainer = Trainer( model, objective=\"binary\", optimizers=optimizer, lr_schedulers=lr_scheduler, reducelronplateau_criterion=\"loss\", callbacks=[early_stopping], metrics=[Accuracy], )

The feature importances will be computed after training, using a sample of the training dataset of size feature_importance_sample_size
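Once training has finished, the importances can be inspected. A minimal sketch, assuming the Trainer exposes them via a feature_importance attribute and an explain method (names to be checked against the installed version of the library):

# global, attention-derived importances, computed on the sample of size
# feature_importance_sample_size taken during fit
print(trainer.feature_importance)

# per-observation importances for (a slice of) the test set
individual_importances = trainer.explain(X_tab_test[:32])
print(individual_importances.shape)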

In\u00a0[15]: Copied!
trainer.fit(\n    X_tab=X_tab_train,\n    target=target,\n    val_split=0.2,\n    n_epochs=100,\n    batch_size=128,\n    validation_freq=1,\n    feature_importance_sample_size=1000,\n)\n
trainer.fit( X_tab=X_tab_train, target=target, val_split=0.2, n_epochs=100, batch_size=128, validation_freq=1, feature_importance_sample_size=1000, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 81.80it/s, loss=0.334, metrics={'acc': 0.847}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 111.34it/s, loss=0.294, metrics={'acc': 0.8669}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 83.02it/s, loss=0.293, metrics={'acc': 0.8656}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 124.03it/s, loss=0.283, metrics={'acc': 0.8678}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 87.69it/s, loss=0.282, metrics={'acc': 0.8703}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 118.22it/s, loss=0.279, metrics={'acc': 0.8717}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 79.24it/s, loss=0.277, metrics={'acc': 0.8718}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 115.29it/s, loss=0.277, metrics={'acc': 0.8731}]\nepoch 5: 
100%| 275/275 [00:03<00:00, 83.76it/s, loss=0.275, metrics={'acc': 0.8727}]
valid: 100%| 69/69 [00:00<00:00, 120.80it/s, loss=0.276, metrics={'acc': 0.8727}]
[... epochs 6-39 omitted: train loss decreased steadily from 0.273 to 0.248 (acc 0.873 -> 0.886) while valid loss plateaued around 0.274-0.284 (acc ~0.87) ...]
epoch 40: 100%| 275/275 [00:03<00:00, 76.86it/s, loss=0.247, metrics={'acc': 0.8855}]
valid: 100%| 69/69 [00:00<00:00, 120.34it/s, loss=0.283, metrics={'acc': 0.8692}]
Best Epoch: 10. Best val_loss: 0.27451
Restoring model weights from the end of the best epoch
In [16]:
trainer.feature_importance
Out[16]:
{'age': 0.09718182,
 'workclass': 0.090637445,
 'education': 0.08910798,
 'marital_status': 0.08971319,
 'occupation': 0.12546304,
 'relationship': 0.086381145,
 'race': 0.050686445,
 'gender': 0.05116429,
 'capital_gain': 0.08165918,
 'capital_loss': 0.07702667,
 'hours_per_week': 0.08205996,
 'native_country': 0.07891885}
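The dictionary above holds the global (sample-averaged) importance per column. As a quick, purely illustrative way of inspecting it, one could sort and plot these values with pandas and matplotlib; the snippet below is a minimal sketch and not part of the library API:

import pandas as pd
import matplotlib.pyplot as plt

# global feature importances returned by the trainer (dict: column -> score)
feat_imp = pd.Series(trainer.feature_importance).sort_values(ascending=False)

# horizontal bar plot, most important feature at the top
feat_imp.plot(kind="barh", figsize=(6, 4)).invert_yaxis()
plt.xlabel("importance")
plt.tight_layout()
plt.show()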
In [17]:
preds = trainer.predict(X_tab=X_tab_test)
predict: 100%| 39/39 [00:00<00:00, 199.63it/s]
In [18]:
accuracy_score(preds, test.income_label)
Out[18]:
0.8685772773797339
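Accuracy alone can be misleading on an imbalanced target such as income_label, so it may be worth computing a couple of extra metrics on the same predictions. This is just an illustrative sketch using standard scikit-learn functions on the preds obtained above:

from sklearn.metrics import f1_score, confusion_matrix

# preds are the hard class labels returned by trainer.predict above
print(f1_score(test.income_label, preds))
print(confusion_matrix(test.income_label, preds))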
In [19]:
test.reset_index(drop=True, inplace=True)
In [20]:
test[test.income_label == 0].head(1)
Out[20]:
|   | age | workclass | education    | marital_status | occupation      | relationship  | race  | gender | capital_gain | capital_loss | hours_per_week | native_country | income_label |
|---|-----|-----------|--------------|----------------|-----------------|---------------|-------|--------|--------------|--------------|----------------|----------------|--------------|
| 0 | 26  | Private   | Some-college | Never-married  | Exec-managerial | Not-in-family | White | Male   | 0            | 0            | 60             | United-States  | 0            |
In [21]:
test[test.income_label == 1].head(1)
Out[21]:
|   | age | workclass | education | marital_status     | occupation     | relationship | race  | gender | capital_gain | capital_loss | hours_per_week | native_country | income_label |
|---|-----|-----------|-----------|--------------------|----------------|--------------|-------|--------|--------------|--------------|----------------|----------------|--------------|
| 3 | 36  | Local-gov | Doctorate | Married-civ-spouse | Prof-specialty | Husband      | White | Male   | 0            | 1887         | 50             | United-States  | 1            |

To get the per-sample feature importances for a test dataset, simply use the explain method

In [22]:
feat_imp_per_sample = trainer.explain(X_tab_test, save_step_masks=False)
In [23]:
list(test.iloc[0].index[np.argsort(-feat_imp_per_sample[0])])
Out[23]:
['hours_per_week',
 'education',
 'relationship',
 'occupation',
 'workclass',
 'capital_gain',
 'native_country',
 'marital_status',
 'capital_loss',
 'age',
 'race',
 'gender']
In [24]:
list(test.iloc[3].index[np.argsort(-feat_imp_per_sample[3])])
Out[24]:
['age',
 'capital_loss',
 'hours_per_week',
 'marital_status',
 'native_country',
 'relationship',
 'race',
 'education',
 'occupation',
 'capital_gain',
 'gender',
 'workclass']
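The two cells above rank the features for individual rows by hand. The short sketch below generalises that to every row of the test set, using only objects already defined in this notebook (feat_imp_per_sample, test, np); the DataFrame layout and column names are illustrative choices, not library output:

import numpy as np
import pandas as pd

# tabular feature names in the same column order used by the preprocessor
feature_names = test.drop(columns=["income_label"]).columns

# one row per test sample; rank_1 ... rank_12 hold the feature names
# ordered from most to least important for that sample
rankings = pd.DataFrame(
    feature_names.values[np.argsort(-feat_imp_per_sample, axis=1)],
    columns=[f"rank_{i + 1}" for i in range(len(feature_names))],
)
rankings.head()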

We could do the same with the ContextAttentionMLP

In [25]:
context_attn_mlp = ContextAttentionMLP(
    column_idx=tab_preprocessor.column_idx,
    cat_embed_input=tab_preprocessor.cat_embed_input,
    cat_embed_dropout=0.0,
    input_dim=16,
    attn_dropout=0.1,
    attn_activation="relu",
)
In [26]:
mlp_model = WideDeep(deeptabular=context_attn_mlp)
In [27]:
mlp_optimizer = torch.optim.Adam(mlp_model.parameters(), lr=0.01, weight_decay=0.0)
In [28]:
mlp_lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    mlp_optimizer,
    threshold=0.001,
    threshold_mode="abs",
    patience=10,
)
In [29]:
mlp_early_stopping = EarlyStopping(
    min_delta=0.001, patience=30, restore_best_weights=True, verbose=True
)
In [30]:
mlp_trainer = Trainer(
    mlp_model,
    objective="binary",
    optimizers=mlp_optimizer,
    lr_schedulers=mlp_lr_scheduler,
    reducelronplateau_criterion="loss",
    callbacks=[mlp_early_stopping],
    metrics=[Accuracy],
)
In [31]:
mlp_trainer.fit(
    X_tab=X_tab_train,
    target=target,
    val_split=0.2,
    n_epochs=100,
    batch_size=128,
    validation_freq=1,
    feature_importance_sample_size=1000,
)
epoch 1: 100%| 275/275 [00:03<00:00, 73.11it/s, loss=0.405, metrics={'acc': 0.8094}]
valid: 100%| 69/69 [00:00<00:00, 119.26it/s, loss=0.309, metrics={'acc': 0.8583}]
epoch 2: 100%| 275/275 [00:03<00:00, 71.70it/s, loss=0.332, metrics={'acc': 0.8447}]
valid: 100%| 69/69 [00:00<00:00, 116.36it/s, loss=0.293, metrics={'acc': 0.8646}]
[... epochs 3-36 omitted: train loss decreased steadily from 0.319 to 0.279 (acc 0.851 -> 0.871) while valid loss stayed in the 0.274-0.298 range (acc ~0.87) ...]
epoch 37:
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 74.30it/s, loss=0.279, metrics={'acc': 0.869}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 110.75it/s, loss=0.279, metrics={'acc': 0.8702}]\nepoch 38: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 72.34it/s, loss=0.28, metrics={'acc': 0.8691}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 117.79it/s, loss=0.277, metrics={'acc': 0.8698}]\nepoch 39: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 75.49it/s, loss=0.279, metrics={'acc': 0.8694}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 115.25it/s, loss=0.279, metrics={'acc': 0.87}]\nepoch 40: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 75.51it/s, loss=0.28, metrics={'acc': 0.8694}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 110.90it/s, loss=0.277, metrics={'acc': 0.8694}]\nepoch 41: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 75.35it/s, loss=0.278, metrics={'acc': 0.8716}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 115.74it/s, loss=0.28, metrics={'acc': 0.8675}]\nepoch 42: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 76.35it/s, loss=0.279, metrics={'acc': 0.8695}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 122.76it/s, loss=0.277, metrics={'acc': 0.8699}]\nepoch 43: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:04<00:00, 66.14it/s, loss=0.279, metrics={'acc': 0.8681}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 106.20it/s, loss=0.277, metrics={'acc': 0.8714}]\nepoch 44: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 73.37it/s, loss=0.279, metrics={'acc': 0.8704}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 115.18it/s, loss=0.277, metrics={'acc': 0.8716}]\nepoch 45: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 76.23it/s, loss=0.278, metrics={'acc': 0.8702}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 124.83it/s, loss=0.278, metrics={'acc': 0.8707}]\nepoch 46: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.12it/s, loss=0.278, metrics={'acc': 0.8704}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 126.62it/s, loss=0.279, metrics={'acc': 0.8693}]\nepoch 47: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 78.55it/s, loss=0.276, metrics={'acc': 0.8713}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 117.99it/s, loss=0.279, metrics={'acc': 0.8691}]\nepoch 48: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.25it/s, loss=0.278, metrics={'acc': 0.8719}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 124.52it/s, loss=0.278, metrics={'acc': 0.8695}]\nepoch 49: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 78.35it/s, loss=0.277, metrics={'acc': 0.8721}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 119.82it/s, loss=0.279, metrics={'acc': 0.8691}]\nepoch 50: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.15it/s, loss=0.277, metrics={'acc': 0.8717}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 122.62it/s, loss=0.278, metrics={'acc': 0.8699}]\nepoch 51: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 78.55it/s, loss=0.277, metrics={'acc': 0.8713}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 117.63it/s, loss=0.278, metrics={'acc': 0.87}]\nepoch 52: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.04it/s, loss=0.276, metrics={'acc': 0.8721}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 126.39it/s, loss=0.278, metrics={'acc': 0.8697}]\nepoch 53: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 78.15it/s, loss=0.277, metrics={'acc': 0.8721}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 127.56it/s, loss=0.278, metrics={'acc': 0.8699}]\nepoch 54: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.41it/s, loss=0.277, metrics={'acc': 0.8711}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 118.95it/s, loss=0.278, metrics={'acc': 0.8698}]\nepoch 55: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 76.35it/s, loss=0.277, metrics={'acc': 0.8718}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 126.90it/s, loss=0.278, metrics={'acc': 0.8699}]\nepoch 56: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.83it/s, loss=0.277, metrics={'acc': 0.8707}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 115.13it/s, loss=0.279, metrics={'acc': 0.8691}]\nepoch 57: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 75.20it/s, loss=0.277, metrics={'acc': 0.8722}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 123.16it/s, loss=0.279, metrics={'acc': 0.8691}]\nepoch 58: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 73.33it/s, loss=0.276, metrics={'acc': 0.871}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 123.37it/s, loss=0.278, metrics={'acc': 0.8691}]\nepoch 59: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 78.41it/s, loss=0.277, metrics={'acc': 0.8714}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 125.17it/s, loss=0.278, metrics={'acc': 0.8695}]\nepoch 60: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 79.26it/s, loss=0.276, metrics={'acc': 0.8721}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 120.60it/s, loss=0.278, metrics={'acc': 0.869}]\nepoch 61: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 75.88it/s, loss=0.278, metrics={'acc': 0.8703}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 124.47it/s, loss=0.278, metrics={'acc': 0.8692}]\nepoch 62: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.13it/s, loss=0.276, metrics={'acc': 0.8711}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 125.80it/s, loss=0.278, metrics={'acc': 0.8691}]\nepoch 63: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.20it/s, loss=0.277, metrics={'acc': 0.8715}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 118.50it/s, loss=0.278, metrics={'acc': 0.8695}]\nepoch 64: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 78.11it/s, loss=0.276, metrics={'acc': 0.8719}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 114.52it/s, loss=0.278, metrics={'acc': 0.869}]\n
Best Epoch: 34. Best val_loss: 0.27449\nRestoring model weights from the end of the best epoch\n
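The "Restoring model weights from the end of the best epoch" message above is what the library prints when an EarlyStopping callback is used with restore_best_weights=True. As a minimal sketch of how such a run could be set up (the model name, patience, metric and fit arguments below are assumptions for illustration, not the exact settings used in this run):

from pytorch_widedeep import Trainer
from pytorch_widedeep.callbacks import EarlyStopping
from pytorch_widedeep.metrics import Accuracy

# assumed: 'model' is any WideDeep model already built, and X_tab_train / y_train exist
early_stopping = EarlyStopping(
    monitor="val_loss",         # watch the validation loss
    patience=30,                # epochs with no improvement before stopping
    restore_best_weights=True,  # roll back to the weights of the best epoch
)

trainer = Trainer(
    model,
    objective="binary",
    metrics=[Accuracy],
    callbacks=[early_stopping],
)

trainer.fit(
    X_tab=X_tab_train,
    target=y_train,
    n_epochs=100,
    batch_size=128,
    val_split=0.2,
)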
In\u00a0[32]: Copied!
mlp_trainer.feature_importance\n
mlp_trainer.feature_importance Out[32]:
{'age': 0.116632804,\n 'workclass': 0.050255153,\n 'education': 0.094621316,\n 'marital_status': 0.12328919,\n 'occupation': 0.107893184,\n 'relationship': 0.11747801,\n 'race': 0.054717205,\n 'gender': 0.07514235,\n 'capital_gain': 0.059732802,\n 'capital_loss': 0.06738944,\n 'hours_per_week': 0.0610674,\n 'native_country': 0.07178114}
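Since feature_importance is returned as a plain dict, it can be easier to inspect once sorted. A quick way to do that, purely for readability, using the same trainer as above:

import pandas as pd

# sort the global importances returned above, largest first
feat_imps = pd.Series(mlp_trainer.feature_importance).sort_values(ascending=False)
print(feat_imps)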
In\u00a0[33]: Copied!
mlp_preds = mlp_trainer.predict(X_tab=X_tab_test)\n
mlp_preds = mlp_trainer.predict(X_tab=X_tab_test)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:00<00:00, 212.38it/s]\n
In\u00a0[34]: Copied!
accuracy_score(mlp_preds, test.income_label)\n
accuracy_score(mlp_preds, test.income_label) Out[34]:
0.8726714431934494
"},{"location":"examples/18_feature_importance_via_attention_weights.html#feature-importance-via-the-attention-weights","title":"Feature Importance via the attention weights\u00b6","text":"

Let me start by saying that I consider this feature of the library purely experimental. First of all, there are multiple ways one could go about computing feature importances for these models. More importantly, one has to bear in mind that even tree-based algorithms trained on the same dataset produce different feature importances, and the differences become more \"dramatic\" when different techniques are used, such as SHAP or feature permutation (see for example this and references therein). All this to say that, often, feature importance is a measure that is only meaningful within the context of a given experiment run and the model used.

With that in mind, each instantiation of a deep tabular model, which may have millions of trainable parameters, will potentially produce a different set of feature importances, even if the architecture is the same. This effect becomes more apparent when the dataset is relatively easy and contains dependent/related columns, so that the same success metric can be reached with different parameters.

In summary, feature importances are implemented in this library for all attention-based models for tabular data, with the exception of the TabPerceiver. However, this functionality has to be used and interpreted with care, and the resulting importances should be considered valid only within the 'universe' (or context) of the model with which they were produced.

Nonetheless, let's have a look at how one would access the feature importances when using this library.
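As a minimal sketch of the access pattern (the column lists, hyperparameters and DataFrame names below are placeholders, not the exact setup used in the notebook): one trains an attention-based tabular model with the Trainer and then reads the feature_importance attribute.

from pytorch_widedeep import Trainer
from pytorch_widedeep.models import TabTransformer, WideDeep
from pytorch_widedeep.preprocessing import TabPreprocessor
from pytorch_widedeep.metrics import Accuracy

# assumed: 'df_train' is a pandas DataFrame with columns listed in cat_cols / cont_cols
# and a binary 'target' column; these names are illustrative only
tab_preprocessor = TabPreprocessor(
    cat_embed_cols=cat_cols, continuous_cols=cont_cols, with_attention=True
)
X_tab = tab_preprocessor.fit_transform(df_train)

tab_transformer = TabTransformer(
    column_idx=tab_preprocessor.column_idx,
    cat_embed_input=tab_preprocessor.cat_embed_input,
    continuous_cols=cont_cols,
)
model = WideDeep(deeptabular=tab_transformer)

trainer = Trainer(model, objective="binary", metrics=[Accuracy])
trainer.fit(X_tab=X_tab, target=df_train.target.values, n_epochs=5, batch_size=256)

# global importances derived from the attention weights
print(trainer.feature_importance)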

"},{"location":"examples/19_wide_and_deep_for_recsys_pt1.html","title":"19_wide_and_deep_for_recsys_pt1","text":"

The goal of this notebook, and of its companion (part 2), is to illustrate how one could use this library in the context of recommendation systems. In particular, this notebook and the scripts in the wide_deep_for_recsys dir are a response to this issue. Therefore, we will use the Kaggle notebook referred to in that issue here.

In order to keep the length of the notebook tractable, we will split this exercise in two. In this first notebook we will prepare the data in almost exactly the same way as in the Kaggle notebook, and also show how one could use pytorch-widedeep to build a model almost identical to the one in that notebook.

In a second notebook, we will show how one could use this library to implement other models, still following the same problem formulation.

In\u00a0[1]: Copied!
from pathlib import Path\nimport warnings\n\nimport pandas as pd\nfrom sklearn.model_selection import train_test_split\n\nfrom pytorch_widedeep.datasets import load_movielens100k\n
from pathlib import Path import warnings import pandas as pd from sklearn.model_selection import train_test_split from pytorch_widedeep.datasets import load_movielens100k In\u00a0[2]: Copied!
warnings.filterwarnings(\"ignore\")\n
warnings.filterwarnings(\"ignore\") In\u00a0[3]: Copied!
save_path = Path(\"prepared_data\")\nif not save_path.exists():\n    save_path.mkdir(parents=True, exist_ok=True)\n
save_path = Path(\"prepared_data\") if not save_path.exists(): save_path.mkdir(parents=True, exist_ok=True) In\u00a0[4]: Copied!
data, users, items = load_movielens100k(as_frame=True)\n
data, users, items = load_movielens100k(as_frame=True) In\u00a0[5]: Copied!
# Alternatively, as specified in the docs: 'The last 19 fields are the genres' so:\n# list_of_genres = items.columns.tolist()[-19:]\nlist_of_genres = [\n    \"unknown\",\n    \"Action\",\n    \"Adventure\",\n    \"Animation\",\n    \"Children's\",\n    \"Comedy\",\n    \"Crime\",\n    \"Documentary\",\n    \"Drama\",\n    \"Fantasy\",\n    \"Film-Noir\",\n    \"Horror\",\n    \"Musical\",\n    \"Mystery\",\n    \"Romance\",\n    \"Sci-Fi\",\n    \"Thriller\",\n    \"War\",\n    \"Western\",\n]\n
# Alternatively, as specified in the docs: 'The last 19 fields are the genres' so: # list_of_genres = items.columns.tolist()[-19:] list_of_genres = [ \"unknown\", \"Action\", \"Adventure\", \"Animation\", \"Children's\", \"Comedy\", \"Crime\", \"Documentary\", \"Drama\", \"Fantasy\", \"Film-Noir\", \"Horror\", \"Musical\", \"Mystery\", \"Romance\", \"Sci-Fi\", \"Thriller\", \"War\", \"Western\", ]

Let's start by loading the interactions, user and item data

In\u00a0[6]: Copied!
data.head()\n
data.head() Out[6]: user_id movie_id rating timestamp 0 196 242 3 881250949 1 186 302 3 891717742 2 22 377 1 878887116 3 244 51 2 880606923 4 166 346 1 886397596 In\u00a0[7]: Copied!
users.head()\n
users.head() Out[7]: user_id age gender occupation zip_code 0 1 24 M technician 85711 1 2 53 F other 94043 2 3 23 M writer 32067 3 4 24 M technician 43537 4 5 33 F other 15213 In\u00a0[8]: Copied!
items.head()\n
items.head() Out[8]: movie_id movie_title release_date video_release_date IMDb_URL unknown Action Adventure Animation Children's ... Fantasy Film-Noir Horror Musical Mystery Romance Sci-Fi Thriller War Western 0 1 Toy Story (1995) 01-Jan-1995 NaN http://us.imdb.com/M/title-exact?Toy%20Story%2... 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0 1 2 GoldenEye (1995) 01-Jan-1995 NaN http://us.imdb.com/M/title-exact?GoldenEye%20(... 0 1 1 0 0 ... 0 0 0 0 0 0 0 1 0 0 2 3 Four Rooms (1995) 01-Jan-1995 NaN http://us.imdb.com/M/title-exact?Four%20Rooms%... 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0 3 4 Get Shorty (1995) 01-Jan-1995 NaN http://us.imdb.com/M/title-exact?Get%20Shorty%... 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 4 5 Copycat (1995) 01-Jan-1995 NaN http://us.imdb.com/M/title-exact?Copycat%20(1995) 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0

5 rows \u00d7 24 columns

In\u00a0[9]: Copied!
# adding a column with the number of movies watched per user\ndataset = data.sort_values([\"user_id\", \"timestamp\"]).reset_index(drop=True)\ndataset[\"one\"] = 1\ndataset[\"num_watched\"] = dataset.groupby(\"user_id\")[\"one\"].cumsum()\ndataset.drop(\"one\", axis=1, inplace=True)\ndataset.head()\n
# adding a column with the number of movies watched per user dataset = data.sort_values([\"user_id\", \"timestamp\"]).reset_index(drop=True) dataset[\"one\"] = 1 dataset[\"num_watched\"] = dataset.groupby(\"user_id\")[\"one\"].cumsum() dataset.drop(\"one\", axis=1, inplace=True) dataset.head() Out[9]: user_id movie_id rating timestamp num_watched 0 1 168 5 874965478 1 1 1 172 5 874965478 2 2 1 165 5 874965518 3 3 1 156 4 874965556 4 4 1 196 5 874965677 5 In\u00a0[10]: Copied!
# adding a column with the mean rating at a point in time per user\ndataset[\"mean_rate\"] = (\n    dataset.groupby(\"user_id\")[\"rating\"].cumsum() / dataset[\"num_watched\"]\n)\ndataset.head()\n
# adding a column with the mean rating at a point in time per user dataset[\"mean_rate\"] = ( dataset.groupby(\"user_id\")[\"rating\"].cumsum() / dataset[\"num_watched\"] ) dataset.head() Out[10]: user_id movie_id rating timestamp num_watched mean_rate 0 1 168 5 874965478 1 5.00 1 1 172 5 874965478 2 5.00 2 1 165 5 874965518 3 5.00 3 1 156 4 874965556 4 4.75 4 1 196 5 874965677 5 4.80 In\u00a0[11]: Copied!
dataset[\"target\"] = dataset.groupby(\"user_id\")[\"movie_id\"].shift(-1)\n
dataset[\"target\"] = dataset.groupby(\"user_id\")[\"movie_id\"].shift(-1)

Following the same processing used by the author in the aforementioned Kaggle notebook, we build sequences of previously watched movies

In\u00a0[12]: Copied!
# Here the author builds the sequences\ndataset[\"prev_movies\"] = dataset[\"movie_id\"].apply(lambda x: str(x))\ndataset[\"prev_movies\"] = (\n    dataset.groupby(\"user_id\")[\"prev_movies\"]\n    .apply(lambda x: (x + \" \").cumsum().str.strip())\n    .reset_index(drop=True)\n)\ndataset[\"prev_movies\"] = dataset[\"prev_movies\"].apply(lambda x: x.split())\ndataset.head()\n
# Here the author builds the sequences dataset[\"prev_movies\"] = dataset[\"movie_id\"].apply(lambda x: str(x)) dataset[\"prev_movies\"] = ( dataset.groupby(\"user_id\")[\"prev_movies\"] .apply(lambda x: (x + \" \").cumsum().str.strip()) .reset_index(drop=True) ) dataset[\"prev_movies\"] = dataset[\"prev_movies\"].apply(lambda x: x.split()) dataset.head() Out[12]: user_id movie_id rating timestamp num_watched mean_rate target prev_movies 0 1 168 5 874965478 1 5.00 172.0 [168] 1 1 172 5 874965478 2 5.00 165.0 [168, 172] 2 1 165 5 874965518 3 5.00 156.0 [168, 172, 165] 3 1 156 4 874965556 4 4.75 196.0 [168, 172, 165, 156] 4 1 196 5 874965677 5 4.80 166.0 [168, 172, 165, 156, 196]

And now we add a genre_rate column per genre, computed as the mean rating of all movies of that genre watched so far by each user

In\u00a0[13]: Copied!
dataset = dataset.merge(items[[\"movie_id\"] + list_of_genres], on=\"movie_id\", how=\"left\")\nfor genre in list_of_genres:\n    dataset[f\"{genre}_rate\"] = dataset[genre] * dataset[\"rating\"]\n    dataset[genre] = dataset.groupby(\"user_id\")[genre].cumsum()\n    dataset[f\"{genre}_rate\"] = (\n        dataset.groupby(\"user_id\")[f\"{genre}_rate\"].cumsum() / dataset[genre]\n    )\ndataset[list_of_genres] = dataset[list_of_genres].apply(\n    lambda x: x / dataset[\"num_watched\"]\n)\ndataset.head()\n
dataset = dataset.merge(items[[\"movie_id\"] + list_of_genres], on=\"movie_id\", how=\"left\") for genre in list_of_genres: dataset[f\"{genre}_rate\"] = dataset[genre] * dataset[\"rating\"] dataset[genre] = dataset.groupby(\"user_id\")[genre].cumsum() dataset[f\"{genre}_rate\"] = ( dataset.groupby(\"user_id\")[f\"{genre}_rate\"].cumsum() / dataset[genre] ) dataset[list_of_genres] = dataset[list_of_genres].apply( lambda x: x / dataset[\"num_watched\"] ) dataset.head() Out[13]: user_id movie_id rating timestamp num_watched mean_rate target prev_movies unknown Action ... Fantasy_rate Film-Noir_rate Horror_rate Musical_rate Mystery_rate Romance_rate Sci-Fi_rate Thriller_rate War_rate Western_rate 0 1 168 5 874965478 1 5.00 172.0 [168] 0.0 0.000000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1 1 172 5 874965478 2 5.00 165.0 [168, 172] 0.0 0.500000 ... NaN NaN NaN NaN NaN 5.0 5.0 NaN 5.0 NaN 2 1 165 5 874965518 3 5.00 156.0 [168, 172, 165] 0.0 0.333333 ... NaN NaN NaN NaN NaN 5.0 5.0 NaN 5.0 NaN 3 1 156 4 874965556 4 4.75 196.0 [168, 172, 165, 156] 0.0 0.250000 ... NaN NaN NaN NaN NaN 5.0 5.0 4.0 5.0 NaN 4 1 196 5 874965677 5 4.80 166.0 [168, 172, 165, 156, 196] 0.0 0.200000 ... NaN NaN NaN NaN NaN 5.0 5.0 4.0 5.0 NaN

5 rows \u00d7 46 columns

Adding user features

In\u00a0[14]: Copied!
dataset = dataset.merge(users, on=\"user_id\", how=\"left\")\ndataset.head()\n
dataset = dataset.merge(users, on=\"user_id\", how=\"left\") dataset.head() Out[14]: user_id movie_id rating timestamp num_watched mean_rate target prev_movies unknown Action ... Mystery_rate Romance_rate Sci-Fi_rate Thriller_rate War_rate Western_rate age gender occupation zip_code 0 1 168 5 874965478 1 5.00 172.0 [168] 0.0 0.000000 ... NaN NaN NaN NaN NaN NaN 24 M technician 85711 1 1 172 5 874965478 2 5.00 165.0 [168, 172] 0.0 0.500000 ... NaN 5.0 5.0 NaN 5.0 NaN 24 M technician 85711 2 1 165 5 874965518 3 5.00 156.0 [168, 172, 165] 0.0 0.333333 ... NaN 5.0 5.0 NaN 5.0 NaN 24 M technician 85711 3 1 156 4 874965556 4 4.75 196.0 [168, 172, 165, 156] 0.0 0.250000 ... NaN 5.0 5.0 4.0 5.0 NaN 24 M technician 85711 4 1 196 5 874965677 5 4.80 166.0 [168, 172, 165, 156, 196] 0.0 0.200000 ... NaN 5.0 5.0 4.0 5.0 NaN 24 M technician 85711

5 rows \u00d7 50 columns

Again, we use the same settings as those in the Kaggle notebook, but COLD_START_TRESH is pretty aggressive

In\u00a0[15]: Copied!
COLD_START_TRESH = 5\n\nfiltred_data = dataset[\n    (dataset[\"num_watched\"] >= COLD_START_TRESH) & ~(dataset[\"target\"].isna())\n].sort_values(\"timestamp\")\ntrain_data, _test_data = train_test_split(filtred_data, test_size=0.2, shuffle=False)\nvalid_data, test_data = train_test_split(_test_data, test_size=0.5, shuffle=False)\n
COLD_START_TRESH = 5 filtred_data = dataset[ (dataset[\"num_watched\"] >= COLD_START_TRESH) & ~(dataset[\"target\"].isna()) ].sort_values(\"timestamp\") train_data, _test_data = train_test_split(filtred_data, test_size=0.2, shuffle=False) valid_data, test_data = train_test_split(_test_data, test_size=0.5, shuffle=False) In\u00a0[16]: Copied!
cols_to_drop = [\n    # \"rating\",\n    \"timestamp\",\n    \"num_watched\",\n]\n\ndf_train = train_data.drop(cols_to_drop, axis=1)\ndf_valid = valid_data.drop(cols_to_drop, axis=1)\ndf_test = test_data.drop(cols_to_drop, axis=1)\n\ndf_train.to_pickle(save_path / \"df_train.pkl\")\ndf_valid.to_pickle(save_path / \"df_valid.pkl\")\ndf_test.to_pickle(save_path / \"df_test.pkl\")\n
cols_to_drop = [ # \"rating\", \"timestamp\", \"num_watched\", ] df_train = train_data.drop(cols_to_drop, axis=1) df_valid = valid_data.drop(cols_to_drop, axis=1) df_test = test_data.drop(cols_to_drop, axis=1) df_train.to_pickle(save_path / \"df_train.pkl\") df_valid.to_pickle(save_path / \"df_valid.pkl\") df_test.to_pickle(save_path / \"df_test.pkl\")

Let's now build a model that is nearly identical to the one used in the Kaggle notebook

In\u00a0[17]: Copied!
import numpy as np\nimport torch\nfrom torch import nn\nfrom scipy.sparse import coo_matrix\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.models import TabMlp, BasicRNN, WideDeep\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\n
import numpy as np import torch from torch import nn from scipy.sparse import coo_matrix from pytorch_widedeep import Trainer from pytorch_widedeep.models import TabMlp, BasicRNN, WideDeep from pytorch_widedeep.preprocessing import TabPreprocessor In\u00a0[18]: Copied!
device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n\nsave_path = Path(\"prepared_data\")\n\nPAD_IDX = 0\n
device = \"cuda\" if torch.cuda.is_available() else \"cpu\" save_path = Path(\"prepared_data\") PAD_IDX = 0

Let's use some of the functions the author of the Kaggle notebook uses to prepare the data

In\u00a0[19]: Copied!
def get_coo_indexes(lil):\n    rows = []\n    cols = []\n    for i, el in enumerate(lil):\n        if type(el) != list:\n            el = [el]\n        for j in el:\n            rows.append(i)\n            cols.append(j)\n    return rows, cols\n\n\ndef get_sparse_features(series, shape):\n    coo_indexes = get_coo_indexes(series.tolist())\n    sparse_df = coo_matrix(\n        (np.ones(len(coo_indexes[0])), (coo_indexes[0], coo_indexes[1])), shape=shape\n    )\n    return sparse_df\n\n\ndef sparse_to_idx(data, pad_idx=-1):\n    indexes = data.nonzero()\n    indexes_df = pd.DataFrame()\n    indexes_df[\"rows\"] = indexes[0]\n    indexes_df[\"cols\"] = indexes[1]\n    mdf = indexes_df.groupby(\"rows\").apply(lambda x: x[\"cols\"].tolist())\n    max_len = mdf.apply(lambda x: len(x)).max()\n    return mdf.apply(lambda x: pd.Series(x + [pad_idx] * (max_len - len(x)))).values\n
def get_coo_indexes(lil): rows = [] cols = [] for i, el in enumerate(lil): if type(el) != list: el = [el] for j in el: rows.append(i) cols.append(j) return rows, cols def get_sparse_features(series, shape): coo_indexes = get_coo_indexes(series.tolist()) sparse_df = coo_matrix( (np.ones(len(coo_indexes[0])), (coo_indexes[0], coo_indexes[1])), shape=shape ) return sparse_df def sparse_to_idx(data, pad_idx=-1): indexes = data.nonzero() indexes_df = pd.DataFrame() indexes_df[\"rows\"] = indexes[0] indexes_df[\"cols\"] = indexes[1] mdf = indexes_df.groupby(\"rows\").apply(lambda x: x[\"cols\"].tolist()) max_len = mdf.apply(lambda x: len(x)).max() return mdf.apply(lambda x: pd.Series(x + [pad_idx] * (max_len - len(x)))).values

For the time being, we will not use a validation set for hyperparameter optimization, and we will simply concatenate the validation and test sets into one test set. I split the data into train/valid/test in case the reader wants to actually do hyperparameter optimization (and because I know that in the future I will).

There is another caveat worth mentioning, related to the indexing of the movies. To build the matrices of movies watched, we use the entire dataset. A more realistic (and correct) approach would be to use ONLY the movies that appear in the training set and treat as unknown or unseen those movies in the test set that were not seen during training. Nonetheless, this does not affect the purpose of this notebook, which is to illustrate how one could use pytorch-widedeep to build a recommendation algorithm. However, if one wanted to explore the performance of different algorithms in a \"proper\" way, these \"details\" would need to be accounted for.
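If one did want to account for that, a minimal (hypothetical) sketch would be to build the movie index from the training split only and map any movie not seen during training to a reserved 'unknown' index; the column and variable names below are illustrative, not part of the notebook:

# hypothetical stricter indexing: vocabulary built from the training set only
UNK_IDX = 1  # 0 is kept for padding

train_movie_ids = set(m for seq in df_train["prev_movies"] for m in seq)
movie2idx = {m: i + 2 for i, m in enumerate(sorted(train_movie_ids))}

def encode_movies(seq):
    # movies unseen at training time are mapped to UNK_IDX
    return [movie2idx.get(m, UNK_IDX) for m in seq]

df_train["prev_movies_idx"] = df_train["prev_movies"].apply(encode_movies)
df_test["prev_movies_idx"] = df_test["prev_movies"].apply(encode_movies)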

In\u00a0[20]: Copied!
df_test = pd.concat([df_valid, df_test], ignore_index=True)\n
df_test = pd.concat([df_valid, df_test], ignore_index=True) In\u00a0[21]: Copied!
id_cols = [\"user_id\", \"movie_id\"]\nmax_movie_index = max(df_train.movie_id.max(), df_test.movie_id.max())\n
id_cols = [\"user_id\", \"movie_id\"] max_movie_index = max(df_train.movie_id.max(), df_test.movie_id.max()) In\u00a0[22]: Copied!
X_train = df_train.drop(id_cols + [\"rating\", \"prev_movies\", \"target\"], axis=1)\ny_train = np.array(df_train.target.values, dtype=\"int64\")\ntrain_movies_watched = get_sparse_features(\n    df_train[\"prev_movies\"], (len(df_train), max_movie_index + 1)\n)\n\nX_test = df_test.drop(id_cols + [\"rating\", \"prev_movies\", \"target\"], axis=1)\ny_test = np.array(df_test.target.values, dtype=\"int64\")\ntest_movies_watched = get_sparse_features(\n    df_test[\"prev_movies\"], (len(df_test), max_movie_index + 1)\n)\n
X_train = df_train.drop(id_cols + [\"rating\", \"prev_movies\", \"target\"], axis=1) y_train = np.array(df_train.target.values, dtype=\"int64\") train_movies_watched = get_sparse_features( df_train[\"prev_movies\"], (len(df_train), max_movie_index + 1) ) X_test = df_test.drop(id_cols + [\"rating\", \"prev_movies\", \"target\"], axis=1) y_test = np.array(df_test.target.values, dtype=\"int64\") test_movies_watched = get_sparse_features( df_test[\"prev_movies\"], (len(df_test), max_movie_index + 1) )

Let's have a look at the information in each dataset

In\u00a0[23]: Copied!
X_train.head()\n
X_train.head() Out[23]: mean_rate unknown Action Adventure Animation Children's Comedy Crime Documentary Drama ... Mystery_rate Romance_rate Sci-Fi_rate Thriller_rate War_rate Western_rate age gender occupation zip_code 25423 4.000000 0.0 0.400000 0.200000 0.0 0.0 0.400000 0.0 0.0 0.200000 ... NaN 4.0 4.0 4.000000 4.0 NaN 21 M student 48823 25425 4.000000 0.0 0.285714 0.142857 0.0 0.0 0.428571 0.0 0.0 0.285714 ... NaN 4.0 4.0 4.000000 4.0 NaN 21 M student 48823 25424 4.000000 0.0 0.333333 0.166667 0.0 0.0 0.333333 0.0 0.0 0.333333 ... NaN 4.0 4.0 4.000000 4.0 NaN 21 M student 48823 25426 3.875000 0.0 0.250000 0.125000 0.0 0.0 0.375000 0.0 0.0 0.250000 ... NaN 4.0 4.0 3.666667 4.0 NaN 21 M student 48823 25427 3.888889 0.0 0.222222 0.111111 0.0 0.0 0.333333 0.0 0.0 0.333333 ... NaN 4.0 4.0 3.666667 4.0 NaN 21 M student 48823

5 rows \u00d7 43 columns

In\u00a0[24]: Copied!
y_train\n
y_train Out[24]:
array([772, 288, 108, ..., 183, 432, 509])
In\u00a0[25]: Copied!
train_movies_watched\n
train_movies_watched Out[25]:
<76228x1683 sparse matrix of type '<class 'numpy.float64'>'\n\twith 7957390 stored elements in COOrdinate format>
In\u00a0[26]: Copied!
sorted(df_train.prev_movies.tolist()[0])\n
sorted(df_train.prev_movies.tolist()[0]) Out[26]:
['173', '185', '255', '286', '298']
In\u00a0[27]: Copied!
np.where(train_movies_watched.todense()[0])\n
np.where(train_movies_watched.todense()[0]) Out[27]:
(array([0, 0, 0, 0, 0]), array([173, 185, 255, 286, 298]))

From here on is where the specifics of this library start to appear. The only component that is going to be a bit different is the so-called tabular component, referred to as continuous in the notebook.

In the case of pytorch-widedeep, we have the TabPreprocessor, which allows for a lot of flexibility as to how we would like to process the tabular component of this Wide and Deep model. In other words, here our tabular component is a bit more elaborate than the one in the notebook, just a bit...

In\u00a0[28]: Copied!
cat_cols = [\"gender\", \"occupation\", \"zip_code\"]\ncont_cols = [c for c in X_train if c not in cat_cols]\ntab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_cols,\n    continuous_cols=cont_cols,\n)\n
cat_cols = [\"gender\", \"occupation\", \"zip_code\"] cont_cols = [c for c in X_train if c not in cat_cols] tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_cols, continuous_cols=cont_cols, ) In\u00a0[29]: Copied!
X_train_tab = tab_preprocessor.fit_transform(X_train.fillna(0))\nX_test_tab = tab_preprocessor.transform(X_test.fillna(0))\n
X_train_tab = tab_preprocessor.fit_transform(X_train.fillna(0)) X_test_tab = tab_preprocessor.transform(X_test.fillna(0))

Now, in the notebook, the author moves the sparse matrices to sparse tensors and then turns them into dense tensors. In reality, this is not necessary: one could feed sparse tensors to nn.Linear layers in pytorch. Nonetheless, this is not the most efficient implementation, which is the reason why in our library the wide, linear component is implemented as an embedding layer.
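For reference, a minimal sketch of how one could keep the input sparse (a toy matrix is used here, standing in for train_movies_watched; this is an illustration, not what we do below):

import numpy as np
import torch
from scipy.sparse import coo_matrix

# toy sparse interaction matrix: 2 users x 5 movies
coo = coo_matrix((np.ones(3), ([0, 1, 1], [2, 0, 3])), shape=(2, 5))

# scipy COO -> torch sparse COO tensor
indices = torch.from_numpy(np.vstack([coo.row, coo.col])).long()
values = torch.from_numpy(coo.data).float()
sparse_X = torch.sparse_coo_tensor(indices, values, coo.shape)

linear = torch.nn.Linear(5, 4)
# sparse x dense matmul plus the bias: equivalent to linear(dense_X)
out = torch.sparse.mm(sparse_X, linear.weight.t()) + linear.bias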

Nonetheless, to reproduce the notebook as closely as we can, and because currently the Wide model in pytorch-widedeep is not designed to receive sparse tensors (we might consider implementing this functionality), we will turn the sparse COO matrices into dense arrays. We will then code a fairly simple, custom Wide component.

In\u00a0[30]: Copied!
X_train_wide = np.array(train_movies_watched.todense())\nX_test_wide = np.array(test_movies_watched.todense())\n
X_train_wide = np.array(train_movies_watched.todense()) X_test_wide = np.array(test_movies_watched.todense())

Finally, the author of the notebook uses a simple Embedding layer to encode the sequences of movies watched, the prev_movies column. In my opinion, there is an element of information redundancy here, because the wide and text components implicitly carry the same information, just in different forms. Moreover, both of the models used for these two components ignore the sequential element in the data. Nonetheless, we want to reproduce the Kaggle notebook as closely as possible, AND, as one can explore later (by performing simple ablation studies), the wide component seems to carry most of the predictive power.

In\u00a0[31]: Copied!
X_train_text = sparse_to_idx(train_movies_watched, pad_idx=PAD_IDX)\nX_test_text = sparse_to_idx(test_movies_watched, pad_idx=PAD_IDX)\n
X_train_text = sparse_to_idx(train_movies_watched, pad_idx=PAD_IDX) X_test_text = sparse_to_idx(test_movies_watched, pad_idx=PAD_IDX)

Let's now build the models

In\u00a0[32]: Copied!
class Wide(nn.Module):\n    def __init__(self, input_dim: int, pred_dim: int):\n        super().__init__()\n\n        self.input_dim = input_dim\n        self.pred_dim = pred_dim\n\n        # When I coded the library I never though that someone would want to code\n        # their own wide component. However, if you do, the wide component must have\n        # a 'wide_linear' attribute. In other words, the linear layer must be\n        # called 'wide_linear'\n        self.wide_linear = nn.Linear(input_dim, pred_dim)\n\n    def forward(self, X):\n        out = self.wide_linear(X.type(torch.float32))\n        return out\n\n\nwide = Wide(X_train_wide.shape[1], max_movie_index + 1)\n
class Wide(nn.Module): def __init__(self, input_dim: int, pred_dim: int): super().__init__() self.input_dim = input_dim self.pred_dim = pred_dim # When I coded the library I never though that someone would want to code # their own wide component. However, if you do, the wide component must have # a 'wide_linear' attribute. In other words, the linear layer must be # called 'wide_linear' self.wide_linear = nn.Linear(input_dim, pred_dim) def forward(self, X): out = self.wide_linear(X.type(torch.float32)) return out wide = Wide(X_train_wide.shape[1], max_movie_index + 1) In\u00a0[33]: Copied!
wide\n
wide Out[33]:
Wide(\n  (wide_linear): Linear(in_features=1683, out_features=1683, bias=True)\n)
In\u00a0[34]: Copied!
class SimpleEmbed(nn.Module):\n    def __init__(self, vocab_size: int, embed_dim: int, pad_idx: int):\n        super().__init__()\n\n        self.vocab_size = vocab_size\n        self.embed_dim = embed_dim\n        self.pad_idx = pad_idx\n\n        # The sequences of movies watched are simply embedded in the Kaggle\n        # notebook. No RNN, Transformer or any model is used\n        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx)\n\n    def forward(self, X):\n        embed = self.embed(X)\n        embed_mean = torch.mean(embed, dim=1)\n        return embed_mean\n\n    @property\n    def output_dim(self) -> int:\n        # All deep components in a custom 'pytorch-widedeep' model must have\n        # an output_dim property\n        return self.embed_dim\n\n\n#  In the notebook the author uses simply embeddings\nsimple_embed = SimpleEmbed(max_movie_index + 1, 16, 0)\n
class SimpleEmbed(nn.Module): def __init__(self, vocab_size: int, embed_dim: int, pad_idx: int): super().__init__() self.vocab_size = vocab_size self.embed_dim = embed_dim self.pad_idx = pad_idx # The sequences of movies watched are simply embedded in the Kaggle # notebook. No RNN, Transformer or any model is used self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx) def forward(self, X): embed = self.embed(X) embed_mean = torch.mean(embed, dim=1) return embed_mean @property def output_dim(self) -> int: # All deep components in a custom 'pytorch-widedeep' model must have # an output_dim property return self.embed_dim # In the notebook the author uses simply embeddings simple_embed = SimpleEmbed(max_movie_index + 1, 16, 0) In\u00a0[35]: Copied!
simple_embed\n
simple_embed Out[35]:
SimpleEmbed(\n  (embed): Embedding(1683, 16, padding_idx=0)\n)

Maybe one would like to use an RNN to account for the sequential nature of the problem. If that were the case, it would be as easy as:

In\u00a0[36]: Copied!
basic_rnn = BasicRNN(\n    vocab_size=max_movie_index + 1,\n    embed_dim=16,\n    hidden_dim=32,\n    n_layers=2,\n    rnn_type=\"gru\",\n)\n
basic_rnn = BasicRNN( vocab_size=max_movie_index + 1, embed_dim=16, hidden_dim=32, n_layers=2, rnn_type=\"gru\", )

And finally, the tabular component, which in the notebook is simply a stack of linear + ReLU layers. In our case we have an embedding layer before the linear layers to encode the categorical and numerical columns.

In\u00a0[37]: Copied!
tab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=tab_preprocessor.continuous_cols,\n    cont_norm_layer=None,\n    mlp_hidden_dims=[1024, 512, 256],\n    mlp_activation=\"relu\",\n)\n
tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=tab_preprocessor.continuous_cols, cont_norm_layer=None, mlp_hidden_dims=[1024, 512, 256], mlp_activation=\"relu\", ) In\u00a0[38]: Copied!
tab_mlp\n
tab_mlp Out[38]:
TabMlp(\n  (cat_embed): DiffSizeCatEmbeddings(\n    (embed_layers): ModuleDict(\n      (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n      (emb_layer_occupation): Embedding(22, 9, padding_idx=0)\n      (emb_layer_zip_code): Embedding(648, 60, padding_idx=0)\n    )\n    (embedding_dropout): Dropout(p=0.0, inplace=False)\n  )\n  (cont_norm): Identity()\n  (encoder): MLP(\n    (mlp): Sequential(\n      (dense_layer_0): Sequential(\n        (0): Linear(in_features=111, out_features=1024, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n      (dense_layer_1): Sequential(\n        (0): Linear(in_features=1024, out_features=512, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n      (dense_layer_2): Sequential(\n        (0): Linear(in_features=512, out_features=256, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n    )\n  )\n)

Finally, we simply wrap up all models with the WideDeep 'collector' class and we are ready to train.

In\u00a0[39]: Copied!
wide_deep_model = WideDeep(\n    wide=wide, deeptabular=tab_mlp, deeptext=simple_embed, pred_dim=max_movie_index + 1\n)\n
wide_deep_model = WideDeep( wide=wide, deeptabular=tab_mlp, deeptext=simple_embed, pred_dim=max_movie_index + 1 ) In\u00a0[40]: Copied!
wide_deep_model\n
wide_deep_model Out[40]:
WideDeep(\n  (wide): Wide(\n    (wide_linear): Linear(in_features=1683, out_features=1683, bias=True)\n  )\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cat_embed): DiffSizeCatEmbeddings(\n        (embed_layers): ModuleDict(\n          (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n          (emb_layer_occupation): Embedding(22, 9, padding_idx=0)\n          (emb_layer_zip_code): Embedding(648, 60, padding_idx=0)\n        )\n        (embedding_dropout): Dropout(p=0.0, inplace=False)\n      )\n      (cont_norm): Identity()\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=111, out_features=1024, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=1024, out_features=512, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_2): Sequential(\n            (0): Linear(in_features=512, out_features=256, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=256, out_features=1683, bias=True)\n  )\n  (deeptext): Sequential(\n    (0): SimpleEmbed(\n      (embed): Embedding(1683, 16, padding_idx=0)\n    )\n    (1): Linear(in_features=16, out_features=1683, bias=True)\n  )\n)

Note that the main difference between this wide and deep model and the Wide and Deep model in the Kaggle notebook is the following: in that notebook, the author concatenates the embeddings and the tabular features, passes this concatenation through a stack of linear + ReLU layers with a final output dim of 256, then concatenates this output with the binary features and connects the result to the final linear layer (so that final layer receives an input of dim 256 + 1683). Our implementation follows the notation of the original paper: instead of concatenating the tabular, text and wide components and then connecting them to the output neurons, we first compute each component's output and then add them (see here: https://arxiv.org/pdf/1606.07792.pdf, their Eq 3). Note that this is effectively the same, with the caveat that while in one case one initialises a single big weight matrix \"at once\", in our implementation we initialise a different matrix per component. Anyway, let's give it a go.
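
To make that caveat concrete, here is a minimal, standalone sketch (toy dimensions, not the library's internal code) showing that summing per-component linear outputs is equivalent to a single linear layer applied to the concatenated outputs, as long as the weights are split accordingly:

import torch
import torch.nn as nn

batch, d_tab, d_text, n_out = 4, 256, 16, 1683
tab_out, text_out = torch.randn(batch, d_tab), torch.randn(batch, d_text)

# one 'big' linear layer over the concatenation (the Kaggle notebook route)
big = nn.Linear(d_tab + d_text, n_out)

# two smaller layers holding the corresponding slices of the big weight matrix
lin_tab = nn.Linear(d_tab, n_out)
lin_text = nn.Linear(d_text, n_out, bias=False)
with torch.no_grad():
    lin_tab.weight.copy_(big.weight[:, :d_tab])
    lin_tab.bias.copy_(big.bias)
    lin_text.weight.copy_(big.weight[:, d_tab:])

out_concat = big(torch.cat([tab_out, text_out], dim=1))
out_added = lin_tab(tab_out) + lin_text(text_out)
assert torch.allclose(out_concat, out_added, atol=1e-5)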

In\u00a0[41]: Copied!
trainer = Trainer(\n    model=wide_deep_model,\n    objective=\"multiclass\",\n    custom_loss_function=nn.CrossEntropyLoss(ignore_index=PAD_IDX),\n    optimizers=torch.optim.Adam(wide_deep_model.parameters(), lr=1e-3),\n)\n
trainer = Trainer( model=wide_deep_model, objective=\"multiclass\", custom_loss_function=nn.CrossEntropyLoss(ignore_index=PAD_IDX), optimizers=torch.optim.Adam(wide_deep_model.parameters(), lr=1e-3), ) In\u00a0[42]: Copied!
trainer.fit(\n    X_train={\n        \"X_wide\": X_train_wide,\n        \"X_tab\": X_train_tab,\n        \"X_text\": X_train_text,\n        \"target\": y_train,\n    },\n    X_val={\n        \"X_wide\": X_test_wide,\n        \"X_tab\": X_test_tab,\n        \"X_text\": X_test_text,\n        \"target\": y_test,\n    },\n    n_epochs=5,\n    batch_size=512,\n    shuffle=False,\n)\n
trainer.fit( X_train={ \"X_wide\": X_train_wide, \"X_tab\": X_train_tab, \"X_text\": X_train_text, \"target\": y_train, }, X_val={ \"X_wide\": X_test_wide, \"X_tab\": X_test_tab, \"X_text\": X_test_text, \"target\": y_test, }, n_epochs=5, batch_size=512, shuffle=False, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 149/149 [00:19<00:00,  7.66it/s, loss=6.66]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 38/38 [00:02<00:00, 18.75it/s, loss=6.6]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 149/149 [00:21<00:00,  6.95it/s, loss=5.97]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 38/38 [00:01<00:00, 21.03it/s, loss=6.52]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 149/149 [00:19<00:00,  7.51it/s, loss=5.65]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 38/38 [00:01<00:00, 20.16it/s, loss=6.53]\nepoch 4: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 149/149 [00:23<00:00,  6.29it/s, loss=5.41]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 38/38 [00:02<00:00, 13.97it/s, loss=6.57]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 149/149 [00:19<00:00,  7.58it/s, loss=5.2]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 38/38 [00:02<00:00, 18.82it/s, loss=6.63]\n

Now one could continue to the 'compare metrics' section of the Kaggle notebook. However, for the purposes of illustrating how one could use pytorch-widedeep to build recommendation algorithms, we consider this notebook complete and move on to part 2.
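
If you want to try the ablation studies mentioned earlier, here is a minimal sketch of a wide-only run (it reuses the names defined above in this notebook and re-instantiates the wide component so that it starts from fresh weights; hyperparameters are just placeholders):

# wide-only ablation: drop the deeptabular and deeptext components
wide_only = Wide(X_train_wide.shape[1], max_movie_index + 1)
wide_only_model = WideDeep(wide=wide_only, pred_dim=max_movie_index + 1)

wide_only_trainer = Trainer(
    model=wide_only_model,
    objective='multiclass',
    custom_loss_function=nn.CrossEntropyLoss(ignore_index=PAD_IDX),
)

wide_only_trainer.fit(
    X_train={'X_wide': X_train_wide, 'target': y_train},
    X_val={'X_wide': X_test_wide, 'target': y_test},
    n_epochs=5,
    batch_size=512,
    shuffle=False,
)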

"},{"location":"examples/19_wide_and_deep_for_recsys_pt1.html#problem-formulation","title":"Problem formulation\u00b6","text":"

In this particular exercise the problem is formulated as predicting the next movie that will be watched (as a consequence, the last interactions will be discarded).

"},{"location":"examples/19_wide_and_deep_for_recsys_pt2.html","title":"19_wide_and_deep_for_recsys_pt2","text":"

This is the second of the two notebooks where we aim to illustrate how one could use this library to build recommendation algorithms, using the example in this Kaggle notebook as guidance. In the previous notebook we used pytorch-widedeep to build a model that replicated almost exactly the one in that notebook. In this shorter notebook we will show how one could use the library to explore other models, following the same problem formulation, that is: given the state of a user at a certain point in time, having watched a series of movies, our goal is to predict which movie the user will watch next.

Assuming that one has read (and run) the previous notebook, the required data will be stored in a local dir called prepared_data, so let's read it:

In\u00a0[1]: Copied!
from pathlib import Path\n\nimport numpy as np\nimport torch\nimport pandas as pd\nfrom torch import nn\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.utils import pad_sequences\nfrom pytorch_widedeep.models import TabMlp, WideDeep, Transformer\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\n
from pathlib import Path import numpy as np import torch import pandas as pd from torch import nn from pytorch_widedeep import Trainer from pytorch_widedeep.utils import pad_sequences from pytorch_widedeep.models import TabMlp, WideDeep, Transformer from pytorch_widedeep.preprocessing import TabPreprocessor In\u00a0[2]: Copied!
save_path = Path(\"prepared_data\")\n\nPAD_IDX = 0\n\nid_cols = [\"user_id\", \"movie_id\"]\n\ndf_train = pd.read_pickle(save_path / \"df_train.pkl\")\ndf_valid = pd.read_pickle(save_path / \"df_valid.pkl\")\ndf_test = pd.read_pickle(save_path / \"df_test.pkl\")\n
save_path = Path(\"prepared_data\") PAD_IDX = 0 id_cols = [\"user_id\", \"movie_id\"] df_train = pd.read_pickle(save_path / \"df_train.pkl\") df_valid = pd.read_pickle(save_path / \"df_valid.pkl\") df_test = pd.read_pickle(save_path / \"df_test.pkl\")

...remember that in the previous notebook we explained that we are not going to use a validation set here (in a real-world example, or simply a more realistic example, one should always use it).

In\u00a0[3]: Copied!
df_test = pd.concat([df_valid, df_test], ignore_index=True)\n
df_test = pd.concat([df_valid, df_test], ignore_index=True)

Also remember that in the previous notebook we discussed that the 'maxlen' and 'max_movie_index' parameters should be computed using only the train set. In particular, to properly do the tokenization, one would have to use ONLY train tokens and add a token for new 'unknown'/'unseen' movies in the test set. This can be done with this library or manually, so I will leave it to the reader to implement that tokenization approach.
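
For completeness, a minimal sketch of that approach (a hypothetical, manual mapping; names such as UNK_IDX and token2idx are not part of the library) could look like this:

UNK_IDX = 1  # assumption: 0 stays reserved for padding

# build the vocabulary from the TRAIN sequences only
train_tokens = set(t for seq in df_train.prev_movies for t in seq)
token2idx = {t: i + 2 for i, t in enumerate(sorted(train_tokens))}

def encode(seq):
    # movies never seen at train time are mapped to the 'unknown' token
    return [token2idx.get(t, UNK_IDX) for t in seq]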

In\u00a0[4]: Copied!
maxlen = max(\n    df_train.prev_movies.apply(lambda x: len(x)).max(),\n    df_test.prev_movies.apply(lambda x: len(x)).max(),\n)\n\nmax_movie_index = max(df_train.movie_id.max(), df_test.movie_id.max())\n
maxlen = max( df_train.prev_movies.apply(lambda x: len(x)).max(), df_test.prev_movies.apply(lambda x: len(x)).max(), ) max_movie_index = max(df_train.movie_id.max(), df_test.movie_id.max())

From now on things are pretty simple, especially bearing in mind that in this example we are not going to use a wide component since, in principle, one would expect the information in that component to also be 'carried' by the movie sequences (although, as the ablation studies in the previous notebook suggest, most of the predictive power comes from the linear, wide model).

In the example here we are going to explore one (of many) possibilities. We are simply going to encode the triplet (user, item, rating) and use it as a deeptabular component and the sequences of previously watched movies as the deeptext component. For the deeptext component we are going to use a basic encoder-only transformer model.

Let's start with the tabular data preparation

In\u00a0[5]: Copied!
df_train_user_item = df_train[[\"user_id\", \"movie_id\", \"rating\"]]\ntrain_movies_sequences = df_train.prev_movies.apply(\n    lambda x: [int(el) for el in x]\n).to_list()\ny_train = df_train.target.values.astype(int)\n\ndf_test_user_item = df_test[[\"user_id\", \"movie_id\", \"rating\"]]\ntest_movies_sequences = df_test.prev_movies.apply(\n    lambda x: [int(el) for el in x]\n).to_list()\ny_test = df_test.target.values.astype(int)\n\ntab_preprocessor = TabPreprocessor(\n    cat_embed_cols=[\"user_id\", \"movie_id\", \"rating\"],\n)\nX_train_tab = tab_preprocessor.fit_transform(df_train_user_item)\nX_test_tab = tab_preprocessor.transform(df_test_user_item)\n
df_train_user_item = df_train[[\"user_id\", \"movie_id\", \"rating\"]] train_movies_sequences = df_train.prev_movies.apply( lambda x: [int(el) for el in x] ).to_list() y_train = df_train.target.values.astype(int) df_test_user_item = df_test[[\"user_id\", \"movie_id\", \"rating\"]] test_movies_sequences = df_test.prev_movies.apply( lambda x: [int(el) for el in x] ).to_list() y_test = df_test.target.values.astype(int) tab_preprocessor = TabPreprocessor( cat_embed_cols=[\"user_id\", \"movie_id\", \"rating\"], ) X_train_tab = tab_preprocessor.fit_transform(df_train_user_item) X_test_tab = tab_preprocessor.transform(df_test_user_item)

And now the text component, simply padding the sequences:

In\u00a0[6]: Copied!
X_train_text = np.array(\n    [\n        pad_sequences(\n            s,\n            maxlen=maxlen,\n            pad_first=False,\n            pad_idx=PAD_IDX,\n        )\n        for s in train_movies_sequences\n    ]\n)\nX_test_text = np.array(\n    [\n        pad_sequences(\n            s,\n            maxlen=maxlen,\n            pad_first=False,\n            pad_idx=0,\n        )\n        for s in test_movies_sequences\n    ]\n)\n
X_train_text = np.array( [ pad_sequences( s, maxlen=maxlen, pad_first=False, pad_idx=PAD_IDX, ) for s in train_movies_sequences ] ) X_test_text = np.array( [ pad_sequences( s, maxlen=maxlen, pad_first=False, pad_idx=0, ) for s in test_movies_sequences ] )

We now define the model components and the wide and deep model.

In\u00a0[7]: Copied!
tab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    mlp_hidden_dims=[1024, 512, 256],\n    mlp_activation=\"relu\",\n)\n\n# plenty of options here, see the docs\ntransformer = Transformer(\n    vocab_size=max_movie_index + 1,\n    embed_dim=32,\n    n_heads=2,\n    n_blocks=2,\n    seq_length=maxlen,\n)\n\nwide_deep_model = WideDeep(\n    deeptabular=tab_mlp, deeptext=transformer, pred_dim=max_movie_index + 1\n)\n
tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, mlp_hidden_dims=[1024, 512, 256], mlp_activation=\"relu\", ) # plenty of options here, see the docs transformer = Transformer( vocab_size=max_movie_index + 1, embed_dim=32, n_heads=2, n_blocks=2, seq_length=maxlen, ) wide_deep_model = WideDeep( deeptabular=tab_mlp, deeptext=transformer, pred_dim=max_movie_index + 1 ) In\u00a0[8]: Copied!
wide_deep_model\n
wide_deep_model Out[8]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cat_embed): DiffSizeCatEmbeddings(\n        (embed_layers): ModuleDict(\n          (emb_layer_user_id): Embedding(749, 65, padding_idx=0)\n          (emb_layer_movie_id): Embedding(1612, 100, padding_idx=0)\n          (emb_layer_rating): Embedding(6, 4, padding_idx=0)\n        )\n        (embedding_dropout): Dropout(p=0.0, inplace=False)\n      )\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=169, out_features=1024, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=1024, out_features=512, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_2): Sequential(\n            (0): Linear(in_features=512, out_features=256, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=256, out_features=1683, bias=True)\n  )\n  (deeptext): Sequential(\n    (0): Transformer(\n      (embedding): Embedding(1683, 32, padding_idx=0)\n      (pos_encoder): PositionalEncoding(\n        (dropout): Dropout(p=0.1, inplace=False)\n      )\n      (encoder): Sequential(\n        (transformer_block0): TransformerEncoder(\n          (attn): MultiHeadedAttention(\n            (dropout): Dropout(p=0.1, inplace=False)\n            (q_proj): Linear(in_features=32, out_features=32, bias=False)\n            (kv_proj): Linear(in_features=32, out_features=64, bias=False)\n            (out_proj): Linear(in_features=32, out_features=32, bias=False)\n          )\n          (ff): FeedForward(\n            (w_1): Linear(in_features=32, out_features=128, bias=True)\n            (w_2): Linear(in_features=128, out_features=32, bias=True)\n            (dropout): Dropout(p=0.1, inplace=False)\n            (activation): GELU(approximate='none')\n          )\n          (attn_addnorm): AddNorm(\n            (dropout): Dropout(p=0.1, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n          (ff_addnorm): AddNorm(\n            (dropout): Dropout(p=0.1, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n        )\n        (transformer_block1): TransformerEncoder(\n          (attn): MultiHeadedAttention(\n            (dropout): Dropout(p=0.1, inplace=False)\n            (q_proj): Linear(in_features=32, out_features=32, bias=False)\n            (kv_proj): Linear(in_features=32, out_features=64, bias=False)\n            (out_proj): Linear(in_features=32, out_features=32, bias=False)\n          )\n          (ff): FeedForward(\n            (w_1): Linear(in_features=32, out_features=128, bias=True)\n            (w_2): Linear(in_features=128, out_features=32, bias=True)\n            (dropout): Dropout(p=0.1, inplace=False)\n            (activation): GELU(approximate='none')\n          )\n          (attn_addnorm): AddNorm(\n            (dropout): Dropout(p=0.1, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n          (ff_addnorm): AddNorm(\n            (dropout): Dropout(p=0.1, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n        )\n      )\n    )\n    (1): 
Linear(in_features=23552, out_features=1683, bias=True)\n  )\n)

And as in the previous notebook, let's train (you will need a GPU for this)

In\u00a0[\u00a0]: Copied!
trainer = Trainer(\n    model=wide_deep_model,\n    objective=\"multiclass\",\n    custom_loss_function=nn.CrossEntropyLoss(ignore_index=PAD_IDX),\n    optimizers=torch.optim.Adam(wide_deep_model.parameters(), lr=1e-3),\n)\n\ntrainer.fit(\n    X_train={\n        \"X_tab\": X_train_tab,\n        \"X_text\": X_train_text,\n        \"target\": y_train,\n    },\n    X_val={\n        \"X_tab\": X_test_tab,\n        \"X_text\": X_test_text,\n        \"target\": y_test,\n    },\n    n_epochs=10,\n    batch_size=521,\n    shuffle=False,\n)\n
trainer = Trainer( model=wide_deep_model, objective=\"multiclass\", custom_loss_function=nn.CrossEntropyLoss(ignore_index=PAD_IDX), optimizers=torch.optim.Adam(wide_deep_model.parameters(), lr=1e-3), ) trainer.fit( X_train={ \"X_tab\": X_train_tab, \"X_text\": X_train_text, \"target\": y_train, }, X_val={ \"X_tab\": X_test_tab, \"X_text\": X_test_text, \"target\": y_test, }, n_epochs=10, batch_size=521, shuffle=False, )
epoch 1:   0%|                                                                                                         | 0/147 [00:34<?, ?it/s]\n
In\u00a0[\u00a0]: Copied!
\n
"},{"location":"examples/20_load_from_folder_functionality.html","title":"20_load_from_folder_functionality.","text":"

In this notebook I want to illustrate how one can use our [...]FromFolder functionalities along with the Chunk[...]Preprocessors in those cases where the dataset is too big to fit in memory.

These functionalities in the library have been designed for the following scenario:

In\u00a0[1]: Copied!
import numpy as np\nimport torch\nimport pandas as pd\nfrom torch.utils.data import DataLoader\n\nfrom pytorch_widedeep.models import TabMlp, Vision, BasicRNN, WideDeep\nfrom pytorch_widedeep.training import TrainerFromFolder\nfrom pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint\nfrom pytorch_widedeep.preprocessing import (\n    TabPreprocessor,\n    TextPreprocessor,\n    ImagePreprocessor,\n    ChunkTabPreprocessor,\n    ChunkTextPreprocessor,\n)\nfrom pytorch_widedeep.load_from_folder import (\n    TabFromFolder,\n    TextFromFolder,\n    ImageFromFolder,\n    WideDeepDatasetFromFolder,\n)\n
import numpy as np import torch import pandas as pd from torch.utils.data import DataLoader from pytorch_widedeep.models import TabMlp, Vision, BasicRNN, WideDeep from pytorch_widedeep.training import TrainerFromFolder from pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint from pytorch_widedeep.preprocessing import ( TabPreprocessor, TextPreprocessor, ImagePreprocessor, ChunkTabPreprocessor, ChunkTextPreprocessor, ) from pytorch_widedeep.load_from_folder import ( TabFromFolder, TextFromFolder, ImageFromFolder, WideDeepDatasetFromFolder, ) In\u00a0[2]: Copied!
# in my case, I place the data in a folder I call tmp_data, let's see how it looks\nairbnb_data = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\")\nairbnb_data.sample(5)\n
# in my case, I place the data in a folder I call tmp_data, let's see how it looks airbnb_data = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\") airbnb_data.sample(5) Out[2]: id host_id description host_listings_count host_identity_verified neighbourhood_cleansed latitude longitude is_location_exact property_type ... amenity_wide_entrance amenity_wide_entrance_for_guests amenity_wide_entryway amenity_wide_hallways amenity_wifi amenity_window_guards amenity_wine_cooler security_deposit extra_people yield 39 53242.jpg 247650 A lovely big bright bedroom in a 2 bedroom fla... 2.0 t Lambeth 51.47075 -0.12913 t apartment ... 0 0 0 0 1 0 0 250.0 5.0 9.75 214 236716.jpg 1241070 We offer a warm welcome in our quiet double ro... 1.0 t Hackney 51.56593 -0.07482 t other ... 0 0 0 0 1 0 0 200.0 10.0 76.50 400 346523.jpg 1756532 Available for you to rent is a cozy studio in ... 2.0 t Kensington and Chelsea 51.48311 -0.18428 t other ... 0 0 0 0 1 0 0 0.0 50.0 180.90 512 389627.jpg 1949299 This gorgeous studio flat is situated in the v... 1.0 t Westminster 51.51838 -0.14238 f apartment ... 0 0 0 0 1 0 0 250.0 25.0 276.90 504 388767.jpg 1945165 If you want to experience London at it's best ... 2.0 f Camden 51.54293 -0.14073 t apartment ... 0 0 0 0 1 0 0 150.0 10.0 591.10

5 rows \u00d7 223 columns

In\u00a0[3]: Copied!
# for example\nfrom IPython.display import Image\n\npil_img = Image(filename=\"../tmp_data/airbnb/property_picture/272908.jpg\")\ndisplay(pil_img)\n
# for example from IPython.display import Image pil_img = Image(filename=\"../tmp_data/airbnb/property_picture/272908.jpg\") display(pil_img) In\u00a0[4]: Copied!
# And the description for the property that that picture belongs to is:\nairbnb_data[airbnb_data.id == \"272908.jpg\"].description.tolist()\n
# And the description for the property that that picture belongs to is: airbnb_data[airbnb_data.id == \"272908.jpg\"].description.tolist() Out[4]:
[\"Bright, sunny beautiful room that will give you the perfect base to explore all of London. Come and explore one of London's best neighbourhoods - Herne Hill! As mentioned in (Website hidden by Airbnb)   (Website hidden by Airbnb)  WiFi availability with a fully stocked and clean uplifting home. Lovely sunny, airy and big double bedroom on a leafy south-London street.    Note: This room comes with a reserved Off-Street parking spot! The room is on the first floor and boasts an enormous Super King bed, gorgeous wooden floors, tall ceilings and large windows which let in the sunshine almost all day. (Yoga May or meditation cushion available on request) The flat is bright and airy and big! So lots of space for all.  Location wise you are only 10 minutes walk to either Herne Hill or West Dulwich stations, both of which will take you to Victoria and the city within minutes. You can also hop on a bus right outside the flat that will take you to Brixton tube station within 8 minutes where you \"]

Ok, so we have tabular data where one column, description, contains text and another, id, points to the images stored on disk. Now, remember the following, because it will appear a few times in the notebook: our \"reference dataset\" is the tabular data.

Therefore, since I want to illustrate a \"semi-realistic\" case, if we need to split the data into training, validation and test datasets, these datasets need to be stored separately on disk. In my case I have done this, and in the tmp_data/airbnb dir I have the following:

../tmp_data/airbnb\n\u251c\u2500\u2500 airbnb_sample.csv\n\u251c\u2500\u2500 airbnb_sample_eval.csv\n\u251c\u2500\u2500 airbnb_sample_test.csv\n\u251c\u2500\u2500 airbnb_sample_train.csv\n\u2514\u2500\u2500 property_picture\n

Where airbnb_sample.csv is the full sample (1001 rows) and the train, eval and test sets are the corresponding splits. In a realistic example, the full sample would be the 'gigantic' dataset and the rest the corresponding splits. One has to do this 'offline', prior to starting to code.

Also, one needs to know the total number of observations/rows, as well as the size of each split. In our case the train size is 800, and the eval and test sizes are 100 and 101 respectively.
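
If you need to produce those split files yourself, a minimal, hypothetical way of doing it offline (the 800/100/101 sizes simply match this sample; in a real case one would typically shuffle first) would be:

import pandas as pd

full = pd.read_csv('../tmp_data/airbnb/airbnb_sample.csv')

full.iloc[:800].to_csv('../tmp_data/airbnb/airbnb_sample_train.csv', index=False)
full.iloc[800:900].to_csv('../tmp_data/airbnb/airbnb_sample_eval.csv', index=False)
full.iloc[900:].to_csv('../tmp_data/airbnb/airbnb_sample_test.csv', index=False)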

With all that info, let's start.

In\u00a0[5]: Copied!
# path to the tabular data and the splits\ndata_path = \"../tmp_data/airbnb/\"\ntrain_fname = \"airbnb_sample_train.csv\"\neval_fname = \"airbnb_sample_eval.csv\"\ntest_fname = \"airbnb_sample_test.csv\"\n\n# split sizes\ntrain_size = 800\neval_size = 100\ntest_size = 101\n\n# number of chunks for the Chunk Preprocessors\nchunksize = 100\nn_chunks = int(np.ceil(train_size / chunksize))\n\n# path to the image dataset and name of the image col\nimg_path = \"../tmp_data/airbnb/property_picture/\"\nimg_col = \"id\"\n\n# name of the text col\ntext_col = \"description\"\n\n# mane of the target\ntarget_col = \"yield\"\n\n# definition of the categorical and continuous cols for the TabPreprocessor\ncat_embed_cols = [\n    \"host_listings_count\",\n    \"neighbourhood_cleansed\",\n    \"is_location_exact\",\n    \"property_type\",\n    \"room_type\",\n    \"accommodates\",\n    \"bathrooms\",\n    \"bedrooms\",\n    \"beds\",\n    \"guests_included\",\n    \"minimum_nights\",\n    \"instant_bookable\",\n    \"cancellation_policy\",\n    \"has_house_rules\",\n    \"host_gender\",\n    \"accommodates_catg\",\n    \"guests_included_catg\",\n    \"minimum_nights_catg\",\n    \"host_listings_count_catg\",\n    \"bathrooms_catg\",\n    \"bedrooms_catg\",\n    \"beds_catg\",\n    \"security_deposit\",\n    \"extra_people\",\n]\ncont_cols = [\"latitude\", \"longitude\"]\n
# path to the tabular data and the splits data_path = \"../tmp_data/airbnb/\" train_fname = \"airbnb_sample_train.csv\" eval_fname = \"airbnb_sample_eval.csv\" test_fname = \"airbnb_sample_test.csv\" # split sizes train_size = 800 eval_size = 100 test_size = 101 # number of chunks for the Chunk Preprocessors chunksize = 100 n_chunks = int(np.ceil(train_size / chunksize)) # path to the image dataset and name of the image col img_path = \"../tmp_data/airbnb/property_picture/\" img_col = \"id\" # name of the text col text_col = \"description\" # mane of the target target_col = \"yield\" # definition of the categorical and continuous cols for the TabPreprocessor cat_embed_cols = [ \"host_listings_count\", \"neighbourhood_cleansed\", \"is_location_exact\", \"property_type\", \"room_type\", \"accommodates\", \"bathrooms\", \"bedrooms\", \"beds\", \"guests_included\", \"minimum_nights\", \"instant_bookable\", \"cancellation_policy\", \"has_house_rules\", \"host_gender\", \"accommodates_catg\", \"guests_included_catg\", \"minimum_nights_catg\", \"host_listings_count_catg\", \"bathrooms_catg\", \"bedrooms_catg\", \"beds_catg\", \"security_deposit\", \"extra_people\", ] cont_cols = [\"latitude\", \"longitude\"] In\u00a0[6]: Copied!
tab_preprocessor = TabPreprocessor(\n    embed_cols=cat_embed_cols,\n    continuous_cols=cont_cols,\n    default_embed_dim=8,\n    verbose=0,\n)\n\ntext_preprocessor = TextPreprocessor(\n    text_col=text_col,\n    n_cpus=1,\n)\n\nimg_preprocessor = ImagePreprocessor(\n    img_col=img_col,\n    img_path=img_path,\n)\n
tab_preprocessor = TabPreprocessor( embed_cols=cat_embed_cols, continuous_cols=cont_cols, default_embed_dim=8, verbose=0, ) text_preprocessor = TextPreprocessor( text_col=text_col, n_cpus=1, ) img_preprocessor = ImagePreprocessor( img_col=img_col, img_path=img_path, ) In\u00a0[7]: Copied!
tab_preprocessor.fit(airbnb_data)\ntext_preprocessor.fit(airbnb_data)\nimg_preprocessor.fit(airbnb_data)\n
tab_preprocessor.fit(airbnb_data) text_preprocessor.fit(airbnb_data) img_preprocessor.fit(airbnb_data)
The vocabulary contains 2192 tokens\n
Out[7]:
ImagePreprocessor(img_col=id, img_path=../tmp_data/airbnb/property_picture/, width=224, height=224, verbose=1)
In\u00a0[8]: Copied!
chunk_tab_preprocessor = ChunkTabPreprocessor(\n    embed_cols=cat_embed_cols,\n    continuous_cols=cont_cols,\n    n_chunks=n_chunks,\n    default_embed_dim=8,\n    verbose=0,\n)\n\nchunk_text_preprocessor = ChunkTextPreprocessor(\n    n_chunks=n_chunks,\n    text_col=text_col,\n    n_cpus=1,\n    verbose=0,\n)\n\nfor i, chunk in enumerate(\n    pd.read_csv(\"/\".join([data_path, train_fname]), chunksize=chunksize)\n):\n    print(f\"chunk in loop: {i + 1}\")\n    chunk_tab_preprocessor.fit(chunk)\n    chunk_text_preprocessor.fit(chunk)\n
chunk_tab_preprocessor = ChunkTabPreprocessor( embed_cols=cat_embed_cols, continuous_cols=cont_cols, n_chunks=n_chunks, default_embed_dim=8, verbose=0, ) chunk_text_preprocessor = ChunkTextPreprocessor( n_chunks=n_chunks, text_col=text_col, n_cpus=1, verbose=0, ) for i, chunk in enumerate( pd.read_csv(\"/\".join([data_path, train_fname]), chunksize=chunksize) ): print(f\"chunk in loop: {i + 1}\") chunk_tab_preprocessor.fit(chunk) chunk_text_preprocessor.fit(chunk)
chunk in loop: 1\nchunk in loop: 2\nchunk in loop: 3\nchunk in loop: 4\nchunk in loop: 5\nchunk in loop: 6\nchunk in loop: 7\nchunk in loop: 8\n
In\u00a0[9]: Copied!
train_tab_folder = TabFromFolder(\n    fname=train_fname,\n    directory=data_path,\n    target_col=target_col,\n    preprocessor=tab_preprocessor,\n    text_col=text_col,\n    img_col=img_col,\n)\n\n# Note how we can use the `train_tab_folder` as reference so we don't have to\n# define all parameters again\neval_tab_folder = TabFromFolder(fname=eval_fname, reference=train_tab_folder)\n\n# Note that for the test set we can ignore the target as no metric will be\n# computed by the `predict` method\ntest_tab_folder = TabFromFolder(\n    fname=test_fname, reference=train_tab_folder, ignore_target=True\n)\n
train_tab_folder = TabFromFolder( fname=train_fname, directory=data_path, target_col=target_col, preprocessor=tab_preprocessor, text_col=text_col, img_col=img_col, ) # Note how we can use the `train_tab_folder` as reference so we don't have to # define all parameters again eval_tab_folder = TabFromFolder(fname=eval_fname, reference=train_tab_folder) # Note that for the test set we can ignore the target as no metric will be # computed by the `predict` method test_tab_folder = TabFromFolder( fname=test_fname, reference=train_tab_folder, ignore_target=True ) In\u00a0[10]: Copied!
# for the text and image datasets we do not need to specify eval or test loaders\ntext_folder = TextFromFolder(preprocessor=text_preprocessor)\nimg_folder = ImageFromFolder(preprocessor=img_preprocessor)\n
# for the text and image datasets we do not need to specify eval or test loaders text_folder = TextFromFolder(preprocessor=text_preprocessor) img_folder = ImageFromFolder(preprocessor=img_preprocessor) In\u00a0[11]: Copied!
train_dataset_folder = WideDeepDatasetFromFolder(\n    n_samples=train_size,\n    tab_from_folder=train_tab_folder,\n    text_from_folder=text_folder,\n    img_from_folder=img_folder,\n)\n\n# Note that the eval and test loaders only need their corresponding\n# `TabFromFolder` classes. The rest of the parameters can be defined\n# via a `reference` `TabFromFolder` class\neval_dataset_folder = WideDeepDatasetFromFolder(\n    n_samples=eval_size,\n    tab_from_folder=eval_tab_folder,\n    reference=train_dataset_folder,\n)\n\ntest_dataset_folder = WideDeepDatasetFromFolder(\n    n_samples=test_size,\n    tab_from_folder=test_tab_folder,\n    reference=train_dataset_folder,\n)\n
train_dataset_folder = WideDeepDatasetFromFolder( n_samples=train_size, tab_from_folder=train_tab_folder, text_from_folder=text_folder, img_from_folder=img_folder, ) # Note that the eval and test loaders only need their corresponding # `TabFromFolder` classes. The rest of the parameters can be defined # via a `reference` `TabFromFolder` class eval_dataset_folder = WideDeepDatasetFromFolder( n_samples=eval_size, tab_from_folder=eval_tab_folder, reference=train_dataset_folder, ) test_dataset_folder = WideDeepDatasetFromFolder( n_samples=test_size, tab_from_folder=test_tab_folder, reference=train_dataset_folder, ) In\u00a0[12]: Copied!
train_loader = DataLoader(train_dataset_folder, batch_size=16, num_workers=1)\neval_loader = DataLoader(eval_dataset_folder, batch_size=16, num_workers=1)\ntest_loader = DataLoader(test_dataset_folder, batch_size=16, num_workers=1)\n
train_loader = DataLoader(train_dataset_folder, batch_size=16, num_workers=1) eval_loader = DataLoader(eval_dataset_folder, batch_size=16, num_workers=1) test_loader = DataLoader(test_dataset_folder, batch_size=16, num_workers=1)

And from here onwards it is business as usual:

In\u00a0[13]: Copied!
# for example\nbasic_rnn = BasicRNN(\n    vocab_size=len(text_preprocessor.vocab.itos),\n    embed_dim=32,\n    hidden_dim=64,\n    n_layers=2,\n)\n\ndeepimage = Vision()\n\ndeepdense = TabMlp(\n    mlp_hidden_dims=[32, 16],\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=cont_cols,\n)\n\nmodel = WideDeep(\n    deeptabular=deepdense,\n    deeptext=basic_rnn,\n    deepimage=deepimage,\n)\n\nmodel\n
# for example basic_rnn = BasicRNN( vocab_size=len(text_preprocessor.vocab.itos), embed_dim=32, hidden_dim=64, n_layers=2, ) deepimage = Vision() deepdense = TabMlp( mlp_hidden_dims=[32, 16], column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=cont_cols, ) model = WideDeep( deeptabular=deepdense, deeptext=basic_rnn, deepimage=deepimage, ) model Out[13]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cat_embed): DiffSizeCatEmbeddings(\n        (embed_layers): ModuleDict(\n          (emb_layer_host_listings_count): Embedding(28, 10, padding_idx=0)\n          (emb_layer_neighbourhood_cleansed): Embedding(33, 11, padding_idx=0)\n          (emb_layer_is_location_exact): Embedding(3, 2, padding_idx=0)\n          (emb_layer_property_type): Embedding(4, 3, padding_idx=0)\n          (emb_layer_room_type): Embedding(4, 3, padding_idx=0)\n          (emb_layer_accommodates): Embedding(14, 7, padding_idx=0)\n          (emb_layer_bathrooms): Embedding(11, 6, padding_idx=0)\n          (emb_layer_bedrooms): Embedding(7, 4, padding_idx=0)\n          (emb_layer_beds): Embedding(11, 6, padding_idx=0)\n          (emb_layer_guests_included): Embedding(11, 6, padding_idx=0)\n          (emb_layer_minimum_nights): Embedding(25, 9, padding_idx=0)\n          (emb_layer_instant_bookable): Embedding(3, 2, padding_idx=0)\n          (emb_layer_cancellation_policy): Embedding(6, 4, padding_idx=0)\n          (emb_layer_has_house_rules): Embedding(3, 2, padding_idx=0)\n          (emb_layer_host_gender): Embedding(4, 3, padding_idx=0)\n          (emb_layer_accommodates_catg): Embedding(4, 3, padding_idx=0)\n          (emb_layer_guests_included_catg): Embedding(4, 3, padding_idx=0)\n          (emb_layer_minimum_nights_catg): Embedding(4, 3, padding_idx=0)\n          (emb_layer_host_listings_count_catg): Embedding(5, 3, padding_idx=0)\n          (emb_layer_bathrooms_catg): Embedding(4, 3, padding_idx=0)\n          (emb_layer_bedrooms_catg): Embedding(5, 3, padding_idx=0)\n          (emb_layer_beds_catg): Embedding(5, 3, padding_idx=0)\n          (emb_layer_security_deposit): Embedding(53, 15, padding_idx=0)\n          (emb_layer_extra_people): Embedding(39, 12, padding_idx=0)\n        )\n        (embedding_dropout): Dropout(p=0.0, inplace=False)\n      )\n      (cont_norm): Identity()\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=128, out_features=32, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=32, out_features=16, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=16, out_features=1, bias=True)\n  )\n  (deeptext): Sequential(\n    (0): BasicRNN(\n      (word_embed): Embedding(2192, 32, padding_idx=1)\n      (rnn): LSTM(32, 64, num_layers=2, batch_first=True, dropout=0.1)\n      (rnn_mlp): Identity()\n    )\n    (1): Linear(in_features=64, out_features=1, bias=True)\n  )\n  (deepimage): Sequential(\n    (0): Vision(\n      (features): Sequential(\n        (conv_layer_0): Sequential(\n          (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n          (1): BatchNorm2d(64, eps=1e-05, momentum=0.01, affine=True, track_running_stats=True)\n          (2): LeakyReLU(negative_slope=0.1, inplace=True)\n          (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n        )\n        (conv_layer_1): Sequential(\n          (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n          (1): BatchNorm2d(128, eps=1e-05, momentum=0.01, affine=True, track_running_stats=True)\n          (2): LeakyReLU(negative_slope=0.1, inplace=True)\n     
   )\n        (conv_layer_2): Sequential(\n          (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n          (1): BatchNorm2d(256, eps=1e-05, momentum=0.01, affine=True, track_running_stats=True)\n          (2): LeakyReLU(negative_slope=0.1, inplace=True)\n        )\n        (conv_layer_3): Sequential(\n          (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n          (1): BatchNorm2d(512, eps=1e-05, momentum=0.01, affine=True, track_running_stats=True)\n          (2): LeakyReLU(negative_slope=0.1, inplace=True)\n          (adaptiveavgpool): AdaptiveAvgPool2d(output_size=(1, 1))\n        )\n      )\n    )\n    (1): Linear(in_features=512, out_features=1, bias=True)\n  )\n)
In\u00a0[14]: Copied!
trainer = TrainerFromFolder(\n    model,\n    objective=\"regression\",\n)\n\ntrainer.fit(\n    train_loader=train_loader,\n    eval_loader=eval_loader,\n)\n
trainer = TrainerFromFolder( model, objective=\"regression\", ) trainer.fit( train_loader=train_loader, eval_loader=eval_loader, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 50/50 [03:41<00:00,  4.42s/it, loss=1.64e+4]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 7/7 [00:23<00:00,  3.30s/it, loss=6.27e+3]\n
In\u00a0[15]: Copied!
preds = trainer.predict(test_loader=test_loader)\n
preds = trainer.predict(test_loader=test_loader)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 7/7 [00:22<00:00,  3.26s/it]\n

Note that in the case of predict you could also choose to do the following:

In\u00a0[16]: Copied!
df_test = pd.read_csv(\"/\".join([data_path, test_fname]))\n
df_test = pd.read_csv(\"/\".join([data_path, test_fname])) In\u00a0[17]: Copied!
# if the images for the test set fit in memory\nX_tab_test = chunk_tab_preprocessor.transform(df_test)\nX_text_test = chunk_text_preprocessor.transform(df_test)\nX_img_test = img_preprocessor.transform(df_test)\n
# if the images for the test set fit in memory X_tab_test = chunk_tab_preprocessor.transform(df_test) X_text_test = chunk_text_preprocessor.transform(df_test) X_img_test = img_preprocessor.transform(df_test)
Reading Images from ../tmp_data/airbnb/property_picture/\nResizing\n
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 101/101 [00:00<00:00, 708.23it/s]
Computing normalisation metrics\n
\n
In\u00a0[18]: Copied!
preds = trainer.predict(\n    X_tab=X_tab_test, X_text=X_text_test, X_img=X_img_test, batch_size=32\n)\n
preds = trainer.predict( X_tab=X_tab_test, X_text=X_text_test, X_img=X_img_test, batch_size=32 )
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 4/4 [00:03<00:00,  1.14it/s]\n
"},{"location":"examples/20_load_from_folder_functionality.html#scenario","title":"Scenario\u00b6","text":"

We have a tabular dataset combined with images and text, and either some or all of these datasets do not fit in memory. Note that the tabular dataset MUST ALWAYS be present, as it is considered the reference. That is, if we have an image dataset, the tabular dataset must contain a column that points to the image file names as stored on disk. Similarly, if we have a text dataset, then the tabular dataset must contain a column with the texts themselves or with the file names of the text files as stored on disk.

If you only have text and/or images and no tabular component, I would suggest using other libraries (such as Hugging Face, probably).

Within this setting, there are two possible scenarios that we will cover here:

  1. The tabular data itself fits in memory and it is only the images that do not: in this case you could use the 'standard' Preprocessors in the library and move directly to the [...]FromFolder functionalities.

  2. The tabular data is also very large and does not fit in memory, so we have to process it in chunks. For this second case I have created the so-called Chunk[...]Preprocessor classes (Wide, Tab and Text).

Note that at the moment ONLY csv format is allowed for the tabular file. More formats will be supported in the future.

Let's see a complete example to illustrate how each of these cases would be addressed with the new functionalities in the library. For this example we will use a sample of the airbnb dataset

The airbnb dataset, which you could get from here, is too big to be included in our datasets module (when including images). Therefore, what I did was go there, download it, and use the download_images.py script to get the images and the airbnb_data_processing.py script to process the data. I did this ages ago and I believe the format of the dataset might be different now. Nonetheless, I will show samples of the dataset as we go so you can extrapolate the content of this notebook to your particular problem.

In the future we will find better datasets\ud83d\ude42. Finally, note that here we are only using a small sample to illustrate the use, so PLEASE ignore the results, just focus on usage.

"},{"location":"examples/20_load_from_folder_functionality.html#setting-variables-and-constants","title":"Setting variables and constants\u00b6","text":""},{"location":"examples/20_load_from_folder_functionality.html#step-1-the-preprocessors","title":"Step 1: the preprocessors\u00b6","text":""},{"location":"examples/20_load_from_folder_functionality.html#scenario-1-only-the-images-do-not-fit-in-disk","title":"Scenario 1: only the images do not fit in disk\u00b6","text":"

In this case we can prepare the data in the 'standard' way

"},{"location":"examples/20_load_from_folder_functionality.html#scenario-2-the-tabular-data-is-also-huge","title":"Scenario 2: the tabular data is also huge\u00b6","text":"

Then we need to prepare it in chunks. Note that, unfortunately, the tabular and text preprocessors need to see the whole dataset once. This is because to process tabular or text data we need to encode values, and for those encodings to be consistent they need to have seen the whole dataset. Alternatively, one could code a solution with some streaming encoder for both datasets. However, such an implementation is not trivial for this library (and in general). I also don't think that having to see the whole dataset once is such a big limitation. Let's see how it is done.

Note that I have not mentioned the image dataset. This is because the processing of the image dataset does not require any form of encoding and, as a consequence, can be done 'on the fly'. Therefore, no ChunkImgPreprocessor is needed.

"},{"location":"examples/20_load_from_folder_functionality.html#step-2-the-fromfolder-classes","title":"Step 2: the [...]FromFolder classes\u00b6","text":"

Once we have the preprocessors, we need to instantiate the classes that will enable us to load the data from their respective folders. From now on I am going to proceed with the chunk_tab_preprocessor, chunk_text_preprocessor and img_preprocessor, but the code would be identical if instead of the first two preprocessors we decided to use the tab_preprocessor and text_preprocessor.

Once more, our reference datasets are the tabular datasets, which we have split into train, eval and test prior to writing any code. Therefore, we will eventually need a loader for each split.

"},{"location":"examples/20_load_from_folder_functionality.html#step-3-pytorch-datasets-and-dataloaders","title":"Step 3: pytorch datasets and dataloaders\u00b6","text":"

From here onwards, it is all very 'standard' if you are familiar with pytorch. One needs to define a class that inherits from the Dataset class in pytorch. This is then passed to a DataLoader class and we are ready to train. Our Dataset child class is WideDeepDatasetFromFolder. This class uses the tabular dataset and the corresponding text and image columns to load the appropriate data in each batch.

Let's do it

"},{"location":"examples/20_load_from_folder_functionality.html#step-4-define-the-model","title":"Step 4: define the model\u00b6","text":""},{"location":"examples/20_load_from_folder_functionality.html#step-5-fit-and-predict","title":"Step 5: fit and predict\u00b6","text":""},{"location":"pytorch-widedeep/bayesian_models.html","title":"The bayesian models module","text":"

This module contains the two Bayesian models available in this library, namely the Bayesian versions of the Wide and TabMlp models, referred to as BayesianWide and BayesianTabMlp. These models are very useful in scenarios where getting a measure of uncertainty is important.

The models in this module are based on the publication: Weight Uncertainty in Neural Networks.

"},{"location":"pytorch-widedeep/bayesian_models.html#pytorch_widedeep.bayesian_models.tabular.bayesian_linear.bayesian_wide.BayesianWide","title":"BayesianWide","text":"
BayesianWide(\n    input_dim,\n    pred_dim=1,\n    prior_sigma_1=1.0,\n    prior_sigma_2=0.002,\n    prior_pi=0.8,\n    posterior_mu_init=0.0,\n    posterior_rho_init=-7.0,\n)\n

Bases: BaseBayesianModel

Defines a Wide model. This is a linear model where the non-linearities are captured via crossed columns.

Parameters:

  • input_dim (int) \u2013

    size of the Embedding layer. input_dim is the summation of all the individual values for all the features that go through the wide component. For example, if the wide component receives 2 features with 5 individual values each, input_dim = 10

  • pred_dim (int, default: 1 ) \u2013

    size of the output tensor containing the predictions

  • prior_sigma_1 (float, default: 1.0 ) \u2013

    The prior weight distribution is a scaled mixture of two Gaussian densities:

    \\[ \\begin{aligned} P(\\mathbf{w}) = \\prod_{i=j} \\pi N (\\mathbf{w}_j | 0, \\sigma_{1}^{2}) + (1 - \\pi) N (\\mathbf{w}_j | 0, \\sigma_{2}^{2}) \\end{aligned} \\]

    prior_sigma_1 is the prior of the sigma parameter for the first of the two Gaussians that will be mixed to produce the prior weight distribution.

  • prior_sigma_2 (float, default: 0.002 ) \u2013

    Prior of the sigma parameter for the second of the two Gaussian distributions that will be mixed to produce the prior weight distribution

  • prior_pi (float, default: 0.8 ) \u2013

    Scaling factor that will be used to mix the Gaussians to produce the prior weight distribution

  • posterior_mu_init (float, default: 0.0 ) \u2013

    The posterior sample of the weights is defined as:

    \\[ \\begin{aligned} \\mathbf{w} &= \\mu + log(1 + exp(\\rho)) \\end{aligned} \\]

    where:

    \\[ \\begin{aligned} \\mathcal{N}(x\\vert \\mu, \\sigma) &= \\frac{1}{\\sqrt{2\\pi}\\sigma}e^{-\\frac{(x-\\mu)^2}{2\\sigma^2}}\\\\ \\log{\\mathcal{N}(x\\vert \\mu, \\sigma)} &= -\\log{\\sqrt{2\\pi}} -\\log{\\sigma} -\\frac{(x-\\mu)^2}{2\\sigma^2}\\\\ \\end{aligned} \\]

    \(\mu\) is initialised using a normal distribution with mean posterior_mu_init and std equal to 0.1 (a short sampling sketch is given after this parameter list).

  • posterior_rho_init (float, default: -7.0 ) \u2013

    As in the case of \(\mu\), \(\rho\) is initialised using a normal distribution with mean posterior_rho_init and std equal to 0.1.
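To make the sampling above a bit more concrete, here is a minimal illustrative sketch (this is not the library's internal code; the added noise term eps follows the referenced paper):

import torch

# illustrative sizes and the default initialisation values described above
posterior_mu_init, posterior_rho_init = 0.0, -7.0
mu = torch.full((10,), posterior_mu_init) + 0.1 * torch.randn(10)
rho = torch.full((10,), posterior_rho_init) + 0.1 * torch.randn(10)

sigma = torch.log1p(torch.exp(rho))  # softplus keeps sigma positive
eps = torch.randn_like(mu)           # eps ~ N(0, 1)
w = mu + sigma * eps                 # one posterior weight sample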

Attributes:

  • bayesian_wide_linear (Module) \u2013

    the linear layer that comprises the wide branch of the model

Examples:

>>> import torch\n>>> from pytorch_widedeep.bayesian_models import BayesianWide\n>>> X = torch.empty(4, 4).random_(6)\n>>> wide = BayesianWide(input_dim=X.unique().size(0), pred_dim=1)\n>>> out = wide(X)\n
Source code in pytorch_widedeep/bayesian_models/tabular/bayesian_linear/bayesian_wide.py
def __init__(\n    self,\n    input_dim: int,\n    pred_dim: int = 1,\n    prior_sigma_1: float = 1.0,\n    prior_sigma_2: float = 0.002,\n    prior_pi: float = 0.8,\n    posterior_mu_init: float = 0.0,\n    posterior_rho_init: float = -7.0,\n):\n    super(BayesianWide, self).__init__()\n    #  Embeddings: val + 1 because 0 is reserved for padding/unseen cateogories.\n    self.bayesian_wide_linear = bnn.BayesianEmbedding(\n        n_embed=input_dim + 1,\n        embed_dim=pred_dim,\n        padding_idx=0,\n        prior_sigma_1=prior_sigma_1,\n        prior_sigma_2=prior_sigma_2,\n        prior_pi=prior_pi,\n        posterior_mu_init=posterior_mu_init,\n        posterior_rho_init=posterior_rho_init,\n    )\n    self.bias = nn.Parameter(torch.zeros(pred_dim))\n
"},{"location":"pytorch-widedeep/bayesian_models.html#pytorch_widedeep.bayesian_models.tabular.bayesian_mlp.bayesian_tab_mlp.BayesianTabMlp","title":"BayesianTabMlp","text":"
BayesianTabMlp(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_activation=None,\n    continuous_cols=None,\n    embed_continuous=None,\n    cont_embed_dim=None,\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    use_cont_bias=None,\n    cont_norm_layer=None,\n    mlp_hidden_dims=[200, 100],\n    mlp_activation=\"leaky_relu\",\n    prior_sigma_1=1,\n    prior_sigma_2=0.002,\n    prior_pi=0.8,\n    posterior_mu_init=0.0,\n    posterior_rho_init=-7.0,\n    pred_dim=1\n)\n

Bases: BaseBayesianModel

Defines a BayesianTabMlp model.

This class combines embedding representations of the categorical features with numerical (aka continuous) features, embedded or not. These are then passed through a series of probabilistic dense layers (i.e. an MLP).

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the TabMlp model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}

  • cat_embed_input (Optional[List[Tuple[str, int, int]]], default: None ) \u2013

    List of Tuples with the column name, number of unique values and embedding dimension. e.g. [(education, 11, 32), ...]

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm', 'batchnorm' or None.

  • embed_continuous (Optional[bool], default: None ) \u2013

    Boolean indicating if the continuous columns will be embedded (i.e. passed each through a linear layer with or without activation)

  • cont_embed_dim (Optional[int], default: None ) \u2013

    Size of the continuous embeddings

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings

  • use_cont_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the continuous embeddings

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • mlp_hidden_dims (List[int], default: [200, 100] ) \u2013

    List with the number of neurons per dense layer in the mlp.

  • mlp_activation (str, default: 'leaky_relu' ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • prior_sigma_1 (float, default: 1 ) \u2013

    The prior weight distribution is a scaled mixture of two Gaussian densities:

    \\[ \\begin{aligned} P(\\mathbf{w}) = \\prod_{i=j} \\pi N (\\mathbf{w}_j | 0, \\sigma_{1}^{2}) + (1 - \\pi) N (\\mathbf{w}_j | 0, \\sigma_{2}^{2}) \\end{aligned} \\]

    prior_sigma_1 is the prior of the sigma parameter for the first of the two Gaussians that will be mixed to produce the prior weight distribution.

  • prior_sigma_2 (float, default: 0.002 ) \u2013

    Prior of the sigma parameter for the second of the two Gaussian distributions that will be mixed to produce the prior weight distribution for each Bayesian linear and embedding layer

  • prior_pi (float, default: 0.8 ) \u2013

    Scaling factor that will be used to mix the Gaussians to produce the prior weight distribution for each Bayesian linear and embedding layer

  • posterior_mu_init (float, default: 0.0 ) \u2013

    The posterior sample of the weights is defined as:

    \[ \begin{aligned} \mathbf{w} &= \mu + log(1 + exp(\rho)) \end{aligned} \]

    where:

    \\[ \\begin{aligned} \\mathcal{N}(x\\vert \\mu, \\sigma) &= \\frac{1}{\\sqrt{2\\pi}\\sigma}e^{-\\frac{(x-\\mu)^2}{2\\sigma^2}}\\\\ \\log{\\mathcal{N}(x\\vert \\mu, \\sigma)} &= -\\log{\\sqrt{2\\pi}} -\\log{\\sigma} -\\frac{(x-\\mu)^2}{2\\sigma^2}\\\\ \\end{aligned} \\]

    \(\mu\) is initialised using a normal distribution with mean posterior_mu_init and std equal to 0.1.

  • posterior_rho_init (float, default: -7.0 ) \u2013

    As in the case of \(\mu\), \(\rho\) is initialised using a normal distribution with mean posterior_rho_init and std equal to 0.1.

Attributes:

  • bayesian_cat_and_cont_embed (Module) \u2013

    This is the module that processes the categorical and continuous columns

  • bayesian_tab_mlp (Sequential) \u2013

    mlp model that will receive the concatenation of the embeddings and the continuous columns

Examples:

>>> import torch\n>>> from pytorch_widedeep.bayesian_models import BayesianTabMlp\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i,j) for u,i,j in zip(colnames[:4], [4]*4, [8]*4)]\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = BayesianTabMlp(mlp_hidden_dims=[8,4], column_idx=column_idx, cat_embed_input=cat_embed_input,\n... continuous_cols = ['e'])\n>>> out = model(X_tab)\n
Source code in pytorch_widedeep/bayesian_models/tabular/bayesian_mlp/bayesian_tab_mlp.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int, int]]] = None,\n    cat_embed_activation: Optional[str] = None,\n    continuous_cols: Optional[List[str]] = None,\n    embed_continuous: Optional[bool] = None,\n    cont_embed_dim: Optional[int] = None,\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    use_cont_bias: Optional[bool] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    mlp_hidden_dims: List[int] = [200, 100],\n    mlp_activation: str = \"leaky_relu\",\n    prior_sigma_1: float = 1,\n    prior_sigma_2: float = 0.002,\n    prior_pi: float = 0.8,\n    posterior_mu_init: float = 0.0,\n    posterior_rho_init: float = -7.0,\n    pred_dim=1,  # Bayesian models will require their own trainer and need the output layer\n):\n    super(BayesianTabMlp, self).__init__()\n\n    self.column_idx = column_idx\n    self.cat_embed_input = cat_embed_input\n    self.cat_embed_activation = cat_embed_activation\n\n    self.continuous_cols = continuous_cols\n    self.cont_norm_layer = cont_norm_layer\n    self.embed_continuous = embed_continuous\n    self.cont_embed_dim = cont_embed_dim\n    self.cont_embed_dropout = cont_embed_dropout\n    self.use_cont_bias = use_cont_bias\n    self.cont_embed_activation = cont_embed_activation\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n\n    self.prior_sigma_1 = prior_sigma_1\n    self.prior_sigma_2 = prior_sigma_2\n    self.prior_pi = prior_pi\n    self.posterior_mu_init = posterior_mu_init\n    self.posterior_rho_init = posterior_rho_init\n\n    self.pred_dim = pred_dim\n\n    allowed_activations = [\"relu\", \"leaky_relu\", \"tanh\", \"gelu\"]\n    if self.mlp_activation not in allowed_activations:\n        raise ValueError(\n            \"Currently, only the following activation functions are supported \"\n            \"for the Bayesian MLP's dense layers: {}. 
Got '{}' instead\".format(\n                \", \".join(allowed_activations),\n                self.mlp_activation,\n            )\n        )\n\n    # Categorical\n    if self.cat_embed_input is not None:\n        self.cat_embed = BayesianDiffSizeCatEmbeddings(\n            column_idx=self.column_idx,\n            embed_input=self.cat_embed_input,\n            prior_sigma_1=self.prior_sigma_1,\n            prior_sigma_2=self.prior_sigma_2,\n            prior_pi=self.prior_pi,\n            posterior_mu_init=self.posterior_mu_init,\n            posterior_rho_init=self.posterior_rho_init,\n            activation_fn=self.cat_embed_activation,\n        )\n        self.cat_out_dim = int(np.sum([embed[2] for embed in self.cat_embed_input]))\n    else:\n        self.cat_out_dim = 0\n\n    # Continuous\n    if self.continuous_cols is not None:\n        self.cont_idx = [self.column_idx[col] for col in self.continuous_cols]\n        if cont_norm_layer == \"layernorm\":\n            self.cont_norm: NormLayers = nn.LayerNorm(len(self.continuous_cols))\n        elif cont_norm_layer == \"batchnorm\":\n            self.cont_norm = nn.BatchNorm1d(len(self.continuous_cols))\n        else:\n            self.cont_norm = nn.Identity()\n        if self.embed_continuous:\n            assert self.cont_embed_dim is not None, (\n                \"If 'embed_continuous' is True, 'cont_embed_dim' must be \"\n                \"provided\"\n            )\n            self.cont_embed = BayesianContEmbeddings(\n                n_cont_cols=len(self.continuous_cols),\n                embed_dim=self.cont_embed_dim,\n                prior_sigma_1=self.prior_sigma_1,\n                prior_sigma_2=self.prior_sigma_2,\n                prior_pi=self.prior_pi,\n                posterior_mu_init=self.posterior_mu_init,\n                posterior_rho_init=self.posterior_rho_init,\n                use_bias=False\n                if self.use_cont_bias is None\n                else self.use_cont_bias,\n                activation_fn=self.cont_embed_activation,\n            )\n            self.cont_out_dim = len(self.continuous_cols) * self.cont_embed_dim\n        else:\n            self.cont_out_dim = len(self.continuous_cols)\n    else:\n        self.cont_out_dim = 0\n\n    self.output_dim = self.cat_out_dim + self.cont_out_dim\n\n    mlp_hidden_dims = [self.output_dim] + mlp_hidden_dims + [pred_dim]\n    self.bayesian_tab_mlp = BayesianMLP(\n        mlp_hidden_dims,\n        mlp_activation,\n        True,  # use_bias\n        prior_sigma_1,\n        prior_sigma_2,\n        prior_pi,\n        posterior_mu_init,\n        posterior_rho_init,\n    )\n
"},{"location":"pytorch-widedeep/bayesian_trainer.html","title":"Training Deep Learning Probabilistic Models","text":""},{"location":"pytorch-widedeep/bayesian_trainer.html#pytorch_widedeep.training.BayesianTrainer","title":"BayesianTrainer","text":"
BayesianTrainer(\n    model,\n    objective,\n    custom_loss_function=None,\n    optimizer=None,\n    lr_scheduler=None,\n    callbacks=None,\n    metrics=None,\n    verbose=1,\n    seed=1,\n    **kwargs\n)\n

Bases: BaseBayesianTrainer

Class to set the attributes that will be used during the training process.

Both the Bayesian models and the Trainer in this repo are based on the paper: Weight Uncertainty in Neural Networks.

Parameters:

  • model (BaseBayesianModel) \u2013

    An object of class BaseBayesianModel. See the Model Components section here in the docs.

  • objective (str) \u2013

    Defines the objective, loss or cost function. Param aliases: loss_function, loss_fn, loss, cost_function, cost_fn, cost Possible values are: 'binary', 'multiclass', 'regression'

  • custom_loss_function (Optional[Module], default: None ) \u2013

    If none of the loss functions available suits the user, it is possible to pass a custom loss function. See for example pytorch_widedeep.losses.FocalLoss for the required structure of the object or the Examples folder in the repo.

  • optimizer (Optional[Optimizer], default: None ) \u2013

    An instance of Pytorch's Optimizer object (e.g. torch.optim.Adam()). If no optimizer is passed it will default to AdamW.

  • lr_scheduler (Optional[LRScheduler], default: None ) \u2013

    An instance of Pytorch's LRScheduler object (e.g torch.optim.lr_scheduler.StepLR(opt, step_size=5)).

  • callbacks (Optional[List[Callback]], default: None ) \u2013

    List with Callback objects. The three callbacks available in pytorch-widedeep are: LRHistory, ModelCheckpoint and EarlyStopping. This can also be a custom callback. See pytorch_widedeep.callbacks.Callback or the Examples folder in the repo.

  • metrics (Optional[Union[List[Metric], List[TorchMetric]]], default: None ) \u2013
    • List of objects of type Metric. Metrics available are: Accuracy, Precision, Recall, FBetaScore, F1Score and R2Score. This can also be a custom metric as long as it is an object of type Metric. See pytorch_widedeep.metrics.Metric or the Examples folder in the repo
    • List of objects of type torchmetrics.Metric. This can be any metric from the torchmetrics library. It can also be a custom torchmetrics metric, as long as it is an object of type Metric. See the torchmetrics documentation for instructions.
  • verbose (int, default: 1 ) \u2013

    Setting it to 0 will print nothing during training.

  • seed (int, default: 1 ) \u2013

    Random seed to be used internally for train_test_split

Other Parameters:

  • **kwargs \u2013

    Other infrequently used arguments that can also be passed as kwargs are:

    • device: str string indicating the device. One of 'cpu' or 'gpu'

    • num_workers: int number of workers to be used internally by the data loaders

    • class_weight: List[float] This is the weight or pos_weight parameter in CrossEntropyLoss and BCEWithLogitsLoss, depending on whether the objective is multiclass or binary, respectively

    • reducelronplateau_criterion: str This sets the criterion that will be used by the lr scheduler to take a step: one of 'loss' or 'metric'. The ReduceLROnPlateau learning rate scheduler is a bit particular.

Attributes:

  • cyclic_lr (bool) \u2013

    Attribute that indicates if the lr_scheduler is cyclic_lr (i.e. CyclicLR or OneCycleLR). See the Pytorch schedulers documentation: https://pytorch.org/docs/stable/optim.html.

Source code in pytorch_widedeep/training/bayesian_trainer.py
@alias(  # noqa: C901\n    \"objective\",\n    [\"loss_function\", \"loss_fn\", \"loss\", \"cost_function\", \"cost_fn\", \"cost\"],\n)\ndef __init__(\n    self,\n    model: BaseBayesianModel,\n    objective: str,\n    custom_loss_function: Optional[Module] = None,\n    optimizer: Optional[Optimizer] = None,\n    lr_scheduler: Optional[LRScheduler] = None,\n    callbacks: Optional[List[Callback]] = None,\n    metrics: Optional[Union[List[Metric], List[TorchMetric]]] = None,\n    verbose: int = 1,\n    seed: int = 1,\n    **kwargs,\n):\n    super().__init__(\n        model=model,\n        objective=objective,\n        custom_loss_function=custom_loss_function,\n        optimizer=optimizer,\n        lr_scheduler=lr_scheduler,\n        callbacks=callbacks,\n        metrics=metrics,\n        verbose=verbose,\n        seed=seed,\n        **kwargs,\n    )\n
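A minimal usage sketch (the toy data, column names and sizes below are made up purely for illustration; see the Examples folder in the repo for complete scripts):

import numpy as np
from pytorch_widedeep.bayesian_models import BayesianTabMlp
from pytorch_widedeep.training import BayesianTrainer

# toy data: 4 categorical columns (values 0-3) plus 1 continuous column
X_tab = np.hstack([np.random.randint(0, 4, (64, 4)), np.random.rand(64, 1)]).astype('float32')
y = np.random.randint(0, 2, 64)

colnames = ['a', 'b', 'c', 'd', 'e']
column_idx = {k: v for v, k in enumerate(colnames)}
cat_embed_input = [(c, 4, 8) for c in colnames[:4]]

model = BayesianTabMlp(
    column_idx=column_idx,
    cat_embed_input=cat_embed_input,
    continuous_cols=['e'],
    mlp_hidden_dims=[16, 8],
)
trainer = BayesianTrainer(model, objective='binary')
trainer.fit(X_tab=X_tab, target=y, n_epochs=1, batch_size=16)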
"},{"location":"pytorch-widedeep/bayesian_trainer.html#pytorch_widedeep.training.BayesianTrainer.fit","title":"fit","text":"
fit(\n    X_tab,\n    target,\n    X_tab_val=None,\n    target_val=None,\n    val_split=None,\n    n_epochs=1,\n    validation_freq=1,\n    batch_size=32,\n    n_train_samples=2,\n    n_val_samples=2,\n)\n

Fit method.

Parameters:

  • X_tab (ndarray) \u2013

    tabular dataset

  • target (ndarray) \u2013

    target values

  • X_tab_val (Optional[ndarray], default: None ) \u2013

    validation data

  • target_val (Optional[ndarray], default: None ) \u2013

    validation target values

  • val_split (Optional[float], default: None ) \u2013

    An alternative to passing the validation set is to use a train/val split fraction via val_split

  • n_epochs (int, default: 1 ) \u2013

    number of epochs

  • validation_freq (int, default: 1 ) \u2013

    epochs validation frequency

  • batch_size (int, default: 32 ) \u2013

    batch size

  • n_train_samples (int, default: 2 ) \u2013

    number of samples to average over during the training process. See Weight Uncertainty in Neural Networks for details.

  • n_val_samples (int, default: 2 ) \u2013

    number of samples to average over during the validation process. See Weight Uncertainty in Neural Networks for details.

Source code in pytorch_widedeep/training/bayesian_trainer.py
def fit(  # noqa: C901\n    self,\n    X_tab: np.ndarray,\n    target: np.ndarray,\n    X_tab_val: Optional[np.ndarray] = None,\n    target_val: Optional[np.ndarray] = None,\n    val_split: Optional[float] = None,\n    n_epochs: int = 1,\n    validation_freq: int = 1,\n    batch_size: int = 32,\n    n_train_samples: int = 2,\n    n_val_samples: int = 2,\n):\n    r\"\"\"Fit method.\n\n    Parameters\n    ----------\n    X_tab: np.ndarray,\n        tabular dataset\n    target: np.ndarray\n        target values\n    X_tab_val: np.ndarray, Optional, default = None\n        validation data\n    target_val: np.ndarray, Optional, default = None\n        validation target values\n    val_split: float, Optional. default=None\n        An alterative to passing the validation set is to use a train/val\n        split fraction via `val_split`\n    n_epochs: int, default=1\n        number of epochs\n    validation_freq: int, default=1\n        epochs validation frequency\n    batch_size: int, default=32\n        batch size\n    n_train_samples: int, default=2\n        number of samples to average over during the training process.\n        See [Weight Uncertainty in Neural Networks](https://arxiv.org/pdf/1505.05424.pdf) for details.\n    n_val_samples: int, default=2\n        number of samples to average over during the validation process.\n        See [Weight Uncertainty in Neural Networks](https://arxiv.org/pdf/1505.05424.pdf) for details.\n    \"\"\"\n\n    self.batch_size = batch_size\n\n    train_set, eval_set = tabular_train_val_split(\n        self.seed, self.objective, X_tab, target, X_tab_val, target_val, val_split\n    )\n    train_loader = DataLoader(\n        dataset=train_set, batch_size=batch_size, num_workers=self.num_workers\n    )\n    train_steps = len(train_loader)\n\n    if eval_set is not None:\n        eval_loader = DataLoader(\n            dataset=eval_set,\n            batch_size=batch_size,\n            num_workers=self.num_workers,\n            shuffle=False,\n        )\n        eval_steps = len(eval_loader)\n\n    self.callback_container.on_train_begin(\n        {\n            \"batch_size\": batch_size,\n            \"train_steps\": train_steps,\n            \"n_epochs\": n_epochs,\n        }\n    )\n    for epoch in range(n_epochs):\n        epoch_logs: Dict[str, float] = {}\n        self.callback_container.on_epoch_begin(epoch, logs=epoch_logs)\n\n        self.train_running_loss = 0.0\n        with trange(train_steps, disable=self.verbose != 1) as t:\n            for batch_idx, (X, y) in zip(t, train_loader):\n                t.set_description(\"epoch %i\" % (epoch + 1))\n                train_score, train_loss = self._train_step(\n                    X, y, n_train_samples, train_steps, batch_idx\n                )\n                print_loss_and_metric(t, train_loss, train_score)\n                self.callback_container.on_batch_end(batch=batch_idx)\n        epoch_logs = save_epoch_logs(epoch_logs, train_loss, train_score, \"train\")\n\n        on_epoch_end_metric = None\n        if eval_set is not None and epoch % validation_freq == (\n            validation_freq - 1\n        ):\n            self.callback_container.on_eval_begin()\n            self.valid_running_loss = 0.0\n            with trange(eval_steps, disable=self.verbose != 1) as v:\n                for i, (X, y) in zip(v, eval_loader):\n                    v.set_description(\"valid\")\n                    val_score, val_loss = self._eval_step(\n                        X, y, n_val_samples, train_steps, i\n          
          )\n                    print_loss_and_metric(v, val_loss, val_score)\n            epoch_logs = save_epoch_logs(epoch_logs, val_loss, val_score, \"val\")\n\n            if self.reducelronplateau:\n                if self.reducelronplateau_criterion == \"loss\":\n                    on_epoch_end_metric = val_loss\n                else:\n                    on_epoch_end_metric = val_score[\n                        self.reducelronplateau_criterion\n                    ]\n\n        self.callback_container.on_epoch_end(epoch, epoch_logs, on_epoch_end_metric)\n\n        if self.early_stop:\n            self.callback_container.on_train_end(epoch_logs)\n            break\n\n    self.callback_container.on_train_end(epoch_logs)\n    self._restore_best_weights()\n    self.model.train()\n
"},{"location":"pytorch-widedeep/bayesian_trainer.html#pytorch_widedeep.training.BayesianTrainer.predict","title":"predict","text":"
predict(\n    X_tab, n_samples=5, return_samples=False, batch_size=256\n)\n

Returns the predictions

Parameters:

  • X_tab (ndarray) \u2013

    tabular dataset

  • n_samples (int, default: 5 ) \u2013

    number of samples that will be either returned or averaged to produce an overall prediction

  • return_samples (bool, default: False ) \u2013

    Boolean indicating whether the n samples will be averaged or directly returned

  • batch_size (int, default: 256 ) \u2013

    batch size

Returns:

  • np.ndarray: \u2013

    array with the predictions

Source code in pytorch_widedeep/training/bayesian_trainer.py
def predict(  # type: ignore[return]\n    self,\n    X_tab: np.ndarray,\n    n_samples: int = 5,\n    return_samples: bool = False,\n    batch_size: int = 256,\n) -> np.ndarray:\n    r\"\"\"Returns the predictions\n\n    Parameters\n    ----------\n    X_tab: np.ndarray,\n        tabular dataset\n    n_samples: int, default=5\n        number of samples that will be either returned or averaged to\n        produce an overal prediction\n    return_samples: bool, default = False\n        Boolean indicating whether the n samples will be averaged or directly returned\n    batch_size: int, default = 256\n        batch size\n\n    Returns\n    -------\n    np.ndarray:\n        array with the predictions\n    \"\"\"\n\n    preds_l = self._predict(X_tab, n_samples, return_samples, batch_size)\n    preds = np.hstack(preds_l) if return_samples else np.vstack(preds_l)\n    axis = 2 if return_samples else 1\n\n    if self.objective == \"regression\":\n        return preds.squeeze(axis)\n    if self.objective == \"binary\":\n        return (preds.squeeze(axis) > 0.5).astype(\"int\")\n    if self.objective == \"multiclass\":\n        return np.argmax(preds, axis)\n
"},{"location":"pytorch-widedeep/bayesian_trainer.html#pytorch_widedeep.training.BayesianTrainer.predict_proba","title":"predict_proba","text":"
predict_proba(\n    X_tab, n_samples=5, return_samples=False, batch_size=256\n)\n

Returns the predicted probabilities

Parameters:

  • X_tab (ndarray) \u2013

    tabular dataset

  • n_samples (int, default: 5 ) \u2013

    number of samples that will be either returned or averaged to produce an overall prediction (see the short uncertainty sketch at the end of this section)

  • return_samples (bool, default: False ) \u2013

    Boolean indicating whether the n samples will be averaged or directly returned

  • batch_size (int, default: 256 ) \u2013

    batch size

Returns:

  • ndarray \u2013

    array with the probabilities per class

Source code in pytorch_widedeep/training/bayesian_trainer.py
def predict_proba(  # type: ignore[return]\n    self,\n    X_tab: np.ndarray,\n    n_samples: int = 5,\n    return_samples: bool = False,\n    batch_size: int = 256,\n) -> np.ndarray:\n    r\"\"\"Returns the predicted probabilities\n\n    Parameters\n    ----------\n    X_tab: np.ndarray,\n        tabular dataset\n    n_samples: int, default=5\n        number of samples that will be either returned or averaged to\n        produce an overal prediction\n    return_samples: bool, default = False\n        Boolean indicating whether the n samples will be averaged or directly returned\n    batch_size: int, default = 256\n        batch size\n\n    Returns\n    -------\n    np.ndarray\n        array with the probabilities per class\n    \"\"\"\n    preds_l = self._predict(X_tab, n_samples, return_samples, batch_size)\n    preds = np.hstack(preds_l) if return_samples else np.vstack(preds_l)\n\n    if self.objective == \"binary\":\n        if return_samples:\n            preds = preds.squeeze(2)\n            probs = np.zeros([n_samples, preds.shape[1], 2])\n            for i in range(n_samples):\n                probs[i, :, 0] = 1 - preds[i]\n                probs[i, :, 1] = preds[i]\n        else:\n            preds = preds.squeeze(1)\n            probs = np.zeros([preds.shape[0], 2])\n            probs[:, 0] = 1 - preds\n            probs[:, 1] = preds\n        return probs\n    if self.objective == \"multiclass\":\n        return preds\n
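As a hedged follow-up to the two methods above (reusing the trainer fitted earlier), the per-sample outputs obtained with return_samples=True can be used as a rough measure of predictive uncertainty:

# shape: (n_samples, n_observations, n_classes)
prob_samples = trainer.predict_proba(X_tab, n_samples=20, return_samples=True)

mean_probs = prob_samples.mean(axis=0)  # averaged probabilities per observation
std_probs = prob_samples.std(axis=0)    # spread across samples, i.e. a rough uncertainty measure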
"},{"location":"pytorch-widedeep/bayesian_trainer.html#pytorch_widedeep.training.BayesianTrainer.save","title":"save","text":"
save(\n    path,\n    save_state_dict=False,\n    model_filename=\"bayesian_model.pt\",\n)\n

Saves the model, training and evaluation history to disk

The Trainer class is built so that it 'just' trains a model. With that in mind, all the torch related parameters (such as optimizers or learning rate schedulers) have to be defined externally and then passed to the Trainer. As a result, the Trainer does not generate any attribute or additional data products that need to be saved other than the model object itself, which can be saved as any other torch model (e.g. torch.save(model, path)).

Parameters:

  • path (str) \u2013

    path to the directory where the model and the feature importance attribute will be saved.

  • save_state_dict (bool, default: False ) \u2013

    Boolean indicating whether to save directly the model or the model's state dictionary

  • model_filename (str, default: 'bayesian_model.pt' ) \u2013

    filename where the model weights will be stored

Source code in pytorch_widedeep/training/bayesian_trainer.py
def save(\n    self,\n    path: str,\n    save_state_dict: bool = False,\n    model_filename: str = \"bayesian_model.pt\",\n):\n    r\"\"\"Saves the model, training and evaluation history to disk\n\n    The `Trainer` class is built so that it 'just' trains a model. With\n    that in mind, all the torch related parameters (such as optimizers or\n    learning rate schedulers) have to be defined externally and then\n    passed to the `Trainer`. As a result, the `Trainer` does not\n    generate any attribute or additional data products that need to be\n    saved other than the `model` object itself, which can be saved as\n    any other torch model (e.g. `torch.save(model, path)`).\n\n    Parameters\n    ----------\n    path: str\n        path to the directory where the model and the feature importance\n        attribute will be saved.\n    save_state_dict: bool, default = False\n        Boolean indicating whether to save directly the model or the\n        model's state dictionary\n    model_filename: str, Optional, default = \"wd_model.pt\"\n        filename where the model weights will be store\n    \"\"\"\n\n    save_dir = Path(path)\n    history_dir = save_dir / \"history\"\n    history_dir.mkdir(exist_ok=True, parents=True)\n\n    # the trainer is run with the History Callback by default\n    with open(history_dir / \"train_eval_history.json\", \"w\") as teh:\n        json.dump(self.history, teh)  # type: ignore[attr-defined]\n\n    has_lr_history = any(\n        [clbk.__class__.__name__ == \"LRHistory\" for clbk in self.callbacks]\n    )\n    if self.lr_scheduler is not None and has_lr_history:\n        with open(history_dir / \"lr_history.json\", \"w\") as lrh:\n            json.dump(self.lr_history, lrh)  # type: ignore[attr-defined]\n\n    model_path = save_dir / model_filename\n    if save_state_dict:\n        torch.save(self.model.state_dict(), model_path)\n    else:\n        torch.save(self.model, model_path)\n
"},{"location":"pytorch-widedeep/callbacks.html","title":"Callbacks","text":"

Here are the 4 callbacks available to the user in pytorch-widedeep: LRHistory, ModelCheckpoint, EarlyStopping and RayTuneReporter.

NOTE: other callbacks, like History, always run by default. In particular, the History callback saves the metrics in the history attribute of the Trainer.
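For example (a minimal sketch, assuming a Trainer has been created and fitted as in the examples below), the recorded losses and metrics can be inspected directly after training:

# the History callback runs by default, so no extra setup is needed
trainer.fit(X_tab=X_tab, target=y, n_epochs=5)
print(trainer.history)  # e.g. {'train_loss': [...], 'val_loss': [...], ...}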

"},{"location":"pytorch-widedeep/callbacks.html#pytorch_widedeep.callbacks.LRHistory","title":"LRHistory","text":"
LRHistory(n_epochs)\n

Bases: Callback

Saves the learning rates during training in the lr_history attribute of the Trainer.

Callbacks are passed as input parameters to the Trainer class. See pytorch_widedeep.trainer.Trainer

Parameters:

  • n_epochs (int) \u2013

    number of training epochs

Examples:

>>> from pytorch_widedeep.callbacks import LRHistory\n>>> from pytorch_widedeep.models import TabMlp, Wide, WideDeep\n>>> from pytorch_widedeep.training import Trainer\n>>>\n>>> embed_input = [(u, i, j) for u, i, j in zip([\"a\", \"b\", \"c\"][:4], [4] * 3, [8] * 3)]\n>>> column_idx = {k: v for v, k in enumerate([\"a\", \"b\", \"c\"])}\n>>> wide = Wide(10, 1)\n>>> deep = TabMlp(mlp_hidden_dims=[8, 4], column_idx=column_idx, cat_embed_input=embed_input)\n>>> model = WideDeep(wide, deep)\n>>> trainer = Trainer(model, objective=\"regression\", callbacks=[LRHistory(n_epochs=10)])\n
Source code in pytorch_widedeep/callbacks.py
def __init__(self, n_epochs: int):\n    super(LRHistory, self).__init__()\n    self.n_epochs = n_epochs\n
"},{"location":"pytorch-widedeep/callbacks.html#pytorch_widedeep.callbacks.ModelCheckpoint","title":"ModelCheckpoint","text":"
ModelCheckpoint(\n    filepath=None,\n    monitor=\"val_loss\",\n    min_delta=0.0,\n    verbose=0,\n    save_best_only=False,\n    mode=\"auto\",\n    period=1,\n    max_save=-1,\n)\n

Bases: Callback

Saves the model after every epoch.

This class is almost identical to the corresponding keras class. Therefore, credit to the Keras Team.

Callbacks are passed as input parameters to the Trainer class. See pytorch_widedeep.trainer.Trainer

Parameters:

  • filepath (Optional[str], default: None ) \u2013

    Full path to save the output weights. It must contain only the root of the filenames. Epoch number and .pt extension (for pytorch) will be added. e.g. filepath=\"path/to/output_weights/weights_out\" And the saved files in that directory will be named: 'weights_out_1.pt', 'weights_out_2.pt', .... If set to None the class just reports the best metric and best_epoch.

  • monitor (str, default: 'val_loss' ) \u2013

    quantity to monitor. Typically 'val_loss' or metric name (e.g. 'val_acc')

  • min_delta (float, default: 0.0 ) \u2013

    minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement.

  • verbose (int, default: 0 ) \u2013

    verbosity mode

  • save_best_only (bool, default: False ) \u2013

    the latest best model according to the quantity monitored will not be overwritten.

  • mode (str, default: 'auto' ) \u2013

    If save_best_only=True, the decision to overwrite the current save file is made based on either the maximization or the minimization of the monitored quantity. For 'acc', this should be 'max', for 'loss' this should be 'min', etc. In 'auto' mode, the direction is automatically inferred from the name of the monitored quantity.

  • period (int, default: 1 ) \u2013

    Interval (number of epochs) between checkpoints.

  • max_save (int, default: -1 ) \u2013

    Maximum number of outputs to save. If -1, all outputs will be saved

Attributes:

  • best (float) \u2013

    best metric

  • best_epoch (int) \u2013

    best epoch

  • best_state_dict (dict) \u2013

    best model state dictionary. To restore the model to its best state, use Trainer.model.load_state_dict(model_checkpoint.best_state_dict), where model_checkpoint is an instance of the class ModelCheckpoint. See the Examples folder in the repo or the Examples section in this documentation for details

Examples:

>>> from pytorch_widedeep.callbacks import ModelCheckpoint\n>>> from pytorch_widedeep.models import TabMlp, Wide, WideDeep\n>>> from pytorch_widedeep.training import Trainer\n>>>\n>>> embed_input = [(u, i, j) for u, i, j in zip([\"a\", \"b\", \"c\"][:4], [4] * 3, [8] * 3)]\n>>> column_idx = {k: v for v, k in enumerate([\"a\", \"b\", \"c\"])}\n>>> wide = Wide(10, 1)\n>>> deep = TabMlp(mlp_hidden_dims=[8, 4], column_idx=column_idx, cat_embed_input=embed_input)\n>>> model = WideDeep(wide, deep)\n>>> trainer = Trainer(model, objective=\"regression\", callbacks=[ModelCheckpoint(filepath='checkpoints/weights_out')])\n
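And, as a hedged follow-up to the example above (assuming the trainer is then fitted), the best weights can be restored via the best_state_dict attribute mentioned earlier:

model_checkpoint = ModelCheckpoint(filepath='checkpoints/weights_out', save_best_only=True)
trainer = Trainer(model, objective='regression', callbacks=[model_checkpoint])
# ... trainer.fit(...) ...
trainer.model.load_state_dict(model_checkpoint.best_state_dict)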
Source code in pytorch_widedeep/callbacks.py
def __init__(\n    self,\n    filepath: Optional[str] = None,\n    monitor: str = \"val_loss\",\n    min_delta: float = 0.0,\n    verbose: int = 0,\n    save_best_only: bool = False,\n    mode: str = \"auto\",\n    period: int = 1,\n    max_save: int = -1,\n):\n    super(ModelCheckpoint, self).__init__()\n\n    self.filepath = filepath\n    self.monitor = monitor\n    self.min_delta = min_delta\n    self.verbose = verbose\n    self.save_best_only = save_best_only\n    self.mode = mode\n    self.period = period\n    self.max_save = max_save\n\n    self.epochs_since_last_save = 0\n\n    if self.filepath:\n        if len(self.filepath.split(\"/\")[:-1]) == 0:\n            raise ValueError(\n                \"'filepath' must be the full path to save the output weights,\"\n                \" including the root of the filenames. e.g. 'checkpoints/weights_out'\"\n            )\n\n        root_dir = (\"/\").join(self.filepath.split(\"/\")[:-1])\n        if not os.path.exists(root_dir):\n            os.makedirs(root_dir)\n\n    if self.max_save > 0:\n        self.old_files: List[str] = []\n\n    if self.mode not in [\"auto\", \"min\", \"max\"]:\n        warnings.warn(\n            \"ModelCheckpoint mode %s is unknown, \"\n            \"fallback to auto mode.\" % (self.mode),\n            RuntimeWarning,\n        )\n        self.mode = \"auto\"\n    if self.mode == \"min\":\n        self.monitor_op = np.less\n        self.best = np.Inf\n    elif self.mode == \"max\":\n        self.monitor_op = np.greater  # type: ignore[assignment]\n        self.best = -np.Inf\n    else:\n        if _is_metric(self.monitor):\n            self.monitor_op = np.greater  # type: ignore[assignment]\n            self.best = -np.Inf\n        else:\n            self.monitor_op = np.less\n            self.best = np.Inf\n\n    if self.monitor_op == np.greater:\n        self.min_delta *= 1\n    else:\n        self.min_delta *= -1\n
"},{"location":"pytorch-widedeep/callbacks.html#pytorch_widedeep.callbacks.EarlyStopping","title":"EarlyStopping","text":"
EarlyStopping(\n    monitor=\"val_loss\",\n    min_delta=0.0,\n    patience=10,\n    verbose=0,\n    mode=\"auto\",\n    baseline=None,\n    restore_best_weights=False,\n)\n

Bases: Callback

Stop training when a monitored quantity has stopped improving.

This class is almost identical to the corresponding keras class. Therefore, credit to the Keras Team.

Callbacks are passed as input parameters to the Trainer class. See pytorch_widedeep.trainer.Trainer

Parameters:

  • monitor (str, default: 'val_loss' ) \u2013

    Quantity to monitor. Typically 'val_loss' or metric name (e.g. 'val_acc')

  • min_delta (float, default: 0.0 ) \u2013

    minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement.

  • patience (int, default: 10 ) \u2013

    Number of epochs with no improvement in the monitored quantity after which training will be stopped.

  • verbose (int, default: 0 ) \u2013

    verbosity mode.

  • mode (str, default: 'auto' ) \u2013

    one of {'auto', 'min', 'max'}. In 'min' mode, training will stop when the quantity monitored has stopped decreasing; in 'max' mode it will stop when the quantity monitored has stopped increasing; in 'auto' mode, the direction is automatically inferred from the name of the monitored quantity.

  • baseline (Optional[float], default: None ) \u2013

    Baseline value for the monitored quantity to reach. Training will stop if the model does not show improvement over the baseline.

  • restore_best_weights (bool, default: False ) \u2013

    Whether to restore model weights from the epoch with the best value of the monitored quantity. If False, the model weights obtained at the last step of training are used.

Attributes:

  • best (float) \u2013

    best metric

  • stopped_epoch (int) \u2013

    epoch when the training stopped

Examples:

>>> from pytorch_widedeep.callbacks import EarlyStopping\n>>> from pytorch_widedeep.models import TabMlp, Wide, WideDeep\n>>> from pytorch_widedeep.training import Trainer\n>>>\n>>> embed_input = [(u, i, j) for u, i, j in zip([\"a\", \"b\", \"c\"][:4], [4] * 3, [8] * 3)]\n>>> column_idx = {k: v for v, k in enumerate([\"a\", \"b\", \"c\"])}\n>>> wide = Wide(10, 1)\n>>> deep = TabMlp(mlp_hidden_dims=[8, 4], column_idx=column_idx, cat_embed_input=embed_input)\n>>> model = WideDeep(wide, deep)\n>>> trainer = Trainer(model, objective=\"regression\", callbacks=[EarlyStopping(patience=10)])\n
Source code in pytorch_widedeep/callbacks.py
def __init__(\n    self,\n    monitor: str = \"val_loss\",\n    min_delta: float = 0.0,\n    patience: int = 10,\n    verbose: int = 0,\n    mode: str = \"auto\",\n    baseline: Optional[float] = None,\n    restore_best_weights: bool = False,\n):\n    super(EarlyStopping, self).__init__()\n\n    self.monitor = monitor\n    self.min_delta = min_delta\n    self.patience = patience\n    self.verbose = verbose\n    self.mode = mode\n    self.baseline = baseline\n    self.restore_best_weights = restore_best_weights\n\n    self.wait = 0\n    self.stopped_epoch = 0\n    self.state_dict = None\n\n    if self.mode not in [\"auto\", \"min\", \"max\"]:\n        warnings.warn(\n            \"EarlyStopping mode %s is unknown, \"\n            \"fallback to auto mode.\" % self.mode,\n            RuntimeWarning,\n        )\n        self.mode = \"auto\"\n\n    if self.mode == \"min\":\n        self.monitor_op = np.less\n    elif self.mode == \"max\":\n        self.monitor_op = np.greater  # type: ignore[assignment]\n    else:\n        if _is_metric(self.monitor):\n            self.monitor_op = np.greater  # type: ignore[assignment]\n        else:\n            self.monitor_op = np.less\n\n    if self.monitor_op == np.greater:\n        self.min_delta *= 1\n    else:\n        self.min_delta *= -1\n
"},{"location":"pytorch-widedeep/dataloaders.html","title":"Dataloaders","text":"

NOTE: This module should contain custom dataloaders that the user might want to implement. At the moment pytorch-widedeep offers one custom dataloader, DataLoaderImbalanced.

"},{"location":"pytorch-widedeep/dataloaders.html#pytorch_widedeep.dataloaders.DataLoaderImbalanced","title":"DataLoaderImbalanced","text":"
DataLoaderImbalanced(\n    dataset, batch_size, num_workers, **kwargs\n)\n

Bases: DataLoader

Class to load and shuffle batches with adjusted weights for imbalanced datasets. If the classes do not begin at 0, remapping is necessary. See here.

Parameters:

  • dataset (WideDeepDataset) \u2013

    see pytorch_widedeep.training._wd_dataset

  • batch_size (int) \u2013

    size of batch

  • num_workers (int) \u2013

    number of workers

Other Parameters:

  • **kwargs \u2013

    This can include any parameter that can be passed to the 'standard' pytorch DataLoader and that is not already explicitly passed to the class. In addition, the dictionary can also include the extra parameter oversample_mul which will multiply the number of samples of the minority class to be sampled by the WeightedRandomSampler (a short usage sketch follows the formula below).

    In other words, the num_samples param in WeightedRandomSampler will be defined as:

    \\[ minority \\space class \\space count \\times number \\space of \\space classes \\times oversample\\_mul \\]
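A hedged usage sketch (the WideDeepDataset import path and constructor arguments are assumptions based on the reference above; X_tab is assumed to be an already-preprocessed tabular array):

import numpy as np
from pytorch_widedeep.dataloaders import DataLoaderImbalanced
from pytorch_widedeep.training._wd_dataset import WideDeepDataset

# toy, heavily imbalanced binary target (~10% positives)
X_tab = np.random.rand(1000, 5).astype('float32')
y = (np.random.rand(1000) > 0.9).astype('int64')

dataset = WideDeepDataset(X_tab=X_tab, target=y)
loader = DataLoaderImbalanced(dataset, batch_size=32, num_workers=0, oversample_mul=2)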
Source code in pytorch_widedeep/dataloaders.py
def __init__(\n    self, dataset: WideDeepDataset, batch_size: int, num_workers: int, **kwargs\n):\n    assert dataset.Y is not None, (\n        \"The 'dataset' instance of WideDeepDataset must contain a \"\n        \"target array 'Y'\"\n    )\n\n    self.with_lds = dataset.with_lds\n    if \"oversample_mul\" in kwargs:\n        oversample_mul = kwargs[\"oversample_mul\"]\n        del kwargs[\"oversample_mul\"]\n    else:\n        oversample_mul = 1\n    weights, minor_cls_cnt, num_clss = get_class_weights(dataset)\n    num_samples = int(minor_cls_cnt * num_clss * oversample_mul)\n    samples_weight = list(np.array([weights[i] for i in dataset.Y]))\n    sampler = WeightedRandomSampler(samples_weight, num_samples, replacement=True)\n    super().__init__(\n        dataset, batch_size, num_workers=num_workers, sampler=sampler, **kwargs\n    )\n
"},{"location":"pytorch-widedeep/load_from_folder.html","title":"The load_from_folder module","text":"

The load_from_folder module contains the classes that are necessary to load data from disk; these are inspired by the ImageFolder class in the torchvision library. This module is designed with one specific case in mind, namely: given a multi-modal dataset with tabular data, images and text, the images do not fit in memory and therefore have to be loaded from disk. However, as with any other functionality in this library, there is some flexibility, and some additional cases can also be addressed using this module.

For this module to be used, the datasets must be prepared in a certain way:

  1. the tabular data must contain a column with the image names as stored on disk, including the extension (.jpg, .png, etc...).

  2. Regarding the text dataset, the tabular data can contain a column with the texts themselves or the names of the files containing the texts as stored on disk.

The tabular data might or might not fit in memory itself. If it does not, please see the ChunkPreprocessor utilities in the preprocessing module (preprocessing.md) and the examples folder in the repo, which illustrate such a case. Finally, note that only csv format is currently supported in that case (more formats coming soon).
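Putting the classes below together, a hedged end-to-end sketch could look as follows (file names, column names and the fitted preprocessors are made up for illustration, and the top-level import path is assumed):

from torch.utils.data import DataLoader
from pytorch_widedeep.load_from_folder import (
    TabFromFolder,
    TextFromFolder,
    ImageFromFolder,
    WideDeepDatasetFromFolder,
)

# 'tab_preprocessor', 'text_preprocessor' and 'img_preprocessor' are assumed to be
# fitted (Chunk)TabPreprocessor, (Chunk)TextPreprocessor and ImagePreprocessor objects
tab_from_folder = TabFromFolder(
    fname='train.csv',
    directory='data/',
    target_col='target',
    preprocessor=tab_preprocessor,
    text_col='review',
    img_col='image_name',
)
text_from_folder = TextFromFolder(preprocessor=text_preprocessor)
img_from_folder = ImageFromFolder(preprocessor=img_preprocessor)

train_dataset = WideDeepDatasetFromFolder(
    n_samples=n_train,  # number of rows in train.csv, assumed known
    tab_from_folder=tab_from_folder,
    text_from_folder=text_from_folder,
    img_from_folder=img_from_folder,
)
train_loader = DataLoader(train_dataset, batch_size=32)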

"},{"location":"pytorch-widedeep/load_from_folder.html#pytorch_widedeep.load_from_folder.tabular.tabular_from_folder.TabFromFolder","title":"TabFromFolder","text":"
TabFromFolder(\n    fname,\n    directory=None,\n    target_col=None,\n    preprocessor=None,\n    text_col=None,\n    img_col=None,\n    ignore_target=False,\n    reference=None,\n    verbose=1,\n)\n

This class is used to load tabular data from disk. The current constraints are:

  1. The only file format supported right now is csv
  2. The csv file must contain headers

For examples, please, see the examples folder in the repo.

Parameters:

  • fname (str) \u2013

    the name of the csv file

  • directory (Optional[str], default: None ) \u2013

    the path to the directory where the csv file is located. If None, a TabFromFolder reference object must be provided

  • target_col (Optional[str], default: None ) \u2013

    the name of the target column. If None, a TabFromFolder reference object must be provided

  • preprocessor (Optional[TabularPreprocessor], default: None ) \u2013

    a fitted TabularPreprocessor object. If None, a TabFromFolder reference object must be provided

  • text_col (Optional[str], default: None ) \u2013

    the name of the column with the texts themselves or the names of the files that contain the text dataset. If None, either there is no text column or a TabFromFolder reference object must be provided

  • img_col (Optional[str], default: None ) \u2013

    the name of the column with the names of the images. If None, either there is no image column or a TabFromFolder reference object must be provided

  • ignore_target (bool, default: False ) \u2013

    whether to ignore the target column. This is normally set to True when this class is used for a test dataset.

  • reference (Optional[Any], default: None ) \u2013

    a reference TabFromFolder object. If provided, the TabFromFolder object will be created using the attributes of the reference object. This is useful to instantiate a TabFromFolder object for evaluation or test purposes

  • verbose (Optional[int], default: 1 ) \u2013

    verbosity. If 0, no output will be printed during the process.
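For instance (a hedged sketch, assuming a train-set tab_from_folder object like the one in the overview sketch above already exists), the reference argument makes it straightforward to build the eval and test counterparts:

eval_tab_from_folder = TabFromFolder(fname='eval.csv', reference=tab_from_folder)
test_tab_from_folder = TabFromFolder(
    fname='test.csv', reference=tab_from_folder, ignore_target=True
)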

Source code in pytorch_widedeep/load_from_folder/tabular/tabular_from_folder.py
def __init__(\n    self,\n    fname: str,\n    directory: Optional[str] = None,\n    target_col: Optional[str] = None,\n    preprocessor: Optional[TabularPreprocessor] = None,\n    text_col: Optional[str] = None,\n    img_col: Optional[str] = None,\n    ignore_target: bool = False,\n    reference: Optional[Any] = None,  # is Type[\"TabFromFolder\"],\n    verbose: Optional[int] = 1,\n):\n    self.fname = fname\n    self.ignore_target = ignore_target\n    self.verbose = verbose\n\n    if reference is not None:\n        (\n            self.directory,\n            self.target_col,\n            self.preprocessor,\n            self.text_col,\n            self.img_col,\n        ) = self._set_from_reference(reference, preprocessor)\n    else:\n        assert (\n            directory is not None\n            and (target_col is not None and not ignore_target)\n            and preprocessor is not None\n        ), (\n            \"if no reference is provided, 'directory', 'target_col' and 'preprocessor' \"\n            \"must be provided\"\n        )\n\n        self.directory = directory\n        self.target_col = target_col\n        self.preprocessor = preprocessor\n        self.text_col = text_col\n        self.img_col = img_col\n\n    assert (\n        self.preprocessor.is_fitted\n    ), \"The preprocessor must be fitted before passing it to this class\"\n
"},{"location":"pytorch-widedeep/load_from_folder.html#pytorch_widedeep.load_from_folder.tabular.tabular_from_folder.WideFromFolder","title":"WideFromFolder","text":"
WideFromFolder(\n    fname,\n    directory=None,\n    target_col=None,\n    preprocessor=None,\n    text_col=None,\n    img_col=None,\n    ignore_target=False,\n    reference=None,\n    verbose=1,\n)\n

Bases: TabFromFolder

This class is mostly identical to TabFromFolder but exists because we want to separate the treatment of the wide and the deep tabular components

Parameters:

  • fname (str) \u2013

    the name of the csv file

  • directory (Optional[str], default: None ) \u2013

    the path to the directory where the csv file is located. If None, a WideFromFolder reference object must be provided

  • target_col (Optional[str], default: None ) \u2013

    the name of the target column. If None, a WideFromFolder reference object must be provided

  • preprocessor (Optional[TabularPreprocessor], default: None ) \u2013

    a fitted TabularPreprocessor object. If None, a WideFromFolder reference object must be provided

  • text_col (Optional[str], default: None ) \u2013

    the name of the column with the texts themselves or the names of the files that contain the text dataset. If None, either there is no text column or a WideFromFolder reference object must be provided

  • img_col (Optional[str], default: None ) \u2013

    the name of the column with the names of the images. If None, either there is no image column or a WideFromFolder reference object must be provided

  • ignore_target (bool, default: False ) \u2013

    whether to ignore the target column. This is normally used when this class is used for a test dataset.

  • reference (Optional[Any], default: None ) \u2013

    a reference WideFromFolder object. If provided, the WideFromFolder object will be created using the attributes of the reference object. This is useful to instantiate a WideFromFolder object for evaluation or test purposes

  • verbose (int, default: 1 ) \u2013

    verbosity. If 0, no output will be printed during the process.

Source code in pytorch_widedeep/load_from_folder/tabular/tabular_from_folder.py
def __init__(\n    self,\n    fname: str,\n    directory: Optional[str] = None,\n    target_col: Optional[str] = None,\n    preprocessor: Optional[TabularPreprocessor] = None,\n    text_col: Optional[str] = None,\n    img_col: Optional[str] = None,\n    ignore_target: bool = False,\n    reference: Optional[Any] = None,  # is Type[\"WideFromFolder\"],\n    verbose: int = 1,\n):\n    super(WideFromFolder, self).__init__(\n        fname=fname,\n        directory=directory,\n        target_col=target_col,\n        preprocessor=preprocessor,\n        text_col=text_col,\n        img_col=img_col,\n        reference=reference,\n        ignore_target=ignore_target,\n        verbose=verbose,\n    )\n
"},{"location":"pytorch-widedeep/load_from_folder.html#pytorch_widedeep.load_from_folder.text.text_from_folder.TextFromFolder","title":"TextFromFolder","text":"
TextFromFolder(preprocessor)\n

This class is used to load the text dataset (i.e. the text files) from a folder, or to retrieve the text given a text column specified within the preprocessor object.

For examples, please, see the examples folder in the repo.

Parameters:

  • preprocessor (Union[TextPreprocessor, ChunkTextPreprocessor]) \u2013

    The preprocessor used to process the text. It must be fitted before using this class

Source code in pytorch_widedeep/load_from_folder/text/text_from_folder.py
def __init__(\n    self,\n    preprocessor: Union[TextPreprocessor, ChunkTextPreprocessor],\n):\n    assert (\n        preprocessor.is_fitted\n    ), \"The preprocessor must be fitted before using this class\"\n\n    self.preprocessor = preprocessor\n
"},{"location":"pytorch-widedeep/load_from_folder.html#pytorch_widedeep.load_from_folder.image.image_from_folder.ImageFromFolder","title":"ImageFromFolder","text":"
ImageFromFolder(\n    directory=None,\n    preprocessor=None,\n    loader=default_loader,\n    extensions=None,\n    transforms=None,\n)\n

This class is used to load the image dataset from disk. It is inspired by the ImageFolder class in the torchvision library. Here, we have simply adapted it to work within the context of a Wide and Deep multi-modal model.

For examples, please, see the examples folder in the repo.

Parameters:

  • directory (Optional[str], default: None ) \u2013

    the path to the directory where the images are located. If None, a preprocessor must be provided.

  • preprocessor (Optional[ImagePreprocessor], default: None ) \u2013

    a fitted ImagePreprocessor object.

  • loader (Callable[[str], Any], default: default_loader ) \u2013

    a function to load a sample given its path.

  • extensions (Optional[Tuple[str, ...]], default: None ) \u2013

    a tuple with the allowed extensions. If None, IMG_EXTENSIONS will be used, where IMG_EXTENSIONS = (\".jpg\", \".jpeg\", \".png\", \".ppm\", \".bmp\", \".pgm\", \".tif\", \".tiff\", \".webp\")

  • transforms (Optional[Any], default: None ) \u2013

    a torchvision.transforms object. If None, this class will simply return an array representation of the PIL Image

Source code in pytorch_widedeep/load_from_folder/image/image_from_folder.py
def __init__(\n    self,\n    directory: Optional[str] = None,\n    preprocessor: Optional[ImagePreprocessor] = None,\n    loader: Callable[[str], Any] = default_loader,\n    extensions: Optional[Tuple[str, ...]] = None,\n    transforms: Optional[Any] = None,\n) -> None:\n    assert (\n        directory is not None or preprocessor is not None\n    ), \"Either a directory or an instance of ImagePreprocessor must be provided\"\n\n    if directory is not None and preprocessor is not None:  # pragma: no cover\n        assert directory == preprocessor.img_path, (\n            \"If both 'directory' and 'preprocessor' are provided, the 'img_path' \"\n            \"attribute of the 'preprocessor' must be the same as the 'directory'\"\n        )\n\n    if directory is not None:\n        self.directory = directory\n    else:\n        assert (\n            preprocessor is not None\n        ), \"Either a directory or an instance of ImagePreprocessor must be provided\"\n        self.directory = preprocessor.img_path\n\n    self.preprocessor = preprocessor\n    self.loader = loader\n    self.extensions = extensions if extensions is not None else IMG_EXTENSIONS\n    self.transforms = transforms\n    if self.transforms:\n        self.transforms_names = [\n            tr.__class__.__name__ for tr in self.transforms.transforms\n        ]\n    else:\n        self.transforms_names = []\n\n        self.transpose = True\n
"},{"location":"pytorch-widedeep/load_from_folder.html#pytorch_widedeep.load_from_folder.wd_dataset_from_folder.WideDeepDatasetFromFolder","title":"WideDeepDatasetFromFolder","text":"
WideDeepDatasetFromFolder(\n    n_samples,\n    tab_from_folder=None,\n    wide_from_folder=None,\n    text_from_folder=None,\n    img_from_folder=None,\n    reference=None,\n)\n

Bases: Dataset

This class is the Dataset counterpart of the WideDeepDataset class.

Given a reference tabular dataset, with columns that indicate the path to the images and to the text files or the texts themselves, it will use the [...]FromFolder classes to load the data consistently from disk per batch.

For examples, please, see the examples folder in the repo.

Parameters:

  • n_samples (int) \u2013

    Number of samples in the dataset

  • tab_from_folder (Optional[TabFromFolder], default: None ) \u2013

    Instance of the TabFromFolder class

  • wide_from_folder (Optional[WideFromFolder], default: None ) \u2013

    Instance of the WideFromFolder class

  • text_from_folder (Optional[TextFromFolder], default: None ) \u2013

    Instance of the TextFromFolder class

  • img_from_folder (Optional[ImageFromFolder], default: None ) \u2013

    Instance of the ImageFromFolder class

  • reference (Optional[Any], default: None ) \u2013

    If not None, the 'text_from_folder' and 'img_from_folder' objects will be retrieved from the reference instance. This is useful when a WideDeepDatasetFromFolder instance built for the training set is used as a reference for the validation and test sets. In that case the text_from_folder and img_from_folder objects are shared across the three datasets, so there is no need to create a new instance for each of them.

Source code in pytorch_widedeep/load_from_folder/wd_dataset_from_folder.py
def __init__(\n    self,\n    n_samples: int,\n    tab_from_folder: Optional[TabFromFolder] = None,\n    wide_from_folder: Optional[WideFromFolder] = None,\n    text_from_folder: Optional[TextFromFolder] = None,\n    img_from_folder: Optional[ImageFromFolder] = None,\n    reference: Optional[Any] = None,  # is Type[\"WideDeepDatasetFromFolder\"],\n):\n    super(WideDeepDatasetFromFolder, self).__init__()\n\n    if tab_from_folder is None and wide_from_folder is None:\n        raise ValueError(\n            \"Either 'tab_from_folder' or 'wide_from_folder' must be not None\"\n        )\n\n    if reference is not None:\n        assert (\n            img_from_folder is None and text_from_folder is None\n        ), \"If reference is not None, 'img_from_folder' and 'text_from_folder' left as None\"\n        self.text_from_folder, self.img_from_folder = self._get_from_reference(\n            reference\n        )\n    else:\n        assert (\n            text_from_folder is not None and img_from_folder is not None\n        ), \"If reference is None, 'img_from_folder' and 'text_from_folder' must be not None\"\n        self.text_from_folder = text_from_folder\n        self.img_from_folder = img_from_folder\n\n    self.n_samples = n_samples\n    self.tab_from_folder = tab_from_folder\n    self.wide_from_folder = wide_from_folder\n
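A minimal sketch of how this class could be combined with a standard PyTorch DataLoader. The dataframes (train_df, valid_df) and the fitted *FromFolder objects (tab_from_folder, valid_tab_from_folder, text_from_folder, img_from_folder) are assumed to exist and are purely illustrative; for a complete, runnable version please see the examples folder:

from torch.utils.data import DataLoader
from pytorch_widedeep.load_from_folder import WideDeepDatasetFromFolder

# training dataset: all the *_from_folder objects are passed explicitly
train_dataset = WideDeepDatasetFromFolder(
    n_samples=len(train_df),
    tab_from_folder=tab_from_folder,
    text_from_folder=text_from_folder,
    img_from_folder=img_from_folder,
)

# validation dataset: the text and image loaders are reused via 'reference'
valid_dataset = WideDeepDatasetFromFolder(
    n_samples=len(valid_df),
    tab_from_folder=valid_tab_from_folder,
    reference=train_dataset,
)

train_loader = DataLoader(train_dataset, batch_size=32)
valid_loader = DataLoader(valid_dataset, batch_size=32)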
"},{"location":"pytorch-widedeep/losses.html","title":"Losses","text":"

pytorch-widedeep accepts a number of losses and objectives that can be passed to the Trainer class via the parameter objective (see pytorch-widedeep.training.Trainer). In most cases, the loss function that pytorch-widedeep uses internally is already implemented in PyTorch.

In addition, pytorch-widedeep implements a series of \"custom\" loss functions. These are described below for completeness since, as mentioned before, they are used internally by the Trainer. Of course, one can always use them on their own; they can be imported as:

from pytorch_widedeep.losses import FocalLoss

NOTE: Losses in this module expect the predictions and ground truth to have the same dimensions for regression and binary classification problems: \\((N_{samples}, 1)\\). In the case of multiclass classification problems the ground truth is expected to be a 1D tensor with the corresponding classes. See the Examples below and the short sketch that follows.
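As a quick, illustrative sketch of the expected shapes (the values are arbitrary):

import torch
from pytorch_widedeep.losses import MSELoss, FocalLoss

# regression / binary: predictions and ground truth both have shape (N_samples, 1)
y_true = torch.tensor([1.0, 0.0, 2.0]).view(-1, 1)
y_pred = torch.tensor([0.8, 0.2, 1.6]).view(-1, 1)
reg_loss = MSELoss()(y_pred, y_true)

# multiclass: the ground truth is a 1D tensor of class indices and the
# predictions have shape (N_samples, N_classes)
y_true_mc = torch.tensor([0, 2, 1])
y_pred_mc = torch.tensor([[0.7, 0.2, 0.1], [0.1, 0.3, 0.6], [0.2, 0.6, 0.2]])
mc_loss = FocalLoss()(y_pred_mc, y_true_mc)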

"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.MSELoss","title":"MSELoss","text":"
MSELoss()\n

Bases: Module

Mean square error loss with the option of using Label Distribution Smoothing (LDS)

LDS is based on Delving into Deep Imbalanced Regression.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.MSELoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions

  • target (Tensor) \u2013

    Target tensor with the actual values

  • lds_weight (Optional[Tensor], default: None ) \u2013

    Tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import MSELoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n>>> loss = MSELoss()(input, target, lds_weight)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self, input: Tensor, target: Tensor, lds_weight: Optional[Tensor] = None\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions\n    target: Tensor\n        Target tensor with the actual values\n    lds_weight: Tensor, Optional\n        Tensor of weights that will multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import MSELoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n    >>> loss = MSELoss()(input, target, lds_weight)\n    \"\"\"\n    loss = (input - target) ** 2\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.MSLELoss","title":"MSLELoss","text":"
MSLELoss()\n

Bases: Module

Mean square log error loss with the option of using Label Distribution Smoothing (LDS)

LDS is based on Delving into Deep Imbalanced Regression.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.MSLELoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual values

  • lds_weight (Optional[Tensor], default: None ) \u2013

    Tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import MSLELoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n>>> loss = MSLELoss()(input, target, lds_weight)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self, input: Tensor, target: Tensor, lds_weight: Optional[Tensor] = None\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n    lds_weight: Tensor, Optional\n        Tensor of weights that will multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import MSLELoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n    >>> loss = MSLELoss()(input, target, lds_weight)\n    \"\"\"\n    assert (\n        input.min() >= 0\n    ), \"\"\"All input values must be >=0, if your model is predicting\n        values <0 try to enforce positive values by activation function\n        on last layer with `trainer.enforce_positive_output=True`\"\"\"\n    assert target.min() >= 0, \"All target values must be >=0\"\n\n    loss = (torch.log(input + 1) - torch.log(target + 1)) ** 2\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.RMSELoss","title":"RMSELoss","text":"
RMSELoss()\n

Bases: Module

Root mean square error loss adjusted for the possibility of using Label Distribution Smoothing (LDS)

LDS is based on Delving into Deep Imbalanced Regression.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.RMSELoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual values

  • lds_weight (Optional[Tensor], default: None ) \u2013

    Tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import RMSELoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n>>> loss = RMSELoss()(input, target, lds_weight)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self, input: Tensor, target: Tensor, lds_weight: Optional[Tensor] = None\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n    lds_weight: Tensor, Optional\n        Tensor of weights that will multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import RMSELoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n    >>> loss = RMSELoss()(input, target, lds_weight)\n    \"\"\"\n    loss = (input - target) ** 2\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.sqrt(torch.mean(loss))\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.RMSLELoss","title":"RMSLELoss","text":"
RMSLELoss()\n

Bases: Module

Root mean square log error loss adjusted for the possibility of using Label Distribution Smoothing (LDS)

LDS is based on Delving into Deep Imbalanced Regression.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.RMSLELoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual values

  • lds_weight (Optional[Tensor], default: None ) \u2013

    Tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import RMSLELoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n>>> loss = RMSLELoss()(input, target, lds_weight)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self, input: Tensor, target: Tensor, lds_weight: Optional[Tensor] = None\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n    lds_weight: Tensor, Optional\n        Tensor of weights that will multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import RMSLELoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n    >>> loss = RMSLELoss()(input, target, lds_weight)\n    \"\"\"\n    assert (\n        input.min() >= 0\n    ), \"\"\"All input values must be >=0, if your model is predicting\n        values <0 try to enforce positive values by activation function\n        on last layer with `trainer.enforce_positive_output=True`\"\"\"\n    assert target.min() >= 0, \"All target values must be >=0\"\n\n    loss = (torch.log(input + 1) - torch.log(target + 1)) ** 2\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.sqrt(torch.mean(loss))\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.QuantileLoss","title":"QuantileLoss","text":"
QuantileLoss(\n    quantiles=[0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]\n)\n

Bases: Module

Quantile loss defined as:

\\[ Loss = max(q \\times (y-y_{pred}), (1-q) \\times (y_{pred}-y)) \\]

All credits go to the implementation at pytorch-forecasting.

Parameters:

  • quantiles (List[float], default: [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98] ) \u2013

    List of quantiles

Source code in pytorch_widedeep/losses.py
def __init__(\n    self,\n    quantiles: List[float] = [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98],\n):\n    super().__init__()\n    self.quantiles = quantiles\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.QuantileLoss.forward","title":"forward","text":"
forward(input, target)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions

  • target (Tensor) \u2013

    Target tensor with the actual values

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.losses import QuantileLoss\n>>>\n>>> # REGRESSION\n>>> target = torch.tensor([[0.6, 1.5]]).view(-1, 1)\n>>> input = torch.tensor([[.1, .2,], [.4, .5]])\n>>> qloss = QuantileLoss([0.25, 0.75])\n>>> loss = qloss(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(self, input: Tensor, target: Tensor) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions\n    target: Tensor\n        Target tensor with the actual values\n\n    Examples\n    --------\n    >>> import torch\n    >>>\n    >>> from pytorch_widedeep.losses import QuantileLoss\n    >>>\n    >>> # REGRESSION\n    >>> target = torch.tensor([[0.6, 1.5]]).view(-1, 1)\n    >>> input = torch.tensor([[.1, .2,], [.4, .5]])\n    >>> qloss = QuantileLoss([0.25, 0.75])\n    >>> loss = qloss(input, target)\n    \"\"\"\n\n    assert input.shape == torch.Size([target.shape[0], len(self.quantiles)]), (\n        \"The input and target have inconsistent shape. The dimension of the prediction \"\n        \"of the model that is using QuantileLoss must be equal to number of quantiles, \"\n        f\"i.e. {len(self.quantiles)}.\"\n    )\n    target = target.view(-1, 1).float()\n    losses = []\n    for i, q in enumerate(self.quantiles):\n        errors = target - input[..., i]\n        losses.append(torch.max((q - 1) * errors, q * errors).unsqueeze(-1))\n\n    loss = torch.cat(losses, dim=2)\n\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalLoss","title":"FocalLoss","text":"
FocalLoss(alpha=0.25, gamma=1.0)\n

Bases: Module

Implementation of the Focal loss for both binary and multiclass classification:

\[ FL(p_t) = -\alpha (1 - p_t)^{\gamma} \log(p_t) \]

where, for a case of a binary classification problem

\\[ \\begin{equation} p_t= \\begin{cases}p, & \\text{if $y=1$}.\\\\1-p, & \\text{otherwise}. \\end{cases} \\end{equation} \\]

Parameters:

  • alpha (float, default: 0.25 ) \u2013

    Focal Loss alpha parameter

  • gamma (float, default: 1.0 ) \u2013

    Focal Loss gamma parameter

Source code in pytorch_widedeep/losses.py
def __init__(self, alpha: float = 0.25, gamma: float = 1.0):\n    super().__init__()\n    self.alpha = alpha\n    self.gamma = gamma\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalLoss.forward","title":"forward","text":"
forward(input, target)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual classes

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.losses import FocalLoss\n>>>\n>>> # BINARY\n>>> target = torch.tensor([0, 1, 0, 1]).view(-1, 1)\n>>> input = torch.tensor([[0.6, 0.7, 0.3, 0.8]]).t()\n>>> loss = FocalLoss()(input, target)\n>>>\n>>> # MULTICLASS\n>>> target = torch.tensor([1, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([[0.2, 0.5, 0.3], [0.8, 0.1, 0.1], [0.7, 0.2, 0.1]])\n>>> loss = FocalLoss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(self, input: Tensor, target: Tensor) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n\n    Examples\n    --------\n    >>> import torch\n    >>>\n    >>> from pytorch_widedeep.losses import FocalLoss\n    >>>\n    >>> # BINARY\n    >>> target = torch.tensor([0, 1, 0, 1]).view(-1, 1)\n    >>> input = torch.tensor([[0.6, 0.7, 0.3, 0.8]]).t()\n    >>> loss = FocalLoss()(input, target)\n    >>>\n    >>> # MULTICLASS\n    >>> target = torch.tensor([1, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([[0.2, 0.5, 0.3], [0.8, 0.1, 0.1], [0.7, 0.2, 0.1]])\n    >>> loss = FocalLoss()(input, target)\n    \"\"\"\n    input_prob = torch.sigmoid(input)\n    if input.size(1) == 1:\n        input_prob = torch.cat([1 - input_prob, input_prob], axis=1)  # type: ignore\n        num_class = 2\n    else:\n        num_class = input_prob.size(1)\n    binary_target = torch.eye(num_class)[target.squeeze().cpu().long()]\n    if use_cuda:\n        binary_target = binary_target.cuda()\n    binary_target = binary_target.contiguous()\n    weight = self._get_weight(input_prob, binary_target)\n\n    return F.binary_cross_entropy(\n        input_prob, binary_target, weight, reduction=\"mean\"\n    )\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.BayesianSELoss","title":"BayesianSELoss","text":"
BayesianSELoss()\n

Bases: Module

Squared Loss (log Gaussian) for the case of a regression as specified in the original publication Weight Uncertainty in Neural Networks.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.BayesianSELoss.forward","title":"forward","text":"
forward(input, target)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual values

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import BayesianSELoss\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> loss = BayesianSELoss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(self, input: Tensor, target: Tensor) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import BayesianSELoss\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> loss = BayesianSELoss()(input, target)\n    \"\"\"\n    return (0.5 * (input - target) ** 2).sum()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.TweedieLoss","title":"TweedieLoss","text":"
TweedieLoss()\n

Bases: Module

Tweedie loss for extremely unbalanced zero-inflated data

All credits go to Wenbo Shi. See this post and the original publication for details.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.TweedieLoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None, p=1.5)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions

  • target (Tensor) \u2013

    Target tensor with the actual values

  • lds_weight (Optional[Tensor], default: None ) \u2013

    If we choose to use LDS this is the tensor of weights that will multiply the loss value.

  • p (float, default: 1.5 ) \u2013

    the power to be used to compute the loss. See the original publication for details

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import TweedieLoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n>>> loss = TweedieLoss()(input, target, lds_weight)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self,\n    input: Tensor,\n    target: Tensor,\n    lds_weight: Optional[Tensor] = None,\n    p: float = 1.5,\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions\n    target: Tensor\n        Target tensor with the actual values\n    lds_weight: Tensor, Optional\n        If we choose to use LDS this is the tensor of weights that will\n        multiply the loss value.\n    p: float, default = 1.5\n        the power to be used to compute the loss. See the original\n        publication for details\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import TweedieLoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n    >>> loss = TweedieLoss()(input, target, lds_weight)\n    \"\"\"\n\n    assert (\n        input.min() > 0\n    ), \"\"\"All input values must be >=0, if your model is predicting\n        values <0 try to enforce positive values by activation function\n        on last layer with `trainer.enforce_positive_output=True`\"\"\"\n    assert target.min() >= 0, \"All target values must be >=0\"\n    loss = -target * torch.pow(input, 1 - p) / (1 - p) + torch.pow(input, 2 - p) / (\n        2 - p\n    )\n    if lds_weight is not None:\n        loss *= lds_weight\n\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.ZILNLoss","title":"ZILNLoss","text":"
ZILNLoss()\n

Bases: Module

Adjusted implementation of the Zero Inflated LogNormal Loss

See A Deep Probabilistic Model for Customer Lifetime Value Prediction and the corresponding code.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.ZILNLoss.forward","title":"forward","text":"
forward(input, target)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions with shape (N, 3), where N is the batch size

  • target (Tensor) \u2013

    Target tensor with the actual target values

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import ZILNLoss\n>>>\n>>> target = torch.tensor([[0., 1.5]]).view(-1, 1)\n>>> input = torch.tensor([[.1, .2, .3], [.4, .5, .6]])\n>>> loss = ZILNLoss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(self, input: Tensor, target: Tensor) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions with spape (N,3), where N is the batch size\n    target: Tensor\n        Target tensor with the actual target values\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import ZILNLoss\n    >>>\n    >>> target = torch.tensor([[0., 1.5]]).view(-1, 1)\n    >>> input = torch.tensor([[.1, .2, .3], [.4, .5, .6]])\n    >>> loss = ZILNLoss()(input, target)\n    \"\"\"\n    positive = target > 0\n    positive = positive.float()\n\n    assert input.shape == torch.Size([target.shape[0], 3]), (\n        \"Wrong shape of the 'input' tensor. The pred_dim of the \"\n        \"model that is using ZILNLoss must be equal to 3.\"\n    )\n\n    positive_input = input[..., :1]\n\n    classification_loss = F.binary_cross_entropy_with_logits(\n        positive_input, positive, reduction=\"none\"\n    ).flatten()\n\n    loc = input[..., 1:2]\n\n    # when using max the two input tensors (input and other) have to be of\n    # the same type\n    max_input = F.softplus(input[..., 2:])\n    max_other = torch.sqrt(torch.Tensor([torch.finfo(torch.double).eps])).type(\n        max_input.type()\n    )\n    scale = torch.max(max_input, max_other)\n    safe_labels = positive * target + (1 - positive) * torch.ones_like(target)\n\n    regression_loss = -torch.mean(\n        positive\n        * torch.distributions.log_normal.LogNormal(loc=loc, scale=scale).log_prob(\n            safe_labels\n        ),\n        dim=-1,\n    )\n\n    return torch.mean(classification_loss + regression_loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.L1Loss","title":"L1Loss","text":"
L1Loss()\n

Bases: Module

L1 loss adjusted for the possibility of using Label Distribution Smoothing (LDS)

LDS is based on Delving into Deep Imbalanced Regression.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.L1Loss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions

  • target (Tensor) \u2013

    Target tensor with the actual values

  • lds_weight (Optional[Tensor], default: None ) \u2013

    If we choose to use LDS this is the tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.losses import L1Loss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> loss = L1Loss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self, input: Tensor, target: Tensor, lds_weight: Optional[Tensor] = None\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions\n    target: Tensor\n        Target tensor with the actual values\n    lds_weight: Tensor, Optional\n        If we choose to use LDS this is the tensor of weights that will\n        multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>>\n    >>> from pytorch_widedeep.losses import L1Loss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> loss = L1Loss()(input, target)\n    \"\"\"\n    loss = F.l1_loss(input, target, reduction=\"none\")\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalR_L1Loss","title":"FocalR_L1Loss","text":"
FocalR_L1Loss(beta=0.2, gamma=1.0, activation_fn='sigmoid')\n

Bases: Module

Focal-R L1 loss

Based on Delving into Deep Imbalanced Regression.

Parameters:

  • beta (float, default: 0.2 ) \u2013

    Focal Loss beta parameter in their implementation

  • gamma (float, default: 1.0 ) \u2013

    Focal Loss gamma parameter

  • activation_fn (Literal[sigmoid, tanh], default: 'sigmoid' ) \u2013

    Activation function to be used during the computation of the loss. Possible values are 'sigmoid' and 'tanh'. See the original publication for details.

Source code in pytorch_widedeep/losses.py
def __init__(\n    self,\n    beta: float = 0.2,\n    gamma: float = 1.0,\n    activation_fn: Literal[\"sigmoid\", \"tanh\"] = \"sigmoid\",\n):\n    super().__init__()\n    self.beta = beta\n    self.gamma = gamma\n    self.activation_fn = activation_fn\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalR_L1Loss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual values

  • lds_weight (Optional[Tensor], default: None ) \u2013

    If we choose to use LDS this is the tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.losses import FocalR_L1Loss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> loss = FocalR_L1Loss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self,\n    input: Tensor,\n    target: Tensor,\n    lds_weight: Optional[Tensor] = None,\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n    lds_weight: Tensor, Optional\n        If we choose to use LDS this is the tensor of weights that will\n        multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>>\n    >>> from pytorch_widedeep.losses import FocalR_L1Loss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> loss = FocalR_L1Loss()(input, target)\n    \"\"\"\n    loss = F.l1_loss(input, target, reduction=\"none\")\n    if self.activation_fn == \"tanh\":\n        loss *= (torch.tanh(self.beta * torch.abs(input - target))) ** self.gamma\n    elif self.activation_fn == \"sigmoid\":\n        loss *= (\n            2 * torch.sigmoid(self.beta * torch.abs(input - target)) - 1\n        ) ** self.gamma\n    else:\n        ValueError(\n            \"Incorrect activation function value - must be in ['sigmoid', 'tanh']\"\n        )\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalR_MSELoss","title":"FocalR_MSELoss","text":"
FocalR_MSELoss(\n    beta=0.2, gamma=1.0, activation_fn=\"sigmoid\"\n)\n

Bases: Module

Focal-R MSE loss

Based on Delving into Deep Imbalanced Regression.

Parameters:

  • beta (float, default: 0.2 ) \u2013

    Focal Loss beta parameter in their implementation

  • gamma (float, default: 1.0 ) \u2013

    Focal Loss gamma parameter

  • activation_fn (Literal[sigmoid, tanh], default: 'sigmoid' ) \u2013

    Activation function to be used during the computation of the loss. Possible values are 'sigmoid' and 'tanh'. See the original publication for details.

Source code in pytorch_widedeep/losses.py
def __init__(\n    self,\n    beta: float = 0.2,\n    gamma: float = 1.0,\n    activation_fn: Literal[\"sigmoid\", \"tanh\"] = \"sigmoid\",\n):\n    super().__init__()\n    self.beta = beta\n    self.gamma = gamma\n    self.activation_fn = activation_fn\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalR_MSELoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual values

  • lds_weight (Optional[Tensor], default: None ) \u2013

    If we choose to use LDS this is the tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.losses import FocalR_MSELoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> loss = FocalR_MSELoss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self,\n    input: Tensor,\n    target: Tensor,\n    lds_weight: Optional[Tensor] = None,\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n    lds_weight: Tensor, Optional\n        If we choose to use LDS this is the tensor of weights that will\n        multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>>\n    >>> from pytorch_widedeep.losses import FocalR_MSELoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> loss = FocalR_MSELoss()(input, target)\n    \"\"\"\n    loss = (input - target) ** 2\n    if self.activation_fn == \"tanh\":\n        loss *= (torch.tanh(self.beta * torch.abs(input - target))) ** self.gamma\n    elif self.activation_fn == \"sigmoid\":\n        loss *= (\n            2 * torch.sigmoid(self.beta * torch.abs((input - target) ** 2)) - 1\n        ) ** self.gamma\n    else:\n        ValueError(\n            \"Incorrect activation function value - must be in ['sigmoid', 'tanh']\"\n        )\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalR_RMSELoss","title":"FocalR_RMSELoss","text":"
FocalR_RMSELoss(\n    beta=0.2, gamma=1.0, activation_fn=\"sigmoid\"\n)\n

Bases: Module

Focal-R RMSE loss

Based on Delving into Deep Imbalanced Regression.

Parameters:

  • beta (float, default: 0.2 ) \u2013

    Focal Loss beta parameter in their implementation

  • gamma (float, default: 1.0 ) \u2013

    Focal Loss gamma parameter

  • activation_fn (Literal[sigmoid, tanh], default: 'sigmoid' ) \u2013

    Activation function to be used during the computation of the loss. Possible values are 'sigmoid' and 'tanh'. See the original publication for details.

Source code in pytorch_widedeep/losses.py
def __init__(\n    self,\n    beta: float = 0.2,\n    gamma: float = 1.0,\n    activation_fn: Literal[\"sigmoid\", \"tanh\"] = \"sigmoid\",\n):\n    super().__init__()\n    self.beta = beta\n    self.gamma = gamma\n    self.activation_fn = activation_fn\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalR_RMSELoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual values

  • lds_weight (Optional[Tensor], default: None ) \u2013

    If we choose to use LDS this is the tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.losses import FocalR_RMSELoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> loss = FocalR_RMSELoss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self,\n    input: Tensor,\n    target: Tensor,\n    lds_weight: Optional[Tensor] = None,\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n    lds_weight: Tensor, Optional\n        If we choose to use LDS this is the tensor of weights that will\n        multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>>\n    >>> from pytorch_widedeep.losses import FocalR_RMSELoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> loss = FocalR_RMSELoss()(input, target)\n    \"\"\"\n    loss = (input - target) ** 2\n    if self.activation_fn == \"tanh\":\n        loss *= (torch.tanh(self.beta * torch.abs(input - target))) ** self.gamma\n    elif self.activation_fn == \"sigmoid\":\n        loss *= (\n            2 * torch.sigmoid(self.beta * torch.abs((input - target) ** 2)) - 1\n        ) ** self.gamma\n    else:\n        ValueError(\n            \"Incorrect activation function value - must be in ['sigmoid', 'tanh']\"\n        )\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.sqrt(torch.mean(loss))\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.HuberLoss","title":"HuberLoss","text":"
HuberLoss(beta=0.2)\n

Bases: Module

Huber Loss

Based on Delving into Deep Imbalanced Regression.

Source code in pytorch_widedeep/losses.py
def __init__(self, beta: float = 0.2):\n    super().__init__()\n    self.beta = beta\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.HuberLoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual values

  • lds_weight (Optional[Tensor], default: None ) \u2013

    If we choose to use LDS this is the tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.losses import HuberLoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> loss = HuberLoss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self,\n    input: Tensor,\n    target: Tensor,\n    lds_weight: Optional[Tensor] = None,\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n    lds_weight: Tensor, Optional\n        If we choose to use LDS this is the tensor of weights that will\n        multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>>\n    >>> from pytorch_widedeep.losses import HuberLoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> loss = HuberLoss()(input, target)\n    \"\"\"\n    l1_loss = torch.abs(input - target)\n    cond = l1_loss < self.beta\n    loss = torch.where(\n        cond, 0.5 * l1_loss**2 / self.beta, l1_loss - 0.5 * self.beta\n    )\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.InfoNCELoss","title":"InfoNCELoss","text":"
InfoNCELoss(temperature=0.1, reduction='mean')\n

Bases: Module

InfoNCE Loss. Loss applied during the Contrastive Denoising Self Supervised Pre-training routine available in this library

NOTE: This loss is in principle not exposed to the user, as it is used internally in the library, but it is included here for completeness.

See SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training and references therein

Partially inspired by the code in this repo

"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.InfoNCELoss--parameters","title":"Parameters:","text":"

  • temperature (float, default: 0.1 ) \u2013

    The logits are divided by the temperature before computing the loss value

  • reduction (str, default: 'mean' ) \u2013

    Loss reduction method

Source code in pytorch_widedeep/losses.py
def __init__(self, temperature: float = 0.1, reduction: str = \"mean\"):\n    super(InfoNCELoss, self).__init__()\n\n    self.temperature = temperature\n    self.reduction = reduction\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.InfoNCELoss.forward","title":"forward","text":"
forward(g_projs)\n

Parameters:

  • g_projs (Tuple[Tensor, Tensor]) \u2013

    Tuple with the two tensors corresponding to the output of the two projection heads, as described in 'SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training'.

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import InfoNCELoss\n>>> g_projs = (torch.rand(3, 5, 16), torch.rand(3, 5, 16))\n>>> loss = InfoNCELoss()\n>>> res = loss(g_projs)\n
Source code in pytorch_widedeep/losses.py
def forward(self, g_projs: Tuple[Tensor, Tensor]) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    g_projs: Tuple\n        Tuple with the two tensors corresponding to the output of the two\n        projection heads, as described 'SAINT: Improved Neural Networks\n        for Tabular Data via Row Attention and Contrastive Pre-Training'.\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import InfoNCELoss\n    >>> g_projs = (torch.rand(3, 5, 16), torch.rand(3, 5, 16))\n    >>> loss = InfoNCELoss()\n    >>> res = loss(g_projs)\n    \"\"\"\n    z, z_ = g_projs[0], g_projs[1]\n\n    norm_z = F.normalize(z, dim=-1).flatten(1)\n    norm_z_ = F.normalize(z_, dim=-1).flatten(1)\n\n    logits = (norm_z @ norm_z_.t()) / self.temperature\n    logits_ = (norm_z_ @ norm_z.t()) / self.temperature\n\n    # the target/labels are the entries on the diagonal\n    target = torch.arange(len(norm_z), device=norm_z.device)\n\n    loss = F.cross_entropy(logits, target, reduction=self.reduction)\n    loss_ = F.cross_entropy(logits_, target, reduction=self.reduction)\n\n    return (loss + loss_) / 2.0\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.DenoisingLoss","title":"DenoisingLoss","text":"
DenoisingLoss(\n    lambda_cat=1.0, lambda_cont=1.0, reduction=\"mean\"\n)\n

Bases: Module

Denoising Loss. Loss applied during the Contrastive Denoising Self Supervised Pre-training routine available in this library

NOTE: This loss is in principle not exposed to the user, as it is used internally in the library, but it is included here for completeness.

See SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training and references therein

"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.DenoisingLoss--parameters","title":"Parameters:","text":"

  • lambda_cat (float, default: 1.0 ) \u2013

    Multiplicative factor that will be applied to the loss associated with the categorical features

  • lambda_cont (float, default: 1.0 ) \u2013

    Multiplicative factor that will be applied to the loss associated with the continuous features

  • reduction (str, default: 'mean' ) \u2013

    Loss reduction method

Source code in pytorch_widedeep/losses.py
def __init__(\n    self, lambda_cat: float = 1.0, lambda_cont: float = 1.0, reduction: str = \"mean\"\n):\n    super(DenoisingLoss, self).__init__()\n\n    self.lambda_cat = lambda_cat\n    self.lambda_cont = lambda_cont\n    self.reduction = reduction\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.DenoisingLoss.forward","title":"forward","text":"
forward(x_cat_and_cat_, x_cont_and_cont_)\n

Parameters:

  • x_cat_and_cat_ (Optional[Union[List[Tuple[Tensor, Tensor]], Tuple[Tensor, Tensor]]]) \u2013

    Tuple of tensors containing the raw input features and their encodings, referred to in the SAINT paper as \\(x\\) and \\(x''\\) respectively. If one denoising MLP is used per categorical feature, x_cat_and_cat_ will be a list of tuples, one per categorical feature

  • x_cont_and_cont_ (Optional[Union[List[Tuple[Tensor, Tensor]], Tuple[Tensor, Tensor]]]) \u2013

    same as x_cat_and_cat_ but for continuous columns

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import DenoisingLoss\n>>> x_cat_and_cat_ = (torch.empty(3).random_(3).long(), torch.randn(3, 3))\n>>> x_cont_and_cont_ = (torch.randn(3, 1), torch.randn(3, 1))\n>>> loss = DenoisingLoss()\n>>> res = loss(x_cat_and_cat_, x_cont_and_cont_)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self,\n    x_cat_and_cat_: Optional[\n        Union[List[Tuple[Tensor, Tensor]], Tuple[Tensor, Tensor]]\n    ],\n    x_cont_and_cont_: Optional[\n        Union[List[Tuple[Tensor, Tensor]], Tuple[Tensor, Tensor]]\n    ],\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    x_cat_and_cat_: tuple of Tensors or lists of tuples\n        Tuple of tensors containing the raw input features and their\n        encodings, referred in the SAINT paper as $x$ and $x''$\n        respectively. If one denoising MLP is used per categorical\n        feature `x_cat_and_cat_` will be a list of tuples, one per\n        categorical feature\n    x_cont_and_cont_: tuple of Tensors or lists of tuples\n        same as `x_cat_and_cat_` but for continuous columns\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import DenoisingLoss\n    >>> x_cat_and_cat_ = (torch.empty(3).random_(3).long(), torch.randn(3, 3))\n    >>> x_cont_and_cont_ = (torch.randn(3, 1), torch.randn(3, 1))\n    >>> loss = DenoisingLoss()\n    >>> res = loss(x_cat_and_cat_, x_cont_and_cont_)\n    \"\"\"\n\n    loss_cat = (\n        self._compute_cat_loss(x_cat_and_cat_)\n        if x_cat_and_cat_ is not None\n        else torch.tensor(0.0)\n    )\n    loss_cont = (\n        self._compute_cont_loss(x_cont_and_cont_)\n        if x_cont_and_cont_ is not None\n        else torch.tensor(0.0)\n    )\n\n    return self.lambda_cat * loss_cat + self.lambda_cont * loss_cont\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.EncoderDecoderLoss","title":"EncoderDecoderLoss","text":"
EncoderDecoderLoss(eps=1e-09)\n

Bases: Module

'Standard' Encoder-Decoder Loss. Loss applied during the Encoder-Decoder Self-Supervised Pre-Training routine available in this library

NOTE: This loss is in principle not exposed to the user, as it is used internally in the library, but it is included here for completeness.

The implementation of this loss is based on that at the tabnet repo, which is itself an adaptation of the one in the original paper TabNet: Attentive Interpretable Tabular Learning.

"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.EncoderDecoderLoss--parameters","title":"Parameters:","text":"

  • eps (float, default: 1e-09 ) \u2013

    Simply a small number to avoid dividing by zero

Source code in pytorch_widedeep/losses.py
def __init__(self, eps: float = 1e-9):\n    super(EncoderDecoderLoss, self).__init__()\n    self.eps = eps\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.EncoderDecoderLoss.forward","title":"forward","text":"
forward(x_true, x_pred, mask)\n

Parameters:

  • x_true (Tensor) \u2013

    Embeddings of the input data

  • x_pred (Tensor) \u2013

    Reconstructed embeddings

  • mask (Tensor) \u2013

    Mask with 1s indicating that the reconstruction, and therefore the loss, is based on those features.

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import EncoderDecoderLoss\n>>> x_true = torch.rand(3, 3)\n>>> x_pred = torch.rand(3, 3)\n>>> mask = torch.empty(3, 3).random_(2)\n>>> loss = EncoderDecoderLoss()\n>>> res = loss(x_true, x_pred, mask)\n
Source code in pytorch_widedeep/losses.py
def forward(self, x_true: Tensor, x_pred: Tensor, mask: Tensor) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    x_true: Tensor\n        Embeddings of the input data\n    x_pred: Tensor\n        Reconstructed embeddings\n    mask: Tensor\n        Mask with 1s indicated that the reconstruction, and therefore the\n        loss, is based on those features.\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import EncoderDecoderLoss\n    >>> x_true = torch.rand(3, 3)\n    >>> x_pred = torch.rand(3, 3)\n    >>> mask = torch.empty(3, 3).random_(2)\n    >>> loss = EncoderDecoderLoss()\n    >>> res = loss(x_true, x_pred, mask)\n    \"\"\"\n\n    errors = x_pred - x_true\n\n    reconstruction_errors = torch.mul(errors, mask) ** 2\n\n    x_true_means = torch.mean(x_true, dim=0)\n    x_true_means[x_true_means == 0] = 1\n\n    x_true_stds = torch.std(x_true, dim=0) ** 2\n    x_true_stds[x_true_stds == 0] = x_true_means[x_true_stds == 0]\n\n    features_loss = torch.matmul(reconstruction_errors, 1 / x_true_stds)\n    nb_reconstructed_variables = torch.sum(mask, dim=1)\n    features_loss_norm = features_loss / (nb_reconstructed_variables + self.eps)\n\n    loss = torch.mean(features_loss_norm)\n\n    return loss\n
"},{"location":"pytorch-widedeep/metrics.html","title":"Metrics","text":"

NOTE: metrics in this module expect the predictions and ground truth to have the same dimensions for regression and binary classification problems: \\((N_{samples}, 1)\\). In the case of multiclass classification problems the ground truth is expected to be a 1D tensor with the corresponding classes. See Examples below

We have added the possibility of using the metrics available in the torchmetrics library. Note that this library is still in its early versions and therefore this option should be used with caution. To use torchmetrics, simply import the metrics and use them like any of the pytorch-widedeep metrics described below.

from torchmetrics import Accuracy, Precision\n\naccuracy = Accuracy(average=None, num_classes=2)\nprecision = Precision(average='micro', num_classes=2)\n\ntrainer = Trainer(model, objective=\"binary\", metrics=[accuracy, precision])\n

A functioning example for pytorch-widedeep using torchmetrics can be found in the Examples folder

NOTE: the forward method for all metrics in this module takes two tensors, y_pred and y_true (in that order). Therefore, we do not include the method in the documentation.

"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.Accuracy","title":"Accuracy","text":"
Accuracy(top_k=1)\n

Bases: Metric

Class to calculate the accuracy for both binary and categorical problems

Parameters:

  • top_k (int, default: 1 ) \u2013

    Accuracy will be computed using the top k most likely classes in multiclass problems

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.metrics import Accuracy\n>>>\n>>> acc = Accuracy()\n>>> y_true = torch.tensor([0, 1, 0, 1]).view(-1, 1)\n>>> y_pred = torch.tensor([[0.3, 0.2, 0.6, 0.7]]).view(-1, 1)\n>>> acc(y_pred, y_true)\narray(0.5)\n>>>\n>>> acc = Accuracy(top_k=2)\n>>> y_true = torch.tensor([0, 1, 2])\n>>> y_pred = torch.tensor([[0.3, 0.5, 0.2], [0.1, 0.1, 0.8], [0.1, 0.5, 0.4]])\n>>> acc(y_pred, y_true)\narray(0.66666667)\n
Source code in pytorch_widedeep/metrics.py
def __init__(self, top_k: int = 1):\n    super(Accuracy, self).__init__()\n\n    self.top_k = top_k\n    self.correct_count = 0\n    self.total_count = 0\n    self._name = \"acc\"\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.Accuracy.reset","title":"reset","text":"
reset()\n

resets counters to 0

Source code in pytorch_widedeep/metrics.py
def reset(self):\n    \"\"\"\n    resets counters to 0\n    \"\"\"\n    self.correct_count = 0\n    self.total_count = 0\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.Precision","title":"Precision","text":"
Precision(average=True)\n

Bases: Metric

Class to calculate the precision for both binary and categorical problems

Parameters:

  • average (bool, default: True ) \u2013

    This applies only to multiclass problems. If True, calculates precision for each label and finds their unweighted mean.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.metrics import Precision\n>>>\n>>> prec = Precision()\n>>> y_true = torch.tensor([0, 1, 0, 1]).view(-1, 1)\n>>> y_pred = torch.tensor([[0.3, 0.2, 0.6, 0.7]]).view(-1, 1)\n>>> prec(y_pred, y_true)\narray(0.5)\n>>>\n>>> prec = Precision(average=True)\n>>> y_true = torch.tensor([0, 1, 2])\n>>> y_pred = torch.tensor([[0.7, 0.1, 0.2], [0.1, 0.1, 0.8], [0.1, 0.5, 0.4]])\n>>> prec(y_pred, y_true)\narray(0.33333334)\n
Source code in pytorch_widedeep/metrics.py
def __init__(self, average: bool = True):\n    super(Precision, self).__init__()\n\n    self.average = average\n    self.true_positives = 0\n    self.all_positives = 0\n    self.eps = 1e-20\n    self._name = \"prec\"\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.Precision.reset","title":"reset","text":"
reset()\n

resets counters to 0

Source code in pytorch_widedeep/metrics.py
def reset(self):\n    \"\"\"\n    resets counters to 0\n    \"\"\"\n    self.true_positives = 0\n    self.all_positives = 0\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.Recall","title":"Recall","text":"
Recall(average=True)\n

Bases: Metric

Class to calculate the recall for both binary and categorical problems

Parameters:

  • average (bool, default: True ) \u2013

    This applies only to multiclass problems. If True, calculates recall for each label and finds their unweighted mean.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.metrics import Recall\n>>>\n>>> rec = Recall()\n>>> y_true = torch.tensor([0, 1, 0, 1]).view(-1, 1)\n>>> y_pred = torch.tensor([[0.3, 0.2, 0.6, 0.7]]).view(-1, 1)\n>>> rec(y_pred, y_true)\narray(0.5)\n>>>\n>>> rec = Recall(average=True)\n>>> y_true = torch.tensor([0, 1, 2])\n>>> y_pred = torch.tensor([[0.7, 0.1, 0.2], [0.1, 0.1, 0.8], [0.1, 0.5, 0.4]])\n>>> rec(y_pred, y_true)\narray(0.33333334)\n
Source code in pytorch_widedeep/metrics.py
def __init__(self, average: bool = True):\n    super(Recall, self).__init__()\n\n    self.average = average\n    self.true_positives = 0\n    self.actual_positives = 0\n    self.eps = 1e-20\n    self._name = \"rec\"\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.Recall.reset","title":"reset","text":"
reset()\n

resets counters to 0

Source code in pytorch_widedeep/metrics.py
def reset(self):\n    \"\"\"\n    resets counters to 0\n    \"\"\"\n    self.true_positives = 0\n    self.actual_positives = 0\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.FBetaScore","title":"FBetaScore","text":"
FBetaScore(beta, average=True)\n

Bases: Metric

Class to calculate the fbeta score for both binary and categorical problems

\\[ F_{\\beta} = ((1 + {\\beta}^2) * \\frac{(precision * recall)}{({\\beta}^2 * precision + recall)} \\]

Parameters:

  • beta (int) \u2013

    Coefficient to control the balance between precision and recall

  • average (bool, default: True ) \u2013

    This applies only to multiclass problems. If True, calculates fbeta for each label and finds their unweighted mean.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.metrics import FBetaScore\n>>>\n>>> fbeta = FBetaScore(beta=2)\n>>> y_true = torch.tensor([0, 1, 0, 1]).view(-1, 1)\n>>> y_pred = torch.tensor([[0.3, 0.2, 0.6, 0.7]]).view(-1, 1)\n>>> fbeta(y_pred, y_true)\narray(0.5)\n>>>\n>>> fbeta = FBetaScore(beta=2)\n>>> y_true = torch.tensor([0, 1, 2])\n>>> y_pred = torch.tensor([[0.7, 0.1, 0.2], [0.1, 0.1, 0.8], [0.1, 0.5, 0.4]])\n>>> fbeta(y_pred, y_true)\narray(0.33333334)\n
Source code in pytorch_widedeep/metrics.py
def __init__(self, beta: int, average: bool = True):\n    super(FBetaScore, self).__init__()\n\n    self.beta = beta\n    self.average = average\n    self.precision = Precision(average=False)\n    self.recall = Recall(average=False)\n    self.eps = 1e-20\n    self._name = \"\".join([\"f\", str(self.beta)])\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.FBetaScore.reset","title":"reset","text":"
reset()\n

resets precision and recall

Source code in pytorch_widedeep/metrics.py
def reset(self):\n    \"\"\"\n    resets precision and recall\n    \"\"\"\n    self.precision.reset()\n    self.recall.reset()\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.F1Score","title":"F1Score","text":"
F1Score(average=True)\n

Bases: Metric

Class to calculate the f1 score for both binary and categorical problems

Parameters:

  • average (bool, default: True ) \u2013

    This applies only to multiclass problems. If True, calculates f1 for each label and finds their unweighted mean.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.metrics import F1Score\n>>>\n>>> f1 = F1Score()\n>>> y_true = torch.tensor([0, 1, 0, 1]).view(-1, 1)\n>>> y_pred = torch.tensor([[0.3, 0.2, 0.6, 0.7]]).view(-1, 1)\n>>> f1(y_pred, y_true)\narray(0.5)\n>>>\n>>> f1 = F1Score()\n>>> y_true = torch.tensor([0, 1, 2])\n>>> y_pred = torch.tensor([[0.7, 0.1, 0.2], [0.1, 0.1, 0.8], [0.1, 0.5, 0.4]])\n>>> f1(y_pred, y_true)\narray(0.33333334)\n
Source code in pytorch_widedeep/metrics.py
def __init__(self, average: bool = True):\n    super(F1Score, self).__init__()\n\n    self.average = average\n    self.f1 = FBetaScore(beta=1, average=self.average)\n    self._name = self.f1._name\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.F1Score.reset","title":"reset","text":"
reset()\n

resets counters to 0

Source code in pytorch_widedeep/metrics.py
def reset(self):\n    \"\"\"\n    resets counters to 0\n    \"\"\"\n    self.f1.reset()\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.R2Score","title":"R2Score","text":"
R2Score()\n

Bases: Metric

Calculates R-Squared, the coefficient of determination:

\\[ R^2 = 1 - \\frac{\\sum_{j=1}^n(y_j - \\hat{y_j})^2}{\\sum_{j=1}^n(y_j - \\bar{y})^2} \\]

where \\(\\hat{y_j}\\) is the ground truth, \\(y_j\\) is the predicted value and \\(\\bar{y}\\) is the mean of the ground truth.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.metrics import R2Score\n>>>\n>>> r2 = R2Score()\n>>> y_true = torch.tensor([3, -0.5, 2, 7]).view(-1, 1)\n>>> y_pred = torch.tensor([2.5, 0.0, 2, 8]).view(-1, 1)\n>>> r2(y_pred, y_true)\narray(0.94860814)\n
Source code in pytorch_widedeep/metrics.py
def __init__(self):\n    self.numerator = 0\n    self.denominator = 0\n    self.num_examples = 0\n    self.y_true_sum = 0\n\n    self._name = \"r2\"\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.R2Score.reset","title":"reset","text":"
reset()\n

resets counters to 0

Source code in pytorch_widedeep/metrics.py
def reset(self):\n    \"\"\"\n    resets counters to 0\n    \"\"\"\n    self.numerator = 0\n    self.denominator = 0\n    self.num_examples = 0\n    self.y_true_sum = 0\n
"},{"location":"pytorch-widedeep/model_components.html","title":"The models module","text":"

This module contains the models that can be used as the four main components that will comprise a Wide and Deep model (wide, deeptabular, deeptext, deepimage), as well as the WideDeep \"constructor\" class. Note that each of the four components can be used independently. It also contains all the documentation for the models that can be used for self-supervised pre-training with tabular data.

NOTE: when we started developing the library we thought that combining Deep Learning architectures for tabular data with CNN-based architectures (pretrained or not) for images and Transformer-based architectures for text would be 'overkill' (also, pretrained Transformer-based models were not as readily available as they are today). Therefore, at that time we decided to include simple RNN-based architectures for the text component in the library. A lot has changed since then, and it is our intention to integrate this library with Hugging Face's Transformers library in the near future. Nonetheless, note that it is still possible to use any custom model as the deeptext component with this library. Please see the example section in this documentation for details
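
As a quick orientation, the snippet below is a minimal sketch (not taken from the library's docstrings) of how two of the components documented in this page can be combined with the WideDeep constructor class. The column names, cardinalities and hidden dimensions used here are illustrative assumptions only.

>>> import torch\n>>> from pytorch_widedeep.models import Wide, TabMlp, WideDeep\n>>> # wide component: a linear model over 10 possible feature values (illustrative)\n>>> wide = Wide(input_dim=10, pred_dim=1)\n>>> # deeptabular component: two categorical columns with 4 values each (illustrative)\n>>> column_idx = {\"a\": 0, \"b\": 1}\n>>> cat_embed_input = [(\"a\", 4, 8), (\"b\", 4, 8)]\n>>> tab_mlp = TabMlp(column_idx=column_idx, cat_embed_input=cat_embed_input, mlp_hidden_dims=[8, 4])\n>>> model = WideDeep(wide=wide, deeptabular=tab_mlp)\n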

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.linear.wide.Wide","title":"Wide","text":"
Wide(input_dim, pred_dim=1)\n

Bases: Module

Defines a Wide (linear) model where the non-linearities are captured via the so-called crossed-columns. This can be used as the wide component of a Wide & Deep model.

Parameters:

  • input_dim (int) \u2013

    size of the Linear layer (implemented via an Embedding layer). input_dim is the summation of all the individual values for all the features that go through the wide model. For example, if the wide model receives 2 features with 5 individual values each, input_dim = 10

  • pred_dim (int, default: 1 ) \u2013

    size of the output tensor containing the predictions. Note that unlike all the other models, the wide model is connected directly to the output neuron(s) when used to build a Wide and Deep model. Therefore, it requires the pred_dim parameter.

Attributes:

  • wide_linear (Module) \u2013

    the linear layer that comprises the wide branch of the model

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import Wide\n>>> X = torch.empty(4, 4).random_(4)\n>>> wide = Wide(input_dim=X.unique().size(0), pred_dim=1)\n>>> out = wide(X)\n
Source code in pytorch_widedeep/models/tabular/linear/wide.py
@alias(\"pred_dim\", [\"pred_size\", \"num_class\"])\ndef __init__(self, input_dim: int, pred_dim: int = 1):\n    super(Wide, self).__init__()\n\n    self.input_dim = input_dim\n    self.pred_dim = pred_dim\n\n    # Embeddings: val + 1 because 0 is reserved for padding/unseen cateogories.\n    self.wide_linear = nn.Embedding(input_dim + 1, pred_dim, padding_idx=0)\n    # (Sum(Embedding) + bias) is equivalent to (OneHotVector + Linear)\n    self.bias = nn.Parameter(torch.zeros(pred_dim))\n    self._reset_parameters()\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.linear.wide.Wide.forward","title":"forward","text":"
forward(X)\n

Forward pass. Simply connecting the Embedding layer with the output neuron(s)

Source code in pytorch_widedeep/models/tabular/linear/wide.py
def forward(self, X: Tensor) -> Tensor:\n    r\"\"\"Forward pass. Simply connecting the Embedding layer with the output\n    neuron(s)\"\"\"\n    out = self.wide_linear(X.long()).sum(dim=1) + self.bias\n    return out\n
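
To illustrate the comment in the constructor above ((Sum(Embedding) + bias) is equivalent to (OneHotVector + Linear)), the following is a small, self-contained sketch (not part of the original docs) comparing a summed embedding lookup with a one-hot matrix product; the tensor sizes and indices are arbitrary assumptions.

>>> import torch\n>>> import torch.nn as nn\n>>> emb = nn.Embedding(5, 1, padding_idx=0)  # 4 feature values + padding index 0\n>>> idx = torch.tensor([[1, 3]])  # one observation with two 'active' values\n>>> one_hot = torch.zeros(1, 5).scatter_(1, idx, 1.0)\n>>> torch.allclose(emb(idx).sum(dim=1), one_hot @ emb.weight)\nTrue\n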
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.tab_mlp.TabMlp","title":"TabMlp","text":"
TabMlp(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous=None,\n    embed_continuous_method=None,\n    cont_embed_dim=None,\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    mlp_hidden_dims=[200, 100],\n    mlp_activation=\"relu\",\n    mlp_dropout=0.1,\n    mlp_batchnorm=False,\n    mlp_batchnorm_last=False,\n    mlp_linear_first=True\n)\n

Bases: BaseTabularModelWithoutAttention

Defines a TabMlp model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

This class combines embedding representations of the categorical features with numerical (aka continuous) features, embedded or not. These are then passed through a series of dense layers (i.e. an MLP).

Most of the parameters for this class are Optional since the use of categorical or continuous features is itself optional (i.e. one can use categorical features only, continuous features only, or both).

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the TabMlp model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int, int]]], default: None ) \u2013

    List of Tuples with the column name, number of unique values and embedding dimension. e.g. [(education, 11, 32), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous (Optional[bool], default: None ) \u2013

    Boolean indicating if the continuous columns will be embedded using one of the available methods: 'standard', 'periodic' or 'piecewise'. If None, it will default to 'False'. NOTE: This parameter is deprecated and it will be removed in future releases. Please, use the embed_continuous_method parameter instead.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: None ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dim (Optional[int], default: None ) \u2013

    Size of the continuous embeddings. If the continuous columns are embedded, cont_embed_dim must be passed.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details (a short sketch is also included after the Examples section below). If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so-called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the aforementioned paper but it is implemented in the official repo. If True, the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False, a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropout. If None, it will default to False.

  • mlp_hidden_dims (List[int], default: [200, 100] ) \u2013

    List with the number of neurons per dense layer in the mlp.

  • mlp_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • mlp_dropout (Union[float, List[float]], default: 0.1 ) \u2013

    float or List of floats with the dropout between the dense layers. e.g: [0.5,0.5]

  • mlp_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers

  • mlp_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers

  • mlp_linear_first (bool, default: True ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]

Attributes:

  • encoder (Module) \u2013

    mlp model that will receive the concatenation of the embeddings and the continuous columns

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabMlp\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n>>> cat_embed_input = [(u, i, j) for u, i, j in zip(colnames[:4], [4] * 4, [8] * 4)]\n>>> column_idx = {k: v for v, k in enumerate(colnames)}\n>>> model = TabMlp(mlp_hidden_dims=[8, 4], column_idx=column_idx, cat_embed_input=cat_embed_input,\n... continuous_cols=[\"e\"])\n>>> out = model(X_tab)\n
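
Building on the example above, the following is an additional sketch (not part of the original docstring) illustrating the quantization_setup parameter together with the 'piecewise' continuous-embedding method; the bucket boundaries and dimensions are illustrative assumptions.

>>> quantization_setup = {\"e\": [0.0, 0.25, 0.5, 0.75, 1.0]}\n>>> pw_model = TabMlp(column_idx=column_idx, cat_embed_input=cat_embed_input,\n... continuous_cols=[\"e\"], embed_continuous_method=\"piecewise\",\n... quantization_setup=quantization_setup, cont_embed_dim=8, mlp_hidden_dims=[8, 4])\n>>> out = pw_model(X_tab)\n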
Source code in pytorch_widedeep/models/tabular/mlp/tab_mlp.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous: Optional[bool] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = None,\n    cont_embed_dim: Optional[int] = None,\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    mlp_hidden_dims: List[int] = [200, 100],\n    mlp_activation: str = \"relu\",\n    mlp_dropout: Union[float, List[float]] = 0.1,\n    mlp_batchnorm: bool = False,\n    mlp_batchnorm_last: bool = False,\n    mlp_linear_first: bool = True,\n):\n    super(TabMlp, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=embed_continuous,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dim=cont_embed_dim,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    # Embeddings are instantiated at the base model\n    # Mlp\n    mlp_input_dim = self.cat_out_dim + self.cont_out_dim\n    mlp_hidden_dims = [mlp_input_dim] + mlp_hidden_dims\n    self.encoder = MLP(\n        mlp_hidden_dims,\n        mlp_activation,\n        mlp_dropout,\n        mlp_batchnorm,\n        mlp_batchnorm_last,\n        mlp_linear_first,\n    )\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.tab_mlp.TabMlp.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This property is required to build the WideDeep class.

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.tab_mlp.TabMlpDecoder","title":"TabMlpDecoder","text":"
TabMlpDecoder(\n    embed_dim,\n    mlp_hidden_dims=[100, 200],\n    mlp_activation=\"relu\",\n    mlp_dropout=0.1,\n    mlp_batchnorm=False,\n    mlp_batchnorm_last=False,\n    mlp_linear_first=True,\n)\n

Bases: Module

Companion decoder model for the TabMlp model (which can be considered an encoder itself).

This class is designed to be used with the EncoderDecoderTrainer when using self-supervised pre-training (see the corresponding section in the docs). The TabMlpDecoder will receive the output from the MLP and 'reconstruct' the embeddings.

Parameters:

  • embed_dim (int) \u2013

    Size of the embeddings tensor that needs to be reconstructed.

  • mlp_hidden_dims (List[int], default: [100, 200] ) \u2013

    List with the number of neurons per dense layer in the mlp.

  • mlp_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • mlp_dropout (Union[float, List[float]], default: 0.1 ) \u2013

    float or List of floats with the dropout between the dense layers. e.g: [0.5,0.5]

  • mlp_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers

  • mlp_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers

  • mlp_linear_first (bool, default: True ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]

Attributes:

  • decoder (Module) \u2013

    mlp model that will receive the output of the encoder

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabMlpDecoder\n>>> x_inp = torch.rand(3, 8)\n>>> decoder = TabMlpDecoder(embed_dim=32, mlp_hidden_dims=[8,16])\n>>> res = decoder(x_inp)\n>>> res.shape\ntorch.Size([3, 32])\n
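
As a rough illustration of the self-supervised use case described above, the following sketch (not part of the original docstring) pairs a TabMlp encoder with a TabMlpDecoder via the EncoderDecoderTrainer. The import path, pretrain signature and data shown here are assumptions and should be checked against the self-supervised pre-training section of the docs.

>>> # hypothetical self-supervised pre-training sketch (module path and signatures are assumptions)\n>>> import torch\n>>> from pytorch_widedeep.models import TabMlp, TabMlpDecoder\n>>> from pytorch_widedeep.self_supervised_training import EncoderDecoderTrainer\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1).numpy()\n>>> colnames = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n>>> cat_embed_input = [(u, i, j) for u, i, j in zip(colnames[:4], [4] * 4, [8] * 4)]\n>>> column_idx = {k: v for v, k in enumerate(colnames)}\n>>> encoder = TabMlp(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols=[\"e\"], mlp_hidden_dims=[16, 8])\n>>> decoder = TabMlpDecoder(embed_dim=encoder.cat_out_dim + encoder.cont_out_dim, mlp_hidden_dims=[8, 16])\n>>> ec_trainer = EncoderDecoderTrainer(encoder=encoder, decoder=decoder)\n>>> ec_trainer.pretrain(X_tab, n_epochs=1, batch_size=5)\n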
Source code in pytorch_widedeep/models/tabular/mlp/tab_mlp.py
def __init__(\n    self,\n    embed_dim: int,\n    mlp_hidden_dims: List[int] = [100, 200],\n    mlp_activation: str = \"relu\",\n    mlp_dropout: Union[float, List[float]] = 0.1,\n    mlp_batchnorm: bool = False,\n    mlp_batchnorm_last: bool = False,\n    mlp_linear_first: bool = True,\n):\n    super(TabMlpDecoder, self).__init__()\n\n    self.embed_dim = embed_dim\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    self.decoder = MLP(\n        mlp_hidden_dims + [self.embed_dim],\n        mlp_activation,\n        mlp_dropout,\n        mlp_batchnorm,\n        mlp_batchnorm_last,\n        mlp_linear_first,\n    )\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.resnet.tab_resnet.TabResnet","title":"TabResnet","text":"
TabResnet(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous=None,\n    embed_continuous_method=None,\n    cont_embed_dim=None,\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    blocks_dims=[200, 100, 100],\n    blocks_dropout=0.1,\n    simplify_blocks=False,\n    mlp_hidden_dims=None,\n    mlp_activation=None,\n    mlp_dropout=None,\n    mlp_batchnorm=None,\n    mlp_batchnorm_last=None,\n    mlp_linear_first=None\n)\n

Bases: BaseTabularModelWithoutAttention

Defines a TabResnet model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

This class combines embedding representations of the categorical features with numerical (aka continuous) features, embedded or not. These are then passed through a series of Resnet blocks. See pytorch_widedeep.models.tab_resnet._layers for details on the structure of each block.

Most of the parameters for this class are Optional since the use of categorical or continuous features is itself optional (i.e. one can use categorical features only, continuous features only, or both).

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the TabResnet model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int, int]]], default: None ) \u2013

    List of Tuples with the column name, number of unique values and embedding dimension. e.g. [(education, 11, 32), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous (Optional[bool], default: None ) \u2013

    Boolean indicating if the continuous columns will be embedded using one of the available methods: 'standard', 'periodic' or 'piecewise'. If None, it will default to 'False'. NOTE: This parameter is deprecated and it will be removed in future releases. Please, use the embed_continuous_method parameter instead.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: None ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dim (Optional[int], default: None ) \u2013

    Size of the continuous embeddings. If the continuous columns are embedded, cont_embed_dim must be passed.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so-called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required (see the 'periodic' sketch after the Examples section below).

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the aforementioned paper but it is implemented in the official repo. If True, the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False, a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropout. If None, it will default to False.

  • blocks_dims (List[int], default: [200, 100, 100] ) \u2013

    List of integers that define the input and output units of each block. For example: [200, 100, 100] will generate 2 blocks. The first will receive a tensor of size 200 and output a tensor of size 100, and the second will receive a tensor of size 100 and output a tensor of size 100. See pytorch_widedeep.models.tab_resnet._layers for details on the structure of each block.

  • blocks_dropout (float, default: 0.1 ) \u2013

    Block's internal dropout.

  • simplify_blocks (bool, default: False ) \u2013

    Boolean indicating if the simplest possible residual blocks (X -> [ [LIN, BN, ACT] + X ]) will be used instead of a standard one (X -> [ [LIN1, BN1, ACT1] -> [LIN2, BN2] + X ]).

  • mlp_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the MLP. e.g: [64, 32]. If None the output of the Resnet Blocks will be connected directly to the output neuron(s).

  • mlp_activation (Optional[str], default: None ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 'relu'.

  • mlp_dropout (Optional[float], default: None ) \u2013

    float with the dropout between the dense layers of the MLP. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 0.0.

  • mlp_batchnorm (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_batchnorm_last (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_linear_first (Optional[bool], default: None ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to True.

Attributes:

  • encoder (Module) \u2013

    deep dense Resnet model that will receive the concatenation of the embeddings and the continuous columns

  • mlp (Module) \u2013

    if mlp_hidden_dims is not None, this attribute will be an MLP model that will receive the output of the Resnet blocks.

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabResnet\n>>> X_deep = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i,j) for u,i,j in zip(colnames[:4], [4]*4, [8]*4)]\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = TabResnet(blocks_dims=[16,4], column_idx=column_idx, cat_embed_input=cat_embed_input,\n... continuous_cols = ['e'])\n>>> out = model(X_deep)\n
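
Building on the example above, a further sketch (not part of the original docstring) illustrating the 'periodic' continuous-embedding method and its related parameters (n_frequencies, sigma, share_last_layer); the values chosen are illustrative assumptions.

>>> periodic_model = TabResnet(blocks_dims=[16, 4], column_idx=column_idx, cat_embed_input=cat_embed_input,\n... continuous_cols=['e'], embed_continuous_method='periodic',\n... n_frequencies=4, sigma=0.1, share_last_layer=False, cont_embed_dim=8)\n>>> out = periodic_model(X_deep)\n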
Source code in pytorch_widedeep/models/tabular/resnet/tab_resnet.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous: Optional[bool] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = None,\n    cont_embed_dim: Optional[int] = None,\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    blocks_dims: List[int] = [200, 100, 100],\n    blocks_dropout: float = 0.1,\n    simplify_blocks: bool = False,\n    mlp_hidden_dims: Optional[List[int]] = None,\n    mlp_activation: Optional[str] = None,\n    mlp_dropout: Optional[float] = None,\n    mlp_batchnorm: Optional[bool] = None,\n    mlp_batchnorm_last: Optional[bool] = None,\n    mlp_linear_first: Optional[bool] = None,\n):\n    super(TabResnet, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=embed_continuous,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dim=cont_embed_dim,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    if len(blocks_dims) < 2:\n        raise ValueError(\n            \"'blocks' must contain at least two elements, e.g. [256, 128]\"\n        )\n\n    self.blocks_dims = blocks_dims\n    self.blocks_dropout = blocks_dropout\n    self.simplify_blocks = simplify_blocks\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    # Embeddings are instantiated at the base model\n\n    # Resnet\n    dense_resnet_input_dim = self.cat_out_dim + self.cont_out_dim\n    self.encoder = DenseResnet(\n        dense_resnet_input_dim, blocks_dims, blocks_dropout, self.simplify_blocks\n    )\n\n    # Mlp: adding an MLP on top of the Resnet blocks is optional and\n    # therefore all related params are optional\n    if self.mlp_hidden_dims is not None:\n        self.mlp = MLP(\n            d_hidden=[self.blocks_dims[-1]] + self.mlp_hidden_dims,\n            activation=\"relu\"\n            if self.mlp_activation is None\n            else self.mlp_activation,\n            dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout,\n            batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm,\n            batchnorm_last=False\n            if self.mlp_batchnorm_last is None\n            else self.mlp_batchnorm_last,\n            linear_first=True\n            if self.mlp_linear_first is None\n            else self.mlp_linear_first,\n        )\n    else:\n        self.mlp = None\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.resnet.tab_resnet.TabResnet.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This property is required to build the WideDeep class.

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.resnet.tab_resnet.TabResnetDecoder","title":"TabResnetDecoder","text":"
TabResnetDecoder(\n    embed_dim,\n    blocks_dims=[100, 100, 200],\n    blocks_dropout=0.1,\n    simplify_blocks=False,\n    mlp_hidden_dims=None,\n    mlp_activation=None,\n    mlp_dropout=None,\n    mlp_batchnorm=None,\n    mlp_batchnorm_last=None,\n    mlp_linear_first=None,\n)\n

Bases: Module

Companion decoder model for the TabResnet model (which can be considered an encoder itself)

This class is designed to be used with the EncoderDecoderTrainer when using self-supervised pre-training (see the corresponding section in the docs). This class will receive the output from the ResNet blocks or the MLP (if present) and 'reconstruct' the embeddings.

Parameters:

  • embed_dim (int) \u2013

    Size of the embeddings tensor to be reconstructed.

  • blocks_dims (List[int], default: [100, 100, 200] ) \u2013

    List of integers that define the input and output units of each block. For example: [200, 100, 100] will generate 2 blocks. The first will receive a tensor of size 200 and output a tensor of size 100, and the second will receive a tensor of size 100 and output a tensor of size 100. See pytorch_widedeep.models.tab_resnet._layers for details on the structure of each block.

  • blocks_dropout (float, default: 0.1 ) \u2013

    Block's internal dropout.

  • simplify_blocks (bool, default: False ) \u2013

    Boolean indicating if the simplest possible residual blocks (X -> [ [LIN, BN, ACT] + X ]) will be used instead of a standard one (X -> [ [LIN1, BN1, ACT1] -> [LIN2, BN2] + X ]).

  • mlp_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the MLP. e.g: [64, 32]. If None the output of the Resnet Blocks will be connected directly to the output neuron(s).

  • mlp_activation (Optional[str], default: None ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 'relu'.

  • mlp_dropout (Optional[float], default: None ) \u2013

    float with the dropout between the dense layers of the MLP. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 0.0.

  • mlp_batchnorm (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_batchnorm_last (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_linear_first (Optional[bool], default: None ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to True.

Attributes:

  • decoder (Module) \u2013

    deep dense Resnet model that will receive the output of the encoder IF mlp_hidden_dims is None

  • mlp (Module) \u2013

    if mlp_hidden_dims is not None, the overall decoder will consist of an MLP that will receive the output of the encoder, followed by the deep dense Resnet.

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabResnetDecoder\n>>> x_inp = torch.rand(3, 8)\n>>> decoder = TabResnetDecoder(embed_dim=32, blocks_dims=[8, 16, 16])\n>>> res = decoder(x_inp)\n>>> res.shape\ntorch.Size([3, 32])\n
Source code in pytorch_widedeep/models/tabular/resnet/tab_resnet.py
def __init__(\n    self,\n    embed_dim: int,\n    blocks_dims: List[int] = [100, 100, 200],\n    blocks_dropout: float = 0.1,\n    simplify_blocks: bool = False,\n    mlp_hidden_dims: Optional[List[int]] = None,\n    mlp_activation: Optional[str] = None,\n    mlp_dropout: Optional[float] = None,\n    mlp_batchnorm: Optional[bool] = None,\n    mlp_batchnorm_last: Optional[bool] = None,\n    mlp_linear_first: Optional[bool] = None,\n):\n    super(TabResnetDecoder, self).__init__()\n\n    if len(blocks_dims) < 2:\n        raise ValueError(\n            \"'blocks' must contain at least two elements, e.g. [256, 128]\"\n        )\n\n    self.embed_dim = embed_dim\n\n    self.blocks_dims = blocks_dims\n    self.blocks_dropout = blocks_dropout\n    self.simplify_blocks = simplify_blocks\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    if self.mlp_hidden_dims is not None:\n        self.mlp = MLP(\n            d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim,\n            activation=\"relu\"\n            if self.mlp_activation is None\n            else self.mlp_activation,\n            dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout,\n            batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm,\n            batchnorm_last=False\n            if self.mlp_batchnorm_last is None\n            else self.mlp_batchnorm_last,\n            linear_first=True\n            if self.mlp_linear_first is None\n            else self.mlp_linear_first,\n        )\n        self.decoder = DenseResnet(\n            self.mlp_hidden_dims[-1],\n            blocks_dims,\n            blocks_dropout,\n            self.simplify_blocks,\n        )\n    else:\n        self.mlp = None\n        self.decoder = DenseResnet(\n            blocks_dims[0], blocks_dims, blocks_dropout, self.simplify_blocks\n        )\n\n    self.reconstruction_layer = nn.Linear(blocks_dims[-1], embed_dim, bias=False)\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.tabnet.tab_net.TabNet","title":"TabNet","text":"
TabNet(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous=None,\n    embed_continuous_method=None,\n    cont_embed_dim=None,\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    n_steps=3,\n    step_dim=8,\n    attn_dim=8,\n    dropout=0.0,\n    n_glu_step_dependent=2,\n    n_glu_shared=2,\n    ghost_bn=True,\n    virtual_batch_size=128,\n    momentum=0.02,\n    gamma=1.3,\n    epsilon=1e-15,\n    mask_type=\"sparsemax\"\n)\n

Bases: BaseTabularModelWithoutAttention

Defines a TabNet model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

The implementation in this library is fully based on that here by the dreamquark-ai team, simply adapted so that it can work within the WideDeep framework. Therefore, ALL CREDIT TO THE DREAMQUARK-AI TEAM.

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the TabNet model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int, int]]], default: None ) \u2013

    List of Tuples with the column name, number of unique values and embedding dimension. e.g. [(education, 11, 32), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous (Optional[bool], default: None ) \u2013

    Boolean indicating if the continuous columns will be embedded using one of the available methods: 'standard', 'periodic' or 'piecewise'. If None, it will default to 'False'. NOTE: This parameter is deprecated and it will be removed in future releases. Please, use the embed_continuous_method parameter instead.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: None ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dim (Optional[int], default: None ) \u2013

    Size of the continuous embeddings. If the continuous columns are embedded, cont_embed_dim must be passed.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so-called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the aforementioned paper but it is implemented in the official repo. If True, the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False, a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropout. If None, it will default to False.

  • n_steps (int, default: 3 ) \u2013

    number of decision steps. For a better understanding of the function of n_steps and the upcoming parameters, please see the paper.

  • step_dim (int, default: 8 ) \u2013

    Step's output dimension. This is the output dimension that WideDeep will collect and connect to the output neuron(s).

  • attn_dim (int, default: 8 ) \u2013

    Attention dimension

  • dropout (float, default: 0.0 ) \u2013

    GLU block's internal dropout

  • n_glu_step_dependent (int, default: 2 ) \u2013

    number of GLU Blocks ([FC -> BN -> GLU]) that are step dependent

  • n_glu_shared (int, default: 2 ) \u2013

    number of GLU Blocks ([FC -> BN -> GLU]) that will be shared across decision steps

  • ghost_bn (bool, default: True ) \u2013

    Boolean indicating if Ghost Batch Normalization will be used.

  • virtual_batch_size (int, default: 128 ) \u2013

    Batch size when using Ghost Batch Normalization

  • momentum (float, default: 0.02 ) \u2013

    Ghost Batch Normalization's momentum. The dreamquark-ai team advises very low values. However, high values are used in the original publication. During our tests, higher values led to better results.

  • gamma (float, default: 1.3 ) \u2013

    Relaxation parameter in the paper. When gamma = 1, a feature is enforced to be used only at one decision step. As gamma increases, more flexibility is provided to use a feature at multiple decision steps

  • epsilon (float, default: 1e-15 ) \u2013

    Float to avoid log(0). Always keep low

  • mask_type (str, default: 'sparsemax' ) \u2013

    Mask function to use. Either 'sparsemax' or 'entmax'

Attributes:

  • encoder (Module) \u2013

    the TabNet encoder. For details see the original publication.

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabNet\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n>>> cat_embed_input = [(u, i, j) for u, i, j in zip(colnames[:4], [4] * 4, [8] * 4)]\n>>> column_idx = {k: v for v, k in enumerate(colnames)}\n>>> model = TabNet(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols=[\"e\"])\n>>> out = model(X_tab)\n
Source code in pytorch_widedeep/models/tabular/tabnet/tab_net.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous: Optional[bool] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = None,\n    cont_embed_dim: Optional[int] = None,\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    n_steps: int = 3,\n    step_dim: int = 8,\n    attn_dim: int = 8,\n    dropout: float = 0.0,\n    n_glu_step_dependent: int = 2,\n    n_glu_shared: int = 2,\n    ghost_bn: bool = True,\n    virtual_batch_size: int = 128,\n    momentum: float = 0.02,\n    gamma: float = 1.3,\n    epsilon: float = 1e-15,\n    mask_type: str = \"sparsemax\",\n):\n    super(TabNet, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=embed_continuous,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dim=cont_embed_dim,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.n_steps = n_steps\n    self.step_dim = step_dim\n    self.attn_dim = attn_dim\n    self.dropout = dropout\n    self.n_glu_step_dependent = n_glu_step_dependent\n    self.n_glu_shared = n_glu_shared\n    self.ghost_bn = ghost_bn\n    self.virtual_batch_size = virtual_batch_size\n    self.momentum = momentum\n    self.gamma = gamma\n    self.epsilon = epsilon\n    self.mask_type = mask_type\n\n    # Embeddings are instantiated at the base model\n    self.embed_out_dim = self.cat_out_dim + self.cont_out_dim\n\n    # TabNet\n    self.encoder = TabNetEncoder(\n        self.embed_out_dim,\n        n_steps,\n        step_dim,\n        attn_dim,\n        dropout,\n        n_glu_step_dependent,\n        n_glu_shared,\n        ghost_bn,\n        virtual_batch_size,\n        momentum,\n        gamma,\n        epsilon,\n        mask_type,\n    )\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.tabnet.tab_net.TabNet.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This property is required to build the WideDeep class.

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.tabnet.tab_net.TabNetDecoder","title":"TabNetDecoder","text":"
TabNetDecoder(\n    embed_dim,\n    n_steps=3,\n    step_dim=8,\n    dropout=0.0,\n    n_glu_step_dependent=2,\n    n_glu_shared=2,\n    ghost_bn=True,\n    virtual_batch_size=128,\n    momentum=0.02,\n)\n

Bases: Module

Companion decoder model for the TabNet model (which can be considered an encoder itself)

This class is designed to be used with the EncoderDecoderTrainer when using self-supervised pre-training (see the corresponding section in the docs). This class will receive the output from the TabNet encoder (i.e. the output from the so-called 'steps') and 'reconstruct' the embeddings.

Parameters:

  • embed_dim (int) \u2013

    Size of the embeddings tensor to be reconstructed.

  • n_steps (int, default: 3 ) \u2013

    number of decision steps. For a better understanding of the function of n_steps and the upcoming parameters, please see the paper.

  • step_dim (int, default: 8 ) \u2013

    Step's output dimension. This is the output dimension that WideDeep will collect and connect to the output neuron(s).

  • dropout (float, default: 0.0 ) \u2013

    GLU block's internal dropout

  • n_glu_step_dependent (int, default: 2 ) \u2013

    number of GLU Blocks ([FC -> BN -> GLU]) that are step dependent

  • n_glu_shared (int, default: 2 ) \u2013

    number of GLU Blocks ([FC -> BN -> GLU]) that will be shared across decision steps

  • ghost_bn (bool, default: True ) \u2013

    Boolean indicating if Ghost Batch Normalization will be used.

  • virtual_batch_size (int, default: 128 ) \u2013

    Batch size when using Ghost Batch Normalization

  • momentum (float, default: 0.02 ) \u2013

    Ghost Batch Normalization's momentum. The dreamquark-ai team advises very low values. However, high values are used in the original publication. During our tests, higher values led to better results.

Attributes:

  • decoder (Module) \u2013

    decoder that will receive the output from the encoder's steps and will reconstruct the embeddings

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabNetDecoder\n>>> x_inp = [torch.rand(3, 8), torch.rand(3, 8), torch.rand(3, 8)]\n>>> decoder = TabNetDecoder(embed_dim=32, ghost_bn=False)\n>>> res = decoder(x_inp)\n>>> res.shape\ntorch.Size([3, 32])\n
Source code in pytorch_widedeep/models/tabular/tabnet/tab_net.py
def __init__(\n    self,\n    embed_dim: int,\n    n_steps: int = 3,\n    step_dim: int = 8,\n    dropout: float = 0.0,\n    n_glu_step_dependent: int = 2,\n    n_glu_shared: int = 2,\n    ghost_bn: bool = True,\n    virtual_batch_size: int = 128,\n    momentum: float = 0.02,\n):\n    super(TabNetDecoder, self).__init__()\n\n    self.n_steps = n_steps\n    self.step_dim = step_dim\n    self.dropout = dropout\n    self.n_glu_step_dependent = n_glu_step_dependent\n    self.n_glu_shared = n_glu_shared\n    self.ghost_bn = ghost_bn\n    self.virtual_batch_size = virtual_batch_size\n    self.momentum = momentum\n\n    shared_layers = nn.ModuleList()\n    for i in range(n_glu_shared):\n        if i == 0:\n            shared_layers.append(nn.Linear(step_dim, 2 * step_dim, bias=False))\n        else:\n            shared_layers.append(nn.Linear(step_dim, 2 * step_dim, bias=False))\n\n    self.decoder = nn.ModuleList()\n    for step in range(n_steps):\n        transformer = FeatTransformer(\n            step_dim,\n            step_dim,\n            dropout,\n            shared_layers,\n            n_glu_step_dependent,\n            ghost_bn,\n            virtual_batch_size,\n            momentum=momentum,\n        )\n        self.decoder.append(transformer)\n\n    self.reconstruction_layer = nn.Linear(step_dim, embed_dim, bias=False)\n    initialize_non_glu(self.reconstruction_layer, step_dim, embed_dim)\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.context_attention_mlp.ContextAttentionMLP","title":"ContextAttentionMLP","text":"
ContextAttentionMLP(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    shared_embed=None,\n    add_shared_embed=None,\n    frac_shared_embed=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous_method=\"standard\",\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    input_dim=32,\n    attn_dropout=0.2,\n    with_addnorm=False,\n    attn_activation=\"leaky_relu\",\n    n_blocks=3\n)\n

Bases: BaseTabularModelWithAttention

Defines a ContextAttentionMLP model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

This class combines embedding representations of the categorical features with numerical (aka continuous) features that are also embedded. These are then passed through a series of attention blocks. Each attention block consists of a ContextAttentionEncoder, which is in part inspired by the attention mechanism described in Hierarchical Attention Networks for Document Classification. See pytorch_widedeep.models.tabular.mlp._attention_layers for details.

Most of the parameters for this class are Optional since the use of categorical or continuous features is itself optional (i.e. one can use categorical features only, continuous features only, or both).

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the ContextAttentionMLP model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int]]], default: None ) \u2013

    List of Tuples with the column name and number of unique values. e.g. [(education, 11), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • shared_embed (Optional[bool], default: None ) \u2013

    Boolean indicating if the embeddings will be \"shared\". The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • add_shared_embed (Optional[bool], default: None ) \u2013

    The two embedding sharing strategies are: 1) add the shared embeddings to the column embeddings or 2) to replace the first frac_shared_embed with the shared embeddings. See pytorch_widedeep.models.embeddings_layers.SharedEmbeddings If 'None' is passed, it will default to 'False'.

  • frac_shared_embed (Optional[float], default: None ) \u2013

    The fraction of embeddings that will be shared (if add_shared_embed = False) by all the different categories for one particular column. If 'None' is passed, it will default to 0.0.

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: 'standard' ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the before mentioned paper but it is implemented in the official repo. If True the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropped out. If None, it will default to False.

  • input_dim (int, default: 32 ) \u2013

    The so-called dimension of the model. This is the dimension of the embeddings used to encode the categorical and/or continuous columns.

  • attn_dropout (float, default: 0.2 ) \u2013

    Dropout for each attention block

  • with_addnorm (bool, default: False ) \u2013

    Boolean indicating if residual connections will be used in the attention blocks

  • attn_activation (str, default: 'leaky_relu' ) \u2013

    String indicating the activation function to be applied to the dense layer in each attention encoder. 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported.

  • n_blocks (int, default: 3 ) \u2013

    Number of attention blocks

Attributes:

  • encoder (Module) \u2013

    Sequence of attention encoders.

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import ContextAttentionMLP\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i,j) for u,i,j in zip(colnames[:4], [4]*4, [8]*4)]\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = ContextAttentionMLP(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols = ['e'])\n>>> out = model(X_tab)\n
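
The sketch below is not part of the original docstring; the boundary values and hyper-parameters are illustrative only. It shows how the 'piecewise' and 'periodic' continuous-embedding methods described above might be configured:

>>> import torch
>>> from pytorch_widedeep.models import ContextAttentionMLP
>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)
>>> colnames = ['a', 'b', 'c', 'd', 'e']
>>> cat_embed_input = [(u,i,j) for u,i,j in zip(colnames[:4], [4]*4, [8]*4)]
>>> column_idx = {k:v for v,k in enumerate(colnames)}
>>> # 'piecewise': quantization_setup (bucket boundaries per continuous column) is required
>>> piecewise_model = ContextAttentionMLP(column_idx=column_idx, cat_embed_input=cat_embed_input,
...     continuous_cols=['e'], embed_continuous_method='piecewise',
...     quantization_setup={'e': [0.0, 0.25, 0.5, 0.75, 1.0]})
>>> # 'periodic': n_frequencies, sigma and share_last_layer are required
>>> periodic_model = ContextAttentionMLP(column_idx=column_idx, cat_embed_input=cat_embed_input,
...     continuous_cols=['e'], embed_continuous_method='periodic',
...     n_frequencies=16, sigma=0.1, share_last_layer=True)
>>> out = piecewise_model(X_tab)
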
Source code in pytorch_widedeep/models/tabular/mlp/context_attention_mlp.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    shared_embed: Optional[bool] = None,\n    add_shared_embed: Optional[bool] = None,\n    frac_shared_embed: Optional[float] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = \"standard\",\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    input_dim: int = 32,\n    attn_dropout: float = 0.2,\n    with_addnorm: bool = False,\n    attn_activation: str = \"leaky_relu\",\n    n_blocks: int = 3,\n):\n    super(ContextAttentionMLP, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        shared_embed=shared_embed,\n        add_shared_embed=add_shared_embed,\n        frac_shared_embed=frac_shared_embed,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=None,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        input_dim=input_dim,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.attn_dropout = attn_dropout\n    self.with_addnorm = with_addnorm\n    self.attn_activation = attn_activation\n    self.n_blocks = n_blocks\n\n    self.with_cls_token = \"cls_token\" in column_idx\n    self.n_cat = len(cat_embed_input) if cat_embed_input is not None else 0\n    self.n_cont = len(continuous_cols) if continuous_cols is not None else 0\n\n    # Embeddings are instantiated at the base model\n    # Attention Blocks\n    self.encoder = nn.Sequential()\n    for i in range(n_blocks):\n        self.encoder.add_module(\n            \"attention_block\" + str(i),\n            ContextAttentionEncoder(\n                input_dim,\n                attn_dropout,\n                with_addnorm,\n                attn_activation,\n            ),\n        )\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.context_attention_mlp.ContextAttentionMLP.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This property is required to build the WideDeep class.

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.context_attention_mlp.ContextAttentionMLP.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights per block

The shape of the attention weights is \\((N, F)\\), where \\(N\\) is the batch size and \\(F\\) is the number of features/columns in the dataset

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.self_attention_mlp.SelfAttentionMLP","title":"SelfAttentionMLP","text":"
SelfAttentionMLP(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    shared_embed=None,\n    add_shared_embed=None,\n    frac_shared_embed=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous_method=\"standard\",\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    input_dim=32,\n    attn_dropout=0.2,\n    n_heads=8,\n    use_bias=False,\n    with_addnorm=False,\n    attn_activation=\"leaky_relu\",\n    n_blocks=3\n)\n

Bases: BaseTabularModelWithAttention

Defines a SelfAttentionMLP model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

This class combines embedding representations of the categorical features with numerical (aka continuous) features that are also embedded. These are then passed through a series of attention blocks. Each attention block is comprised of what we would refer to as a simplified SelfAttentionEncoder. See pytorch_widedeep.models.tabular.mlp._attention_layers for details. The reason to use a simplified version of self-attention is that we observed that the 'standard' attention mechanism used in the TabTransformer has a notable tendency to overfit.

In more detail, this model only uses Q and K (and not V). If we think about it in terms of text (and intuitively), the Softmax(QK^T) is the attention mechanism that tells us how much, at each position in the input sentence, each word is represented or 'expressed'. We refer to that as the 'attention weights'. These attention weights are normally multiplied by a Value matrix to further strengthen the focus on the words that each word should be attending to (again, intuitively).

In this implementation we skip this last multiplication and instead multiply the attention weights directly by the input tensor. This is a simplification that we expect to be beneficial in terms of avoiding overfitting on tabular data.
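
As a rough, self-contained sketch of the simplified attention just described (this is not the library's actual implementation; it assumes a single head and omits dropout and the optional add-norm step):

>>> import torch
>>> import torch.nn as nn
>>> x = torch.rand(2, 5, 8)                        # (N, F, D): 2 rows, 5 embedded features, embed dim 8
>>> W_q, W_k = nn.Linear(8, 8, bias=False), nn.Linear(8, 8, bias=False)
>>> q, k = W_q(x), W_k(x)
>>> attn_weights = torch.softmax(q @ k.transpose(1, 2) / 8 ** 0.5, dim=-1)  # (N, F, F)
>>> out = attn_weights @ x                         # weights applied directly to the input, no V matrix
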

Most of the parameters for this class are Optional since the use of categorical or continuous is in fact optional (i.e. one can use categorical features only, continuous features only or both).

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the SelfAttentionMLP model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int]]], default: None ) \u2013

    List of Tuples with the column name and number of unique values and embedding dimension. e.g. [(education, 11), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • shared_embed (Optional[bool], default: None ) \u2013

    Boolean indicating if the embeddings will be \"shared\". The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • add_shared_embed (Optional[bool], default: None ) \u2013

    The two embedding sharing strategies are: 1) add the shared embeddings to the column embeddings or 2) replace the first frac_shared_embed with the shared embeddings. See pytorch_widedeep.models.embeddings_layers.SharedEmbeddings. If 'None' is passed, it will default to 'False'.

  • frac_shared_embed (Optional[float], default: None ) \u2013

    The fraction of embeddings that will be shared (if add_shared_embed = False) by all the different categories for one particular column. If 'None' is passed, it will default to 0.0.

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: 'standard' ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the before mentioned paper but it is implemented in the official repo. If True the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropped out. If None, it will default to False.

  • input_dim (int, default: 32 ) \u2013

    The so-called dimension of the model. This is the dimension of the embeddings used to encode the categorical and/or continuous columns.

  • attn_dropout (float, default: 0.2 ) \u2013

    Dropout for each attention block

  • n_heads (int, default: 8 ) \u2013

    Number of attention heads per attention block.

  • use_bias (bool, default: False ) \u2013

    Boolean indicating whether or not to use bias in the Q, K projection layers.

  • with_addnorm (bool, default: False ) \u2013

    Boolean indicating if residual connections will be used in the attention blocks

  • attn_activation (str, default: 'leaky_relu' ) \u2013

    String indicating the activation function to be applied to the dense layer in each attention encoder. 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported.

  • n_blocks (int, default: 3 ) \u2013

    Number of attention blocks

Attributes:

  • cat_and_cont_embed (Module) \u2013

    This is the module that processes the categorical and continuous columns

  • encoder (Module) \u2013

    Sequence of attention encoders.

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import SelfAttentionMLP\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i,j) for u,i,j in zip(colnames[:4], [4]*4, [8]*4)]\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = SelfAttentionMLP(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols = ['e'])\n>>> out = model(X_tab)\n
Source code in pytorch_widedeep/models/tabular/mlp/self_attention_mlp.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    shared_embed: Optional[bool] = None,\n    add_shared_embed: Optional[bool] = None,\n    frac_shared_embed: Optional[float] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = \"standard\",\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    input_dim: int = 32,\n    attn_dropout: float = 0.2,\n    n_heads: int = 8,\n    use_bias: bool = False,\n    with_addnorm: bool = False,\n    attn_activation: str = \"leaky_relu\",\n    n_blocks: int = 3,\n):\n    super(SelfAttentionMLP, self).__init__(\n        column_idx=column_idx,\n        input_dim=input_dim,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        shared_embed=shared_embed,\n        add_shared_embed=add_shared_embed,\n        frac_shared_embed=frac_shared_embed,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=None,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.attn_dropout = attn_dropout\n    self.n_heads = n_heads\n    self.use_bias = use_bias\n    self.with_addnorm = with_addnorm\n    self.attn_activation = attn_activation\n    self.n_blocks = n_blocks\n\n    self.with_cls_token = \"cls_token\" in column_idx\n    self.n_cat = len(cat_embed_input) if cat_embed_input is not None else 0\n    self.n_cont = len(continuous_cols) if continuous_cols is not None else 0\n\n    # Embeddings are instantiated at the base model\n    # Attention Blocks\n    self.encoder = nn.Sequential()\n    for i in range(n_blocks):\n        self.encoder.add_module(\n            \"attention_block\" + str(i),\n            SelfAttentionEncoder(\n                input_dim,\n                attn_dropout,\n                use_bias,\n                n_heads,\n                with_addnorm,\n                attn_activation,\n            ),\n        )\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.self_attention_mlp.SelfAttentionMLP.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This property is required to build the WideDeep class.

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.self_attention_mlp.SelfAttentionMLP.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights per block

The shape of the attention weights is \\((N, H, F, F)\\), where \\(N\\) is the batch size, \\(H\\) is the number of attention heads and \\(F\\) is the number of features/columns in the dataset
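
For illustration (this snippet is not part of the original docs; the shapes assume the defaults n_blocks=3 and n_heads=8 and the 5-row, 5-column example above):

>>> out = model(X_tab)
>>> attn = model.attention_weights      # list with one tensor per attention block
>>> # len(attn) == 3 and each element is expected to have shape (N, H, F, F), i.e. (5, 8, 5, 5)
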

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_transformer.TabTransformer","title":"TabTransformer","text":"
TabTransformer(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    shared_embed=None,\n    add_shared_embed=None,\n    frac_shared_embed=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous=None,\n    embed_continuous_method=None,\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    input_dim=32,\n    n_heads=8,\n    use_qkv_bias=False,\n    n_blocks=4,\n    attn_dropout=0.2,\n    ff_dropout=0.1,\n    ff_factor=4,\n    transformer_activation=\"gelu\",\n    use_linear_attention=False,\n    use_flash_attention=False,\n    mlp_hidden_dims=None,\n    mlp_activation=\"relu\",\n    mlp_dropout=0.1,\n    mlp_batchnorm=False,\n    mlp_batchnorm_last=False,\n    mlp_linear_first=True\n)\n

Bases: BaseTabularModelWithAttention

Defines our adaptation of the TabTransformer model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

Most of the parameters for this class are Optional since the use of categorical or continuous is in fact optional (i.e. one can use categorical features only, continuous features only or both).

NOTE: This is an enhanced adaptation of the model described in the paper. It can be considered the flagship of our transformer family of models for tabular data and offers multiple additional features relative to the original publication (and to some other models in the library).

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the TabTransformer model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int]]], default: None ) \u2013

    List of Tuples with the column name and number of unique values and embedding dimension. e.g. [(education, 11), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • shared_embed (Optional[bool], default: None ) \u2013

    Boolean indicating if the embeddings will be \"shared\". The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • add_shared_embed (Optional[bool], default: None ) \u2013

    The two embedding sharing strategies are: 1) add the shared embeddings to the column embeddings or 2) replace the first frac_shared_embed with the shared embeddings. See pytorch_widedeep.models.embeddings_layers.SharedEmbeddings. If 'None' is passed, it will default to 'False'.

  • frac_shared_embed (Optional[float], default: None ) \u2013

    The fraction of embeddings that will be shared (if add_shared_embed = False) by all the different categories for one particular column. If 'None' is passed, it will default to 0.0.

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: None ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the before mentioned paper but it is implemented in the official repo. If True the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropped out. If None, it will default to False.

  • input_dim (int, default: 32 ) \u2013

    The so-called dimension of the model. This is the dimension of the embeddings used to encode the categorical and/or continuous columns.

  • n_heads (int, default: 8 ) \u2013

    Number of attention heads per Transformer block

  • use_qkv_bias (bool, default: False ) \u2013

    Boolean indicating whether or not to use bias in the Q, K, and V projection layers.

  • n_blocks (int, default: 4 ) \u2013

    Number of Transformer blocks

  • attn_dropout (float, default: 0.2 ) \u2013

    Dropout that will be applied to the Multi-Head Attention layers

  • ff_dropout (float, default: 0.1 ) \u2013

    Dropout that will be applied to the FeedForward network

  • ff_factor (int, default: 4 ) \u2013

    Multiplicative factor applied to the first layer of the FF network in each Transformer block. This is normally set to 4.

  • transformer_activation (str, default: 'gelu' ) \u2013

    Transformer Encoder activation function. 'tanh', 'relu', 'leaky_relu', 'gelu', 'geglu' and 'reglu' are supported

  • use_linear_attention (bool, default: False ) \u2013

    Boolean indicating if Linear Attention (from Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention) will be used. The inclusion of this mode of attention is inspired by this post, where the Uber team finds that this attention mechanism leads to the best results for their tabular data.

  • use_flash_attention (bool, default: False ) \u2013

    Boolean indicating if Flash Attention will be used.

  • mlp_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the MLP, e.g. [64, 32]. If not provided, no MLP on top of the final Transformer block will be used.

  • mlp_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 'relu'.

  • mlp_dropout (float, default: 0.1 ) \u2013

    float with the dropout between the dense layers of the MLP. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 0.0.

  • mlp_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_linear_first (bool, default: True ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to True.

Attributes:

  • encoder (Module) \u2013

    Sequence of Transformer blocks

  • mlp (Module) \u2013

    MLP component in the model

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabTransformer\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i) for u,i in zip(colnames[:4], [4]*4)]\n>>> continuous_cols = ['e']\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = TabTransformer(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols=continuous_cols)\n>>> out = model(X_tab)\n
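
As an additional hedged sketch (not part of the original docstring; the column names are made up), this shows the continuous-only case enforced in the source code below, where embed_continuous must be set to True:

>>> import torch
>>> from pytorch_widedeep.models import TabTransformer
>>> X_tab = torch.rand(5, 2)                                  # two continuous columns, no categoricals
>>> column_idx = {'height': 0, 'weight': 1}
>>> model = TabTransformer(column_idx=column_idx, continuous_cols=['height', 'weight'],
...     embed_continuous=True, embed_continuous_method='standard')
>>> out = model(X_tab)
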
Source code in pytorch_widedeep/models/tabular/transformers/tab_transformer.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    shared_embed: Optional[bool] = None,\n    add_shared_embed: Optional[bool] = None,\n    frac_shared_embed: Optional[float] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous: Optional[bool] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = None,\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    input_dim: int = 32,\n    n_heads: int = 8,\n    use_qkv_bias: bool = False,\n    n_blocks: int = 4,\n    attn_dropout: float = 0.2,\n    ff_dropout: float = 0.1,\n    ff_factor: int = 4,\n    transformer_activation: str = \"gelu\",\n    use_linear_attention: bool = False,\n    use_flash_attention: bool = False,\n    mlp_hidden_dims: Optional[List[int]] = None,\n    mlp_activation: str = \"relu\",\n    mlp_dropout: float = 0.1,\n    mlp_batchnorm: bool = False,\n    mlp_batchnorm_last: bool = False,\n    mlp_linear_first: bool = True,\n):\n    super(TabTransformer, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        shared_embed=shared_embed,\n        add_shared_embed=add_shared_embed,\n        frac_shared_embed=frac_shared_embed,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=embed_continuous,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        input_dim=input_dim,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.n_heads = n_heads\n    self.use_qkv_bias = use_qkv_bias\n    self.n_blocks = n_blocks\n    self.attn_dropout = attn_dropout\n    self.ff_dropout = ff_dropout\n    self.transformer_activation = transformer_activation\n    self.use_linear_attention = use_linear_attention\n    self.use_flash_attention = use_flash_attention\n    self.ff_factor = ff_factor\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    self.with_cls_token = \"cls_token\" in column_idx\n    self.n_cat = len(cat_embed_input) if cat_embed_input is not None else 0\n    self.n_cont = len(continuous_cols) if continuous_cols is not None else 0\n\n    if self.n_cont and not self.n_cat and not self.embed_continuous:\n        raise ValueError(\n            \"If only continuous features are used 'embed_continuous' must be set to 'True'\"\n        )\n\n    # Embeddings are 
instantiated at the base model\n    # Transformer blocks\n    self.encoder = nn.Sequential()\n    for i in range(n_blocks):\n        self.encoder.add_module(\n            \"transformer_block\" + str(i),\n            TransformerEncoder(\n                input_dim,\n                n_heads,\n                use_qkv_bias,\n                attn_dropout,\n                ff_dropout,\n                ff_factor,\n                transformer_activation,\n                use_linear_attention,\n                use_flash_attention,\n            ),\n        )\n\n    self.mlp_first_hidden_dim = self._mlp_first_hidden_dim()\n\n    if self.mlp_hidden_dims is not None:\n        self.mlp = MLP(\n            d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim,\n            activation=\"relu\"\n            if self.mlp_activation is None\n            else self.mlp_activation,\n            dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout,\n            batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm,\n            batchnorm_last=False\n            if self.mlp_batchnorm_last is None\n            else self.mlp_batchnorm_last,\n            linear_first=False\n            if self.mlp_linear_first is None\n            else self.mlp_linear_first,\n        )\n    else:\n        self.mlp = None\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_transformer.TabTransformer.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This property is required to build the WideDeep class.

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_transformer.TabTransformer.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights per block

The shape of the attention weights is \\((N, H, F, F)\\), where \\(N\\) is the batch size, \\(H\\) is the number of attention heads and \\(F\\) is the number of features/columns in the dataset

NOTE: if flash attention or linear attention are used, no attention weights are saved during the training process and calling this property will throw a ValueError

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.saint.SAINT","title":"SAINT","text":"
SAINT(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    shared_embed=None,\n    add_shared_embed=None,\n    frac_shared_embed=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous_method=\"standard\",\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    input_dim=32,\n    use_qkv_bias=False,\n    n_heads=8,\n    n_blocks=2,\n    attn_dropout=0.1,\n    ff_dropout=0.2,\n    ff_factor=4,\n    transformer_activation=\"gelu\",\n    mlp_hidden_dims=None,\n    mlp_activation=None,\n    mlp_dropout=None,\n    mlp_batchnorm=None,\n    mlp_batchnorm_last=None,\n    mlp_linear_first=None\n)\n

Bases: BaseTabularModelWithAttention

Defines a SAINT model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

Most of the parameters for this class are Optional since the use of categorical or continuous is in fact optional (i.e. one can use categorical features only, continuous features only or both).

NOTE: This is a slightly modified and enhanced version of the model described in the paper.

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the SAINT model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int]]], default: None ) \u2013

    List of Tuples with the column name and number of unique values and embedding dimension. e.g. [(education, 11), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • shared_embed (Optional[bool], default: None ) \u2013

    Boolean indicating if the embeddings will be \"shared\". The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • add_shared_embed (Optional[bool], default: None ) \u2013

    The two embedding sharing strategies are: 1) add the shared embeddings to the column embeddings or 2) replace the first frac_shared_embed with the shared embeddings. See pytorch_widedeep.models.embeddings_layers.SharedEmbeddings. If 'None' is passed, it will default to 'False'.

  • frac_shared_embed (Optional[float], default: None ) \u2013

    The fraction of embeddings that will be shared (if add_shared_embed = False) by all the different categories for one particular column. If 'None' is passed, it will default to 0.0.

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: 'standard' ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the before mentioned paper but it is implemented in the official repo. If True the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropped out. If None, it will default to False.

  • input_dim (int, default: 32 ) \u2013

    The so-called dimension of the model. This is the dimension of the embeddings used to encode the categorical and/or continuous columns.

  • n_heads (int, default: 8 ) \u2013

    Number of attention heads per Transformer block

  • use_qkv_bias (bool, default: False ) \u2013

    Boolean indicating whether or not to use bias in the Q, K, and V projection layers

  • n_blocks (int, default: 2 ) \u2013

    Number of SAINT-Transformer blocks.

  • attn_dropout (float, default: 0.1 ) \u2013

    Dropout that will be applied to the Multi-Head Attention column and row layers

  • ff_dropout (float, default: 0.2 ) \u2013

    Dropout that will be applied to the FeedForward network

  • ff_factor (int, default: 4 ) \u2013

    Multiplicative factor applied to the first layer of the FF network in each Transformer block. This is normally set to 4.

  • transformer_activation (str, default: 'gelu' ) \u2013

    Transformer Encoder activation function. 'tanh', 'relu', 'leaky_relu', 'gelu', 'geglu' and 'reglu' are supported

  • mlp_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the MLP, e.g. [64, 32]. If not provided, no MLP on top of the final Transformer block will be used.

  • mlp_activation (Optional[str], default: None ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 'relu'.

  • mlp_dropout (Optional[float], default: None ) \u2013

    float with the dropout between the dense layers of the MLP. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 0.0.

  • mlp_batchnorm (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_batchnorm_last (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_linear_first (Optional[bool], default: None ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to True.

Attributes:

  • encoder (Module) \u2013

    Sequence of SAINT-Transformer blocks

  • mlp (Module) \u2013

    MLP component in the model

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import SAINT\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i) for u,i in zip(colnames[:4], [4]*4)]\n>>> continuous_cols = ['e']\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = SAINT(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols=continuous_cols)\n>>> out = model(X_tab)\n
Source code in pytorch_widedeep/models/tabular/transformers/saint.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    shared_embed: Optional[bool] = None,\n    add_shared_embed: Optional[bool] = None,\n    frac_shared_embed: Optional[float] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = \"standard\",\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    input_dim: int = 32,\n    use_qkv_bias: bool = False,\n    n_heads: int = 8,\n    n_blocks: int = 2,\n    attn_dropout: float = 0.1,\n    ff_dropout: float = 0.2,\n    ff_factor: int = 4,\n    transformer_activation: str = \"gelu\",\n    mlp_hidden_dims: Optional[List[int]] = None,\n    mlp_activation: Optional[str] = None,\n    mlp_dropout: Optional[float] = None,\n    mlp_batchnorm: Optional[bool] = None,\n    mlp_batchnorm_last: Optional[bool] = None,\n    mlp_linear_first: Optional[bool] = None,\n):\n    super(SAINT, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        shared_embed=shared_embed,\n        add_shared_embed=add_shared_embed,\n        frac_shared_embed=frac_shared_embed,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=None,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        input_dim=input_dim,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.use_qkv_bias = use_qkv_bias\n    self.n_heads = n_heads\n    self.n_blocks = n_blocks\n    self.attn_dropout = attn_dropout\n    self.ff_dropout = ff_dropout\n    self.ff_factor = ff_factor\n    self.transformer_activation = transformer_activation\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    self.with_cls_token = \"cls_token\" in column_idx\n    self.n_cat = len(cat_embed_input) if cat_embed_input is not None else 0\n    self.n_cont = len(continuous_cols) if continuous_cols is not None else 0\n    self.n_feats = self.n_cat + self.n_cont\n\n    # Embeddings are instantiated at the base model\n    # Transformer blocks\n    self.encoder = nn.Sequential()\n    for i in range(n_blocks):\n        self.encoder.add_module(\n            \"saint_block\" + str(i),\n            SaintEncoder(\n                input_dim,\n                n_heads,\n                use_qkv_bias,\n                attn_dropout,\n                
ff_dropout,\n                ff_factor,\n                transformer_activation,\n                self.n_feats,\n            ),\n        )\n\n    self.mlp_first_hidden_dim = (\n        self.input_dim if self.with_cls_token else (self.n_feats * self.input_dim)\n    )\n\n    # Mlp: adding an MLP on top of the Resnet blocks is optional and\n    # therefore all related params are optional\n    if self.mlp_hidden_dims is not None:\n        self.mlp = MLP(\n            d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim,\n            activation=\"relu\"\n            if self.mlp_activation is None\n            else self.mlp_activation,\n            dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout,\n            batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm,\n            batchnorm_last=False\n            if self.mlp_batchnorm_last is None\n            else self.mlp_batchnorm_last,\n            linear_first=False\n            if self.mlp_linear_first is None\n            else self.mlp_linear_first,\n        )\n    else:\n        self.mlp = None\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.saint.SAINT.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This property is required to build the WideDeep class.

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.saint.SAINT.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights. Each element of the list is a tuple where the first and the second elements are the column and row attention weights respectively

The shape of the attention weights is:

  • column attention: \\((N, H, F, F)\\)

  • row attention: \\((1, H, N, N)\\)

where \\(N\\) is the batch size, \\(H\\) is the number of heads and \\(F\\) is the number of features/columns in the dataset
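
For illustration (not part of the original docs; shapes assume the default n_heads=8 and the 5-row, 5-column example above):

>>> out = model(X_tab)
>>> col_attn, row_attn = model.attention_weights[0]   # first SAINT block
>>> # col_attn is expected to be (N, H, F, F) = (5, 8, 5, 5) and row_attn (1, H, N, N) = (1, 8, 5, 5)
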

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.ft_transformer.FTTransformer","title":"FTTransformer","text":"
FTTransformer(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    shared_embed=None,\n    add_shared_embed=None,\n    frac_shared_embed=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous_method=\"standard\",\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    input_dim=64,\n    kv_compression_factor=0.5,\n    kv_sharing=False,\n    use_qkv_bias=False,\n    n_heads=8,\n    n_blocks=4,\n    attn_dropout=0.2,\n    ff_dropout=0.1,\n    ff_factor=1.33,\n    transformer_activation=\"reglu\",\n    mlp_hidden_dims=None,\n    mlp_activation=None,\n    mlp_dropout=None,\n    mlp_batchnorm=None,\n    mlp_batchnorm_last=None,\n    mlp_linear_first=None\n)\n

Bases: BaseTabularModelWithAttention

Defines a FTTransformer model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

Most of the parameters for this class are Optional since the use of categorical or continuous is in fact optional (i.e. one can use categorical features only, continuous features only or both).

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the FTTransformer model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int]]], default: None ) \u2013

    List of Tuples with the column name and number of unique values and embedding dimension. e.g. [(education, 11), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • shared_embed (Optional[bool], default: None ) \u2013

    Boolean indicating if the embeddings will be \"shared\". The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • add_shared_embed (Optional[bool], default: None ) \u2013

    The two embedding sharing strategies are: 1) add the shared embeddings to the column embeddings or 2) replace the first frac_shared_embed with the shared embeddings. See pytorch_widedeep.models.embeddings_layers.SharedEmbeddings. If 'None' is passed, it will default to 'False'.

  • frac_shared_embed (Optional[float], default: None ) \u2013

    The fraction of embeddings that will be shared (if add_shared_embed = False) by all the different categories for one particular column. If 'None' is passed, it will default to 0.0.

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: 'standard' ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the before mentioned paper but it is implemented in the official repo. If True the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropped out. If None, it will default to False.

  • input_dim (int, default: 64 ) \u2013

    The so-called dimension of the model. It is the number of embeddings used to encode the categorical and/or continuous columns.

  • kv_compression_factor (float, default: 0.5 ) \u2013

    By default, the FTTransformer uses Linear Attention (see Linformer: Self-Attention with Linear Complexity). This is the compression factor used to reduce the input sequence length: the resulting sequence length is \\(k = int(kv_{compression \\space factor} \\times s)\\), where \\(s\\) is the input sequence length.

  • kv_sharing (bool, default: False ) \u2013

    Boolean indicating if the \\(E\\) and \\(F\\) projection matrices will share weights. See Linformer: Self-Attention with Linear Complexity for details

  • n_heads (int, default: 8 ) \u2013

    Number of attention heads per FTTransformer block

  • use_qkv_bias (bool, default: False ) \u2013

    Boolean indicating whether or not to use bias in the Q, K, and V projection layers

  • n_blocks (int, default: 4 ) \u2013

    Number of FTTransformer blocks

  • attn_dropout (float, default: 0.2 ) \u2013

    Dropout that will be applied to the Linear-Attention layers

  • ff_dropout (float, default: 0.1 ) \u2013

    Dropout that will be applied to the FeedForward network

  • ff_factor (float, default: 1.33 ) \u2013

    Multiplicative factor applied to the first layer of the FF network in each Transformer block. This is normally set to 4, but the paper uses 4/3.

  • transformer_activation (str, default: 'reglu' ) \u2013

    Transformer Encoder activation function. 'tanh', 'relu', 'leaky_relu', 'gelu', 'geglu' and 'reglu' are supported

  • mlp_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the MLP. e.g: [64, 32]. If not provided, no MLP will be used on top of the final FTTransformer block.

  • mlp_activation (Optional[str], default: None ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 'relu'.

  • mlp_dropout (Optional[float], default: None ) \u2013

    float with the dropout between the dense layers of the MLP. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 0.0.

  • mlp_batchnorm (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_batchnorm_last (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_linear_first (Optional[bool], default: None ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to True.

Attributes:

  • encoder (Module) \u2013

    Sequence of FTTransformer blocks

  • mlp (Module) \u2013

    MLP component in the model

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import FTTransformer\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i) for u,i in zip(colnames[:4], [4]*4)]\n>>> continuous_cols = ['e']\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = FTTransformer(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols=continuous_cols)\n>>> out = model(X_tab)\n
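
Building on the example above, the following is an illustrative sketch (not part of the library's canonical examples) of how the 'piecewise' and 'periodic' continuous-embedding methods described in the parameters might be configured. The bucket boundaries for column 'e' and the values of n_frequencies and sigma are made up for illustration; check the library's examples for the exact conventions expected by quantization_setup.

>>> # hypothetical boundaries for 'e' (torch.rand draws values in [0, 1))
>>> piecewise_model = FTTransformer(column_idx=column_idx, cat_embed_input=cat_embed_input,
...     continuous_cols=continuous_cols, embed_continuous_method='piecewise',
...     quantization_setup={'e': [0.0, 0.25, 0.5, 0.75, 1.0]})
>>> out = piecewise_model(X_tab)
>>> # the 'periodic' method requires n_frequencies, sigma and share_last_layer
>>> periodic_model = FTTransformer(column_idx=column_idx, cat_embed_input=cat_embed_input,
...     continuous_cols=continuous_cols, embed_continuous_method='periodic',
...     n_frequencies=8, sigma=0.05, share_last_layer=False)
>>> out = periodic_model(X_tab)
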
Source code in pytorch_widedeep/models/tabular/transformers/ft_transformer.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    shared_embed: Optional[bool] = None,\n    add_shared_embed: Optional[bool] = None,\n    frac_shared_embed: Optional[float] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = \"standard\",\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    input_dim: int = 64,\n    kv_compression_factor: float = 0.5,\n    kv_sharing: bool = False,\n    use_qkv_bias: bool = False,\n    n_heads: int = 8,\n    n_blocks: int = 4,\n    attn_dropout: float = 0.2,\n    ff_dropout: float = 0.1,\n    ff_factor: float = 1.33,\n    transformer_activation: str = \"reglu\",\n    mlp_hidden_dims: Optional[List[int]] = None,\n    mlp_activation: Optional[str] = None,\n    mlp_dropout: Optional[float] = None,\n    mlp_batchnorm: Optional[bool] = None,\n    mlp_batchnorm_last: Optional[bool] = None,\n    mlp_linear_first: Optional[bool] = None,\n):\n    super(FTTransformer, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        shared_embed=shared_embed,\n        add_shared_embed=add_shared_embed,\n        frac_shared_embed=frac_shared_embed,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=None,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        input_dim=input_dim,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.kv_compression_factor = kv_compression_factor\n    self.kv_sharing = kv_sharing\n    self.use_qkv_bias = use_qkv_bias\n    self.n_heads = n_heads\n    self.n_blocks = n_blocks\n    self.attn_dropout = attn_dropout\n    self.ff_dropout = ff_dropout\n    self.ff_factor = ff_factor\n    self.transformer_activation = transformer_activation\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    self.with_cls_token = \"cls_token\" in column_idx\n    self.n_cat = len(cat_embed_input) if cat_embed_input is not None else 0\n    self.n_cont = len(continuous_cols) if continuous_cols is not None else 0\n    self.n_feats = self.n_cat + self.n_cont\n\n    # Embeddings are instantiated at the base model\n    # Transformer blocks\n    is_first = True\n    self.encoder = nn.Sequential()\n    for i in range(n_blocks):\n        self.encoder.add_module(\n            
\"fttransformer_block\" + str(i),\n            FTTransformerEncoder(\n                input_dim,\n                self.n_feats,\n                n_heads,\n                use_qkv_bias,\n                attn_dropout,\n                ff_dropout,\n                ff_factor,\n                kv_compression_factor,\n                kv_sharing,\n                transformer_activation,\n                is_first,\n            ),\n        )\n        is_first = False\n\n    self.mlp_first_hidden_dim = (\n        self.input_dim if self.with_cls_token else (self.n_feats * self.input_dim)\n    )\n\n    # Mlp: adding an MLP on top of the Resnet blocks is optional and\n    # therefore all related params are optional\n    if self.mlp_hidden_dims is not None:\n        self.mlp = MLP(\n            d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim,\n            activation=\"relu\"\n            if self.mlp_activation is None\n            else self.mlp_activation,\n            dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout,\n            batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm,\n            batchnorm_last=False\n            if self.mlp_batchnorm_last is None\n            else self.mlp_batchnorm_last,\n            linear_first=False\n            if self.mlp_linear_first is None\n            else self.mlp_linear_first,\n        )\n    else:\n        self.mlp = None\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.ft_transformer.FTTransformer.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property necessary to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.ft_transformer.FTTransformer.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights per block

The shape of the attention weights is: \\((N, H, F, k)\\), where \\(N\\) is the batch size, \\(H\\) is the number of attention heads, \\(F\\) is the number of features/columns and \\(k\\) is the reduced sequence length or dimension, i.e. \\(k = int(kv_{compression \\space factor} \\times s)\\)
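
As a rough sketch (assuming the model and X_tab from the example above, with the default n_blocks=4, n_heads=8 and kv_compression_factor=0.5), the weights could be inspected after a forward pass along these lines:

>>> out = model(X_tab)  # attention weights are collected during the forward pass
>>> attn = model.attention_weights  # a list with one tensor per FTTransformer block
>>> shape = attn[0].shape  # expected (N, H, F, k); with the defaults above that would be (5, 8, 5, 2), since k = int(0.5 * 5) = 2
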

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_perceiver.TabPerceiver","title":"TabPerceiver","text":"
TabPerceiver(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    shared_embed=None,\n    add_shared_embed=None,\n    frac_shared_embed=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous_method=\"standard\",\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    input_dim=32,\n    n_cross_attns=1,\n    n_cross_attn_heads=4,\n    n_latents=16,\n    latent_dim=128,\n    n_latent_heads=4,\n    n_latent_blocks=4,\n    n_perceiver_blocks=4,\n    share_weights=False,\n    attn_dropout=0.1,\n    ff_dropout=0.1,\n    ff_factor=4,\n    transformer_activation=\"geglu\",\n    mlp_hidden_dims=None,\n    mlp_activation=None,\n    mlp_dropout=None,\n    mlp_batchnorm=None,\n    mlp_batchnorm_last=None,\n    mlp_linear_first=None\n)\n

Bases: BaseTabularModelWithAttention

Defines an adaptation of a Perceiver that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

Most of the parameters for this class are Optional since the use of categorical or continuous features is in fact optional (i.e. one can use categorical features only, continuous features only or both).

NOTE: while there are scientific publications for the TabTransformer, SAINT and FTTransformer, the TabPerceiver and the TabFastFormer are our own adaptations of the Perceiver and the FastFormer for tabular data.

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the TabMlp model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int]]], default: None ) \u2013

    List of Tuples with the column name and number of unique values, e.g. [(education, 11), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • shared_embed (Optional[bool], default: None ) \u2013

    Boolean indicating if the embeddings will be \"shared\". The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • add_shared_embed (Optional[bool], default: None ) \u2013

    The two embedding sharing strategies are: 1) add the shared embeddings to the column embeddings or 2) replace the first frac_shared_embed with the shared embeddings. See pytorch_widedeep.models.embeddings_layers.SharedEmbeddings. If 'None' is passed, it will default to 'False'.

  • frac_shared_embed (Optional[float], default: None ) \u2013

    The fraction of embeddings that will be shared (if add_shared_embed = False) by all the different categories for one particular column. If 'None' is passed, it will default to 0.0.

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: 'standard' ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the aforementioned paper but it is implemented in the official repo. If True, the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False, a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropped out. If None, it will default to False.

  • input_dim (int, default: 32 ) \u2013

    The so-called dimension of the model. It is the number of embeddings used to encode the categorical and/or continuous columns.

  • n_cross_attns (int, default: 1 ) \u2013

    Number of times each perceiver block will cross attend to the input data (i.e. number of cross attention components per perceiver block). This should normally be 1. However, in the paper they describe some architectures (normally computer vision-related problems) where the Perceiver attends multiple times to the input array. Therefore, attending multiple times to the input array might also be useful in some cases for tabular data.

  • n_cross_attn_heads (int, default: 4 ) \u2013

    Number of attention heads for the cross attention component

  • n_latents (int, default: 16 ) \u2013

    Number of latents. This is the \\(N\\) parameter in the paper. As indicated in the paper, this number should be significantly lower than \\(M\\) (the number of columns in the dataset). Setting \\(N\\) closer to \\(M\\) defies the main purpose of the Perceiver, which is to overcome the transformer quadratic bottleneck

  • latent_dim (int, default: 128 ) \u2013

    Latent dimension.

  • n_latent_heads (int, default: 4 ) \u2013

    Number of attention heads per Latent Transformer

  • n_latent_blocks (int, default: 4 ) \u2013

    Number of transformer encoder blocks (normalised MHA + normalised FF) per Latent Transformer

  • n_perceiver_blocks (int, default: 4 ) \u2013

    Number of Perceiver blocks defined as [Cross Attention + Latent Transformer]

  • share_weights (bool, default: False ) \u2013

    Boolean indicating if the weights will be shared between Perceiver blocks

  • attn_dropout (float, default: 0.1 ) \u2013

    Dropout that will be applied to the Multi-Head Attention layers

  • ff_dropout (float, default: 0.1 ) \u2013

    Dropout that will be applied to the FeedForward network

  • ff_factor (int, default: 4 ) \u2013

    Multiplicative factor applied to the first layer of the FF network in each Transformer block. This is normally set to 4.

  • transformer_activation (str, default: 'geglu' ) \u2013

    Transformer Encoder activation function. 'tanh', 'relu', 'leaky_relu', 'gelu', 'geglu' and 'reglu' are supported

  • mlp_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the MLP. e.g: [64, 32]. If not provided, no MLP will be used on top of the final Transformer block.

  • mlp_activation (Optional[str], default: None ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 'relu'.

  • mlp_dropout (Optional[float], default: None ) \u2013

    float with the dropout between the dense layers of the MLP. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 0.0.

  • mlp_batchnorm (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_batchnorm_last (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_linear_first (Optional[bool], default: None ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to True.

Attributes:

  • encoder (ModuleDict) \u2013

    ModuleDict with the Perceiver blocks

  • latents (Parameter) \u2013

    Latents that will be used for prediction

  • mlp (Module) \u2013

    MLP component in the model

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabPerceiver\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i) for u,i in zip(colnames[:4], [4]*4)]\n>>> continuous_cols = ['e']\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = TabPerceiver(column_idx=column_idx, cat_embed_input=cat_embed_input,\n... continuous_cols=continuous_cols, n_latents=2, latent_dim=16,\n... n_perceiver_blocks=2)\n>>> out = model(X_tab)\n
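
As noted above, the model can also act as the deeptabular component of a WideDeep model. A minimal, illustrative sketch (training itself is handled by the library's Trainer, which is documented elsewhere):

>>> from pytorch_widedeep.models import WideDeep
>>> wd_model = WideDeep(deeptabular=model)  # the output_dim property is used to size the prediction head
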
Source code in pytorch_widedeep/models/tabular/transformers/tab_perceiver.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    shared_embed: Optional[bool] = None,\n    add_shared_embed: Optional[bool] = None,\n    frac_shared_embed: Optional[float] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = \"standard\",\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    input_dim: int = 32,\n    n_cross_attns: int = 1,\n    n_cross_attn_heads: int = 4,\n    n_latents: int = 16,\n    latent_dim: int = 128,\n    n_latent_heads: int = 4,\n    n_latent_blocks: int = 4,\n    n_perceiver_blocks: int = 4,\n    share_weights: bool = False,\n    attn_dropout: float = 0.1,\n    ff_dropout: float = 0.1,\n    ff_factor: int = 4,\n    transformer_activation: str = \"geglu\",\n    mlp_hidden_dims: Optional[List[int]] = None,\n    mlp_activation: Optional[str] = None,\n    mlp_dropout: Optional[float] = None,\n    mlp_batchnorm: Optional[bool] = None,\n    mlp_batchnorm_last: Optional[bool] = None,\n    mlp_linear_first: Optional[bool] = None,\n):\n    super(TabPerceiver, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        shared_embed=shared_embed,\n        add_shared_embed=add_shared_embed,\n        frac_shared_embed=frac_shared_embed,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=None,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        input_dim=input_dim,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.n_cross_attns = n_cross_attns\n    self.n_cross_attn_heads = n_cross_attn_heads\n    self.n_latents = n_latents\n    self.latent_dim = latent_dim\n    self.n_latent_heads = n_latent_heads\n    self.n_latent_blocks = n_latent_blocks\n    self.n_perceiver_blocks = n_perceiver_blocks\n    self.share_weights = share_weights\n    self.attn_dropout = attn_dropout\n    self.ff_dropout = ff_dropout\n    self.ff_factor = ff_factor\n    self.transformer_activation = transformer_activation\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    # Embeddings are instantiated at the base model\n    # Transformer blocks\n    self.latents = nn.init.trunc_normal_(\n        nn.Parameter(torch.empty(n_latents, latent_dim))\n    )\n\n    self.encoder = nn.ModuleDict()\n    
first_perceiver_block = self._build_perceiver_block()\n    self.encoder[\"perceiver_block0\"] = first_perceiver_block\n\n    if share_weights:\n        for n in range(1, n_perceiver_blocks):\n            self.encoder[\"perceiver_block\" + str(n)] = first_perceiver_block\n    else:\n        for n in range(1, n_perceiver_blocks):\n            self.encoder[\"perceiver_block\" + str(n)] = self._build_perceiver_block()\n\n    self.mlp_first_hidden_dim = self.latent_dim\n\n    # Mlp\n    if self.mlp_hidden_dims is not None:\n        self.mlp = MLP(\n            d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim,\n            activation=\"relu\"\n            if self.mlp_activation is None\n            else self.mlp_activation,\n            dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout,\n            batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm,\n            batchnorm_last=False\n            if self.mlp_batchnorm_last is None\n            else self.mlp_batchnorm_last,\n            linear_first=False\n            if self.mlp_linear_first is None\n            else self.mlp_linear_first,\n        )\n    else:\n        self.mlp = None\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_perceiver.TabPerceiver.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property necessary to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_perceiver.TabPerceiver.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights. If the weights are not shared between perceiver blocks, each element of the list will itself be a list containing the Cross Attention and Latent Transformer attention weights, respectively.

The shape of the attention weights is:

  • Cross Attention: \\((N, C, L, F)\\)

  • Latent Attention: \\((N, T, L, L)\\)

Where \\(N\\) is the batch size, \\(C\\) is the number of Cross Attention heads, \\(L\\) is the number of Latents, \\(F\\) is the number of features/columns in the dataset and \\(T\\) is the number of Latent Attention heads

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_fastformer.TabFastFormer","title":"TabFastFormer","text":"
TabFastFormer(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    shared_embed=None,\n    add_shared_embed=None,\n    frac_shared_embed=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous_method=\"standard\",\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    input_dim=32,\n    n_heads=8,\n    use_bias=False,\n    n_blocks=4,\n    attn_dropout=0.1,\n    ff_dropout=0.2,\n    ff_factor=4,\n    share_qv_weights=False,\n    share_weights=False,\n    transformer_activation=\"relu\",\n    mlp_hidden_dims=None,\n    mlp_activation=None,\n    mlp_dropout=None,\n    mlp_batchnorm=None,\n    mlp_batchnorm_last=None,\n    mlp_linear_first=None\n)\n

Bases: BaseTabularModelWithAttention

Defines an adaptation of a FastFormer that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

Most of the parameters for this class are Optional since the use of categorical or continuous features is in fact optional (i.e. one can use categorical features only, continuous features only or both).

NOTE: while there are scientific publications for the TabTransformer, SAINT and FTTransformer, the TabPerceiver and the TabFastFormer are our own adaptations of the Perceiver and the FastFormer for tabular data.

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the TabMlp model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int]]], default: None ) \u2013

    List of Tuples with the column name and number of unique values, e.g. [(education, 11), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • shared_embed (Optional[bool], default: None ) \u2013

    Boolean indicating if the embeddings will be \"shared\". The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • add_shared_embed (Optional[bool], default: None ) \u2013

    The two embedding sharing strategies are: 1) add the shared embeddings to the column embeddings or 2) replace the first frac_shared_embed with the shared embeddings. See pytorch_widedeep.models.embeddings_layers.SharedEmbeddings. If 'None' is passed, it will default to 'False'.

  • frac_shared_embed (Optional[float], default: None ) \u2013

    The fraction of embeddings that will be shared (if add_shared_embed = False) by all the different categories for one particular column. If 'None' is passed, it will default to 0.0.

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: 'standard' ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the aforementioned paper but it is implemented in the official repo. If True, the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False, a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropped out. If None, it will default to False.

  • input_dim (int, default: 32 ) \u2013

    The so-called dimension of the model. It is the number of embeddings used to encode the categorical and/or continuous columns.

  • n_heads (int, default: 8 ) \u2013

    Number of attention heads per FastFormer block

  • use_bias (bool, default: False ) \u2013

    Boolean indicating whether or not to use bias in the Q, K, and V projection layers

  • n_blocks (int, default: 4 ) \u2013

    Number of FastFormer blocks

  • attn_dropout (float, default: 0.1 ) \u2013

    Dropout that will be applied to the Additive Attention layers

  • ff_dropout (float, default: 0.2 ) \u2013

    Dropout that will be applied to the FeedForward network

  • ff_factor (int, default: 4 ) \u2013

    Multiplicative factor applied to the first layer of the FF network in each Transformer block. This is normally set to 4.

  • share_qv_weights (bool, default: False ) \u2013

    Following the paper, this is a boolean indicating if the Value (\\(V\\)) and the Query (\\(Q\\)) transformation parameters will be shared.

  • share_weights (bool, default: False ) \u2013

    In addition to sharing the \\(V\\) and \\(Q\\) transformation parameters, the parameters across different Fastformer layers can also be shared. Please see pytorch_widedeep/models/tabular/transformers/tab_fastformer.py for details (a small sketch follows the example below).

  • transformer_activation (str, default: 'relu' ) \u2013

    Transformer Encoder activation function. 'tanh', 'relu', 'leaky_relu', 'gelu', 'geglu' and 'reglu' are supported

  • mlp_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the MLP. e.g: [64, 32]. If not provided, no MLP will be used on top of the final Transformer block.

  • mlp_activation (Optional[str], default: None ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 'relu'.

  • mlp_dropout (Optional[float], default: None ) \u2013

    float with the dropout between the dense layers of the MLP. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 0.0.

  • mlp_batchnorm (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_batchnorm_last (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_linear_first (Optional[bool], default: None ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to True.

Attributes:

  • encoder (Module) \u2013

    Sequence of FastFormer blocks.

  • mlp (Module) \u2013

    MLP component in the model

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabFastFormer\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i) for u,i in zip(colnames[:4], [4]*4)]\n>>> continuous_cols = ['e']\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = TabFastFormer(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols=continuous_cols)\n>>> out = model(X_tab)\n
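
The following is a small sketch (ours, not part of the library's canonical examples) contrasting the default configuration with one where the query/value transformations and the FastFormer layers are shared, as described for share_qv_weights and share_weights above; with sharing enabled the number of unique parameters should be smaller:

>>> shared = TabFastFormer(column_idx=column_idx, cat_embed_input=cat_embed_input,
...     continuous_cols=continuous_cols, share_qv_weights=True, share_weights=True)
>>> n_shared = sum(p.numel() for p in shared.parameters())
>>> n_default = sum(p.numel() for p in model.parameters())
>>> assert n_shared < n_default  # weight sharing reduces the number of unique parameters
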
Source code in pytorch_widedeep/models/tabular/transformers/tab_fastformer.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    shared_embed: Optional[bool] = None,\n    add_shared_embed: Optional[bool] = None,\n    frac_shared_embed: Optional[float] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = \"standard\",\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    input_dim: int = 32,\n    n_heads: int = 8,\n    use_bias: bool = False,\n    n_blocks: int = 4,\n    attn_dropout: float = 0.1,\n    ff_dropout: float = 0.2,\n    ff_factor: int = 4,\n    share_qv_weights: bool = False,\n    share_weights: bool = False,\n    transformer_activation: str = \"relu\",\n    mlp_hidden_dims: Optional[List[int]] = None,\n    mlp_activation: Optional[str] = None,\n    mlp_dropout: Optional[float] = None,\n    mlp_batchnorm: Optional[bool] = None,\n    mlp_batchnorm_last: Optional[bool] = None,\n    mlp_linear_first: Optional[bool] = None,\n):\n    super(TabFastFormer, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        shared_embed=shared_embed,\n        add_shared_embed=add_shared_embed,\n        frac_shared_embed=frac_shared_embed,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=None,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        input_dim=input_dim,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.n_heads = n_heads\n    self.use_bias = use_bias\n    self.n_blocks = n_blocks\n    self.attn_dropout = attn_dropout\n    self.ff_dropout = ff_dropout\n    self.ff_factor = ff_factor\n    self.share_qv_weights = share_qv_weights\n    self.share_weights = share_weights\n    self.transformer_activation = transformer_activation\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    self.with_cls_token = \"cls_token\" in column_idx\n    self.n_cat = len(cat_embed_input) if cat_embed_input is not None else 0\n    self.n_cont = len(continuous_cols) if continuous_cols is not None else 0\n    self.n_feats = self.n_cat + self.n_cont\n\n    # Embeddings are instantiated at the base model\n    # Transformer blocks\n    self.encoder = nn.Sequential()\n    first_fastformer_block = FastFormerEncoder(\n        input_dim,\n        n_heads,\n        use_bias,\n        
attn_dropout,\n        ff_dropout,\n        ff_factor,\n        share_qv_weights,\n        transformer_activation,\n    )\n    self.encoder.add_module(\"fastformer_block0\", first_fastformer_block)\n    for i in range(1, n_blocks):\n        if share_weights:\n            self.encoder.add_module(\n                \"fastformer_block\" + str(i), first_fastformer_block\n            )\n        else:\n            self.encoder.add_module(\n                \"fastformer_block\" + str(i),\n                FastFormerEncoder(\n                    input_dim,\n                    n_heads,\n                    use_bias,\n                    attn_dropout,\n                    ff_dropout,\n                    ff_factor,\n                    share_qv_weights,\n                    transformer_activation,\n                ),\n            )\n\n    self.mlp_first_hidden_dim = (\n        self.input_dim if self.with_cls_token else (self.n_feats * self.input_dim)\n    )\n\n    # Mlp: adding an MLP on top of the Resnet blocks is optional and\n    # therefore all related params are optional\n    if self.mlp_hidden_dims is not None:\n        self.mlp = MLP(\n            d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim,\n            activation=\"relu\"\n            if self.mlp_activation is None\n            else self.mlp_activation,\n            dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout,\n            batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm,\n            batchnorm_last=False\n            if self.mlp_batchnorm_last is None\n            else self.mlp_batchnorm_last,\n            linear_first=False\n            if self.mlp_linear_first is None\n            else self.mlp_linear_first,\n        )\n    else:\n        self.mlp = None\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_fastformer.TabFastFormer.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property necessary to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_fastformer.TabFastFormer.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights. Each element of the list is a tuple where the first and second elements are the \\(\\alpha\\) and \\(\\beta\\) attention weights in the paper.

The shape of the attention weights is \\((N, H, F)\\) where \\(N\\) is the batch size, \\(H\\) is the number of attention heads and \\(F\\) is the number of features/columns in the dataset

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.attentive_rnn.BasicRNN","title":"BasicRNN","text":"
BasicRNN(\n    vocab_size,\n    embed_dim=None,\n    embed_matrix=None,\n    embed_trainable=True,\n    rnn_type=\"lstm\",\n    hidden_dim=64,\n    n_layers=3,\n    rnn_dropout=0.1,\n    bidirectional=False,\n    use_hidden_state=True,\n    padding_idx=1,\n    head_hidden_dims=None,\n    head_activation=\"relu\",\n    head_dropout=None,\n    head_batchnorm=False,\n    head_batchnorm_last=False,\n    head_linear_first=False,\n)\n

Bases: BaseWDModelComponent

Standard text classifier/regressor composed of a stack of RNNs (LSTMs or GRUs) that can be used as the deeptext component of a Wide & Deep model or independently by itself.

In addition, there is the option to add a Fully Connected (FC) set of dense layers on top of the stack of RNNs

Parameters:

  • vocab_size (int) \u2013

    Number of words in the vocabulary

  • embed_dim (Optional[int], default: None ) \u2013

    Dimension of the word embeddings if non-pretrained word vectors are used

  • embed_matrix (Optional[ndarray], default: None ) \u2013

    Pretrained word embeddings (a usage sketch follows the example below)

  • embed_trainable (bool, default: True ) \u2013

    Boolean indicating if the pretrained embeddings are trainable

  • rnn_type (str, default: 'lstm' ) \u2013

    String indicating the type of RNN to use. One of 'lstm' or 'gru'

  • hidden_dim (int, default: 64 ) \u2013

    Hidden dim of the RNN

  • n_layers (int, default: 3 ) \u2013

    Number of recurrent layers

  • rnn_dropout (float, default: 0.1 ) \u2013

    Dropout for each RNN layer except the last layer

  • bidirectional (bool, default: False ) \u2013

    Boolean indicating whether the stacked RNNs are bidirectional

  • use_hidden_state (bool, default: True ) \u2013

    Boolean indicating whether to use the final hidden state or the RNN's output as predicting features. Typically the former is used.

  • padding_idx (int, default: 1 ) \u2013

    index of the padding token in the padded-tokenised sequences. The TextPreprocessor class within this library uses fastai's tokenizer where the token index 0 is reserved for the 'unknown' word token. Therefore, the default value is set to 1.

  • head_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the sizes of the dense layers in the head e.g: [128, 64]

  • head_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers in the head. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • head_dropout (Optional[float], default: None ) \u2013

    Dropout of the dense layers in the head

  • head_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not to include batch normalization in the dense layers that form the 'rnn_mlp'

  • head_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not to apply batch normalization to the last of the dense layers in the head

  • head_linear_first (bool, default: False ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]

Attributes:

  • word_embed (Module) \u2013

    word embedding matrix

  • rnn (Module) \u2013

    Stack of RNNs

  • rnn_mlp (Module) \u2013

    Stack of dense layers on top of the RNN. This will only exist if head_hidden_dims is not None

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import BasicRNN\n>>> X_text = torch.cat((torch.zeros([5,1]), torch.empty(5, 4).random_(1,4)), axis=1)\n>>> model = BasicRNN(vocab_size=4, hidden_dim=4, n_layers=2, padding_idx=0, embed_dim=4)\n>>> out = model(X_text)\n
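
And a further sketch (hypothetical values) showing how a pretrained embedding matrix might be plugged in via embed_matrix; the random numpy array simply stands in for real pretrained vectors of shape (vocab_size, embedding_dim), assuming a float32 array is accepted as the type annotation suggests:

>>> import numpy as np
>>> pretrained = np.random.rand(4, 8).astype('float32')  # stand-in for real pretrained vectors
>>> model_pretrained = BasicRNN(vocab_size=4, embed_matrix=pretrained, embed_trainable=False,
...     hidden_dim=4, n_layers=2, padding_idx=0)
>>> out = model_pretrained(X_text)
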
Source code in pytorch_widedeep/models/text/basic_rnn.py
def __init__(\n    self,\n    vocab_size: int,\n    embed_dim: Optional[int] = None,\n    embed_matrix: Optional[np.ndarray] = None,\n    embed_trainable: bool = True,\n    rnn_type: str = \"lstm\",\n    hidden_dim: int = 64,\n    n_layers: int = 3,\n    rnn_dropout: float = 0.1,\n    bidirectional: bool = False,\n    use_hidden_state: bool = True,\n    padding_idx: int = 1,\n    head_hidden_dims: Optional[List[int]] = None,\n    head_activation: str = \"relu\",\n    head_dropout: Optional[float] = None,\n    head_batchnorm: bool = False,\n    head_batchnorm_last: bool = False,\n    head_linear_first: bool = False,\n):\n    super(BasicRNN, self).__init__()\n\n    if embed_dim is None and embed_matrix is None:\n        raise ValueError(\n            \"If no 'embed_matrix' is passed, the embedding dimension must\"\n            \"be specified with 'embed_dim'\"\n        )\n\n    if rnn_type.lower() not in [\"lstm\", \"gru\"]:\n        raise ValueError(\n            f\"'rnn_type' must be 'lstm' or 'gru', got {rnn_type} instead\"\n        )\n\n    if (\n        embed_dim is not None\n        and embed_matrix is not None\n        and not embed_dim == embed_matrix.shape[1]\n    ):\n        warnings.warn(\n            \"the input embedding dimension {} and the dimension of the \"\n            \"pretrained embeddings {} do not match. The pretrained embeddings \"\n            \"dimension ({}) will be used\".format(\n                embed_dim, embed_matrix.shape[1], embed_matrix.shape[1]\n            ),\n            UserWarning,\n        )\n\n    self.vocab_size = vocab_size\n    self.embed_trainable = embed_trainable\n    self.embed_dim = embed_dim\n\n    self.rnn_type = rnn_type\n    self.hidden_dim = hidden_dim\n    self.n_layers = n_layers\n    self.rnn_dropout = rnn_dropout\n    self.bidirectional = bidirectional\n    self.use_hidden_state = use_hidden_state\n    self.padding_idx = padding_idx\n\n    self.head_hidden_dims = head_hidden_dims\n    self.head_activation = head_activation\n    self.head_dropout = head_dropout\n    self.head_batchnorm = head_batchnorm\n    self.head_batchnorm_last = head_batchnorm_last\n    self.head_linear_first = head_linear_first\n\n    # Embeddings\n    if embed_matrix is not None:\n        self.word_embed, self.embed_dim = self._set_embeddings(embed_matrix)\n    else:\n        self.word_embed = nn.Embedding(\n            self.vocab_size, self.embed_dim, padding_idx=self.padding_idx\n        )\n\n    # RNN\n    rnn_params = {\n        \"input_size\": self.embed_dim,\n        \"hidden_size\": hidden_dim,\n        \"num_layers\": n_layers,\n        \"bidirectional\": bidirectional,\n        \"dropout\": rnn_dropout,\n        \"batch_first\": True,\n    }\n    if self.rnn_type.lower() == \"lstm\":\n        self.rnn: Union[nn.LSTM, nn.GRU] = nn.LSTM(**rnn_params)\n    elif self.rnn_type.lower() == \"gru\":\n        self.rnn = nn.GRU(**rnn_params)\n\n    self.rnn_output_dim = hidden_dim * 2 if bidirectional else hidden_dim\n\n    # FC-Head (Mlp)\n    if self.head_hidden_dims is not None:\n        head_hidden_dims = [self.rnn_output_dim] + self.head_hidden_dims\n        self.rnn_mlp: Union[MLP, nn.Identity] = MLP(\n            head_hidden_dims,\n            head_activation,\n            head_dropout,\n            head_batchnorm,\n            head_batchnorm_last,\n            head_linear_first,\n        )\n    else:\n        # simple hack to add readability in the forward pass\n        self.rnn_mlp = nn.Identity()\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.attentive_rnn.BasicRNN.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property necessary to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.attentive_rnn.AttentiveRNN","title":"AttentiveRNN","text":"
AttentiveRNN(\n    vocab_size,\n    embed_dim=None,\n    embed_matrix=None,\n    embed_trainable=True,\n    rnn_type=\"lstm\",\n    hidden_dim=64,\n    n_layers=3,\n    rnn_dropout=0.1,\n    bidirectional=False,\n    use_hidden_state=True,\n    padding_idx=1,\n    attn_concatenate=True,\n    attn_dropout=0.1,\n    head_hidden_dims=None,\n    head_activation=\"relu\",\n    head_dropout=None,\n    head_batchnorm=False,\n    head_batchnorm_last=False,\n    head_linear_first=False,\n)\n

Bases: BasicRNN

Text classifier/regressor composed of a stack of RNNs (LSTMs or GRUs) plus an attention layer. This model can be used as the deeptext component of a Wide & Deep model or independently by itself.

In addition, there is the option to add a Fully Connected (FC) set of dense layers on top of the attention layer

Parameters:

  • vocab_size (int) \u2013

    Number of words in the vocabulary

  • embed_dim (Optional[int], default: None ) \u2013

    Dimension of the word embeddings if non-pretrained word vectors are used

  • embed_matrix (Optional[ndarray], default: None ) \u2013

    Pretrained word embeddings

  • embed_trainable (bool, default: True ) \u2013

    Boolean indicating if the pretrained embeddings are trainable

  • rnn_type (str, default: 'lstm' ) \u2013

    String indicating the type of RNN to use. One of 'lstm' or 'gru'

  • hidden_dim (int, default: 64 ) \u2013

    Hidden dim of the RNN

  • n_layers (int, default: 3 ) \u2013

    Number of recurrent layers

  • rnn_dropout (float, default: 0.1 ) \u2013

    Dropout for each RNN layer except the last layer

  • bidirectional (bool, default: False ) \u2013

    Boolean indicating whether the stacked RNNs are bidirectional

  • use_hidden_state (bool, default: True ) \u2013

    Boolean indicating whether to use the final hidden state or the RNN's output as predicting features. Typically the former is used.

  • padding_idx (int, default: 1 ) \u2013

    index of the padding token in the padded-tokenised sequences. The TextPreprocessor class within this library uses fastai's tokenizer where the token index 0 is reserved for the 'unknown' word token. Therefore, the default value is set to 1.

  • attn_concatenate (bool, default: True ) \u2013

    Boolean indicating if the input to the attention mechanism will be the output of the RNN or the output of the RNN concatenated with the last hidden state.

  • attn_dropout (float, default: 0.1 ) \u2013

    Internal dropout for the attention mechanism

  • head_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the sizes of the dense layers in the head e.g: [128, 64]

  • head_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers in the head. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • head_dropout (Optional[float], default: None ) \u2013

    Dropout of the dense layers in the head

  • head_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not to include batch normalization in the dense layers that form the 'rnn_mlp'

  • head_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not to apply batch normalization to the last of the dense layers in the head

  • head_linear_first (bool, default: False ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]

Attributes:

  • word_embed (Module) \u2013

    word embedding matrix

  • rnn (Module) \u2013

    Stack of RNNs

  • rnn_mlp (Module) \u2013

    Stack of dense layers on top of the RNN. This will only exist if head_hidden_dims is not None

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import AttentiveRNN\n>>> X_text = torch.cat((torch.zeros([5,1]), torch.empty(5, 4).random_(1,4)), axis=1)\n>>> model = AttentiveRNN(vocab_size=4, hidden_dim=4, n_layers=2, padding_idx=0, embed_dim=4)\n>>> out = model(X_text)\n
Source code in pytorch_widedeep/models/text/attentive_rnn.py
def __init__(\n    self,\n    vocab_size: int,\n    embed_dim: Optional[int] = None,\n    embed_matrix: Optional[np.ndarray] = None,\n    embed_trainable: bool = True,\n    rnn_type: str = \"lstm\",\n    hidden_dim: int = 64,\n    n_layers: int = 3,\n    rnn_dropout: float = 0.1,\n    bidirectional: bool = False,\n    use_hidden_state: bool = True,\n    padding_idx: int = 1,\n    attn_concatenate: bool = True,\n    attn_dropout: float = 0.1,\n    head_hidden_dims: Optional[List[int]] = None,\n    head_activation: str = \"relu\",\n    head_dropout: Optional[float] = None,\n    head_batchnorm: bool = False,\n    head_batchnorm_last: bool = False,\n    head_linear_first: bool = False,\n):\n    super(AttentiveRNN, self).__init__(\n        vocab_size=vocab_size,\n        embed_dim=embed_dim,\n        embed_matrix=embed_matrix,\n        embed_trainable=embed_trainable,\n        rnn_type=rnn_type,\n        hidden_dim=hidden_dim,\n        n_layers=n_layers,\n        rnn_dropout=rnn_dropout,\n        bidirectional=bidirectional,\n        use_hidden_state=use_hidden_state,\n        padding_idx=padding_idx,\n        head_hidden_dims=head_hidden_dims,\n        head_activation=head_activation,\n        head_dropout=head_dropout,\n        head_batchnorm=head_batchnorm,\n        head_batchnorm_last=head_batchnorm_last,\n        head_linear_first=head_linear_first,\n    )\n\n    # Embeddings and RNN defined in the BasicRNN inherited class\n\n    # Attention\n    self.attn_concatenate = attn_concatenate\n    self.attn_dropout = attn_dropout\n\n    if bidirectional and attn_concatenate:\n        self.rnn_output_dim = hidden_dim * 4\n    elif bidirectional or attn_concatenate:\n        self.rnn_output_dim = hidden_dim * 2\n    else:\n        self.rnn_output_dim = hidden_dim\n    self.attn = ContextAttention(\n        self.rnn_output_dim, attn_dropout, sum_along_seq=True\n    )\n\n    # FC-Head (Mlp)\n    if self.head_hidden_dims is not None:\n        head_hidden_dims = [self.rnn_output_dim] + self.head_hidden_dims\n        self.rnn_mlp = MLP(\n            head_hidden_dims,\n            head_activation,\n            head_dropout,\n            head_batchnorm,\n            head_batchnorm_last,\n            head_linear_first,\n        )\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.attentive_rnn.AttentiveRNN.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights

The shape of the attention weights is \\((N, S)\\), where \\(N\\) is the batch size and \\(S\\) is the length of the sequence
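
For instance, reusing the toy example above (a minimal sketch; the weights themselves are random here), the property can be inspected after a forward pass:

import torch
from pytorch_widedeep.models import AttentiveRNN

X_text = torch.cat((torch.zeros([5, 1]), torch.empty(5, 4).random_(1, 4)), axis=1)
model = AttentiveRNN(vocab_size=4, hidden_dim=4, n_layers=2, padding_idx=0, embed_dim=4)
_ = model(X_text)
# after the forward pass, each tensor in the list has shape (N, S) = (5, 5)
attn_weights = model.attention_weights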

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.stacked_attentive_rnn.StackedAttentiveRNN","title":"StackedAttentiveRNN","text":"
StackedAttentiveRNN(\n    vocab_size,\n    embed_dim=None,\n    embed_matrix=None,\n    embed_trainable=True,\n    rnn_type=\"lstm\",\n    hidden_dim=64,\n    bidirectional=False,\n    padding_idx=1,\n    n_blocks=3,\n    attn_concatenate=False,\n    attn_dropout=0.1,\n    with_addnorm=False,\n    head_hidden_dims=None,\n    head_activation=\"relu\",\n    head_dropout=None,\n    head_batchnorm=False,\n    head_batchnorm_last=False,\n    head_linear_first=False,\n)\n

Bases: BaseWDModelComponent

Text classifier/regressor composed of a stack of blocks: [RNN + Attention]. This can be used as the deeptext component of a Wide & Deep model or independently by itself.

In addition, there is the option to add a Fully Connected (FC) set of dense layers on top of the attention blocks

Parameters:

  • vocab_size (int) \u2013

    Number of words in the vocabulary

  • embed_dim (Optional[int], default: None ) \u2013

    Dimension of the word embeddings if non-pretrained word vectors are used

  • embed_matrix (Optional[ndarray], default: None ) \u2013

    Pretrained word embeddings

  • embed_trainable (bool, default: True ) \u2013

    Boolean indicating if the pretrained embeddings are trainable

  • rnn_type (str, default: 'lstm' ) \u2013

    String indicating the type of RNN to use. One of 'lstm' or 'gru'

  • hidden_dim (int, default: 64 ) \u2013

    Hidden dim of the RNN

  • bidirectional (bool, default: False ) \u2013

    Boolean indicating whether the stacked RNNs are bidirectional

  • padding_idx (int, default: 1 ) \u2013

    index of the padding token in the padded-tokenised sequences. The TextPreprocessor class within this library uses fastai's tokenizer where the token index 0 is reserved for the 'unknown' word token. Therefore, the default value is set to 1.

  • n_blocks (int, default: 3 ) \u2013

    Number of attention blocks. Each block comprises an RNN and a Context Attention Encoder

  • attn_concatenate (bool, default: False ) \u2013

    Boolean indicating whether the input to the attention mechanism will be the output of the RNN concatenated with the last hidden state (if True) or simply the output of the RNN (if False)

  • attn_dropout (float, default: 0.1 ) \u2013

    Internal dropout for the attention mechanism

  • with_addnorm (bool, default: False ) \u2013

    Boolean indicating if the output of each block will be added to the input and normalised

  • head_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the sizes of the dense layers in the head e.g: [128, 64]

  • head_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers in the head. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • head_dropout (Optional[float], default: None ) \u2013

    Dropout of the dense layers in the head

  • head_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not to include batch normalization in the dense layers that form the 'rnn_mlp'

  • head_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not to apply batch normalization to the last of the dense layers in the head

  • head_linear_first (bool, default: False ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]

Attributes:

  • word_embed (Module) \u2013

    word embedding matrix

  • rnn (Module) \u2013

    Stack of RNNs

  • rnn_mlp (Module) \u2013

    Stack of dense layers on top of the RNN. This will only exist if head_hidden_dims is not None

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import StackedAttentiveRNN\n>>> X_text = torch.cat((torch.zeros([5,1]), torch.empty(5, 4).random_(1,4)), axis=1)\n>>> model = StackedAttentiveRNN(vocab_size=4, hidden_dim=4, padding_idx=0, embed_dim=4)\n>>> out = model(X_text)\n
Source code in pytorch_widedeep/models/text/stacked_attentive_rnn.py
def __init__(\n    self,\n    vocab_size: int,\n    embed_dim: Optional[int] = None,\n    embed_matrix: Optional[np.ndarray] = None,\n    embed_trainable: bool = True,\n    rnn_type: str = \"lstm\",\n    hidden_dim: int = 64,\n    bidirectional: bool = False,\n    padding_idx: int = 1,\n    n_blocks: int = 3,\n    attn_concatenate: bool = False,\n    attn_dropout: float = 0.1,\n    with_addnorm: bool = False,\n    head_hidden_dims: Optional[List[int]] = None,\n    head_activation: str = \"relu\",\n    head_dropout: Optional[float] = None,\n    head_batchnorm: bool = False,\n    head_batchnorm_last: bool = False,\n    head_linear_first: bool = False,\n):\n    super(StackedAttentiveRNN, self).__init__()\n\n    if (\n        embed_dim is not None\n        and embed_matrix is not None\n        and not embed_dim == embed_matrix.shape[1]\n    ):\n        warnings.warn(\n            \"the input embedding dimension {} and the dimension of the \"\n            \"pretrained embeddings {} do not match. The pretrained embeddings \"\n            \"dimension ({}) will be used\".format(\n                embed_dim, embed_matrix.shape[1], embed_matrix.shape[1]\n            ),\n            UserWarning,\n        )\n\n    if rnn_type.lower() not in [\"lstm\", \"gru\"]:\n        raise ValueError(\n            f\"'rnn_type' must be 'lstm' or 'gru', got {rnn_type} instead\"\n        )\n\n    self.vocab_size = vocab_size\n    self.embed_trainable = embed_trainable\n    self.embed_dim = embed_dim\n\n    self.rnn_type = rnn_type\n    self.hidden_dim = hidden_dim\n    self.bidirectional = bidirectional\n    self.padding_idx = padding_idx\n\n    self.n_blocks = n_blocks\n    self.attn_concatenate = attn_concatenate\n    self.attn_dropout = attn_dropout\n    self.with_addnorm = with_addnorm\n\n    self.head_hidden_dims = head_hidden_dims\n    self.head_activation = head_activation\n    self.head_dropout = head_dropout\n    self.head_batchnorm = head_batchnorm\n    self.head_batchnorm_last = head_batchnorm_last\n    self.head_linear_first = head_linear_first\n\n    # Embeddings\n    self.word_embed, self.embed_dim = self._set_embeddings(embed_matrix)\n\n    # Linear Projection: if embed_dim is different that the input of the\n    # attention blocks we add a linear projection\n    if bidirectional and attn_concatenate:\n        self.rnn_output_dim = hidden_dim * 4\n    elif bidirectional or attn_concatenate:\n        self.rnn_output_dim = hidden_dim * 2\n    else:\n        self.rnn_output_dim = hidden_dim\n\n    if self.rnn_output_dim != self.embed_dim:\n        self.embed_proj: Union[nn.Linear, nn.Identity] = nn.Linear(\n            self.embed_dim, self.rnn_output_dim\n        )\n    else:\n        self.embed_proj = nn.Identity()\n\n    # RNN\n    rnn_params = {\n        \"input_size\": self.rnn_output_dim,\n        \"hidden_size\": hidden_dim,\n        \"bidirectional\": bidirectional,\n        \"batch_first\": True,\n    }\n    if self.rnn_type.lower() == \"lstm\":\n        self.rnn: Union[nn.LSTM, nn.GRU] = nn.LSTM(**rnn_params)\n    elif self.rnn_type.lower() == \"gru\":\n        self.rnn = nn.GRU(**rnn_params)\n\n    # FC-Head (Mlp)\n    self.attention_blks = nn.ModuleList()\n    for i in range(n_blocks):\n        self.attention_blks.append(\n            ContextAttentionEncoder(\n                self.rnn,\n                self.rnn_output_dim,\n                attn_dropout,\n                attn_concatenate,\n                with_addnorm=with_addnorm if i != n_blocks - 1 else False,\n                sum_along_seq=i 
== n_blocks - 1,\n            )\n        )\n\n    # Mlp\n    if self.head_hidden_dims is not None:\n        head_hidden_dims = [self.rnn_output_dim] + self.head_hidden_dims\n        self.rnn_mlp: Union[MLP, nn.Identity] = MLP(\n            head_hidden_dims,\n            head_activation,\n            head_dropout,\n            head_batchnorm,\n            head_batchnorm_last,\n            head_linear_first,\n        )\n    else:\n        # simple hack to add readability in the forward pass\n        self.rnn_mlp = nn.Identity()\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.stacked_attentive_rnn.StackedAttentiveRNN.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This property is required to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.stacked_attentive_rnn.StackedAttentiveRNN.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights per block

The shape of the attention weights is \\((N, S)\\), where \\(N\\) is the batch size and \\(S\\) is the length of the sequence
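
For instance (a minimal sketch reusing the toy example above, and assuming one weight tensor is stored per block):

import torch
from pytorch_widedeep.models import StackedAttentiveRNN

X_text = torch.cat((torch.zeros([5, 1]), torch.empty(5, 4).random_(1, 4)), axis=1)
model = StackedAttentiveRNN(vocab_size=4, hidden_dim=4, n_blocks=3, padding_idx=0, embed_dim=4)
_ = model(X_text)
# one (N, S) = (5, 5) tensor per attention block, i.e. a list of length 3
attn_weights_per_block = model.attention_weights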

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.basic_transformer.Transformer","title":"Transformer","text":"
Transformer(\n    vocab_size,\n    seq_length,\n    input_dim,\n    n_heads,\n    n_blocks,\n    attn_dropout=0.1,\n    ff_dropout=0.1,\n    ff_factor=4,\n    activation=\"gelu\",\n    use_linear_attention=False,\n    use_flash_attention=False,\n    padding_idx=0,\n    with_cls_token=False,\n    *,\n    with_pos_encoding=True,\n    pos_encoding_dropout=0.1,\n    pos_encoder=None\n)\n

Bases: Module

Basic Encoder-Only Transformer Model for text classification/regression. As with all other models in the library, this model can be used as the deeptext component of a Wide & Deep model or independently by itself.

NOTE: This model is introduced in the context of recommendation systems and intended for sequences of any nature (e.g. items). It can, of course, still be used for text. However, at this stage, we have decided not to include the possibility of loading pretrained word vectors since we aim to integrate the library with Huggingface in the (hopefully) near future

Parameters:

  • vocab_size (int) \u2013

    Number of words in the vocabulary

  • input_dim (int) \u2013

    Dimension of the token embeddings

    Param aliases: embed_dim, d_model.

Attributes:

  • embedding (Module) \u2013

    Standard token embedding layer

  • pos_encoder (Module) \u2013

    Positional Encoder

  • encoder (Module) \u2013

    Sequence of Transformer blocks

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import Transformer\n>>> X_text = torch.cat((torch.zeros([5,1]), torch.empty(5, 4).random_(1,4)), axis=1)\n>>> model = Transformer(vocab_size=4, seq_length=5, input_dim=8, n_heads=1, n_blocks=1)\n>>> out = model(X_text)\n
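
As a usage note (a minimal sketch), input_dim and seq_length can also be passed under the aliases defined by the @alias decorators in the source below:

import torch
from pytorch_widedeep.models import Transformer

X_text = torch.cat((torch.zeros([5, 1]), torch.empty(5, 4).random_(1, 4)), axis=1)
# 'd_model' aliases 'input_dim' and 'max_length' aliases 'seq_length'
model = Transformer(vocab_size=4, max_length=5, d_model=8, n_heads=1, n_blocks=1)
out = model(X_text)
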
Source code in pytorch_widedeep/models/text/basic_transformer.py
@alias(\"input_dim\", [\"embed_dim\", \"d_model\"])\n@alias(\"seq_length\", [\"max_length\", \"maxlen\"])\ndef __init__(\n    self,\n    vocab_size: int,\n    seq_length: int,\n    input_dim: int,\n    n_heads: int,\n    n_blocks: int,\n    attn_dropout: float = 0.1,\n    ff_dropout: float = 0.1,\n    ff_factor: int = 4,\n    activation: str = \"gelu\",\n    use_linear_attention: bool = False,\n    use_flash_attention: bool = False,\n    padding_idx: int = 0,\n    with_cls_token: bool = False,\n    *,  # from here on pos encoding args\n    with_pos_encoding: bool = True,\n    pos_encoding_dropout: float = 0.1,\n    pos_encoder: Optional[nn.Module] = None,\n):\n    super().__init__()\n\n    self.input_dim = input_dim\n    self.seq_length = seq_length\n    self.n_heads = n_heads\n    self.n_blocks = n_blocks\n    self.attn_dropout = attn_dropout\n    self.ff_dropout = ff_dropout\n    self.ff_factor = ff_factor\n    self.activation = activation\n    self.use_linear_attention = use_linear_attention\n    self.use_flash_attention = use_flash_attention\n    self.padding_idx = padding_idx\n    self.with_cls_token = with_cls_token\n    self.with_pos_encoding = with_pos_encoding\n    self.pos_encoding_dropout = pos_encoding_dropout\n\n    self.embedding = nn.Embedding(\n        vocab_size, input_dim, padding_idx=self.padding_idx\n    )\n\n    if with_pos_encoding:\n        if pos_encoder is not None:\n            self.pos_encoder: Union[\n                nn.Module, nn.Identity, PositionalEncoding\n            ] = pos_encoder\n        else:\n            self.pos_encoder = PositionalEncoding(\n                input_dim, pos_encoding_dropout, seq_length\n            )\n    else:\n        self.pos_encoder = nn.Identity()\n\n    self.encoder = nn.Sequential()\n    for i in range(n_blocks):\n        self.encoder.add_module(\n            \"transformer_block\" + str(i),\n            TransformerEncoder(\n                input_dim,\n                n_heads,\n                False,  # use_qkv_bias\n                attn_dropout,\n                ff_dropout,\n                ff_factor,\n                activation,\n                use_linear_attention,\n                use_flash_attention,\n            ),\n        )\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.image.vision.Vision","title":"Vision","text":"
Vision(\n    pretrained_model_setup=None,\n    n_trainable=None,\n    trainable_params=None,\n    channel_sizes=[64, 128, 256, 512],\n    kernel_sizes=[7, 3, 3, 3],\n    strides=[2, 1, 1, 1],\n    head_hidden_dims=None,\n    head_activation=\"relu\",\n    head_dropout=0.1,\n    head_batchnorm=False,\n    head_batchnorm_last=False,\n    head_linear_first=False,\n)\n

Bases: BaseWDModelComponent

Defines a standard image classifier/regressor using a pretrained network or a sequence of convolution layers that can be used as the deepimage component of a Wide & Deep model or independently by itself.

NOTE: this class represents the integration between pytorch-widedeep and torchvision. New architectures will be available as they are added to torchvision. In a distant future we aim to bring transformer-based architectures as well. However, simple CNN-based architectures (and even MLP-based) seem to produce SoTA results. For the time being, we describe below the options available through this class

Parameters:

  • pretrained_model_setup (Union[str, Dict[str, Union[str, WeightsEnum]]], default: None ) \u2013

    Name of the pretrained model. Should be a variant of the following architectures: 'resnet', 'shufflenet', 'resnext', 'wide_resnet', 'regnet', 'densenet', 'mobilenetv3', 'mobilenetv2', 'mnasnet', 'efficientnet' and 'squeezenet'. If pretrained_model_setup = None, a basic, fully trainable CNN will be used. Alternatively, since Torchvision 0.13 one can use pretrained models with different weights. Therefore, pretrained_model_setup can also be a dictionary with the name of the model and the weights (e.g. {'resnet50': ResNet50_Weights.DEFAULT} or {'resnet50': \"IMAGENET1K_V2\"}). Aliased as pretrained_model_name.

  • n_trainable (Optional[int], default: None ) \u2013

    Number of trainable layers starting from the layer closest to the output neuron(s). Note that this number DOES NOT take into account the so-called 'head', which is ALWAYS trainable. If trainable_params is not None, this parameter will be ignored

  • trainable_params (Optional[List[str]], default: None ) \u2013

    List of strings containing the names (or substring within the name) of the parameters that will be trained. For example, if we use a 'resnet18' pretrained model and we set trainable_params = ['layer4'] only the parameters of 'layer4' of the network (and the head, as mentioned before) will be trained. Note that setting this or the previous parameter involves some knowledge of the architecture used.

  • channel_sizes (List[int], default: [64, 128, 256, 512] ) \u2013

    List of integers with the channel sizes of a CNN in case we choose not to use a pretrained model

  • kernel_sizes (Union[int, List[int]], default: [7, 3, 3, 3] ) \u2013

    List of integers with the kernel sizes of a CNN in case we choose not to use a pretrained model. Must be of length equal to len(channel_sizes) - 1.

  • strides (Union[int, List[int]], default: [2, 1, 1, 1] ) \u2013

    List of integers with the stride sizes of a CNN in case we choose not to use a pretrained model. Must be of length equal to len(channel_sizes) - 1.

  • head_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the head. e.g: [64,32]

  • head_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers in the head. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • head_dropout (Union[float, List[float]], default: 0.1 ) \u2013

    float indicating the dropout between the dense layers.

  • head_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers

  • head_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers

  • head_linear_first (bool, default: False ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]

Attributes:

  • features (Module) \u2013

    The pretrained model or Standard CNN plus the optional head

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import Vision\n>>> X_img = torch.rand((2,3,224,224))\n>>> model = Vision(channel_sizes=[64, 128], kernel_sizes = [3, 3], strides=[1, 1], head_hidden_dims=[32, 8])\n>>> out = model(X_img)\n
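
And a sketch of the pretrained route described above (this downloads the corresponding torchvision weights; the weight identifier is the one quoted in the parameter description and may vary with the installed torchvision version):

import torch
from pytorch_widedeep.models import Vision

X_img = torch.rand((2, 3, 224, 224))
# pretrained resnet50; only 'layer4' (plus the always-trainable head) is fine-tuned
model = Vision(
    pretrained_model_setup={"resnet50": "IMAGENET1K_V2"},
    trainable_params=["layer4"],
    head_hidden_dims=[32, 8],
)
out = model(X_img)
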
Source code in pytorch_widedeep/models/image/vision.py
@alias(\"pretrained_model_setup\", [\"pretrained_model_name\"])\ndef __init__(\n    self,\n    pretrained_model_setup: Union[str, Dict[str, Union[str, WeightsEnum]]] = None,\n    n_trainable: Optional[int] = None,\n    trainable_params: Optional[List[str]] = None,\n    channel_sizes: List[int] = [64, 128, 256, 512],\n    kernel_sizes: Union[int, List[int]] = [7, 3, 3, 3],\n    strides: Union[int, List[int]] = [2, 1, 1, 1],\n    head_hidden_dims: Optional[List[int]] = None,\n    head_activation: str = \"relu\",\n    head_dropout: Union[float, List[float]] = 0.1,\n    head_batchnorm: bool = False,\n    head_batchnorm_last: bool = False,\n    head_linear_first: bool = False,\n):\n    super(Vision, self).__init__()\n\n    self._check_pretrained_model_setup(\n        pretrained_model_setup, n_trainable, trainable_params\n    )\n\n    self.pretrained_model_setup = pretrained_model_setup\n    self.n_trainable = n_trainable\n    self.trainable_params = trainable_params\n    self.channel_sizes = channel_sizes\n    self.kernel_sizes = kernel_sizes\n    self.strides = strides\n    self.head_hidden_dims = head_hidden_dims\n    self.head_activation = head_activation\n    self.head_dropout = head_dropout\n    self.head_batchnorm = head_batchnorm\n    self.head_batchnorm_last = head_batchnorm_last\n    self.head_linear_first = head_linear_first\n\n    self.features, self.backbone_output_dim = self._get_features()\n\n    if pretrained_model_setup is not None:\n        self._freeze(self.features)\n\n    if self.head_hidden_dims is not None:\n        head_hidden_dims = [self.backbone_output_dim] + self.head_hidden_dims\n        self.vision_mlp = MLP(\n            head_hidden_dims,\n            self.head_activation,\n            self.head_dropout,\n            self.head_batchnorm,\n            self.head_batchnorm_last,\n            self.head_linear_first,\n        )\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.image.vision.Vision.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This property is required to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.wide_deep.WideDeep","title":"WideDeep","text":"
WideDeep(\n    wide=None,\n    deeptabular=None,\n    deeptext=None,\n    deepimage=None,\n    deephead=None,\n    head_hidden_dims=None,\n    head_activation=\"relu\",\n    head_dropout=0.1,\n    head_batchnorm=False,\n    head_batchnorm_last=False,\n    head_linear_first=True,\n    enforce_positive=False,\n    enforce_positive_activation=\"softplus\",\n    pred_dim=1,\n    with_fds=False,\n    **fds_config\n)\n

Bases: Module

Main collector class that combines all wide, deeptabular, deeptext and deepimage models.

Note that all models described so far in this library must be passed to the WideDeep class once constructed. This is because the models output the last layer before the prediction layer; that prediction layer is added by the WideDeep class as it collects the components for every data mode.

There are two options to combine these models that correspond to the two main architectures that pytorch-widedeep can build.

  • Directly connecting the output of the model components to the output neuron(s).

  • Adding a Fully-Connected Head (FC-Head) on top of the deep models. This FC-Head will combine the output from the deeptabular, deeptext and deepimage components and will then be connected to the output neuron(s).

Parameters:

  • wide (Optional[Module], default: None ) \u2013

    Wide model. This is a linear model where the non-linearities are captured via crossed-columns.

  • deeptabular (Optional[BaseWDModelComponent], default: None ) \u2013

    Currently this library implements a number of possible architectures for the deeptabular component. See the documentation of the package.

  • deeptext (Optional[BaseWDModelComponent], default: None ) \u2013

    Currently this library implements a number of possible architectures for the deeptext component. See the documentation of the package.

  • deepimage (Optional[BaseWDModelComponent], default: None ) \u2013

    Currently this library uses torchvision and implements a number of possible architectures for the deepimage component. See the documentation of the package.

  • deephead (Optional[BaseWDModelComponent], default: None ) \u2013

    Alternatively, the user can pass a custom model that will receive the output of the deep component. If deephead is not None all the previous fc-head parameters will be ignored

  • head_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the sizes of the dense layers in the head e.g: [128, 64]

  • head_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers in the head. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • head_dropout (float, default: 0.1 ) \u2013

    Dropout of the dense layers in the head

  • head_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not to include batch normalization in the dense layers that form the head

  • head_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not to apply batch normalization to the last of the dense layers in the head

  • head_linear_first (bool, default: True ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]

  • enforce_positive (bool, default: False ) \u2013

    Boolean indicating if the output from the final layer must be positive. This is important if you are using loss functions with non-negative input restrictions, e.g. RMSLE, or if you know your predictions are bounded between 0 and inf

  • enforce_positive_activation (str, default: 'softplus' ) \u2013

    Activation function to enforce that the final layer has a positive output. 'softplus' or 'relu' are supported.

  • pred_dim (int, default: 1 ) \u2013

    Size of the final wide and deep output layer containing the predictions. 1 for regression and binary classification or number of classes for multiclass classification.

  • with_fds (bool, default: False ) \u2013

    Boolean indicating if Feature Distribution Smoothing (FDS) will be applied before the final prediction layer. Only available for regression problems. See Delving into Deep Imbalanced Regression for details.

Other Parameters:

  • **fds_config \u2013

    Dictionary with the parameters to be used when using Feature Distribution Smoothing. Please see the docs for the FDSLayer. NOTE: Feature Distribution Smoothing is available when using ONLY a deeptabular component. NOTE: We consider this feature absolutely experimental and recommend not using it unless the corresponding publication is well understood

Examples:

>>> from pytorch_widedeep.models import TabResnet, Vision, BasicRNN, Wide, WideDeep\n>>> embed_input = [(u, i, j) for u, i, j in zip([\"a\", \"b\", \"c\"][:4], [4] * 3, [8] * 3)]\n>>> column_idx = {k: v for v, k in enumerate([\"a\", \"b\", \"c\"])}\n>>> wide = Wide(10, 1)\n>>> deeptabular = TabResnet(blocks_dims=[8, 4], column_idx=column_idx, cat_embed_input=embed_input)\n>>> deeptext = BasicRNN(vocab_size=10, embed_dim=4, padding_idx=0)\n>>> deepimage = Vision()\n>>> model = WideDeep(wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage)\n

NOTE: It is possible to use custom components to build Wide & Deep models. Simply build them and pass them as the corresponding parameters. Note that the custom models MUST return a last layer of activations (i.e. not the final prediction) so that these activations are collected by WideDeep and combined accordingly. In addition, the models MUST also contain an attribute output_dim with the size of these last layers of activations. See for example pytorch_widedeep.models.tab_mlp.TabMlp
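
For illustration, a minimal (hypothetical) custom deeptext component could look as follows; it returns a last layer of activations and exposes output_dim, as required:

import torch
from torch import nn
from pytorch_widedeep.models import WideDeep

class CustomDeepText(nn.Module):
    # hypothetical component: returns activations (not predictions) and
    # exposes 'output_dim' so that WideDeep can add the prediction layer
    def __init__(self, vocab_size: int, embed_dim: int = 8, out_dim: int = 16):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.linear = nn.Linear(embed_dim, out_dim)
        self._out_dim = out_dim

    @property
    def output_dim(self) -> int:
        return self._out_dim

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        # mean-pool the token embeddings and project to 'output_dim'
        return self.linear(self.embed(X.long()).mean(dim=1))

model = WideDeep(deeptext=CustomDeepText(vocab_size=10))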

Source code in pytorch_widedeep/models/wide_deep.py
@alias(  # noqa: C901\n    \"pred_dim\",\n    [\"num_class\", \"pred_size\"],\n)\ndef __init__(\n    self,\n    wide: Optional[nn.Module] = None,\n    deeptabular: Optional[BaseWDModelComponent] = None,\n    deeptext: Optional[BaseWDModelComponent] = None,\n    deepimage: Optional[BaseWDModelComponent] = None,\n    deephead: Optional[BaseWDModelComponent] = None,\n    head_hidden_dims: Optional[List[int]] = None,\n    head_activation: str = \"relu\",\n    head_dropout: float = 0.1,\n    head_batchnorm: bool = False,\n    head_batchnorm_last: bool = False,\n    head_linear_first: bool = True,\n    enforce_positive: bool = False,\n    enforce_positive_activation: str = \"softplus\",\n    pred_dim: int = 1,\n    with_fds: bool = False,\n    **fds_config,\n):\n    super(WideDeep, self).__init__()\n\n    self._check_inputs(\n        wide,\n        deeptabular,\n        deeptext,\n        deepimage,\n        deephead,\n        head_hidden_dims,\n        pred_dim,\n        with_fds,\n    )\n\n    # this attribute will be eventually over-written by the Trainer's\n    # device. Acts here as a 'placeholder'.\n    self.wd_device: Optional[str] = None\n\n    # required as attribute just in case we pass a deephead\n    self.pred_dim = pred_dim\n\n    self.with_fds = with_fds\n    self.enforce_positive = enforce_positive\n\n    # The main 5 components of the wide and deep assemble: wide,\n    # deeptabular, deeptext, deepimage and deephead\n    self.with_deephead = deephead is not None or head_hidden_dims is not None\n    if deephead is None and head_hidden_dims is not None:\n        self.deephead = self._build_deephead(\n            deeptabular,\n            deeptext,\n            deepimage,\n            head_hidden_dims,\n            head_activation,\n            head_dropout,\n            head_batchnorm,\n            head_batchnorm_last,\n            head_linear_first,\n        )\n    elif deephead is not None:\n        self.deephead = nn.Sequential(\n            deephead, nn.Linear(deephead.output_dim, self.pred_dim)\n        )\n    else:\n        # for consistency with other components we default to None\n        self.deephead = None\n\n    self.wide = wide\n    self.deeptabular, self.deeptext, self.deepimage = self._set_model_components(\n        deeptabular, deeptext, deepimage, self.with_deephead\n    )\n\n    if self.with_fds:\n        self.fds_layer = FDSLayer(feature_dim=self.deeptabular.output_dim, **fds_config)  # type: ignore[arg-type]\n\n    if self.enforce_positive:\n        self.enf_pos = get_activation_fn(enforce_positive_activation)\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.fds_layer.FDSLayer","title":"FDSLayer","text":"
FDSLayer(\n    feature_dim,\n    granularity=100,\n    y_max=None,\n    y_min=None,\n    start_update=0,\n    start_smooth=2,\n    kernel=\"gaussian\",\n    ks=5,\n    sigma=2,\n    momentum=0.9,\n    clip_min=None,\n    clip_max=None,\n)\n

Bases: Module

Feature Distribution Smoothing layer. Please see Delving into Deep Imbalanced Regression for details.

NOTE: this is NOT an available model per se, but rather a utility that is used as part of a WideDeep model. The parameters of this extra layer can be set when the WideDeep class is instantiated, via the keyword arguments fds_config.

NOTE: Feature Distribution Smoothing is available when using ONLY a deeptabular component

NOTE: We consider this feature absolutely experimental and recommend not using it unless the corresponding publication is well understood

The code here is based on the code at the official repo

Parameters:

  • feature_dim (int) \u2013

    input dimension size, i.e. output size of previous layer. This will be the dimension of the output from the deeptabular component

  • granularity (int, default: 100 ) \u2013

    number of bins that the target \\(y\\) is divided into and that will be used to compute the features' statistics (mean and variance)

  • y_max (Optional[float], default: None ) \u2013

    \\(y\\) upper limit to be considered when binning

  • y_min (Optional[float], default: None ) \u2013

    \\(y\\) lower limit to be considered when binning

  • start_update (int, default: 0 ) \u2013

    number of 'waiting epochs' after which the FDS layer will start to update its statistics

  • start_smooth (int, default: 2 ) \u2013

    number of 'waiting epochs' after which the FDS layer will start smoothing the feature distributions

  • kernel (Literal[gaussian, triang, laplace], default: 'gaussian' ) \u2013

    choice of smoothing kernel

  • ks (int, default: 5 ) \u2013

    kernel window size

  • sigma (float, default: 2 ) \u2013

    if a 'gaussian' or 'laplace' kernel is used, this is the corresponding standard deviation

  • momentum (Optional[float], default: 0.9 ) \u2013

    to train the layer, the authors used a momentum update of the running statistics across each epoch. It is set to 0.9 in the paper.

  • clip_min (Optional[float], default: None ) \u2013

    this parameter is used to clip the ratio between the so-called running variance and the smoothed variance, and is introduced for numerical stability. We leave it as optional as we did not find a notable improvement in our experiments. The authors used a value of 0.1

  • clip_max (Optional[float], default: None ) \u2013

    same as clip_min but for the upper limit. We leave it as optional as we did not find a notable improvement in our experiments. The authors used a value of 10.
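
As a sketch of how these arguments are actually supplied, note that they are not passed to FDSLayer directly but forwarded by WideDeep via fds_config (assuming a small toy deeptabular component):

from pytorch_widedeep.models import TabMlp, WideDeep

column_idx = {c: i for i, c in enumerate(["a", "b", "c"])}
cat_embed_input = [(c, 4, 8) for c in ["a", "b", "c"]]
deeptabular = TabMlp(column_idx=column_idx, cat_embed_input=cat_embed_input, mlp_hidden_dims=[16, 8])

# FDS is only available when using ONLY a deeptabular component (regression);
# every keyword after 'with_fds' is forwarded to this FDSLayer
model = WideDeep(
    deeptabular=deeptabular,
    with_fds=True,
    granularity=50,
    start_update=0,
    start_smooth=2,
    kernel="gaussian",
    ks=5,
    sigma=2,
)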

Source code in pytorch_widedeep/models/fds_layer.py
def __init__(\n    self,\n    feature_dim: int,\n    granularity: int = 100,\n    y_max: Optional[float] = None,\n    y_min: Optional[float] = None,\n    start_update: int = 0,\n    start_smooth: int = 2,\n    kernel: Literal[\"gaussian\", \"triang\", \"laplace\"] = \"gaussian\",\n    ks: int = 5,\n    sigma: float = 2,\n    momentum: Optional[float] = 0.9,\n    clip_min: Optional[float] = None,\n    clip_max: Optional[float] = None,\n):\n    \"\"\"\n    Feature Distribution Smoothing layer. Please, see\n    [Delving into Deep Imbalanced Regression](https:/arxiv.org/abs/2102.09554)\n    for details.\n\n    :information_source: **NOTE**: this is NOT an available model per se,\n     but more a utility that can be used as we run a `WideDeep` model.\n     The parameters of this extra layers can be set as the class\n     `WideDeep` is instantiated via the keyword arguments `fds_config`.\n\n    :information_source: **NOTE**: Feature Distribution Smoothing is\n     available when using ONLY a `deeptabular` component\n\n    :information_source: **NOTE**: We consider this feature absolutely\n    experimental and we recommend the user to not use it unless the\n    corresponding [publication](https://arxiv.org/abs/2102.09554) is\n    well understood\n\n    The code here is based on the code at the\n    [official repo](https://github.com/YyzHarry/imbalanced-regression)\n\n    Parameters\n    ----------\n    feature_dim: int,\n        input dimension size, i.e. output size of previous layer. This\n        will be the dimension of the output from the `deeptabular`\n        component\n    granularity: int = 100,\n        number of bins that the target $y$ is divided into and that will\n        be used to compute the features' statistics (mean and variance)\n    y_max: Optional[float] = None,\n        $y$ upper limit to be considered when binning\n    y_min: Optional[float] = None,\n        $y$ lower limit to be considered when binning\n    start_update: int = 0,\n        number of _'waiting epochs' after which the FDS layer will start\n        to update its statistics\n    start_smooth: int = 1,\n        number of _'waiting epochs' after which the FDS layer will start\n        smoothing the feature distributions\n    kernel: Literal[\"gaussian\", \"triang\", \"laplace\", None] = \"gaussian\",\n        choice of smoothing kernel\n    ks: int = 5,\n        kernel window size\n    sigma: Union[int, float] = 2,\n        if a _'gaussian'_ or _'laplace'_ kernels are used, this is the\n        corresponding standard deviation\n    momentum: float = 0.9,\n        to train the layer the authors used a momentum update of the running\n        statistics across each epoch. Set to 0.9 in the paper.\n    clip_min: Optional[float] = None,\n        this parameter is used to clip the ratio between the so called\n        running variance and the smoothed variance, and is introduced for\n        numerical stability. We leave it as optional as we did not find a\n        notable improvement in our experiments. The authors used a value\n        of 0.1\n    clip_max: Optional[float] = None,\n        same as `clip_min` but for the upper limit.We leave it as optional\n        as we did not find a notable improvement in our experiments. 
The\n        authors used a value of 10.\n    \"\"\"\n    super(FDSLayer, self).__init__()\n    assert (\n        start_update + 1 < start_smooth\n    ), \"initial update must start at least 2 epoch before smoothing\"\n\n    self.feature_dim = feature_dim\n    self.granularity = granularity\n    self.y_max = y_max\n    self.y_min = y_min\n    self.kernel_window = torch.tensor(\n        get_kernel_window(kernel, ks, sigma), dtype=torch.float32\n    )\n    self.half_ks = (ks - 1) // 2\n    self.momentum = momentum\n    self.start_update = start_update\n    self.start_smooth = start_smooth\n    self.clip_min = clip_min\n    self.clip_max = clip_max\n\n    self.pred_layer = nn.Linear(feature_dim, 1)\n\n    self._register_buffers()\n
"},{"location":"pytorch-widedeep/preprocessing.html","title":"The preprocessing module","text":"

This module contains the classes that are used to prepare the data before being passed to the models. There is one Preprocessor per data mode or model component: wide, deeptabular, deepimage and deeptext.

"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.WidePreprocessor","title":"WidePreprocessor","text":"
WidePreprocessor(wide_cols, crossed_cols=None)\n

Bases: BasePreprocessor

Preprocessor to prepare the wide input dataset

This Preprocessor prepares the data for the wide, linear component. This linear model is implemented via an Embedding layer that is connected to the output neuron. WidePreprocessor numerically encodes all the unique values of all categorical columns wide_cols + crossed_cols. See the Example below.

Parameters:

  • wide_cols (List[str]) \u2013

    List of strings with the names of the columns that will be label encoded and passed through the wide component

  • crossed_cols (Optional[List[Tuple[str, str]]], default: None ) \u2013

    List of Tuples with the name of the columns that will be 'crossed' and then label encoded. e.g. [('education', 'occupation'), ...]. For binary features, a cross-product transformation is 1 if and only if the constituent features are all 1, and 0 otherwise.

Attributes:

  • wide_crossed_cols (List) \u2013

    List with the names of all columns that will be label encoded

  • encoding_dict (Dict) \u2013

    Dictionary where the keys are the result of pasting colname + '_' + column value and the values are the corresponding mapped integer.

  • inverse_encoding_dict (Dict) \u2013

    the inverse encoding dictionary

  • wide_dim (int) \u2013

    Dimension of the wide model (i.e. dim of the linear layer)

Examples:

>>> import pandas as pd\n>>> from pytorch_widedeep.preprocessing import WidePreprocessor\n>>> df = pd.DataFrame({'color': ['r', 'b', 'g'], 'size': ['s', 'n', 'l']})\n>>> wide_cols = ['color']\n>>> crossed_cols = [('color', 'size')]\n>>> wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\n>>> X_wide = wide_preprocessor.fit_transform(df)\n>>> X_wide\narray([[1, 4],\n       [2, 5],\n       [3, 6]])\n>>> wide_preprocessor.encoding_dict\n{'color_r': 1, 'color_b': 2, 'color_g': 3, 'color_size_r-s': 4, 'color_size_b-n': 5, 'color_size_g-l': 6}\n>>> wide_preprocessor.inverse_transform(X_wide)\n  color color_size\n0     r        r-s\n1     b        b-n\n2     g        g-l\n
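
Following the example above, the fitted preprocessor's wide_dim can then be used to build the linear (Wide) component (a minimal sketch):

import pandas as pd
import torch
from pytorch_widedeep.models import Wide
from pytorch_widedeep.preprocessing import WidePreprocessor

df = pd.DataFrame({'color': ['r', 'b', 'g'], 'size': ['s', 'n', 'l']})
wide_preprocessor = WidePreprocessor(wide_cols=['color'], crossed_cols=[('color', 'size')])
X_wide = wide_preprocessor.fit_transform(df)
# the wide component is an Embedding-based linear model of input dim 'wide_dim'
wide = Wide(input_dim=wide_preprocessor.wide_dim, pred_dim=1)
out = wide(torch.tensor(X_wide))
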
Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def __init__(\n    self, wide_cols: List[str], crossed_cols: Optional[List[Tuple[str, str]]] = None\n):\n    super(WidePreprocessor, self).__init__()\n\n    self.wide_cols = wide_cols\n    self.crossed_cols = crossed_cols\n\n    self.is_fitted = False\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.WidePreprocessor.fit","title":"fit","text":"
fit(df)\n

Fits the Preprocessor and creates required attributes

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • WidePreprocessor \u2013

    WidePreprocessor fitted object

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def fit(self, df: pd.DataFrame) -> \"WidePreprocessor\":\n    r\"\"\"Fits the Preprocessor and creates required attributes\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    WidePreprocessor\n        `WidePreprocessor` fitted object\n    \"\"\"\n    df_wide = self._prepare_wide(df)\n    self.wide_crossed_cols = df_wide.columns.tolist()\n    glob_feature_list = self._make_global_feature_list(\n        df_wide[self.wide_crossed_cols]\n    )\n    # leave 0 for padding/\"unseen\" categories\n    self.encoding_dict = {v: i + 1 for i, v in enumerate(glob_feature_list)}\n    self.wide_dim = len(self.encoding_dict)\n    self.inverse_encoding_dict = {k: v for v, k in self.encoding_dict.items()}\n    self.inverse_encoding_dict[0] = \"unseen\"\n\n    self.is_fitted = True\n\n    return self\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.WidePreprocessor.transform","title":"transform","text":"
transform(df)\n

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ndarray \u2013

    transformed input dataframe

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def transform(self, df: pd.DataFrame) -> np.ndarray:\n    r\"\"\"\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    np.ndarray\n        transformed input dataframe\n    \"\"\"\n    check_is_fitted(self, attributes=[\"encoding_dict\"])\n    df_wide = self._prepare_wide(df)\n    encoded = np.zeros([len(df_wide), len(self.wide_crossed_cols)])\n    for col_i, col in enumerate(self.wide_crossed_cols):\n        encoded[:, col_i] = df_wide[col].apply(\n            lambda x: self.encoding_dict[col + \"_\" + str(x)]\n            if col + \"_\" + str(x) in self.encoding_dict\n            else 0\n        )\n    return encoded.astype(\"int64\")\n
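
Note that, as the source above shows, categories not seen during fit are encoded as 0 (the index reserved for padding/'unseen'); a minimal sketch:

import pandas as pd
from pytorch_widedeep.preprocessing import WidePreprocessor

df = pd.DataFrame({'color': ['r', 'b', 'g'], 'size': ['s', 'n', 'l']})
wide_preprocessor = WidePreprocessor(wide_cols=['color'], crossed_cols=[('color', 'size')])
wide_preprocessor.fit(df)
# 'y' (and the cross 'y-s') were not seen during fit, so both map to 0
X_new = wide_preprocessor.transform(pd.DataFrame({'color': ['y'], 'size': ['s']}))  # array([[0, 0]])
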
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.WidePreprocessor.inverse_transform","title":"inverse_transform","text":"
inverse_transform(encoded)\n

Takes as input the output from the transform method and returns the original values.

Parameters:

  • encoded (ndarray) \u2013

    numpy array with the encoded values that are the output from the transform method

Returns:

  • DataFrame \u2013

    Pandas dataframe with the original values

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def inverse_transform(self, encoded: np.ndarray) -> pd.DataFrame:\n    r\"\"\"Takes as input the output from the `transform` method and it will\n    return the original values.\n\n    Parameters\n    ----------\n    encoded: np.ndarray\n        numpy array with the encoded values that are the output from the\n        `transform` method\n\n    Returns\n    -------\n    pd.DataFrame\n        Pandas dataframe with the original values\n    \"\"\"\n    decoded = pd.DataFrame(encoded, columns=self.wide_crossed_cols)\n    decoded = decoded.map(lambda x: self.inverse_encoding_dict[x])\n    for col in decoded.columns:\n        rm_str = \"\".join([col, \"_\"])\n        decoded[col] = decoded[col].apply(lambda x: x.replace(rm_str, \"\"))\n    return decoded\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.WidePreprocessor.fit_transform","title":"fit_transform","text":"
fit_transform(df)\n

Combines fit and transform

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ndarray \u2013

    transformed input dataframe

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def fit_transform(self, df: pd.DataFrame) -> np.ndarray:\n    \"\"\"Combines `fit` and `transform`\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    np.ndarray\n        transformed input dataframe\n    \"\"\"\n    return self.fit(df).transform(df)\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.tab_preprocessor.TabPreprocessor","title":"TabPreprocessor","text":"
TabPreprocessor(\n    cat_embed_cols=None,\n    continuous_cols=None,\n    quantization_setup=None,\n    cols_to_scale=None,\n    auto_embed_dim=True,\n    embedding_rule=\"fastai_new\",\n    default_embed_dim=16,\n    with_attention=False,\n    with_cls_token=False,\n    shared_embed=False,\n    verbose=1,\n    *,\n    scale=False,\n    already_standard=None,\n    **kwargs\n)\n

Bases: BasePreprocessor

Preprocessor to prepare the deeptabular component input dataset

Parameters:

  • cat_embed_cols (Optional[Union[List[str], List[Tuple[str, int]]]], default: None ) \u2013

    List containing the name of the categorical columns that will be represented by embeddings (e.g. ['education', 'relationship', ...]) or a Tuple with the name and the embedding dimension (e.g.: [ ('education',32), ('relationship',16), ...])

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the continuous cols

  • quantization_setup (Optional[Union[int, Dict[str, Union[int, List[float]]]]], default: None ) \u2013

    Continuous columns can be turned into categorical via pd.cut. If quantization_setup is an int, all continuous columns will be quantized using this value as the number of bins. Alternatively, a dictionary where the keys are the column names to quantize and the values are either integers indicating the number of bins or a list of scalars indicating the bin edges can also be used.

  • cols_to_scale (Optional[Union[List[str], str]], default: None ) \u2013

    List with the names of the columns that will be standardised via sklearn's StandardScaler. It can also be the string 'all', in which case all the continuous cols will be scaled.

  • auto_embed_dim (bool, default: True ) \u2013

    Boolean indicating whether the embedding dimensions will be automatically defined via rule of thumb. See embedding_rule below.

  • embedding_rule (Literal[google, fastai_old, fastai_new], default: 'fastai_new' ) \u2013

    If auto_embed_dim=True, this is the choice of embedding rule of thumb. Choices are:

    • fastai_new: \\(min(600, round(1.6 \\times n_{cat}^{0.56}))\\)

    • fastai_old: \\(min(50, (n_{cat}//{2})+1)\\)

    • google: \\(min(600, round(n_{cat}^{0.24}))\\)

  • default_embed_dim (int, default: 16 ) \u2013

    Dimension for the embeddings if the embedding dimension is not provided in the cat_embed_cols parameter and auto_embed_dim is set to False.

  • with_attention (bool, default: False ) \u2013

    Boolean indicating whether the preprocessed data will be passed to an attention-based model (more precisely, a model where all embeddings must have the same dimensions). If True, the param cat_embed_cols must simply be a list containing the categorical column names: e.g. ['education', 'relationship', ...]. This is because they will all be encoded using embeddings of the same dim, which will be specified later when the model is defined. Param alias: for_transformer

  • with_cls_token (bool, default: False ) \u2013

    Boolean indicating if a '[CLS]' token will be added to the dataset when using attention-based models. The final hidden state corresponding to this token is used as the aggregated representation for classification and regression tasks. If not, the categorical and/or continuous embeddings will be concatenated before being passed to the final MLP (if present).

  • shared_embed (bool, default: False ) \u2013

    Boolean indicating if the embeddings will be \"shared\" when using attention-based models. The idea behind shared_embed is described in Appendix A of the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is being embedded at any given time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • verbose (int, default: 1 ) \u2013
  • scale (bool, default: False ) \u2013

    note: this arg will be removed in upcoming releases. Please use cols_to_scale instead. Bool indicating whether or not to scale/standardise continuous cols. It is important to emphasize that all the DL models for tabular data in the library also include the possibility of normalising the input continuous features via a BatchNorm or a LayerNorm. Param alias: scale_cont_cols.

  • already_standard (Optional[List[str]], default: None ) \u2013

    note: this arg will be removed in upcoming releases. Please use cols_to_scale instead. List with the name of the continuous cols that do not need to be scaled/standardised.

Other Parameters:

  • **kwargs \u2013

    pd.cut and StandardScaler related args

Attributes:

  • embed_dim (Dict) \u2013

    Dictionary where keys are the embed cols and values are the embedding dimensions. If with_attention is set to True this attribute is not generated during the fit process

  • label_encoder (LabelEncoder) \u2013

    see pytorch_widedeep.utils.dense_utils.LabelEncoder

  • cat_embed_input (List) \u2013

    List of Tuples with the column name, number of individual values for that column and, if with_attention is set to False, the corresponding embedding dim, e.g. [('education', 16, 10), ('relationship', 6, 8), ...].

  • standardize_cols (List) \u2013

    List of the columns that will be standardized

  • scaler (StandardScaler) \u2013

    an instance of sklearn.preprocessing.StandardScaler

  • column_idx (Dict) \u2013

    Dictionary where keys are column names and values are column indexes. This is necessary to slice tensors

  • quantizer (Quantizer) \u2013

    an instance of Quantizer

Examples:

>>> import pandas as pd\n>>> import numpy as np\n>>> from pytorch_widedeep.preprocessing import TabPreprocessor\n>>> df = pd.DataFrame({'color': ['r', 'b', 'g'], 'size': ['s', 'n', 'l'], 'age': [25, 40, 55]})\n>>> cat_embed_cols = [('color',5), ('size',5)]\n>>> cont_cols = ['age']\n>>> deep_preprocessor = TabPreprocessor(cat_embed_cols=cat_embed_cols, continuous_cols=cont_cols)\n>>> X_tab = deep_preprocessor.fit_transform(df)\n>>> deep_preprocessor.embed_dim\n{'color': 5, 'size': 5}\n>>> deep_preprocessor.column_idx\n{'color': 0, 'size': 1, 'age': 2}\n>>> cont_df = pd.DataFrame({\"col1\": np.random.rand(10), \"col2\": np.random.rand(10) + 1})\n>>> cont_cols = [\"col1\", \"col2\"]\n>>> tab_preprocessor = TabPreprocessor(continuous_cols=cont_cols, quantization_setup=3)\n>>> ft_cont_df = tab_preprocessor.fit_transform(cont_df)\n>>> # or...\n>>> quantization_setup = {'col1': [0., 0.4, 1.], 'col2': [1., 1.4, 2.]}\n>>> tab_preprocessor2 = TabPreprocessor(continuous_cols=cont_cols, quantization_setup=quantization_setup)\n>>> ft_cont_df2 = tab_preprocessor2.fit_transform(cont_df)\n
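
And, as a further sketch of the attention-oriented options described above (with_attention and with_cls_token), assuming the same toy dataframe:

import pandas as pd
from pytorch_widedeep.preprocessing import TabPreprocessor

df = pd.DataFrame({'color': ['r', 'b', 'g'], 'size': ['s', 'n', 'l'], 'age': [25, 40, 55]})
# for attention-based models the categorical cols are passed as a plain list of names
tab_preprocessor = TabPreprocessor(
    cat_embed_cols=['color', 'size'],
    continuous_cols=['age'],
    with_attention=True,
    with_cls_token=True,
)
X_tab = tab_preprocessor.fit_transform(df)
# a 'cls_token' column is prepended: columns are [cls_token, color, size, age]
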
Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py
@alias(\"with_attention\", [\"for_transformer\"])\n@alias(\"cat_embed_cols\", [\"embed_cols\"])\n@alias(\"scale\", [\"scale_cont_cols\"])\n@alias(\"quantization_setup\", [\"cols_and_bins\"])\ndef __init__(\n    self,\n    cat_embed_cols: Optional[Union[List[str], List[Tuple[str, int]]]] = None,\n    continuous_cols: Optional[List[str]] = None,\n    quantization_setup: Optional[\n        Union[int, Dict[str, Union[int, List[float]]]]\n    ] = None,\n    cols_to_scale: Optional[Union[List[str], str]] = None,\n    auto_embed_dim: bool = True,\n    embedding_rule: Literal[\"google\", \"fastai_old\", \"fastai_new\"] = \"fastai_new\",\n    default_embed_dim: int = 16,\n    with_attention: bool = False,\n    with_cls_token: bool = False,\n    shared_embed: bool = False,\n    verbose: int = 1,\n    *,\n    scale: bool = False,\n    already_standard: Optional[List[str]] = None,\n    **kwargs,\n):\n    super(TabPreprocessor, self).__init__()\n\n    self.continuous_cols = continuous_cols\n    self.quantization_setup = quantization_setup\n    self.cols_to_scale = cols_to_scale\n    self.scale = scale\n    self.already_standard = already_standard\n    self.auto_embed_dim = auto_embed_dim\n    self.embedding_rule = embedding_rule\n    self.default_embed_dim = default_embed_dim\n    self.with_attention = with_attention\n    self.with_cls_token = with_cls_token\n    self.shared_embed = shared_embed\n    self.verbose = verbose\n\n    self.quant_args = {\n        k: v for k, v in kwargs.items() if k in pd.cut.__code__.co_varnames\n    }\n    self.scale_args = {\n        k: v for k, v in kwargs.items() if k in StandardScaler().get_params()\n    }\n\n    self._check_inputs(cat_embed_cols)\n\n    if with_cls_token:\n        self.cat_embed_cols = (\n            [\"cls_token\"] + cat_embed_cols  # type: ignore[operator]\n            if cat_embed_cols is not None\n            else [\"cls_token\"]\n        )\n    else:\n        self.cat_embed_cols = cat_embed_cols  # type: ignore[assignment]\n\n    self.is_fitted = False\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.tab_preprocessor.TabPreprocessor.fit","title":"fit","text":"
fit(df)\n

Fits the Preprocessor and creates required attributes

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • TabPreprocessor \u2013

    TabPreprocessor fitted object

Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py
def fit(self, df: pd.DataFrame) -> BasePreprocessor:  # noqa: C901\n    \"\"\"Fits the Preprocessor and creates required attributes\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    TabPreprocessor\n        `TabPreprocessor` fitted object\n    \"\"\"\n\n    df_adj = self._insert_cls_token(df) if self.with_cls_token else df.copy()\n\n    self.column_idx: Dict[str, int] = {}\n\n    # Categorical embeddings logic\n    if self.cat_embed_cols is not None or self.quantization_setup is not None:\n        self.cat_embed_input: List[Tuple[str, int] | Tuple[str, int, int]] = []\n\n    if self.cat_embed_cols is not None:\n        df_cat, cat_embed_dim = self._prepare_categorical(df_adj)\n\n        self.label_encoder = LabelEncoder(\n            columns_to_encode=df_cat.columns.tolist(),\n            shared_embed=self.shared_embed,\n            with_attention=self.with_attention,\n        )\n        self.label_encoder.fit(df_cat)\n\n        for k, v in self.label_encoder.encoding_dict.items():\n            if self.with_attention:\n                self.cat_embed_input.append((k, len(v)))\n            else:\n                self.cat_embed_input.append((k, len(v), cat_embed_dim[k]))\n\n        self.column_idx.update({k: v for v, k in enumerate(df_cat.columns)})\n\n    # Continuous columns logic\n    if self.continuous_cols is not None:\n        df_cont, cont_embed_dim = self._prepare_continuous(df_adj)\n\n        # Standardization logic\n        if self.standardize_cols is not None:\n            self.scaler = StandardScaler(**self.scale_args).fit(\n                df_cont[self.standardize_cols].values\n            )\n        elif self.verbose:\n            warnings.warn(\"Continuous columns will not be normalised\")\n\n        # Quantization logic\n        if self.cols_and_bins is not None:\n            # we do not run 'Quantizer.fit' here since in the wild case\n            # someone wants standardization and quantization for the same\n            # columns, the Quantizer will run on the scaled data\n            self.quantizer = Quantizer(self.cols_and_bins, **self.quant_args)\n\n            if self.with_attention:\n                for col, n_cat, _ in cont_embed_dim:\n                    self.cat_embed_input.append((col, n_cat))\n            else:\n                self.cat_embed_input.extend(cont_embed_dim)\n\n        self.column_idx.update(\n            {k: v + len(self.column_idx) for v, k in enumerate(df_cont)}\n        )\n\n    self.is_fitted = True\n\n    return self\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.tab_preprocessor.TabPreprocessor.transform","title":"transform","text":"
transform(df)\n

Returns the processed dataframe as a np.ndarray

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ndarray \u2013

    transformed input dataframe

Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py
def transform(self, df: pd.DataFrame) -> np.ndarray:  # noqa: C901\n    \"\"\"Returns the processed `dataframe` as a np.ndarray\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    np.ndarray\n        transformed input dataframe\n    \"\"\"\n    check_is_fitted(self, condition=self.is_fitted)\n\n    df_adj = self._insert_cls_token(df) if self.with_cls_token else df.copy()\n\n    if self.cat_embed_cols is not None:\n        df_cat = df_adj[self.cat_cols]\n        df_cat = self.label_encoder.transform(df_cat)\n    if self.continuous_cols is not None:\n        df_cont = df_adj[self.continuous_cols]\n        # Standardization logic\n        if self.standardize_cols:\n            df_cont[self.standardize_cols] = self.scaler.transform(\n                df_cont[self.standardize_cols].values\n            )\n        # Quantization logic\n        if self.cols_and_bins is not None:\n            # Adjustment so I don't have to override the method\n            # in 'ChunkTabPreprocessor'\n            if self.quantizer.is_fitted:\n                df_cont = self.quantizer.transform(df_cont)\n            else:\n                df_cont = self.quantizer.fit_transform(df_cont)\n    try:\n        df_deep = pd.concat([df_cat, df_cont], axis=1)\n    except NameError:\n        try:\n            df_deep = df_cat.copy()\n        except NameError:\n            df_deep = df_cont.copy()\n\n    return df_deep.values\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.tab_preprocessor.TabPreprocessor.inverse_transform","title":"inverse_transform","text":"
inverse_transform(encoded)\n

Takes as input the output of the transform method and returns the original values.

Parameters:

  • encoded (ndarray) \u2013

    array with the output of the transform method

Returns:

  • DataFrame \u2013

    Pandas dataframe with the original values

Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py
def inverse_transform(self, encoded: np.ndarray) -> pd.DataFrame:  # noqa: C901\n    r\"\"\"Takes as input the output from the `transform` method and it will\n    return the original values.\n\n    Parameters\n    ----------\n    encoded: np.ndarray\n        array with the output of the `transform` method\n\n    Returns\n    -------\n    pd.DataFrame\n        Pandas dataframe with the original values\n    \"\"\"\n    decoded = pd.DataFrame(encoded, columns=list(self.column_idx.keys()))\n    # embeddings back to original category\n    if self.cat_embed_cols is not None:\n        decoded = self.label_encoder.inverse_transform(decoded)\n    if self.continuous_cols is not None:\n        # quantized cols to the mid point\n        if self.cols_and_bins is not None:\n            if self.verbose:\n                print(\n                    \"Note that quantized cols will be turned into the mid point of \"\n                    \"the corresponding bin\"\n                )\n            for k, v in self.quantizer.inversed_bins.items():\n                decoded[k] = decoded[k].map(v)\n        # continuous_cols back to non-standarised\n        try:\n            decoded[self.standardize_cols] = self.scaler.inverse_transform(\n                decoded[self.standardize_cols]\n            )\n        except Exception:  # KeyError:\n            pass\n\n    if \"cls_token\" in decoded.columns:\n        decoded.drop(\"cls_token\", axis=1, inplace=True)\n\n    return decoded\n
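
Continuing the sketch above, the output of transform can be mapped back to (approximately) the original values:

# categorical codes back to categories; standardised columns back to the
# original scale; quantized columns (if any) to the corresponding bin mid points
df_decoded = tab_preprocessor.inverse_transform(X_tab_test)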
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.tab_preprocessor.TabPreprocessor.fit_transform","title":"fit_transform","text":"
fit_transform(df)\n

Combines fit and transform

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ndarray \u2013

    transformed input dataframe

Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py
def fit_transform(self, df: pd.DataFrame) -> np.ndarray:\n    \"\"\"Combines `fit` and `transform`\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    np.ndarray\n        transformed input dataframe\n    \"\"\"\n    return self.fit(df).transform(df)\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.tab_preprocessor.Quantizer","title":"Quantizer","text":"
Quantizer(quantization_setup, **kwargs)\n

Helper class to perform the quantization of continuous columns. It is included in these docs for completeness since, depending on the value of the parameter 'quantization_setup' of the TabPreprocessor class, that class might have an attribute of type Quantizer. However, this class is designed to always run internally within the TabPreprocessor class.

Parameters:

  • quantization_setup (Dict[str, Union[int, List[float]]]) \u2013

    Dictionary where the keys are the column names to quantize and the values are either integers indicating the number of bins or a list of scalars indicating the bin edges.

Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py
def __init__(\n    self,\n    quantization_setup: Dict[str, Union[int, List[float]]],\n    **kwargs,\n):\n    self.quantization_setup = quantization_setup\n    self.quant_args = kwargs\n\n    self.is_fitted = False\n
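
Since Quantizer is designed to run internally, the usual entry point is the quantization_setup parameter (alias cols_and_bins) of the TabPreprocessor. A minimal sketch, with a made-up continuous column:

import numpy as np
import pandas as pd
from pytorch_widedeep.preprocessing import TabPreprocessor

np.random.seed(42)
df = pd.DataFrame({"fare": np.random.uniform(1, 100, size=10)})

# 4 equal-width bins for 'fare'; a list of scalars with explicit bin edges
# would also be accepted as the dictionary value
tab_preprocessor = TabPreprocessor(
    continuous_cols=["fare"], quantization_setup={"fare": 4}
)
X_tab = tab_preprocessor.fit_transform(df)

# the fitted TabPreprocessor now exposes the internally used Quantizer
quantizer = tab_preprocessor.quantizer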
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.text_preprocessor.TextPreprocessor","title":"TextPreprocessor","text":"
TextPreprocessor(\n    text_col,\n    max_vocab=30000,\n    min_freq=5,\n    maxlen=80,\n    pad_first=True,\n    pad_idx=1,\n    already_processed=False,\n    word_vectors_path=None,\n    n_cpus=None,\n    verbose=1,\n)\n

Bases: BasePreprocessor

Preprocessor to prepare the deeptext input dataset

Parameters:

  • text_col (str) \u2013

    column in the input dataframe containing the texts

  • max_vocab (int, default: 30000 ) \u2013

    Maximum number of tokens in the vocabulary

  • min_freq (int, default: 5 ) \u2013

    Minimum frequency for a token to be part of the vocabulary

  • maxlen (int, default: 80 ) \u2013

    Maximum length of the tokenized sequences

  • pad_first (bool, default: True ) \u2013

    Indicates whether the padding index will be added at the beginning or the end of the sequences

  • pad_idx (int, default: 1 ) \u2013

    padding index. Fastai's Tokenizer leaves 0 for the 'unknown' token.

  • already_processed (Optional[bool], default: False ) \u2013

    Boolean indicating if the sequence of elements is already processed or prepared. If this is the case, this Preprocessor will simply tokenize and pad the sequence.

    Param alias: `not_text`.

    This parameter is intended for those cases where the input sequences are already fully processed or are simply not text at all (e.g. IDs)

  • word_vectors_path (Optional[str], default: None ) \u2013

    Path to the pretrained word vectors

  • n_cpus (Optional[int], default: None ) \u2013

    number of CPUs to use during the tokenization process

  • verbose (int, default: 1 ) \u2013

    Enable verbose output.

Attributes:

  • vocab (Vocab) \u2013

    an instance of pytorch_widedeep.utils.fastai_transforms.Vocab

  • embedding_matrix (ndarray) \u2013

    Array with the pretrained embeddings

Examples:

>>> import pandas as pd\n>>> from pytorch_widedeep.preprocessing import TextPreprocessor\n>>> df_train = pd.DataFrame({'text_column': [\"life is like a box of chocolates\",\n... \"You never know what you're gonna get\"]})\n>>> text_preprocessor = TextPreprocessor(text_col='text_column', max_vocab=25, min_freq=1, maxlen=10)\n>>> text_preprocessor.fit_transform(df_train)\nThe vocabulary contains 24 tokens\narray([[ 1,  1,  1,  1, 10, 11, 12, 13, 14, 15],\n       [ 5,  9, 16, 17, 18,  9, 19, 20, 21, 22]], dtype=int32)\n>>> df_te = pd.DataFrame({'text_column': ['you never know what is in the box']})\n>>> text_preprocessor.transform(df_te)\narray([[ 1,  1,  9, 16, 17, 18, 11,  0,  0, 13]], dtype=int32)\n
Source code in pytorch_widedeep/preprocessing/text_preprocessor.py
@alias(\"already_processed\", [\"not_text\"])\ndef __init__(\n    self,\n    text_col: str,\n    max_vocab: int = 30000,\n    min_freq: int = 5,\n    maxlen: int = 80,\n    pad_first: bool = True,\n    pad_idx: int = 1,\n    already_processed: Optional[bool] = False,\n    word_vectors_path: Optional[str] = None,\n    n_cpus: Optional[int] = None,\n    verbose: int = 1,\n):\n    super(TextPreprocessor, self).__init__()\n\n    self.text_col = text_col\n    self.max_vocab = max_vocab\n    self.min_freq = min_freq\n    self.maxlen = maxlen\n    self.pad_first = pad_first\n    self.pad_idx = pad_idx\n    self.already_processed = already_processed\n    self.word_vectors_path = word_vectors_path\n    self.verbose = verbose\n    self.n_cpus = n_cpus if n_cpus is not None else os.cpu_count()\n\n    self.is_fitted = False\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.text_preprocessor.TextPreprocessor.fit","title":"fit","text":"
fit(df)\n

Builds the vocabulary

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • TextPreprocessor \u2013

    TextPreprocessor fitted object

Source code in pytorch_widedeep/preprocessing/text_preprocessor.py
def fit(self, df: pd.DataFrame) -> BasePreprocessor:\n    \"\"\"Builds the vocabulary\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    TextPreprocessor\n        `TextPreprocessor` fitted object\n    \"\"\"\n    texts = self._read_texts(df)\n\n    tokens = get_texts(texts, self.already_processed, self.n_cpus)\n\n    self.vocab: TVocab = Vocab(\n        max_vocab=self.max_vocab,\n        min_freq=self.min_freq,\n        pad_idx=self.pad_idx,\n    ).fit(\n        tokens,\n    )\n\n    if self.verbose:\n        print(\"The vocabulary contains {} tokens\".format(len(self.vocab.stoi)))\n    if self.word_vectors_path is not None:\n        self.embedding_matrix = build_embeddings_matrix(\n            self.vocab, self.word_vectors_path, self.min_freq\n        )\n\n    self.is_fitted = True\n\n    return self\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.text_preprocessor.TextPreprocessor.transform","title":"transform","text":"
transform(df)\n

Returns the padded, 'numericalised' sequences

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ndarray \u2013

    Padded, 'numericalised' sequences

Source code in pytorch_widedeep/preprocessing/text_preprocessor.py
def transform(self, df: pd.DataFrame) -> np.ndarray:\n    \"\"\"Returns the padded, _'numericalised'_ sequences\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    np.ndarray\n        Padded, _'numericalised'_ sequences\n    \"\"\"\n    check_is_fitted(self, attributes=[\"vocab\"])\n    texts = self._read_texts(df)\n    tokens = get_texts(texts, self.already_processed, self.n_cpus)\n    return self._pad_sequences(tokens)\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.text_preprocessor.TextPreprocessor.transform_sample","title":"transform_sample","text":"
transform_sample(text)\n

Returns the padded, 'numericalised' sequence

Parameters:

  • text (str) \u2013

    text to be tokenized and padded

Returns:

  • ndarray \u2013

    Padded, 'numericalised' sequence

Source code in pytorch_widedeep/preprocessing/text_preprocessor.py
def transform_sample(self, text: str) -> np.ndarray:\n    \"\"\"Returns the padded, _'numericalised'_ sequence\n\n    Parameters\n    ----------\n    text: str\n        text to be tokenized and padded\n\n    Returns\n    -------\n    np.ndarray\n        Padded, _'numericalised'_ sequence\n    \"\"\"\n    check_is_fitted(self, attributes=[\"vocab\"])\n    tokens = get_texts([text], self.already_processed, self.n_cpus)\n    return self._pad_sequences(tokens)[0]\n
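
For example, a short sketch of transform_sample on a single string (reusing the toy dataframe from the TextPreprocessor example above; the exact integer indices depend on the fitted vocabulary):

import pandas as pd
from pytorch_widedeep.preprocessing import TextPreprocessor

df_train = pd.DataFrame(
    {"text_column": ["life is like a box of chocolates",
                     "You never know what you're gonna get"]}
)
text_preprocessor = TextPreprocessor(
    text_col="text_column", max_vocab=25, min_freq=1, maxlen=10, verbose=0
)
text_preprocessor.fit(df_train)

# pad and 'numericalise' a single text sample without building a dataframe
padded_sample = text_preprocessor.transform_sample("you never know what is in the box")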
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.text_preprocessor.TextPreprocessor.fit_transform","title":"fit_transform","text":"
fit_transform(df)\n

Combines fit and transform

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ndarray \u2013

    Padded, 'numericalised' sequences

Source code in pytorch_widedeep/preprocessing/text_preprocessor.py
def fit_transform(self, df: pd.DataFrame) -> np.ndarray:\n    \"\"\"Combines `fit` and `transform`\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    np.ndarray\n        Padded, _'numericalised'_ sequences\n    \"\"\"\n    return self.fit(df).transform(df)\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.text_preprocessor.TextPreprocessor.inverse_transform","title":"inverse_transform","text":"
inverse_transform(padded_seq)\n

Returns the original text plus the added 'special' tokens

Parameters:

  • padded_seq (ndarray) \u2013

    array with the output of the transform method

Returns:

  • DataFrame \u2013

    Pandas dataframe with the original text plus the added 'special' tokens

Source code in pytorch_widedeep/preprocessing/text_preprocessor.py
def inverse_transform(self, padded_seq: np.ndarray) -> pd.DataFrame:\n    \"\"\"Returns the original text plus the added 'special' tokens\n\n    Parameters\n    ----------\n    padded_seq: np.ndarray\n        array with the output of the `transform` method\n\n    Returns\n    -------\n    pd.DataFrame\n        Pandas dataframe with the original text plus the added 'special' tokens\n    \"\"\"\n    texts = [self.vocab.inverse_transform(num) for num in padded_seq]\n    return pd.DataFrame({self.text_col: texts})\n
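
Continuing the TextPreprocessor sketch above, the padded sequences can be decoded back to text:

padded_seqs = text_preprocessor.transform(df_train)

# each row is decoded back to text, including padding and other 'special' tokens
df_texts = text_preprocessor.inverse_transform(padded_seqs)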
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.image_preprocessor.ImagePreprocessor","title":"ImagePreprocessor","text":"
ImagePreprocessor(\n    img_col, img_path, width=224, height=224, verbose=1\n)\n

Bases: BasePreprocessor

Preprocessor to prepare the deepimage input dataset.

The Preprocessing consists simply of resizing the images according to their aspect ratio

Parameters:

  • img_col (str) \u2013

    name of the column with the image filenames

  • img_path (str) \u2013

    path to the directory where the images are stored

  • width (int, default: 224 ) \u2013

    width of the resulting processed image.

  • height (int, default: 224 ) \u2013

    height of the resulting processed image.

  • verbose (int, default: 1 ) \u2013

    Enable verbose output.

Attributes:

  • aap (AspectAwarePreprocessor) \u2013

    an instance of pytorch_widedeep.utils.image_utils.AspectAwarePreprocessor

  • spp (SimplePreprocessor) \u2013

    an instance of pytorch_widedeep.utils.image_utils.SimplePreprocessor

  • normalise_metrics (Dict) \u2013

    Dict containing the normalisation metrics of the image dataset, i.e. mean and std for the R, G and B channels

Examples:

>>> import pandas as pd\n>>>\n>>> from pytorch_widedeep.preprocessing import ImagePreprocessor\n>>>\n>>> path_to_image1 = 'tests/test_data_utils/images/galaxy1.png'\n>>> path_to_image2 = 'tests/test_data_utils/images/galaxy2.png'\n>>>\n>>> df_train = pd.DataFrame({'images_column': [path_to_image1]})\n>>> df_test = pd.DataFrame({'images_column': [path_to_image2]})\n>>> img_preprocessor = ImagePreprocessor(img_col='images_column', img_path='.', verbose=0)\n>>> resized_images = img_preprocessor.fit_transform(df_train)\n>>> new_resized_images = img_preprocessor.transform(df_train)\n

NOTE: Normalising metrics will only be computed when the fit_transform method is run. Running transform only will not change the computed metrics and running fit only simply instantiates the resizing functions.

Source code in pytorch_widedeep/preprocessing/image_preprocessor.py
def __init__(\n    self,\n    img_col: str,\n    img_path: str,\n    width: int = 224,\n    height: int = 224,\n    verbose: int = 1,\n):\n    super(ImagePreprocessor, self).__init__()\n\n    self.img_col = img_col\n    self.img_path = img_path\n    self.width = width\n    self.height = height\n    self.verbose = verbose\n\n    self.aap = AspectAwarePreprocessor(self.width, self.height)\n    self.spp = SimplePreprocessor(self.width, self.height)\n\n    self.compute_normalising_computed = False\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.image_preprocessor.ImagePreprocessor.transform","title":"transform","text":"
transform(df)\n

Resizes the images to the input height and width.

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe with the img_col

Returns:

  • ndarray \u2013

    Resized images to the input height and width

Source code in pytorch_widedeep/preprocessing/image_preprocessor.py
def transform(self, df: pd.DataFrame) -> np.ndarray:\n    \"\"\"Resizes the images to the input height and width.\n\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe with the `img_col`\n\n    Returns\n    -------\n    np.ndarray\n        Resized images to the input height and width\n    \"\"\"\n    image_list = df[self.img_col].tolist()\n    if self.verbose:\n        print(\"Reading Images from {}\".format(self.img_path))\n    imgs = [cv2.imread(\"/\".join([self.img_path, img])) for img in image_list]\n\n    # finding images with different height and width\n    aspect = [(im.shape[0], im.shape[1]) for im in imgs]\n    aspect_r = [a[0] / a[1] for a in aspect]\n    diff_idx = [i for i, r in enumerate(aspect_r) if r != 1.0]\n\n    if self.verbose:\n        print(\"Resizing\")\n    resized_imgs = []\n    for i, img in tqdm(enumerate(imgs), total=len(imgs), disable=self.verbose != 1):\n        if i in diff_idx:\n            resized_imgs.append(self.aap.preprocess(img))\n        else:\n            # if aspect ratio is 1:1, no need for AspectAwarePreprocessor\n            resized_imgs.append(self.spp.preprocess(img))\n\n    if not self.compute_normalising_computed:\n        if self.verbose:\n            print(\"Computing normalisation metrics\")\n        # mean and std deviation will only be computed when the fit method\n        # is called\n        mean_R, mean_G, mean_B = [], [], []\n        std_R, std_G, std_B = [], [], []\n        for rsz_img in resized_imgs:\n            (mean_b, mean_g, mean_r), (std_b, std_g, std_r) = cv2.meanStdDev(\n                rsz_img\n            )\n            mean_R.append(mean_r)\n            mean_G.append(mean_g)\n            mean_B.append(mean_b)\n            std_R.append(std_r)\n            std_G.append(std_g)\n            std_B.append(std_b)\n        self.normalise_metrics = dict(\n            mean={\n                \"R\": np.mean(mean_R) / 255.0,\n                \"G\": np.mean(mean_G) / 255.0,\n                \"B\": np.mean(mean_B) / 255.0,\n            },\n            std={\n                \"R\": np.mean(std_R) / 255.0,\n                \"G\": np.mean(std_G) / 255.0,\n                \"B\": np.mean(std_B) / 255.0,\n            },\n        )\n        self.compute_normalising_computed = True\n    return np.asarray(resized_imgs)\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.image_preprocessor.ImagePreprocessor.fit_transform","title":"fit_transform","text":"
fit_transform(df)\n

Combines fit and transform

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ndarray \u2013

    Resized images to the input height and width

Source code in pytorch_widedeep/preprocessing/image_preprocessor.py
def fit_transform(self, df: pd.DataFrame) -> np.ndarray:\n    \"\"\"Combines `fit` and `transform`\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    np.ndarray\n        Resized images to the input height and width\n    \"\"\"\n    return self.fit(df).transform(df)\n
"},{"location":"pytorch-widedeep/preprocessing.html#chunked-versions","title":"Chunked versions","text":"

Chunked versions of the preprocessors are also available. These are useful when the data is too big to fit in memory. See also the load_from_folder module in the library and the corresponding section here in the documentation.

Note that there is not a ChunkImagePreprocessor. This is because the processing of the images will occur inside the ImageFromFolder class in the load_from_folder module.
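
As a rough sketch of the intended workflow (the csv file, column names and chunk sizes below are purely illustrative; the partial_fit method used here is documented below for ChunkWidePreprocessor):

import pandas as pd
from pytorch_widedeep.preprocessing import ChunkWidePreprocessor

chunksize = 100_000
n_chunks = 10  # must match the number of chunks the file will be read in

chunk_wide_preprocessor = ChunkWidePreprocessor(
    wide_cols=["education", "relationship"], n_chunks=n_chunks
)

# first pass: fit chunk by chunk; the preprocessor is fully fitted
# once all n_chunks have been seen
for chunk in pd.read_csv("train.csv", chunksize=chunksize):
    chunk_wide_preprocessor.partial_fit(chunk)

# second pass (or a per-chunk training loop): transform each chunk
for chunk in pd.read_csv("train.csv", chunksize=chunksize):
    X_wide = chunk_wide_preprocessor.transform(chunk)
    # ...feed X_wide to the training routine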

"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.ChunkWidePreprocessor","title":"ChunkWidePreprocessor","text":"
ChunkWidePreprocessor(\n    wide_cols, n_chunks, crossed_cols=None\n)\n

Bases: WidePreprocessor

Preprocessor to prepare the wide input dataset

This Preprocessor prepares the data for the wide, linear component. This linear model is implemented via an Embedding layer that is connected to the output neuron. ChunkWidePreprocessor numerically encodes all the unique values of all categorical columns wide_cols + crossed_cols. See the Example below.

Parameters:

  • wide_cols (List[str]) \u2013

    List of strings with the name of the columns that will be label encoded and passed through the wide component

  • crossed_cols (Optional[List[Tuple[str, str]]], default: None ) \u2013

    List of Tuples with the name of the columns that will be 'crossed' and then label encoded. e.g. [('education', 'occupation'), ...]. For binary features, a cross-product transformation is 1 if and only if the constituent features are all 1, and 0 otherwise.

Attributes:

  • wide_crossed_cols (List) \u2013

    List with the names of all columns that will be label encoded

  • encoding_dict (Dict) \u2013

    Dictionary where the keys are the result of pasting colname + '_' + column value and the values are the corresponding mapped integer.

  • inverse_encoding_dict (Dict) \u2013

    the inverse encoding dictionary

  • wide_dim (int) \u2013

    Dimension of the wide model (i.e. dim of the linear layer)

Examples:

>>> import pandas as pd\n>>> from pytorch_widedeep.preprocessing import ChunkWidePreprocessor\n>>> chunk = pd.DataFrame({'color': ['r', 'b', 'g'], 'size': ['s', 'n', 'l']})\n>>> wide_cols = ['color']\n>>> crossed_cols = [('color', 'size')]\n>>> chunk_wide_preprocessor = ChunkWidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols,\n... n_chunks=1)\n>>> X_wide = chunk_wide_preprocessor.fit_transform(chunk)\n
Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def __init__(\n    self,\n    wide_cols: List[str],\n    n_chunks: int,\n    crossed_cols: Optional[List[Tuple[str, str]]] = None,\n):\n    super(ChunkWidePreprocessor, self).__init__(wide_cols, crossed_cols)\n\n    self.n_chunks = n_chunks\n\n    self.chunk_counter = 0\n\n    self.is_fitted = False\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.ChunkWidePreprocessor.partial_fit","title":"partial_fit","text":"
partial_fit(chunk)\n

Fits the Preprocessor and creates required attributes

Parameters:

  • chunk (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ChunkWidePreprocessor \u2013

    ChunkWidePreprocessor fitted object

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def partial_fit(self, chunk: pd.DataFrame) -> \"ChunkWidePreprocessor\":\n    r\"\"\"Fits the Preprocessor and creates required attributes\n\n    Parameters\n    ----------\n    chunk: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    ChunkWidePreprocessor\n        `ChunkWidePreprocessor` fitted object\n    \"\"\"\n    df_wide = self._prepare_wide(chunk)\n    self.wide_crossed_cols = df_wide.columns.tolist()\n\n    if self.chunk_counter == 0:\n        self.glob_feature_set = set(\n            self._make_global_feature_list(df_wide[self.wide_crossed_cols])\n        )\n    else:\n        self.glob_feature_set.update(\n            self._make_global_feature_list(df_wide[self.wide_crossed_cols])\n        )\n\n    self.chunk_counter += 1\n\n    if self.chunk_counter == self.n_chunks:\n        self.encoding_dict = {v: i + 1 for i, v in enumerate(self.glob_feature_set)}\n        self.wide_dim = len(self.encoding_dict)\n        self.inverse_encoding_dict = {k: v for v, k in self.encoding_dict.items()}\n        self.inverse_encoding_dict[0] = \"unseen\"\n\n        self.is_fitted = True\n\n    return self\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.ChunkWidePreprocessor.fit","title":"fit","text":"
fit(df)\n

Runs partial_fit. This is here just to override the fit method in the base class; this class is not designed to be fitted via fit.

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def fit(self, df: pd.DataFrame) -> \"ChunkWidePreprocessor\":\n    \"\"\"\n    Runs `partial_fit`. This is just to override the fit method in the base\n    class. This class is not designed or thought to run fit\n    \"\"\"\n    return self.partial_fit(df)\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.tab_preprocessor.ChunkTabPreprocessor","title":"ChunkTabPreprocessor","text":"
ChunkTabPreprocessor(\n    n_chunks,\n    cat_embed_cols=None,\n    continuous_cols=None,\n    cols_and_bins=None,\n    cols_to_scale=None,\n    default_embed_dim=16,\n    with_attention=False,\n    with_cls_token=False,\n    shared_embed=False,\n    verbose=1,\n    *,\n    scale=False,\n    already_standard=None,\n    **kwargs\n)\n

Bases: TabPreprocessor

Preprocessor to prepare the deeptabular component input dataset

Parameters:

  • n_chunks (int) \u2013

    Number of chunks that the tabular dataset is divided into.

  • cat_embed_cols (Optional[Union[List[str], List[Tuple[str, int]]]], default: None ) \u2013

    List containing the name of the categorical columns that will be represented by embeddings (e.g. ['education', 'relationship', ...]) or a Tuple with the name and the embedding dimension (e.g.: [ ('education',32), ('relationship',16), ...])

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the continuous cols

  • cols_and_bins (Optional[Dict[str, List[float]]], default: None ) \u2013

    Continuous columns can be turned into categorical via pd.cut. 'cols_and_bins' is a dictionary where the keys are the column names to quantize and the values are a list of scalars indicating the bin edges.

  • cols_to_scale (Optional[Union[List[str], str]], default: None ) \u2013

    List with the names of the columns that will be standardised via sklearn's StandardScaler

  • default_embed_dim (int, default: 16 ) \u2013

    Dimension for the embeddings if the embed_dim is not provided in the cat_embed_cols parameter and auto_embed_dim is set to False.

  • with_attention (bool, default: False ) \u2013

    Boolean indicating whether the preprocessed data will be passed to an attention-based model (more precisely a model where all embeddings must have the same dimensions). If True, the param cat_embed_cols must simply be a list containing the categorical column names: e.g. ['education', 'relationship', ...]. This is because they will all be encoded using embeddings of the same dim, which will be specified later when the model is defined. Param alias: for_transformer

  • with_cls_token (bool, default: False ) \u2013

    Boolean indicating if a '[CLS]' token will be added to the dataset when using attention-based models. The final hidden state corresponding to this token is used as the aggregated representation for classification and regression tasks. If not, the categorical (and continuous embeddings if present) will be concatenated before being passed to the final MLP (if present).

  • shared_embed (bool, default: False ) \u2013

    Boolean indicating if the embeddings will be \"shared\" when using attention-based models. The idea behind shared_embed is described in Appendix A of the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is being embedded at each time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • verbose (int, default: 1 ) \u2013
  • scale (bool, default: False ) \u2013

    note: this arg will be removed in upcoming releases. Please use cols_to_scale instead. Bool indicating whether or not to scale/standardise continuous cols. It is important to emphasize that all the DL models for tabular data in the library also include the possibility of normalising the input continuous features via a BatchNorm or a LayerNorm. Param alias: scale_cont_cols.

  • already_standard (Optional[List[str]], default: None ) \u2013

    note: this arg will be removed in upcoming releases. Please use cols_to_scale instead. List with the name of the continuous cols that do not need to be scaled/standardised.

Other Parameters:

  • **kwargs \u2013

    pd.cut and StandardScaler related args

Attributes:

  • embed_dim (Dict) \u2013

    Dictionary where keys are the embed cols and values are the embedding dimensions. If with_attention is set to True this attribute is not generated during the fit process

  • label_encoder (LabelEncoder) \u2013

    see pytorch_widedeep.utils.dense_utils.LabelEncoder

  • cat_embed_input (List) \u2013

    List of Tuples with the column name, number of individual values for that column and, if with_attention is set to False, the corresponding embeddings dim, e.g. [('education', 16, 10), ('relationship', 6, 8), ...].

  • standardize_cols (List) \u2013

    List of the columns that will be standardised

  • scaler (StandardScaler) \u2013

    an instance of sklearn.preprocessing.StandardScaler if 'cols_to_scale' is not None or 'scale' is 'True'

  • column_idx (Dict) \u2013

    Dictionary where keys are column names and values are column indexes. This is necessary to slice tensors

  • quantizer (Quantizer) \u2013

    an instance of Quantizer

Examples:

>>> import pandas as pd\n>>> import numpy as np\n>>> from pytorch_widedeep.preprocessing import ChunkTabPreprocessor\n>>> np.random.seed(42)\n>>> chunk_df = pd.DataFrame({'cat_col': np.random.choice(['A', 'B', 'C'], size=8),\n... 'cont_col': np.random.uniform(1, 100, size=8)})\n>>> cat_embed_cols = [('cat_col',4)]\n>>> cont_cols = ['cont_col']\n>>> tab_preprocessor = ChunkTabPreprocessor(\n... n_chunks=1, cat_embed_cols=cat_embed_cols, continuous_cols=cont_cols\n... )\n>>> X_tab = tab_preprocessor.fit_transform(chunk_df)\n>>> tab_preprocessor.embed_dim\n{'cat_col': 4}\n>>> tab_preprocessor.column_idx\n{'cat_col': 0, 'cont_col': 1}\n
Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py
@alias(\"with_attention\", [\"for_transformer\"])\n@alias(\"cat_embed_cols\", [\"embed_cols\"])\n@alias(\"scale\", [\"scale_cont_cols\"])\n@alias(\"cols_and_bins\", [\"quantization_setup\"])\ndef __init__(\n    self,\n    n_chunks: int,\n    cat_embed_cols: Optional[Union[List[str], List[Tuple[str, int]]]] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cols_and_bins: Optional[Dict[str, List[float]]] = None,\n    cols_to_scale: Optional[Union[List[str], str]] = None,\n    default_embed_dim: int = 16,\n    with_attention: bool = False,\n    with_cls_token: bool = False,\n    shared_embed: bool = False,\n    verbose: int = 1,\n    *,\n    scale: bool = False,\n    already_standard: Optional[List[str]] = None,\n    **kwargs,\n):\n    super(ChunkTabPreprocessor, self).__init__(\n        cat_embed_cols=cat_embed_cols,\n        continuous_cols=continuous_cols,\n        quantization_setup=None,\n        cols_to_scale=cols_to_scale,\n        auto_embed_dim=False,\n        embedding_rule=\"google\",  # does not matter, irrelevant\n        default_embed_dim=default_embed_dim,\n        with_attention=with_attention,\n        with_cls_token=with_cls_token,\n        shared_embed=shared_embed,\n        verbose=verbose,\n        scale=scale,\n        already_standard=already_standard,\n        **kwargs,\n    )\n\n    self.n_chunks = n_chunks\n    self.chunk_counter = 0\n\n    self.cols_and_bins = cols_and_bins  # type: ignore[assignment]\n    if self.cols_and_bins is not None:\n        self.quantizer = Quantizer(self.cols_and_bins, **self.quant_args)\n\n    self.embed_prepared = False\n    self.continuous_prepared = False\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.text_preprocessor.ChunkTextPreprocessor","title":"ChunkTextPreprocessor","text":"
ChunkTextPreprocessor(\n    text_col,\n    n_chunks,\n    root_dir=None,\n    max_vocab=30000,\n    min_freq=5,\n    maxlen=80,\n    pad_first=True,\n    pad_idx=1,\n    already_processed=False,\n    word_vectors_path=None,\n    n_cpus=None,\n    verbose=1,\n)\n

Bases: TextPreprocessor

Preprocessor to prepare the deeptext input dataset

Parameters:

  • text_col (str) \u2013

    column in the input dataframe containing either the texts or the filenames where the text documents are stored

  • n_chunks (int) \u2013

    Number of chunks that the text dataset is divided into.

  • root_dir (Optional[str], default: None ) \u2013

    If 'text_col' contains the filenames with the text documents, this is the path to the directory where those documents are stored.

  • max_vocab (int, default: 30000 ) \u2013

    Maximum number of tokens in the vocabulary

  • min_freq (int, default: 5 ) \u2013

    Minimum frequency for a token to be part of the vocabulary

  • maxlen (int, default: 80 ) \u2013

    Maximum length of the tokenized sequences

  • pad_first (bool, default: True ) \u2013

    Indicates whether the padding index will be added at the beginning or the end of the sequences

  • pad_idx (int, default: 1 ) \u2013

    padding index. Fastai's Tokenizer leaves 0 for the 'unknown' token.

  • word_vectors_path (Optional[str], default: None ) \u2013

    Path to the pretrained word vectors

  • n_cpus (Optional[int], default: None ) \u2013

    number of CPUs to use during the tokenization process

  • verbose (int, default: 1 ) \u2013

    Enable verbose output.

Attributes:

  • vocab (Vocab) \u2013

    an instance of pytorch_widedeep.utils.fastai_transforms.ChunkVocab

  • embedding_matrix (ndarray) \u2013

    Array with the pretrained embeddings if word_vectors_path is not None

Examples:

>>> import pandas as pd\n>>> from pytorch_widedeep.preprocessing import ChunkTextPreprocessor\n>>> chunk_df = pd.DataFrame({'text_column': [\"life is like a box of chocolates\",\n... \"You never know what you're gonna get\"]})\n>>> chunk_text_preprocessor = ChunkTextPreprocessor(text_col='text_column', n_chunks=1,\n... max_vocab=25, min_freq=1, maxlen=10, verbose=0, n_cpus=1)\n>>> processed_chunk = chunk_text_preprocessor.fit_transform(chunk_df)\n
Source code in pytorch_widedeep/preprocessing/text_preprocessor.py
def __init__(\n    self,\n    text_col: str,\n    n_chunks: int,\n    root_dir: Optional[str] = None,\n    max_vocab: int = 30000,\n    min_freq: int = 5,\n    maxlen: int = 80,\n    pad_first: bool = True,\n    pad_idx: int = 1,\n    already_processed: Optional[bool] = False,\n    word_vectors_path: Optional[str] = None,\n    n_cpus: Optional[int] = None,\n    verbose: int = 1,\n):\n    super(ChunkTextPreprocessor, self).__init__(\n        text_col=text_col,\n        max_vocab=max_vocab,\n        min_freq=min_freq,\n        maxlen=maxlen,\n        pad_first=pad_first,\n        pad_idx=pad_idx,\n        already_processed=already_processed,\n        word_vectors_path=word_vectors_path,\n        n_cpus=n_cpus,\n        verbose=verbose,\n    )\n\n    self.n_chunks = n_chunks\n    self.root_dir = root_dir\n\n    self.chunk_counter = 0\n\n    self.is_fitted = False\n
"},{"location":"pytorch-widedeep/self_supervised_pretraining.html","title":"Self Supervised Pre-training for tabular data","text":"

In this library we have implemented two methods or routines that allow the user to use self-supervised pre-training for all tabular models in the library, with the exception of the TabPerceiver (this is a particular model and self-supervised pre-training requires some adjustments that will be implemented in future versions). Please see the examples folder in the repo or the examples section in the docs for details on how to use self-supervised pre-training with this library.

The two routines implemented are illustrated in the figures below. The first is from TabNet: Attentive Interpretable Tabular Learning. It is a 'standard' encoder-decoder architecture and is designed here for models that do not use transformer-based architectures (or when the embeddings can all have different dimensions). The second is from SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training; it is based on Contrastive and Denoising learning and is designed for models that use transformer-based architectures (or when the embeddings all need to have the same dimension):

Figure 1. Figure 2 in their paper. The caption of the original paper is included in case it is useful.

Figure 2. Figure 1 in their paper. The caption of the original paper is included in case it is useful.

Note that the self-supervised pre-trainers described below focus, of course, on the self-supervised pre-training phase, i.e. the left side in Figure 1 and the upper part in Figure 2. When combined with the Trainer described earlier in the documentation, one can reproduce the full process illustrated in the figures above.

Also note that it is beyond the scope of these docs to explain these routines in detail. In addition, to fully utilise the self-supervised trainers implemented in this library a minimum understanding of the processes as described in the papers is required. Therefore, we strongly encourage the users to have a look at the papers.
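
For orientation, here is a minimal sketch of the encoder-decoder routine (column names, sizes and hyperparameters are illustrative only; see the examples folder for complete, tested scripts). A TabMlp model is pretrained on the tabular data and can afterwards be used as the deeptabular component of a WideDeep model for supervised training:

import numpy as np
import pandas as pd
from pytorch_widedeep.preprocessing import TabPreprocessor
from pytorch_widedeep.models import TabMlp, WideDeep
from pytorch_widedeep.self_supervised_training import EncoderDecoderTrainer

np.random.seed(42)
df = pd.DataFrame(
    {
        "cat_col": np.random.choice(["A", "B", "C"], size=64),
        "cont_col": np.random.uniform(1, 100, size=64),
    }
)

tab_preprocessor = TabPreprocessor(
    cat_embed_cols=["cat_col"], continuous_cols=["cont_col"]
)
X_tab = tab_preprocessor.fit_transform(df)

encoder = TabMlp(
    column_idx=tab_preprocessor.column_idx,
    cat_embed_input=tab_preprocessor.cat_embed_input,
    continuous_cols=["cont_col"],
    mlp_hidden_dims=[16, 8],
)

# if no decoder is passed, it is built automatically as a 'symmetric' model
ed_trainer = EncoderDecoderTrainer(encoder=encoder, masked_prob=0.2)
ed_trainer.pretrain(X_tab, n_epochs=2, batch_size=16)

# the pretrained encoder can now be used as the deeptabular component
model = WideDeep(deeptabular=encoder)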

"},{"location":"pytorch-widedeep/self_supervised_pretraining.html#pytorch_widedeep.self_supervised_training.EncoderDecoderTrainer","title":"EncoderDecoderTrainer","text":"
EncoderDecoderTrainer(\n    encoder,\n    decoder=None,\n    masked_prob=0.2,\n    optimizer=None,\n    lr_scheduler=None,\n    callbacks=None,\n    verbose=1,\n    seed=1,\n    **kwargs\n)\n

Bases: BaseEncoderDecoderTrainer

This class implements an Encoder-Decoder self-supervised 'routine' inspired by TabNet: Attentive Interpretable Tabular Learning. See Figure 1 above.

Parameters:

  • encoder (ModelWithoutAttention) \u2013

    An instance of a TabMlp, TabResNet or TabNet model

  • decoder (Optional[DecoderWithoutAttention], default: None ) \u2013

    An instance of a TabMlpDecoder, TabResNetDecoder or TabNetDecoder model. If None the decoder will be automatically built as a 'symmetric' model to the Encoder

  • masked_prob (float, default: 0.2 ) \u2013

    Indicates the fraction of elements in the embedding tensor that will be masked and hence used for reconstruction

  • optimizer (Optional[Optimizer], default: None ) \u2013

    An instance of Pytorch's Optimizer object (e.g. torch.optim.Adam()). If no optimizer is passed, it will default to AdamW.

  • lr_scheduler (Optional[LRScheduler], default: None ) \u2013

    An instance of Pytorch's LRScheduler object (e.g torch.optim.lr_scheduler.StepLR(opt, step_size=5)).

  • callbacks (Optional[List[Callback]], default: None ) \u2013

    List with Callback objects. The three callbacks available in pytorch-widedeep are: LRHistory, ModelCheckpoint and EarlyStopping. This can also be a custom callback. See pytorch_widedeep.callbacks.Callback or the Examples folder in the repo.

  • verbose (int, default: 1 ) \u2013

    Setting it to 0 will print nothing during training.

  • seed (int, default: 1 ) \u2013

    Random seed to be used internally for train_test_split

Other Parameters:

  • **kwargs \u2013

    Other infrequently used arguments that can also be passed as kwargs are:

    • device: str string indicating the device. One of 'cpu' or 'gpu'

    • num_workers: int number of workers to be used internally by the data loaders

    • reducelronplateau_criterion: str This sets the criterion that will be used by the lr scheduler to take a step: One of 'loss' or 'metric'. The ReduceLROnPlateau learning rate scheduler is a bit particular.

Source code in pytorch_widedeep/self_supervised_training/encoder_decoder_trainer.py
def __init__(\n    self,\n    encoder: ModelWithoutAttention,\n    decoder: Optional[DecoderWithoutAttention] = None,\n    masked_prob: float = 0.2,\n    optimizer: Optional[Optimizer] = None,\n    lr_scheduler: Optional[LRScheduler] = None,\n    callbacks: Optional[List[Callback]] = None,\n    verbose: int = 1,\n    seed: int = 1,\n    **kwargs,\n):\n    super().__init__(\n        encoder=encoder,\n        decoder=decoder,\n        masked_prob=masked_prob,\n        optimizer=optimizer,\n        lr_scheduler=lr_scheduler,\n        callbacks=callbacks,\n        verbose=verbose,\n        seed=seed,\n        **kwargs,\n    )\n
"},{"location":"pytorch-widedeep/self_supervised_pretraining.html#pytorch_widedeep.self_supervised_training.EncoderDecoderTrainer.pretrain","title":"pretrain","text":"
pretrain(\n    X_tab,\n    X_tab_val=None,\n    val_split=None,\n    validation_freq=1,\n    n_epochs=1,\n    batch_size=32,\n)\n

Pretrain method. Can also be called using .fit(<same_args>)

Parameters:

  • X_tab (ndarray) \u2013

    tabular dataset

  • X_tab_val (Optional[ndarray], default: None ) \u2013

    validation data

  • val_split (Optional[float], default: None ) \u2013

    An alternative to passing the validation set is to use a train/val split fraction via val_split

  • validation_freq (int, default: 1 ) \u2013

    validation frequency in epochs

  • n_epochs (int, default: 1 ) \u2013

    number of epochs

  • batch_size (int, default: 32 ) \u2013

    batch size

Source code in pytorch_widedeep/self_supervised_training/encoder_decoder_trainer.py
def pretrain(\n    self,\n    X_tab: np.ndarray,\n    X_tab_val: Optional[np.ndarray] = None,\n    val_split: Optional[float] = None,\n    validation_freq: int = 1,\n    n_epochs: int = 1,\n    batch_size: int = 32,\n):\n    r\"\"\"Pretrain method. Can also be called using `.fit(<same_args>)`\n\n    Parameters\n    ----------\n    X_tab: np.ndarray,\n        tabular dataset\n    X_tab_val: np.ndarray, Optional, default = None\n        validation data\n    val_split: float, Optional. default=None\n        An alterative to passing the validation set is to use a train/val\n        split fraction via `val_split`\n    validation_freq: int, default=1\n        epochs validation frequency\n    n_epochs: int, default=1\n        number of epochs\n    batch_size: int, default=32\n        batch size\n    \"\"\"\n\n    self.batch_size = batch_size\n\n    train_set, eval_set = self._train_eval_split(X_tab, X_tab_val, val_split)\n    train_loader = DataLoader(\n        dataset=train_set, batch_size=batch_size, num_workers=self.num_workers\n    )\n    train_steps = len(train_loader)\n    if eval_set is not None:\n        eval_loader = DataLoader(\n            dataset=eval_set,\n            batch_size=batch_size,\n            num_workers=self.num_workers,\n            shuffle=False,\n        )\n        eval_steps = len(eval_loader)\n\n    self.callback_container.on_train_begin(\n        {\n            \"batch_size\": batch_size,\n            \"train_steps\": train_steps,\n            \"n_epochs\": n_epochs,\n        }\n    )\n    for epoch in range(n_epochs):\n        epoch_logs: Dict[str, float] = {}\n        self.callback_container.on_epoch_begin(epoch, logs=epoch_logs)\n\n        self.train_running_loss = 0.0\n        with trange(train_steps, disable=self.verbose != 1) as t:\n            for batch_idx, X in zip(t, train_loader):\n                t.set_description(\"epoch %i\" % (epoch + 1))\n                train_loss = self._train_step(X[0], batch_idx)\n                self.callback_container.on_batch_end(batch=batch_idx)\n                print_loss_and_metric(t, train_loss)\n\n        epoch_logs = save_epoch_logs(epoch_logs, train_loss, None, \"train\")\n\n        on_epoch_end_metric = None\n        if eval_set is not None and epoch % validation_freq == (\n            validation_freq - 1\n        ):\n            self.callback_container.on_eval_begin()\n            self.valid_running_loss = 0.0\n            with trange(eval_steps, disable=self.verbose != 1) as v:\n                for batch_idx, X in zip(v, eval_loader):\n                    v.set_description(\"valid\")\n                    val_loss = self._eval_step(X[0], batch_idx)\n                    print_loss_and_metric(v, val_loss)\n            epoch_logs = save_epoch_logs(epoch_logs, val_loss, None, \"val\")\n            on_epoch_end_metric = val_loss\n        else:\n            if self.reducelronplateau:\n                raise NotImplementedError(\n                    \"ReduceLROnPlateau scheduler can be used only with validation data.\"\n                )\n\n        self.callback_container.on_epoch_end(epoch, epoch_logs, on_epoch_end_metric)\n\n        if self.early_stop:\n            self.callback_container.on_train_end(epoch_logs)\n            break\n\n    self.callback_container.on_train_end(epoch_logs)\n    self._restore_best_weights()\n    self.ed_model.train()\n
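
For example, continuing the encoder-decoder sketch above, an internal train/validation split can be used instead of an explicit validation array:

# hold out 20% of X_tab for validation during pretraining
ed_trainer.pretrain(X_tab, val_split=0.2, n_epochs=5, batch_size=32)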
"},{"location":"pytorch-widedeep/self_supervised_pretraining.html#pytorch_widedeep.self_supervised_training.EncoderDecoderTrainer.save","title":"save","text":"
save(\n    path,\n    save_state_dict=False,\n    model_filename=\"ed_model.pt\",\n)\n

Saves the model, training and evaluation history (if any) to disk

Parameters:

  • path (str) \u2013

    path to the directory where the model and the feature importance attribute will be saved.

  • save_state_dict (bool, default: False ) \u2013

    Boolean indicating whether to save the model directly or just the model's state dictionary

  • model_filename (str, default: 'ed_model.pt' ) \u2013

    filename where the model weights will be stored

Source code in pytorch_widedeep/self_supervised_training/encoder_decoder_trainer.py
def save(\n    self,\n    path: str,\n    save_state_dict: bool = False,\n    model_filename: str = \"ed_model.pt\",\n):\n    r\"\"\"Saves the model, training and evaluation history (if any) to disk\n\n    Parameters\n    ----------\n    path: str\n        path to the directory where the model and the feature importance\n        attribute will be saved.\n    save_state_dict: bool, default = False\n        Boolean indicating whether to save directly the model or the\n        model's state dictionary\n    model_filename: str, Optional, default = \"ed_model.pt\"\n        filename where the model weights will be store\n    \"\"\"\n    save_dir = Path(path)\n    history_dir = save_dir / \"history\"\n    history_dir.mkdir(exist_ok=True, parents=True)\n\n    # the trainer is run with the History Callback by default\n    with open(history_dir / \"train_eval_history.json\", \"w\") as teh:\n        json.dump(self.history, teh)  # type: ignore[attr-defined]\n\n    has_lr_history = any(\n        [clbk.__class__.__name__ == \"LRHistory\" for clbk in self.callbacks]\n    )\n    if self.lr_scheduler is not None and has_lr_history:\n        with open(history_dir / \"lr_history.json\", \"w\") as lrh:\n            json.dump(self.lr_history, lrh)  # type: ignore[attr-defined]\n\n    model_path = save_dir / model_filename\n    if save_state_dict:\n        torch.save(self.ed_model.state_dict(), model_path)\n    else:\n        torch.save(self.ed_model, model_path)\n
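
Continuing the encoder-decoder sketch shown at the top of this page, one might save the pretrained model's state dictionary together with its training history (the file and directory names below follow the defaults documented above):

# writes 'pretrained/ed_model.pt' and 'pretrained/history/train_eval_history.json'
ed_trainer.save(path="pretrained", save_state_dict=True, model_filename="ed_model.pt")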
"},{"location":"pytorch-widedeep/self_supervised_pretraining.html#pytorch_widedeep.self_supervised_training.ContrastiveDenoisingTrainer","title":"ContrastiveDenoisingTrainer","text":"
ContrastiveDenoisingTrainer(\n    model,\n    preprocessor,\n    optimizer=None,\n    lr_scheduler=None,\n    callbacks=None,\n    loss_type=\"both\",\n    projection_head1_dims=None,\n    projection_head2_dims=None,\n    projection_heads_activation=\"relu\",\n    cat_mlp_type=\"multiple\",\n    cont_mlp_type=\"multiple\",\n    denoise_mlps_activation=\"relu\",\n    verbose=1,\n    seed=1,\n    **kwargs\n)\n

Bases: BaseContrastiveDenoisingTrainer

This class trains a Contrastive, Denoising Self Supervised 'routine' that is based on the one described in SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training, their Figure 1.

Parameters:

  • model (ModelWithAttention) \u2013

    An instance of a TabTransformer, SAINT, FTTransformer, TabFastFormer, TabPerceiver, ContextAttentionMLP or SelfAttentionMLP model.

  • preprocessor (TabPreprocessor) \u2013

    A fitted TabPreprocessor object. See pytorch_widedeep.preprocessing.tab_preprocessor.TabPreprocessor

  • optimizer (Optional[Optimizer], default: None ) \u2013

    An instance of Pytorch's Optimizer object (e.g. torch.optim.Adam()). If no optimizer is passed, it will default to AdamW.

  • lr_scheduler (Optional[LRScheduler], default: None ) \u2013

    An instance of Pytorch's LRScheduler object (e.g torch.optim.lr_scheduler.StepLR(opt, step_size=5)).

  • callbacks (Optional[List[Callback]], default: None ) \u2013

    List with Callback objects. The three callbacks available in pytorch-widedeep are: LRHistory, ModelCheckpoint and EarlyStopping. This can also be a custom callback. See pytorch_widedeep.callbacks.Callback or the Examples folder in the repo.

  • loss_type (Literal[contrastive, denoising, both], default: 'both' ) \u2013

    One of 'contrastive', 'denoising' or 'both'. See SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training, their figure (1) and their equation (5).

  • projection_head1_dims (Optional[List[int]], default: None ) \u2013

    The projection heads are simply MLPs. This parameter is a list of integers with the dimensions of the MLP hidden layers. See the paper for details. Note that setting up this parameter requires some knowledge of the architecture one is using. For example, if we are representing the features with embeddings of dim 32 (i.e. the so-called dimension of the model is 32), then the first dimension of the projection head must be 32 (e.g. [32, 16])

  • projection_head2_dims (Optional[List[int]], default: None ) \u2013

    Same as 'projection_head1_dims' for the second head

  • projection_heads_activation (str, default: 'relu' ) \u2013

    Activation function for the projection heads

  • cat_mlp_type (Literal[single, multiple], default: 'multiple' ) \u2013

    If 'denoising' loss is used, one can choose two types of 'stacked' MLPs to process the output from the transformer-based encoder that receives 'corrupted' (cut-mixed and mixed-up) features. These are 'single' or 'multiple'. The former approach will apply a single MLP to all the categorical features while the latter will use one MLP per categorical feature

  • cont_mlp_type (Literal[single, multiple], default: 'multiple' ) \u2013

    Same as 'cat_mlp_type' but for the continuous features

  • denoise_mlps_activation (str, default: 'relu' ) \u2013

    activation function for the so-called 'denoising MLPs'.

  • verbose (int, default: 1 ) \u2013

    Setting it to 0 will print nothing during training.

  • seed (int, default: 1 ) \u2013

    Random seed to be used internally for train_test_split

Other Parameters:

  • **kwargs \u2013

    Other infrequently used arguments that can also be passed as kwargs are:

    • device: str string indicating the device. One of 'cpu' or 'gpu'

    • num_workers: int number of workers to be used internally by the data loaders

    • reducelronplateau_criterion: str This sets the criterion that will be used by the lr scheduler to take a step: One of 'loss' or 'metric'. The ReduceLROnPlateau learning rate scheduler is a bit particular.

Source code in pytorch_widedeep/self_supervised_training/contrastive_denoising_trainer.py
def __init__(\n    self,\n    model: ModelWithAttention,\n    preprocessor: TabPreprocessor,\n    optimizer: Optional[Optimizer] = None,\n    lr_scheduler: Optional[LRScheduler] = None,\n    callbacks: Optional[List[Callback]] = None,\n    loss_type: Literal[\"contrastive\", \"denoising\", \"both\"] = \"both\",\n    projection_head1_dims: Optional[List[int]] = None,\n    projection_head2_dims: Optional[List[int]] = None,\n    projection_heads_activation: str = \"relu\",\n    cat_mlp_type: Literal[\"single\", \"multiple\"] = \"multiple\",\n    cont_mlp_type: Literal[\"single\", \"multiple\"] = \"multiple\",\n    denoise_mlps_activation: str = \"relu\",\n    verbose: int = 1,\n    seed: int = 1,\n    **kwargs,\n):\n    super().__init__(\n        model=model,\n        preprocessor=preprocessor,\n        loss_type=loss_type,\n        optimizer=optimizer,\n        lr_scheduler=lr_scheduler,\n        callbacks=callbacks,\n        projection_head1_dims=projection_head1_dims,\n        projection_head2_dims=projection_head2_dims,\n        projection_heads_activation=projection_heads_activation,\n        cat_mlp_type=cat_mlp_type,\n        cont_mlp_type=cont_mlp_type,\n        denoise_mlps_activation=denoise_mlps_activation,\n        verbose=verbose,\n        seed=seed,\n        **kwargs,\n    )\n
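
A minimal sketch with an attention-based model (column names are made up and hyperparameters are kept deliberately small; note that the TabPreprocessor must be fitted with with_attention=True so that all embeddings share the same dimension):

import numpy as np
import pandas as pd
from pytorch_widedeep.preprocessing import TabPreprocessor
from pytorch_widedeep.models import TabTransformer
from pytorch_widedeep.self_supervised_training import ContrastiveDenoisingTrainer

np.random.seed(42)
df = pd.DataFrame(
    {
        "cat_col_1": np.random.choice(["A", "B", "C"], size=64),
        "cat_col_2": np.random.choice(["X", "Y"], size=64),
    }
)

tab_preprocessor = TabPreprocessor(
    cat_embed_cols=["cat_col_1", "cat_col_2"], with_attention=True
)
X_tab = tab_preprocessor.fit_transform(df)

model = TabTransformer(
    column_idx=tab_preprocessor.column_idx,
    cat_embed_input=tab_preprocessor.cat_embed_input,
    input_dim=16,
    n_heads=2,
    n_blocks=2,
)

cd_trainer = ContrastiveDenoisingTrainer(
    model=model, preprocessor=tab_preprocessor, loss_type="both"
)
cd_trainer.pretrain(X_tab, n_epochs=2, batch_size=16)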
"},{"location":"pytorch-widedeep/self_supervised_pretraining.html#pytorch_widedeep.self_supervised_training.ContrastiveDenoisingTrainer.pretrain","title":"pretrain","text":"
pretrain(\n    X_tab,\n    X_tab_val=None,\n    val_split=None,\n    validation_freq=1,\n    n_epochs=1,\n    batch_size=32,\n)\n

Pretrain method. Can also be called using .fit(<same_args>)

Parameters:

  • X_tab (ndarray) \u2013

    tabular dataset

  • X_tab_val (Optional[ndarray], default: None ) \u2013

    validation data. Note that, although it is possible to use contrastive-denoising training with a validation set, such set must include feature values that are all seen in the training set in the case of the categorical columns. This is because the values of the columns themselves will be used as targets when computing the loss. Therefore, if a new category is present in the validation set that was not seen in training this will effectively be like trying to predict a new, never seen category (and Pytorch will throw an error)

  • val_split (Optional[float], default: None ) \u2013

    An alternative to passing the validation set is to use a train/val split fraction via val_split

  • validation_freq (int, default: 1 ) \u2013

    validation frequency in epochs

  • n_epochs (int, default: 1 ) \u2013

    number of epochs

  • batch_size (int, default: 32 ) \u2013

    batch size

Source code in pytorch_widedeep/self_supervised_training/contrastive_denoising_trainer.py
def pretrain(\n    self,\n    X_tab: np.ndarray,\n    X_tab_val: Optional[np.ndarray] = None,\n    val_split: Optional[float] = None,\n    validation_freq: int = 1,\n    n_epochs: int = 1,\n    batch_size: int = 32,\n):\n    r\"\"\"Pretrain method. Can also be called using `.fit(<same_args>)`\n\n    Parameters\n    ----------\n    X_tab: np.ndarray,\n        tabular dataset\n    X_tab_val: np.ndarray, Optional, default = None\n        validation data. Note that, although it is possible to use\n        contrastive-denoising training with a validation set, such set\n        must include feature values that are _all_ seen in the training\n        set in the case of the categorical columns. This is because the\n        values of the columns themselves will be used as targets when\n        computing the loss. Therefore, if a new category is present in\n        the validation set that was not seen in training this will\n        effectively be like trying to predict a new, never seen category\n        (and Pytorch will throw an error)\n    val_split: float, Optional. default=None\n        An alterative to passing the validation set is to use a train/val\n        split fraction via `val_split`\n    validation_freq: int, default=1\n        epochs validation frequency\n    n_epochs: int, default=1\n        number of epochs\n    batch_size: int, default=32\n        batch size\n    \"\"\"\n\n    self.batch_size = batch_size\n\n    train_set, eval_set = self._train_eval_split(X_tab, X_tab_val, val_split)\n    train_loader = DataLoader(\n        dataset=train_set, batch_size=batch_size, num_workers=self.num_workers\n    )\n    train_steps = len(train_loader)\n    if eval_set is not None:\n        eval_loader = DataLoader(\n            dataset=eval_set,\n            batch_size=batch_size,\n            num_workers=self.num_workers,\n            shuffle=False,\n        )\n        eval_steps = len(eval_loader)\n\n    self.callback_container.on_train_begin(\n        {\n            \"batch_size\": batch_size,\n            \"train_steps\": train_steps,\n            \"n_epochs\": n_epochs,\n        }\n    )\n    for epoch in range(n_epochs):\n        epoch_logs: Dict[str, float] = {}\n        self.callback_container.on_epoch_begin(epoch, logs=epoch_logs)\n\n        self.train_running_loss = 0.0\n        with trange(train_steps, disable=self.verbose != 1) as t:\n            for batch_idx, X in zip(t, train_loader):\n                t.set_description(\"epoch %i\" % (epoch + 1))\n                train_loss = self._train_step(X[0], batch_idx)\n                self.callback_container.on_batch_end(batch=batch_idx)\n                print_loss_and_metric(t, train_loss)\n\n        epoch_logs = save_epoch_logs(epoch_logs, train_loss, None, \"train\")\n\n        on_epoch_end_metric = None\n        if eval_set is not None and epoch % validation_freq == (\n            validation_freq - 1\n        ):\n            self.callback_container.on_eval_begin()\n            self.valid_running_loss = 0.0\n            with trange(eval_steps, disable=self.verbose != 1) as v:\n                for batch_idx, X in zip(v, eval_loader):\n                    v.set_description(\"valid\")\n                    val_loss = self._eval_step(X[0], batch_idx)\n                    print_loss_and_metric(v, val_loss)\n            epoch_logs = save_epoch_logs(epoch_logs, val_loss, None, \"val\")\n            on_epoch_end_metric = val_loss\n        else:\n            if self.reducelronplateau:\n                raise NotImplementedError(\n                   
 \"ReduceLROnPlateau scheduler can be used only with validation data.\"\n                )\n\n        self.callback_container.on_epoch_end(epoch, epoch_logs, on_epoch_end_metric)\n\n        if self.early_stop:\n            self.callback_container.on_train_end(epoch_logs)\n            break\n\n    self.callback_container.on_train_end(epoch_logs)\n    self._restore_best_weights()\n    self.cd_model.train()\n
"},{"location":"pytorch-widedeep/self_supervised_pretraining.html#pytorch_widedeep.self_supervised_training.ContrastiveDenoisingTrainer.save","title":"save","text":"
save(\n    path,\n    save_state_dict=False,\n    model_filename=\"cd_model.pt\",\n)\n

Saves the model, training and evaluation history (if any) to disk

Parameters:

  • path (str) \u2013

    path to the directory where the model and the feature importance attribute will be saved.

  • save_state_dict (bool, default: False ) \u2013

    Boolean indicating whether to save directly the model or the model's state dictionary

  • model_filename (str, default: 'cd_model.pt' ) \u2013

    filename where the model weights will be stored

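A minimal usage sketch (it assumes a ContrastiveDenoisingTrainer instance named cd_trainer that has already been pretrained; the directory name is illustrative):

cd_trainer.save(\n    path=\"pretrained_weights\",  # directory; created if it does not exist\n    save_state_dict=True,  # save only the state dict instead of the full model\n    model_filename=\"cd_model.pt\",\n)\n
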
Source code in pytorch_widedeep/self_supervised_training/contrastive_denoising_trainer.py
def save(\n    self,\n    path: str,\n    save_state_dict: bool = False,\n    model_filename: str = \"cd_model.pt\",\n):\n    r\"\"\"Saves the model, training and evaluation history (if any) to disk\n\n    Parameters\n    ----------\n    path: str\n        path to the directory where the model and the feature importance\n        attribute will be saved.\n    save_state_dict: bool, default = False\n        Boolean indicating whether to save directly the model or the\n        model's state dictionary\n    model_filename: str, Optional, default = \"cd_model.pt\"\n        filename where the model weights will be store\n    \"\"\"\n    save_dir = Path(path)\n    history_dir = save_dir / \"history\"\n    history_dir.mkdir(exist_ok=True, parents=True)\n\n    # the trainer is run with the History Callback by default\n    with open(history_dir / \"train_eval_history.json\", \"w\") as teh:\n        json.dump(self.history, teh)  # type: ignore[attr-defined]\n\n    has_lr_history = any(\n        [clbk.__class__.__name__ == \"LRHistory\" for clbk in self.callbacks]\n    )\n    if self.lr_scheduler is not None and has_lr_history:\n        with open(history_dir / \"lr_history.json\", \"w\") as lrh:\n            json.dump(self.lr_history, lrh)  # type: ignore[attr-defined]\n\n    model_path = save_dir / model_filename\n    if save_state_dict:\n        torch.save(self.cd_model.state_dict(), model_path)\n    else:\n        torch.save(self.cd_model, model_path)\n
"},{"location":"pytorch-widedeep/tab2vec.html","title":"Tab2Vec","text":""},{"location":"pytorch-widedeep/tab2vec.html#pytorch_widedeep.tab2vec.Tab2Vec","title":"Tab2Vec","text":"
Tab2Vec(\n    tab_preprocessor,\n    model,\n    return_dataframe=False,\n    verbose=False,\n)\n

Class to transform an input dataframe into vectorized form.

This class will take an input dataframe in the form of the dataframe used for training, and it will turn it into a vectorised form based on the processing applied by the model to the categorical and continuous columns.

NOTE: Currently this class is only implemented for the deeptabular component. Therefore, if the input dataframe has a text column or a column with paths to images, these will be ignored. We will be adding these functionalities in future versions.

Parameters:

  • model (Union[WideDeep, BayesianWide, BayesianTabMlp]) \u2013

    WideDeep, BayesianWide or BayesianTabMlp model. Must be trained.

  • tab_preprocessor (TabPreprocessor) \u2013

    TabPreprocessor object. Must be fitted.

  • return_dataframe (bool, default: False ) \u2013

    Boolean indicating whether the returned object(s) will be array(s) or pandas dataframe(s)

Attributes:

  • vectorizer (Module) \u2013

    Torch module with the categorical and continuous encoding process

Examples:

>>> import string\n>>> from random import choices\n>>> import numpy as np\n>>> import pandas as pd\n>>> from pytorch_widedeep import Tab2Vec\n>>> from pytorch_widedeep.models import TabMlp, WideDeep\n>>> from pytorch_widedeep.preprocessing import TabPreprocessor\n>>>\n>>> colnames = list(string.ascii_lowercase)[:4]\n>>> cat_col1_vals = [\"a\", \"b\", \"c\"]\n>>> cat_col2_vals = [\"d\", \"e\", \"f\"]\n>>>\n>>> # Create the toy input dataframe and a toy dataframe to be vectorised\n>>> cat_inp = [np.array(choices(c, k=5)) for c in [cat_col1_vals, cat_col2_vals]]\n>>> cont_inp = [np.round(np.random.rand(5), 2) for _ in range(2)]\n>>> df_inp = pd.DataFrame(np.vstack(cat_inp + cont_inp).transpose(), columns=colnames)\n>>> cat_t2v = [np.array(choices(c, k=5)) for c in [cat_col1_vals, cat_col2_vals]]\n>>> cont_t2v = [np.round(np.random.rand(5), 2) for _ in range(2)]\n>>> df_t2v = pd.DataFrame(np.vstack(cat_t2v + cont_t2v).transpose(), columns=colnames)\n>>>\n>>> # fit the TabPreprocessor\n>>> embed_cols = [(\"a\", 2), (\"b\", 4)]\n>>> cont_cols = [\"c\", \"d\"]\n>>> tab_preprocessor = TabPreprocessor(cat_embed_cols=embed_cols, continuous_cols=cont_cols)\n>>> X_tab = tab_preprocessor.fit_transform(df_inp)\n>>>\n>>> # define the model (and let's assume we train it)\n>>> tabmlp = TabMlp(\n... column_idx=tab_preprocessor.column_idx,\n... cat_embed_input=tab_preprocessor.cat_embed_input,\n... continuous_cols=tab_preprocessor.continuous_cols,\n... mlp_hidden_dims=[8, 4])\n>>> model = WideDeep(deeptabular=tabmlp)\n>>> # ...train the model...\n>>>\n>>> # vectorise the dataframe\n>>> t2v = Tab2Vec(model, tab_preprocessor)\n>>> X_vec = t2v.transform(df_t2v)\n
Source code in pytorch_widedeep/tab2vec.py
def __init__(\n    self,\n    tab_preprocessor: TabPreprocessor,\n    model: Union[WideDeep, BayesianWide, BayesianTabMlp],\n    return_dataframe: bool = False,\n    verbose: bool = False,\n):\n    super(Tab2Vec, self).__init__()\n\n    self._check_inputs(tab_preprocessor, model, verbose)\n\n    self.tab_preprocessor = tab_preprocessor\n    self.return_dataframe = return_dataframe\n    self.verbose = verbose\n\n    self.vectorizer = self._set_vectorizer(model)\n\n    self._set_dim_attributes(tab_preprocessor, model)\n
"},{"location":"pytorch-widedeep/tab2vec.html#pytorch_widedeep.tab2vec.Tab2Vec.fit","title":"fit","text":"
fit(df, target_col=None)\n

This is an empty method, i.e. it returns the unchanged object itself. It is only included for consistency in case Tab2Vec is used as part of a Pipeline

Parameters:

  • df (DataFrame) \u2013

    DataFrame to be vectorised, i.e. the categorical and continuous columns will be encoded based on the processing applied within the model

  • target_col (Optional[str], default: None ) \u2013

    Column name of the target variable. If None, only the array of predictors will be returned

Returns:

  • Tab2Vec \u2013
Source code in pytorch_widedeep/tab2vec.py
def fit(self, df: pd.DataFrame, target_col: Optional[str] = None) -> \"Tab2Vec\":\n    r\"\"\"This is an empty method i.e. Returns the unchanged object itself. Is\n    only included for consistency in case `Tab2Vec` is used as part of a\n    Pipeline\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        DataFrame to be vectorised, i.e. the categorical and continuous\n        columns will be encoded based on the processing applied within\n        the model\n    target_col: str, Optional\n        Column name of the target_col variable. If `None` only the array of\n        predictors will be returned\n\n    Returns\n    -------\n    Tab2Vec\n    \"\"\"\n\n    return self\n
"},{"location":"pytorch-widedeep/tab2vec.html#pytorch_widedeep.tab2vec.Tab2Vec.transform","title":"transform","text":"
transform(df, target_col=None)\n

Transforms the input dataframe into vectorized form. If a target column name is passed, the target values will be returned separately in their corresponding type (np.ndarray or pd.DataFrame)

Parameters:

  • df (DataFrame) \u2013

    DataFrame to be vectorised, i.e. the categorical and continuous columns will be encoded based on the processing applied within the model

  • target_col (Optional[str], default: None ) \u2013

    Column name of the target variable. If None, only the array of predictors will be returned

Returns:

  • Union[np.ndarray, Tuple[np.ndarray, np.ndarray], pd.DataFrame, Tuple[pd.DataFrame, pd.Series]] \u2013

    Returns either a numpy array with the vectorised values, or a Tuple of numpy arrays with the vectorised values and the target. The same applies to dataframes if we choose to set return_dataframe = True

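For illustration, a hedged sketch of calling transform (it assumes the fitted tab_preprocessor, the trained model and a dataframe df with a column named 'target'; keyword arguments are used to avoid any ambiguity in the argument order):

t2v = Tab2Vec(tab_preprocessor=tab_preprocessor, model=model, return_dataframe=True)\n\n# without a target column: only the vectorised predictors are returned\nX_vec_df = t2v.transform(df)\n\n# with a target column: predictors and target are returned separately\nX_vec_df, y_df = t2v.transform(df, target_col=\"target\")\n
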
Source code in pytorch_widedeep/tab2vec.py
def transform(\n    self,\n    df: pd.DataFrame,\n    target_col: Optional[str] = None,\n) -> Union[\n    np.ndarray,\n    Tuple[np.ndarray, np.ndarray],\n    pd.DataFrame,\n    Tuple[pd.DataFrame, pd.Series],\n]:\n    r\"\"\"Transforms the input dataframe into vectorized form. If a target\n    column name is passed the target values will be returned separately\n    in their corresponding type (np.ndarray or pd.DataFrame)\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        DataFrame to be vectorised, i.e. the categorical and continuous\n        columns will be encoded based on the processing applied within\n        the model\n    target_col: str, Optional\n        Column name of the target_col variable. If `None` only the array of\n        predictors will be returned\n\n    Returns\n    -------\n    Union[np.ndarray, Tuple[np.ndarray, np.ndarray], pd.DataFrame, Tuple[pd.DataFrame, pd.Series]\n        Returns eiter a numpy array with the vectorised values, or a Tuple\n        of numpy arrays with the vectorised values and the target. The\n        same applies to dataframes in case we choose to set\n        `return_dataframe = True`\n    \"\"\"\n\n    X_tab = self.tab_preprocessor.transform(df)\n    X = torch.from_numpy(X_tab.astype(\"float\")).to(device)\n\n    with torch.no_grad():\n        if self.is_tab_transformer:\n            x_vec, x_cont_not_embed = self.vectorizer(X)\n        else:\n            x_vec = self.vectorizer(X)\n            x_cont_not_embed = None\n\n    if self.tab_preprocessor.with_cls_token:\n        x_vec = x_vec[:, 1:, :]\n\n    if self.tab_preprocessor.with_attention:\n        x_vec = einops.rearrange(x_vec, \"s c e -> s (c e)\")\n\n    if x_cont_not_embed is not None:\n        x_vec = torch.cat([x_vec, x_cont_not_embed], 1).detach().cpu().numpy()\n    else:\n        x_vec = x_vec.detach().cpu().numpy()\n\n    if self.return_dataframe:\n        new_colnames = self._new_colnames()\n        if target_col:\n            return pd.DataFrame(data=x_vec, columns=new_colnames), df[[target_col]]\n        else:\n            return pd.DataFrame(data=x_vec, columns=new_colnames)\n    else:\n        if target_col:\n            return x_vec, df[target_col].values\n        else:\n            return x_vec\n
"},{"location":"pytorch-widedeep/tab2vec.html#pytorch_widedeep.tab2vec.Tab2Vec.fit_transform","title":"fit_transform","text":"
fit_transform(df, target_col=None)\n

Combines fit and transform

Source code in pytorch_widedeep/tab2vec.py
def fit_transform(\n    self, df: pd.DataFrame, target_col: Optional[str] = None\n) -> Union[\n    np.ndarray,\n    Tuple[np.ndarray, np.ndarray],\n    pd.DataFrame,\n    Tuple[pd.DataFrame, pd.Series],\n]:\n    r\"\"\"Combines `fit` and `transform`\"\"\"\n    return self.fit(df, target_col).transform(df, target_col)\n
"},{"location":"pytorch-widedeep/trainer.html","title":"Training multimodal Deep Learning Models","text":"

Here is the documentation for the Trainer class, which will do all the heavy lifting.

Trainer is also available directly from pytorch-widedeep; for example, one could do:

    from pytorch_widedeep.training import Trainer\n

or also:

    from pytorch_widedeep import Trainer\n
"},{"location":"pytorch-widedeep/trainer.html#pytorch_widedeep.training.Trainer","title":"Trainer","text":"
Trainer(\n    model,\n    objective,\n    custom_loss_function=None,\n    optimizers=None,\n    lr_schedulers=None,\n    initializers=None,\n    transforms=None,\n    callbacks=None,\n    metrics=None,\n    verbose=1,\n    seed=1,\n    **kwargs\n)\n

Bases: BaseTrainer

Class to set the attributes that will be used during the training process.

Parameters:

  • model (WideDeep) \u2013

    An object of class WideDeep

  • objective (str) \u2013

    Defines the objective, loss or cost function.

    Param aliases: loss_function, loss_fn, loss, cost_function, cost_fn, cost.

    Possible values are:

    • binary, aliases: logistic, binary_logloss, binary_cross_entropy

    • binary_focal_loss

    • multiclass, aliases: multi_logloss, cross_entropy, categorical_cross_entropy

    • multiclass_focal_loss

    • regression, aliases: mse, l2, mean_squared_error

    • mean_absolute_error, aliases: mae, l1

    • mean_squared_log_error, aliases: msle

    • root_mean_squared_error, aliases: rmse

    • root_mean_squared_log_error, aliases: rmsle

    • zero_inflated_lognormal, aliases: ziln

    • quantile

    • tweedie

  • custom_loss_function (Optional[Module], default: None ) \u2013

    It is possible to pass a custom loss function. See, for example, pytorch_widedeep.losses.FocalLoss for the required structure of the object, the Examples section in this documentation or in the repo, or the sketch shown after the Examples below. Note that if custom_loss_function is not None, objective must be 'binary', 'multiclass' or 'regression', consistent with the loss function

  • optimizers (Optional[Union[Optimizer, Dict[str, Optimizer]]], default: None ) \u2013
    • An instance of Pytorch's Optimizer object (e.g. torch.optim.Adam()) or
    • a dictionary where the keys are the model components (i.e. 'wide', 'deeptabular', 'deeptext', 'deepimage' and/or 'deephead') and the values are the corresponding optimizers. If multiple optimizers are used, the dictionary MUST contain an optimizer per model component.

    If no optimizers are passed, Adam will be used for all model components.

  • lr_schedulers (Optional[Union[LRScheduler, Dict[str, LRScheduler]]], default: None ) \u2013
    • An instance of Pytorch's LRScheduler object (e.g torch.optim.lr_scheduler.StepLR(opt, step_size=5)) or
    • a dictionary where the keys are the model components (i.e. 'wide', 'deeptabular', 'deeptext', 'deepimage' and/or 'deephead') and the values are the corresponding learning rate schedulers.
  • initializers (Optional[Union[Initializer, Dict[str, Initializer]]], default: None ) \u2013
    • An instance of an Initializer object (see pytorch_widedeep.initializers) or
    • a dictionary where the keys are the model components (i.e. 'wide', 'deeptabular', 'deeptext', 'deepimage' and/or 'deephead') and the values are the corresponding initializers.
  • transforms (Optional[List[Transforms]], default: None ) \u2013

    List with torchvision.transforms to be applied to the image component of the model (i.e. deepimage). See torchvision transforms.

  • callbacks (Optional[List[Callback]], default: None ) \u2013

    List with Callback objects. The three callbacks available in pytorch-widedeep are: LRHistory, ModelCheckpoint and EarlyStopping. The History and the LRSchedulerCallback callbacks are used by default. This can also be a custom callback as long as it is an object of type Callback. See pytorch_widedeep.callbacks.Callback or the examples folder in the repo.

  • metrics (Optional[Union[List[Metric], List[TorchMetric]]], default: None ) \u2013
    • List of objects of type Metric. Metrics available are: Accuracy, Precision, Recall, FBetaScore, F1Score and R2Score. This can also be a custom metric as long as it is an object of type Metric. See pytorch_widedeep.metrics.Metric or the examples folder in the repo
    • List of objects of type torchmetrics.Metric. This can be any metric from the torchmetrics library (see their examples). This can also be a custom metric as long as it is an object of type Metric; see the corresponding instructions.
  • verbose (int, default: 1 ) \u2013

    Verbosity level. If set to 0 nothing will be printed during training

  • seed (int, default: 1 ) \u2013

    Random seed to be used internally for train/test split

Other Parameters:

  • **kwargs \u2013

    Other infrequently used arguments that can also be passed as kwargs are (a short sketch of how they are passed is shown after this list):

    • device: str string indicating the device. One of 'cpu' or 'gpu'

    • num_workers: int number of workers to be used internally by the data loaders

    • lambda_sparse: float lambda sparse parameter in case the deeptabular component is TabNet

    • class_weight: List[float] This is the weight or pos_weight parameter in CrossEntropyLoss and BCEWithLogitsLoss, depending on whether the objective is multiclass or binary, respectively

    • reducelronplateau_criterion: str This sets the criterion that will be used by the lr scheduler to take a step: one of 'loss' or 'metric'. The ReduceLROnPlateau learning rate scheduler is a bit particular.

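As a sketch of how these keyword arguments are passed (the values below are illustrative assumptions, not defaults; model is assumed to be a WideDeep instance):

trainer = Trainer(\n    model,\n    objective=\"binary\",\n    device=\"cpu\",  # run on CPU\n    num_workers=4,  # workers used internally by the data loaders\n)\n
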
Attributes:

  • cyclic_lr (bool) \u2013

    Attribute that indicates if any of the lr_schedulers is cyclic_lr (i.e. CyclicLR or OneCycleLR). See Pytorch schedulers.

  • feature_importance (dict) \u2013

    dict where the keys are the column names and the values are the corresponding feature importances. This attribute will only exist if the deeptabular component is a Tabnet model.

Examples:

>>> import torch\n>>> from torchvision.transforms import ToTensor\n>>>\n>>> # wide deep imports\n>>> from pytorch_widedeep.callbacks import EarlyStopping, LRHistory\n>>> from pytorch_widedeep.initializers import KaimingNormal, KaimingUniform, Normal, Uniform\n>>> from pytorch_widedeep.models import TabResnet, Vision, BasicRNN, Wide, WideDeep\n>>> from pytorch_widedeep import Trainer\n>>>\n>>> embed_input = [(u, i, j) for u, i, j in zip([\"a\", \"b\", \"c\"][:4], [4] * 3, [8] * 3)]\n>>> column_idx = {k: v for v, k in enumerate([\"a\", \"b\", \"c\"])}\n>>> wide = Wide(10, 1)\n>>>\n>>> # build the model\n>>> deeptabular = TabResnet(blocks_dims=[8, 4], column_idx=column_idx, cat_embed_input=embed_input)\n>>> deeptext = BasicRNN(vocab_size=10, embed_dim=4, padding_idx=0)\n>>> deepimage = Vision()\n>>> model = WideDeep(wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage)\n>>>\n>>> # set optimizers and schedulers\n>>> wide_opt = torch.optim.Adam(model.wide.parameters())\n>>> deep_opt = torch.optim.AdamW(model.deeptabular.parameters())\n>>> text_opt = torch.optim.Adam(model.deeptext.parameters())\n>>> img_opt = torch.optim.AdamW(model.deepimage.parameters())\n>>>\n>>> wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=5)\n>>> deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=3)\n>>> text_sch = torch.optim.lr_scheduler.StepLR(text_opt, step_size=5)\n>>> img_sch = torch.optim.lr_scheduler.StepLR(img_opt, step_size=3)\n>>>\n>>> optimizers = {\"wide\": wide_opt, \"deeptabular\": deep_opt, \"deeptext\": text_opt, \"deepimage\": img_opt}\n>>> schedulers = {\"wide\": wide_sch, \"deeptabular\": deep_sch, \"deeptext\": text_sch, \"deepimage\": img_sch}\n>>>\n>>> # set initializers and callbacks\n>>> initializers = {\"wide\": Uniform, \"deeptabular\": Normal, \"deeptext\": KaimingNormal, \"deepimage\": KaimingUniform}\n>>> transforms = [ToTensor]\n>>> callbacks = [LRHistory(n_epochs=4), EarlyStopping]\n>>>\n>>> # set the trainer\n>>> trainer = Trainer(model, objective=\"regression\", initializers=initializers, optimizers=optimizers,\n... lr_schedulers=schedulers, callbacks=callbacks, transforms=transforms)\n
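
Regarding custom_loss_function, the sketch below shows the expected structure: an nn.Module whose forward takes the predictions and the target and returns a scalar loss. The HuberLoss class is a hypothetical example, not part of the library.

import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n# hypothetical custom loss, for illustration only: an nn.Module whose forward\n# takes the predictions and the target and returns a scalar loss\nclass HuberLoss(nn.Module):\n    def __init__(self, delta: float = 1.0):\n        super().__init__()\n        self.delta = delta\n\n    def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:\n        return F.huber_loss(input, target.view_as(input), delta=self.delta)\n\n# trainer = Trainer(model, objective=\"regression\", custom_loss_function=HuberLoss())\n
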
Source code in pytorch_widedeep/training/trainer.py
@alias(  # noqa: C901\n    \"objective\",\n    [\"loss_function\", \"loss_fn\", \"loss\", \"cost_function\", \"cost_fn\", \"cost\"],\n)\ndef __init__(\n    self,\n    model: WideDeep,\n    objective: str,\n    custom_loss_function: Optional[nn.Module] = None,\n    optimizers: Optional[Union[Optimizer, Dict[str, Optimizer]]] = None,\n    lr_schedulers: Optional[Union[LRScheduler, Dict[str, LRScheduler]]] = None,\n    initializers: Optional[Union[Initializer, Dict[str, Initializer]]] = None,\n    transforms: Optional[List[Transforms]] = None,\n    callbacks: Optional[List[Callback]] = None,\n    metrics: Optional[Union[List[Metric], List[TorchMetric]]] = None,\n    verbose: int = 1,\n    seed: int = 1,\n    **kwargs,\n):\n    super().__init__(\n        model=model,\n        objective=objective,\n        custom_loss_function=custom_loss_function,\n        optimizers=optimizers,\n        lr_schedulers=lr_schedulers,\n        initializers=initializers,\n        transforms=transforms,\n        callbacks=callbacks,\n        metrics=metrics,\n        verbose=verbose,\n        seed=seed,\n        **kwargs,\n    )\n
"},{"location":"pytorch-widedeep/trainer.html#pytorch_widedeep.training.Trainer.fit","title":"fit","text":"
fit(\n    X_wide=None,\n    X_tab=None,\n    X_text=None,\n    X_img=None,\n    X_train=None,\n    X_val=None,\n    val_split=None,\n    target=None,\n    n_epochs=1,\n    validation_freq=1,\n    batch_size=32,\n    custom_dataloader=None,\n    feature_importance_sample_size=None,\n    finetune=False,\n    with_lds=False,\n    **kwargs\n)\n

Fit method.

The input datasets can be passed either directly via numpy arrays (X_wide, X_tab, X_text or X_img) or alternatively, in dictionaries (X_train or X_val).

Parameters:

  • X_wide (Optional[ndarray], default: None ) \u2013

    Input for the wide model component. See pytorch_widedeep.preprocessing.WidePreprocessor

  • X_tab (Optional[ndarray], default: None ) \u2013

    Input for the deeptabular model component. See pytorch_widedeep.preprocessing.TabPreprocessor

  • X_text (Optional[ndarray], default: None ) \u2013

    Input for the deeptext model component. See pytorch_widedeep.preprocessing.TextPreprocessor

  • X_img (Optional[ndarray], default: None ) \u2013

    Input for the deepimage model component. See pytorch_widedeep.preprocessing.ImagePreprocessor

  • X_train (Optional[Dict[str, ndarray]], default: None ) \u2013

    The training dataset can also be passed in a dictionary. Keys are 'X_wide', 'X_tab', 'X_text', 'X_img' and 'target'. Values are the corresponding matrices.

  • X_val (Optional[Dict[str, ndarray]], default: None ) \u2013

    The validation dataset can also be passed in a dictionary. Keys are 'X_wide', 'X_tab', 'X_text', 'X_img' and 'target'. Values are the corresponding matrices.

  • val_split (Optional[float], default: None ) \u2013

    train/val split fraction

  • target (Optional[ndarray], default: None ) \u2013

    target values

  • n_epochs (int, default: 1 ) \u2013

    number of epochs

  • validation_freq (int, default: 1 ) \u2013

    epochs validation frequency

  • batch_size (int, default: 32 ) \u2013

    batch size

  • custom_dataloader (Optional[DataLoader], default: None ) \u2013

    object of class torch.utils.data.DataLoader. Available predefined dataloaders are in pytorch_widedeep.dataloaders. If None, a standard torch DataLoader is used.

  • finetune (bool, default: False ) \u2013

    fine-tune individual model components. This functionality can also be used to 'warm-up' individual components before the joint training starts (hence the alias warmup). See the Examples folder in the repo for more details

    pytorch_widedeep implements 3 fine-tune routines.

    • fine-tune all trainable layers at once. This routine is inspired by the work of Howard & Ruder (2018) in their ULMfit paper. Using Slanted Triangular learning rates (see the Leslie N. Smith paper), the process is the following: i) the learning rate gradually increases for 10% of the training steps from max_lr/10 to max_lr; ii) it then gradually decreases to max_lr/10 for the remaining 90% of the steps. The optimizer used in the process is Adam.

    and two gradual fine-tune routines, where only certain layers are trained at a time.

    • The so-called Felbo gradual fine-tune routine, based on the Felbo et al. (2017) DeepEmoji paper.
    • The Howard routine, based on the work of Howard & Ruder (2018) in their ULMfit paper.

    For details on how these routines work, please see the Examples section in this documentation and the Examples folder in the repo. Param Alias: warmup

  • with_lds (bool, default: False ) \u2013

    Boolean indicating if Label Distribution Smoothing will be used. NOTE: We consider this feature absolutely experimental and we recommend not using it unless the corresponding publication is well understood

Other Parameters:

  • **kwargs (dict) \u2013

    Other keyword arguments are:

    • DataLoader related parameters: For example, sampler, batch_sampler, collate_fn, etc. Please, see the pytorch DataLoader docs for details.

    • Label Distribution Smoothing related parameters:

      • lds_kernel (Literal['gaussian', 'triang', 'laplace']): choice of kernel for Label Distribution Smoothing
      • lds_ks (int): LDS kernel window size
      • lds_sigma (float): standard deviation of ['gaussian','laplace'] kernel for LDS
      • lds_granularity (int): number of bins in the histogram used in LDS to count the occurrence of sample values
      • lds_reweight (bool): option to reweight bin frequency counts in LDS
      • lds_y_max (Optional[float]): option to restrict LDS bins by upper label limit
      • lds_y_min (Optional[float]): option to restrict LDS bins by lower label limit

      See pytorch_widedeep.trainer._wd_dataset for more details on the implications of these parameters

    • Finetune related parameters: see the source code at pytorch_widedeep._finetune. Namely, these are:

      • finetune_epochs (int): number of epochs used for fine-tuning
      • finetune_max_lr (float): max lr during fine tuning
      • routine (str): one of 'howard' or 'felbo'
      • deeptabular_gradual (bool): boolean indicating if the deeptabular component will be fine tuned gradually
      • deeptabular_layers (List[nn.Module]): List of pytorch modules indicating the layers of the deeptabular that will be fine tuned
      • deeptabular_max_lr (float): max lr for the deeptabular component during fine-tuning
      • deeptext_gradual (bool): same as deeptabular_gradual but for the deeptext component
      • deeptext_layers (List[nn.Module]): same as deeptabular_gradual but for the deeptext component
      • deeptext_max_lr (float): same as deeptabular_gradual but for the deeptext component
      • deepimage_gradual (bool): same as deeptabular_gradual but for the deepimage component
      • deepimage_layers (List[nn.Module]): same as deeptabular_gradual but for the deepimage component
      • deepimage_max_lr (float): same as deeptabular_gradual but for the deepimage component

Examples:

For a series of comprehensive examples on how to use the fit method, please see the Examples folder in the repo

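As a minimal, hedged sketch (it assumes X_wide, X_tab and target have been produced by the corresponding preprocessors and that trainer wraps a WideDeep model):

trainer.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    target=target,\n    n_epochs=5,\n    batch_size=64,\n    val_split=0.2,  # hold out 20% of the data for validation\n)\n
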
Source code in pytorch_widedeep/training/trainer.py
@alias(\"finetune\", [\"warmup\"])\ndef fit(  # noqa: C901\n    self,\n    X_wide: Optional[np.ndarray] = None,\n    X_tab: Optional[np.ndarray] = None,\n    X_text: Optional[np.ndarray] = None,\n    X_img: Optional[np.ndarray] = None,\n    X_train: Optional[Dict[str, np.ndarray]] = None,\n    X_val: Optional[Dict[str, np.ndarray]] = None,\n    val_split: Optional[float] = None,\n    target: Optional[np.ndarray] = None,\n    n_epochs: int = 1,\n    validation_freq: int = 1,\n    batch_size: int = 32,\n    custom_dataloader: Optional[DataLoader] = None,\n    feature_importance_sample_size: Optional[int] = None,\n    finetune: bool = False,\n    with_lds: bool = False,\n    **kwargs,\n):\n    r\"\"\"Fit method.\n\n    The input datasets can be passed either directly via numpy arrays\n    (`X_wide`, `X_tab`, `X_text` or `X_img`) or alternatively, in\n    dictionaries (`X_train` or `X_val`).\n\n    Parameters\n    ----------\n    X_wide: np.ndarray, Optional. default=None\n        Input for the `wide` model component.\n        See `pytorch_widedeep.preprocessing.WidePreprocessor`\n    X_tab: np.ndarray, Optional. default=None\n        Input for the `deeptabular` model component.\n        See `pytorch_widedeep.preprocessing.TabPreprocessor`\n    X_text: np.ndarray, Optional. default=None\n        Input for the `deeptext` model component.\n        See `pytorch_widedeep.preprocessing.TextPreprocessor`\n    X_img: np.ndarray, Optional. default=None\n        Input for the `deepimage` model component.\n        See `pytorch_widedeep.preprocessing.ImagePreprocessor`\n    X_train: Dict, Optional. default=None\n        The training dataset can also be passed in a dictionary. Keys are\n        _'X_wide'_, _'X_tab'_, _'X_text'_, _'X_img'_ and _'target'_. Values\n        are the corresponding matrices.\n    X_val: Dict, Optional. default=None\n        The validation dataset can also be passed in a dictionary. Keys\n        are _'X_wide'_, _'X_tab'_, _'X_text'_, _'X_img'_ and _'target'_.\n        Values are the corresponding matrices.\n    val_split: float, Optional. default=None\n        train/val split fraction\n    target: np.ndarray, Optional. default=None\n        target values\n    n_epochs: int, default=1\n        number of epochs\n    validation_freq: int, default=1\n        epochs validation frequency\n    batch_size: int, default=32\n        batch size\n    custom_dataloader: `DataLoader`, Optional, default=None\n        object of class `torch.utils.data.DataLoader`. Available\n        predefined dataloaders are in `pytorch-widedeep.dataloaders`.If\n        `None`, a standard torch `DataLoader` is used.\n    finetune: bool, default=False\n        fine-tune individual model components. This functionality can also\n        be used to 'warm-up' (and hence the alias `warmup`) individual\n        components before the joined training starts, and hence its\n        alias. See the Examples folder in the repo for more details\n\n        `pytorch_widedeep` implements 3 fine-tune routines.\n\n        - fine-tune all trainable layers at once. This routine is\n          inspired by the work of Howard & Sebastian Ruder 2018 in their\n          [ULMfit paper](https://arxiv.org/abs/1801.06146). Using a\n          Slanted Triangular learing (see\n          [Leslie N. Smith paper](https://arxiv.org/pdf/1506.01186.pdf) ) ,\n          the process is the following: *i*) the learning rate will\n          gradually increase for 10% of the training steps from max_lr/10\n          to max_lr. 
*ii*) It will then gradually decrease to max_lr/10\n          for the remaining 90% of the steps. The optimizer used in the\n          process is `Adam`.\n\n        and two gradual fine-tune routines, where only certain layers are\n        trained at a time.\n\n        - The so called `Felbo` gradual fine-tune rourine, based on the the\n          Felbo et al., 2017 [DeepEmoji paper](https://arxiv.org/abs/1708.00524).\n        - The `Howard` routine based on the work of Howard & Sebastian Ruder 2018 in their\n          [ULMfit paper](https://arxiv.org/abs/1801.06146>).\n\n        For details on how these routines work, please see the Examples\n        section in this documentation and the Examples folder in the repo. <br/>\n        Param Alias: `warmup`\n    with_lds: bool, default=False\n        Boolean indicating if Label Distribution Smoothing will be used. <br/>\n        information_source: **NOTE**: We consider this feature absolutely\n        experimental and we recommend the user to not use it unless the\n        corresponding [publication](https://arxiv.org/abs/2102.09554) is\n        well understood\n\n    Other Parameters\n    ----------------\n    **kwargs : dict\n        Other keyword arguments are:\n\n        - **DataLoader related parameters**:<br/>\n            For example,  `sampler`, `batch_sampler`, `collate_fn`, etc.\n            Please, see the pytorch\n            [DataLoader docs](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader)\n            for details.\n\n        - **Label Distribution Smoothing related parameters**:<br/>\n\n            - lds_kernel (`Literal['gaussian', 'triang', 'laplace']`):\n                choice of kernel for Label Distribution Smoothing\n            - lds_ks (`int`):\n                LDS kernel window size\n            - lds_sigma (`float`):\n                standard deviation of ['gaussian','laplace'] kernel for LDS\n            - lds_granularity (`int`):\n                number of bins in histogram used in LDS to count occurence of sample values\n            - lds_reweight (`bool`):\n                option to reweight bin frequency counts in LDS\n            - lds_y_max (`Optional[float]`):\n                option to restrict LDS bins by upper label limit\n            - lds_y_min (`Optional[float]`):\n                option to restrict LDS bins by lower label limit\n\n            See `pytorch_widedeep.trainer._wd_dataset` for more details on\n            the implications of these parameters\n\n        - **Finetune related parameters**:<br/>\n            see the source code at `pytorch_widedeep._finetune`. 
Namely, these are:\n\n            - `finetune_epochs` (`int`):\n                number of epochs use for fine tuning\n            - `finetune_max_lr` (`float`):\n               max lr during fine tuning\n            - `routine` (`str`):\n               one of _'howard'_ or _'felbo'_\n            - `deeptabular_gradual` (`bool`):\n               boolean indicating if the `deeptabular` component will be fine tuned gradually\n            - `deeptabular_layers` (`List[nn.Module]`):\n               List of pytorch modules indicating the layers of the\n               `deeptabular` that will be fine tuned\n            - `deeptabular_max_lr` (`float`):\n               max lr for the `deeptabular` componet during fine tuning\n            - `deeptext_gradual` (`bool`):\n               same as `deeptabular_gradual` but for the `deeptext` component\n            - `deeptext_layers` (`List[nn.Module]`):\n               same as `deeptabular_gradual` but for the `deeptext` component\n            - `deeptext_max_lr` (`float`):\n               same as `deeptabular_gradual` but for the `deeptext` component\n            - `deepimage_gradual` (`bool`):\n               same as `deeptabular_gradual` but for the `deepimage` component\n            - `deepimage_layers` (`List[nn.Module]`):\n               same as `deeptabular_gradual` but for the `deepimage` component\n            - `deepimage_max_lr` (`float`):\n                same as `deeptabular_gradual` but for the `deepimage` component\n\n    Examples\n    --------\n\n    For a series of comprehensive examples on how to use the `fit` method, please see the\n    [Examples](https://github.com/jrzaurin/pytorch-widedeep/tree/master/examples)\n    folder in the repo\n    \"\"\"\n\n    lds_args, dataloader_args, finetune_args = self._extract_kwargs(kwargs)\n    lds_args[\"with_lds\"] = with_lds\n    self.with_lds = with_lds\n\n    self.batch_size = batch_size\n\n    train_set, eval_set = wd_train_val_split(\n        self.seed,\n        self.method,  # type: ignore\n        X_wide,\n        X_tab,\n        X_text,\n        X_img,\n        X_train,\n        X_val,\n        val_split,\n        target,\n        **lds_args,\n    )\n    if isinstance(custom_dataloader, type):\n        if issubclass(custom_dataloader, DataLoader):\n            train_loader = custom_dataloader(  # type: ignore[misc]\n                dataset=train_set,\n                batch_size=batch_size,\n                num_workers=self.num_workers,\n                **dataloader_args,\n            )\n        else:\n            NotImplementedError(\n                \"Custom DataLoader must be a subclass of \"\n                \"torch.utils.data.DataLoader, please see the \"\n                \"pytorch documentation or examples in \"\n                \"pytorch_widedeep.dataloaders\"\n            )\n    else:\n        train_loader = DataLoaderDefault(\n            dataset=train_set,\n            batch_size=batch_size,\n            num_workers=self.num_workers,\n            **dataloader_args,\n        )\n    train_steps = len(train_loader)\n    if eval_set is not None:\n        eval_loader = DataLoader(\n            dataset=eval_set,\n            batch_size=batch_size,\n            num_workers=self.num_workers,\n            shuffle=False,\n        )\n        eval_steps = len(eval_loader)\n\n    if finetune:\n        self.with_finetuning: bool = True\n        self._finetune(train_loader, **finetune_args)\n        if self.verbose:\n            print(\n                \"Fine-tuning (or warmup) of individual 
components completed. \"\n                \"Training the whole model for {} epochs\".format(n_epochs)\n            )\n    else:\n        self.with_finetuning = False\n\n    self.callback_container.on_train_begin(\n        {\"batch_size\": batch_size, \"train_steps\": train_steps, \"n_epochs\": n_epochs}\n    )\n    for epoch in range(n_epochs):\n        epoch_logs: Dict[str, float] = {}\n        self.callback_container.on_epoch_begin(epoch, logs=epoch_logs)\n\n        self.train_running_loss = 0.0\n        with trange(train_steps, disable=self.verbose != 1) as t:\n            for batch_idx, (data, targett, lds_weightt) in zip(t, train_loader):\n                t.set_description(\"epoch %i\" % (epoch + 1))\n                train_score, train_loss = self._train_step(\n                    data, targett, batch_idx, epoch, lds_weightt\n                )\n                print_loss_and_metric(t, train_loss, train_score)\n                self.callback_container.on_batch_end(batch=batch_idx)\n        epoch_logs = save_epoch_logs(epoch_logs, train_loss, train_score, \"train\")\n\n        on_epoch_end_metric = None\n        if eval_set is not None and epoch % validation_freq == (\n            validation_freq - 1\n        ):\n            self.callback_container.on_eval_begin()\n            self.valid_running_loss = 0.0\n            with trange(eval_steps, disable=self.verbose != 1) as v:\n                for i, (data, targett) in zip(v, eval_loader):\n                    v.set_description(\"valid\")\n                    val_score, val_loss = self._eval_step(data, targett, i)\n                    print_loss_and_metric(v, val_loss, val_score)\n            epoch_logs = save_epoch_logs(epoch_logs, val_loss, val_score, \"val\")\n\n            if self.reducelronplateau:\n                if self.reducelronplateau_criterion == \"loss\":\n                    on_epoch_end_metric = val_loss\n                else:\n                    on_epoch_end_metric = val_score[\n                        self.reducelronplateau_criterion\n                    ]\n        else:\n            if self.reducelronplateau:\n                raise NotImplementedError(\n                    \"ReduceLROnPlateau scheduler can be used only with validation data.\"\n                )\n        self.callback_container.on_epoch_end(epoch, epoch_logs, on_epoch_end_metric)\n\n        if self.early_stop:\n            # self.callback_container.on_train_end(epoch_logs)\n            break\n\n        if self.model.with_fds:\n            self._update_fds_stats(train_loader, epoch)\n\n    self.callback_container.on_train_end(epoch_logs)\n\n    if feature_importance_sample_size is not None:\n        self.feature_importance = FeatureImportance(\n            self.device, feature_importance_sample_size\n        ).feature_importance(train_loader, self.model)\n    self._restore_best_weights()\n    self.model.train()\n
"},{"location":"pytorch-widedeep/trainer.html#pytorch_widedeep.training.Trainer.predict","title":"predict","text":"
predict(\n    X_wide=None,\n    X_tab=None,\n    X_text=None,\n    X_img=None,\n    X_test=None,\n    batch_size=None,\n)\n

Returns the predictions

The input datasets can be passed either directly via numpy arrays (X_wide, X_tab, X_text or X_img) or alternatively, in a dictionary (X_test)

Parameters:

  • X_wide (Optional[ndarray], default: None ) \u2013

    Input for the wide model component. See pytorch_widedeep.preprocessing.WidePreprocessor

  • X_tab (Optional[ndarray], default: None ) \u2013

    Input for the deeptabular model component. See pytorch_widedeep.preprocessing.TabPreprocessor

  • X_text (Optional[ndarray], default: None ) \u2013

    Input for the deeptext model component. See pytorch_widedeep.preprocessing.TextPreprocessor

  • X_img (Optional[ndarray], default: None ) \u2013

    Input for the deepimage model component. See pytorch_widedeep.preprocessing.ImagePreprocessor

  • X_test (Optional[Dict[str, ndarray]], default: None ) \u2013

    The test dataset can also be passed in a dictionary. Keys are 'X_wide', 'X_tab', 'X_text', 'X_img' and 'target'. Values are the corresponding matrices.

  • batch_size (Optional[int], default: None ) \u2013

    If a trainer is used to predict after having trained a model, the batch_size needs to be defined, as it will not have been set when the Trainer was instantiated

Returns:

  • np.ndarray: \u2013

    array with the predictions

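A short sketch (it assumes the trainer has been fitted and that X_wide_te and X_tab_te are the preprocessed test arrays):

preds = trainer.predict(X_wide=X_wide_te, X_tab=X_tab_te, batch_size=256)\n
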
Source code in pytorch_widedeep/training/trainer.py
def predict(  # type: ignore[return]\n    self,\n    X_wide: Optional[np.ndarray] = None,\n    X_tab: Optional[np.ndarray] = None,\n    X_text: Optional[np.ndarray] = None,\n    X_img: Optional[np.ndarray] = None,\n    X_test: Optional[Dict[str, np.ndarray]] = None,\n    batch_size: Optional[int] = None,\n) -> np.ndarray:\n    r\"\"\"Returns the predictions\n\n    The input datasets can be passed either directly via numpy arrays\n    (`X_wide`, `X_tab`, `X_text` or `X_img`) or alternatively, in\n    a dictionary (`X_test`)\n\n\n    Parameters\n    ----------\n    X_wide: np.ndarray, Optional. default=None\n        Input for the `wide` model component.\n        See `pytorch_widedeep.preprocessing.WidePreprocessor`\n    X_tab: np.ndarray, Optional. default=None\n        Input for the `deeptabular` model component.\n        See `pytorch_widedeep.preprocessing.TabPreprocessor`\n    X_text: np.ndarray, Optional. default=None\n        Input for the `deeptext` model component.\n        See `pytorch_widedeep.preprocessing.TextPreprocessor`\n    X_img: np.ndarray, Optional. default=None\n        Input for the `deepimage` model component.\n        See `pytorch_widedeep.preprocessing.ImagePreprocessor`\n    X_test: Dict, Optional. default=None\n        The test dataset can also be passed in a dictionary. Keys are\n        `X_wide`, _'X_tab'_, _'X_text'_, _'X_img'_ and _'target'_. Values\n        are the corresponding matrices.\n    batch_size: int, default = 256\n        If a trainer is used to predict after having trained a model, the\n        `batch_size` needs to be defined as it will not be defined as\n        the `Trainer` is instantiated\n\n    Returns\n    -------\n    np.ndarray:\n        array with the predictions\n    \"\"\"\n    preds_l = self._predict(X_wide, X_tab, X_text, X_img, X_test, batch_size)\n    if self.method == \"regression\":\n        return np.vstack(preds_l).squeeze(1)\n    if self.method == \"binary\":\n        preds = np.vstack(preds_l).squeeze(1)\n        return (preds > 0.5).astype(\"int\")\n    if self.method == \"qregression\":\n        return np.vstack(preds_l)\n    if self.method == \"multiclass\":\n        preds = np.vstack(preds_l)\n        return np.argmax(preds, 1)  # type: ignore[return-value]\n
"},{"location":"pytorch-widedeep/trainer.html#pytorch_widedeep.training.Trainer.predict_uncertainty","title":"predict_uncertainty","text":"
predict_uncertainty(\n    X_wide=None,\n    X_tab=None,\n    X_text=None,\n    X_img=None,\n    X_test=None,\n    batch_size=None,\n    uncertainty_granularity=1000,\n)\n

Returns the predicted uncertainty of the model for the test dataset using a Monte Carlo method during which dropout layers are activated in the evaluation/prediction phase and each sample is predicted N times (uncertainty_granularity times).

This is based on Dropout as a Bayesian Approximation: Representing Model Uncertainty in Deep Learning.

Parameters:

  • X_wide (Optional[ndarray], default: None ) \u2013

    Input for the wide model component. See pytorch_widedeep.preprocessing.WidePreprocessor

  • X_tab (Optional[ndarray], default: None ) \u2013

    Input for the deeptabular model component. See pytorch_widedeep.preprocessing.TabPreprocessor

  • X_text (Optional[ndarray], default: None ) \u2013

    Input for the deeptext model component. See pytorch_widedeep.preprocessing.TextPreprocessor

  • X_img (Optional[ndarray], default: None ) \u2013

    Input for the deepimage model component. See pytorch_widedeep.preprocessing.ImagePreprocessor

  • X_test (Optional[Dict[str, ndarray]], default: None ) \u2013

    The test dataset can also be passed in a dictionary. Keys are 'X_wide', 'X_tab', 'X_text', 'X_img' and 'target'. Values are the corresponding matrices.

  • batch_size (Optional[int], default: None ) \u2013

    If a trainer is used to predict after having trained a model, the batch_size needs to be defined, as it will not have been set when the Trainer was instantiated

  • uncertainty_granularity (int, default: 1000 ) \u2013

    number of times the model runs a prediction for each sample

Returns:

  • np.ndarray: \u2013
    • if method = regression, it will return an array with (max, min, mean, stdev) values for each sample.
    • if method = binary it will return an array with (mean_cls_0_prob, mean_cls_1_prob, predicted_cls) for each sample.
    • if method = multiclass it will return an array with (mean_cls_0_prob, mean_cls_1_prob, mean_cls_2_prob, ... , predicted_cls) values for each sample.
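
A sketch for a binary objective (array names and the granularity value are illustrative assumptions):

unc = trainer.predict_uncertainty(\n    X_wide=X_wide_te,\n    X_tab=X_tab_te,\n    batch_size=256,\n    uncertainty_granularity=100,  # each sample is predicted 100 times with dropout active\n)\n# for a binary objective, unc[:, 0] and unc[:, 1] are the mean class probabilities\n
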
Source code in pytorch_widedeep/training/trainer.py
def predict_uncertainty(  # type: ignore[return]\n    self,\n    X_wide: Optional[np.ndarray] = None,\n    X_tab: Optional[np.ndarray] = None,\n    X_text: Optional[np.ndarray] = None,\n    X_img: Optional[np.ndarray] = None,\n    X_test: Optional[Dict[str, np.ndarray]] = None,\n    batch_size: Optional[int] = None,\n    uncertainty_granularity=1000,\n) -> np.ndarray:\n    r\"\"\"Returns the predicted ucnertainty of the model for the test dataset\n    using a Monte Carlo method during which dropout layers are activated\n    in the evaluation/prediction phase and each sample is predicted N\n    times (`uncertainty_granularity` times).\n\n    This is based on\n    [Dropout as a Bayesian Approximation: Representing\n    Model Uncertainty in Deep Learning](https://arxiv.org/abs/1506.02142?context=stat).\n\n    Parameters\n    ----------\n    X_wide: np.ndarray, Optional. default=None\n        Input for the `wide` model component.\n        See `pytorch_widedeep.preprocessing.WidePreprocessor`\n    X_tab: np.ndarray, Optional. default=None\n        Input for the `deeptabular` model component.\n        See `pytorch_widedeep.preprocessing.TabPreprocessor`\n    X_text: np.ndarray, Optional. default=None\n        Input for the `deeptext` model component.\n        See `pytorch_widedeep.preprocessing.TextPreprocessor`\n    X_img: np.ndarray, Optional. default=None\n        Input for the `deepimage` model component.\n        See `pytorch_widedeep.preprocessing.ImagePreprocessor`\n    X_test: Dict, Optional. default=None\n        The test dataset can also be passed in a dictionary. Keys are\n        _'X_wide'_, _'X_tab'_, _'X_text'_, _'X_img'_ and _'target'_. Values\n        are the corresponding matrices.\n    batch_size: int, default = 256\n        If a trainer is used to predict after having trained a model, the\n        `batch_size` needs to be defined as it will not be defined as\n        the `Trainer` is instantiated\n    uncertainty_granularity: int default = 1000\n        number of times the model does prediction for each sample\n\n    Returns\n    -------\n    np.ndarray:\n        - if `method = regression`, it will return an array with `(max, min, mean, stdev)`\n          values for each sample.\n        - if `method = binary` it will return an array with\n          `(mean_cls_0_prob, mean_cls_1_prob, predicted_cls)` for each sample.\n        - if `method = multiclass` it will return an array with\n          `(mean_cls_0_prob, mean_cls_1_prob, mean_cls_2_prob, ... 
, predicted_cls)`\n          values for each sample.\n\n    \"\"\"\n    preds_l = self._predict(\n        X_wide,\n        X_tab,\n        X_text,\n        X_img,\n        X_test,\n        batch_size,\n        uncertainty_granularity,\n        uncertainty=True,\n    )\n    preds = np.vstack(preds_l)\n    samples_num = int(preds.shape[0] / uncertainty_granularity)\n    if self.method == \"regression\":\n        preds = preds.squeeze(1)\n        preds = preds.reshape((uncertainty_granularity, samples_num))\n        return np.array(\n            (\n                preds.max(axis=0),\n                preds.min(axis=0),\n                preds.mean(axis=0),\n                preds.std(axis=0),\n            )\n        ).T\n    if self.method == \"qregression\":\n        raise ValueError(\n            \"Currently predict_uncertainty is not supported for qregression method\"\n        )\n    if self.method == \"binary\":\n        preds = preds.squeeze(1)\n        preds = preds.reshape((uncertainty_granularity, samples_num))\n        preds = preds.mean(axis=0)\n        probs = np.zeros([preds.shape[0], 3])\n        probs[:, 0] = 1 - preds\n        probs[:, 1] = preds\n        return probs\n    if self.method == \"multiclass\":\n        preds = preds.reshape(uncertainty_granularity, samples_num, preds.shape[1])\n        preds = preds.mean(axis=0)\n        preds = np.hstack((preds, np.vstack(np.argmax(preds, 1))))\n        return preds\n
"},{"location":"pytorch-widedeep/trainer.html#pytorch_widedeep.training.Trainer.predict_proba","title":"predict_proba","text":"
predict_proba(\n    X_wide=None,\n    X_tab=None,\n    X_text=None,\n    X_img=None,\n    X_test=None,\n    batch_size=None,\n)\n

Returns the predicted probabilities for the test dataset for binary and multiclass methods

The input datasets can be passed either directly via numpy arrays (X_wide, X_tab, X_text or X_img) or alternatively, in a dictionary (X_test)

Parameters:

  • X_wide (Optional[ndarray], default: None ) \u2013

    Input for the wide model component. See pytorch_widedeep.preprocessing.WidePreprocessor

  • X_tab (Optional[ndarray], default: None ) \u2013

    Input for the deeptabular model component. See pytorch_widedeep.preprocessing.TabPreprocessor

  • X_text (Optional[ndarray], default: None ) \u2013

    Input for the deeptext model component. See pytorch_widedeep.preprocessing.TextPreprocessor

  • X_img (Optional[ndarray], default: None ) \u2013

    Input for the deepimage model component. See pytorch_widedeep.preprocessing.ImagePreprocessor

  • X_test (Optional[Dict[str, ndarray]], default: None ) \u2013

    The test dataset can also be passed in a dictionary. Keys are 'X_wide', 'X_tab', 'X_text', 'X_img' and 'target'. Values are the corresponding matrices.

  • batch_size (Optional[int], default: None ) \u2013

    If a trainer is used to predict after having trained a model, the batch_size needs to be defined, as it will not have been set when the Trainer was instantiated

Returns:

  • ndarray \u2013

    array with the probabilities per class

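A short sketch (it assumes a fitted binary trainer; the returned array has one column per class):

probs = trainer.predict_proba(X_wide=X_wide_te, X_tab=X_tab_te, batch_size=256)\n# probs[:, 1] is the probability of the positive class for a binary objective\n
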
Source code in pytorch_widedeep/training/trainer.py
def predict_proba(  # type: ignore[return]\n    self,\n    X_wide: Optional[np.ndarray] = None,\n    X_tab: Optional[np.ndarray] = None,\n    X_text: Optional[np.ndarray] = None,\n    X_img: Optional[np.ndarray] = None,\n    X_test: Optional[Dict[str, np.ndarray]] = None,\n    batch_size: Optional[int] = None,\n) -> np.ndarray:\n    r\"\"\"Returns the predicted probabilities for the test dataset for  binary\n    and multiclass methods\n\n    The input datasets can be passed either directly via numpy arrays\n    (`X_wide`, `X_tab`, `X_text` or `X_img`) or alternatively, in\n    a dictionary (`X_test`)\n\n    Parameters\n    ----------\n    X_wide: np.ndarray, Optional. default=None\n        Input for the `wide` model component.\n        See `pytorch_widedeep.preprocessing.WidePreprocessor`\n    X_tab: np.ndarray, Optional. default=None\n        Input for the `deeptabular` model component.\n        See `pytorch_widedeep.preprocessing.TabPreprocessor`\n    X_text: np.ndarray, Optional. default=None\n        Input for the `deeptext` model component.\n        See `pytorch_widedeep.preprocessing.TextPreprocessor`\n    X_img: np.ndarray, Optional. default=None\n        Input for the `deepimage` model component.\n        See `pytorch_widedeep.preprocessing.ImagePreprocessor`\n    X_test: Dict, Optional. default=None\n        The test dataset can also be passed in a dictionary. Keys are\n        `X_wide`, _'X_tab'_, _'X_text'_, _'X_img'_ and _'target'_. Values\n        are the corresponding matrices.\n    batch_size: int, default = 256\n        If a trainer is used to predict after having trained a model, the\n        `batch_size` needs to be defined as it will not be defined as\n        the `Trainer` is instantiated\n\n    Returns\n    -------\n    np.ndarray\n        array with the probabilities per class\n    \"\"\"\n\n    preds_l = self._predict(X_wide, X_tab, X_text, X_img, X_test, batch_size)\n    if self.method == \"binary\":\n        preds = np.vstack(preds_l).squeeze(1)\n        probs = np.zeros([preds.shape[0], 2])\n        probs[:, 0] = 1 - preds\n        probs[:, 1] = preds\n        return probs\n    if self.method == \"multiclass\":\n        return np.vstack(preds_l)\n
"},{"location":"pytorch-widedeep/trainer.html#pytorch_widedeep.training.Trainer.save","title":"save","text":"
save(\n    path,\n    save_state_dict=False,\n    model_filename=\"wd_model.pt\",\n)\n

Saves the model, training and evaluation history, and the feature_importance attribute (if the deeptabular component is a Tabnet model) to disk

The Trainer class is built so that it 'just' trains a model. With that in mind, all the torch related parameters (such as optimizers, learning rate schedulers, initializers, etc) have to be defined externally and then passed to the Trainer. As a result, the Trainer does not generate any attribute or additional data products that need to be saved other than the model object itself, which can be saved as any other torch model (e.g. torch.save(model, path)).

The exception is Tabnet. If the deeptabular component is a Tabnet model, an attribute (a dict) called feature_importance will be created at the end of the training process. Therefore, a save method was created that will save the feature importance dictionary to a json file and, since we are here, the model weights, training history and learning rate history.

Parameters:

  • path (str) \u2013

    path to the directory where the model and the feature importance attribute will be saved.

  • save_state_dict (bool, default: False ) \u2013

    Boolean indicating whether to save directly the model or the model's state dictionary

  • model_filename (str, default: 'wd_model.pt' ) \u2013

    filename where the model weights will be stored

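A usage sketch (the directory name is illustrative):

trainer.save(\n    path=\"model_dir\",  # the training history is written to model_dir/history\n    save_state_dict=False,  # save the full model object\n    model_filename=\"wd_model.pt\",\n)\n
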
Source code in pytorch_widedeep/training/trainer.py
def save(\n    self,\n    path: str,\n    save_state_dict: bool = False,\n    model_filename: str = \"wd_model.pt\",\n):\n    r\"\"\"Saves the model, training and evaluation history, and the\n    `feature_importance` attribute (if the `deeptabular` component is a\n    Tabnet model) to disk\n\n    The `Trainer` class is built so that it 'just' trains a model. With\n    that in mind, all the torch related parameters (such as optimizers,\n    learning rate schedulers, initializers, etc) have to be defined\n    externally and then passed to the `Trainer`. As a result, the\n    `Trainer` does not generate any attribute or additional data\n    products that need to be saved other than the `model` object itself,\n    which can be saved as any other torch model (e.g. `torch.save(model,\n    path)`).\n\n    The exception is Tabnet. If the `deeptabular` component is a Tabnet\n    model, an attribute (a dict) called `feature_importance` will be\n    created at the end of the training process. Therefore, a `save`\n    method was created that will save the feature importance dictionary\n    to a json file and, since we are here, the model weights, training\n    history and learning rate history.\n\n    Parameters\n    ----------\n    path: str\n        path to the directory where the model and the feature importance\n        attribute will be saved.\n    save_state_dict: bool, default = False\n        Boolean indicating whether to save directly the model or the\n        model's state dictionary\n    model_filename: str, Optional, default = \"wd_model.pt\"\n        filename where the model weights will be store\n    \"\"\"\n\n    save_dir = Path(path)\n    history_dir = save_dir / \"history\"\n    history_dir.mkdir(exist_ok=True, parents=True)\n\n    # the trainer is run with the History Callback by default\n    with open(history_dir / \"train_eval_history.json\", \"w\") as teh:\n        json.dump(self.history, teh)  # type: ignore[attr-defined]\n\n    has_lr_history = any(\n        [clbk.__class__.__name__ == \"LRHistory\" for clbk in self.callbacks]\n    )\n    if self.lr_scheduler is not None and has_lr_history:\n        with open(history_dir / \"lr_history.json\", \"w\") as lrh:\n            json.dump(self.lr_history, lrh)  # type: ignore[attr-defined]\n\n    model_path = save_dir / model_filename\n    if save_state_dict:\n        torch.save(self.model.state_dict(), model_path)\n    else:\n        torch.save(self.model, model_path)\n\n    if self.model.is_tabnet:\n        with open(save_dir / \"feature_importance.json\", \"w\") as fi:\n            json.dump(self.feature_importance, fi)\n
"},{"location":"pytorch-widedeep/utils/index.html","title":"The utils module","text":"

These are a series of utilities that might be useful for a number of preprocessing tasks, even those not directly related to pytorch-widedeep. All the classes and functions discussed here are available directly from the utils module. For example, the LabelEncoder within the deeptabular_utils submodule can be imported as:

from pytorch_widedeep.utils import LabelEncoder\n

These are classes and functions that are internally used in the library. We include them here in case the user finds them useful for other purposes.

"},{"location":"pytorch-widedeep/utils/deeptabular_utils.html","title":"deeptabular utils","text":""},{"location":"pytorch-widedeep/utils/deeptabular_utils.html#pytorch_widedeep.utils.deeptabular_utils.LabelEncoder","title":"LabelEncoder","text":"
LabelEncoder(\n    columns_to_encode=None,\n    with_attention=False,\n    shared_embed=False,\n)\n

Label Encode categorical values for multiple columns at once

NOTE: LabelEncoder reserves 0 for unseen new categories. This is convenient when defining the embedding layers, since we can just set padding idx to 0.
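As a quick, hedged illustration of this behaviour (the column names and values below are made up):

import pandas as pd
from pytorch_widedeep.utils import LabelEncoder

df_train = pd.DataFrame({"col1": ["a", "b", "c"]})
df_new = pd.DataFrame({"col1": ["a", "d"]})  # 'd' was never seen during fit

encoder = LabelEncoder(columns_to_encode=["col1"])
encoder.fit(df_train)
encoder.transform(df_new)  # 'a' keeps its encoding; the unseen 'd' is mapped to 0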

Parameters:

  • columns_to_encode (Optional[List[str]], default: None ) \u2013

    List of strings containing the names of the columns to encode. If None all columns of type object in the dataframe will be label encoded.

  • with_attention (bool, default: False ) \u2013

    Boolean indicating whether the preprocessed data will be passed to an attention-based model. Aliased as for_transformer.

  • shared_embed (bool, default: False ) \u2013

    Boolean indicating if the embeddings will be \"shared\" when using attention-based models. The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

Attributes:

  • encoding_dict (Dict) \u2013

    Dictionary containing the encoding mappings in the format, e.g. : {'colname1': {'cat1': 1, 'cat2': 2, ...}, 'colname2': {'cat1': 1, 'cat2': 2, ...}, ...}

  • inverse_encoding_dict (Dict) \u2013

    Dictionary containing the inverse encoding mappings in the format, e.g. : {'colname1': {1: 'cat1', 2: 'cat2', ...}, 'colname2': {1: 'cat1', 2: 'cat2', ...}, ...}

Source code in pytorch_widedeep/utils/deeptabular_utils.py
@alias(\"with_attention\", [\"for_transformer\"])\ndef __init__(\n    self,\n    columns_to_encode: Optional[List[str]] = None,\n    with_attention: bool = False,\n    shared_embed: bool = False,\n):\n    self.columns_to_encode = columns_to_encode\n\n    self.shared_embed = shared_embed\n    self.with_attention = with_attention\n\n    self.reset_embed_idx = not self.with_attention or self.shared_embed\n
"},{"location":"pytorch-widedeep/utils/deeptabular_utils.html#pytorch_widedeep.utils.deeptabular_utils.LabelEncoder.partial_fit","title":"partial_fit","text":"
partial_fit(df)\n

Main method. Creates encoding attributes.

Returns:

  • LabelEncoder \u2013

    LabelEncoder fitted object

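A sketch of the intended chunked usage (the csv path, column names and chunk size are placeholders):

import pandas as pd
from pytorch_widedeep.utils import LabelEncoder

encoder = LabelEncoder(columns_to_encode=["workclass", "education"])

# fit the encoder chunk by chunk when the dataset does not fit in memory
for chunk in pd.read_csv("adult.csv", chunksize=10_000):
    encoder.partial_fit(chunk)

# once all chunks have been seen, transform is used as usual
X_enc = encoder.transform(pd.read_csv("adult.csv", nrows=1_000))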
Source code in pytorch_widedeep/utils/deeptabular_utils.py
def partial_fit(self, df: pd.DataFrame) -> \"LabelEncoder\":  # noqa: C901\n    \"\"\"Main method. Creates encoding attributes.\n\n    Returns\n    -------\n    LabelEncoder\n        `LabelEncoder` fitted object\n    \"\"\"\n    # here df is a chunk of the data. this is meant to be run when the\n    # data is large and we pass a chunk at a time. Therefore, we do not\n    # copy the input chunk as mutating a chunk is ok\n    if self.columns_to_encode is None:\n        self.columns_to_encode = list(df.select_dtypes(include=[\"object\"]).columns)\n    else:\n        # sanity check to make sure all categorical columns are in an adequate\n        # format\n        for col in self.columns_to_encode:\n            df[col] = df[col].astype(\"O\")\n\n    unique_column_vals: Dict[str, List[str]] = {}\n    for c in self.columns_to_encode:\n        unique_column_vals[c] = df[c].unique().tolist()\n\n    if not hasattr(self, \"encoding_dict\"):\n        # we run the method 'partial_fit' for the 1st time\n        self.encoding_dict: Dict[str, Dict[str, int]] = {}\n        if \"cls_token\" in unique_column_vals and self.shared_embed:\n            self.encoding_dict[\"cls_token\"] = {\"[CLS]\": 0}\n            del unique_column_vals[\"cls_token\"]\n\n        # leave 0 for padding/\"unseen\" categories. Also we need an\n        # attribute to keep track of the encoding in case we use\n        # attention and we do not re-start the index/counter\n        self.cum_idx: int = 1\n        for k, v in unique_column_vals.items():\n            self.encoding_dict[k] = {o: i + self.cum_idx for i, o in enumerate(v)}\n            self.cum_idx = 1 if self.reset_embed_idx else self.cum_idx + len(v)\n    else:\n        # the 'partial_fit' method has already run.\n        # \"cls_token\" will have been added already\n        if \"cls_token\" in unique_column_vals and self.shared_embed:\n            del unique_column_vals[\"cls_token\"]\n\n        # Classes in the new df/chunk of the dataset that have not been seen\n        # before\n        unseen_classes: Dict[str, List[str]] = {}\n        for c in self.columns_to_encode:\n            unseen_classes[c] = list(\n                np.setdiff1d(\n                    unique_column_vals[c], list(self.encoding_dict[c].keys())\n                )\n            )\n\n        # leave 0 for padding/\"unseen\" categories\n        for k, v in unique_column_vals.items():\n            # if we use attention we need to start encoding from the\n            # last 'overall' encoding index. Otherwise, we use the max\n            # encoding index per categorical col\n            _idx = (\n                max(self.encoding_dict[k].values()) + 1\n                if self.reset_embed_idx\n                else self.cum_idx\n            )\n            if len(unseen_classes[k]) != 0:\n                for i, o in enumerate(unseen_classes[k]):\n                    if o not in self.encoding_dict[k]:\n                        self.encoding_dict[k][o] = i + _idx\n                # if self.reset_embed_idx is True it will be 1 anyway\n                self.cum_idx = (\n                    1\n                    if self.reset_embed_idx\n                    else self.cum_idx + len(unseen_classes[k])\n                )\n\n    return self\n
"},{"location":"pytorch-widedeep/utils/deeptabular_utils.html#pytorch_widedeep.utils.deeptabular_utils.LabelEncoder.fit","title":"fit","text":"
fit(df)\n

Simply runs the partial_fit method when the data fits in memory

Returns:

  • LabelEncoder \u2013

    LabelEncoder fitted object

Source code in pytorch_widedeep/utils/deeptabular_utils.py
def fit(self, df: pd.DataFrame) -> \"LabelEncoder\":\n    \"\"\"Simply runs the `partial_fit` method when the data fits in memory\n\n    Returns\n    -------\n    LabelEncoder\n        `LabelEncoder` fitted object\n    \"\"\"\n    # this is meant to be run when the data fits in memory and therefore,\n    # we do not want to mutate the original df, so we copy it\n    self.partial_fit(df.copy())\n\n    self.inverse_encoding_dict = self.create_inverse_encoding_dict()\n\n    return self\n
"},{"location":"pytorch-widedeep/utils/deeptabular_utils.html#pytorch_widedeep.utils.deeptabular_utils.LabelEncoder.transform","title":"transform","text":"
transform(df)\n

Label encodes the categories in columns_to_encode

Returns:

  • DataFrame \u2013

    label-encoded dataframe

Source code in pytorch_widedeep/utils/deeptabular_utils.py
def transform(self, df: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"Label Encoded the categories in `columns_to_encode`\n\n    Returns\n    -------\n    pd.DataFrame\n        label-encoded dataframe\n    \"\"\"\n    try:\n        self.encoding_dict\n    except AttributeError:\n        raise NotFittedError(\n            \"This LabelEncoder instance is not fitted yet. \"\n            \"Call 'fit' with appropriate arguments before using this LabelEncoder.\"\n        )\n\n    df_inp = df.copy()\n    # sanity check to make sure all categorical columns are in an adequate\n    # format\n    for col in self.columns_to_encode:  # type: ignore\n        df_inp[col] = df_inp[col].astype(\"O\")\n\n    for k, v in self.encoding_dict.items():\n        df_inp[k] = df_inp[k].apply(lambda x: v[x] if x in v.keys() else 0)\n\n    return df_inp\n
"},{"location":"pytorch-widedeep/utils/deeptabular_utils.html#pytorch_widedeep.utils.deeptabular_utils.LabelEncoder.fit_transform","title":"fit_transform","text":"
fit_transform(df)\n

Combines fit and transform

Examples:

>>> import pandas as pd\n>>> from pytorch_widedeep.utils import LabelEncoder\n>>> df = pd.DataFrame({'col1': [1,2,3], 'col2': ['me', 'you', 'him']})\n>>> columns_to_encode = ['col2']\n>>> encoder = LabelEncoder(columns_to_encode)\n>>> encoder.fit_transform(df)\n   col1  col2\n0     1     1\n1     2     2\n2     3     3\n>>> encoder.encoding_dict\n{'col2': {'me': 1, 'you': 2, 'him': 3}}\n

Returns:

  • DataFrame \u2013

    label-encoded dataframe

Source code in pytorch_widedeep/utils/deeptabular_utils.py
def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"Combines `fit` and `transform`\n\n    Examples\n    --------\n\n    >>> import pandas as pd\n    >>> from pytorch_widedeep.utils import LabelEncoder\n    >>> df = pd.DataFrame({'col1': [1,2,3], 'col2': ['me', 'you', 'him']})\n    >>> columns_to_encode = ['col2']\n    >>> encoder = LabelEncoder(columns_to_encode)\n    >>> encoder.fit_transform(df)\n       col1  col2\n    0     1     1\n    1     2     2\n    2     3     3\n    >>> encoder.encoding_dict\n    {'col2': {'me': 1, 'you': 2, 'him': 3}}\n\n    Returns\n    -------\n    pd.DataFrame\n        label-encoded dataframe\n    \"\"\"\n    return self.fit(df).transform(df)\n
"},{"location":"pytorch-widedeep/utils/deeptabular_utils.html#pytorch_widedeep.utils.deeptabular_utils.LabelEncoder.inverse_transform","title":"inverse_transform","text":"
inverse_transform(df)\n

Returns the original categories

Examples:

>>> import pandas as pd\n>>> from pytorch_widedeep.utils import LabelEncoder\n>>> df = pd.DataFrame({'col1': [1,2,3], 'col2': ['me', 'you', 'him']})\n>>> columns_to_encode = ['col2']\n>>> encoder = LabelEncoder(columns_to_encode)\n>>> df_enc = encoder.fit_transform(df)\n>>> encoder.inverse_transform(df_enc)\n   col1 col2\n0     1   me\n1     2  you\n2     3  him\n

Returns:

  • DataFrame \u2013

    DataFrame with original categories

Source code in pytorch_widedeep/utils/deeptabular_utils.py
def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"Returns the original categories\n\n    Examples\n    --------\n\n    >>> import pandas as pd\n    >>> from pytorch_widedeep.utils import LabelEncoder\n    >>> df = pd.DataFrame({'col1': [1,2,3], 'col2': ['me', 'you', 'him']})\n    >>> columns_to_encode = ['col2']\n    >>> encoder = LabelEncoder(columns_to_encode)\n    >>> df_enc = encoder.fit_transform(df)\n    >>> encoder.inverse_transform(df_enc)\n       col1 col2\n    0     1   me\n    1     2  you\n    2     3  him\n\n    Returns\n    -------\n    pd.DataFrame\n        DataFrame with original categories\n    \"\"\"\n\n    if not hasattr(self, \"inverse_encoding_dict\"):\n        self.inverse_encoding_dict = self.create_inverse_encoding_dict()\n\n    for k, v in self.inverse_encoding_dict.items():\n        df[k] = df[k].apply(lambda x: v[x])\n\n    return df\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html","title":"Fastai transforms","text":"

I directly copied and pasted part of the transforms.py module from the fastai library (from an old version). The reason for doing so is that pytorch_widedeep only needs the Tokenizer and the Vocab classes there. This way I avoid extra dependencies. Credit for all the code in the fastai_transforms module in this pytorch-widedeep package goes to Jeremy Howard and the fastai team. I only include the documentation here for completeness, but I strongly advise the user to read the fastai documentation.

"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Tokenizer","title":"Tokenizer","text":"
Tokenizer(\n    tok_func=SpacyTokenizer,\n    lang=\"en\",\n    pre_rules=None,\n    post_rules=None,\n    special_cases=None,\n    n_cpus=None,\n)\n

Class to combine a series of rules and a tokenizer function to tokenize text with multiprocessing.

Setting some of the parameters of this class requires some familiarity with the source code.

Parameters:

  • tok_func (Callable, default: SpacyTokenizer ) \u2013

    Tokenizer Object. See pytorch_widedeep.utils.fastai_transforms.SpacyTokenizer

  • lang (str, default: 'en' ) \u2013

    Text's Language

  • pre_rules (Optional[ListRules], default: None ) \u2013

    Custom type: Collection[Callable[[str], str]]. These are Callable objects that will be applied to the text (str) directly as rule(text) before being tokenized.

  • post_rules (Optional[ListRules], default: None ) \u2013

    Custom type: Collection[Callable[[str], str]]. These are Callable objects that will be applied to the tokens as rule(tokens) after the text has been tokenized.

  • special_cases (Optional[Collection[str]], default: None ) \u2013

    special cases to be added to the tokenizer via Spacy's add_special_case method

  • n_cpus (Optional[int], default: None ) \u2013

    number of CPUs to use during the tokenization process

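For illustration only, a hedged sketch of passing a custom pre-rule. Note that, per the source below, passing pre_rules replaces the default fastai rules rather than extending them; the rule name is made up:

import re
from pytorch_widedeep.utils import Tokenizer

def strip_html_tags(t: str) -> str:
    # toy pre-rule applied to the raw text before tokenization
    return re.sub(r"<[^>]+>", " ", t)

tok = Tokenizer(pre_rules=[strip_html_tags], n_cpus=1)
tok.process_all(["<p>Machine learning is great</p>"])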
Source code in pytorch_widedeep/utils/fastai_transforms.py
def __init__(\n    self,\n    tok_func: Callable = SpacyTokenizer,\n    lang: str = \"en\",\n    pre_rules: Optional[ListRules] = None,\n    post_rules: Optional[ListRules] = None,\n    special_cases: Optional[Collection[str]] = None,\n    n_cpus: Optional[int] = None,\n):\n    self.tok_func, self.lang, self.special_cases = tok_func, lang, special_cases\n    self.pre_rules = ifnone(pre_rules, defaults.text_pre_rules)\n    self.post_rules = ifnone(post_rules, defaults.text_post_rules)\n    self.special_cases = (\n        special_cases if special_cases is not None else defaults.text_spec_tok\n    )\n    self.n_cpus = ifnone(n_cpus, defaults.cpus)\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Tokenizer.process_text","title":"process_text","text":"
process_text(t, tok)\n

Process and tokenize one text t with tokenizer tok.

Parameters:

  • t (str) \u2013

    text to be processed and tokenized

  • tok (BaseTokenizer) \u2013

    Instance of BaseTokenizer. See pytorch_widedeep.utils.fastai_transforms.BaseTokenizer

Returns:

  • List[str] \u2013

    List of tokens

Source code in pytorch_widedeep/utils/fastai_transforms.py
def process_text(self, t: str, tok: BaseTokenizer) -> List[str]:\n    r\"\"\"Process and tokenize one text ``t`` with tokenizer ``tok``.\n\n    Parameters\n    ----------\n    t: str\n        text to be processed and tokenized\n    tok: ``BaseTokenizer``\n        Instance of `BaseTokenizer`. See\n        `pytorch_widedeep.utils.fastai_transforms.BaseTokenizer`\n\n    Returns\n    -------\n    List[str]\n        List of tokens\n    \"\"\"\n    for rule in self.pre_rules:\n        t = rule(t)\n    toks = tok.tokenizer(t)\n    for rule in self.post_rules:\n        toks = rule(toks)\n    return toks\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Tokenizer.process_all","title":"process_all","text":"
process_all(texts)\n

Process a list of texts. Parallel execution of process_text.

Examples:

>>> from pytorch_widedeep.utils import Tokenizer\n>>> texts = ['Machine learning is great', 'but building stuff is even better']\n>>> tok = Tokenizer()\n>>> tok.process_all(texts)\n[['xxmaj', 'machine', 'learning', 'is', 'great'], ['but', 'building', 'stuff', 'is', 'even', 'better']]\n

NOTE: Note the token TK_MAJ (xxmaj), used to indicate the next word begins with a capital in the original text. For more details of special tokens please see the fastai docs.

Returns:

  • List[List[str]] \u2013

    List containing lists of tokens. One list per \"document\"

Source code in pytorch_widedeep/utils/fastai_transforms.py
def process_all(self, texts: Collection[str]) -> List[List[str]]:\n    r\"\"\"Process a list of texts. Parallel execution of ``process_text``.\n\n    Examples\n    --------\n    >>> from pytorch_widedeep.utils import Tokenizer\n    >>> texts = ['Machine learning is great', 'but building stuff is even better']\n    >>> tok = Tokenizer()\n    >>> tok.process_all(texts)\n    [['xxmaj', 'machine', 'learning', 'is', 'great'], ['but', 'building', 'stuff', 'is', 'even', 'better']]\n\n    :information_source: **NOTE**:\n    Note the token ``TK_MAJ`` (`xxmaj`), used to indicate the\n    next word begins with a capital in the original text. For more\n    details of special tokens please see the [``fastai`` docs](https://docs.fast.ai/text.core.html#Tokenizing).\n\n    Returns\n    -------\n    List[List[str]]\n        List containing lists of tokens. One list per \"_document_\"\n\n    \"\"\"\n\n    if self.n_cpus <= 1:\n        return self._process_all_1(texts)\n    with ProcessPoolExecutor(self.n_cpus) as e:\n        return sum(\n            e.map(self._process_all_1, partition_by_cores(texts, self.n_cpus)), []\n        )\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Vocab","title":"Vocab","text":"
Vocab(max_vocab, min_freq, pad_idx=None)\n

Contains the correspondence between numbers and tokens.

Parameters:

  • max_vocab (int) \u2013

    maximum vocabulary size

  • min_freq (int) \u2013

    minimum frequency for a token to be considered

  • pad_idx (Optional[int], default: None ) \u2013

    padding index. If None, Fastai's Tokenizer leaves the 0 index for the unknown token ('xxunk') and defaults to 1 for the padding token ('xxpad').

Attributes:

  • itos (Collection) \u2013

    index to str. Collection of strings that are the tokens of the vocabulary

  • stoi (defaultdict) \u2013

    str to index. Dictionary containing the tokens of the vocabulary and their corresponding index

Source code in pytorch_widedeep/utils/fastai_transforms.py
def __init__(\n    self,\n    max_vocab: int,\n    min_freq: int,\n    pad_idx: Optional[int] = None,\n):\n    self.max_vocab = max_vocab\n    self.min_freq = min_freq\n    self.pad_idx = pad_idx\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Vocab.create","title":"create","text":"
create(tokens)\n

Create a vocabulary object from a set of tokens.

Parameters:

  • tokens (Tokens) \u2013

    Custom type: Collection[Collection[str]] see pytorch_widedeep.wdtypes. Collection of collection of strings (e.g. list of tokenized sentences)

Examples:

>>> from pytorch_widedeep.utils import Tokenizer, Vocab\n>>> texts = ['Machine learning is great', 'but building stuff is even better']\n>>> tokens = Tokenizer().process_all(texts)\n>>> vocab = Vocab(max_vocab=18, min_freq=1).create(tokens)\n>>> vocab.numericalize(['machine', 'learning', 'is', 'great'])\n[10, 11, 9, 12]\n>>> vocab.textify([10, 11, 9, 12])\n'machine learning is great'\n

NOTE: Note the many special tokens that fastai's tokenizer adds. These are particularly useful when building language models and/or in classification/regression tasks. Please see the fastai docs.

Returns:

  • Vocab \u2013

    An instance of a Vocab object

Source code in pytorch_widedeep/utils/fastai_transforms.py
def create(\n    self,\n    tokens: Tokens,\n) -> \"Vocab\":\n    r\"\"\"Create a vocabulary object from a set of tokens.\n\n    Parameters\n    ----------\n    tokens: Tokens\n        Custom type: ``Collection[Collection[str]]``  see\n        `pytorch_widedeep.wdtypes`. Collection of collection of\n        strings (e.g. list of tokenized sentences)\n\n    Examples\n    --------\n    >>> from pytorch_widedeep.utils import Tokenizer, Vocab\n    >>> texts = ['Machine learning is great', 'but building stuff is even better']\n    >>> tokens = Tokenizer().process_all(texts)\n    >>> vocab = Vocab(max_vocab=18, min_freq=1).create(tokens)\n    >>> vocab.numericalize(['machine', 'learning', 'is', 'great'])\n    [10, 11, 9, 12]\n    >>> vocab.textify([10, 11, 9, 12])\n    'machine learning is great'\n\n    :information_source: **NOTE**:\n    Note the many special tokens that ``fastai``'s' tokenizer adds. These\n    are particularly useful when building Language models and/or in\n    classification/Regression tasks. Please see the [``fastai`` docs](https://docs.fast.ai/text.core.html#Tokenizing).\n\n    Returns\n    -------\n    Vocab\n        An instance of a `Vocab` object\n    \"\"\"\n\n    freq = Counter(p for o in tokens for p in o)\n    itos = [o for o, c in freq.most_common(self.max_vocab) if c >= self.min_freq]\n    for o in reversed(defaults.text_spec_tok):\n        if o in itos:\n            itos.remove(o)\n        itos.insert(0, o)\n\n    if self.pad_idx is not None and self.pad_idx != 1:\n        itos.remove(PAD)\n        itos.insert(self.pad_idx, PAD)\n        # get the new 'xxunk' index\n        xxunk_idx = np.where([el == \"xxunk\" for el in itos])[0][0]\n    else:\n        xxunk_idx = 0\n\n    itos = itos[: self.max_vocab]\n    if (\n        len(itos) < self.max_vocab\n    ):  # Make sure vocab size is a multiple of 8 for fast mixed precision training\n        while len(itos) % 8 != 0:\n            itos.append(\"xxfake\")\n\n    self.itos = itos\n    self.stoi = defaultdict(\n        lambda: xxunk_idx, {v: k for k, v in enumerate(self.itos)}\n    )\n\n    return self\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Vocab.fit","title":"fit","text":"
fit(tokens)\n

Calls the create method. I simply want to honor fastai naming, but for consistency with the rest of the library I am including a fit method

Source code in pytorch_widedeep/utils/fastai_transforms.py
def fit(\n    self,\n    tokens: Tokens,\n) -> \"Vocab\":\n    \"\"\"\n    Calls the `create` method. I simply want to honor fast ai naming, but\n    for consistency with the rest of the library I am including a fit method\n    \"\"\"\n    return self.create(tokens)\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Vocab.numericalize","title":"numericalize","text":"
numericalize(t)\n

Convert a list of tokens t to their ids.

Returns:

  • List[int] \u2013

    List of 'numericalised' tokens

Source code in pytorch_widedeep/utils/fastai_transforms.py
def numericalize(self, t: Collection[str]) -> List[int]:\n    \"\"\"Convert a list of tokens ``t`` to their ids.\n\n    Returns\n    -------\n    List[int]\n        List of '_numericalsed_' tokens\n    \"\"\"\n    return [self.stoi[w] for w in t]\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Vocab.transform","title":"transform","text":"
transform(t)\n

Calls the numericalize method. I simply want to honor fastai naming, but for consistency with the rest of the library I am including a transform method

Source code in pytorch_widedeep/utils/fastai_transforms.py
def transform(self, t: Collection[str]) -> List[int]:\n    \"\"\"\n    Calls the `numericalize` method. I simply want to honor fast ai naming,\n    but for consistency with the rest of the library I am including a\n    transform method\n    \"\"\"\n    return self.numericalize(t)\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Vocab.textify","title":"textify","text":"
textify(nums, sep=' ')\n

Convert a list of nums (or indexes) to their tokens.

Returns:

  • List[str] \u2013

    List of tokens

Source code in pytorch_widedeep/utils/fastai_transforms.py
def textify(self, nums: Collection[int], sep=\" \") -> Union[str, List[str]]:\n    \"\"\"Convert a list of ``nums`` (or indexes) to their tokens.\n\n    Returns\n    -------\n    List[str]\n        List of tokens\n    \"\"\"\n    return (\n        sep.join([self.itos[i] for i in nums])\n        if sep is not None\n        else [self.itos[i] for i in nums]\n    )\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Vocab.inverse_transform","title":"inverse_transform","text":"
inverse_transform(nums, sep=' ')\n

Calls the textify method. I simply want to honor fastai naming, but for consistency with the rest of the library I am including an inverse_transform method

Source code in pytorch_widedeep/utils/fastai_transforms.py
def inverse_transform(\n    self, nums: Collection[int], sep=\" \"\n) -> Union[str, List[str]]:\n    \"\"\"\n    Calls the `textify` method. I simply want to honor fast ai naming, but\n    for consistency with the rest of the library I am including an\n    inverse_transform method\n    \"\"\"\n    # I simply want to honor fast ai naming, but for consistency with the\n    # rest of the library I am including an inverse_transform method\n    return self.textify(nums, sep)\n
"},{"location":"pytorch-widedeep/utils/image_utils.html","title":"Image utils","text":"

SimplePreprocessor and AspectAwarePreprocessor are taken directly from the great series of books Deep Learning for Computer Vision by Adrian Rosebrock. Therefore, all credit for the code in the image_utils module goes to Adrian Rosebrock.

"},{"location":"pytorch-widedeep/utils/image_utils.html#pytorch_widedeep.utils.image_utils.AspectAwarePreprocessor","title":"AspectAwarePreprocessor","text":"
AspectAwarePreprocessor(\n    width, height, inter=cv2.INTER_AREA\n)\n

Class to resize an image to a certain width and height taking into account the image aspect ratio

Parameters:

  • width (int) \u2013

    output width

  • height (int) \u2013

    output height

  • inter \u2013

    opencv interpolation method. See opencv InterpolationFlags.

Source code in pytorch_widedeep/utils/image_utils.py
def __init__(self, width: int, height: int, inter=cv2.INTER_AREA):\n    self.width = width\n    self.height = height\n    self.inter = inter\n
"},{"location":"pytorch-widedeep/utils/image_utils.html#pytorch_widedeep.utils.image_utils.AspectAwarePreprocessor.preprocess","title":"preprocess","text":"
preprocess(image)\n

Returns the resized input image taking into account the image aspect ratio

Parameters:

  • image (ndarray) \u2013

    Input image to be resized

Examples:

>>> import cv2\n>>> from pytorch_widedeep.utils import AspectAwarePreprocessor\n>>> img = cv2.imread(\"tests/test_data_utils/images/galaxy1.png\")\n>>> img.shape\n(694, 890, 3)\n>>> app = AspectAwarePreprocessor(width=224, height=224)\n>>> resized_img = app.preprocess(img)\n>>> resized_img.shape\n(224, 224, 3)\n

Returns:

  • ndarray \u2013

    Resized image according to its original image aspect ratio

Source code in pytorch_widedeep/utils/image_utils.py
def preprocess(self, image: np.ndarray) -> np.ndarray:\n    r\"\"\"Returns the resized input image taking into account the image aspect ratio\n\n    Parameters\n    ----------\n    image: np.ndarray\n        Input image to be resized\n\n    Examples\n    --------\n    >>> import cv2\n    >>> from pytorch_widedeep.utils import AspectAwarePreprocessor\n    >>> img = cv2.imread(\"tests/test_data_utils/images/galaxy1.png\")\n    >>> img.shape\n    (694, 890, 3)\n    >>> app = AspectAwarePreprocessor(width=224, height=224)\n    >>> resized_img = app.preprocess(img)\n    >>> resized_img.shape\n    (224, 224, 3)\n\n    Returns\n    -------\n    np.ndarray\n        Resized image according to its original image aspect ratio\n    \"\"\"\n    (h, w) = image.shape[:2]\n    dW = 0\n    dH = 0\n\n    if w < h:\n        image = imutils.resize(image, width=self.width, inter=self.inter)\n        dH = int((image.shape[0] - self.height) / 2.0)\n    else:\n        image = imutils.resize(image, height=self.height, inter=self.inter)\n        dW = int((image.shape[1] - self.width) / 2.0)\n\n    (h, w) = image.shape[:2]\n    image = image[dH : h - dH, dW : w - dW]\n\n    resized_image = cv2.resize(\n        image, (self.width, self.height), interpolation=self.inter\n    )\n\n    return resized_image\n
"},{"location":"pytorch-widedeep/utils/image_utils.html#pytorch_widedeep.utils.image_utils.SimplePreprocessor","title":"SimplePreprocessor","text":"
SimplePreprocessor(width, height, inter=cv2.INTER_AREA)\n

Class to resize an image to a certain width and height

Parameters:

  • width (int) \u2013

    output width

  • height (int) \u2013

    output height

  • inter \u2013

    opencv interpolation method. See opencv InterpolationFlags.

Source code in pytorch_widedeep/utils/image_utils.py
def __init__(self, width: int, height: int, inter=cv2.INTER_AREA):\n    self.width = width\n    self.height = height\n    self.inter = inter\n
"},{"location":"pytorch-widedeep/utils/image_utils.html#pytorch_widedeep.utils.image_utils.SimplePreprocessor.preprocess","title":"preprocess","text":"
preprocess(image)\n

Returns the resized input image

Parameters:

  • image (ndarray) \u2013

    Input image to be resized

Returns:

  • ndarray \u2013

    Resized image

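A usage sketch, mirroring the AspectAwarePreprocessor example above (same test image path):

>>> import cv2
>>> from pytorch_widedeep.utils import SimplePreprocessor
>>> img = cv2.imread("tests/test_data_utils/images/galaxy1.png")
>>> sp = SimplePreprocessor(width=224, height=224)
>>> sp.preprocess(img).shape  # plain resize, the aspect ratio is not preserved
(224, 224, 3)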
Source code in pytorch_widedeep/utils/image_utils.py
def preprocess(self, image: np.ndarray) -> np.ndarray:\n    r\"\"\"Returns the resized input image\n\n    Parameters\n    ----------\n    image: np.ndarray\n        Input image to be resized\n\n    Returns\n    -------\n    np.ndarray\n        Resized image\n\n    \"\"\"\n    resized_image = cv2.resize(\n        image, (self.width, self.height), interpolation=self.inter\n    )\n\n    return resized_image\n
"},{"location":"pytorch-widedeep/utils/text_utils.html","title":"Text utils","text":"

Collection of helper functions that facilitate processing text.

"},{"location":"pytorch-widedeep/utils/text_utils.html#pytorch_widedeep.utils.text_utils.simple_preprocess","title":"simple_preprocess","text":"
simple_preprocess(\n    doc, lower=False, deacc=False, min_len=2, max_len=15\n)\n

This is Gensim's simple_preprocess with a lower param to indicate whether or not to lower case all the tokens in the doc

For more information see: Gensim utils module

Parameters:

  • doc (str) \u2013

    Input document.

  • lower (bool, default: False ) \u2013

    Lower case tokens in the input doc

  • deacc (bool, default: False ) \u2013

    Remove accent marks from tokens using Gensim's deaccent

  • min_len (int, default: 2 ) \u2013

    Minimum length of token (inclusive). Shorter tokens are discarded.

  • max_len (int, default: 15 ) \u2013

    Maximum length of token in result (inclusive). Longer tokens are discarded.

Examples:

>>> from pytorch_widedeep.utils import simple_preprocess\n>>> simple_preprocess('Machine learning is great')\n['Machine', 'learning', 'is', 'great']\n

Returns:

  • List[str] \u2013

    List with the processed tokens

Source code in pytorch_widedeep/utils/text_utils.py
def simple_preprocess(\n    doc: str,\n    lower: bool = False,\n    deacc: bool = False,\n    min_len: int = 2,\n    max_len: int = 15,\n) -> List[str]:\n    r\"\"\"\n    This is `Gensim`'s `simple_preprocess` with a `lower` param to\n    indicate wether or not to lower case all the token in the doc\n\n    For more information see: `Gensim` [utils module](https://radimrehurek.com/gensim/utils.html)\n\n    Parameters\n    ----------\n    doc: str\n        Input document.\n    lower: bool, default = False\n        Lower case tokens in the input doc\n    deacc: bool, default = False\n        Remove accent marks from tokens using `Gensim`'s `deaccent`\n    min_len: int, default = 2\n        Minimum length of token (inclusive). Shorter tokens are discarded.\n    max_len: int, default = 15\n        Maximum length of token in result (inclusive). Longer tokens are discarded.\n\n    Examples\n    --------\n    >>> from pytorch_widedeep.utils import simple_preprocess\n    >>> simple_preprocess('Machine learning is great')\n    ['Machine', 'learning', 'is', 'great']\n\n    Returns\n    -------\n    List[str]\n        List with the processed tokens\n    \"\"\"\n    tokens = [\n        token\n        for token in tokenize(doc, lower=lower, deacc=deacc, errors=\"ignore\")\n        if min_len <= len(token) <= max_len and not token.startswith(\"_\")\n    ]\n    return tokens\n
"},{"location":"pytorch-widedeep/utils/text_utils.html#pytorch_widedeep.utils.text_utils.get_texts","title":"get_texts","text":"
get_texts(texts, already_processed=False, n_cpus=None)\n

Tokenization using Fastai's Tokenizer because it does a series of very convenient things during the tokenization process

See pytorch_widedeep.utils.fastai_transforms.Tokenizer

Parameters:

  • texts (List[str]) \u2013

    List of str with the texts (or documents). One str per document

  • already_processed (Optional[bool], default: False ) \u2013

    Boolean indicating if the text is already processed and we simply want to tokenize it. This parameter is intended for those cases where the input sequences might not be text (but IDs, or anything else) and we just want to tokenize them

  • n_cpus (Optional[int], default: None ) \u2013

    number of CPUs to use during the tokenization process

Examples:

>>> from pytorch_widedeep.utils import get_texts\n>>> texts = ['Machine learning is great', 'but building stuff is even better']\n>>> get_texts(texts)\n[['xxmaj', 'machine', 'learning', 'is', 'great'], ['but', 'building', 'stuff', 'is', 'even', 'better']]\n

Returns:

  • List[List[str]] \u2013

    List of lists, one list per 'document' containing its corresponding tokens

NOTE: get_texts uses pytorch_widedeep.utils.fastai_transforms.Tokenizer. This tokenizer applies a series of convenient processing steps, including the addition of some special tokens, such as TK_MAJ (xxmaj), used to indicate that the next word begins with a capital in the original text. For more details on special tokens please see the fastai docs

Source code in pytorch_widedeep/utils/text_utils.py
def get_texts(\n    texts: List[str],\n    already_processed: Optional[bool] = False,\n    n_cpus: Optional[int] = None,\n) -> List[List[str]]:\n    r\"\"\"Tokenization using `Fastai`'s `Tokenizer` because it does a\n    series of very convenients things during the tokenization process\n\n    See `pytorch_widedeep.utils.fastai_utils.Tokenizer`\n\n    Parameters\n    ----------\n    texts: List\n        List of str with the texts (or documents). One str per document\n    already_processed: bool, Optional, default = False\n        Boolean indicating if the text is already processed and we simply want\n        to tokenize it. This parameter is thought for those cases where the\n        input sequences might not be text (but IDs, or anything else) and we\n        just want to tokenize it\n    n_cpus: int, Optional, default = None\n        number of CPUs to used during the tokenization process\n\n    Examples\n    --------\n    >>> from pytorch_widedeep.utils import get_texts\n    >>> texts = ['Machine learning is great', 'but building stuff is even better']\n    >>> get_texts(texts)\n    [['xxmaj', 'machine', 'learning', 'is', 'great'], ['but', 'building', 'stuff', 'is', 'even', 'better']]\n\n    Returns\n    -------\n    List[List[str]]\n        List of lists, one list per '_document_' containing its corresponding tokens\n\n    :information_source: **NOTE**:\n    `get_texts` uses `pytorch_widedeep.utils.fastai_transforms.Tokenizer`.\n    Such tokenizer uses a series of convenient processing steps, including\n    the  addition of some special tokens, such as `TK_MAJ` (`xxmaj`), used to\n    indicate the next word begins with a capital in the original text. For more\n    details of special tokens please see the [`fastai` `docs](https://docs.fast.ai/text.core.html#Tokenizing)\n    \"\"\"\n\n    num_cpus = n_cpus if n_cpus is not None else os.cpu_count()\n\n    if not already_processed:\n        processed_texts = [\" \".join(simple_preprocess(t)) for t in texts]\n    else:\n        processed_texts = texts\n    tok = Tokenizer(n_cpus=num_cpus).process_all(processed_texts)\n    return tok\n
"},{"location":"pytorch-widedeep/utils/text_utils.html#pytorch_widedeep.utils.text_utils.pad_sequences","title":"pad_sequences","text":"
pad_sequences(seq, maxlen, pad_first=True, pad_idx=1)\n

Given a List of tokenized and numericalised sequences it will return padded sequences according to the input parameters.

Parameters:

  • seq (List[int]) \u2013

    List of int with the numericalised tokens

  • maxlen (int) \u2013

    Maximum length of the padded sequences

  • pad_first (bool, default: True ) \u2013

    Indicates whether the padding index will be added at the beginning or the end of the sequences

  • pad_idx (int, default: 1 ) \u2013

    padding index. Fastai's Tokenizer leaves 0 for the 'unknown' token.

Examples:

>>> from pytorch_widedeep.utils import pad_sequences\n>>> seq = [1,2,3]\n>>> pad_sequences(seq, maxlen=5, pad_idx=0)\narray([0, 0, 1, 2, 3], dtype=int32)\n

Returns:

  • ndarray \u2013

    numpy array with the padded sequences

Source code in pytorch_widedeep/utils/text_utils.py
def pad_sequences(\n    seq: List[int], maxlen: int, pad_first: bool = True, pad_idx: int = 1\n) -> np.ndarray:\n    r\"\"\"\n    Given a List of tokenized and `numericalised` sequences it will return\n    padded sequences according to the input parameters.\n\n    Parameters\n    ----------\n    seq: List\n        List of int with the `numericalised` tokens\n    maxlen: int\n        Maximum length of the padded sequences\n    pad_first: bool,  default = True\n        Indicates whether the padding index will be added at the beginning or the\n        end of the sequences\n    pad_idx: int, default = 1\n        padding index. Fastai's Tokenizer leaves 0 for the 'unknown' token.\n\n    Examples\n    --------\n    >>> from pytorch_widedeep.utils import pad_sequences\n    >>> seq = [1,2,3]\n    >>> pad_sequences(seq, maxlen=5, pad_idx=0)\n    array([0, 0, 1, 2, 3], dtype=int32)\n\n    Returns\n    -------\n    np.ndarray\n        numpy array with the padded sequences\n    \"\"\"\n    if len(seq) == 0:\n        return np.zeros(maxlen, dtype=\"int32\") + pad_idx\n    elif len(seq) >= maxlen:\n        res = np.array(seq[-maxlen:]).astype(\"int32\")\n        return res\n    else:\n        res = np.zeros(maxlen, dtype=\"int32\") + pad_idx\n        if pad_first:\n            res[-len(seq) :] = seq\n        else:\n            res[: len(seq) :] = seq\n        return res\n
"},{"location":"pytorch-widedeep/utils/text_utils.html#pytorch_widedeep.utils.text_utils.build_embeddings_matrix","title":"build_embeddings_matrix","text":"
build_embeddings_matrix(\n    vocab, word_vectors_path, min_freq, verbose=1\n)\n

Build the embedding matrix using pretrained word vectors.

Returns pretrained word embeddings. If a word in our vocabulary is not among the pretrained embeddings it will be assigned the mean pretrained word-embeddings vector

Parameters:

  • vocab (Union[Vocab, ChunkVocab]) \u2013

    see pytorch_widedeep.utils.fastai_transforms.Vocab

  • word_vectors_path (str) \u2013

    path to the pretrained word embeddings

  • min_freq (int) \u2013

    minimum frequency required for a word to be in the vocabulary

  • verbose (int, default: 1 ) \u2013

    level of verbosity. Set to 0 for no verbosity

Returns:

  • ndarray \u2013

    Pretrained word embeddings

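A minimal sketch, assuming a Vocab fitted as in the Vocab.create example above and a GloVe-style vectors file at a placeholder path:

from pytorch_widedeep.utils import Tokenizer, Vocab
from pytorch_widedeep.utils.text_utils import build_embeddings_matrix

texts = ["Machine learning is great", "but building stuff is even better"]
tokens = Tokenizer().process_all(texts)
vocab = Vocab(max_vocab=100, min_freq=1).fit(tokens)

# "glove.6B.100d.txt" is a placeholder path to a file with pretrained word vectors
embedding_matrix = build_embeddings_matrix(
    vocab, word_vectors_path="glove.6B.100d.txt", min_freq=1
)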
Source code in pytorch_widedeep/utils/text_utils.py
def build_embeddings_matrix(\n    vocab: Union[Vocab, ChunkVocab],\n    word_vectors_path: str,\n    min_freq: int,\n    verbose: int = 1,\n) -> np.ndarray:  # pragma: no cover\n    r\"\"\"Build the embedding matrix using pretrained word vectors.\n\n    Returns pretrained word embeddings. If a word in our vocabulary is not\n    among the pretrained embeddings it will be assigned the mean pretrained\n    word-embeddings vector\n\n    Parameters\n    ----------\n    vocab: Vocab\n        see `pytorch_widedeep.utils.fastai_utils.Vocab`\n    word_vectors_path: str\n        path to the pretrained word embeddings\n    min_freq: int\n        minimum frequency required for a word to be in the vocabulary\n    verbose: int,  default=1\n        level of verbosity. Set to 0 for no verbosity\n\n    Returns\n    -------\n    np.ndarray\n        Pretrained word embeddings\n    \"\"\"\n    if not os.path.isfile(word_vectors_path):\n        raise FileNotFoundError(\"{} not found\".format(word_vectors_path))\n    if verbose:\n        print(\"Indexing word vectors...\")\n\n    embeddings_index = {}\n    f = open(word_vectors_path)\n    for line in f:\n        values = line.split()\n        word = values[0]\n        coefs = np.asarray(values[1:], dtype=\"float32\")\n        embeddings_index[word] = coefs\n    f.close()\n\n    if verbose:\n        print(\"Loaded {} word vectors\".format(len(embeddings_index)))\n        print(\"Preparing embeddings matrix...\")\n\n    mean_word_vector = np.mean(list(embeddings_index.values()), axis=0)  # type: ignore[arg-type]\n    embedding_dim = len(list(embeddings_index.values())[0])\n    num_words = len(vocab.itos)\n    embedding_matrix = np.zeros((num_words, embedding_dim))\n    found_words = 0\n    for i, word in enumerate(vocab.itos):\n        embedding_vector = embeddings_index.get(word)\n        if embedding_vector is not None:\n            embedding_matrix[i] = embedding_vector\n            found_words += 1\n        else:\n            embedding_matrix[i] = mean_word_vector\n\n    if verbose:\n        print(\n            \"{} words in the vocabulary had {} vectors and appear more than {} times\".format(\n                found_words, word_vectors_path, min_freq\n            )\n        )\n\n    return embedding_matrix.astype(\"float32\")\n
"}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"index.html","title":"Home","text":""},{"location":"index.html#pytorch-widedeep","title":"pytorch-widedeep","text":"

A flexible package for multimodal deep learning to combine tabular data with text and images using Wide and Deep models in PyTorch

Documentation: https://pytorch-widedeep.readthedocs.io

Companion posts and tutorials: infinitoml

Experiments and comparison with LightGBM: TabularDL vs LightGBM

Slack: if you want to contribute or just want to chat with us, join slack

The content of this document is organized as follows:

  • pytorch-widedeep
    • Introduction
    • The deeptabular component
    • Acknowledgments
    • License
    • Cite
      • BibTex
      • APA
"},{"location":"index.html#introduction","title":"Introduction","text":"

pytorch-widedeep is based on Google's Wide and Deep Algorithm, adjusted for multi-modal datasets

In general terms, pytorch-widedeep is a package to use deep learning with tabular data. In particular, it is intended to facilitate the combination of text and images with corresponding tabular data using wide and deep models. With that in mind there are a number of architectures that can be implemented with just a few lines of code. The main components of those architectures are shown in the Figure below:

The dashed boxes in the figure represent optional, overall components, and the dashed lines/arrows indicate the corresponding connections, depending on whether or not certain components are present. For example, the dashed, blue-lines indicate that the deeptabular, deeptext and deepimage components are connected directly to the output neuron or neurons (depending on whether we are performing a binary classification or regression, or a multi-class classification) if the optional deephead is not present. Finally, the components within the faded-pink rectangle are concatenated.

Note that it is not possible to illustrate the number of possible architectures and components available in pytorch-widedeep in one Figure. Therefore, for more details on possible architectures (and more) please, read this documentation, or see the Examples folder in the repo.

In math terms, and following the notation in the paper, the expression for the architecture without a deephead component can be formulated as:

\\[ pred = \\sigma(W^{T}_{wide}[x,\\phi(x)] + W^{T}_{deeptabular}a^{l_f}_{deeptabular} + W^{T}_{deeptext}a^{l_f}_{deeptext} + W^{T}_{deepimage}a^{l_f}_{deepimage} + b) \\]

Where σ is the sigmoid function, 'W' are the weight matrices applied to the wide model and to the final activations of the deep models, 'a' are these final activations, φ(x) are the cross product transformations of the original features 'x', and 'b' is the bias term. In case you are wondering what "cross product transformations" are, here is a quote taken directly from the paper: "For binary features, a cross-product transformation (e.g., "AND(gender=female, language=en)") is 1 if and only if the constituent features ("gender=female" and "language=en") are all 1, and 0 otherwise".

While if there is a deephead component, the previous expression turns into:

\\[ pred = \\sigma(W^{T}_{wide}[x,\\phi(x)] + W^{T}_{deephead}a^{l_f}_{deephead} + b) \\]

It is perfectly possible to use custom models (and not necessarily those in the library) as long as the custom models have an attribute called output_dim with the size of the last layer of activations, so that WideDeep can be constructed. Examples on how to use custom components can be found in the Examples folder.
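As a hedged illustration of that requirement, any custom component only needs to expose an output_dim attribute with the size of its last layer of activations; the class name and layer sizes below are made up:

import torch.nn as nn

class MyCustomComponent(nn.Module):
    def __init__(self, input_dim: int):
        super().__init__()
        self.mlp = nn.Sequential(nn.Linear(input_dim, 32), nn.ReLU())
        # WideDeep reads this attribute to size the final prediction layer
        self.output_dim = 32

    def forward(self, X):
        return self.mlp(X)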

"},{"location":"index.html#the-deeptabular-component","title":"The deeptabular component","text":"

It is important to emphasize that each individual component, wide, deeptabular, deeptext and deepimage, can be used independently and in isolation. For example, one could use only wide, which is simply a linear model. In fact, one of the most interesting functionalities in pytorch-widedeep is the use of the deeptabular component on its own, i.e. what one might normally refer to as Deep Learning for Tabular Data (a minimal sketch of this is shown after the model lists below). Currently, pytorch-widedeep offers the following different models for that component:

  1. Wide: a simple linear model where the nonlinearities are captured via cross-product transformations, as explained before.
  2. TabMlp: a simple MLP that receives embeddings representing the categorical features, concatenated with the continuous features, which can also be embedded.
  3. TabResnet: similar to the previous model but the embeddings are passed through a series of ResNet blocks built with dense layers.
  4. TabNet: details on TabNet can be found in TabNet: Attentive Interpretable Tabular Learning

Two simpler attention based models that we call:

  1. ContextAttentionMLP: MLP with an attention mechanism "on top" that is based on Hierarchical Attention Networks for Document Classification
  2. SelfAttentionMLP: MLP with an attention mechanism that is a simplified version of a transformer block that we refer to as "query-key self-attention".

The Tabformer family, i.e. Transformers for Tabular data:

  1. TabTransformer: details on the TabTransformer can be found in TabTransformer: Tabular Data Modeling Using Contextual Embeddings. Note that this is an 'enhanced' implementation that allows for many options that can be set up via the TabTransformer params.
  2. SAINT: Details on SAINT can be found in SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training.
  3. FT-Transformer: details on the FT-Transformer can be found in Revisiting Deep Learning Models for Tabular Data.
  4. TabFastFormer: adaptation of the FastFormer for tabular data. Details on the FastFormer can be found in FastFormers: Highly Efficient Transformer Models for Natural Language Understanding
  5. TabPerceiver: adaptation of the Perceiver for tabular data. Details on the Perceiver can be found in Perceiver: General Perception with Iterative Attention

And probabilistic DL models for tabular data based on Weight Uncertainty in Neural Networks:

  1. BayesianWide: Probabilistic adaptation of the Wide model.
  2. BayesianTabMlp: Probabilistic adaptation of the TabMlp model

Note that while there are scientific publications for the TabTransformer, SAINT and FT-Transformer, the TabFastFormer and TabPerceiver are our own adaptations of those algorithms for tabular data.

In addition, Self-Supervised pre-training can be used for all deeptabular models, with the exception of the TabPerceiver. Self-Supervised pre-training can be used via two methods or routines which we refer to as: the encoder-decoder method and the contrastive-denoising method. Please, see the documentation and the examples for details on this functionality, and all other options in the library.
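As mentioned above, the deeptabular component can be used on its own. A minimal sketch, reusing the preprocessing objects and variables prepared in the Quick Start section, would be:

from pytorch_widedeep import Trainer
from pytorch_widedeep.models import TabMlp, WideDeep
from pytorch_widedeep.metrics import Accuracy

# tab_preprocessor, X_tab, continuous_cols and target as in the Quick Start section
tab_mlp = TabMlp(
    column_idx=tab_preprocessor.column_idx,
    cat_embed_input=tab_preprocessor.cat_embed_input,
    continuous_cols=continuous_cols,
)
model = WideDeep(deeptabular=tab_mlp)  # no wide, text or image components

trainer = Trainer(model, objective="binary", metrics=[Accuracy])
trainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256)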

"},{"location":"index.html#acknowledgments","title":"Acknowledgments","text":"

This library borrows from a series of other libraries, so I think it is just fair to mention them here in the README (specific mentions are also included in the code).

The Callbacks and Initializers structure and code is inspired by the torchsample library, which is itself partially inspired by Keras.

The TextProcessor class in this library uses fastai's Tokenizer and Vocab. The code at utils.fastai_transforms is a minor adaptation of their code so it functions within this library. In my experience their Tokenizer is the best in class.

The ImageProcessor class in this library uses code from the fantastic Deep Learning for Computer Vision (DL4CV) book by Adrian Rosebrock.

"},{"location":"index.html#license","title":"License","text":"

This work is dual-licensed under Apache 2.0 and MIT (or any later version). You can choose between one of them if you use this work.

SPDX-License-Identifier: Apache-2.0 AND MIT

"},{"location":"index.html#cite","title":"Cite","text":""},{"location":"index.html#bibtex","title":"BibTex","text":"
@article{Zaurin_pytorch-widedeep_A_flexible_2023,\nauthor = {Zaurin, Javier Rodriguez and Mulinka, Pavol},\ndoi = {10.21105/joss.05027},\njournal = {Journal of Open Source Software},\nmonth = jun,\nnumber = {86},\npages = {5027},\ntitle = {{pytorch-widedeep: A flexible package for multimodal deep learning}},\nurl = {https://joss.theoj.org/papers/10.21105/joss.05027},\nvolume = {8},\nyear = {2023}\n}\n
"},{"location":"index.html#apa","title":"APA","text":"
Zaurin, J. R., & Mulinka, P. (2023). pytorch-widedeep: A flexible package for\nmultimodal deep learning. Journal of Open Source Software, 8(86), 5027.\nhttps://doi.org/10.21105/joss.05027\n
"},{"location":"contributing.html","title":"Contributing","text":"

Pytorch-widedeep is being developed and used by many active community members. Your help is very valuable to make it better for everyone.

  • Check the Roadmap or open an issue to report problems or recommend new features, and submit a draft pull request, which will be changed to a pull request after initial review
  • Contribute to the tests to make it more reliable.
  • Contribute to the documentation to make it clearer for everyone.
  • Contribute to the examples to share your experience with other users.
  • Join the discussion on slack
"},{"location":"installation.html","title":"Installation","text":"

This section explains how to install pytorch-widedeep.

For the latest stable release, execute:

pip install pytorch-widedeep\n

For the bleeding-edge version, execute:

pip install git+https://github.com/jrzaurin/pytorch-widedeep.git\n

For a developer install, execute:

# Clone the repository\ngit clone https://github.com/jrzaurin/pytorch-widedeep\ncd pytorch-widedeep\n\n# Install in dev mode\npip install -e .\n
"},{"location":"installation.html#dependencies","title":"Dependencies","text":"
  • pandas>=1.3.5
  • numpy>=1.21.6
  • scipy>=1.7.3
  • scikit-learn>=1.0.2
  • gensim
  • spacy
  • opencv-contrib-python
  • imutils
  • tqdm
  • torch
  • torchvision
  • einops
  • wrapt
  • torchmetrics
  • pyarrow
  • fastparquet>=0.8.1
"},{"location":"quick_start.html","title":"Quick Start","text":"

This is an example of binary classification with the adult census dataset using a combination of a wide and a deep model (in this case a so-called deeptabular model) with default settings.

import numpy as np\nimport torch\nfrom sklearn.model_selection import train_test_split\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor\nfrom pytorch_widedeep.models import Wide, TabMlp, WideDeep\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep.datasets import load_adult\n\n\ndf = load_adult(as_frame=True)\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\ndf_train, df_test = train_test_split(df, test_size=0.2, stratify=df.income_label)\n\n# Define the 'column set up'\nwide_cols = [\n    \"education\",\n    \"relationship\",\n    \"workclass\",\n    \"occupation\",\n    \"native-country\",\n    \"gender\",\n]\ncrossed_cols = [(\"education\", \"occupation\"), (\"native-country\", \"occupation\")]\n\ncat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital-status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital-gain\",\n    \"capital-loss\",\n    \"native-country\",\n]\ncontinuous_cols = [\"age\", \"hours-per-week\"]\ntarget = \"income_label\"\ntarget = df_train[target].values\n\n# prepare the data\nwide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_wide = wide_preprocessor.fit_transform(df_train)\n\ntab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols  # type: ignore[arg-type]\n)\nX_tab = tab_preprocessor.fit_transform(df_train)\n\n# build the model\nwide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=continuous_cols,\n)\nmodel = WideDeep(wide=wide, deeptabular=tab_mlp)\n\n# train and validate\ntrainer = Trainer(model, objective=\"binary\", metrics=[Accuracy])\ntrainer.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    target=target,\n    n_epochs=5,\n    batch_size=256,\n)\n\n# predict on test\nX_wide_te = wide_preprocessor.transform(df_test)\nX_tab_te = tab_preprocessor.transform(df_test)\npreds = trainer.predict(X_wide=X_wide_te, X_tab=X_tab_te)\n\n# Save and load\n\n# Option 1: this will also save training history and lr history if the\n# LRHistory callback is used\ntrainer.save(path=\"model_weights\", save_state_dict=True)\n\n# Option 2: save as any other torch model\ntorch.save(model.state_dict(), \"model_weights/wd_model.pt\")\n\n# From here in advance, Option 1 or 2 are the same. I assume the user has\n# prepared the data and defined the new model components:\n# 1. Build the model\nmodel_new = WideDeep(wide=wide, deeptabular=tab_mlp)\nmodel_new.load_state_dict(torch.load(\"model_weights/wd_model.pt\"))\n\n# 2. Instantiate the trainer\ntrainer_new = Trainer(model_new, objective=\"binary\")\n\n# 3. Either start the fit or directly predict\npreds = trainer_new.predict(X_wide=X_wide, X_tab=X_tab)\n
"},{"location":"examples/01_preprocessors_and_utils.html","title":"01_preprocessors_and_utils","text":"

For example

In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport pytorch_widedeep as wd\n\nfrom pytorch_widedeep.datasets import load_adult\nfrom pytorch_widedeep.preprocessing import WidePreprocessor\n
import numpy as np import pandas as pd import pytorch_widedeep as wd from pytorch_widedeep.datasets import load_adult from pytorch_widedeep.preprocessing import WidePreprocessor
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n
In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\ndf.head()\n
df = load_adult(as_frame=True) df.head() Out[2]: age workclass fnlwgt education educational-num marital-status occupation relationship race gender capital-gain capital-loss hours-per-week native-country income 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States <=50K 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States <=50K 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States >50K 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States >50K 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States <=50K In\u00a0[3]: Copied!
wide_cols = [\n    \"education\",\n    \"relationship\",\n    \"workclass\",\n    \"occupation\",\n    \"native-country\",\n    \"gender\",\n]\ncrossed_cols = [(\"education\", \"occupation\"), (\"native-country\", \"occupation\")]\n
wide_cols = [ \"education\", \"relationship\", \"workclass\", \"occupation\", \"native-country\", \"gender\", ] crossed_cols = [(\"education\", \"occupation\"), (\"native-country\", \"occupation\")] In\u00a0[4]: Copied!
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_wide = wide_preprocessor.fit_transform(df)\n# From here on, any new observation can be prepared by simply running `.transform`\n# new_X_wide = wide_preprocessor.transform(new_df)\n
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_wide = wide_preprocessor.fit_transform(df) # From here on, any new observation can be prepared by simply running `.transform` # new_X_wide = wide_preprocessor.transform(new_df) In\u00a0[5]: Copied!
X_wide\n
X_wide Out[5]:
array([[  1,  17,  23, ...,  89,  91, 316],\n       [  2,  18,  23, ...,  89,  92, 317],\n       [  3,  18,  24, ...,  89,  93, 318],\n       ...,\n       [  2,  20,  23, ...,  90, 103, 323],\n       [  2,  17,  23, ...,  89, 103, 323],\n       [  2,  21,  29, ...,  90, 115, 324]])

Note that the label encoding starts from 1. This is because it is convenient to leave 0 for padding, i.e. unknown categories. Let's take, for example, the first entry

In\u00a0[6]: Copied!
X_wide[0]\n
X_wide[0] Out[6]:
array([  1,  17,  23,  32,  47,  89,  91, 316])
In\u00a0[7]: Copied!
wide_preprocessor.inverse_transform(X_wide[:1])\n
wide_preprocessor.inverse_transform(X_wide[:1]) Out[7]: education relationship workclass occupation native-country gender education_occupation native-country_occupation 0 11th Own-child Private Machine-op-inspct United-States Male 11th-Machine-op-inspct United-States-Machine-op-inspct

As we can see, wide_preprocessor numerically encodes the wide_cols and the crossed_cols, which can be recovered using the method inverse_transform.

In\u00a0[8]: Copied!
from pytorch_widedeep.preprocessing import TabPreprocessor\n
from pytorch_widedeep.preprocessing import TabPreprocessor In\u00a0[9]: Copied!
# cat_embed_cols = [(column_name, embed_dim), ...]\ncat_embed_cols = [\n    (\"education\", 10),\n    (\"relationship\", 8),\n    (\"workclass\", 10),\n    (\"occupation\", 10),\n    (\"native-country\", 10),\n]\ncontinuous_cols = [\"age\", \"hours-per-week\"]\n
# cat_embed_cols = [(column_name, embed_dim), ...] cat_embed_cols = [ (\"education\", 10), (\"relationship\", 8), (\"workclass\", 10), (\"occupation\", 10), (\"native-country\", 10), ] continuous_cols = [\"age\", \"hours-per-week\"] In\u00a0[10]: Copied!
tab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols,\n    continuous_cols=continuous_cols,\n    cols_to_scale=[\"age\"],  # or scale=True or cols_to_scale=continuous_cols\n)\nX_tab = tab_preprocessor.fit_transform(df)\n# From here on, any new observation can be prepared by simply running `.transform`\n# new_X_deep = deep_preprocessor.transform(new_df)\n
tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols, cols_to_scale=[\"age\"], # or scale=True or cols_to_scale=continuous_cols ) X_tab = tab_preprocessor.fit_transform(df) # From here on, any new observation can be prepared by simply running `.transform` # new_X_deep = deep_preprocessor.transform(new_df) In\u00a0[11]: Copied!
X_tab\n
X_tab Out[11]:
array([[ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00, ...,\n         1.00000000e+00, -9.95128932e-01,  4.00000000e+01],\n       [ 2.00000000e+00,  2.00000000e+00,  1.00000000e+00, ...,\n         1.00000000e+00, -4.69415091e-02,  5.00000000e+01],\n       [ 3.00000000e+00,  2.00000000e+00,  2.00000000e+00, ...,\n         1.00000000e+00, -7.76316450e-01,  4.00000000e+01],\n       ...,\n       [ 2.00000000e+00,  4.00000000e+00,  1.00000000e+00, ...,\n         1.00000000e+00,  1.41180837e+00,  4.00000000e+01],\n       [ 2.00000000e+00,  1.00000000e+00,  1.00000000e+00, ...,\n         1.00000000e+00, -1.21394141e+00,  2.00000000e+01],\n       [ 2.00000000e+00,  5.00000000e+00,  7.00000000e+00, ...,\n         1.00000000e+00,  9.74183408e-01,  4.00000000e+01]])

Note that the label encoding starts from 1. This is because it is convenient to leave 0 for padding, i.e. unknown categories. Let's take, for example, the first entry

In\u00a0[12]: Copied!
X_tab[0]\n
X_tab[0] Out[12]:
array([ 1.        ,  1.        ,  1.        ,  1.        ,  1.        ,\n       -0.99512893, 40.        ])
In\u00a0[13]: Copied!
tab_preprocessor.inverse_transform(X_tab[:1])\n
tab_preprocessor.inverse_transform(X_tab[:1]) Out[13]: education relationship workclass occupation native-country age hours-per-week 0 11th Own-child Private Machine-op-inspct United-States 25.0 40.0

The TabPreprocessor will have a series of useful attributes that can later be used when instantiating the different Tabular Models, such as, for example, the column indexes (used internally in the models to slice the tensors) or the categorical embeddings set up

In\u00a0[14]: Copied!
tab_preprocessor.column_idx\n
tab_preprocessor.column_idx Out[14]:
{'education': 0,\n 'relationship': 1,\n 'workclass': 2,\n 'occupation': 3,\n 'native-country': 4,\n 'age': 5,\n 'hours-per-week': 6}
In\u00a0[15]: Copied!
# column name, num unique, embedding dim\ntab_preprocessor.cat_embed_input\n
# column name, num unique, embedding dim tab_preprocessor.cat_embed_input Out[15]:
[('education', 16, 10),\n ('relationship', 6, 8),\n ('workclass', 9, 10),\n ('occupation', 15, 10),\n ('native-country', 42, 10)]

As I mentioned, there is more one can do, such as, for example, quantizing (or bucketizing) the continuous cols. For this we could use the quantization_setup param. This parameter accepts a number of different inputs and uses pd.cut under the hood to quantize the continuous cols. For more info, please read the docs. Let's use it here to quantize \"age\" and \"hours-per-week\" into 4 and 5 \"buckets\" respectively

In\u00a0[16]: Copied!
quantization_setup = {\n    \"age\": 4,\n    \"hours-per-week\": 5,\n}  # you can also pass a list of floats with the boundaries if you wanted\nquant_tab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols,\n    continuous_cols=continuous_cols,\n    quantization_setup=quantization_setup,\n)\nqX_tab = quant_tab_preprocessor.fit_transform(df)\n
quantization_setup = { \"age\": 4, \"hours-per-week\": 5, } # you can also pass a list of floats with the boundaries if you wanted quant_tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols, quantization_setup=quantization_setup, ) qX_tab = quant_tab_preprocessor.fit_transform(df)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[17]: Copied!
qX_tab\n
qX_tab Out[17]:
array([[1, 1, 1, ..., 1, 1, 2],\n       [2, 2, 1, ..., 1, 2, 3],\n       [3, 2, 2, ..., 1, 1, 2],\n       ...,\n       [2, 4, 1, ..., 1, 3, 2],\n       [2, 1, 1, ..., 1, 1, 1],\n       [2, 5, 7, ..., 1, 2, 2]])

Note that the continuous columns that have been bucketised into quantiles are treated like any other categorical column

In\u00a0[18]: Copied!
quant_tab_preprocessor.cat_embed_input\n
quant_tab_preprocessor.cat_embed_input Out[18]:
[('education', 16, 10),\n ('relationship', 6, 8),\n ('workclass', 9, 10),\n ('occupation', 15, 10),\n ('native-country', 42, 10),\n ('age', 4, 4),\n ('hours-per-week', 5, 4)]

The column 'age' now has 4 categories, which will be encoded using embeddings of 4 dims. Note that, as with any other categorical column, the categorical \"counter\" starts at 1. This is because any incoming value that is lower/higher than the lowest/highest value seen in the train (or already seen) dataset will be encoded as 0.

In\u00a0[19]: Copied!
np.unique(qX_tab[:, quant_tab_preprocessor.column_idx[\"age\"]])\n
np.unique(qX_tab[:, quant_tab_preprocessor.column_idx[\"age\"]]) Out[19]:
array([1, 2, 3, 4])

Finally, if we now wanted to inverse_transform the transformed array back into the original dataframe, we could still do it, but the continuous, bucketized columns would be transformed back to the mid point of their quantile/bucket range

In\u00a0[20]: Copied!
df_decoded = quant_tab_preprocessor.inverse_transform(qX_tab)\n
df_decoded = quant_tab_preprocessor.inverse_transform(qX_tab)
Note that quantized cols will be turned into the mid point of the corresponding bin\n
In\u00a0[21]: Copied!
df.head(2)\n
df.head(2) Out[21]: age workclass fnlwgt education educational-num marital-status occupation relationship race gender capital-gain capital-loss hours-per-week native-country income 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States <=50K 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States <=50K In\u00a0[22]: Copied!
df_decoded.head(2)\n
df_decoded.head(2) Out[22]: education relationship workclass occupation native-country age hours-per-week 0 11th Own-child Private Machine-op-inspct United-States 26.0885 30.4 1 HS-grad Husband Private Farming-fishing United-States 44.3750 50.0

There is one final comment to make regarding the inverse_transform functionality. As we mentioned before, the encoding 0 is reserved for values that fall outside the range covered by the data used to run the fit method. For example

In\u00a0[23]: Copied!
df.age.min(), df.age.max()\n
df.age.min(), df.age.max() Out[23]:
(17, 90)

All future age values outside that range will be encoded as 0 and decoded as NaN

In\u00a0[24]: Copied!
tmp_df = df.head(1).copy()\ntmp_df.loc[:, \"age\"] = 5\n
tmp_df = df.head(1).copy() tmp_df.loc[:, \"age\"] = 5 In\u00a0[25]: Copied!
tmp_df\n
tmp_df Out[25]: age workclass fnlwgt education educational-num marital-status occupation relationship race gender capital-gain capital-loss hours-per-week native-country income 0 5 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States <=50K In\u00a0[26]: Copied!
# quant_tab_preprocessor has already been fitted with a data that has an age range between 17 and 90\ntmp_qX_tab = quant_tab_preprocessor.transform(tmp_df)\n
# quant_tab_preprocessor has already been fitted with a data that has an age range between 17 and 90 tmp_qX_tab = quant_tab_preprocessor.transform(tmp_df) In\u00a0[27]: Copied!
tmp_qX_tab\n
tmp_qX_tab Out[27]:
array([[1, 1, 1, 1, 1, 0, 2]])
In\u00a0[28]: Copied!
quant_tab_preprocessor.inverse_transform(tmp_qX_tab)\n
quant_tab_preprocessor.inverse_transform(tmp_qX_tab)
Note that quantized cols will be turned into the mid point of the corresponding bin\n
Out[28]: education relationship workclass occupation native-country age hours-per-week 0 11th Own-child Private Machine-op-inspct United-States NaN 30.4 In\u00a0[29]: Copied!
from pytorch_widedeep.preprocessing import TextPreprocessor\n
from pytorch_widedeep.preprocessing import TextPreprocessor In\u00a0[30]: Copied!
# The airbnb dataset, which you could get from here:\n# http://insideairbnb.com/get-the-data.html, is too big to be included in\n# our datasets module (when including images). Therefore, go there,\n# download it, and use the download_images.py script to get the images\n# and the airbnb_data_processing.py to process the data. We'll find\n# better datasets in the future ;). Note that here we are only using a\n# small sample to illustrate the use, so PLEASE ignore the results, just\n# focus on usage\ndf = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\")\n
# The airbnb dataset, which you could get from here: # http://insideairbnb.com/get-the-data.html, is too big to be included in # our datasets module (when including images). Therefore, go there, # download it, and use the download_images.py script to get the images # and the airbnb_data_processing.py to process the data. We'll find # better datasets in the future ;). Note that here we are only using a # small sample to illustrate the use, so PLEASE ignore the results, just # focus on usage df = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\") In\u00a0[31]: Copied!
texts = df.description.tolist()\ntexts[:2]\n
texts = df.description.tolist() texts[:2] Out[31]:
[\"My bright double bedroom with a large window has a relaxed feeling! It comfortably fits one or two and is centrally located just two blocks from Finsbury Park. Enjoy great restaurants in the area and easy access to easy transport tubes, trains and buses. Babies and children of all ages are welcome. Hello Everyone, I'm offering my lovely double bedroom in Finsbury Park area (zone 2) for let in a shared apartment.  You will share the apartment with me and it is fully furnished with a self catering kitchen. Two people can easily sleep well as the room has a queen size bed. I also have a travel cot for a baby for guest with small children.  I will require a deposit up front as a security gesture on both our parts and will be given back to you when you return the keys.  I trust anyone who will be responding to this add would treat my home with care and respect .  Best Wishes  Alina Guest will have access to the self catering kitchen and bathroom. There is the flat is equipped wifi internet,\",\n \"Lots of windows and light.  St Luke's Gardens are at the end of the block, and the river not too far the other way. Ten minutes walk if you go slowly. Buses to everywhere round the corner and shops, restaurants, pubs, the cinema and Waitrose . Bright Chelsea Apartment  This is a bright one bedroom ground floor apartment in an interesting listed building. There is one double bedroom and a living room/kitchen The apartment has a full  bathroom and the kitchen is fully equipped. Two wardrobes are available exclusively for guests and bedside tables and two long drawers. This sunny convenient compact flat is just around the corner from the Waitrose supermarket and all sorts of shops, cinemas, restaurants and pubs.  This is a lovely part of London. There is a fun farmers market in the King's Road at the weekend.  Buses to everywhere are just round the corner, and two underground stations are within ten minutes walk. There is a very nice pub round by St. Luke's gardens, 4 mins slow walk, the \"]
In\u00a0[32]: Copied!
text_preprocessor = TextPreprocessor(text_col=\"description\")\nX_text = text_preprocessor.fit_transform(df)\n# From here on, any new observation can be prepared by simply running `.transform`\n# new_X_text = text_preprocessor.transform(new_df)\n
text_preprocessor = TextPreprocessor(text_col=\"description\") X_text = text_preprocessor.fit_transform(df) # From here on, any new observation can be prepared by simply running `.transform` # new_X_text = text_preprocessor.transform(new_df)
The vocabulary contains 2192 tokens\n
In\u00a0[33]: Copied!
print(X_text[0])\n
print(X_text[0])
[  29   48   37  367  818   17  910   17  177   15  122  349   53  879\n 1174  126  393   40  911    0   23  228   71  819    9   53   55 1380\n  225   11   18  308   18 1564   10  755    0  942  239   53   55    0\n   11   36 1013  277 1974   70   62   15 1475    9  943    5  251    5\n    0    5    0    5  177   53   37   75   11   10  294  726   32    9\n   42    5   25   12   10   22   12  136  100  145]\n
In\u00a0[34]: Copied!
from pytorch_widedeep.preprocessing import ImagePreprocessor\n
from pytorch_widedeep.preprocessing import ImagePreprocessor In\u00a0[35]: Copied!
image_preprocessor = wd.preprocessing.ImagePreprocessor(\n    img_col=\"id\", img_path=\"../tmp_data/airbnb/property_picture/\"\n)\nX_images = image_preprocessor.fit_transform(df)\n# From here on, any new observation can be prepared by simply running `.transform`\n# new_X_images = image_preprocessor.transform(new_df)\n
image_preprocessor = wd.preprocessing.ImagePreprocessor( img_col=\"id\", img_path=\"../tmp_data/airbnb/property_picture/\" ) X_images = image_preprocessor.fit_transform(df) # From here on, any new observation can be prepared by simply running `.transform` # new_X_images = image_preprocessor.transform(new_df)
Reading Images from ../tmp_data/airbnb/property_picture/\nResizing\n
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1001/1001 [00:01<00:00, 667.89it/s]\n
Computing normalisation metrics\n
In\u00a0[36]: Copied!
X_images[0].shape\n
X_images[0].shape Out[36]:
(224, 224, 3)
"},{"location":"examples/01_preprocessors_and_utils.html#processors-and-utils","title":"Processors and Utils\u00b6","text":"

Description of the main tools and utilities that one needs to prepare the data for a WideDeep model constructor.

"},{"location":"examples/01_preprocessors_and_utils.html#the-preprocessing-module","title":"The preprocessing module\u00b6","text":"

There are 4 preprocessors, corresponding to the 4 main components of the WideDeep model. These are

  • WidePreprocessor
  • TabPreprocessor
  • TextPreprocessor
  • ImagePreprocessor

Behind the scenes, these preprocessors use a series of helper functions and classes that live in the utils module. If you are interested, please have a look at the documentation
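
As a quick, hedged illustration of one of those helpers (this sketch assumes that the LabelEncoder class is exposed in pytorch_widedeep.utils and that it takes a columns_to_encode argument; treat it as an illustration rather than the canonical API):

from pytorch_widedeep.utils import LabelEncoder\n\n# hypothetical usage sketch: label encode a couple of categorical columns.\n# Encodings start at 1, since 0 is reserved for padding/unseen values\nlabel_encoder = LabelEncoder(columns_to_encode=[\"education\", \"occupation\"])\ndf_enc = label_encoder.fit_transform(df)\n# ...and back to the original labels\ndf_orig = label_encoder.inverse_transform(df_enc)\n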

"},{"location":"examples/01_preprocessors_and_utils.html#1-widepreprocessor","title":"1. WidePreprocessor\u00b6","text":"

The wide component of the model is a linear model that, in principle, could be implemented as a linear layer receiving the result of one-hot encoding the categorical columns. However, this is not memory efficient. Therefore, we implement the linear layer as an Embedding layer plus a bias. I will explain this in a bit more detail later.

With that in mind, WidePreprocessor simply encodes the categories numerically so that they become the indexes of the lookup table, i.e. the Embedding layer.

"},{"location":"examples/01_preprocessors_and_utils.html#2-tabpreprocessor","title":"2. TabPreprocessor\u00b6","text":"

The TabPreprocessor has a lot of different functionalities. Let's explore some of them in detail. In its basic use, the TabPreprocessor simply label encodes the categorical columns and normalises the numerical ones (unless otherwise specified).

"},{"location":"examples/01_preprocessors_and_utils.html#3-textpreprocessor","title":"3. TextPreprocessor\u00b6","text":"

This preprocessor returns the tokenised, padded sequences that will be directly fed to the stack of LSTMs.

"},{"location":"examples/01_preprocessors_and_utils.html#4-imagepreprocessor","title":"4. ImagePreprocessor\u00b6","text":"

ImagePreprocessor simply resizes the images, being aware of the aspect ratio.

"},{"location":"examples/02_model_components.html","title":"02_model_components","text":"In\u00a0[1]: Copied!
import torch\nimport pandas as pd\nimport numpy as np\n\nfrom torch import nn\n
import torch import pandas as pd import numpy as np from torch import nn In\u00a0[2]: Copied!
df = pd.DataFrame({\"color\": [\"r\", \"b\", \"g\"], \"size\": [\"s\", \"n\", \"l\"]})\ndf.head()\n
df = pd.DataFrame({\"color\": [\"r\", \"b\", \"g\"], \"size\": [\"s\", \"n\", \"l\"]}) df.head() Out[2]: color size 0 r s 1 b n 2 g l

one hot encoded, the first observation would be

In\u00a0[3]: Copied!
obs_0_oh = (np.array([1.0, 0.0, 0.0, 1.0, 0.0, 0.0])).astype(\"float32\")\n
obs_0_oh = (np.array([1.0, 0.0, 0.0, 1.0, 0.0, 0.0])).astype(\"float32\")

if we simply numerically encode (label encode or le) the values:

In\u00a0[4]: Copied!
obs_0_le = (np.array([0, 3])).astype(\"int64\")\n
obs_0_le = (np.array([0, 3])).astype(\"int64\")

Note that in the actual implementation of the package we start from 1, reserving 0 for padding, i.e. unseen values.

Now, let's see if the two implementations are equivalent

In\u00a0[5]: Copied!
# we have 6 different values. Let's assume we are performing a regression, so pred_dim = 1\nlin = nn.Linear(6, 1)\n
# we have 6 different values. Let's assume we are performing a regression, so pred_dim = 1 lin = nn.Linear(6, 1) In\u00a0[6]: Copied!
emb = nn.Embedding(6, 1)\nemb.weight = nn.Parameter(lin.weight.reshape_as(emb.weight))\n
emb = nn.Embedding(6, 1) emb.weight = nn.Parameter(lin.weight.reshape_as(emb.weight)) In\u00a0[7]: Copied!
lin(torch.tensor(obs_0_oh))\n
lin(torch.tensor(obs_0_oh)) Out[7]:
tensor([-0.5181], grad_fn=<ViewBackward0>)
In\u00a0[8]: Copied!
emb(torch.tensor(obs_0_le)).sum() + lin.bias\n
emb(torch.tensor(obs_0_le)).sum() + lin.bias Out[8]:
tensor([-0.5181], grad_fn=<AddBackward0>)

And this is precisely how the linear model Wide is implemented

In\u00a0[9]: Copied!
from pytorch_widedeep.models import Wide\n
from pytorch_widedeep.models import Wide
In\u00a0[10]: Copied!
# ?Wide\n
# ?Wide In\u00a0[11]: Copied!
wide = Wide(input_dim=10, pred_dim=1)\nwide\n
wide = Wide(input_dim=10, pred_dim=1) wide Out[11]:
Wide(\n  (wide_linear): Embedding(11, 1, padding_idx=0)\n)

Note that even though the input dim is 10, the Embedding layer has 11 weights. Again, this is because we reserve 0 for padding, which is used for unseen values during the encoding process.

As I mentioned, deeptabular has enough complexity on its own and it will be described in a separate notebook. Let's then jump to deeptext.

In\u00a0[12]: Copied!
from pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.models import TabMlp\n
from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.models import TabMlp In\u00a0[13]: Copied!
data = {\n    \"cat1\": np.random.choice([\"A\", \"B\", \"C\"], size=20),\n    \"cat2\": np.random.choice([\"X\", \"Y\"], size=20),\n    \"cont1\": np.random.rand(20),\n    \"cont2\": np.random.rand(20),\n}\n\ndf = pd.DataFrame(data)\n
data = { \"cat1\": np.random.choice([\"A\", \"B\", \"C\"], size=20), \"cat2\": np.random.choice([\"X\", \"Y\"], size=20), \"cont1\": np.random.rand(20), \"cont2\": np.random.rand(20), } df = pd.DataFrame(data) In\u00a0[14]: Copied!
df.head()\n
df.head() Out[14]: cat1 cat2 cont1 cont2 0 A Y 0.789347 0.561789 1 C X 0.050822 0.061538 2 A Y 0.863784 0.241967 3 C X 0.917848 0.644658 4 C Y 0.042328 0.417303 In\u00a0[15]: Copied!
# see the docs for details on all params/options\ntab_preprocessor = TabPreprocessor(\n    cat_embed_cols=[\"cat1\", \"cat2\"],\n    continuous_cols=[\"cont1\", \"cont2\"],\n    embedding_rule=\"fastai\",\n)\n
# see the docs for details on all params/options tab_preprocessor = TabPreprocessor( cat_embed_cols=[\"cat1\", \"cat2\"], continuous_cols=[\"cont1\", \"cont2\"], embedding_rule=\"fastai\", ) In\u00a0[16]: Copied!
X_tab = tab_preprocessor.fit_transform(df)\n
X_tab = tab_preprocessor.fit_transform(df)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[17]: Copied!
# toy example just to build a model.\ntabmlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=tab_preprocessor.continuous_cols,\n    embed_continuous_method=\"standard\",\n    cont_embed_dim=4,\n    mlp_hidden_dims=[8, 4],\n    mlp_linear_first=True,\n)\ntabmlp\n
# toy example just to build a model. tabmlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=tab_preprocessor.continuous_cols, embed_continuous_method=\"standard\", cont_embed_dim=4, mlp_hidden_dims=[8, 4], mlp_linear_first=True, ) tabmlp Out[17]:
TabMlp(\n  (cat_embed): DiffSizeCatEmbeddings(\n    (embed_layers): ModuleDict(\n      (emb_layer_cat1): Embedding(4, 3, padding_idx=0)\n      (emb_layer_cat2): Embedding(3, 2, padding_idx=0)\n    )\n    (embedding_dropout): Dropout(p=0.0, inplace=False)\n  )\n  (cont_norm): Identity()\n  (cont_embed): ContEmbeddings(\n    INFO: [ContLinear = weight(n_cont_cols, embed_dim) + bias(n_cont_cols, embed_dim)]\n    (linear): ContLinear(n_cont_cols=2, embed_dim=4, embed_dropout=0.0)\n    (dropout): Dropout(p=0.0, inplace=False)\n  )\n  (encoder): MLP(\n    (mlp): Sequential(\n      (dense_layer_0): Sequential(\n        (0): Linear(in_features=13, out_features=8, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n      (dense_layer_1): Sequential(\n        (0): Linear(in_features=8, out_features=4, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n    )\n  )\n)

Let's describe the model a bit: first we have what we call DiffSizeCatEmbeddings, where categorical columns with a different number of unique categories will be encoded with embeddings of different dimensions. Then the continuous columns are not normalised (the normalisation layer is just the identity) and they are embedded via the \"standard\" method, using a so-called ContLinear layer. This layer displays some INFO that tells us what it is (ContLinear = weight(n_cont_cols, embed_dim) + bias(n_cont_cols, embed_dim)). There are two other options to embed the continuous cols, based on the paper On Embeddings for Numerical Features in Tabular Deep Learning: PieceWise and Periodic, both available via the embed_continuous_method param, which can take the values \"standard\", \"piecewise\" and \"periodic\". The embedded categorical and continuous columns are then concatenated ($3 + 2 + (4 * 2) = 13$ input dims) and passed to the MLP.
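
As a hedged illustration of those alternative embedding methods, the sketch below builds the same model but with \"periodic\" embeddings for the continuous cols (the n_frequencies, sigma and share_last_layer values are arbitrary, chosen only for the example; please check the docs for the exact parameter behaviour):

# sketch: same TabMlp, but embedding the continuous cols with the \"periodic\" method\ntabmlp_periodic = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=tab_preprocessor.continuous_cols,\n    embed_continuous_method=\"periodic\",\n    cont_embed_dim=4,\n    n_frequencies=8,\n    sigma=0.1,\n    share_last_layer=False,\n    mlp_hidden_dims=[8, 4],\n)\n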

In\u00a0[18]: Copied!
from pytorch_widedeep.models import BasicRNN\n
from pytorch_widedeep.models import BasicRNN In\u00a0[19]: Copied!
basic_rnn = BasicRNN(vocab_size=4, hidden_dim=4, n_layers=1, padding_idx=0, embed_dim=4)\n
basic_rnn = BasicRNN(vocab_size=4, hidden_dim=4, n_layers=1, padding_idx=0, embed_dim=4)
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/torch/nn/modules/rnn.py:82: UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.1 and num_layers=1\n  warnings.warn(\"dropout option adds dropout after all but last \"\n
In\u00a0[20]: Copied!
basic_rnn\n
basic_rnn Out[20]:
BasicRNN(\n  (word_embed): Embedding(4, 4, padding_idx=0)\n  (rnn): LSTM(4, 4, batch_first=True, dropout=0.1)\n  (rnn_mlp): Identity()\n)

You could, if you wanted, add a Fully Connected Head (FC-Head) on top of it

In\u00a0[21]: Copied!
from pytorch_widedeep.models import Vision\n
from pytorch_widedeep.models import Vision In\u00a0[22]: Copied!
resnet = Vision(pretrained_model_setup=\"resnet18\", n_trainable=0)\n
resnet = Vision(pretrained_model_setup=\"resnet18\", n_trainable=0) In\u00a0[23]: Copied!
resnet\n
resnet Out[23]:
Vision(\n  (features): Sequential(\n    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n    (2): ReLU(inplace=True)\n    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n    (4): Sequential(\n      (0): BasicBlock(\n        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n      (1): BasicBlock(\n        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n    )\n    (5): Sequential(\n      (0): BasicBlock(\n        (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (downsample): Sequential(\n          (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)\n          (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        )\n      )\n      (1): BasicBlock(\n        (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n    )\n    (6): Sequential(\n      (0): BasicBlock(\n        (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (downsample): Sequential(\n          (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)\n          (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        )\n      )\n      (1): BasicBlock(\n        (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, 
affine=True, track_running_stats=True)\n      )\n    )\n    (7): Sequential(\n      (0): BasicBlock(\n        (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (downsample): Sequential(\n          (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n          (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        )\n      )\n      (1): BasicBlock(\n        (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (relu): ReLU(inplace=True)\n        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n        (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n    )\n    (8): AdaptiveAvgPool2d(output_size=(1, 1))\n  )\n)
"},{"location":"examples/02_model_components.html#model-components","title":"Model Components\u00b6","text":"

The main components of a WideDeep (i.e. Multimodal) model are tabular data, text and images, which are fed into the model via the so-called wide, deeptabular, deeptext and deepimage model components

"},{"location":"examples/02_model_components.html#1-wide","title":"1. wide\u00b6","text":"

The wide component is a Linear layer \"plugged\" into the output neuron(s). Here, the non-linearities are captured via crossed columns. Crossed columns are, quoting directly from the paper: \"For binary features, a cross-product transformation (e.g., \u201cAND(gender=female, language=en)\u201d) is 1 if and only if the constituent features (\u201cgender=female\u201d and \u201clanguage=en\u201d) are all 1, and 0 otherwise\".
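
A rough, hedged sketch of what a crossed column amounts to (for illustration only; in practice the WidePreprocessor builds and encodes the crosses for you):

import pandas as pd\n\n# illustrative only: a cross of two categorical features is simply their combination,\n# which, once one-hot/label encoded, behaves like AND(gender=female, language=en)\ntmp = pd.DataFrame({\"gender\": [\"female\", \"male\"], \"language\": [\"en\", \"es\"]})\ntmp[\"gender_language\"] = tmp[\"gender\"] + \"-\" + tmp[\"language\"]\n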

The only particularity of our implementation is that we have implemented the linear layer via an Embedding layer plus a bias. While the implementations are equivalent, the latter is faster and far more memory efficient, since we do not need to one hot encode the categorical features.

Let's assume we have the following dataset:

"},{"location":"examples/02_model_components.html#2-deeptabular","title":"2. deeptabular\u00b6","text":"

The deeptabular model alone is what would normally be referred to as Deep Learning for tabular data. As mentioned a number of times throughout the library, each component can be used independently. Therefore, if you wanted to use any of the models below on their own, that is perfectly possible. There are just a couple of simple requirements that will be covered in a later notebook.

At the time of writing, there are a number of models available in pytorch-widedeep to do DL for tabular data. These are:

  1. TabMlp
  2. ContextAttentionMLP
  3. SelfAttentionMLP
  4. TabResnet
  5. Tabnet
  6. TabTransformer
  7. FT-Transformer
  8. SAINT
  9. TabFastFormer
  10. TabPerceiver

Let's have a look at one of them. For more information on each of these models, please have a look at the documentation

"},{"location":"examples/02_model_components.html#3-deeptext","title":"3. deeptext\u00b6","text":"

At the time of writing, pytorch-widedeep offers three models that can be passed to WideDeep as the deeptext component. These are:

  1. BasicRNN
  2. AttentiveRNN
  3. StackedAttentiveRNN

For details on each of these models, please have a look at the documentation of the package.

We will soon integrate with Huggingface, but let me insist: it is perfectly possible to use custom models for each component; please have a look at the corresponding notebook. In general, simply build them and pass them as the corresponding parameters. Note that custom models MUST return the last layer of activations (i.e. not the final prediction) so that these activations are collected by WideDeep and combined accordingly. In addition, the models MUST also contain an attribute output_dim with the size of this last layer of activations.
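
For example, a minimal, hedged sketch of a custom text component (the class and its internals are made up; the only actual requirements are the ones stated above, namely returning the last layer of activations from forward and exposing an output_dim attribute):

import torch\nfrom torch import nn\n\n\nclass MyDeepText(nn.Module):\n    # hypothetical custom deeptext component\n    def __init__(self, vocab_size: int, embed_dim: int = 16, hidden_dim: int = 8):\n        super().__init__()\n        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=0)\n        self.rnn = nn.GRU(embed_dim, hidden_dim, batch_first=True)\n        # WideDeep uses this attribute to know the size of the last layer of activations\n        self.output_dim = hidden_dim\n\n    def forward(self, X: torch.Tensor) -> torch.Tensor:\n        _, h = self.rnn(self.embed(X))\n        # return activations, NOT the final prediction\n        return h[-1]\n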

Let's have a look to the BasicRNN model

"},{"location":"examples/02_model_components.html#4-deepimage","title":"4. deepimage\u00b6","text":"

At the time of writing pytorch-widedeep is integrated with torchvision via the Vision class. This means that it is possible to use a variant of the following architectures:

  1. resnet
  2. shufflenet
  3. resnext
  4. wide_resnet
  5. regnet
  6. densenet
  7. mobilenet
  8. mnasnet
  9. efficientnet
  10. squeezenet

The user can choose which layers will be trainable. Alternatively, if none of these architectures is useful, one could use a simple, fully trained CNN (please see the package documentation) or pass a custom model.

let's have a look

"},{"location":"examples/03_binary_classification_with_defaults.html","title":"03_binary_classification_with_defaults","text":"In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport torch\n\nfrom pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor\nfrom pytorch_widedeep.training import Trainer\nfrom pytorch_widedeep.models import Wide, TabMlp, WideDeep\nfrom pytorch_widedeep.metrics import Accuracy, Precision\nfrom pytorch_widedeep.datasets import load_adult\n
import numpy as np import pandas as pd import torch from pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor from pytorch_widedeep.training import Trainer from pytorch_widedeep.models import Wide, TabMlp, WideDeep from pytorch_widedeep.metrics import Accuracy, Precision from pytorch_widedeep.datasets import load_adult
In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\ndf.head()\n
df = load_adult(as_frame=True) df.head() Out[2]: age workclass fnlwgt education educational-num marital-status occupation relationship race gender capital-gain capital-loss hours-per-week native-country income 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States <=50K 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States <=50K 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States >50K 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States >50K 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States <=50K In\u00a0[3]: Copied!
# For convenience, we'll replace '-' with '_'\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\n# binary target\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\ndf.head()\n
# For convenience, we'll replace '-' with '_' df.columns = [c.replace(\"-\", \"_\") for c in df.columns] # binary target df[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop(\"income\", axis=1, inplace=True) df.head() Out[3]: age workclass fnlwgt education educational_num marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country income_label 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States 0 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States 0 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States 1 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States 1 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States 0 In\u00a0[4]: Copied!
df.drop([\"fnlwgt\", \"educational_num\"], axis=1, inplace=True)\n
df.drop([\"fnlwgt\", \"educational_num\"], axis=1, inplace=True) In\u00a0[5]: Copied!
# Define wide, crossed and deep tabular columns\nwide_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"native_country\",\n]\ncrossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")]\n
# Define wide, crossed and deep tabular columns wide_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"native_country\", ] crossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")] In\u00a0[6]: Copied!
cat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital_gain\",\n    \"capital_loss\",\n    \"native_country\",\n]\ncontinuous_cols = [\"age\", \"hours_per_week\"]\n
cat_embed_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"capital_gain\", \"capital_loss\", \"native_country\", ] continuous_cols = [\"age\", \"hours_per_week\"] In\u00a0[7]: Copied!
# TARGET\ntarget_col = \"income_label\"\ntarget = df[target_col].values\n
# TARGET target_col = \"income_label\" target = df[target_col].values

let's see what the preprocessors do

In\u00a0[8]: Copied!
# wide\nwide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_wide = wide_preprocessor.fit_transform(df)\n
# wide wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_wide = wide_preprocessor.fit_transform(df) In\u00a0[9]: Copied!
# # wide_preprocessor has an attribute called encoding_dict with the encoding dictionary\n# wide_preprocessor.encoding_dict\n
# # wide_preprocessor has an attribute called encoding_dict with the encoding dictionary # wide_preprocessor.encoding_dict In\u00a0[10]: Copied!
# deeptabular\ntab_preprocessor = TabPreprocessor(\n    embed_cols=cat_embed_cols,\n    continuous_cols=continuous_cols,\n    cols_to_scale=continuous_cols,\n)\nX_tab = tab_preprocessor.fit_transform(df)\n
# deeptabular tab_preprocessor = TabPreprocessor( embed_cols=cat_embed_cols, continuous_cols=continuous_cols, cols_to_scale=continuous_cols, ) X_tab = tab_preprocessor.fit_transform(df) In\u00a0[11]: Copied!
# check the docs to understand the useful attributes that the tab_preprocessor has. For example,\n# as well as an encoding dictionary, tab_preprocessor has an attribute called cat_embed_input\n# that specifies the categorical columns that will be represented as embeddings, the number\n# of different categories per feature, and the dimension of the embeddings as defined by some\n# of the internal rules of thumb that the preprocessor has (have a look at the docs)\ntab_preprocessor.cat_embed_input\n
# check the docs to understand the useful attributes that the tab_preprocessor has. For example, # as well as an encoding dictionary, tab_preprocessor has an attribute called cat_embed_input # that specifies the categorical columns that will be represented as embeddings, the number # of different categories per feature, and the dimension of the embeddings as defined by some # of the internal rules of thumb that the preprocessor has (have a look at the docs) tab_preprocessor.cat_embed_input Out[11]:
[('workclass', 9, 5),\n ('education', 16, 8),\n ('marital_status', 7, 5),\n ('occupation', 15, 7),\n ('relationship', 6, 4),\n ('race', 5, 4),\n ('gender', 2, 2),\n ('capital_gain', 123, 24),\n ('capital_loss', 99, 21),\n ('native_country', 42, 13)]
In\u00a0[12]: Copied!
print(X_wide)\nprint(X_wide.shape)\n
print(X_wide) print(X_wide.shape)
[[  1  10  26 ...  61 103 328]\n [  1  11  27 ...  61 104 329]\n [  2  12  27 ...  61 105 330]\n ...\n [  1  11  28 ...  61 115 335]\n [  1  11  26 ...  61 115 335]\n [  7  11  27 ...  61 127 336]]\n(48842, 10)\n
In\u00a0[13]: Copied!
print(X_tab)\nprint(X_tab.shape)\n
print(X_tab) print(X_tab.shape)
[[ 1.          1.          1.         ...  1.         -0.99512893\n  -0.03408696]\n [ 1.          2.          2.         ...  1.         -0.04694151\n   0.77292975]\n [ 2.          3.          2.         ...  1.         -0.77631645\n  -0.03408696]\n ...\n [ 1.          2.          3.         ...  1.          1.41180837\n  -0.03408696]\n [ 1.          2.          1.         ...  1.         -1.21394141\n  -1.64812038]\n [ 7.          2.          2.         ...  1.          0.97418341\n  -0.03408696]]\n(48842, 12)\n
In\u00a0[14]: Copied!
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    mlp_hidden_dims=[400, 200],\n    mlp_dropout=0.5,\n    mlp_activation=\"leaky_relu\",\n)\n
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, mlp_hidden_dims=[400, 200], mlp_dropout=0.5, mlp_activation=\"leaky_relu\", )

Let's first find out how a linear model performs

In\u00a0[15]: Copied!
wide\n
wide Out[15]:
Wide(\n  (wide_linear): Embedding(809, 1, padding_idx=0)\n)

Before being passed to the Trainer, the models need to be \"constructed\" with the WideDeep constructor class. For the particular case of the wide/linear model, not much really happens

In\u00a0[16]: Copied!
lin_model = WideDeep(wide=wide)\n
lin_model = WideDeep(wide=wide) In\u00a0[17]: Copied!
lin_model\n
lin_model Out[17]:
WideDeep(\n  (wide): Wide(\n    (wide_linear): Embedding(809, 1, padding_idx=0)\n  )\n)
In\u00a0[18]: Copied!
lin_trainer = Trainer(\n    model=lin_model,\n    objective=\"binary\",\n    optimizers=torch.optim.AdamW(lin_model.parameters(), lr=0.01),\n    metrics=[Accuracy, Precision],\n)\n
lin_trainer = Trainer( model=lin_model, objective=\"binary\", optimizers=torch.optim.AdamW(lin_model.parameters(), lr=0.01), metrics=[Accuracy, Precision], ) In\u00a0[19]: Copied!
lin_trainer.fit(X_wide=X_wide, target=target, n_epochs=4, batch_size=128, val_split=0.2)\n
lin_trainer.fit(X_wide=X_wide, target=target, n_epochs=4, batch_size=128, val_split=0.2)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:02<00:00, 109.04it/s, loss=0.426, metrics={'acc': 0.7983, 'prec': 0.6152}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 102.46it/s, loss=0.366, metrics={'acc': 0.832, 'prec': 0.6916}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:02<00:00, 130.27it/s, loss=0.364, metrics={'acc': 0.8305, 'prec': 0.6933}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 150.46it/s, loss=0.361, metrics={'acc': 0.8357, 'prec': 0.6982}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:02<00:00, 133.19it/s, loss=0.359, metrics={'acc': 0.8329, 'prec': 0.6994}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 145.75it/s, loss=0.361, metrics={'acc': 0.836, 'prec': 0.7009}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:02<00:00, 130.91it/s, loss=0.358, metrics={'acc': 0.8333, 'prec': 0.7005}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 155.08it/s, loss=0.361, metrics={'acc': 0.8364, 'prec': 0.702}]\n

Bear in mind that wide is a linear model where the non-linearities are captured via the crossed columns. For the crossed columns to be effective, one needs proper business knowledge. There is no magic formula to produce them

Let's have a look at the tabular model by itself

In\u00a0[20]: Copied!
tab_model = WideDeep(deeptabular=tab_mlp)\n
tab_model = WideDeep(deeptabular=tab_mlp) In\u00a0[21]: Copied!
tab_model\n
tab_model Out[21]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cat_embed): DiffSizeCatEmbeddings(\n        (embed_layers): ModuleDict(\n          (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n          (emb_layer_education): Embedding(17, 8, padding_idx=0)\n          (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n          (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n          (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n          (emb_layer_race): Embedding(6, 4, padding_idx=0)\n          (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n          (emb_layer_capital_gain): Embedding(124, 24, padding_idx=0)\n          (emb_layer_capital_loss): Embedding(100, 21, padding_idx=0)\n          (emb_layer_native_country): Embedding(43, 13, padding_idx=0)\n        )\n        (embedding_dropout): Dropout(p=0.1, inplace=False)\n      )\n      (cont_norm): Identity()\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=95, out_features=400, bias=True)\n            (1): LeakyReLU(negative_slope=0.01, inplace=True)\n            (2): Dropout(p=0.5, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=400, out_features=200, bias=True)\n            (1): LeakyReLU(negative_slope=0.01, inplace=True)\n            (2): Dropout(p=0.5, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=200, out_features=1, bias=True)\n  )\n)

You can see how the WideDeep class has added a final prediction layer that collects the activations from the last layer of the model and plugs them into the output neuron. If this were a multiclass classification problem, the prediction dimension (i.e. the size of that final layer) would need to be specified via the pred_dim parameter when instantiating the WideDeep class, as we will see later
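
For example, a minimal, hedged sketch for a hypothetical 3-class problem, reusing the tab_mlp component defined above (the objective name follows the Trainer's \"multiclass\" convention):

# sketch: for a multiclass problem, the size of the final layer is set via pred_dim\nmulticlass_model = WideDeep(deeptabular=tab_mlp, pred_dim=3)\nmulticlass_trainer = Trainer(\n    model=multiclass_model,\n    objective=\"multiclass\",\n    metrics=[Accuracy],\n)\n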

In\u00a0[22]: Copied!
tab_trainer = Trainer(\n    model=tab_model,\n    objective=\"binary\",\n    optimizers=torch.optim.AdamW(tab_model.parameters(), lr=0.001),\n    metrics=[Accuracy, Precision],\n)\n
tab_trainer = Trainer( model=tab_model, objective=\"binary\", optimizers=torch.optim.AdamW(tab_model.parameters(), lr=0.001), metrics=[Accuracy, Precision], ) In\u00a0[23]: Copied!
tab_trainer.fit(X_tab=X_tab, target=target, n_epochs=4, batch_size=128, val_split=0.2)\n
tab_trainer.fit(X_tab=X_tab, target=target, n_epochs=4, batch_size=128, val_split=0.2)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:03<00:00, 97.00it/s, loss=0.37, metrics={'acc': 0.8267, 'prec': 0.7037}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 134.91it/s, loss=0.313, metrics={'acc': 0.8588, 'prec': 0.7577}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:03<00:00, 86.86it/s, loss=0.319, metrics={'acc': 0.8514, 'prec': 0.761}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:01<00:00, 73.13it/s, loss=0.296, metrics={'acc': 0.8675, 'prec': 0.7685}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:03<00:00, 79.07it/s, loss=0.305, metrics={'acc': 0.8574, 'prec': 0.7646}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 130.11it/s, loss=0.289, metrics={'acc': 0.8696, 'prec': 0.7765}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:03<00:00, 87.39it/s, loss=0.296, metrics={'acc': 0.8622, 'prec': 0.7769}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 90.63it/s, loss=0.285, metrics={'acc': 0.8697, 'prec': 0.7741}]\n

The best result I ever obtained with LightGBM on this dataset is 0.8782...so we are pretty close.

Let's combine the wide and tab_mlp components and see if it helps

In\u00a0[24]: Copied!
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    mlp_hidden_dims=[400, 200],\n    mlp_dropout=0.5,\n    mlp_activation=\"leaky_relu\",\n)\nwd_model = WideDeep(wide=wide, deeptabular=tab_mlp)\n
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, mlp_hidden_dims=[400, 200], mlp_dropout=0.5, mlp_activation=\"leaky_relu\", ) wd_model = WideDeep(wide=wide, deeptabular=tab_mlp) In\u00a0[25]: Copied!
wd_trainer = Trainer(\n    model=wd_model,\n    objective=\"binary\",\n    optimizers=torch.optim.AdamW(wd_model.parameters(), lr=0.001),\n    metrics=[Accuracy, Precision],\n)\n
wd_trainer = Trainer( model=wd_model, objective=\"binary\", optimizers=torch.optim.AdamW(wd_model.parameters(), lr=0.001), metrics=[Accuracy, Precision], ) In\u00a0[26]: Copied!
wd_trainer.fit(\n    X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=4, batch_size=128, val_split=0.2\n)\n
wd_trainer.fit( X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=4, batch_size=128, val_split=0.2 )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:03<00:00, 77.48it/s, loss=0.418, metrics={'acc': 0.8047, 'prec': 0.6154}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 110.51it/s, loss=0.321, metrics={'acc': 0.8521, 'prec': 0.7059}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:03<00:00, 82.70it/s, loss=0.333, metrics={'acc': 0.8428, 'prec': 0.7141}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 112.52it/s, loss=0.299, metrics={'acc': 0.866, 'prec': 0.7447}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:04<00:00, 74.34it/s, loss=0.312, metrics={'acc': 0.8533, 'prec': 0.7404}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 89.86it/s, loss=0.29, metrics={'acc': 0.8683, 'prec': 0.7496}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:04<00:00, 65.32it/s, loss=0.301, metrics={'acc': 0.8591, 'prec': 0.7542}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:00<00:00, 86.81it/s, loss=0.286, metrics={'acc': 0.8712, 'prec': 0.7552}]\n

For this particular case, the combination of both did not lead to better results than using just the tab_mlp model when training for only 4 epochs.

Note that we have used a TabMlp model, but we could use any other model in the library with the same syntax.

In\u00a0[27]: Copied!
from pytorch_widedeep.models import TabTransformer\n
from pytorch_widedeep.models import TabTransformer

The parameters for the TabTransformer are the following:

column_idx: Dict[str, int],\ncat_embed_input: Optional[List[Tuple[str, int]]] = None,\ncat_embed_dropout: Optional[float] = None,\nuse_cat_bias: Optional[bool] = None,\ncat_embed_activation: Optional[str] = None,\nshared_embed: Optional[bool] = None,\nadd_shared_embed: Optional[bool] = None,\nfrac_shared_embed: Optional[float] = None,\ncontinuous_cols: Optional[List[str]] = None,\ncont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\nembed_continuous: Optional[bool] = None,\nembed_continuous_method: Optional[Literal[\"standard\", \"piecewise\", \"periodic\"]] = None,\ncont_embed_dropout: Optional[float] = None,\ncont_embed_activation: Optional[str] = None,\nquantization_setup: Optional[Dict[str, List[float]]] = None,\nn_frequencies: Optional[int] = None,\nsigma: Optional[float] = None,\nshare_last_layer: Optional[bool] = None,\nfull_embed_dropout: Optional[bool] = None,\ninput_dim: int = 32,\nn_heads: int = 8,\nuse_qkv_bias: bool = False,\nn_blocks: int = 4,\nattn_dropout: float = 0.2,\nff_dropout: float = 0.1,\nff_factor: int = 4,\ntransformer_activation: str = \"gelu\",\nuse_linear_attention: bool = False,\nuse_flash_attention: bool = False,\nmlp_hidden_dims: Optional[List[int]] = None,\nmlp_activation: str = \"relu\",\nmlp_dropout: float = 0.1,\nmlp_batchnorm: bool = False,\nmlp_batchnorm_last: bool = False,\nmlp_linear_first: bool = True,\n

Please see the documentation for details on each of them. For now, let's see how one could use a TabTransformer model in a few lines of code.

In\u00a0[28]: Copied!
tab_transformer = TabTransformer(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    embed_continuous_method=\"standard\",\n    cont_norm_layer=\"layernorm\",\n    cont_embed_dropout=0.2,\n    cont_embed_activation=\"leaky_relu\",\n    n_heads=4,\n    ff_dropout=0.2,\n    mlp_dropout=0.5,\n    mlp_activation=\"leaky_relu\",\n    mlp_linear_first=True,\n)\n
tab_transformer = TabTransformer( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, embed_continuous_method=\"standard\", cont_norm_layer=\"layernorm\", cont_embed_dropout=0.2, cont_embed_activation=\"leaky_relu\", n_heads=4, ff_dropout=0.2, mlp_dropout=0.5, mlp_activation=\"leaky_relu\", mlp_linear_first=True, ) In\u00a0[29]: Copied!
tab_model = WideDeep(deeptabular=tab_transformer)\n
tab_model = WideDeep(deeptabular=tab_transformer) In\u00a0[30]: Copied!
tab_model\n
tab_model Out[30]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabTransformer(\n      (cat_embed): SameSizeCatEmbeddings(\n        (embed): Embedding(325, 32, padding_idx=0)\n        (dropout): Dropout(p=0.1, inplace=False)\n      )\n      (cont_norm): LayerNorm((2,), eps=1e-05, elementwise_affine=True)\n      (cont_embed): ContEmbeddings(\n        INFO: [ContLinear = weight(n_cont_cols, embed_dim) + bias(n_cont_cols, embed_dim)]\n        (linear): ContLinear(n_cont_cols=2, embed_dim=32, embed_dropout=0.2)\n        (activation_fn): LeakyReLU(negative_slope=0.01, inplace=True)\n        (dropout): Dropout(p=0.2, inplace=False)\n      )\n      (encoder): Sequential(\n        (transformer_block0): TransformerEncoder(\n          (attn): MultiHeadedAttention(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (q_proj): Linear(in_features=32, out_features=32, bias=False)\n            (kv_proj): Linear(in_features=32, out_features=64, bias=False)\n            (out_proj): Linear(in_features=32, out_features=32, bias=False)\n          )\n          (ff): FeedForward(\n            (w_1): Linear(in_features=32, out_features=128, bias=True)\n            (w_2): Linear(in_features=128, out_features=32, bias=True)\n            (dropout): Dropout(p=0.2, inplace=False)\n            (activation): GELU(approximate='none')\n          )\n          (attn_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n          (ff_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n        )\n        (transformer_block1): TransformerEncoder(\n          (attn): MultiHeadedAttention(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (q_proj): Linear(in_features=32, out_features=32, bias=False)\n            (kv_proj): Linear(in_features=32, out_features=64, bias=False)\n            (out_proj): Linear(in_features=32, out_features=32, bias=False)\n          )\n          (ff): FeedForward(\n            (w_1): Linear(in_features=32, out_features=128, bias=True)\n            (w_2): Linear(in_features=128, out_features=32, bias=True)\n            (dropout): Dropout(p=0.2, inplace=False)\n            (activation): GELU(approximate='none')\n          )\n          (attn_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n          (ff_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n        )\n        (transformer_block2): TransformerEncoder(\n          (attn): MultiHeadedAttention(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (q_proj): Linear(in_features=32, out_features=32, bias=False)\n            (kv_proj): Linear(in_features=32, out_features=64, bias=False)\n            (out_proj): Linear(in_features=32, out_features=32, bias=False)\n          )\n          (ff): FeedForward(\n            (w_1): Linear(in_features=32, out_features=128, bias=True)\n            (w_2): Linear(in_features=128, out_features=32, bias=True)\n            (dropout): Dropout(p=0.2, inplace=False)\n            (activation): GELU(approximate='none')\n          )\n          (attn_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, 
elementwise_affine=True)\n          )\n          (ff_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n        )\n        (transformer_block3): TransformerEncoder(\n          (attn): MultiHeadedAttention(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (q_proj): Linear(in_features=32, out_features=32, bias=False)\n            (kv_proj): Linear(in_features=32, out_features=64, bias=False)\n            (out_proj): Linear(in_features=32, out_features=32, bias=False)\n          )\n          (ff): FeedForward(\n            (w_1): Linear(in_features=32, out_features=128, bias=True)\n            (w_2): Linear(in_features=128, out_features=32, bias=True)\n            (dropout): Dropout(p=0.2, inplace=False)\n            (activation): GELU(approximate='none')\n          )\n          (attn_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n          (ff_addnorm): AddNorm(\n            (dropout): Dropout(p=0.2, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=384, out_features=1, bias=True)\n  )\n)
In\u00a0[31]: Copied!
tab_trainer = Trainer(\n    model=tab_model,\n    objective=\"binary\",\n    optimizers=torch.optim.AdamW(tab_model.parameters(), lr=0.001),\n    metrics=[Accuracy, Precision],\n)\n
tab_trainer = Trainer( model=tab_model, objective=\"binary\", optimizers=torch.optim.AdamW(tab_model.parameters(), lr=0.001), metrics=[Accuracy, Precision], ) In\u00a0[32]: Copied!
tab_trainer.fit(X_tab=X_tab, target=target, n_epochs=1, batch_size=128, val_split=0.2)\n
tab_trainer.fit(X_tab=X_tab, target=target, n_epochs=1, batch_size=128, val_split=0.2)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 306/306 [00:11<00:00, 27.57it/s, loss=0.359, metrics={'acc': 0.8334, 'prec': 0.7082}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 77/77 [00:01<00:00, 57.89it/s, loss=0.33, metrics={'acc': 0.8536, 'prec': 0.7152}]\n
"},{"location":"examples/03_binary_classification_with_defaults.html#simple-binary-classification-with-defaults","title":"Simple Binary Classification with defaults\u00b6","text":"

In this notebook we will train a Wide and Deep model, as well as a \"Deep\"-only model, using the well-known adult dataset

"},{"location":"examples/03_binary_classification_with_defaults.html#preparing-the-data","title":"Preparing the data\u00b6","text":""},{"location":"examples/03_binary_classification_with_defaults.html#defining-the-model","title":"Defining the model\u00b6","text":""},{"location":"examples/04_regression_with_images_and_text.html","title":"04_regression_with_images_and_text","text":"In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport os\nimport torch\nfrom torchvision.transforms import ToTensor, Normalize\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import (\n    WidePreprocessor,\n    TabPreprocessor,\n    TextPreprocessor,\n    ImagePreprocessor,\n)\nfrom pytorch_widedeep.models import (\n    Wide,\n    TabMlp,\n    Vision,\n    BasicRNN,\n    WideDeep,\n)\nfrom pytorch_widedeep.losses import RMSELoss\nfrom pytorch_widedeep.initializers import *\nfrom pytorch_widedeep.callbacks import *\n
import numpy as np import pandas as pd import os import torch from torchvision.transforms import ToTensor, Normalize from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import ( WidePreprocessor, TabPreprocessor, TextPreprocessor, ImagePreprocessor, ) from pytorch_widedeep.models import ( Wide, TabMlp, Vision, BasicRNN, WideDeep, ) from pytorch_widedeep.losses import RMSELoss from pytorch_widedeep.initializers import * from pytorch_widedeep.callbacks import *
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n
In\u00a0[2]: Copied!
df = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\")\ndf.head()\n
df = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\") df.head() Out[2]: id host_id description host_listings_count host_identity_verified neighbourhood_cleansed latitude longitude is_location_exact property_type ... amenity_wide_entrance amenity_wide_entrance_for_guests amenity_wide_entryway amenity_wide_hallways amenity_wifi amenity_window_guards amenity_wine_cooler security_deposit extra_people yield 0 13913.jpg 54730 My bright double bedroom with a large window h... 4.0 f Islington 51.56802 -0.11121 t apartment ... 1 0 0 0 1 0 0 100.0 15.0 12.00 1 15400.jpg 60302 Lots of windows and light. St Luke's Gardens ... 1.0 t Kensington and Chelsea 51.48796 -0.16898 t apartment ... 0 0 0 0 1 0 0 150.0 0.0 109.50 2 17402.jpg 67564 Open from June 2018 after a 3-year break, we a... 19.0 t Westminster 51.52098 -0.14002 t apartment ... 0 0 0 0 1 0 0 350.0 10.0 149.65 3 24328.jpg 41759 Artist house, bright high ceiling rooms, priva... 2.0 t Wandsworth 51.47298 -0.16376 t other ... 0 0 0 0 1 0 0 250.0 0.0 215.60 4 25023.jpg 102813 Large, all comforts, 2-bed flat; first floor; ... 1.0 f Wandsworth 51.44687 -0.21874 t apartment ... 0 0 0 0 1 0 0 250.0 11.0 79.35

5 rows \u00d7 223 columns

In\u00a0[3]: Copied!
# There are a number of columns that are already binary. Therefore, no need to one hot encode them\ncrossed_cols = [(\"property_type\", \"room_type\")]\nalready_dummies = [c for c in df.columns if \"amenity\" in c] + [\"has_house_rules\"]\nwide_cols = [\n    \"is_location_exact\",\n    \"property_type\",\n    \"room_type\",\n    \"host_gender\",\n    \"instant_bookable\",\n] + already_dummies\n\ncat_embed_cols = [(c, 16) for c in df.columns if \"catg\" in c] + [\n    (\"neighbourhood_cleansed\", 64),\n    (\"cancellation_policy\", 16),\n]\ncontinuous_cols = [\"latitude\", \"longitude\", \"security_deposit\", \"extra_people\"]\n\n# text and image colnames\ntext_col = \"description\"\nimg_col = \"id\"\n\n# path to pretrained word embeddings and the images\nword_vectors_path = \"../tmp_data/glove.6B/glove.6B.100d.txt\"\nimg_path = \"../tmp_data/airbnb/property_picture\"\n\n# target\ntarget_col = \"yield\"\n
# There are a number of columns that are already binary. Therefore, no need to one hot encode them crossed_cols = [(\"property_type\", \"room_type\")] already_dummies = [c for c in df.columns if \"amenity\" in c] + [\"has_house_rules\"] wide_cols = [ \"is_location_exact\", \"property_type\", \"room_type\", \"host_gender\", \"instant_bookable\", ] + already_dummies cat_embed_cols = [(c, 16) for c in df.columns if \"catg\" in c] + [ (\"neighbourhood_cleansed\", 64), (\"cancellation_policy\", 16), ] continuous_cols = [\"latitude\", \"longitude\", \"security_deposit\", \"extra_people\"] # text and image colnames text_col = \"description\" img_col = \"id\" # path to pretrained word embeddings and the images word_vectors_path = \"../tmp_data/glove.6B/glove.6B.100d.txt\" img_path = \"../tmp_data/airbnb/property_picture\" # target target_col = \"yield\" In\u00a0[4]: Copied!
target = df[target_col].values\n
target = df[target_col].values In\u00a0[5]: Copied!
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_wide = wide_preprocessor.fit_transform(df)\n
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_wide = wide_preprocessor.fit_transform(df) In\u00a0[6]: Copied!
tab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols,\n    continuous_cols=continuous_cols,\n)\nX_tab = tab_preprocessor.fit_transform(df)\n
tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols, ) X_tab = tab_preprocessor.fit_transform(df)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[7]: Copied!
text_preprocessor = TextPreprocessor(\n    word_vectors_path=word_vectors_path, text_col=text_col\n)\nX_text = text_preprocessor.fit_transform(df)\n
text_preprocessor = TextPreprocessor( word_vectors_path=word_vectors_path, text_col=text_col ) X_text = text_preprocessor.fit_transform(df)
The vocabulary contains 2192 tokens\nIndexing word vectors...\nLoaded 400000 word vectors\nPreparing embeddings matrix...\n2175 words in the vocabulary had ../tmp_data/glove.6B/glove.6B.100d.txt vectors and appear more than 5 times\n
In\u00a0[8]: Copied!
image_processor = ImagePreprocessor(img_col=img_col, img_path=img_path)\nX_images = image_processor.fit_transform(df)\n
image_processor = ImagePreprocessor(img_col=img_col, img_path=img_path) X_images = image_processor.fit_transform(df)
Reading Images from ../tmp_data/airbnb/property_picture\nResizing\n
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1001/1001 [00:01<00:00, 638.00it/s]\n
Computing normalisation metrics\n
In\u00a0[9]: Copied!
# Linear model\nwide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\n\n# DeepDense: 2 Dense layers\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=continuous_cols,\n    mlp_hidden_dims=[128, 64],\n    mlp_dropout=0.1,\n)\n\n# DeepText: a stack of 2 LSTMs\nbasic_rnn = BasicRNN(\n    vocab_size=len(text_preprocessor.vocab.itos),\n    embed_matrix=text_preprocessor.embedding_matrix,\n    n_layers=2,\n    hidden_dim=64,\n    rnn_dropout=0.5,\n)\n\n# Pretrained Resnet 18\nresnet = Vision(pretrained_model_setup=\"resnet18\", n_trainable=4)\n
# Linear model wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) # DeepDense: 2 Dense layers tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=continuous_cols, mlp_hidden_dims=[128, 64], mlp_dropout=0.1, ) # DeepText: a stack of 2 LSTMs basic_rnn = BasicRNN( vocab_size=len(text_preprocessor.vocab.itos), embed_matrix=text_preprocessor.embedding_matrix, n_layers=2, hidden_dim=64, rnn_dropout=0.5, ) # Pretrained Resnet 18 resnet = Vision(pretrained_model_setup=\"resnet18\", n_trainable=4)

Combine them all with the \"collector\" class WideDeep

In\u00a0[10]: Copied!
model = WideDeep(\n    wide=wide,\n    deeptabular=tab_mlp,\n    deeptext=basic_rnn,\n    deepimage=resnet,\n    head_hidden_dims=[256, 128],\n)\n
model = WideDeep( wide=wide, deeptabular=tab_mlp, deeptext=basic_rnn, deepimage=resnet, head_hidden_dims=[256, 128], ) In\u00a0[11]: Copied!
trainer = Trainer(model, objective=\"rmse\")\n
trainer = Trainer(model, objective=\"rmse\") In\u00a0[12]: Copied!
trainer.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    X_text=X_text,\n    X_img=X_images,\n    target=target,\n    n_epochs=1,\n    batch_size=32,\n    val_split=0.2,\n)\n
trainer.fit( X_wide=X_wide, X_tab=X_tab, X_text=X_text, X_img=X_images, target=target, n_epochs=1, batch_size=32, val_split=0.2, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 25/25 [00:19<00:00,  1.28it/s, loss=115]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 7/7 [00:04<00:00,  1.62it/s, loss=94.1]\n

Both the text and image components allow FC heads of their own (have a look at the documentation); a minimal sketch is shown below.
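For example, here is a minimal sketch (not in the original notebook) of adding FC heads to the text and image components; the head_hidden_dims argument is assumed from the component documentation, so please check the exact signatures there.

# Sketch only: FC heads on the deeptext and deepimage components.
# head_hidden_dims is assumed from the component docs (not used in this notebook).
basic_rnn_with_head = BasicRNN(
    vocab_size=len(text_preprocessor.vocab.itos),
    embed_matrix=text_preprocessor.embedding_matrix,
    n_layers=2,
    hidden_dim=64,
    rnn_dropout=0.5,
    head_hidden_dims=[64, 32],  # FC head on top of the RNN output
)
resnet_with_head = Vision(
    pretrained_model_setup="resnet18",
    n_trainable=4,
    head_hidden_dims=[128, 64],  # FC head on top of the image features
)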

Now let's go \"kaggle crazy\". Let's use different optimizers, initializers and schedulers for the different components. Moreover, let's use different learning rates for different parameter groups within the deeptabular component

In\u00a0[13]: Copied!
deep_params = []\nfor childname, child in model.named_children():\n    if childname == \"deeptabular\":\n        for n, p in child.named_parameters():\n            if \"embed_layer\" in n:\n                deep_params.append({\"params\": p, \"lr\": 1e-4})\n            else:\n                deep_params.append({\"params\": p, \"lr\": 1e-3})\n
deep_params = [] for childname, child in model.named_children(): if childname == \"deeptabular\": for n, p in child.named_parameters(): if \"embed_layer\" in n: deep_params.append({\"params\": p, \"lr\": 1e-4}) else: deep_params.append({\"params\": p, \"lr\": 1e-3}) In\u00a0[14]: Copied!
wide_opt = torch.optim.Adam(model.wide.parameters(), lr=0.03)\ndeep_opt = torch.optim.Adam(deep_params)\ntext_opt = torch.optim.AdamW(model.deeptext.parameters())\nimg_opt = torch.optim.AdamW(model.deepimage.parameters())\nhead_opt = torch.optim.Adam(model.deephead.parameters())\n
wide_opt = torch.optim.Adam(model.wide.parameters(), lr=0.03) deep_opt = torch.optim.Adam(deep_params) text_opt = torch.optim.AdamW(model.deeptext.parameters()) img_opt = torch.optim.AdamW(model.deepimage.parameters()) head_opt = torch.optim.Adam(model.deephead.parameters()) In\u00a0[15]: Copied!
wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=5)\ndeep_sch = torch.optim.lr_scheduler.MultiStepLR(deep_opt, milestones=[3, 8])\ntext_sch = torch.optim.lr_scheduler.StepLR(text_opt, step_size=5)\nimg_sch = torch.optim.lr_scheduler.MultiStepLR(deep_opt, milestones=[3, 8])\nhead_sch = torch.optim.lr_scheduler.StepLR(head_opt, step_size=5)\n
wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=5) deep_sch = torch.optim.lr_scheduler.MultiStepLR(deep_opt, milestones=[3, 8]) text_sch = torch.optim.lr_scheduler.StepLR(text_opt, step_size=5) img_sch = torch.optim.lr_scheduler.MultiStepLR(deep_opt, milestones=[3, 8]) head_sch = torch.optim.lr_scheduler.StepLR(head_opt, step_size=5) In\u00a0[16]: Copied!
# remember: one optimizer per model component; for lr_schedulers and initializers this is not necessary\noptimizers = {\n    \"wide\": wide_opt,\n    \"deeptabular\": deep_opt,\n    \"deeptext\": text_opt,\n    \"deepimage\": img_opt,\n    \"deephead\": head_opt,\n}\nschedulers = {\n    \"wide\": wide_sch,\n    \"deeptabular\": deep_sch,\n    \"deeptext\": text_sch,\n    \"deepimage\": img_sch,\n    \"deephead\": head_sch,\n}\n\n# Now...we have used pretrained word embeddings, so you do not want to\n# initialise these embeddings. However, you might still want to initialise the\n# other layers in the DeepText component. No problem, you can do that with the\n# pattern parameter and your knowledge of regular expressions. Here we are\n# telling the KaimingNormal initializer NOT to touch the parameters whose\n# names contain the string word_embed.\ninitializers = {\n    \"wide\": KaimingNormal,\n    \"deeptabular\": KaimingNormal,\n    \"deeptext\": KaimingNormal(pattern=r\"^(?!.*word_embed).*$\"),\n    \"deepimage\": KaimingNormal,\n}\n\nmean = [0.406, 0.456, 0.485]  # BGR\nstd = [0.225, 0.224, 0.229]  # BGR\ntransforms = [ToTensor, Normalize(mean=mean, std=std)]\ncallbacks = [\n    LRHistory(n_epochs=10),\n    EarlyStopping,\n    ModelCheckpoint(filepath=\"model_weights/wd_out\"),\n]\n
# remember: one optimizer per model component; for lr_schedulers and initializers this is not necessary optimizers = { \"wide\": wide_opt, \"deeptabular\": deep_opt, \"deeptext\": text_opt, \"deepimage\": img_opt, \"deephead\": head_opt, } schedulers = { \"wide\": wide_sch, \"deeptabular\": deep_sch, \"deeptext\": text_sch, \"deepimage\": img_sch, \"deephead\": head_sch, } # Now...we have used pretrained word embeddings, so you do not want to # initialise these embeddings. However, you might still want to initialise the # other layers in the DeepText component. No problem, you can do that with the # pattern parameter and your knowledge of regular expressions. Here we are # telling the KaimingNormal initializer NOT to touch the parameters whose # names contain the string word_embed. initializers = { \"wide\": KaimingNormal, \"deeptabular\": KaimingNormal, \"deeptext\": KaimingNormal(pattern=r\"^(?!.*word_embed).*$\"), \"deepimage\": KaimingNormal, } mean = [0.406, 0.456, 0.485] # BGR std = [0.225, 0.224, 0.229] # BGR transforms = [ToTensor, Normalize(mean=mean, std=std)] callbacks = [ LRHistory(n_epochs=10), EarlyStopping, ModelCheckpoint(filepath=\"model_weights/wd_out\"), ] In\u00a0[17]: Copied!
trainer = Trainer(\n    model,\n    objective=\"rmse\",\n    initializers=initializers,\n    optimizers=optimizers,\n    lr_schedulers=schedulers,\n    callbacks=callbacks,\n    transforms=transforms,\n)\n
trainer = Trainer( model, objective=\"rmse\", initializers=initializers, optimizers=optimizers, lr_schedulers=schedulers, callbacks=callbacks, transforms=transforms, )
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/initializers.py:34: UserWarning: No initializer found for deephead\n  warnings.warn(\n
In\u00a0[18]: Copied!
trainer.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    X_text=X_text,\n    X_img=X_images,\n    target=target,\n    n_epochs=1,\n    batch_size=32,\n    val_split=0.2,\n)\n
trainer.fit( X_wide=X_wide, X_tab=X_tab, X_text=X_text, X_img=X_images, target=target, n_epochs=1, batch_size=32, val_split=0.2, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 25/25 [00:19<00:00,  1.25it/s, loss=101]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 7/7 [00:04<00:00,  1.62it/s, loss=90.6]\n
Model weights after training corresponds to the those of the final epoch which might not be the best performing weights. Use the 'ModelCheckpoint' Callback to restore the best epoch weights.\n

We have only run one epoch, but let's check that the LRHistory callback records the lr values for each group

In\u00a0[19]: Copied!
trainer.lr_history\n
trainer.lr_history Out[19]:
{'lr_wide_0': [0.03, 0.03],\n 'lr_deeptabular_0': [0.0001, 0.0001],\n 'lr_deeptabular_1': [0.0001, 0.0001],\n 'lr_deeptabular_2': [0.0001, 0.0001],\n 'lr_deeptabular_3': [0.0001, 0.0001],\n 'lr_deeptabular_4': [0.0001, 0.0001],\n 'lr_deeptabular_5': [0.0001, 0.0001],\n 'lr_deeptabular_6': [0.0001, 0.0001],\n 'lr_deeptabular_7': [0.0001, 0.0001],\n 'lr_deeptabular_8': [0.0001, 0.0001],\n 'lr_deeptabular_9': [0.001, 0.001],\n 'lr_deeptabular_10': [0.001, 0.001],\n 'lr_deeptabular_11': [0.001, 0.001],\n 'lr_deeptabular_12': [0.001, 0.001],\n 'lr_deeptext_0': [0.001, 0.001],\n 'lr_deepimage_0': [0.001, 0.001],\n 'lr_deephead_0': [0.001, 0.001]}
"},{"location":"examples/04_regression_with_images_and_text.html#regression-with-images-and-text","title":"Regression with Images and Text\u00b6","text":"

In this notebook we will go through a series of examples on how to combine all Wide & Deep components.

To that end, I will use the Airbnb listings dataset for London, which you can download from here. I use this dataset simply because it contains tabular data, images and text.

I have taken a sample of 1000 listings to keep the data tractable in this notebook. Also, I have preprocessed the data and prepared it for this exercise. All preprocessing steps can be found in the notebook airbnb_data_preprocessing.ipynb in this examples folder.

"},{"location":"examples/04_regression_with_images_and_text.html#regression-with-the-defaults","title":"Regression with the defaults\u00b6","text":"

The set up

"},{"location":"examples/04_regression_with_images_and_text.html#prepare-the-data","title":"Prepare the data\u00b6","text":"

I will focus here on how to prepare the data and run the model. Check notebooks 1 and 2 to see what's going on behind the scenes

Preparing the data is rather simple

"},{"location":"examples/04_regression_with_images_and_text.html#build-the-model-components","title":"Build the model components\u00b6","text":""},{"location":"examples/04_regression_with_images_and_text.html#build-the-trainer-and-fit","title":"Build the trainer and fit\u00b6","text":""},{"location":"examples/05_save_and_load_model_and_artifacts.html","title":"05_save_and_load_model_and_artifacts","text":"In\u00a0[1]: Copied!
import pickle\nimport numpy as np\nimport pandas as pd\nimport torch\nimport shutil\n\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.training import Trainer\nfrom pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint, LRHistory\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep.datasets import load_adult\nfrom sklearn.model_selection import train_test_split\n
import pickle import numpy as np import pandas as pd import torch import shutil from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.training import Trainer from pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint, LRHistory from pytorch_widedeep.models import TabMlp, WideDeep from pytorch_widedeep.metrics import Accuracy from pytorch_widedeep.datasets import load_adult from sklearn.model_selection import train_test_split
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n
In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\ndf.head()\n
df = load_adult(as_frame=True) df.head() Out[2]: age workclass fnlwgt education educational-num marital-status occupation relationship race gender capital-gain capital-loss hours-per-week native-country income 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States <=50K 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States <=50K 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States >50K 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States >50K 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States <=50K In\u00a0[3]: Copied!
# For convenience, we'll replace '-' with '_'\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\n# binary target\ndf[\"target\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\ndf.head()\n
# For convenience, we'll replace '-' with '_' df.columns = [c.replace(\"-\", \"_\") for c in df.columns] # binary target df[\"target\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop(\"income\", axis=1, inplace=True) df.head() Out[3]: age workclass fnlwgt education educational_num marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country target 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States 0 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States 0 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States 1 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States 1 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States 0 In\u00a0[4]: Copied!
train, valid = train_test_split(df, test_size=0.2, stratify=df.target)\n# the test data will be used later as if it was \"fresh\", new data coming after some time...\nvalid, test = train_test_split(valid, test_size=0.5, stratify=valid.target)\n
train, valid = train_test_split(df, test_size=0.2, stratify=df.target) # the test data will be used later as if it was \"fresh\", new data coming after some time... valid, test = train_test_split(valid, test_size=0.5, stratify=valid.target) In\u00a0[5]: Copied!
print(f\"train shape: {train.shape}\")\nprint(f\"valid shape: {valid.shape}\")\nprint(f\"test shape: {test.shape}\")\n
print(f\"train shape: {train.shape}\") print(f\"valid shape: {valid.shape}\") print(f\"test shape: {test.shape}\")
train shape: (39073, 15)\nvalid shape: (4884, 15)\ntest shape: (4885, 15)\n
In\u00a0[6]: Copied!
cat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital_gain\",\n    \"capital_loss\",\n    \"native_country\",\n]\ncontinuous_cols = [\"age\", \"hours_per_week\"]\n
cat_embed_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"capital_gain\", \"capital_loss\", \"native_country\", ] continuous_cols = [\"age\", \"hours_per_week\"] In\u00a0[7]: Copied!
tab_preprocessor = TabPreprocessor(\n    embed_cols=cat_embed_cols,\n    continuous_cols=continuous_cols,\n)\nX_tab_train = tab_preprocessor.fit_transform(train)\ny_train = train.target.values\nX_tab_valid = tab_preprocessor.transform(valid)\ny_valid = valid.target.values\n
tab_preprocessor = TabPreprocessor( embed_cols=cat_embed_cols, continuous_cols=continuous_cols, ) X_tab_train = tab_preprocessor.fit_transform(train) y_train = train.target.values X_tab_valid = tab_preprocessor.transform(valid) y_valid = valid.target.values
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[8]: Copied!
tab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    cont_norm_layer=\"layernorm\",\n    embed_continuous_method=\"standard\",\n    cont_embed_dim=8,\n    mlp_hidden_dims=[64, 32],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nmodel = WideDeep(deeptabular=tab_mlp)\n
tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, cont_norm_layer=\"layernorm\", embed_continuous_method=\"standard\", cont_embed_dim=8, mlp_hidden_dims=[64, 32], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) model = WideDeep(deeptabular=tab_mlp) In\u00a0[9]: Copied!
model\n
model Out[9]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cat_embed): DiffSizeCatEmbeddings(\n        (embed_layers): ModuleDict(\n          (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n          (emb_layer_education): Embedding(17, 8, padding_idx=0)\n          (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n          (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n          (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n          (emb_layer_race): Embedding(6, 4, padding_idx=0)\n          (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n          (emb_layer_capital_gain): Embedding(122, 23, padding_idx=0)\n          (emb_layer_capital_loss): Embedding(97, 21, padding_idx=0)\n          (emb_layer_native_country): Embedding(43, 13, padding_idx=0)\n        )\n        (embedding_dropout): Dropout(p=0.1, inplace=False)\n      )\n      (cont_norm): LayerNorm((2,), eps=1e-05, elementwise_affine=True)\n      (cont_embed): ContEmbeddings(\n        INFO: [ContLinear = weight(n_cont_cols, embed_dim) + bias(n_cont_cols, embed_dim)]\n        (linear): ContLinear(n_cont_cols=2, embed_dim=8, embed_dropout=0.0)\n        (dropout): Dropout(p=0.0, inplace=False)\n      )\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=108, out_features=64, bias=True)\n            (1): LeakyReLU(negative_slope=0.01, inplace=True)\n            (2): Dropout(p=0.2, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=64, out_features=32, bias=True)\n            (1): LeakyReLU(negative_slope=0.01, inplace=True)\n            (2): Dropout(p=0.2, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=32, out_features=1, bias=True)\n  )\n)
In\u00a0[10]: Copied!
early_stopping = EarlyStopping()\nmodel_checkpoint = ModelCheckpoint(\n    filepath=\"tmp_dir/adult_tabmlp_model\",\n    save_best_only=True,\n    verbose=1,\n    max_save=1,\n)\n\ntrainer = Trainer(\n    model,\n    objective=\"binary\",\n    callbacks=[early_stopping, model_checkpoint],\n    metrics=[Accuracy],\n)\n\ntrainer.fit(\n    X_train={\"X_tab\": X_tab_train, \"target\": y_train},\n    X_val={\"X_tab\": X_tab_valid, \"target\": y_valid},\n    n_epochs=4,\n    batch_size=256,\n)\n
early_stopping = EarlyStopping() model_checkpoint = ModelCheckpoint( filepath=\"tmp_dir/adult_tabmlp_model\", save_best_only=True, verbose=1, max_save=1, ) trainer = Trainer( model, objective=\"binary\", callbacks=[early_stopping, model_checkpoint], metrics=[Accuracy], ) trainer.fit( X_train={\"X_tab\": X_tab_train, \"target\": y_train}, X_val={\"X_tab\": X_tab_valid, \"target\": y_valid}, n_epochs=4, batch_size=256, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 76.25it/s, loss=0.452, metrics={'acc': 0.7867}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 125.36it/s, loss=0.335, metrics={'acc': 0.8532}]\n
\nEpoch 1: val_loss improved from inf to 0.33532 Saving model to tmp_dir/adult_tabmlp_model_1.p\n
epoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 76.98it/s, loss=0.355, metrics={'acc': 0.8401}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 106.51it/s, loss=0.303, metrics={'acc': 0.8665}]\n
\nEpoch 2: val_loss improved from 0.33532 to 0.30273 Saving model to tmp_dir/adult_tabmlp_model_2.p\n
epoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 82.71it/s, loss=0.332, metrics={'acc': 0.849}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 107.80it/s, loss=0.288, metrics={'acc': 0.8757}]\n
\nEpoch 3: val_loss improved from 0.30273 to 0.28791 Saving model to tmp_dir/adult_tabmlp_model_3.p\n
epoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 79.02it/s, loss=0.32, metrics={'acc': 0.8541}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 127.07it/s, loss=0.282, metrics={'acc': 0.8763}]
\nEpoch 4: val_loss improved from 0.28791 to 0.28238 Saving model to tmp_dir/adult_tabmlp_model_4.p\nModel weights restored to best epoch: 4\n
\n
In\u00a0[11]: Copied!
torch.save(model, \"tmp_dir/model_saved_option_1.pt\")\n
torch.save(model, \"tmp_dir/model_saved_option_1.pt\") In\u00a0[12]: Copied!
torch.save(model.state_dict(), \"tmp_dir/model_state_dict_saved_option_1.pt\")\n
torch.save(model.state_dict(), \"tmp_dir/model_state_dict_saved_option_1.pt\") In\u00a0[13]: Copied!
trainer.save(path=\"tmp_dir/\", model_filename=\"model_saved_option_2.pt\")\n
trainer.save(path=\"tmp_dir/\", model_filename=\"model_saved_option_2.pt\")

or the state dict

In\u00a0[14]: Copied!
trainer.save(\n    path=\"tmp_dir/\",\n    model_filename=\"model_state_dict_saved_option_2.pt\",\n    save_state_dict=True,\n)\n
trainer.save( path=\"tmp_dir/\", model_filename=\"model_state_dict_saved_option_2.pt\", save_state_dict=True, ) In\u00a0[15]: Copied!
%%bash\n\nls tmp_dir/\n
%%bash ls tmp_dir/
adult_tabmlp_model_4.p\nhistory\nmodel_saved_option_1.pt\nmodel_saved_option_2.pt\nmodel_state_dict_saved_option_1.pt\nmodel_state_dict_saved_option_2.pt\n
In\u00a0[16]: Copied!
%%bash\n\nls tmp_dir/history/\n
%%bash ls tmp_dir/history/
train_eval_history.json\n

Note that since we have used the ModelCheckpoint Callback with max_save=1, adult_tabmlp_model_4.p is the model state dict of the model at epoch 4 (the best epoch), i.e. the same as model_state_dict_saved_option_1.pt or model_state_dict_saved_option_2.pt.
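As a minimal sketch (not in the original notebook), that checkpoint can be restored just like the explicitly saved state dicts:

# Sketch only: the ModelCheckpoint file is a state dict, so it can be loaded
# back into a model with the same architecture
model.load_state_dict(torch.load("tmp_dir/adult_tabmlp_model_4.p"))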

In\u00a0[17]: Copied!
with open(\"tmp_dir/tab_preproc.pkl\", \"wb\") as dp:\n    pickle.dump(tab_preprocessor, dp)\n
with open(\"tmp_dir/tab_preproc.pkl\", \"wb\") as dp: pickle.dump(tab_preprocessor, dp) In\u00a0[18]: Copied!
with open(\"tmp_dir/eary_stop.pkl\", \"wb\") as es:\n    pickle.dump(early_stopping, es)\n
with open(\"tmp_dir/eary_stop.pkl\", \"wb\") as es: pickle.dump(early_stopping, es) In\u00a0[19]: Copied!
%%bash\n\nls tmp_dir/\n
%%bash ls tmp_dir/
adult_tabmlp_model_4.p\neary_stop.pkl\nhistory\nmodel_saved_option_1.pt\nmodel_saved_option_2.pt\nmodel_state_dict_saved_option_1.pt\nmodel_state_dict_saved_option_2.pt\ntab_preproc.pkl\n

And that is pretty much all you need to resume training or directly predict. Let's see

In\u00a0[20]: Copied!
test.head()\n
test.head() Out[20]: age workclass fnlwgt education educational_num marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country target 10103 43 Private 198282 HS-grad 9 Married-civ-spouse Craft-repair Husband White Male 0 0 40 United-States 1 31799 20 Private 228686 11th 7 Married-civ-spouse Other-service Husband White Male 0 0 40 United-States 0 19971 26 Private 291968 HS-grad 9 Married-civ-spouse Transport-moving Husband White Male 0 0 44 United-States 0 3039 48 Private 175958 Bachelors 13 Divorced Prof-specialty Not-in-family White Male 0 0 30 United-States 0 20725 18 Private 232024 11th 7 Never-married Machine-op-inspct Own-child White Male 0 0 55 United-States 0 In\u00a0[21]: Copied!
with open(\"tmp_dir/tab_preproc.pkl\", \"rb\") as tp:\n    tab_preprocessor_new = pickle.load(tp)\n
with open(\"tmp_dir/tab_preproc.pkl\", \"rb\") as tp: tab_preprocessor_new = pickle.load(tp) In\u00a0[22]: Copied!
X_test_tab = tab_preprocessor_new.transform(test)\ny_test = test.target\n
X_test_tab = tab_preprocessor_new.transform(test) y_test = test.target In\u00a0[23]: Copied!
tab_mlp_new = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    cont_norm_layer=\"layernorm\",\n    embed_continuous_method=\"standard\",\n    cont_embed_dim=8,\n    mlp_hidden_dims=[64, 32],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nnew_model = WideDeep(deeptabular=tab_mlp_new)\n
tab_mlp_new = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, cont_norm_layer=\"layernorm\", embed_continuous_method=\"standard\", cont_embed_dim=8, mlp_hidden_dims=[64, 32], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) new_model = WideDeep(deeptabular=tab_mlp_new) In\u00a0[24]: Copied!
new_model.load_state_dict(torch.load(\"tmp_dir/model_state_dict_saved_option_2.pt\"))\n
new_model.load_state_dict(torch.load(\"tmp_dir/model_state_dict_saved_option_2.pt\")) Out[24]:
<All keys matched successfully>
In\u00a0[25]: Copied!
trainer = Trainer(\n    new_model,\n    objective=\"binary\",\n)\n
trainer = Trainer( new_model, objective=\"binary\", ) In\u00a0[26]: Copied!
preds = trainer.predict(X_tab=X_test_tab, batch_size=32)\n
preds = trainer.predict(X_tab=X_test_tab, batch_size=32)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:00<00:00, 309.83it/s]\n
In\u00a0[27]: Copied!
from sklearn.metrics import accuracy_score\n
from sklearn.metrics import accuracy_score In\u00a0[28]: Copied!
accuracy_score(y_test, preds)\n
accuracy_score(y_test, preds) Out[28]:
0.8595701125895598
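The cells above only show direct prediction. Resuming training with the restored weights follows the same Trainer API used earlier; here is a minimal sketch (not in the original notebook) reusing the objects defined above:

# Sketch only: resume training from the restored weights
resume_trainer = Trainer(new_model, objective="binary", metrics=[Accuracy])
resume_trainer.fit(
    X_tab=X_tab_train,
    target=y_train,
    n_epochs=2,
    batch_size=256,
)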
In\u00a0[29]: Copied!
shutil.rmtree(\"tmp_dir/\")\n
shutil.rmtree(\"tmp_dir/\")"},{"location":"examples/05_save_and_load_model_and_artifacts.html#save-and-load-model-and-artifacts","title":"Save and load model and artifacts\u00b6","text":"

In this notebook I will show the different options to save and load a model, as well as some additional objects produced during training.

On a given day, you train a model...

"},{"location":"examples/05_save_and_load_model_and_artifacts.html#save-model-option-1","title":"Save model: option 1\u00b6","text":"

save (and load) a model as you would do with any other torch model

"},{"location":"examples/05_save_and_load_model_and_artifacts.html#save-model-option-2","title":"Save model: option 2\u00b6","text":"

use the trainer. The trainer will also save the training history and the learning rate history (if learning rate schedulers are used)
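As a minimal sketch (not in the original notebook), the saved history can be read back for inspection, assuming train_eval_history.json is a plain JSON file:

import json

# Sketch only: load the training history saved by trainer.save
with open("tmp_dir/history/train_eval_history.json", "r") as f:
    train_eval_history = json.load(f)
print(train_eval_history)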

"},{"location":"examples/05_save_and_load_model_and_artifacts.html#save-preprocessors-and-callbacks","title":"Save preprocessors and callbacks\u00b6","text":"

...just pickle them

"},{"location":"examples/05_save_and_load_model_and_artifacts.html#run-new-experiment-prepare-new-dataset-load-model-and-predict","title":"Run New experiment: prepare new dataset, load model, and predict\u00b6","text":""},{"location":"examples/06_finetune_and_warmup.html","title":"06_finetune_and_warmup","text":"In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport torch\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor\nfrom pytorch_widedeep.models import Wide, TabMlp, TabResnet, WideDeep\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep.datasets import load_adult\n
import numpy as np import pandas as pd import torch from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor from pytorch_widedeep.models import Wide, TabMlp, TabResnet, WideDeep from pytorch_widedeep.metrics import Accuracy from pytorch_widedeep.datasets import load_adult
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n
In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\n# For convenience, we'll replace '-' with '_'\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\n# binary target\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\ndf.head()\n
df = load_adult(as_frame=True) # For convenience, we'll replace '-' with '_' df.columns = [c.replace(\"-\", \"_\") for c in df.columns] # binary target df[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop(\"income\", axis=1, inplace=True) df.head() Out[2]: age workclass fnlwgt education educational_num marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country income_label 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States 0 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States 0 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States 1 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States 1 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States 0 In\u00a0[3]: Copied!
# Define wide, crossed and deep tabular columns\nwide_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"native_country\",\n]\ncrossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")]\ncat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital_gain\",\n    \"capital_loss\",\n    \"native_country\",\n]\ncontinuous_cols = [\"age\", \"hours_per_week\"]\ntarget_col = \"income_label\"\ntarget = df[target_col].values\n
# Define wide, crossed and deep tabular columns wide_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"native_country\", ] crossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")] cat_embed_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"capital_gain\", \"capital_loss\", \"native_country\", ] continuous_cols = [\"age\", \"hours_per_week\"] target_col = \"income_label\" target = df[target_col].values In\u00a0[4]: Copied!
# TARGET\ntarget = df[target_col].values\n\n# WIDE\nwide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_wide = wide_preprocessor.fit_transform(df)\n\n# DEEP\ntab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols\n)\nX_tab = tab_preprocessor.fit_transform(df)\n
# TARGET target = df[target_col].values # WIDE wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_wide = wide_preprocessor.fit_transform(df) # DEEP tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols ) X_tab = tab_preprocessor.fit_transform(df)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[5]: Copied!
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    embed_continuous_method=\"standard\",\n    cont_embed_dim=8,\n    mlp_hidden_dims=[64, 32],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nmodel = WideDeep(wide=wide, deeptabular=tab_mlp)\n
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, embed_continuous_method=\"standard\", cont_embed_dim=8, mlp_hidden_dims=[64, 32], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) model = WideDeep(wide=wide, deeptabular=tab_mlp) In\u00a0[6]: Copied!
trainer = Trainer(\n    model,\n    objective=\"binary\",\n    optimizers=torch.optim.Adam(model.parameters(), lr=0.01),\n    metrics=[Accuracy],\n)\n
trainer = Trainer( model, objective=\"binary\", optimizers=torch.optim.Adam(model.parameters(), lr=0.01), metrics=[Accuracy], ) In\u00a0[7]: Copied!
trainer.fit(\n    X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=2, val_split=0.2, batch_size=256\n)\n
trainer.fit( X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=2, val_split=0.2, batch_size=256 )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 74.26it/s, loss=0.399, metrics={'acc': 0.8163}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:00<00:00, 91.03it/s, loss=0.296, metrics={'acc': 0.8677}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 81.31it/s, loss=0.3, metrics={'acc': 0.8614}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:00<00:00, 106.45it/s, loss=0.285, metrics={'acc': 0.8721}]\n
In\u00a0[8]: Copied!
trainer.save(path=\"models_dir/\", save_state_dict=True, model_filename=\"model_1.pt\")\n
trainer.save(path=\"models_dir/\", save_state_dict=True, model_filename=\"model_1.pt\")

Now time goes by... and we want to fine-tune the model on another, new dataset (for example, a dataset that is identical to the one used to train the previous model, but for another country).

Here I will use the same dataset just for illustration purposes, but the flow would be identical for that new dataset.

In\u00a0[9]: Copied!
wide_1 = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_mlp_1 = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    embed_continuous_method=\"standard\",\n    cont_embed_dim=8,\n    mlp_hidden_dims=[64, 32],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nmodel_1 = WideDeep(wide=wide_1, deeptabular=tab_mlp_1)\n
wide_1 = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_mlp_1 = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, embed_continuous_method=\"standard\", cont_embed_dim=8, mlp_hidden_dims=[64, 32], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) model_1 = WideDeep(wide=wide_1, deeptabular=tab_mlp_1) In\u00a0[10]: Copied!
model_1.load_state_dict(torch.load(\"models_dir/model_1.pt\"))\n
model_1.load_state_dict(torch.load(\"models_dir/model_1.pt\")) Out[10]:
<All keys matched successfully>
In\u00a0[11]: Copied!
trainer_1 = Trainer(model_1, objective=\"binary\", metrics=[Accuracy])\n
trainer_1 = Trainer(model_1, objective=\"binary\", metrics=[Accuracy]) In\u00a0[12]: Copied!
trainer_1.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    target=target,\n    n_epochs=2,\n    batch_size=256,\n    finetune=True,\n    finetune_epochs=2,\n)\n
trainer_1.fit( X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=2, batch_size=256, finetune=True, finetune_epochs=2, )
Training wide for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 191/191 [00:01<00:00, 97.37it/s, loss=0.39, metrics={'acc': 0.8152}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 191/191 [00:01<00:00, 104.04it/s, loss=0.359, metrics={'acc': 0.824}]\n
Training deeptabular for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 191/191 [00:02<00:00, 83.83it/s, loss=0.297, metrics={'acc': 0.8365}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 191/191 [00:02<00:00, 82.78it/s, loss=0.283, metrics={'acc': 0.8445}]\n
Fine-tuning (or warmup) of individual components completed. Training the whole model for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 191/191 [00:02<00:00, 72.84it/s, loss=0.281, metrics={'acc': 0.8716}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 191/191 [00:02<00:00, 77.46it/s, loss=0.273, metrics={'acc': 0.8744}]\n

Note that, as described above, in scenario 2 we can just use this routine to warm up the individual models before their joint training begins:

In\u00a0[13]: Copied!
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    embed_continuous_method=\"standard\",\n    cont_embed_dim=8,\n    mlp_hidden_dims=[64, 32],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nmodel = WideDeep(wide=wide, deeptabular=tab_mlp)\n
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, embed_continuous_method=\"standard\", cont_embed_dim=8, mlp_hidden_dims=[64, 32], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) model = WideDeep(wide=wide, deeptabular=tab_mlp) In\u00a0[14]: Copied!
trainer_2 = Trainer(model, objective=\"binary\", metrics=[Accuracy])\n
trainer_2 = Trainer(model, objective=\"binary\", metrics=[Accuracy]) In\u00a0[15]: Copied!
trainer_2.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    target=target,\n    val_split=0.1,\n    warmup=True,\n    warmup_epochs=2,\n    n_epochs=2,\n    batch_size=256,\n)\n
trainer_2.fit( X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.1, warmup=True, warmup_epochs=2, n_epochs=2, batch_size=256, )
Training wide for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:01<00:00, 102.49it/s, loss=0.52, metrics={'acc': 0.7519}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:01<00:00, 98.15it/s, loss=0.381, metrics={'acc': 0.7891}]\n
Training deeptabular for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 82.97it/s, loss=0.356, metrics={'acc': 0.8043}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 80.27it/s, loss=0.295, metrics={'acc': 0.8195}]\n
Fine-tuning (or warmup) of individual components completed. Training the whole model for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 77.27it/s, loss=0.291, metrics={'acc': 0.8667}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 89.57it/s, loss=0.289, metrics={'acc': 0.8665}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 72.69it/s, loss=0.283, metrics={'acc': 0.8693}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 91.81it/s, loss=0.284, metrics={'acc': 0.869}]\n

We need to explicitly indicate

  1. That we want to fine-tune

  2. The components that we want to individually fine-tune

  3. In the case of gradual fine-tuning, the routine (\"felbo\" or \"howard\")

  4. The layers we want to fine-tune.

For example

In\u00a0[16]: Copied!
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_resnet = TabResnet(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    blocks_dims=[200, 200, 200],\n)\nmodel = WideDeep(wide=wide, deeptabular=tab_resnet)\n
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_resnet = TabResnet( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, blocks_dims=[200, 200, 200], ) model = WideDeep(wide=wide, deeptabular=tab_resnet) In\u00a0[17]: Copied!
model\n
model Out[17]:
WideDeep(\n  (wide): Wide(\n    (wide_linear): Embedding(809, 1, padding_idx=0)\n  )\n  (deeptabular): Sequential(\n    (0): TabResnet(\n      (cat_embed): DiffSizeCatEmbeddings(\n        (embed_layers): ModuleDict(\n          (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n          (emb_layer_education): Embedding(17, 8, padding_idx=0)\n          (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n          (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n          (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n          (emb_layer_race): Embedding(6, 4, padding_idx=0)\n          (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n          (emb_layer_capital_gain): Embedding(124, 24, padding_idx=0)\n          (emb_layer_capital_loss): Embedding(100, 21, padding_idx=0)\n          (emb_layer_native_country): Embedding(43, 13, padding_idx=0)\n        )\n        (embedding_dropout): Dropout(p=0.1, inplace=False)\n      )\n      (cont_norm): Identity()\n      (encoder): DenseResnet(\n        (dense_resnet): Sequential(\n          (lin_inp): Linear(in_features=95, out_features=200, bias=False)\n          (bn_inp): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          (block_0): BasicBlock(\n            (lin1): Linear(in_features=200, out_features=200, bias=False)\n            (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n            (dp): Dropout(p=0.1, inplace=False)\n            (lin2): Linear(in_features=200, out_features=200, bias=False)\n            (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          )\n          (block_1): BasicBlock(\n            (lin1): Linear(in_features=200, out_features=200, bias=False)\n            (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n            (dp): Dropout(p=0.1, inplace=False)\n            (lin2): Linear(in_features=200, out_features=200, bias=False)\n            (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=200, out_features=1, bias=True)\n  )\n)

Let's first train as usual.

In\u00a0[18]: Copied!
trainer_3 = Trainer(model, objective=\"binary\", metrics=[Accuracy])\n
trainer_3 = Trainer(model, objective=\"binary\", metrics=[Accuracy]) In\u00a0[19]: Copied!
trainer_3.fit(\n    X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.1, n_epochs=2, batch_size=256\n)\n
trainer_3.fit( X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.1, n_epochs=2, batch_size=256 )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:03<00:00, 54.23it/s, loss=0.382, metrics={'acc': 0.8239}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 84.72it/s, loss=0.331, metrics={'acc': 0.8526}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:03<00:00, 54.35it/s, loss=0.33, metrics={'acc': 0.8465}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 68.15it/s, loss=0.312, metrics={'acc': 0.8604}]\n
In\u00a0[20]: Copied!
trainer_3.save(path=\"models_dir\", save_state_dict=True, model_filename=\"model_3.pt\")\n
trainer_3.save(path=\"models_dir\", save_state_dict=True, model_filename=\"model_3.pt\")

Now we are going to fine-tune the model components, and in the case of the deeptabular component, we will fine-tune the resnet-blocks and the linear layer but NOT the embeddings.

For this, we need to access the model component's children: deeptabular $\\rightarrow$ tab_resnet $\\rightarrow$ dense_resnet $\\rightarrow$ blocks

In\u00a0[21]: Copied!
wide_3 = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_resnet_3 = TabResnet(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    blocks_dims=[200, 200, 200],\n)\nmodel_3 = WideDeep(wide=wide_3, deeptabular=tab_resnet_3)\n
wide_3 = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_resnet_3 = TabResnet( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, blocks_dims=[200, 200, 200], ) model_3 = WideDeep(wide=wide_3, deeptabular=tab_resnet_3) In\u00a0[22]: Copied!
model_3.load_state_dict(torch.load(\"models_dir/model_3.pt\"))\n
model_3.load_state_dict(torch.load(\"models_dir/model_3.pt\")) Out[22]:
<All keys matched successfully>
In\u00a0[23]: Copied!
model_3\n
model_3 Out[23]:
WideDeep(\n  (wide): Wide(\n    (wide_linear): Embedding(809, 1, padding_idx=0)\n  )\n  (deeptabular): Sequential(\n    (0): TabResnet(\n      (cat_embed): DiffSizeCatEmbeddings(\n        (embed_layers): ModuleDict(\n          (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n          (emb_layer_education): Embedding(17, 8, padding_idx=0)\n          (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n          (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n          (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n          (emb_layer_race): Embedding(6, 4, padding_idx=0)\n          (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n          (emb_layer_capital_gain): Embedding(124, 24, padding_idx=0)\n          (emb_layer_capital_loss): Embedding(100, 21, padding_idx=0)\n          (emb_layer_native_country): Embedding(43, 13, padding_idx=0)\n        )\n        (embedding_dropout): Dropout(p=0.1, inplace=False)\n      )\n      (cont_norm): Identity()\n      (encoder): DenseResnet(\n        (dense_resnet): Sequential(\n          (lin_inp): Linear(in_features=95, out_features=200, bias=False)\n          (bn_inp): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          (block_0): BasicBlock(\n            (lin1): Linear(in_features=200, out_features=200, bias=False)\n            (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n            (dp): Dropout(p=0.1, inplace=False)\n            (lin2): Linear(in_features=200, out_features=200, bias=False)\n            (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          )\n          (block_1): BasicBlock(\n            (lin1): Linear(in_features=200, out_features=200, bias=False)\n            (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n            (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n            (dp): Dropout(p=0.1, inplace=False)\n            (lin2): Linear(in_features=200, out_features=200, bias=False)\n            (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=200, out_features=1, bias=True)\n  )\n)
In\u00a0[24]: Copied!
tab_lin_layer = list(model_3.deeptabular.children())[1]\n
tab_lin_layer = list(model_3.deeptabular.children())[1] In\u00a0[25]: Copied!
tab_lin_layer\n
tab_lin_layer Out[25]:
Linear(in_features=200, out_features=1, bias=True)
In\u00a0[26]: Copied!
tab_deep_layers = []\nfor n1, c1 in model_3.deeptabular.named_children():\n    if (\n        n1 == \"0\"\n    ):  # 0 is the model component and 1 is always the prediction layer added by the `WideDeep` class\n        for n2, c2 in c1.named_children():\n            if n2 == \"encoder\":  # TabResnet\n                for _, c3 in c2.named_children():\n                    for n4, c4 in c3.named_children():  # dense_resnet\n                        if \"block\" in n4:\n                            tab_deep_layers.append((n4, c4))\n
tab_deep_layers = [] for n1, c1 in model_3.deeptabular.named_children(): if ( n1 == \"0\" ): # 0 is the model component and 1 is always the prediction layer added by the `WideDeep` class for n2, c2 in c1.named_children(): if n2 == \"encoder\": # TabResnet for _, c3 in c2.named_children(): for n4, c4 in c3.named_children(): # dense_resnet if \"block\" in n4: tab_deep_layers.append((n4, c4)) In\u00a0[27]: Copied!
tab_deep_layers\n
tab_deep_layers Out[27]:
[('block_0',\n  BasicBlock(\n    (lin1): Linear(in_features=200, out_features=200, bias=False)\n    (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n    (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n    (dp): Dropout(p=0.1, inplace=False)\n    (lin2): Linear(in_features=200, out_features=200, bias=False)\n    (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n  )),\n ('block_1',\n  BasicBlock(\n    (lin1): Linear(in_features=200, out_features=200, bias=False)\n    (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n    (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n    (dp): Dropout(p=0.1, inplace=False)\n    (lin2): Linear(in_features=200, out_features=200, bias=False)\n    (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n  ))]

Now remember, we need to pass ONLY the layers (above I included the names just for clarity), and they need to be in WARM-UP ORDER, therefore:

In\u00a0[28]: Copied!
tab_deep_layers = [el[1] for el in tab_deep_layers][::-1]\n
tab_deep_layers = [el[1] for el in tab_deep_layers][::-1] In\u00a0[29]: Copied!
tab_layers = [tab_lin_layer] + tab_deep_layers[::-1]\n
tab_layers = [tab_lin_layer] + tab_deep_layers[::-1] In\u00a0[30]: Copied!
tab_layers\n
tab_layers Out[30]:
[Linear(in_features=200, out_features=1, bias=True),\n BasicBlock(\n   (lin1): Linear(in_features=200, out_features=200, bias=False)\n   (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n   (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n   (dp): Dropout(p=0.1, inplace=False)\n   (lin2): Linear(in_features=200, out_features=200, bias=False)\n   (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n ),\n BasicBlock(\n   (lin1): Linear(in_features=200, out_features=200, bias=False)\n   (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n   (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n   (dp): Dropout(p=0.1, inplace=False)\n   (lin2): Linear(in_features=200, out_features=200, bias=False)\n   (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n )]

And now simply

In\u00a0[31]: Copied!
trainer_4 = Trainer(model_3, objective=\"binary\", metrics=[Accuracy])\n
trainer_4 = Trainer(model_3, objective=\"binary\", metrics=[Accuracy]) In\u00a0[32]: Copied!
trainer_4.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    target=target,\n    val_split=0.1,\n    finetune=True,\n    finetune_epochs=2,\n    deeptabular_gradual=True,\n    deeptabular_layers=tab_layers,\n    deeptabular_max_lr=0.01,\n    n_epochs=2,\n    batch_size=256,\n)\n
trainer_4.fit( X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.1, finetune=True, finetune_epochs=2, deeptabular_gradual=True, deeptabular_layers=tab_layers, deeptabular_max_lr=0.01, n_epochs=2, batch_size=256, )
Training wide for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:01<00:00, 95.17it/s, loss=0.504, metrics={'acc': 0.7523}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:01<00:00, 99.83it/s, loss=0.384, metrics={'acc': 0.789}]\n
Training deeptabular, layer 1 of 3\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 72.31it/s, loss=0.317, metrics={'acc': 0.8098}]\n
Training deeptabular, layer 2 of 3\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 65.97it/s, loss=0.312, metrics={'acc': 0.8214}]\n
Training deeptabular, layer 3 of 3\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 63.92it/s, loss=0.306, metrics={'acc': 0.8284}]\n
Fine-tuning (or warmup) of individual components completed. Training the whole model for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:03<00:00, 57.26it/s, loss=0.292, metrics={'acc': 0.8664}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 84.56it/s, loss=0.292, metrics={'acc': 0.8696}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:03<00:00, 53.61it/s, loss=0.282, metrics={'acc': 0.8693}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 80.59it/s, loss=0.289, metrics={'acc': 0.8719}]\n

Finally, there is one more use case I would like to consider: the case where we train only one component and we just want to fine-tune and stop the training afterwards, since there is no joint training. This is as simple as

In\u00a0[33]: Copied!
tab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    mlp_hidden_dims=[64, 32],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nmodel = WideDeep(deeptabular=tab_mlp)\n
tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, mlp_hidden_dims=[64, 32], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) model = WideDeep(deeptabular=tab_mlp) In\u00a0[34]: Copied!
trainer_5 = Trainer(\n    model,\n    objective=\"binary\",\n    optimizers=torch.optim.Adam(model.parameters(), lr=0.01),\n    metrics=[Accuracy],\n)\n
trainer_5 = Trainer( model, objective=\"binary\", optimizers=torch.optim.Adam(model.parameters(), lr=0.01), metrics=[Accuracy], ) In\u00a0[35]: Copied!
trainer_5.fit(\n    X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.1, n_epochs=1, batch_size=256\n)\n
trainer_5.fit( X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.1, n_epochs=1, batch_size=256 )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 73.69it/s, loss=0.365, metrics={'acc': 0.8331}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 92.56it/s, loss=0.299, metrics={'acc': 0.8673}]\n
In\u00a0[36]: Copied!
trainer_5.save(path=\"models_dir\", save_state_dict=True, model_filename=\"model_5.pt\")\n
trainer_5.save(path=\"models_dir\", save_state_dict=True, model_filename=\"model_5.pt\") In\u00a0[37]: Copied!
tab_mlp_5 = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    mlp_hidden_dims=[64, 32],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nmodel_5 = WideDeep(deeptabular=tab_mlp_5)\n
tab_mlp_5 = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, mlp_hidden_dims=[64, 32], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) model_5 = WideDeep(deeptabular=tab_mlp_5) In\u00a0[38]: Copied!
model_5.load_state_dict(torch.load(\"models_dir/model_5.pt\"))\n
model_5.load_state_dict(torch.load(\"models_dir/model_5.pt\")) Out[38]:
<All keys matched successfully>

...time goes by...

In\u00a0[39]: Copied!
trainer_6 = Trainer(\n    model_5,\n    objective=\"binary\",\n    optimizers=torch.optim.Adam(model_5.parameters(), lr=0.01),\n    metrics=[Accuracy],\n)\n
trainer_6 = Trainer( model_5, objective=\"binary\", optimizers=torch.optim.Adam(model_5.parameters(), lr=0.01), metrics=[Accuracy], ) In\u00a0[40]: Copied!
trainer_6.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    target=target,\n    val_split=0.1,\n    finetune=True,\n    finetune_epochs=2,\n    finetune_max_lr=0.01,\n    stop_after_finetuning=True,\n    batch_size=256,\n)\n
trainer_6.fit( X_wide=X_wide, X_tab=X_tab, target=target, val_split=0.1, finetune=True, finetune_epochs=2, finetune_max_lr=0.01, stop_after_finetuning=True, batch_size=256, )
Training deeptabular for 2 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 73.86it/s, loss=0.298, metrics={'acc': 0.8652}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 75.45it/s, loss=0.286, metrics={'acc': 0.8669}]\n
Fine-tuning (or warmup) of individual components completed. Training the whole model for 1 epochs\n
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 172/172 [00:02<00:00, 76.29it/s, loss=0.282, metrics={'acc': 0.8698}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 20/20 [00:00<00:00, 84.93it/s, loss=0.281, metrics={'acc': 0.8749}]\n
In\u00a0[42]: Copied!
import shutil\n\nshutil.rmtree(\"models_dir/\")\nshutil.rmtree(\"model_weights/\")\n
import shutil shutil.rmtree(\"models_dir/\") shutil.rmtree(\"model_weights/\") In\u00a0[\u00a0]: Copied!
\n
"},{"location":"examples/06_finetune_and_warmup.html#the-finetunewarm-up-option","title":"The FineTune/Warm Up option\u00b6","text":"

Let's place ourselves in two possible scenarios.

  1. Let's assume we have trained a model and we want to transfer the learnings (you know... transfer learning) to another dataset, or we have simply received new data and do not want to start the training of each component from scratch. We just want to load the pre-trained weights and fine-tune.

  2. We just want to \"warm up\" the individual model components before the joint training begins.

This can be done with the finetune set of parameters. There are 3 fine-tuning routines:

  1. Fine-tune all trainable layers at once with a triangular one-cycle learning rate (referred to as slanted triangular learning rates in Howard & Ruder, 2018)
  2. Gradual fine-tuning inspired by the work of Felbo et al., 2017
  3. Gradual fine-tuning based on the work of Howard & Ruder 2018

Currently fine-tuning is only supported without a fully connected head, i.e. if deephead=None. In addition, the Felbo and Howard routines only apply, of course, to the deeptabular, deeptext and deepimage models. The wide component can also be fine-tuned, but only in an \"all at once\" mode.

"},{"location":"examples/06_finetune_and_warmup.html#fine-tune-or-warm-up-all-at-once","title":"Fine-tune or warm-up all at once\u00b6","text":"

Here, the model components will be trained for finetune_epochs using a triangular one-cycle learning rate (slanted triangular learning rate) ranging from finetune_max_lr/10 to finetune_max_lr (default is 0.01). 10% of the training steps are used to increase the learning rate, which then decreases for the remaining 90%.

Here all trainable layers are fine-tuned.

Let's have a look at one example.
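
As a minimal sketch of this \"all at once\" mode, using only the parameter names that already appear in the notebook cells above (treat it as illustrative rather than the exact signature), and assuming a model that has been built and, optionally, loaded with pre-trained weights:

trainer = Trainer(model, objective=\"binary\", metrics=[Accuracy])
trainer.fit(
    X_wide=X_wide,
    X_tab=X_tab,
    target=target,
    finetune=True,         # fine-tune/warm up each component's trainable layers at once
    finetune_epochs=2,     # epochs used for the fine-tune/warm-up phase
    finetune_max_lr=0.01,  # lr ramps up from finetune_max_lr/10 to finetune_max_lr, then decays
    n_epochs=2,            # joint training epochs after the fine-tune phase
    batch_size=256,
)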

"},{"location":"examples/06_finetune_and_warmup.html#fine-tune-gradually-the-felbo-and-the-howard-routines","title":"Fine-tune Gradually: The \"felbo\" and the \"howard\" routines\u00b6","text":"

The Felbo routine can be illustrated as follows:

Figure 1. The figure can be described as follows: fine-tune (or train) the last layer for one epoch using a one-cycle triangular learning rate. Then fine-tune the next deeper layer for one epoch, with a learning rate that is a factor of 2.5 lower than the previous learning rate (the 2.5 factor is fixed), while freezing the already warmed up layer(s). Repeat until all individual layers are warmed. Then warm one last epoch with all warmed layers trainable. The vanishing color gradient in the figure attempts to illustrate the decreasing learning rate.

Note that this is not identical to the fine-tuning routine described in Felbo et al., 2017, which is why I used the word 'inspired'.

The Howard routine can be illustrated as follows:

Figure 2. The figure can be described as follows: fine-tune (or train) the last layer for one epoch using a one-cycle triangular learning rate. Then fine-tune the next deeper layer for one epoch, with a learning rate that is a factor of 2.5 lower than the previous learning rate (the 2.5 factor is fixed), while keeping the already warmed up layer(s) trainable. Repeat. The vanishing color gradient in the figure attempts to illustrate the decreasing learning rate.

Note that I write \"fine-tune (or train) the last layer for one epoch [...]\". However, in practice the user will have to specify the order of the layers to be fine-tuned. This is another reason why I wrote that the fine-tune routines I have implemented are inspired by the work of Felbo and Howard and not identical to their implementations.

The felbo and howard routines can be accessed via the fine-tune parameters.
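
To make the mechanics concrete, here is a minimal sketch of a gradual fine-tune of the deeptabular component. It reuses the tab_layers list built in the cells above and only the parameters shown there; the 2.5 factor is fixed internally, so the per-layer learning rates below are computed purely for illustration:

# tab_layers = [tab_lin_layer] + tab_deep_layers  # built earlier, in warm-up order (last layer first)

# each successive layer in the list is fine-tuned with a learning rate 2.5x smaller than the previous one
max_lr = 0.01
per_layer_lrs = [max_lr / (2.5**i) for i in range(len(tab_layers))]  # e.g. [0.01, 0.004, 0.0016]

trainer.fit(
    X_wide=X_wide,
    X_tab=X_tab,
    target=target,
    finetune=True,
    finetune_epochs=2,
    deeptabular_gradual=True,       # gradual (felbo/howard-style) routine for the deeptabular component
    deeptabular_layers=tab_layers,  # the layers to fine-tune, in warm-up order
    deeptabular_max_lr=max_lr,      # learning rate used for the first layer in the list
    n_epochs=2,
    batch_size=256,
)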

"},{"location":"examples/07_custom_components.html","title":"07_custom_components","text":"In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport os\nimport torch\n\nfrom torch import Tensor\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import (\n    WidePreprocessor,\n    TabPreprocessor,\n    TextPreprocessor,\n    ImagePreprocessor,\n)\nfrom pytorch_widedeep.models import (\n    Wide,\n    TabMlp,\n    Vision,\n    BasicRNN,\n    WideDeep,\n)\nfrom pytorch_widedeep.losses import RMSELoss\nfrom pytorch_widedeep.initializers import *\nfrom pytorch_widedeep.callbacks import *\nfrom pytorch_widedeep.datasets import load_adult\n
import numpy as np import pandas as pd import os import torch from torch import Tensor from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import ( WidePreprocessor, TabPreprocessor, TextPreprocessor, ImagePreprocessor, ) from pytorch_widedeep.models import ( Wide, TabMlp, Vision, BasicRNN, WideDeep, ) from pytorch_widedeep.losses import RMSELoss from pytorch_widedeep.initializers import * from pytorch_widedeep.callbacks import * from pytorch_widedeep.datasets import load_adult
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n
In\u00a0[2]: Copied!
df = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\")\ndf.head()\n
df = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\") df.head() Out[2]: id host_id description host_listings_count host_identity_verified neighbourhood_cleansed latitude longitude is_location_exact property_type ... amenity_wide_entrance amenity_wide_entrance_for_guests amenity_wide_entryway amenity_wide_hallways amenity_wifi amenity_window_guards amenity_wine_cooler security_deposit extra_people yield 0 13913.jpg 54730 My bright double bedroom with a large window h... 4.0 f Islington 51.56802 -0.11121 t apartment ... 1 0 0 0 1 0 0 100.0 15.0 12.00 1 15400.jpg 60302 Lots of windows and light. St Luke's Gardens ... 1.0 t Kensington and Chelsea 51.48796 -0.16898 t apartment ... 0 0 0 0 1 0 0 150.0 0.0 109.50 2 17402.jpg 67564 Open from June 2018 after a 3-year break, we a... 19.0 t Westminster 51.52098 -0.14002 t apartment ... 0 0 0 0 1 0 0 350.0 10.0 149.65 3 24328.jpg 41759 Artist house, bright high ceiling rooms, priva... 2.0 t Wandsworth 51.47298 -0.16376 t other ... 0 0 0 0 1 0 0 250.0 0.0 215.60 4 25023.jpg 102813 Large, all comforts, 2-bed flat; first floor; ... 1.0 f Wandsworth 51.44687 -0.21874 t apartment ... 0 0 0 0 1 0 0 250.0 11.0 79.35

5 rows \u00d7 223 columns

In\u00a0[3]: Copied!
# There are a number of columns that are already binary. Therefore, no need to one hot encode them\ncrossed_cols = [(\"property_type\", \"room_type\")]\nalready_dummies = [c for c in df.columns if \"amenity\" in c] + [\"has_house_rules\"]\nwide_cols = [\n    \"is_location_exact\",\n    \"property_type\",\n    \"room_type\",\n    \"host_gender\",\n    \"instant_bookable\",\n] + already_dummies\n\ncat_embed_cols = [(c, 16) for c in df.columns if \"catg\" in c] + [\n    (\"neighbourhood_cleansed\", 64),\n    (\"cancellation_policy\", 16),\n]\ncontinuous_cols = [\"latitude\", \"longitude\", \"security_deposit\", \"extra_people\"]\n# it does not make sense to standarised Latitude and Longitude\nalready_standard = [\"latitude\", \"longitude\"]\n\n# text and image colnames\ntext_col = \"description\"\nimg_col = \"id\"\n\n# path to pretrained word embeddings and the images\nword_vectors_path = \"../tmp_data/glove.6B/glove.6B.100d.txt\"\nimg_path = \"../tmp_data/airbnb/property_picture\"\n\n# target\ntarget_col = \"yield\"\n
# There are a number of columns that are already binary. Therefore, no need to one hot encode them crossed_cols = [(\"property_type\", \"room_type\")] already_dummies = [c for c in df.columns if \"amenity\" in c] + [\"has_house_rules\"] wide_cols = [ \"is_location_exact\", \"property_type\", \"room_type\", \"host_gender\", \"instant_bookable\", ] + already_dummies cat_embed_cols = [(c, 16) for c in df.columns if \"catg\" in c] + [ (\"neighbourhood_cleansed\", 64), (\"cancellation_policy\", 16), ] continuous_cols = [\"latitude\", \"longitude\", \"security_deposit\", \"extra_people\"] # it does not make sense to standarised Latitude and Longitude already_standard = [\"latitude\", \"longitude\"] # text and image colnames text_col = \"description\" img_col = \"id\" # path to pretrained word embeddings and the images word_vectors_path = \"../tmp_data/glove.6B/glove.6B.100d.txt\" img_path = \"../tmp_data/airbnb/property_picture\" # target target_col = \"yield\" In\u00a0[4]: Copied!
target = df[target_col].values\n
target = df[target_col].values In\u00a0[5]: Copied!
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_wide = wide_preprocessor.fit_transform(df)\n\ntab_preprocessor = TabPreprocessor(\n    embed_cols=cat_embed_cols, continuous_cols=continuous_cols\n)\nX_tab = tab_preprocessor.fit_transform(df)\n\ntext_preprocessor = TextPreprocessor(\n    word_vectors_path=word_vectors_path, text_col=text_col\n)\nX_text = text_preprocessor.fit_transform(df)\n\nimage_processor = ImagePreprocessor(img_col=img_col, img_path=img_path)\nX_images = image_processor.fit_transform(df)\n
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_wide = wide_preprocessor.fit_transform(df) tab_preprocessor = TabPreprocessor( embed_cols=cat_embed_cols, continuous_cols=continuous_cols ) X_tab = tab_preprocessor.fit_transform(df) text_preprocessor = TextPreprocessor( word_vectors_path=word_vectors_path, text_col=text_col ) X_text = text_preprocessor.fit_transform(df) image_processor = ImagePreprocessor(img_col=img_col, img_path=img_path) X_images = image_processor.fit_transform(df)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
The vocabulary contains 2192 tokens\nIndexing word vectors...\nLoaded 400000 word vectors\nPreparing embeddings matrix...\n2175 words in the vocabulary had ../tmp_data/glove.6B/glove.6B.100d.txt vectors and appear more than 5 times\nReading Images from ../tmp_data/airbnb/property_picture\nResizing\n
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1001/1001 [00:02<00:00, 497.80it/s]\n
Computing normalisation metrics\n

Now we are ready to build a wide and deep model. Three of the four components we will use are included in this package, and they will be combined with a custom deeptext component. Then the fit process will run with a custom loss function.

Let's have a look

In\u00a0[6]: Copied!
# Linear model\nwide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\n\n# DeepDense: 2 Dense layers\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.1,\n    continuous_cols=continuous_cols,\n    mlp_hidden_dims=[128, 64],\n    mlp_dropout=0.1,\n)\n\n# Pretrained Resnet 18\nresnet = Vision(pretrained_model_name=\"resnet18\", n_trainable=0)\n
# Linear model wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) # DeepDense: 2 Dense layers tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.1, continuous_cols=continuous_cols, mlp_hidden_dims=[128, 64], mlp_dropout=0.1, ) # Pretrained Resnet 18 resnet = Vision(pretrained_model_name=\"resnet18\", n_trainable=0) In\u00a0[7]: Copied!
class MyDeepText(nn.Module):\n    def __init__(self, vocab_size, padding_idx=1, embed_dim=100, hidden_dim=64):\n        super(MyDeepText, self).__init__()\n\n        # word/token embeddings\n        self.word_embed = nn.Embedding(vocab_size, embed_dim, padding_idx=padding_idx)\n\n        # stack of RNNs\n        self.rnn = nn.GRU(\n            embed_dim,\n            hidden_dim,\n            num_layers=2,\n            bidirectional=True,\n            batch_first=True,\n        )\n\n        # Remember, this MUST be defined. If not WideDeep will throw an error\n        self.output_dim = hidden_dim * 2\n\n    def forward(self, X):\n        embed = self.word_embed(X.long())\n        o, h = self.rnn(embed)\n        return torch.cat((h[-2], h[-1]), dim=1)\n
class MyDeepText(nn.Module): def __init__(self, vocab_size, padding_idx=1, embed_dim=100, hidden_dim=64): super(MyDeepText, self).__init__() # word/token embeddings self.word_embed = nn.Embedding(vocab_size, embed_dim, padding_idx=padding_idx) # stack of RNNs self.rnn = nn.GRU( embed_dim, hidden_dim, num_layers=2, bidirectional=True, batch_first=True, ) # Remember, this MUST be defined. If not WideDeep will throw an error self.output_dim = hidden_dim * 2 def forward(self, X): embed = self.word_embed(X.long()) o, h = self.rnn(embed) return torch.cat((h[-2], h[-1]), dim=1) In\u00a0[8]: Copied!
mydeeptext = MyDeepText(vocab_size=len(text_preprocessor.vocab.itos))\n
mydeeptext = MyDeepText(vocab_size=len(text_preprocessor.vocab.itos)) In\u00a0[9]: Copied!
model = WideDeep(wide=wide, deeptabular=tab_mlp, deeptext=mydeeptext, deepimage=resnet)\n
model = WideDeep(wide=wide, deeptabular=tab_mlp, deeptext=mydeeptext, deepimage=resnet) In\u00a0[10]: Copied!
class RMSELoss(nn.Module):\n    def __init__(self):\n        \"\"\"root mean squared error\"\"\"\n        super().__init__()\n        self.mse = nn.MSELoss()\n\n    def forward(self, input: Tensor, target: Tensor) -> Tensor:\n        return torch.sqrt(self.mse(input, target))\n
class RMSELoss(nn.Module): def __init__(self): \"\"\"root mean squared error\"\"\" super().__init__() self.mse = nn.MSELoss() def forward(self, input: Tensor, target: Tensor) -> Tensor: return torch.sqrt(self.mse(input, target))

And now we just instantiate the Trainer as usual. Needless to say, this runs with 1000 random observations, so the loss and metric values are meaningless. This is just an example.

In\u00a0[11]: Copied!
trainer = Trainer(model, objective=\"regression\", custom_loss_function=RMSELoss())\n
trainer = Trainer(model, objective=\"regression\", custom_loss_function=RMSELoss()) In\u00a0[12]: Copied!
trainer.fit(\n    X_wide=X_wide,\n    X_tab=X_tab,\n    X_text=X_text,\n    X_img=X_images,\n    target=target,\n    n_epochs=1,\n    batch_size=32,\n    val_split=0.2,\n)\n
trainer.fit( X_wide=X_wide, X_tab=X_tab, X_text=X_text, X_img=X_images, target=target, n_epochs=1, batch_size=32, val_split=0.2, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 25/25 [00:23<00:00,  1.07it/s, loss=126]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 7/7 [00:05<00:00,  1.24it/s, loss=97.4]\n

In addition to model components and loss functions, we can also use custom callbacks or custom metrics. The former need to be of type Callback and the latter need to be of type Metric. See:

pytorch-widedeep.callbacks\n

and

pytorch-widedeep.metrics\n

For this example let me use the adult dataset. Again, we first prepare the data as usual

In\u00a0[13]: Copied!
df = load_adult(as_frame=True)\ndf.head()\n
df = load_adult(as_frame=True) df.head() Out[13]: age workclass fnlwgt education educational-num marital-status occupation relationship race gender capital-gain capital-loss hours-per-week native-country income 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States <=50K 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States <=50K 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States >50K 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States >50K 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States <=50K In\u00a0[14]: Copied!
# For convenience, we'll replace '-' with '_'\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\n# binary target\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\ndf.head()\n
# For convenience, we'll replace '-' with '_' df.columns = [c.replace(\"-\", \"_\") for c in df.columns] # binary target df[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop(\"income\", axis=1, inplace=True) df.head() Out[14]: age workclass fnlwgt education educational_num marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country income_label 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States 0 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States 0 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States 1 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States 1 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States 0 In\u00a0[15]: Copied!
# Define wide, crossed and deep tabular columns\nwide_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"native_country\",\n]\ncrossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")]\ncat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital_gain\",\n    \"capital_loss\",\n    \"native_country\",\n]\ncontinuous_cols = [\"age\", \"hours_per_week\"]\ntarget_col = \"income_label\"\ntarget = df[target_col].values\n
# Define wide, crossed and deep tabular columns wide_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"native_country\", ] crossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")] cat_embed_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"capital_gain\", \"capital_loss\", \"native_country\", ] continuous_cols = [\"age\", \"hours_per_week\"] target_col = \"income_label\" target = df[target_col].values In\u00a0[16]: Copied!
# wide\nwide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_wide = wide_preprocessor.fit_transform(df)\n\n# deeptabular\ntab_preprocessor = TabPreprocessor(\n    embed_cols=cat_embed_cols, continuous_cols=continuous_cols\n)\nX_tab = tab_preprocessor.fit_transform(df)\n
# wide wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_wide = wide_preprocessor.fit_transform(df) # deeptabular tab_preprocessor = TabPreprocessor( embed_cols=cat_embed_cols, continuous_cols=continuous_cols ) X_tab = tab_preprocessor.fit_transform(df)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[17]: Copied!
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1)\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=continuous_cols,\n    mlp_hidden_dims=[128, 64],\n    mlp_dropout=0.2,\n    mlp_activation=\"leaky_relu\",\n)\nmodel = WideDeep(wide=wide, deeptabular=tab_mlp)\n
wide = Wide(input_dim=np.unique(X_wide).shape[0], pred_dim=1) tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=continuous_cols, mlp_hidden_dims=[128, 64], mlp_dropout=0.2, mlp_activation=\"leaky_relu\", ) model = WideDeep(wide=wide, deeptabular=tab_mlp) In\u00a0[18]: Copied!
from pytorch_widedeep.metrics import Metric\n
from pytorch_widedeep.metrics import Metric In\u00a0[19]: Copied!
class Accuracy(Metric):\n    def __init__(self, top_k: int = 1):\n        super(Accuracy, self).__init__()\n\n        self.top_k = top_k\n        self.correct_count = 0\n        self.total_count = 0\n\n        # \u00a0metric name needs to be defined\n        self._name = \"acc\"\n\n    def reset(self):\n        self.correct_count = 0\n        self.total_count = 0\n\n    def __call__(self, y_pred: Tensor, y_true: Tensor) -> np.ndarray:\n        num_classes = y_pred.size(1)\n\n        if num_classes == 1:\n            y_pred = y_pred.round()\n            y_true = y_true\n        elif num_classes > 1:\n            y_pred = y_pred.topk(self.top_k, 1)[1]\n            y_true = y_true.view(-1, 1).expand_as(y_pred)\n\n        self.correct_count += y_pred.eq(y_true).sum().item()\n        self.total_count += len(y_pred)\n        accuracy = float(self.correct_count) / float(self.total_count)\n        return np.array(accuracy)\n
class Accuracy(Metric): def __init__(self, top_k: int = 1): super(Accuracy, self).__init__() self.top_k = top_k self.correct_count = 0 self.total_count = 0 # \u00a0metric name needs to be defined self._name = \"acc\" def reset(self): self.correct_count = 0 self.total_count = 0 def __call__(self, y_pred: Tensor, y_true: Tensor) -> np.ndarray: num_classes = y_pred.size(1) if num_classes == 1: y_pred = y_pred.round() y_true = y_true elif num_classes > 1: y_pred = y_pred.topk(self.top_k, 1)[1] y_true = y_true.view(-1, 1).expand_as(y_pred) self.correct_count += y_pred.eq(y_true).sum().item() self.total_count += len(y_pred) accuracy = float(self.correct_count) / float(self.total_count) return np.array(accuracy) In\u00a0[20]: Copied!
# have a look to the class\nfrom pytorch_widedeep.callbacks import Callback\n
# have a look to the class from pytorch_widedeep.callbacks import Callback In\u00a0[21]: Copied!
class SillyCallback(Callback):\n    def on_train_begin(self, logs=None):\n        # recordings will be the trainer object attributes\n        self.trainer.silly_callback = {}\n\n        self.trainer.silly_callback[\"beginning\"] = []\n        self.trainer.silly_callback[\"end\"] = []\n\n    def on_epoch_begin(self, epoch, logs=None):\n        self.trainer.silly_callback[\"beginning\"].append(epoch + 1)\n\n    def on_epoch_end(self, epoch, logs=None, metric=None):\n        self.trainer.silly_callback[\"end\"].append(epoch + 1)\n
class SillyCallback(Callback): def on_train_begin(self, logs=None): # recordings will be the trainer object attributes self.trainer.silly_callback = {} self.trainer.silly_callback[\"beginning\"] = [] self.trainer.silly_callback[\"end\"] = [] def on_epoch_begin(self, epoch, logs=None): self.trainer.silly_callback[\"beginning\"].append(epoch + 1) def on_epoch_end(self, epoch, logs=None, metric=None): self.trainer.silly_callback[\"end\"].append(epoch + 1)

and now, as usual:

In\u00a0[22]: Copied!
trainer = Trainer(\n    model, objective=\"binary\", metrics=[Accuracy], callbacks=[SillyCallback]\n)\n
trainer = Trainer( model, objective=\"binary\", metrics=[Accuracy], callbacks=[SillyCallback] ) In\u00a0[23]: Copied!
trainer.fit(\n    X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=5, batch_size=64, val_split=0.2\n)\n
trainer.fit( X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=5, batch_size=64, val_split=0.2 )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 611/611 [00:06<00:00, 94.39it/s, loss=0.411, metrics={'acc': 0.814}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 121.91it/s, loss=0.327, metrics={'acc': 0.8449}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 611/611 [00:07<00:00, 85.39it/s, loss=0.324, metrics={'acc': 0.8495}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 88.68it/s, loss=0.298, metrics={'acc': 0.8612}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 611/611 [00:08<00:00, 74.35it/s, loss=0.302, metrics={'acc': 0.8593}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 100.51it/s, loss=0.29, metrics={'acc': 0.8665}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 611/611 [00:08<00:00, 73.83it/s, loss=0.292, metrics={'acc': 0.8637}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 105.98it/s, loss=0.286, metrics={'acc': 0.8695}]\nepoch 5: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 611/611 [00:08<00:00, 72.15it/s, loss=0.286, metrics={'acc': 0.866}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 92.27it/s, loss=0.284, metrics={'acc': 0.8698}]\n
In\u00a0[24]: Copied!
trainer.silly_callback\n
trainer.silly_callback Out[24]:
{'beginning': [1, 2, 3, 4, 5], 'end': [1, 2, 3, 4, 5]}
"},{"location":"examples/07_custom_components.html#custom-components","title":"Custom components\u00b6","text":"

As I mentioned earlier in the example notebooks, and also in the README, it is possible to customise almost every component in pytorch-widedeep.

Let's now go through a couple of simple examples to illustrate how that could be done.

First, let's load and process the data \"as usual\". We will start with a regression problem using the Airbnb dataset.

"},{"location":"examples/07_custom_components.html#custom-deeptext","title":"Custom deeptext\u00b6","text":"

Any standard PyTorch model can be used as the custom deeptext component (a minimal sketch follows below).

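A minimal sketch of the idea, assuming only that a custom deeptext component is a regular nn.Module exposing an output_dim attribute so that WideDeep can size the prediction head (BasicRNN below is a hypothetical example, not part of the library):

import torch
from torch import nn


class BasicRNN(nn.Module):
    # hypothetical custom deeptext component: a plain PyTorch module
    def __init__(self, vocab_size: int, embed_dim: int = 32, hidden_dim: int = 64):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.rnn = nn.GRU(embed_dim, hidden_dim, batch_first=True)
        # assumed requirement: expose the size of the features returned by forward
        self.output_dim = hidden_dim

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        embeds = self.embed(X.long())
        _, h = self.rnn(embeds)
        return h[-1]  # (batch_size, hidden_dim)

Such a module could then be passed to WideDeep as the deeptext argument, together with the text arrays produced by a text preprocessor.
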
"},{"location":"examples/07_custom_components.html#custom-loss-function","title":"Custom loss function\u00b6","text":"

Loss functions simply need to inherit from PyTorch's nn.Module. For example, let's say we want to use RMSE (note that this is already available in the package, but I will pass it here as a custom loss for illustration purposes).

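A minimal sketch of such a loss, assuming the trainer calls it with the model predictions first and the targets second (the .view(-1, 1) is only there to align a 1-d target with 2-d predictions):

import torch
from torch import nn


class RMSELoss(nn.Module):
    # root mean squared error as a custom loss: just an nn.Module returning a scalar tensor
    def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        return torch.sqrt(nn.functional.mse_loss(input, target.view(-1, 1)))

An instance of this class could then be handed to the Trainer through its custom loss argument (custom_loss_function) when building the regression trainer.
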
"},{"location":"examples/07_custom_components.html#custom-metric","title":"Custom metric\u00b6","text":"

Let's say we want to use our own accuracy metric (again, this is already available in the package, but I will pass it here as a custom metric for illustration purposes).

This could be done as:

"},{"location":"examples/07_custom_components.html#custom-callback","title":"Custom Callback\u00b6","text":"

Let's code a callback that records the current epoch at the beginning and the end of each epoch (silly, but you know, this is just an example)

"},{"location":"examples/08_custom_dataLoader_imbalanced_dataset.html","title":"08_custom_dataLoader_imbalanced_dataset","text":"
  • In this notebook we will use the highly imbalanced Protein Homology Dataset from KDD Cup 2004
* The first element of each line is a BLOCK ID that denotes to which native sequence this example belongs. There is a unique BLOCK ID for each native sequence. BLOCK IDs are integers running from 1 to 303 (one for each native sequence, i.e. for each query). BLOCK IDs were assigned before the blocks were split into the train and test sets, so they do not run consecutively in either file.\n* The second element of each line is an EXAMPLE ID that uniquely describes the example. You will need this EXAMPLE ID and the BLOCK ID when you submit results.\n* The third element is the class of the example. Proteins that are homologous to the native sequence are denoted by 1, non-homologous proteins (i.e. decoys) by 0. Test examples have a \"?\" in this position.\n* All following elements are feature values. There are 74 feature values in each line. The features describe the match (e.g. the score of a sequence alignment) between the native protein sequence and the sequence that is tested for homology.\n
In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport torch\nfrom torch.optim import SGD, lr_scheduler\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom pytorch_widedeep.dataloaders import DataLoaderImbalanced, DataLoaderDefault\nfrom torchmetrics import F1Score as F1_torchmetrics\nfrom torchmetrics import Accuracy as Accuracy_torchmetrics\nfrom torchmetrics import Precision as Precision_torchmetrics\nfrom torchmetrics import Recall as Recall_torchmetrics\nfrom pytorch_widedeep.metrics import Accuracy, Recall, Precision, F1Score, R2Score\nfrom pytorch_widedeep.initializers import XavierNormal\nfrom pytorch_widedeep.datasets import load_bio_kdd04\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import classification_report\n\nimport time\nimport datetime\n\nimport warnings\n\nwarnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n\n# increase displayed columns in jupyter notebook\npd.set_option(\"display.max_columns\", 200)\npd.set_option(\"display.max_rows\", 300)\n
import numpy as np import pandas as pd import torch from torch.optim import SGD, lr_scheduler from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.models import TabMlp, WideDeep from pytorch_widedeep.dataloaders import DataLoaderImbalanced, DataLoaderDefault from torchmetrics import F1Score as F1_torchmetrics from torchmetrics import Accuracy as Accuracy_torchmetrics from torchmetrics import Precision as Precision_torchmetrics from torchmetrics import Recall as Recall_torchmetrics from pytorch_widedeep.metrics import Accuracy, Recall, Precision, F1Score, R2Score from pytorch_widedeep.initializers import XavierNormal from pytorch_widedeep.datasets import load_bio_kdd04 from sklearn.model_selection import train_test_split from sklearn.metrics import classification_report import time import datetime import warnings warnings.filterwarnings(\"ignore\", category=DeprecationWarning) # increase displayed columns in jupyter notebook pd.set_option(\"display.max_columns\", 200) pd.set_option(\"display.max_rows\", 300)
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n  from .autonotebook import tqdm as notebook_tqdm\n
In\u00a0[2]: Copied!
df = load_bio_kdd04(as_frame=True)\n# drop columns we won't need in this example\ndf.drop(columns=[\"EXAMPLE_ID\", \"BLOCK_ID\"], inplace=True)\n\ndf_train, df_valid = train_test_split(\n    df, test_size=0.2, stratify=df[\"target\"], random_state=1\n)\ndf_valid, df_test = train_test_split(\n    df_valid, test_size=0.5, stratify=df_valid[\"target\"], random_state=1\n)\n\ncontinuous_cols = df.drop(columns=[\"target\"]).columns.values.tolist()\n
df = load_bio_kdd04(as_frame=True) # drop columns we won't need in this example df.drop(columns=[\"EXAMPLE_ID\", \"BLOCK_ID\"], inplace=True) df_train, df_valid = train_test_split( df, test_size=0.2, stratify=df[\"target\"], random_state=1 ) df_valid, df_test = train_test_split( df_valid, test_size=0.5, stratify=df_valid[\"target\"], random_state=1 ) continuous_cols = df.drop(columns=[\"target\"]).columns.values.tolist() In\u00a0[3]: Copied!
# deeptabular\ntab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True)\nX_tab_train = tab_preprocessor.fit_transform(df_train)\nX_tab_valid = tab_preprocessor.transform(df_valid)\nX_tab_test = tab_preprocessor.transform(df_test)\n\n# target\ny_train = df_train[\"target\"].values\ny_valid = df_valid[\"target\"].values\ny_test = df_test[\"target\"].values\n
# deeptabular tab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True) X_tab_train = tab_preprocessor.fit_transform(df_train) X_tab_valid = tab_preprocessor.transform(df_valid) X_tab_test = tab_preprocessor.transform(df_test) # target y_train = df_train[\"target\"].values y_valid = df_valid[\"target\"].values y_test = df_test[\"target\"].values In\u00a0[4]: Copied!
# Define the model\ninput_layer = len(tab_preprocessor.continuous_cols)\noutput_layer = 1\nhidden_layers = np.linspace(\n    input_layer * 2, output_layer, 5, endpoint=False, dtype=int\n).tolist()\n\ndeeptabular = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    continuous_cols=tab_preprocessor.continuous_cols,\n    mlp_hidden_dims=hidden_layers,\n)\nmodel = WideDeep(deeptabular=deeptabular)\nmodel\n
# Define the model input_layer = len(tab_preprocessor.continuous_cols) output_layer = 1 hidden_layers = np.linspace( input_layer * 2, output_layer, 5, endpoint=False, dtype=int ).tolist() deeptabular = TabMlp( column_idx=tab_preprocessor.column_idx, continuous_cols=tab_preprocessor.continuous_cols, mlp_hidden_dims=hidden_layers, ) model = WideDeep(deeptabular=deeptabular) model Out[4]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cont_norm): Identity()\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=74, out_features=148, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=148, out_features=118, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_2): Sequential(\n            (0): Linear(in_features=118, out_features=89, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_3): Sequential(\n            (0): Linear(in_features=89, out_features=59, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_4): Sequential(\n            (0): Linear(in_features=59, out_features=30, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=30, out_features=1, bias=True)\n  )\n)
In\u00a0[5]: Copied!
# Metrics from pytorch-widedeep\naccuracy = Accuracy(top_k=2)\nprecision = Precision(average=False)\n\n# # Metrics from torchmetrics\n# accuracy = Accuracy_torchmetrics(average=None, num_classes=1)\n# precision = Precision_torchmetrics(average=\"micro\", num_classes=1)\n\n# Optimizers\ndeep_opt = SGD(model.deeptabular.parameters(), lr=0.1)\n\n# LR Scheduler\ndeep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)\n\ntrainer = Trainer(\n    model,\n    objective=\"binary\",\n    lr_schedulers={\"deeptabular\": deep_sch},\n    initializers={\"deeptabular\": XavierNormal},\n    optimizers={\"deeptabular\": deep_opt},\n    metrics=[accuracy, precision],\n    verbose=1,\n)\n
# Metrics from pytorch-widedeep accuracy = Accuracy(top_k=2) precision = Precision(average=False) # # Metrics from torchmetrics # accuracy = Accuracy_torchmetrics(average=None, num_classes=1) # precision = Precision_torchmetrics(average=\"micro\", num_classes=1) # Optimizers deep_opt = SGD(model.deeptabular.parameters(), lr=0.1) # LR Scheduler deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3) trainer = Trainer( model, objective=\"binary\", lr_schedulers={\"deeptabular\": deep_sch}, initializers={\"deeptabular\": XavierNormal}, optimizers={\"deeptabular\": deep_opt}, metrics=[accuracy, precision], verbose=1, ) In\u00a0[6]: Copied!
start = time.time()\ntrainer.fit(\n    X_train={\"X_tab\": X_tab_train, \"target\": y_train},\n    X_val={\"X_tab\": X_tab_valid, \"target\": y_valid},\n    n_epochs=1,\n    batch_size=32,\n    custom_dataloader=DataLoaderImbalanced,\n    oversample_mul=5,\n)\nprint(\n    \"Training time[s]: {}\".format(\n        datetime.timedelta(seconds=round(time.time() - start))\n    )\n)\n\npd.DataFrame(trainer.history)\n\ndf_pred = trainer.predict(X_tab=X_tab_test)\nprint(classification_report(df_test[\"target\"].to_list(), df_pred))\nprint(\"Actual predicted values:\\n{}\".format(np.unique(df_pred, return_counts=True)))\n
start = time.time() trainer.fit( X_train={\"X_tab\": X_tab_train, \"target\": y_train}, X_val={\"X_tab\": X_tab_valid, \"target\": y_valid}, n_epochs=1, batch_size=32, custom_dataloader=DataLoaderImbalanced, oversample_mul=5, ) print( \"Training time[s]: {}\".format( datetime.timedelta(seconds=round(time.time() - start)) ) ) pd.DataFrame(trainer.history) df_pred = trainer.predict(X_tab=X_tab_test) print(classification_report(df_test[\"target\"].to_list(), df_pred)) print(\"Actual predicted values:\\n{}\".format(np.unique(df_pred, return_counts=True)))
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 325/325 [00:02<00:00, 153.99it/s, loss=0.163, metrics={'acc': 0.9363, 'prec': [0.9358]}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 456/456 [00:02<00:00, 205.93it/s, loss=0.1, metrics={'acc': 0.9501, 'prec': [0.1447]}]\n
Training time[s]: 0:00:04\n
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 456/456 [00:01<00:00, 368.16it/s]\n
              precision    recall  f1-score   support\n\n           0       1.00      0.95      0.97     14446\n           1       0.15      0.95      0.25       130\n\n    accuracy                           0.95     14576\n   macro avg       0.57      0.95      0.61     14576\nweighted avg       0.99      0.95      0.97     14576\n\nActual predicted values:\n(array([0, 1]), array([13736,   840]))\n
"},{"location":"examples/08_custom_dataLoader_imbalanced_dataset.html#custom-dataloader-for-imbalanced-dataset","title":"Custom DataLoader for Imbalanced dataset\u00b6","text":""},{"location":"examples/08_custom_dataLoader_imbalanced_dataset.html#initial-imports","title":"Initial imports\u00b6","text":""},{"location":"examples/09_extracting_embeddings.html","title":"09_extracting_embeddings","text":"In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport torch\n\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.training import Trainer\nfrom pytorch_widedeep.models import FTTransformer, WideDeep\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep import Tab2Vec\nfrom pytorch_widedeep.datasets import load_adult\n
import numpy as np import pandas as pd import torch from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.training import Trainer from pytorch_widedeep.models import FTTransformer, WideDeep from pytorch_widedeep.metrics import Accuracy from pytorch_widedeep import Tab2Vec from pytorch_widedeep.datasets import load_adult In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\ndf.head()\n
df = load_adult(as_frame=True) df.head() Out[2]: age workclass fnlwgt education educational-num marital-status occupation relationship race gender capital-gain capital-loss hours-per-week native-country income 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States <=50K 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States <=50K 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States >50K 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States >50K 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States <=50K In\u00a0[3]: Copied!
# For convenience, we'll replace '-' with '_'\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\n# binary target\ndf[\"target\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop([\"income\", \"educational_num\"], axis=1, inplace=True)\n\ndf.head()\n
# For convenience, we'll replace '-' with '_' df.columns = [c.replace(\"-\", \"_\") for c in df.columns] # binary target df[\"target\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop([\"income\", \"educational_num\"], axis=1, inplace=True) df.head() Out[3]: age workclass fnlwgt education marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country target 0 25 Private 226802 11th Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States 0 1 38 Private 89814 HS-grad Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States 0 2 28 Local-gov 336951 Assoc-acdm Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States 1 3 44 Private 160323 Some-college Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States 1 4 18 ? 103497 Some-college Never-married ? Own-child White Female 0 0 30 United-States 0 In\u00a0[4]: Copied!
cat_cols, cont_cols = [], []\nfor col in df.columns:\n    # 50 is just a random number I choose here for this example\n    if df[col].dtype == \"O\" or df[col].nunique() < 50 and col != \"target\":\n        cat_cols.append(col)\n    elif col != \"target\":\n        cont_cols.append(col)\ntarget_col = \"target\"\n
cat_cols, cont_cols = [], [] for col in df.columns: # 50 is just a random number I choose here for this example if df[col].dtype == \"O\" or df[col].nunique() < 50 and col != \"target\": cat_cols.append(col) elif col != \"target\": cont_cols.append(col) target_col = \"target\" In\u00a0[5]: Copied!
target = df[target_col].values\n\ntab_preprocessor = TabPreprocessor(\n    embed_cols=cat_cols, continuous_cols=cont_cols, for_transformer=True\n)\nX_tab = tab_preprocessor.fit_transform(df)\n
target = df[target_col].values tab_preprocessor = TabPreprocessor( embed_cols=cat_cols, continuous_cols=cont_cols, for_transformer=True ) X_tab = tab_preprocessor.fit_transform(df)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[6]: Copied!
ft_transformer = FTTransformer(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=tab_preprocessor.continuous_cols,\n    embed_continuous_method=\"standard\",\n    n_blocks=2,\n    n_heads=4,\n    input_dim=16,\n)\n
ft_transformer = FTTransformer( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=tab_preprocessor.continuous_cols, embed_continuous_method=\"standard\", n_blocks=2, n_heads=4, input_dim=16, )
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/utils/general_utils.py:12: DeprecationWarning: The 'embed_continuous' parameter is deprecated and will be removed in the next release. Please use 'embed_continuous_method' instead See the documentation for more details.\n  return func(*args, **kwargs)\n
In\u00a0[7]: Copied!
model = WideDeep(deeptabular=ft_transformer)\ntrainer = Trainer(model, objective=\"binary\", metrics=[Accuracy])\ntrainer.fit(X_tab=X_tab, target=target, n_epochs=1, batch_size=256, val_split=0.2)\n
model = WideDeep(deeptabular=ft_transformer) trainer = Trainer(model, objective=\"binary\", metrics=[Accuracy]) trainer.fit(X_tab=X_tab, target=target, n_epochs=1, batch_size=256, val_split=0.2)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:03<00:00, 41.47it/s, loss=221, metrics={'acc': 0.686}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:00<00:00, 86.24it/s, loss=9.28, metrics={'acc': 0.76}]\n
In\u00a0[8]: Copied!
t2v = Tab2Vec(model=model, tab_preprocessor=tab_preprocessor)\n
t2v = Tab2Vec(model=model, tab_preprocessor=tab_preprocessor) In\u00a0[9]: Copied!
# assuming this is a test set with the target col\nX_vec, y = t2v.transform(df.sample(100), target_col=\"target\")\n
# assuming this is a test set with the target col X_vec, y = t2v.transform(df.sample(100), target_col=\"target\") In\u00a0[10]: Copied!
# X vec is the dataframe turned into the embeddings\nX_vec.shape\n
# X vec is the dataframe turned into the embeddings X_vec.shape Out[10]:
(100, 208)

208 = input_dim (16) * n_cols (13)

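A quick, hypothetical sanity check of that arithmetic (X_tab has one column per categorical and continuous feature, 13 in total, and the FTTransformer above was built with input_dim=16):

# each tabular column is mapped to a 16-dim embedding and the embeddings are concatenated
assert X_vec.shape == (100, 16 * X_tab.shape[1])  # (100, 16 * 13) = (100, 208)
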
In\u00a0[11]: Copied!
# ...or if we don't have target col\nX_vec = t2v.transform(df.sample(100))\n
# ...or if we don't have target col X_vec = t2v.transform(df.sample(100))"},{"location":"examples/09_extracting_embeddings.html#extracting-embeddings","title":"Extracting embeddings\u00b6","text":"

This notebook is a simple guide to extracting learned feature embeddings using Tab2Vec

"},{"location":"examples/10_3rd_party_integration-RayTune_WnB.html","title":"10_3rd_party_integration-RayTune_WnB","text":"In\u00a0[11]: Copied!
from typing import Optional, Dict\nimport os\n\nimport numpy as np\nimport pandas as pd\nimport torch\nimport wandb\nfrom torch.optim import SGD, lr_scheduler\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom torchmetrics import F1Score as F1_torchmetrics\nfrom torchmetrics import Accuracy as Accuracy_torchmetrics\nfrom torchmetrics import Precision as Precision_torchmetrics\nfrom torchmetrics import Recall as Recall_torchmetrics\nfrom pytorch_widedeep.metrics import Accuracy, Recall, Precision, F1Score, R2Score\nfrom pytorch_widedeep.initializers import XavierNormal\nfrom pytorch_widedeep.callbacks import (\n    EarlyStopping,\n    ModelCheckpoint,\n    Callback,\n)\nfrom pytorch_widedeep.datasets import load_bio_kdd04\n\nfrom sklearn.model_selection import train_test_split\nimport warnings\n\nwarnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n\nfrom ray import tune\nfrom ray.tune.schedulers import AsyncHyperBandScheduler\nfrom ray.tune import JupyterNotebookReporter\nfrom ray.air.integrations.wandb import WandbLoggerCallback\n\n# from ray.tune.integration.wandb import wandb_mixin\n\nimport tracemalloc\n\ntracemalloc.start()\n\n# increase displayed columns in jupyter notebook\npd.set_option(\"display.max_columns\", 200)\npd.set_option(\"display.max_rows\", 300)\n
from typing import Optional, Dict import os import numpy as np import pandas as pd import torch import wandb from torch.optim import SGD, lr_scheduler from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.models import TabMlp, WideDeep from torchmetrics import F1Score as F1_torchmetrics from torchmetrics import Accuracy as Accuracy_torchmetrics from torchmetrics import Precision as Precision_torchmetrics from torchmetrics import Recall as Recall_torchmetrics from pytorch_widedeep.metrics import Accuracy, Recall, Precision, F1Score, R2Score from pytorch_widedeep.initializers import XavierNormal from pytorch_widedeep.callbacks import ( EarlyStopping, ModelCheckpoint, Callback, ) from pytorch_widedeep.datasets import load_bio_kdd04 from sklearn.model_selection import train_test_split import warnings warnings.filterwarnings(\"ignore\", category=DeprecationWarning) from ray import tune from ray.tune.schedulers import AsyncHyperBandScheduler from ray.tune import JupyterNotebookReporter from ray.air.integrations.wandb import WandbLoggerCallback # from ray.tune.integration.wandb import wandb_mixin import tracemalloc tracemalloc.start() # increase displayed columns in jupyter notebook pd.set_option(\"display.max_columns\", 200) pd.set_option(\"display.max_rows\", 300) In\u00a0[12]: Copied!
class RayTuneReporter(Callback):\n    r\"\"\"Callback that allows reporting history and lr_history values to RayTune\n    during Hyperparameter tuning\n\n    Callbacks are passed as input parameters to the ``Trainer`` class. See\n    :class:`pytorch_widedeep.trainer.Trainer`\n\n    For examples see the examples folder at:\n\n        .. code-block:: bash\n\n            /examples/12_HyperParameter_tuning_w_RayTune.ipynb\n    \"\"\"\n\n    def on_epoch_end(\n        self, epoch: int, logs: Optional[Dict] = None, metric: Optional[float] = None\n    ):\n        report_dict = {}\n        for k, v in self.trainer.history.items():\n            report_dict.update({k: v[-1]})\n        if hasattr(self.trainer, \"lr_history\"):\n            for k, v in self.trainer.lr_history.items():\n                report_dict.update({k: v[-1]})\n        tune.report(report_dict)\n\n\nclass WnBReportBest(Callback):\n    r\"\"\"Callback that allows reporting best performance of a run to WnB\n    during Hyperparameter tuning. It is an adjusted pytorch_widedeep.callbacks.ModelCheckpoint\n    with added WnB and removed checkpoint saving.\n\n    Callbacks are passed as input parameters to the ``Trainer`` class.\n\n    Parameters\n    ----------\n    wb: obj\n        Weights&Biases API interface to report single best result usable for\n        comparisson of multiple paramater combinations by, for example,\n        `parallel coordinates\n        <https://docs.wandb.ai/ref/app/features/panels/parallel-coordinates>`_.\n        E.g W&B summary report `wandb.run.summary[\"best\"]`.\n    monitor: str, default=\"loss\"\n        quantity to monitor. Typically `'val_loss'` or metric name\n        (e.g. `'val_acc'`)\n    mode: str, default=\"auto\"\n        If ``save_best_only=True``, the decision to overwrite the current save\n        file is made based on either the maximization or the minimization of\n        the monitored quantity. For `'acc'`, this should be `'max'`, for\n        `'loss'` this should be `'min'`, etc. 
In `'auto'` mode, the\n        direction is automatically inferred from the name of the monitored\n        quantity.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        wb: object,\n        monitor: str = \"val_loss\",\n        mode: str = \"auto\",\n    ):\n        super(WnBReportBest, self).__init__()\n\n        self.monitor = monitor\n        self.mode = mode\n        self.wb = wb\n\n        if self.mode not in [\"auto\", \"min\", \"max\"]:\n            warnings.warn(\n                \"WnBReportBest mode %s is unknown, \"\n                \"fallback to auto mode.\" % (self.mode),\n                RuntimeWarning,\n            )\n            self.mode = \"auto\"\n        if self.mode == \"min\":\n            self.monitor_op = np.less\n            self.best = np.Inf\n        elif self.mode == \"max\":\n            self.monitor_op = np.greater  # type: ignore[assignment]\n            self.best = -np.Inf\n        else:\n            if self._is_metric(self.monitor):\n                self.monitor_op = np.greater  # type: ignore[assignment]\n                self.best = -np.Inf\n            else:\n                self.monitor_op = np.less\n                self.best = np.Inf\n\n    def on_epoch_end(  # noqa: C901\n        self, epoch: int, logs: Optional[Dict] = None, metric: Optional[float] = None\n    ):\n        logs = logs or {}\n        current = logs.get(self.monitor)\n        if current is not None:\n            if self.monitor_op(current, self.best):\n                self.wb.run.summary[\"best\"] = current  # type: ignore[attr-defined]\n                self.best = current\n                self.best_epoch = epoch\n\n    @staticmethod\n    def _is_metric(monitor: str):\n        \"copied from pytorch_widedeep.callbacks\"\n        if any([s in monitor for s in [\"acc\", \"prec\", \"rec\", \"fscore\", \"f1\", \"f2\"]]):\n            return True\n        else:\n            return False\n
class RayTuneReporter(Callback): r\"\"\"Callback that allows reporting history and lr_history values to RayTune during Hyperparameter tuning Callbacks are passed as input parameters to the ``Trainer`` class. See :class:`pytorch_widedeep.trainer.Trainer` For examples see the examples folder at: .. code-block:: bash /examples/12_HyperParameter_tuning_w_RayTune.ipynb \"\"\" def on_epoch_end( self, epoch: int, logs: Optional[Dict] = None, metric: Optional[float] = None ): report_dict = {} for k, v in self.trainer.history.items(): report_dict.update({k: v[-1]}) if hasattr(self.trainer, \"lr_history\"): for k, v in self.trainer.lr_history.items(): report_dict.update({k: v[-1]}) tune.report(report_dict) class WnBReportBest(Callback): r\"\"\"Callback that allows reporting best performance of a run to WnB during Hyperparameter tuning. It is an adjusted pytorch_widedeep.callbacks.ModelCheckpoint with added WnB and removed checkpoint saving. Callbacks are passed as input parameters to the ``Trainer`` class. Parameters ---------- wb: obj Weights&Biases API interface to report single best result usable for comparisson of multiple paramater combinations by, for example, `parallel coordinates `_. E.g W&B summary report `wandb.run.summary[\"best\"]`. monitor: str, default=\"loss\" quantity to monitor. Typically `'val_loss'` or metric name (e.g. `'val_acc'`) mode: str, default=\"auto\" If ``save_best_only=True``, the decision to overwrite the current save file is made based on either the maximization or the minimization of the monitored quantity. For `'acc'`, this should be `'max'`, for `'loss'` this should be `'min'`, etc. In `'auto'` mode, the direction is automatically inferred from the name of the monitored quantity. \"\"\" def __init__( self, wb: object, monitor: str = \"val_loss\", mode: str = \"auto\", ): super(WnBReportBest, self).__init__() self.monitor = monitor self.mode = mode self.wb = wb if self.mode not in [\"auto\", \"min\", \"max\"]: warnings.warn( \"WnBReportBest mode %s is unknown, \" \"fallback to auto mode.\" % (self.mode), RuntimeWarning, ) self.mode = \"auto\" if self.mode == \"min\": self.monitor_op = np.less self.best = np.Inf elif self.mode == \"max\": self.monitor_op = np.greater # type: ignore[assignment] self.best = -np.Inf else: if self._is_metric(self.monitor): self.monitor_op = np.greater # type: ignore[assignment] self.best = -np.Inf else: self.monitor_op = np.less self.best = np.Inf def on_epoch_end( # noqa: C901 self, epoch: int, logs: Optional[Dict] = None, metric: Optional[float] = None ): logs = logs or {} current = logs.get(self.monitor) if current is not None: if self.monitor_op(current, self.best): self.wb.run.summary[\"best\"] = current # type: ignore[attr-defined] self.best = current self.best_epoch = epoch @staticmethod def _is_metric(monitor: str): \"copied from pytorch_widedeep.callbacks\" if any([s in monitor for s in [\"acc\", \"prec\", \"rec\", \"fscore\", \"f1\", \"f2\"]]): return True else: return False In\u00a0[13]: Copied!
df = load_bio_kdd04(as_frame=True)\ndf.head()\n
df = load_bio_kdd04(as_frame=True) df.head() Out[13]: EXAMPLE_ID BLOCK_ID target 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 0 279 261532 0 52.0 32.69 0.30 2.5 20.0 1256.8 -0.89 0.33 11.0 -55.0 267.2 0.52 0.05 -2.36 49.6 252.0 0.43 1.16 -2.06 -33.0 -123.2 1.60 -0.49 -6.06 65.0 296.1 -0.28 -0.26 -3.83 -22.6 -170.0 3.06 -1.05 -3.29 22.9 286.3 0.12 2.58 4.08 -33.0 -178.9 1.88 0.53 -7.0 -44.0 1987.0 -5.41 0.95 -4.0 -57.0 722.9 -3.26 -0.55 -7.5 125.5 1547.2 -0.36 1.12 9.0 -37.0 72.5 0.47 0.74 -11.0 -8.0 1595.1 -1.64 2.83 -2.0 -50.0 445.2 -0.35 0.26 0.76 1 279 261533 0 58.0 33.33 0.00 16.5 9.5 608.1 0.50 0.07 20.5 -52.5 521.6 -1.08 0.58 -0.02 -3.2 103.6 -0.95 0.23 -2.87 -25.9 -52.2 -0.21 0.87 -1.81 10.4 62.0 -0.28 -0.04 1.48 -17.6 -198.3 3.43 2.84 5.87 -16.9 72.6 -0.31 2.79 2.71 -33.5 -11.6 -1.11 4.01 5.0 -57.0 666.3 1.13 4.38 5.0 -64.0 39.3 1.07 -0.16 32.5 100.0 1893.7 -2.80 -0.22 2.5 -28.5 45.0 0.58 0.41 -19.0 -6.0 762.9 0.29 0.82 -3.0 -35.0 140.3 1.16 0.39 0.73 2 279 261534 0 77.0 27.27 -0.91 6.0 58.5 1623.6 -1.40 0.02 -6.5 -48.0 621.0 -1.20 0.14 -0.20 73.6 609.1 -0.44 -0.58 -0.04 -23.0 -27.4 -0.72 -1.04 -1.09 91.1 635.6 -0.88 0.24 0.59 -18.7 -7.2 -0.60 -2.82 -0.71 52.4 504.1 0.89 -0.67 -9.30 -20.8 -25.7 -0.77 -0.85 0.0 -20.0 2259.0 -0.94 1.15 -4.0 -44.0 -22.7 0.94 -0.98 -19.0 105.0 1267.9 1.03 1.27 11.0 -39.5 82.3 0.47 -0.19 -10.0 7.0 1491.8 0.32 -1.29 0.0 -34.0 658.2 -0.76 0.26 0.24 3 279 261535 0 41.0 27.91 -0.35 3.0 46.0 1921.6 -1.36 -0.47 -32.0 -51.5 560.9 -0.29 -0.10 -1.11 124.3 791.6 0.00 0.39 -1.85 -21.7 -44.9 -0.21 0.02 0.89 133.9 797.8 -0.08 1.06 -0.26 -16.4 -74.1 0.97 -0.80 -0.41 66.9 955.3 -1.90 1.28 -6.65 -28.1 47.5 -1.91 1.42 1.0 -30.0 1846.7 0.76 1.10 -4.0 -52.0 -53.9 1.71 -0.22 -12.0 97.5 1969.8 -1.70 0.16 -1.0 -32.5 255.9 -0.46 1.57 10.0 6.0 2047.7 -0.98 1.53 0.0 -49.0 554.2 -0.83 0.39 0.73 4 279 261536 0 50.0 28.00 -1.32 -9.0 12.0 464.8 0.88 0.19 8.0 -51.5 98.1 1.09 -0.33 -2.16 -3.9 102.7 0.39 -1.22 -3.39 -15.2 -42.2 -1.18 -1.11 -3.55 8.9 141.3 -0.16 -0.43 -4.15 -12.9 -13.4 -1.32 -0.98 -3.69 8.8 136.1 -0.30 4.13 1.89 -13.0 -18.7 -1.37 -0.93 0.0 -1.0 810.1 -2.29 6.72 1.0 -23.0 -29.7 0.58 -1.10 -18.5 33.5 206.8 1.84 -0.13 4.0 -29.0 30.1 0.80 -0.24 5.0 -14.0 479.5 0.68 -0.59 2.0 -36.0 -6.9 2.02 0.14 -0.23 In\u00a0[14]: Copied!
# imbalance of the classes\ndf[\"target\"].value_counts()\n
# imbalance of the classes df[\"target\"].value_counts() Out[14]:
target\n0    144455\n1      1296\nName: count, dtype: int64
In\u00a0[15]: Copied!
# drop columns we won't need in this example\ndf.drop(columns=[\"EXAMPLE_ID\", \"BLOCK_ID\"], inplace=True)\n
# drop columns we won't need in this example df.drop(columns=[\"EXAMPLE_ID\", \"BLOCK_ID\"], inplace=True) In\u00a0[16]: Copied!
df_train, df_valid = train_test_split(\n    df, test_size=0.2, stratify=df[\"target\"], random_state=1\n)\ndf_valid, df_test = train_test_split(\n    df_valid, test_size=0.5, stratify=df_valid[\"target\"], random_state=1\n)\n
df_train, df_valid = train_test_split( df, test_size=0.2, stratify=df[\"target\"], random_state=1 ) df_valid, df_test = train_test_split( df_valid, test_size=0.5, stratify=df_valid[\"target\"], random_state=1 ) In\u00a0[17]: Copied!
continuous_cols = df.drop(columns=[\"target\"]).columns.values.tolist()\n
continuous_cols = df.drop(columns=[\"target\"]).columns.values.tolist() In\u00a0[18]: Copied!
# deeptabular\ntab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True)\nX_tab_train = tab_preprocessor.fit_transform(df_train)\nX_tab_valid = tab_preprocessor.transform(df_valid)\nX_tab_test = tab_preprocessor.transform(df_test)\n\n# target\ny_train = df_train[\"target\"].values\ny_valid = df_valid[\"target\"].values\ny_test = df_test[\"target\"].values\n
# deeptabular tab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True) X_tab_train = tab_preprocessor.fit_transform(df_train) X_tab_valid = tab_preprocessor.transform(df_valid) X_tab_test = tab_preprocessor.transform(df_test) # target y_train = df_train[\"target\"].values y_valid = df_valid[\"target\"].values y_test = df_test[\"target\"].values In\u00a0[19]: Copied!
input_layer = len(tab_preprocessor.continuous_cols)\noutput_layer = 1\nhidden_layers = np.linspace(\n    input_layer * 2, output_layer, 5, endpoint=False, dtype=int\n).tolist()\n
input_layer = len(tab_preprocessor.continuous_cols) output_layer = 1 hidden_layers = np.linspace( input_layer * 2, output_layer, 5, endpoint=False, dtype=int ).tolist() In\u00a0[20]: Copied!
deeptabular = TabMlp(\n    mlp_hidden_dims=hidden_layers,\n    column_idx=tab_preprocessor.column_idx,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\nmodel = WideDeep(deeptabular=deeptabular)\nmodel\n
deeptabular = TabMlp( mlp_hidden_dims=hidden_layers, column_idx=tab_preprocessor.column_idx, continuous_cols=tab_preprocessor.continuous_cols, ) model = WideDeep(deeptabular=deeptabular) model Out[20]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cont_norm): Identity()\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=74, out_features=148, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=148, out_features=118, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_2): Sequential(\n            (0): Linear(in_features=118, out_features=89, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_3): Sequential(\n            (0): Linear(in_features=89, out_features=59, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_4): Sequential(\n            (0): Linear(in_features=59, out_features=30, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=30, out_features=1, bias=True)\n  )\n)
In\u00a0[24]: Copied!
# Metrics from torchmetrics\naccuracy = Accuracy_torchmetrics(average=None, num_classes=1, task=\"binary\")\nprecision = Precision_torchmetrics(average=\"micro\", num_classes=1, task=\"binary\")\nf1 = F1_torchmetrics(average=None, num_classes=1, task=\"binary\")\nrecall = Recall_torchmetrics(average=None, num_classes=1, task=\"binary\")\n
# Metrics from torchmetrics accuracy = Accuracy_torchmetrics(average=None, num_classes=1, task=\"binary\") precision = Precision_torchmetrics(average=\"micro\", num_classes=1, task=\"binary\") f1 = F1_torchmetrics(average=None, num_classes=1, task=\"binary\") recall = Recall_torchmetrics(average=None, num_classes=1, task=\"binary\")

Note:

The following cells include usage of both the RayTuneReporter and WnBReportBest callbacks. If you want to use just RayTuneReporter, remove the following:

  • wandb from config
  • WandbLoggerCallback
  • WnBReportBest
  • @wandb_mixin decorator

We do not see a strong reason to use WnB without RayTune for a single parameter combination run, but it is possible:

  • option01: define parameters in config with only a single value, e.g. tune.grid_search([1000]) (a single-value RayTune run)
  • option02: define a WnB callback that reports the current validation/training loss, metrics, etc. at the end of each batch, i.e. do not report to WnB at on_epoch_end as in WnBReportBest but at on_batch_end; see pytorch_widedeep.callbacks.Callback (a minimal sketch follows below)
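
A minimal sketch of option02 (hypothetical, not part of the library): it assumes that the logs dict passed to on_batch_end carries the running loss/metric values, and simply forwards it to W&B.

from typing import Dict, Optional

import wandb
from pytorch_widedeep.callbacks import Callback


class WnBBatchLogger(Callback):
    # report the running training loss/metrics to W&B at the end of every batch
    def on_batch_end(self, batch: int, logs: Optional[Dict] = None):
        if logs:
            wandb.log(logs)

Such a callback would then be added to the callbacks list passed to the Trainer, alongside or instead of WnBReportBest.
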
In\u00a0[26]: Copied!
config = {\n    \"batch_size\": tune.grid_search([1000, 5000]),\n    \"wandb\": {\n        \"project\": \"test\",\n        # \"api_key_file\": os.getcwd() + \"/wandb_api.key\",\n        \"api_key\": \"WNB_API_KEY\",\n    },\n}\n\n# Optimizers\ndeep_opt = SGD(model.deeptabular.parameters(), lr=0.1)\n# LR Scheduler\ndeep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)\n\n\n@wandb_mixin\ndef training_function(config, X_train, X_val):\n    early_stopping = EarlyStopping()\n    model_checkpoint = ModelCheckpoint(save_best_only=True)\n    # Hyperparameters\n    batch_size = config[\"batch_size\"]\n    trainer = Trainer(\n        model,\n        objective=\"binary_focal_loss\",\n        callbacks=[\n            RayTuneReporter,\n            WnBReportBest(wb=wandb),\n            early_stopping,\n            model_checkpoint,\n        ],\n        lr_schedulers={\"deeptabular\": deep_sch},\n        initializers={\"deeptabular\": XavierNormal},\n        optimizers={\"deeptabular\": deep_opt},\n        metrics=[accuracy, precision, recall, f1],\n        verbose=0,\n    )\n\n    trainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=batch_size)\n\n\nX_train = {\"X_tab\": X_tab_train, \"target\": y_train}\nX_val = {\"X_tab\": X_tab_valid, \"target\": y_valid}\n\nasha_scheduler = AsyncHyperBandScheduler(\n    time_attr=\"training_iteration\",\n    metric=\"_metric/val_loss\",\n    mode=\"min\",\n    max_t=100,\n    grace_period=10,\n    reduction_factor=3,\n    brackets=1,\n)\n\nanalysis = tune.run(\n    tune.with_parameters(training_function, X_train=X_train, X_val=X_val),\n    resources_per_trial={\"cpu\": 1, \"gpu\": 0},\n    progress_reporter=JupyterNotebookReporter(overwrite=True),\n    scheduler=asha_scheduler,\n    config=config,\n    callbacks=[\n        WandbLoggerCallback(\n            project=config[\"wandb\"][\"project\"],\n            # api_key_file=config[\"wandb\"][\"api_key_file\"],\n            api_key=config[\"wandb\"][\"api_key\"],\n            log_config=True,\n        )\n    ],\n)\n
config = { \"batch_size\": tune.grid_search([1000, 5000]), \"wandb\": { \"project\": \"test\", # \"api_key_file\": os.getcwd() + \"/wandb_api.key\", \"api_key\": \"WNB_API_KEY\", }, } # Optimizers deep_opt = SGD(model.deeptabular.parameters(), lr=0.1) # LR Scheduler deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3) @wandb_mixin def training_function(config, X_train, X_val): early_stopping = EarlyStopping() model_checkpoint = ModelCheckpoint(save_best_only=True) # Hyperparameters batch_size = config[\"batch_size\"] trainer = Trainer( model, objective=\"binary_focal_loss\", callbacks=[ RayTuneReporter, WnBReportBest(wb=wandb), early_stopping, model_checkpoint, ], lr_schedulers={\"deeptabular\": deep_sch}, initializers={\"deeptabular\": XavierNormal}, optimizers={\"deeptabular\": deep_opt}, metrics=[accuracy, precision, recall, f1], verbose=0, ) trainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=batch_size) X_train = {\"X_tab\": X_tab_train, \"target\": y_train} X_val = {\"X_tab\": X_tab_valid, \"target\": y_valid} asha_scheduler = AsyncHyperBandScheduler( time_attr=\"training_iteration\", metric=\"_metric/val_loss\", mode=\"min\", max_t=100, grace_period=10, reduction_factor=3, brackets=1, ) analysis = tune.run( tune.with_parameters(training_function, X_train=X_train, X_val=X_val), resources_per_trial={\"cpu\": 1, \"gpu\": 0}, progress_reporter=JupyterNotebookReporter(overwrite=True), scheduler=asha_scheduler, config=config, callbacks=[ WandbLoggerCallback( project=config[\"wandb\"][\"project\"], # api_key_file=config[\"wandb\"][\"api_key_file\"], api_key=config[\"wandb\"][\"api_key\"], log_config=True, ) ], )
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/lib/python3.10/tempfile.py:860: ResourceWarning: Implicitly cleaning up <TemporaryDirectory '/var/folders/_2/lrjn1qn54c758tdtktr1bvkc0000gn/T/tmp60pfyl1kwandb'>\n  _warnings.warn(warn_message, ResourceWarning)\n/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/lib/python3.10/tempfile.py:860: ResourceWarning: Implicitly cleaning up <TemporaryDirectory '/var/folders/_2/lrjn1qn54c758tdtktr1bvkc0000gn/T/tmpnjv2rg1wwandb-artifacts'>\n  _warnings.warn(warn_message, ResourceWarning)\n/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/lib/python3.10/tempfile.py:860: ResourceWarning: Implicitly cleaning up <TemporaryDirectory '/var/folders/_2/lrjn1qn54c758tdtktr1bvkc0000gn/T/tmpgebu5k1kwandb-media'>\n  _warnings.warn(warn_message, ResourceWarning)\n/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/lib/python3.10/tempfile.py:860: ResourceWarning: Implicitly cleaning up <TemporaryDirectory '/var/folders/_2/lrjn1qn54c758tdtktr1bvkc0000gn/T/tmpxy9y2yriwandb-media'>\n  _warnings.warn(warn_message, ResourceWarning)\n
In\u00a0[14]: Copied!
analysis.results\n
analysis.results Out[14]:
{'fc9a8_00000': {'_metric': {'train_loss': 0.006297602537127896,\n   'train_Accuracy': 0.9925042986869812,\n   'train_Precision': 0.9939393997192383,\n   'train_Recall': 0.15814851224422455,\n   'train_F1Score': 0.2728785574436188,\n   'val_loss': 0.005045663565397263,\n   'val_Accuracy': 0.9946483969688416,\n   'val_Precision': 1.0,\n   'val_Recall': 0.39534884691238403,\n   'val_F1Score': 0.5666667222976685},\n  'time_this_iter_s': 2.388202428817749,\n  'done': True,\n  'timesteps_total': None,\n  'episodes_total': None,\n  'training_iteration': 5,\n  'trial_id': 'fc9a8_00000',\n  'experiment_id': 'baad1d4f3d924b48b9ece1b9f26c80cc',\n  'date': '2022-07-31_14-06-51',\n  'timestamp': 1659276411,\n  'time_total_s': 12.656474113464355,\n  'pid': 1813,\n  'hostname': 'jupyter-5uperpalo',\n  'node_ip': '10.32.44.172',\n  'config': {'batch_size': 1000},\n  'time_since_restore': 12.656474113464355,\n  'timesteps_since_restore': 0,\n  'iterations_since_restore': 5,\n  'warmup_time': 0.8006253242492676,\n  'experiment_tag': '0_batch_size=1000'},\n 'fc9a8_00001': {'_metric': {'train_loss': 0.02519632239515583,\n   'train_Accuracy': 0.9910891652107239,\n   'train_Precision': 0.25,\n   'train_Recall': 0.0009643201483413577,\n   'train_F1Score': 0.0019212296465411782,\n   'val_loss': 0.02578434906899929,\n   'val_Accuracy': 0.9911492466926575,\n   'val_Precision': 0.0,\n   'val_Recall': 0.0,\n   'val_F1Score': 0.0},\n  'time_this_iter_s': 4.113586902618408,\n  'done': True,\n  'timesteps_total': None,\n  'episodes_total': None,\n  'training_iteration': 5,\n  'trial_id': 'fc9a8_00001',\n  'experiment_id': 'f2e54a6a5780429fbf0db0746853347e',\n  'date': '2022-07-31_14-06-56',\n  'timestamp': 1659276416,\n  'time_total_s': 12.926990509033203,\n  'pid': 1962,\n  'hostname': 'jupyter-5uperpalo',\n  'node_ip': '10.32.44.172',\n  'config': {'batch_size': 5000},\n  'time_since_restore': 12.926990509033203,\n  'timesteps_since_restore': 0,\n  'iterations_since_restore': 5,\n  'warmup_time': 0.9253025054931641,\n  'experiment_tag': '1_batch_size=5000'}}

Using Weights & Biases logging you can create parallel coordinates graphs that map parameter combinations to the best (lowest) loss achieved during the training of the networks.

Local visualization of RayTune results using TensorBoard:

In\u00a0[23]: Copied!
%load_ext tensorboard\n%tensorboard --logdir ~/ray_results\n
%load_ext tensorboard %tensorboard --logdir ~/ray_results"},{"location":"examples/10_3rd_party_integration-RayTune_WnB.html#3rd-party-integration-raytune-weights-biases","title":"3rd party integration - RayTune, Weights & Biases\u00b6","text":"

This notebook provides a guideline for integrating external library functionality into the model training process through Callback objects, a popular pattern of passing objects as arguments to other objects.

[DISCLAIMER]

We show the integration of RayTune (a hyperparameter tuning framework) and Weights & Biases (an experiment tracking and versioning solution for ML projects) into the pytorch_widedeep model training process. We did not include RayTuneReporter and WnBReportBest in the library code to minimize dependencies on libraries that are not directly involved in model design and training.

"},{"location":"examples/10_3rd_party_integration-RayTune_WnB.html#initial-imports","title":"Initial imports\u00b6","text":""},{"location":"examples/10_3rd_party_integration-RayTune_WnB.html#preparing-the-data","title":"Preparing the data\u00b6","text":""},{"location":"examples/10_3rd_party_integration-RayTune_WnB.html#define-the-model","title":"Define the model\u00b6","text":""},{"location":"examples/11_auc_multiclass.html","title":"11_auc_multiclass","text":"In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nfrom torch.optim import SGD, lr_scheduler\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom torchmetrics import AUROC\nfrom pytorch_widedeep.initializers import XavierNormal\nfrom pytorch_widedeep.datasets import load_ecoli\nfrom pytorch_widedeep.utils import LabelEncoder\n\nfrom sklearn.model_selection import train_test_split\n\n# increase displayed columns in jupyter notebook\npd.set_option(\"display.max_columns\", 200)\npd.set_option(\"display.max_rows\", 300)\n
import numpy as np import pandas as pd from torch.optim import SGD, lr_scheduler from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.models import TabMlp, WideDeep from torchmetrics import AUROC from pytorch_widedeep.initializers import XavierNormal from pytorch_widedeep.datasets import load_ecoli from pytorch_widedeep.utils import LabelEncoder from sklearn.model_selection import train_test_split # increase displayed columns in jupyter notebook pd.set_option(\"display.max_columns\", 200) pd.set_option(\"display.max_rows\", 300) In\u00a0[2]: Copied!
df = load_ecoli(as_frame=True)\ndf.head()\n
df = load_ecoli(as_frame=True) df.head() Out[2]: SequenceName mcg gvh lip chg aac alm1 alm2 class 0 AAT_ECOLI 0.49 0.29 0.48 0.5 0.56 0.24 0.35 cp 1 ACEA_ECOLI 0.07 0.40 0.48 0.5 0.54 0.35 0.44 cp 2 ACEK_ECOLI 0.56 0.40 0.48 0.5 0.49 0.37 0.46 cp 3 ACKA_ECOLI 0.59 0.49 0.48 0.5 0.52 0.45 0.36 cp 4 ADI_ECOLI 0.23 0.32 0.48 0.5 0.55 0.25 0.35 cp In\u00a0[3]: Copied!
# imbalance of the classes\ndf[\"class\"].value_counts()\n
# imbalance of the classes df[\"class\"].value_counts() Out[3]:
class\ncp     143\nim      77\npp      52\nimU     35\nom      20\nomL      5\nimS      2\nimL      2\nName: count, dtype: int64
In\u00a0[4]: Copied!
df = df.loc[~df[\"class\"].isin([\"omL\", \"imS\", \"imL\"])]\ndf.reset_index(inplace=True, drop=True)\n
df = df.loc[~df[\"class\"].isin([\"omL\", \"imS\", \"imL\"])] df.reset_index(inplace=True, drop=True) In\u00a0[5]: Copied!
encoder = LabelEncoder([\"class\"])\ndf_enc = encoder.fit_transform(df)\ndf_enc[\"class\"] = df_enc[\"class\"] - 1\n
encoder = LabelEncoder([\"class\"]) df_enc = encoder.fit_transform(df) df_enc[\"class\"] = df_enc[\"class\"] - 1 In\u00a0[6]: Copied!
# drop columns we won't need in this example\ndf_enc = df_enc.drop(columns=[\"SequenceName\"])\n
# drop columns we won't need in this example df_enc = df_enc.drop(columns=[\"SequenceName\"]) In\u00a0[7]: Copied!
df_train, df_valid = train_test_split(\n    df_enc, test_size=0.2, stratify=df_enc[\"class\"], random_state=1\n)\ndf_valid, df_test = train_test_split(\n    df_valid, test_size=0.5, stratify=df_valid[\"class\"], random_state=1\n)\n
df_train, df_valid = train_test_split( df_enc, test_size=0.2, stratify=df_enc[\"class\"], random_state=1 ) df_valid, df_test = train_test_split( df_valid, test_size=0.5, stratify=df_valid[\"class\"], random_state=1 ) In\u00a0[8]: Copied!
continuous_cols = df_enc.drop(columns=[\"class\"]).columns.values.tolist()\n
continuous_cols = df_enc.drop(columns=[\"class\"]).columns.values.tolist() In\u00a0[9]: Copied!
# deeptabular\ntab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True)\nX_tab_train = tab_preprocessor.fit_transform(df_train)\nX_tab_valid = tab_preprocessor.transform(df_valid)\nX_tab_test = tab_preprocessor.transform(df_test)\n\n# target\ny_train = df_train[\"class\"].values\ny_valid = df_valid[\"class\"].values\ny_test = df_test[\"class\"].values\n\nX_train = {\"X_tab\": X_tab_train, \"target\": y_train}\nX_val = {\"X_tab\": X_tab_valid, \"target\": y_valid}\n
# deeptabular tab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True) X_tab_train = tab_preprocessor.fit_transform(df_train) X_tab_valid = tab_preprocessor.transform(df_valid) X_tab_test = tab_preprocessor.transform(df_test) # target y_train = df_train[\"class\"].values y_valid = df_valid[\"class\"].values y_test = df_test[\"class\"].values X_train = {\"X_tab\": X_tab_train, \"target\": y_train} X_val = {\"X_tab\": X_tab_valid, \"target\": y_valid}
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:295: DeprecationWarning: 'scale' and 'already_standard' will be deprecated in the next release. Please use 'cols_to_scale' instead\n  self._check_inputs(cat_embed_cols)\n
In\u00a0[10]: Copied!
deeptabular = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\nmodel = WideDeep(deeptabular=deeptabular, pred_dim=df_enc[\"class\"].nunique())\nmodel\n
deeptabular = TabMlp( column_idx=tab_preprocessor.column_idx, continuous_cols=tab_preprocessor.continuous_cols, ) model = WideDeep(deeptabular=deeptabular, pred_dim=df_enc[\"class\"].nunique()) model Out[10]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cont_norm): Identity()\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=7, out_features=200, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=200, out_features=100, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=100, out_features=5, bias=True)\n  )\n)
In\u00a0[11]: Copied!
auroc = AUROC(num_classes=df_enc[\"class\"].nunique(), task=\"multiclass\")\n
auroc = AUROC(num_classes=df_enc[\"class\"].nunique(), task=\"multiclass\") In\u00a0[12]: Copied!
# Optimizers\ndeep_opt = SGD(model.deeptabular.parameters(), lr=0.1)\n# LR Scheduler\ndeep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)\n# Hyperparameters\ntrainer = Trainer(\n    model,\n    objective=\"multiclass_focal_loss\",\n    lr_schedulers={\"deeptabular\": deep_sch},\n    initializers={\"deeptabular\": XavierNormal},\n    optimizers={\"deeptabular\": deep_opt},\n    metrics=[auroc],\n)\n\ntrainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50)\n
# Optimizers deep_opt = SGD(model.deeptabular.parameters(), lr=0.1) # LR Scheduler deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3) # Hyperparameters trainer = Trainer( model, objective=\"multiclass_focal_loss\", lr_schedulers={\"deeptabular\": deep_sch}, initializers={\"deeptabular\": XavierNormal}, optimizers={\"deeptabular\": deep_opt}, metrics=[auroc], ) trainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 6/6 [00:00<00:00, 54.59it/s, loss=0.109, metrics={'MulticlassAUROC': 0.314}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1/1 [00:00<00:00, 98.35it/s, loss=0.105, metrics={'MulticlassAUROC': 0.2558}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 6/6 [00:00<00:00, 91.55it/s, loss=0.105, metrics={'MulticlassAUROC': 0.3546}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1/1 [00:00<00:00, 111.68it/s, loss=0.101, metrics={'MulticlassAUROC': 0.2737}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 6/6 [00:00<00:00, 62.55it/s, loss=0.1, metrics={'MulticlassAUROC': 0.3795}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1/1 [00:00<00:00, 108.51it/s, loss=0.0966, metrics={'MulticlassAUROC': 0.3053}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 6/6 [00:00<00:00, 99.35it/s, loss=0.0965, metrics={'MulticlassAUROC': 0.3809}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1/1 [00:00<00:00, 117.73it/s, loss=0.0962, metrics={'MulticlassAUROC': 0.3089}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 6/6 [00:00<00:00, 110.56it/s, loss=0.0967, 
metrics={'MulticlassAUROC': 0.3509}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 1/1 [00:00<00:00, 127.35it/s, loss=0.0958, metrics={'MulticlassAUROC': 0.3089}]\n
In\u00a0[\u00a0]: Copied!
\n
"},{"location":"examples/11_auc_multiclass.html#auc-multiclass-computation","title":"AUC multiclass computation\u00b6","text":""},{"location":"examples/11_auc_multiclass.html#initial-imports","title":"Initial imports\u00b6","text":""},{"location":"examples/11_auc_multiclass.html#preparing-the-data","title":"Preparing the data\u00b6","text":""},{"location":"examples/11_auc_multiclass.html#define-the-model","title":"Define the model\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html","title":"12_ZILNLoss_origkeras_vs_pytorch_widedeep","text":"In\u00a0[1]: Copied!
# @title Copyright 2019 The Lifetime Value Authors.\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     https://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ============================================================================\n
# @title Copyright 2019 The Lifetime Value Authors. # Licensed under the Apache License, Version 2.0 (the \"License\"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an \"AS IS\" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ In\u00a0[3]: Copied!
import os\n\nimport numpy as np\nimport pandas as pd\nfrom scipy import stats\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nimport tensorflow as tf\nimport tensorflow_probability as tfp\nfrom typing import Sequence\n\n# install and import ltv\n!pip install -q git+https://github.com/google/lifetime_value\nimport lifetime_value as ltv\n
import os import numpy as np import pandas as pd from scipy import stats import matplotlib.pyplot as plt import seaborn as sns import tensorflow as tf import tensorflow_probability as tfp from typing import Sequence # install and import ltv !pip install -q git+https://github.com/google/lifetime_value import lifetime_value as ltv In\u00a0[\u00a0]: Copied!
tfd = tfp.distributions\n%config InlineBackend.figure_format='retina'\nsns.set_style(\"whitegrid\")\n
tfd = tfp.distributions %config InlineBackend.figure_format='retina' sns.set_style(\"whitegrid\") In\u00a0[\u00a0]: Copied!
MODEL = \"dnn\"\nLOSS = \"ziln\"  # @param { isTemplate: true, type: 'string'} ['mse', 'ziln']\nLEARNING_RATE = 0.001  # @param { isTemplate: true}\nVERSION = 0  # @param { isTemplate: true, type: 'integer'}\nOUTPUT_CSV_FOLDER = \"/tmp/lifetime-value/kdd_cup_98/result\"  # @param { isTemplate: true, type: 'string'}\n
MODEL = \"dnn\" LOSS = \"ziln\" # @param { isTemplate: true, type: 'string'} ['mse', 'ziln'] LEARNING_RATE = 0.001 # @param { isTemplate: true} VERSION = 0 # @param { isTemplate: true, type: 'integer'} OUTPUT_CSV_FOLDER = \"/tmp/lifetime-value/kdd_cup_98/result\" # @param { isTemplate: true, type: 'string'}

Download kdd_cup_98 data to /tmp/lifetime-value/kdd_cup_98

In\u00a0[\u00a0]: Copied!
%%bash\nmkdir -p /tmp/lifetime-value/kdd_cup_98\nwget https://kdd.ics.uci.edu/databases/kddcup98/epsilon_mirror/cup98lrn.zip -P /tmp/lifetime-value/kdd_cup_98/\nwget https://kdd.ics.uci.edu/databases/kddcup98/epsilon_mirror/cup98val.zip -P /tmp/lifetime-value/kdd_cup_98/\nwget https://kdd.ics.uci.edu/databases/kddcup98/epsilon_mirror/valtargt.txt -P /tmp/lifetime-value/kdd_cup_98/\ncd /tmp/lifetime-value/kdd_cup_98/\nunzip cup98lrn.zip\nunzip cup98val.zip\n
%%bash mkdir -p /tmp/lifetime-value/kdd_cup_98 wget https://kdd.ics.uci.edu/databases/kddcup98/epsilon_mirror/cup98lrn.zip -P /tmp/lifetime-value/kdd_cup_98/ wget https://kdd.ics.uci.edu/databases/kddcup98/epsilon_mirror/cup98val.zip -P /tmp/lifetime-value/kdd_cup_98/ wget https://kdd.ics.uci.edu/databases/kddcup98/epsilon_mirror/valtargt.txt -P /tmp/lifetime-value/kdd_cup_98/ cd /tmp/lifetime-value/kdd_cup_98/ unzip cup98lrn.zip unzip cup98val.zip In\u00a0[\u00a0]: Copied!
df_train = pd.read_csv(\"/tmp/lifetime-value/kdd_cup_98/cup98LRN.txt\")\nnum_train = df_train.shape[0]\ndf_eval = pd.read_csv(\"/tmp/lifetime-value/kdd_cup_98/cup98VAL.txt\")\ndf_eval_target = pd.read_csv(\"/tmp/lifetime-value/kdd_cup_98/valtargt.txt\")\ndf_eval = df_eval.merge(df_eval_target, on=\"CONTROLN\")\n
df_train = pd.read_csv(\"/tmp/lifetime-value/kdd_cup_98/cup98LRN.txt\") num_train = df_train.shape[0] df_eval = pd.read_csv(\"/tmp/lifetime-value/kdd_cup_98/cup98VAL.txt\") df_eval_target = pd.read_csv(\"/tmp/lifetime-value/kdd_cup_98/valtargt.txt\") df_eval = df_eval.merge(df_eval_target, on=\"CONTROLN\") In\u00a0[\u00a0]: Copied!
df = pd.concat([df_train, df_eval], axis=0, sort=True)\n
df = pd.concat([df_train, df_eval], axis=0, sort=True) In\u00a0[\u00a0]: Copied!
y = df[\"TARGET_D\"][:num_train]\n
y = df[\"TARGET_D\"][:num_train] In\u00a0[\u00a0]: Copied!
def plot_hist_log_scale(y):\n    max_val = y.max() + 1.0\n    ax = pd.Series(y).hist(\n        figsize=(8, 5), bins=10 ** np.linspace(0.0, np.log10(max_val), 20)\n    )\n\n    plt.xlabel(\"Donation ($)\")\n    plt.ylabel(\"Count\")\n    # plt.title('Histogram of LTV')\n    plt.xticks(rotation=\"horizontal\")\n    plt.legend(loc=\"upper left\")\n    ax.set_xscale(\"log\")\n    ax.grid(False)\n    # Hide the right and top spines\n    ax.spines[\"right\"].set_visible(False)\n    ax.spines[\"top\"].set_visible(False)\n    # Only show ticks on the left and bottom spines\n    ax.yaxis.set_ticks_position(\"left\")\n    ax.xaxis.set_ticks_position(\"bottom\")\n    plt.show()\n\n    fig = ax.get_figure()\n    output_file = tf.io.gfile.GFile(\n        \"/tmp/lifetime-value/kdd_cup_98/histogram_kdd98_log_scale.pdf\", \"wb\"\n    )\n    fig.savefig(output_file, bbox_inches=\"tight\", format=\"pdf\")\n
def plot_hist_log_scale(y): max_val = y.max() + 1.0 ax = pd.Series(y).hist( figsize=(8, 5), bins=10 ** np.linspace(0.0, np.log10(max_val), 20) ) plt.xlabel(\"Donation ($)\") plt.ylabel(\"Count\") # plt.title('Histogram of LTV') plt.xticks(rotation=\"horizontal\") plt.legend(loc=\"upper left\") ax.set_xscale(\"log\") ax.grid(False) # Hide the right and top spines ax.spines[\"right\"].set_visible(False) ax.spines[\"top\"].set_visible(False) # Only show ticks on the left and bottom spines ax.yaxis.set_ticks_position(\"left\") ax.xaxis.set_ticks_position(\"bottom\") plt.show() fig = ax.get_figure() output_file = tf.io.gfile.GFile( \"/tmp/lifetime-value/kdd_cup_98/histogram_kdd98_log_scale.pdf\", \"wb\" ) fig.savefig(output_file, bbox_inches=\"tight\", format=\"pdf\") In\u00a0[\u00a0]: Copied!
plot_hist_log_scale(y[y > 0])\n
plot_hist_log_scale(y[y > 0]) In\u00a0[\u00a0]: Copied!
VOCAB_FEATURES = [\n    \"ODATEDW\",  # date of donor's first gift (YYMM)\n    \"OSOURCE\",  # donor acquisition mailing list\n    \"TCODE\",  # donor title code\n    \"STATE\",\n    \"ZIP\",\n    \"DOMAIN\",  # urbanicity level and socio-economic status of the neighborhood\n    \"CLUSTER\",  # socio-economic status\n    \"GENDER\",\n    \"MAXADATE\",  # date of the most recent promotion received\n    \"MINRDATE\",\n    \"LASTDATE\",\n    \"FISTDATE\",\n    \"RFA_2A\",\n]\n
VOCAB_FEATURES = [ \"ODATEDW\", # date of donor's first gift (YYMM) \"OSOURCE\", # donor acquisition mailing list \"TCODE\", # donor title code \"STATE\", \"ZIP\", \"DOMAIN\", # urbanicity level and socio-economic status of the neighborhood \"CLUSTER\", # socio-economic status \"GENDER\", \"MAXADATE\", # date of the most recent promotion received \"MINRDATE\", \"LASTDATE\", \"FISTDATE\", \"RFA_2A\", ] In\u00a0[\u00a0]: Copied!
df[\"ODATEDW\"] = df[\"ODATEDW\"].astype(\"str\")\ndf[\"TCODE\"] = df[\"TCODE\"].apply(lambda x: \"{:03d}\".format(x // 1000 if x > 1000 else x))\ndf[\"ZIP\"] = df[\"ZIP\"].str.slice(0, 5)\ndf[\"MAXADATE\"] = df[\"MAXADATE\"].astype(\"str\")\ndf[\"MINRDATE\"] = df[\"MINRDATE\"].astype(\"str\")\ndf[\"LASTDATE\"] = df[\"LASTDATE\"].astype(\"str\")\ndf[\"FISTDATE\"] = df[\"FISTDATE\"].astype(\"str\")\n
df[\"ODATEDW\"] = df[\"ODATEDW\"].astype(\"str\") df[\"TCODE\"] = df[\"TCODE\"].apply(lambda x: \"{:03d}\".format(x // 1000 if x > 1000 else x)) df[\"ZIP\"] = df[\"ZIP\"].str.slice(0, 5) df[\"MAXADATE\"] = df[\"MAXADATE\"].astype(\"str\") df[\"MINRDATE\"] = df[\"MINRDATE\"].astype(\"str\") df[\"LASTDATE\"] = df[\"LASTDATE\"].astype(\"str\") df[\"FISTDATE\"] = df[\"FISTDATE\"].astype(\"str\") In\u00a0[\u00a0]: Copied!
def label_encoding(y, frequency_threshold=100):\n    value_counts = pd.value_counts(y)\n    categories = value_counts[value_counts >= frequency_threshold].index.to_numpy()\n    # 0 indicates the unknown category.\n    return pd.Categorical(y, categories=categories).codes + 1\n
def label_encoding(y, frequency_threshold=100): value_counts = pd.value_counts(y) categories = value_counts[value_counts >= frequency_threshold].index.to_numpy() # 0 indicates the unknown category. return pd.Categorical(y, categories=categories).codes + 1 In\u00a0[\u00a0]: Copied!
for key in VOCAB_FEATURES:\n    df[key] = label_encoding(df[key])\n
for key in VOCAB_FEATURES: df[key] = label_encoding(df[key]) In\u00a0[\u00a0]: Copied!
MAIL_ORDER_RESPONSES = [\n    \"MBCRAFT\",\n    \"MBGARDEN\",\n    \"MBBOOKS\",\n    \"MBCOLECT\",\n    \"MAGFAML\",\n    \"MAGFEM\",\n    \"MAGMALE\",\n    \"PUBGARDN\",\n    \"PUBCULIN\",\n    \"PUBHLTH\",\n    \"PUBDOITY\",\n    \"PUBNEWFN\",\n    \"PUBPHOTO\",\n    \"PUBOPP\",\n    \"RFA_2F\",\n]\n
MAIL_ORDER_RESPONSES = [ \"MBCRAFT\", \"MBGARDEN\", \"MBBOOKS\", \"MBCOLECT\", \"MAGFAML\", \"MAGFEM\", \"MAGMALE\", \"PUBGARDN\", \"PUBCULIN\", \"PUBHLTH\", \"PUBDOITY\", \"PUBNEWFN\", \"PUBPHOTO\", \"PUBOPP\", \"RFA_2F\", ] In\u00a0[\u00a0]: Copied!
INDICATOR_FEATURES = [\n    \"AGE\",  # age decile, 0 indicates unknown\n    \"NUMCHLD\",\n    \"INCOME\",\n    \"WEALTH1\",\n    \"HIT\",\n] + MAIL_ORDER_RESPONSES\n
INDICATOR_FEATURES = [ \"AGE\", # age decile, 0 indicates unknown \"NUMCHLD\", \"INCOME\", \"WEALTH1\", \"HIT\", ] + MAIL_ORDER_RESPONSES In\u00a0[\u00a0]: Copied!
df[\"AGE\"] = pd.qcut(df[\"AGE\"].values, 10).codes + 1\ndf[\"NUMCHLD\"] = df[\"NUMCHLD\"].apply(lambda x: 0 if np.isnan(x) else int(x))\ndf[\"INCOME\"] = df[\"INCOME\"].apply(lambda x: 0 if np.isnan(x) else int(x))\ndf[\"WEALTH1\"] = df[\"WEALTH1\"].apply(lambda x: 0 if np.isnan(x) else int(x) + 1)\ndf[\"HIT\"] = pd.qcut(df[\"HIT\"].values, q=50, duplicates=\"drop\").codes\n\nfor col in MAIL_ORDER_RESPONSES:\n    df[col] = pd.qcut(df[col].values, q=20, duplicates=\"drop\").codes + 1\n
df[\"AGE\"] = pd.qcut(df[\"AGE\"].values, 10).codes + 1 df[\"NUMCHLD\"] = df[\"NUMCHLD\"].apply(lambda x: 0 if np.isnan(x) else int(x)) df[\"INCOME\"] = df[\"INCOME\"].apply(lambda x: 0 if np.isnan(x) else int(x)) df[\"WEALTH1\"] = df[\"WEALTH1\"].apply(lambda x: 0 if np.isnan(x) else int(x) + 1) df[\"HIT\"] = pd.qcut(df[\"HIT\"].values, q=50, duplicates=\"drop\").codes for col in MAIL_ORDER_RESPONSES: df[col] = pd.qcut(df[col].values, q=20, duplicates=\"drop\").codes + 1 In\u00a0[\u00a0]: Copied!
NUMERIC_FEATURES = [\n    # binary\n    \"MAILCODE\",  # bad address\n    \"NOEXCH\",  # do not exchange\n    \"RECINHSE\",  # donor has given to PVA's in house program\n    \"RECP3\",  # donor has given to PVA's P3 program\n    \"RECPGVG\",  # planned giving record\n    \"RECSWEEP\",  # sweepstakes record\n    \"HOMEOWNR\",  # home owner\n    \"CHILD03\",\n    \"CHILD07\",\n    \"CHILD12\",\n    \"CHILD18\",\n    # continuous\n    \"CARDPROM\",\n    \"NUMPROM\",\n    \"CARDPM12\",\n    \"NUMPRM12\",\n    \"RAMNTALL\",\n    \"NGIFTALL\",\n    \"MINRAMNT\",\n    \"MAXRAMNT\",\n    \"LASTGIFT\",\n    \"AVGGIFT\",\n]\n
NUMERIC_FEATURES = [ # binary \"MAILCODE\", # bad address \"NOEXCH\", # do not exchange \"RECINHSE\", # donor has given to PVA's in house program \"RECP3\", # donor has given to PVA's P3 program \"RECPGVG\", # planned giving record \"RECSWEEP\", # sweepstakes record \"HOMEOWNR\", # home owner \"CHILD03\", \"CHILD07\", \"CHILD12\", \"CHILD18\", # continuous \"CARDPROM\", \"NUMPROM\", \"CARDPM12\", \"NUMPRM12\", \"RAMNTALL\", \"NGIFTALL\", \"MINRAMNT\", \"MAXRAMNT\", \"LASTGIFT\", \"AVGGIFT\", ] In\u00a0[\u00a0]: Copied!
df[\"MAILCODE\"] = (df[\"MAILCODE\"] == \"B\").astype(\"float32\")\ndf[\"PVASTATE\"] = df[\"PVASTATE\"].isin([\"P\", \"E\"]).astype(\"float32\")\ndf[\"NOEXCH\"] = df[\"NOEXCH\"].isin([\"X\", \"1\"]).astype(\"float32\")\ndf[\"RECINHSE\"] = (df[\"RECINHSE\"] == \"X\").astype(\"float32\")\ndf[\"RECP3\"] = (df[\"RECP3\"] == \"X\").astype(\"float32\")\ndf[\"RECPGVG\"] = (df[\"RECPGVG\"] == \"X\").astype(\"float32\")\ndf[\"RECSWEEP\"] = (df[\"RECSWEEP\"] == \"X\").astype(\"float32\")\ndf[\"HOMEOWNR\"] = (df[\"HOMEOWNR\"] == \"H\").astype(\"float32\")\ndf[\"CHILD03\"] = df[\"CHILD03\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\")\ndf[\"CHILD07\"] = df[\"CHILD07\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\")\ndf[\"CHILD12\"] = df[\"CHILD12\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\")\ndf[\"CHILD18\"] = df[\"CHILD18\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\")\n\ndf[\"CARDPROM\"] = df[\"CARDPROM\"] / 100\ndf[\"NUMPROM\"] = df[\"NUMPROM\"] / 100\ndf[\"CARDPM12\"] = df[\"CARDPM12\"] / 100\ndf[\"NUMPRM12\"] = df[\"NUMPRM12\"] / 100\ndf[\"RAMNTALL\"] = np.log1p(df[\"RAMNTALL\"])\ndf[\"NGIFTALL\"] = np.log1p(df[\"NGIFTALL\"])\ndf[\"MINRAMNT\"] = np.log1p(df[\"MINRAMNT\"])\ndf[\"MAXRAMNT\"] = np.log1p(df[\"MAXRAMNT\"])\ndf[\"LASTGIFT\"] = np.log1p(df[\"LASTGIFT\"])\ndf[\"AVGGIFT\"] = np.log1p(df[\"AVGGIFT\"])\n
df[\"MAILCODE\"] = (df[\"MAILCODE\"] == \"B\").astype(\"float32\") df[\"PVASTATE\"] = df[\"PVASTATE\"].isin([\"P\", \"E\"]).astype(\"float32\") df[\"NOEXCH\"] = df[\"NOEXCH\"].isin([\"X\", \"1\"]).astype(\"float32\") df[\"RECINHSE\"] = (df[\"RECINHSE\"] == \"X\").astype(\"float32\") df[\"RECP3\"] = (df[\"RECP3\"] == \"X\").astype(\"float32\") df[\"RECPGVG\"] = (df[\"RECPGVG\"] == \"X\").astype(\"float32\") df[\"RECSWEEP\"] = (df[\"RECSWEEP\"] == \"X\").astype(\"float32\") df[\"HOMEOWNR\"] = (df[\"HOMEOWNR\"] == \"H\").astype(\"float32\") df[\"CHILD03\"] = df[\"CHILD03\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\") df[\"CHILD07\"] = df[\"CHILD07\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\") df[\"CHILD12\"] = df[\"CHILD12\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\") df[\"CHILD18\"] = df[\"CHILD18\"].isin([\"M\", \"F\", \"B\"]).astype(\"float32\") df[\"CARDPROM\"] = df[\"CARDPROM\"] / 100 df[\"NUMPROM\"] = df[\"NUMPROM\"] / 100 df[\"CARDPM12\"] = df[\"CARDPM12\"] / 100 df[\"NUMPRM12\"] = df[\"NUMPRM12\"] / 100 df[\"RAMNTALL\"] = np.log1p(df[\"RAMNTALL\"]) df[\"NGIFTALL\"] = np.log1p(df[\"NGIFTALL\"]) df[\"MINRAMNT\"] = np.log1p(df[\"MINRAMNT\"]) df[\"MAXRAMNT\"] = np.log1p(df[\"MAXRAMNT\"]) df[\"LASTGIFT\"] = np.log1p(df[\"LASTGIFT\"]) df[\"AVGGIFT\"] = np.log1p(df[\"AVGGIFT\"]) In\u00a0[\u00a0]: Copied!
CATEGORICAL_FEATURES = VOCAB_FEATURES + INDICATOR_FEATURES\nALL_FEATURES = CATEGORICAL_FEATURES + NUMERIC_FEATURES\n
CATEGORICAL_FEATURES = VOCAB_FEATURES + INDICATOR_FEATURES ALL_FEATURES = CATEGORICAL_FEATURES + NUMERIC_FEATURES In\u00a0[\u00a0]: Copied!
def dnn_split(df):\n    df_train = df.iloc[:num_train]\n    df_eval = df.iloc[num_train:]\n\n    def feature_dict(df):\n        features = {k: v.values for k, v in dict(df[CATEGORICAL_FEATURES]).items()}\n        features[\"numeric\"] = df[NUMERIC_FEATURES].astype(\"float32\").values\n        return features\n\n    x_train, y_train = (\n        feature_dict(df_train),\n        df_train[\"TARGET_D\"].astype(\"float32\").values,\n    )\n    x_eval, y_eval = feature_dict(df_eval), df_eval[\"TARGET_D\"].astype(\"float32\").values\n\n    return x_train, x_eval, y_train, y_eval\n
def dnn_split(df): df_train = df.iloc[:num_train] df_eval = df.iloc[num_train:] def feature_dict(df): features = {k: v.values for k, v in dict(df[CATEGORICAL_FEATURES]).items()} features[\"numeric\"] = df[NUMERIC_FEATURES].astype(\"float32\").values return features x_train, y_train = ( feature_dict(df_train), df_train[\"TARGET_D\"].astype(\"float32\").values, ) x_eval, y_eval = feature_dict(df_eval), df_eval[\"TARGET_D\"].astype(\"float32\").values return x_train, x_eval, y_train, y_eval In\u00a0[\u00a0]: Copied!
def embedding_dim(x):\n    return int(x**0.25) + 1\n\n\ndef embedding_layer(vocab_size):\n    return tf.keras.Sequential(\n        [\n            tf.keras.layers.Embedding(\n                input_dim=vocab_size,\n                output_dim=embedding_dim(vocab_size),\n                input_length=1,\n            ),\n            tf.keras.layers.Flatten(),\n        ]\n    )\n\n\ndef dnn_model(output_units):\n    numeric_input = tf.keras.layers.Input(\n        shape=(len(NUMERIC_FEATURES),), name=\"numeric\"\n    )\n\n    embedding_inputs = [\n        tf.keras.layers.Input(shape=(1,), name=key, dtype=np.int64)\n        for key in CATEGORICAL_FEATURES\n    ]\n\n    embedding_outputs = [\n        embedding_layer(vocab_size=df[key].max() + 1)(input)\n        for key, input in zip(CATEGORICAL_FEATURES, embedding_inputs)\n    ]\n\n    deep_input = tf.keras.layers.concatenate([numeric_input] + embedding_outputs)\n    deep_model = tf.keras.Sequential(\n        [\n            tf.keras.layers.Dense(128, activation=\"relu\"),\n            tf.keras.layers.Dense(128, activation=\"relu\"),\n            tf.keras.layers.Dense(64, activation=\"relu\"),\n            tf.keras.layers.Dense(64, activation=\"relu\"),\n            tf.keras.layers.Dense(units=output_units),\n        ]\n    )\n    return tf.keras.Model(\n        inputs=[numeric_input] + embedding_inputs, outputs=deep_model(deep_input)\n    )\n
def embedding_dim(x): return int(x**0.25) + 1 def embedding_layer(vocab_size): return tf.keras.Sequential( [ tf.keras.layers.Embedding( input_dim=vocab_size, output_dim=embedding_dim(vocab_size), input_length=1, ), tf.keras.layers.Flatten(), ] ) def dnn_model(output_units): numeric_input = tf.keras.layers.Input( shape=(len(NUMERIC_FEATURES),), name=\"numeric\" ) embedding_inputs = [ tf.keras.layers.Input(shape=(1,), name=key, dtype=np.int64) for key in CATEGORICAL_FEATURES ] embedding_outputs = [ embedding_layer(vocab_size=df[key].max() + 1)(input) for key, input in zip(CATEGORICAL_FEATURES, embedding_inputs) ] deep_input = tf.keras.layers.concatenate([numeric_input] + embedding_outputs) deep_model = tf.keras.Sequential( [ tf.keras.layers.Dense(128, activation=\"relu\"), tf.keras.layers.Dense(128, activation=\"relu\"), tf.keras.layers.Dense(64, activation=\"relu\"), tf.keras.layers.Dense(64, activation=\"relu\"), tf.keras.layers.Dense(units=output_units), ] ) return tf.keras.Model( inputs=[numeric_input] + embedding_inputs, outputs=deep_model(deep_input) ) In\u00a0[\u00a0]: Copied!
if LOSS == \"mse\":\n    loss = tf.keras.losses.MeanSquaredError()\n    output_units = 1\n\nif LOSS == \"ziln\":\n    loss = ltv.zero_inflated_lognormal_loss\n    output_units = 3\n
if LOSS == \"mse\": loss = tf.keras.losses.MeanSquaredError() output_units = 1 if LOSS == \"ziln\": loss = ltv.zero_inflated_lognormal_loss output_units = 3 In\u00a0[\u00a0]: Copied!
x_train, x_eval, y_train, y_eval = dnn_split(df)\nmodel = dnn_model(output_units)\n
x_train, x_eval, y_train, y_eval = dnn_split(df) model = dnn_model(output_units) In\u00a0[\u00a0]: Copied!
model.compile(optimizer=tf.keras.optimizers.Nadam(lr=LEARNING_RATE), loss=loss)\n
model.compile(optimizer=tf.keras.optimizers.Nadam(lr=LEARNING_RATE), loss=loss) In\u00a0[\u00a0]: Copied!
callbacks = [\n    tf.keras.callbacks.ReduceLROnPlateau(monitor=\"val_loss\", min_lr=1e-6),\n    tf.keras.callbacks.EarlyStopping(monitor=\"val_loss\", patience=10),\n]\n
callbacks = [ tf.keras.callbacks.ReduceLROnPlateau(monitor=\"val_loss\", min_lr=1e-6), tf.keras.callbacks.EarlyStopping(monitor=\"val_loss\", patience=10), ] In\u00a0[\u00a0]: Copied!
history = model.fit(\n    x=x_train,\n    y=y_train,\n    batch_size=2048,\n    epochs=200,\n    verbose=2,\n    callbacks=callbacks,\n    validation_data=(x_eval, y_eval),\n).history\n
history = model.fit( x=x_train, y=y_train, batch_size=2048, epochs=200, verbose=2, callbacks=callbacks, validation_data=(x_eval, y_eval), ).history In\u00a0[\u00a0]: Copied!
pd.DataFrame(history)[[\"loss\", \"val_loss\"]].plot();\n
pd.DataFrame(history)[[\"loss\", \"val_loss\"]].plot(); In\u00a0[\u00a0]: Copied!
if LOSS == \"mse\":\n    y_pred = model.predict(x=x_eval, batch_size=1024).flatten()\n\nif LOSS == \"ziln\":\n    logits = model.predict(x=x_eval, batch_size=1024)\n    y_pred = ltv.zero_inflated_lognormal_pred(logits).numpy().flatten()\n
if LOSS == \"mse\": y_pred = model.predict(x=x_eval, batch_size=1024).flatten() if LOSS == \"ziln\": logits = model.predict(x=x_eval, batch_size=1024) y_pred = ltv.zero_inflated_lognormal_pred(logits).numpy().flatten() In\u00a0[\u00a0]: Copied!
from pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.training import Trainer\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom torch.optim.lr_scheduler import ReduceLROnPlateau\nfrom pytorch_widedeep.callbacks import EarlyStopping\nfrom torch.optim import NAdam\n
from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.training import Trainer from pytorch_widedeep.models import TabMlp, WideDeep from torch.optim.lr_scheduler import ReduceLROnPlateau from pytorch_widedeep.callbacks import EarlyStopping from torch.optim import NAdam In\u00a0[\u00a0]: Copied!
# CATEGORICAL_FEATURES\nNUMERICAL_FEATURES = [\"num\" + str(i) for i in range(21)]\nx_train_pyt_num = pd.DataFrame(columns=NUMERICAL_FEATURES, data=x_train[\"numeric\"])\nx_train_pyt_cat = pd.DataFrame(\n    {key: value for key, value in x_train.items() if key not in [\"numeric\"]}\n)\n\nx_eval_pyt_num = pd.DataFrame(columns=NUMERICAL_FEATURES, data=x_eval[\"numeric\"])\nx_eval_pyt_cat = pd.DataFrame(\n    {key: value for key, value in x_eval.items() if key not in [\"numeric\"]}\n)\n
# CATEGORICAL_FEATURES NUMERICAL_FEATURES = [\"num\" + str(i) for i in range(21)] x_train_pyt_num = pd.DataFrame(columns=NUMERICAL_FEATURES, data=x_train[\"numeric\"]) x_train_pyt_cat = pd.DataFrame( {key: value for key, value in x_train.items() if key not in [\"numeric\"]} ) x_eval_pyt_num = pd.DataFrame(columns=NUMERICAL_FEATURES, data=x_eval[\"numeric\"]) x_eval_pyt_cat = pd.DataFrame( {key: value for key, value in x_eval.items() if key not in [\"numeric\"]} ) In\u00a0[\u00a0]: Copied!
x_train_pyt = pd.concat([x_train_pyt_num, x_train_pyt_cat], axis=1)\nx_eval_pyt = pd.concat([x_eval_pyt_num, x_eval_pyt_cat], axis=1)\n
x_train_pyt = pd.concat([x_train_pyt_num, x_train_pyt_cat], axis=1) x_eval_pyt = pd.concat([x_eval_pyt_num, x_eval_pyt_cat], axis=1) In\u00a0[\u00a0]: Copied!
embed_input = [\n    (u, int(x_train_pyt[u].nunique() ** 0.25) + 1) for u in CATEGORICAL_FEATURES\n]\n
embed_input = [ (u, int(x_train_pyt[u].nunique() ** 0.25) + 1) for u in CATEGORICAL_FEATURES ] In\u00a0[\u00a0]: Copied!
# deeptabular\ntab_preprocessor = TabPreprocessor(\n    embed_cols=embed_input,\n    continuous_cols=NUMERICAL_FEATURES,\n    shared_embed=False,\n    scale=False,\n)\nX_tab_train = tab_preprocessor.fit_transform(x_train_pyt)\nX_tab_valid = tab_preprocessor.transform(x_eval_pyt)\nX_tab_test = tab_preprocessor.transform(x_eval_pyt)\n\n# target\ny_train = y_train\ny_valid = y_eval\ny_test = y_train\n\nX_train = {\"X_tab\": X_tab_train, \"target\": y_train}\nX_val = {\"X_tab\": X_tab_valid, \"target\": y_valid}\nX_test = {\"X_tab\": X_tab_test}\n\ndeeptabular = TabMlp(\n    mlp_hidden_dims=[128, 128, 64, 64],\n    column_idx=tab_preprocessor.column_idx,\n    embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\n\nmodel = WideDeep(deeptabular=deeptabular, pred_dim=3)\n\ndeep_opt = NAdam(model.deeptabular.parameters(), lr=LEARNING_RATE)\ncallbacks = [EarlyStopping()]\ndeep_sch = ReduceLROnPlateau(deep_opt, min_lr=1e-6)\n\nobjective = \"ziln\"\n\ntrainer = Trainer(\n    model,\n    callbacks=callbacks,\n    lr_schedulers={\"deeptabular\": deep_sch},\n    objective=objective,\n    optimizers={\"deeptabular\": deep_opt},\n)\n\ntrainer.fit(\n    X_train=X_train,\n    X_val=X_val,\n    n_epochs=200,\n    batch_size=2048,\n)\n\ny_pred_pytorch = trainer.predict(X_test=X_test)\n
# deeptabular tab_preprocessor = TabPreprocessor( embed_cols=embed_input, continuous_cols=NUMERICAL_FEATURES, shared_embed=False, scale=False, ) X_tab_train = tab_preprocessor.fit_transform(x_train_pyt) X_tab_valid = tab_preprocessor.transform(x_eval_pyt) X_tab_test = tab_preprocessor.transform(x_eval_pyt) # target y_train = y_train y_valid = y_eval y_test = y_train X_train = {\"X_tab\": X_tab_train, \"target\": y_train} X_val = {\"X_tab\": X_tab_valid, \"target\": y_valid} X_test = {\"X_tab\": X_tab_test} deeptabular = TabMlp( mlp_hidden_dims=[128, 128, 64, 64], column_idx=tab_preprocessor.column_idx, embed_input=tab_preprocessor.cat_embed_input, continuous_cols=tab_preprocessor.continuous_cols, ) model = WideDeep(deeptabular=deeptabular, pred_dim=3) deep_opt = NAdam(model.deeptabular.parameters(), lr=LEARNING_RATE) callbacks = [EarlyStopping()] deep_sch = ReduceLROnPlateau(deep_opt, min_lr=1e-6) objective = \"ziln\" trainer = Trainer( model, callbacks=callbacks, lr_schedulers={\"deeptabular\": deep_sch}, objective=objective, optimizers={\"deeptabular\": deep_opt}, ) trainer.fit( X_train=X_train, X_val=X_val, n_epochs=200, batch_size=2048, ) y_pred_pytorch = trainer.predict(X_test=X_test) In\u00a0[\u00a0]: Copied!
pd.DataFrame(trainer.history)[[\"train_loss\", \"val_loss\"]].plot();\n
pd.DataFrame(trainer.history)[[\"train_loss\", \"val_loss\"]].plot(); In\u00a0[\u00a0]: Copied!
from sklearn.metrics import mean_squared_error\n\nmean_squared_error(y_pred, y_pred_pytorch)\n
from sklearn.metrics import mean_squared_error mean_squared_error(y_pred, y_pred_pytorch) In\u00a0[\u00a0]: Copied!
unit_costs = [0.4, 0.5, 0.6, 0.68, 0.7, 0.8, 0.9, 1.0]\n
unit_costs = [0.4, 0.5, 0.6, 0.68, 0.7, 0.8, 0.9, 1.0] In\u00a0[\u00a0]: Copied!
num_mailed = [np.sum(y_pred > v) for v in unit_costs]\nnum_mailed\n
num_mailed = [np.sum(y_pred > v) for v in unit_costs] num_mailed In\u00a0[\u00a0]: Copied!
baseline_total_profit = np.sum(y_eval - 0.68)\nbaseline_total_profit\n
baseline_total_profit = np.sum(y_eval - 0.68) baseline_total_profit In\u00a0[\u00a0]: Copied!
total_profits = [np.sum(y_eval[y_pred > v] - v) for v in unit_costs]\ntotal_profits\n
total_profits = [np.sum(y_eval[y_pred > v] - v) for v in unit_costs] total_profits In\u00a0[\u00a0]: Copied!
gain = pd.DataFrame(\n    {\n        \"lorenz\": ltv.cumulative_true(y_eval, y_eval),\n        \"baseline\": ltv.cumulative_true(y_eval, x_eval[\"numeric\"][:, 19]),\n        \"model\": ltv.cumulative_true(y_eval, y_pred),\n    }\n)\n
gain = pd.DataFrame( { \"lorenz\": ltv.cumulative_true(y_eval, y_eval), \"baseline\": ltv.cumulative_true(y_eval, x_eval[\"numeric\"][:, 19]), \"model\": ltv.cumulative_true(y_eval, y_pred), } ) In\u00a0[\u00a0]: Copied!
num_customers = np.float32(gain.shape[0])\ngain[\"cumulative_customer\"] = (np.arange(num_customers) + 1.0) / num_customers\n
num_customers = np.float32(gain.shape[0]) gain[\"cumulative_customer\"] = (np.arange(num_customers) + 1.0) / num_customers In\u00a0[\u00a0]: Copied!
ax = gain[\n    [\n        \"cumulative_customer\",\n        \"lorenz\",\n        \"baseline\",\n        \"model\",\n    ]\n].plot(x=\"cumulative_customer\", figsize=(8, 5), legend=True)\n\nax.legend([\"Groundtruth\", \"Baseline\", \"Model\"], loc=\"lower right\")\n\nax.set_xlabel(\"Cumulative Fraction of Customers\")\nax.set_xticks(np.arange(0, 1.1, 0.1))\nax.set_xlim((0, 1.0))\n\nax.set_ylabel(\"Cumulative Fraction of Total Lifetime Value\")\nax.set_yticks(np.arange(0, 1.1, 0.1))\nax.set_ylim((0, 1.05))\nax.set_title(\"Gain Chart\");\n
ax = gain[ [ \"cumulative_customer\", \"lorenz\", \"baseline\", \"model\", ] ].plot(x=\"cumulative_customer\", figsize=(8, 5), legend=True) ax.legend([\"Groundtruth\", \"Baseline\", \"Model\"], loc=\"lower right\") ax.set_xlabel(\"Cumulative Fraction of Customers\") ax.set_xticks(np.arange(0, 1.1, 0.1)) ax.set_xlim((0, 1.0)) ax.set_ylabel(\"Cumulative Fraction of Total Lifetime Value\") ax.set_yticks(np.arange(0, 1.1, 0.1)) ax.set_ylim((0, 1.05)) ax.set_title(\"Gain Chart\"); In\u00a0[\u00a0]: Copied!
gini = ltv.gini_from_gain(gain[[\"lorenz\", \"baseline\", \"model\"]])\ngini\n
gini = ltv.gini_from_gain(gain[[\"lorenz\", \"baseline\", \"model\"]]) gini In\u00a0[\u00a0]: Copied!
df_decile = ltv.decile_stats(y_eval, y_pred)\ndf_decile\n
df_decile = ltv.decile_stats(y_eval, y_pred) df_decile In\u00a0[\u00a0]: Copied!
ax = df_decile[[\"label_mean\", \"pred_mean\"]].plot.bar(rot=0)\n\nax.set_title(\"Decile Chart\")\nax.set_xlabel(\"Prediction bucket\")\nax.set_ylabel(\"Average bucket value\")\nax.legend([\"Label\", \"Prediction\"], loc=\"upper left\");\n
ax = df_decile[[\"label_mean\", \"pred_mean\"]].plot.bar(rot=0) ax.set_title(\"Decile Chart\") ax.set_xlabel(\"Prediction bucket\") ax.set_ylabel(\"Average bucket value\") ax.legend([\"Label\", \"Prediction\"], loc=\"upper left\"); In\u00a0[\u00a0]: Copied!
def spearmanr(x1: Sequence[float], x2: Sequence[float]) -> float:\n    \"\"\"Calculates spearmanr rank correlation coefficient.\n\n    See https://docs.scipy.org/doc/scipy/reference/stats.html.\n\n    Args:\n      x1: 1D array_like.\n      x2: 1D array_like.\n\n    Returns:\n      correlation: float.\n    \"\"\"\n    return stats.spearmanr(x1, x2, nan_policy=\"raise\")[0]\n\n\nspearman_corr = spearmanr(y_eval, y_pred)\nspearman_corr\n
def spearmanr(x1: Sequence[float], x2: Sequence[float]) -> float: \"\"\"Calculates spearmanr rank correlation coefficient. See https://docs.scipy.org/doc/scipy/reference/stats.html. Args: x1: 1D array_like. x2: 1D array_like. Returns: correlation: float. \"\"\" return stats.spearmanr(x1, x2, nan_policy=\"raise\")[0] spearman_corr = spearmanr(y_eval, y_pred) spearman_corr In\u00a0[\u00a0]: Copied!
df_metrics = pd.DataFrame(\n    {\n        \"model\": MODEL,\n        \"loss_function\": LOSS,\n        \"train_loss\": history[\"loss\"][-1],\n        \"eval_loss\": history[\"val_loss\"][-1],\n        \"label_positive\": np.mean(y_eval > 0),\n        \"label_mean\": y_eval.mean(),\n        \"pred_mean\": y_pred.mean(),\n        \"decile_mape\": df_decile[\"decile_mape\"].mean(),\n        \"baseline_gini\": gini[\"normalized\"][1],\n        \"gini\": gini[\"normalized\"][2],\n        \"spearman_corr\": spearman_corr,\n    },\n    index=[VERSION],\n)\n
df_metrics = pd.DataFrame( { \"model\": MODEL, \"loss_function\": LOSS, \"train_loss\": history[\"loss\"][-1], \"eval_loss\": history[\"val_loss\"][-1], \"label_positive\": np.mean(y_eval > 0), \"label_mean\": y_eval.mean(), \"pred_mean\": y_pred.mean(), \"decile_mape\": df_decile[\"decile_mape\"].mean(), \"baseline_gini\": gini[\"normalized\"][1], \"gini\": gini[\"normalized\"][2], \"spearman_corr\": spearman_corr, }, index=[VERSION], ) In\u00a0[\u00a0]: Copied!
for unit_cost, total_profit in zip(unit_costs, total_profits):\n    df_metrics[\"total_profit_{:02d}\".format(int(unit_cost * 100))] = total_profit\n
for unit_cost, total_profit in zip(unit_costs, total_profits): df_metrics[\"total_profit_{:02d}\".format(int(unit_cost * 100))] = total_profit In\u00a0[\u00a0]: Copied!
df_metrics.T\n
df_metrics.T In\u00a0[\u00a0]: Copied!
output_path = OUTPUT_CSV_FOLDER\n
output_path = OUTPUT_CSV_FOLDER In\u00a0[\u00a0]: Copied!
if not os.path.isdir(output_path):\n    os.makedirs(output_path)\n
if not os.path.isdir(output_path): os.makedirs(output_path) In\u00a0[\u00a0]: Copied!
output_file = os.path.join(\n    output_path, \"{}_regression_{}_{}.csv\".format(MODEL, LOSS, VERSION)\n)\n
output_file = os.path.join( output_path, \"{}_regression_{}_{}.csv\".format(MODEL, LOSS, VERSION) ) In\u00a0[\u00a0]: Copied!
df_metrics.to_csv(output_file, index=False)\n
df_metrics.to_csv(output_file, index=False)"},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#zilnloss","title":"ZILNLoss\u00b6","text":"

[DISCLAIMER]

The purpose of this notebook is to check whether the ZILN loss originally implemented in Keras gives the same results as the pytorch-widedeep implementation
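For reference, the loss both implementations are expected to compute is the zero-inflated lognormal (ZILN) loss, which is why the models below use three output units. A sketch of the reference formulation, with p, mu and sigma obtained from the three outputs (via a sigmoid, the identity and a softplus, respectively); this is a reminder of what is being compared, not a quote of either code base:

\mathcal{L}_{\mathrm{ZILN}}(y) \;=\; -\,\mathbf{1}\{y=0\}\,\log(1-p) \;-\; \mathbf{1}\{y>0\}\,\bigl[\log p + \log\mathrm{LogNormal}(y;\,\mu,\sigma)\bigr]

i.e. a binary cross-entropy term for whether the value is zero plus a lognormal negative log-likelihood on the positive values.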

"},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#kdd-cup-98-ltv-prediction","title":"KDD Cup 98 LTV Prediction\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#configs","title":"Configs\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#load-data","title":"Load data\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#label-distribution","title":"Label distribution\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#preprocess-features","title":"Preprocess features\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#vocab","title":"Vocab\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#indicator","title":"Indicator\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#numeric","title":"Numeric\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#all","title":"All\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#traineval-split","title":"Train/eval split\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#model","title":"Model\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#loss","title":"Loss\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#train","title":"Train\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#eval","title":"Eval\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#pytorch-widedeep-approach","title":"Pytorch-widedeep approach\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#appendix","title":"Appendix\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#total-profit","title":"Total Profit\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#gini-coefficient","title":"Gini Coefficient\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#calibration","title":"Calibration\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#rank-correlation","title":"Rank Correlation\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#all-metrics-together","title":"All metrics together\u00b6","text":""},{"location":"examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html#save","title":"Save\u00b6","text":""},{"location":"examples/13_model_uncertainty_prediction.html","title":"13_model_uncertainty_prediction","text":"
  • In this notebook we will use the highly imbalanced Protein Homology Dataset from KDD cup 2004 (the imbalance is quantified just after the field list below)
* The first element of each line is a BLOCK ID that denotes to which native sequence this example belongs. There is a unique BLOCK ID for each native sequence. BLOCK IDs are integers running from 1 to 303 (one for each native sequence, i.e. for each query). BLOCK IDs were assigned before the blocks were split into the train and test sets, so they do not run consecutively in either file.\n* The second element of each line is an EXAMPLE ID that uniquely describes the example. You will need this EXAMPLE ID and the BLOCK ID when you submit results.\n* The third element is the class of the example. Proteins that are homologous to the native sequence are denoted by 1, non-homologous proteins (i.e. decoys) by 0. Test examples have a \"?\" in this position.\n* All following elements are feature values. There are 74 feature values in each line. The features describe the match (e.g. the score of a sequence alignment) between the native protein sequence and the sequence that is tested for homology.\n
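To quantify that imbalance (using the value counts shown further down: 144455 negatives vs 1296 positives, so the positive class is roughly 0.9% of the rows), a quick check after loading the frame could be, for instance:

pos_frac = df["target"].mean()  # fraction of positives; roughly 1296 / (144455 + 1296), i.e. about 0.009
print(f"positive class fraction: {pos_frac:.3%}")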
In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport torch\nfrom torch.optim import SGD, lr_scheduler\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom pytorch_widedeep.dataloaders import DataLoaderImbalanced, DataLoaderDefault\nfrom torchmetrics import F1Score as F1_torchmetrics\nfrom torchmetrics import Accuracy as Accuracy_torchmetrics\nfrom torchmetrics import Precision as Precision_torchmetrics\nfrom torchmetrics import Recall as Recall_torchmetrics\nfrom pytorch_widedeep.metrics import Accuracy, Recall, Precision, F1Score, R2Score\nfrom pytorch_widedeep.initializers import XavierNormal\nfrom pytorch_widedeep.datasets import load_bio_kdd04\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import classification_report\n\nimport time\nimport datetime\n\nimport warnings\n\nwarnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n\n# increase displayed columns in jupyter notebook\npd.set_option(\"display.max_columns\", 200)\npd.set_option(\"display.max_rows\", 300)\n
import numpy as np import pandas as pd import torch from torch.optim import SGD, lr_scheduler from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.models import TabMlp, WideDeep from pytorch_widedeep.dataloaders import DataLoaderImbalanced, DataLoaderDefault from torchmetrics import F1Score as F1_torchmetrics from torchmetrics import Accuracy as Accuracy_torchmetrics from torchmetrics import Precision as Precision_torchmetrics from torchmetrics import Recall as Recall_torchmetrics from pytorch_widedeep.metrics import Accuracy, Recall, Precision, F1Score, R2Score from pytorch_widedeep.initializers import XavierNormal from pytorch_widedeep.datasets import load_bio_kdd04 from sklearn.model_selection import train_test_split from sklearn.metrics import classification_report import time import datetime import warnings warnings.filterwarnings(\"ignore\", category=DeprecationWarning) # increase displayed columns in jupyter notebook pd.set_option(\"display.max_columns\", 200) pd.set_option(\"display.max_rows\", 300) In\u00a0[2]: Copied!
df = load_bio_kdd04(as_frame=True)\ndf.head()\n
df = load_bio_kdd04(as_frame=True) df.head() Out[2]: EXAMPLE_ID BLOCK_ID target 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 0 279 261532 0 52.0 32.69 0.30 2.5 20.0 1256.8 -0.89 0.33 11.0 -55.0 267.2 0.52 0.05 -2.36 49.6 252.0 0.43 1.16 -2.06 -33.0 -123.2 1.60 -0.49 -6.06 65.0 296.1 -0.28 -0.26 -3.83 -22.6 -170.0 3.06 -1.05 -3.29 22.9 286.3 0.12 2.58 4.08 -33.0 -178.9 1.88 0.53 -7.0 -44.0 1987.0 -5.41 0.95 -4.0 -57.0 722.9 -3.26 -0.55 -7.5 125.5 1547.2 -0.36 1.12 9.0 -37.0 72.5 0.47 0.74 -11.0 -8.0 1595.1 -1.64 2.83 -2.0 -50.0 445.2 -0.35 0.26 0.76 1 279 261533 0 58.0 33.33 0.00 16.5 9.5 608.1 0.50 0.07 20.5 -52.5 521.6 -1.08 0.58 -0.02 -3.2 103.6 -0.95 0.23 -2.87 -25.9 -52.2 -0.21 0.87 -1.81 10.4 62.0 -0.28 -0.04 1.48 -17.6 -198.3 3.43 2.84 5.87 -16.9 72.6 -0.31 2.79 2.71 -33.5 -11.6 -1.11 4.01 5.0 -57.0 666.3 1.13 4.38 5.0 -64.0 39.3 1.07 -0.16 32.5 100.0 1893.7 -2.80 -0.22 2.5 -28.5 45.0 0.58 0.41 -19.0 -6.0 762.9 0.29 0.82 -3.0 -35.0 140.3 1.16 0.39 0.73 2 279 261534 0 77.0 27.27 -0.91 6.0 58.5 1623.6 -1.40 0.02 -6.5 -48.0 621.0 -1.20 0.14 -0.20 73.6 609.1 -0.44 -0.58 -0.04 -23.0 -27.4 -0.72 -1.04 -1.09 91.1 635.6 -0.88 0.24 0.59 -18.7 -7.2 -0.60 -2.82 -0.71 52.4 504.1 0.89 -0.67 -9.30 -20.8 -25.7 -0.77 -0.85 0.0 -20.0 2259.0 -0.94 1.15 -4.0 -44.0 -22.7 0.94 -0.98 -19.0 105.0 1267.9 1.03 1.27 11.0 -39.5 82.3 0.47 -0.19 -10.0 7.0 1491.8 0.32 -1.29 0.0 -34.0 658.2 -0.76 0.26 0.24 3 279 261535 0 41.0 27.91 -0.35 3.0 46.0 1921.6 -1.36 -0.47 -32.0 -51.5 560.9 -0.29 -0.10 -1.11 124.3 791.6 0.00 0.39 -1.85 -21.7 -44.9 -0.21 0.02 0.89 133.9 797.8 -0.08 1.06 -0.26 -16.4 -74.1 0.97 -0.80 -0.41 66.9 955.3 -1.90 1.28 -6.65 -28.1 47.5 -1.91 1.42 1.0 -30.0 1846.7 0.76 1.10 -4.0 -52.0 -53.9 1.71 -0.22 -12.0 97.5 1969.8 -1.70 0.16 -1.0 -32.5 255.9 -0.46 1.57 10.0 6.0 2047.7 -0.98 1.53 0.0 -49.0 554.2 -0.83 0.39 0.73 4 279 261536 0 50.0 28.00 -1.32 -9.0 12.0 464.8 0.88 0.19 8.0 -51.5 98.1 1.09 -0.33 -2.16 -3.9 102.7 0.39 -1.22 -3.39 -15.2 -42.2 -1.18 -1.11 -3.55 8.9 141.3 -0.16 -0.43 -4.15 -12.9 -13.4 -1.32 -0.98 -3.69 8.8 136.1 -0.30 4.13 1.89 -13.0 -18.7 -1.37 -0.93 0.0 -1.0 810.1 -2.29 6.72 1.0 -23.0 -29.7 0.58 -1.10 -18.5 33.5 206.8 1.84 -0.13 4.0 -29.0 30.1 0.80 -0.24 5.0 -14.0 479.5 0.68 -0.59 2.0 -36.0 -6.9 2.02 0.14 -0.23 In\u00a0[3]: Copied!
# imbalance of the classes\ndf[\"target\"].value_counts()\n
# imbalance of the classes df[\"target\"].value_counts() Out[3]:
target\n0    144455\n1      1296\nName: count, dtype: int64
In\u00a0[4]: Copied!
# drop columns we won't need in this example\ndf.drop(columns=[\"EXAMPLE_ID\", \"BLOCK_ID\"], inplace=True)\n
# drop columns we won't need in this example df.drop(columns=[\"EXAMPLE_ID\", \"BLOCK_ID\"], inplace=True) In\u00a0[5]: Copied!
df_train, df_valid = train_test_split(\n    df, test_size=0.2, stratify=df[\"target\"], random_state=1\n)\ndf_valid, df_test = train_test_split(\n    df_valid, test_size=0.5, stratify=df_valid[\"target\"], random_state=1\n)\n
df_train, df_valid = train_test_split( df, test_size=0.2, stratify=df[\"target\"], random_state=1 ) df_valid, df_test = train_test_split( df_valid, test_size=0.5, stratify=df_valid[\"target\"], random_state=1 ) In\u00a0[6]: Copied!
continuous_cols = df.drop(columns=[\"target\"]).columns.values.tolist()\n
continuous_cols = df.drop(columns=[\"target\"]).columns.values.tolist() In\u00a0[7]: Copied!
# deeptabular\ntab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True)\nX_tab_train = tab_preprocessor.fit_transform(df_train)\nX_tab_valid = tab_preprocessor.transform(df_valid)\nX_tab_test = tab_preprocessor.transform(df_test)\n\n# target\ny_train = df_train[\"target\"].values\ny_valid = df_valid[\"target\"].values\ny_test = df_test[\"target\"].values\n
# deeptabular tab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True) X_tab_train = tab_preprocessor.fit_transform(df_train) X_tab_valid = tab_preprocessor.transform(df_valid) X_tab_test = tab_preprocessor.transform(df_test) # target y_train = df_train[\"target\"].values y_valid = df_valid[\"target\"].values y_test = df_test[\"target\"].values In\u00a0[8]: Copied!
input_layer = len(tab_preprocessor.continuous_cols)\noutput_layer = 1\nhidden_layers = np.linspace(\n    input_layer * 2, output_layer, 5, endpoint=False, dtype=int\n).tolist()\n
input_layer = len(tab_preprocessor.continuous_cols) output_layer = 1 hidden_layers = np.linspace( input_layer * 2, output_layer, 5, endpoint=False, dtype=int ).tolist() In\u00a0[9]: Copied!
deeptabular = TabMlp(\n    mlp_hidden_dims=hidden_layers,\n    column_idx=tab_preprocessor.column_idx,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\nmodel = WideDeep(deeptabular=deeptabular, pred_dim=1)\nmodel\n
deeptabular = TabMlp( mlp_hidden_dims=hidden_layers, column_idx=tab_preprocessor.column_idx, continuous_cols=tab_preprocessor.continuous_cols, ) model = WideDeep(deeptabular=deeptabular, pred_dim=1) model Out[9]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cont_norm): Identity()\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=74, out_features=148, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=148, out_features=118, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_2): Sequential(\n            (0): Linear(in_features=118, out_features=89, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_3): Sequential(\n            (0): Linear(in_features=89, out_features=59, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_4): Sequential(\n            (0): Linear(in_features=59, out_features=30, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=30, out_features=1, bias=True)\n  )\n)
In\u00a0[10]: Copied!
# # Metrics from torchmetrics\n# accuracy = Accuracy_torchmetrics(average=None, num_classes=1)\n# precision = Precision_torchmetrics(average=\"micro\", num_classes=1)\n# f1 = F1_torchmetrics(average=None, num_classes=1)\n# recall = Recall_torchmetrics(average=None, num_classes=1)\n
# # Metrics from torchmetrics # accuracy = Accuracy_torchmetrics(average=None, num_classes=1) # precision = Precision_torchmetrics(average=\"micro\", num_classes=1) # f1 = F1_torchmetrics(average=None, num_classes=1) # recall = Recall_torchmetrics(average=None, num_classes=1) In\u00a0[11]: Copied!
# Metrics from pytorch-widedeep\naccuracy = Accuracy(top_k=2)\nprecision = Precision(average=False)\nrecall = Recall(average=True)\nf1 = F1Score(average=False)\n
# Metrics from pytorch-widedeep accuracy = Accuracy(top_k=2) precision = Precision(average=False) recall = Recall(average=True) f1 = F1Score(average=False) In\u00a0[12]: Copied!
# Optimizers\ndeep_opt = SGD(model.deeptabular.parameters(), lr=0.1)\n# LR Scheduler\ndeep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)\n\ntrainer = Trainer(\n    model,\n    objective=\"binary\",\n    lr_schedulers={\"deeptabular\": deep_sch},\n    initializers={\"deeptabular\": XavierNormal},\n    optimizers={\"deeptabular\": deep_opt},\n    metrics=[accuracy, precision, recall, f1],\n    verbose=1,\n)\n
# Optimizers deep_opt = SGD(model.deeptabular.parameters(), lr=0.1) # LR Scheduler deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3) trainer = Trainer( model, objective=\"binary\", lr_schedulers={\"deeptabular\": deep_sch}, initializers={\"deeptabular\": XavierNormal}, optimizers={\"deeptabular\": deep_opt}, metrics=[accuracy, precision, recall, f1], verbose=1, ) In\u00a0[13]: Copied!
start = time.time()\ntrainer.fit(\n    X_train={\"X_tab\": X_tab_train, \"target\": y_train},\n    X_val={\"X_tab\": X_tab_valid, \"target\": y_valid},\n    n_epochs=3,\n    batch_size=50,\n    custom_dataloader=DataLoaderImbalanced,\n    oversample_mul=5,\n)\nprint(\n    \"Training time[s]: {}\".format(\n        datetime.timedelta(seconds=round(time.time() - start))\n    )\n)\n
start = time.time() trainer.fit( X_train={\"X_tab\": X_tab_train, \"target\": y_train}, X_val={\"X_tab\": X_tab_valid, \"target\": y_valid}, n_epochs=3, batch_size=50, custom_dataloader=DataLoaderImbalanced, oversample_mul=5, ) print( \"Training time[s]: {}\".format( datetime.timedelta(seconds=round(time.time() - start)) ) )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 208/208 [00:01<00:00, 130.75it/s, loss=0.187, metrics={'acc': 0.9214, 'prec': [0.9149], 'rec': 0.9318, 'f1': [0.9233]}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 292/292 [00:01<00:00, 173.71it/s, loss=0.106, metrics={'acc': 0.9499, 'prec': [0.1435], 'rec': 0.938, 'f1': [0.249]}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 208/208 [00:01<00:00, 139.68it/s, loss=0.109, metrics={'acc': 0.9559, 'prec': [0.9537], 'rec': 0.9572, 'f1': [0.9554]}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 292/292 [00:01<00:00, 177.53it/s, loss=0.0888, metrics={'acc': 0.9602, 'prec': [0.1755], 'rec': 0.9457, 'f1': [0.2961]}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 208/208 [00:01<00:00, 141.63it/s, loss=0.08, metrics={'acc': 0.9706, 'prec': [0.9648], 'rec': 0.9766, 'f1': [0.9707]}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 292/292 [00:01<00:00, 165.36it/s, loss=0.0969, metrics={'acc': 0.9564, 'prec': [0.1636], 'rec': 0.9535, 'f1': [0.2792]}]
Training time[s]: 0:00:10\n
\n
In\u00a0[14]: Copied!
pd.DataFrame(trainer.history)\n
pd.DataFrame(trainer.history) Out[14]: train_loss train_acc train_prec train_rec train_f1 val_loss val_acc val_prec val_rec val_f1 0 0.186707 0.921408 [0.9149412512779236] 0.931801 [0.9232940673828125] 0.106023 0.949914 [0.14353498816490173] 0.937984 [0.24897116422653198] 1 0.109498 0.955931 [0.9536514282226562] 0.957193 [0.9554190039634705] 0.088787 0.960206 [0.17553956806659698] 0.945736 [0.29611650109291077] 2 0.079979 0.970588 [0.9648183584213257] 0.976582 [0.9706646203994751] 0.096858 0.956432 [0.1635638326406479] 0.953488 [0.279228150844574] In\u00a0[15]: Copied!
df_pred = trainer.predict(X_tab=X_tab_test)\nprint(classification_report(df_test[\"target\"].to_list(), df_pred))\nprint(\"Actual predicted values:\\n{}\".format(np.unique(df_pred, return_counts=True)))\n
df_pred = trainer.predict(X_tab=X_tab_test) print(classification_report(df_test[\"target\"].to_list(), df_pred)) print(\"Actual predicted values:\\n{}\".format(np.unique(df_pred, return_counts=True)))
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 292/292 [00:00<00:00, 346.55it/s]\n
              precision    recall  f1-score   support\n\n           0       1.00      0.96      0.98     14446\n           1       0.17      0.95      0.29       130\n\n    accuracy                           0.96     14576\n   macro avg       0.58      0.95      0.63     14576\nweighted avg       0.99      0.96      0.97     14576\n\nActual predicted values:\n(array([0, 1]), array([13845,   731]))\n
In\u00a0[16]: Copied!
df_pred_unc = trainer.predict_uncertainty(X_tab=X_tab_test, uncertainty_granularity=10)\nprint(classification_report(df_test[\"target\"].to_list(), df_pred))\nprint(\n    \"Actual predicted values:\\n{}\".format(\n        np.unique(df_pred_unc[:, -1], return_counts=True)\n    )\n)\n
df_pred_unc = trainer.predict_uncertainty(X_tab=X_tab_test, uncertainty_granularity=10) print(classification_report(df_test[\"target\"].to_list(), df_pred)) print( \"Actual predicted values:\\n{}\".format( np.unique(df_pred_unc[:, -1], return_counts=True) ) )
predict_UncertaintyIter: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 10/10 [00:03<00:00,  3.25it/s]
              precision    recall  f1-score   support\n\n           0       1.00      0.96      0.98     14446\n           1       0.17      0.95      0.29       130\n\n    accuracy                           0.96     14576\n   macro avg       0.58      0.95      0.63     14576\nweighted avg       0.99      0.96      0.97     14576\n\nActual predicted values:\n(array([0.]), array([14576]))\n
\n
In\u00a0[17]: Copied!
df_pred_unc\n
df_pred_unc Out[17]:
array([[9.98401165e-01, 1.59881881e-03, 0.00000000e+00],\n       [9.99941409e-01, 5.85634953e-05, 0.00000000e+00],\n       [9.97351170e-01, 2.64881272e-03, 0.00000000e+00],\n       ...,\n       [9.99494374e-01, 5.05603210e-04, 0.00000000e+00],\n       [9.99981642e-01, 1.83574630e-05, 0.00000000e+00],\n       [9.99996483e-01, 3.52600046e-06, 0.00000000e+00]])
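A hedged reading of the array above, inferred from the values themselves (the first two columns sum to one and the last column matches the returned class): each row holds the mean probability of class 0, the mean probability of class 1 over the uncertainty_granularity internal runs, and the resulting class prediction. Under that reading it can be made explicit with a couple of lines (the column names are ours):

df_unc = pd.DataFrame(df_pred_unc, columns=[\"p_class_0\", \"p_class_1\", \"pred_class\"])
df_unc.describe()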
"},{"location":"examples/13_model_uncertainty_prediction.html#model-uncertainty-prediction","title":"Model Uncertainty prediction\u00b6","text":"

Note:

This notebook extends the \"Custom DataLoader for Imbalanced dataset\" notebook

"},{"location":"examples/13_model_uncertainty_prediction.html#initial-imports","title":"Initial imports\u00b6","text":""},{"location":"examples/13_model_uncertainty_prediction.html#preparing-the-data","title":"Preparing the data\u00b6","text":""},{"location":"examples/13_model_uncertainty_prediction.html#define-the-model","title":"Define the model\u00b6","text":""},{"location":"examples/13_model_uncertainty_prediction.html#normal-prediction","title":"\"Normal\" prediction\u00b6","text":""},{"location":"examples/13_model_uncertainty_prediction.html#prediction-using-uncertainty","title":"Prediction using uncertainty\u00b6","text":""},{"location":"examples/14_bayesian_models.html","title":"14_bayesian_models","text":"In\u00a0[1]: Copied!
import numpy as np\nimport torch\nimport pandas as pd\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import accuracy_score\n\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep.datasets import load_adult\nfrom pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint\nfrom pytorch_widedeep.preprocessing import TabPreprocessor, WidePreprocessor\nfrom pytorch_widedeep.bayesian_models import BayesianWide, BayesianTabMlp\nfrom pytorch_widedeep.training.bayesian_trainer import BayesianTrainer\n
import numpy as np import torch import pandas as pd from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score from pytorch_widedeep.metrics import Accuracy from pytorch_widedeep.datasets import load_adult from pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint from pytorch_widedeep.preprocessing import TabPreprocessor, WidePreprocessor from pytorch_widedeep.bayesian_models import BayesianWide, BayesianTabMlp from pytorch_widedeep.training.bayesian_trainer import BayesianTrainer

The first few steps are by now very familiar: they are the same as for any other model described in the other notebooks

In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\ndf[\"age_buckets\"] = pd.cut(\n    df.age, bins=[16, 25, 30, 35, 40, 45, 50, 55, 60, 91], labels=np.arange(9)\n)\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\ndf.head()\n
df = load_adult(as_frame=True) df.columns = [c.replace(\"-\", \"_\") for c in df.columns] df[\"age_buckets\"] = pd.cut( df.age, bins=[16, 25, 30, 35, 40, 45, 50, 55, 60, 91], labels=np.arange(9) ) df[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop(\"income\", axis=1, inplace=True) df.head() Out[2]: age workclass fnlwgt education educational_num marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country age_buckets income_label 0 25 Private 226802 11th 7 Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States 0 0 1 38 Private 89814 HS-grad 9 Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States 3 0 2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States 1 1 3 44 Private 160323 Some-college 10 Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States 4 1 4 18 ? 103497 Some-college 10 Never-married ? Own-child White Female 0 0 30 United-States 0 0 In\u00a0[3]: Copied!
train, test = train_test_split(df, test_size=0.2, stratify=df.income_label)\n
train, test = train_test_split(df, test_size=0.2, stratify=df.income_label) In\u00a0[4]: Copied!
wide_cols = [\n    \"age_buckets\",\n    \"education\",\n    \"relationship\",\n    \"workclass\",\n    \"occupation\",\n    \"native_country\",\n    \"gender\",\n]\ncrossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")]\n\ncat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital_gain\",\n    \"capital_loss\",\n    \"native_country\",\n]\ncontinuous_cols = [\"age\", \"hours_per_week\"]\n\ntarget = train[\"income_label\"].values\n
wide_cols = [ \"age_buckets\", \"education\", \"relationship\", \"workclass\", \"occupation\", \"native_country\", \"gender\", ] crossed_cols = [(\"education\", \"occupation\"), (\"native_country\", \"occupation\")] cat_embed_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"capital_gain\", \"capital_loss\", \"native_country\", ] continuous_cols = [\"age\", \"hours_per_week\"] target = train[\"income_label\"].values In\u00a0[5]: Copied!
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\nX_tab = wide_preprocessor.fit_transform(train)\n
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_tab = wide_preprocessor.fit_transform(train) In\u00a0[6]: Copied!
model = BayesianWide(\n    input_dim=np.unique(X_tab).shape[0],\n    prior_sigma_1=1.0,\n    prior_sigma_2=0.002,\n    prior_pi=0.8,\n    posterior_mu_init=0,\n    posterior_rho_init=-7.0,\n    pred_dim=1,  # here the models are NOT passed to a WideDeep constructor class so the output dim MUST be specified\n)\n
model = BayesianWide( input_dim=np.unique(X_tab).shape[0], prior_sigma_1=1.0, prior_sigma_2=0.002, prior_pi=0.8, posterior_mu_init=0, posterior_rho_init=-7.0, pred_dim=1, # here the models are NOT passed to a WideDeep constructor class so the output dim MUST be specified ) In\u00a0[7]: Copied!
trainer = BayesianTrainer(\n    model,\n    objective=\"binary\",\n    optimizer=torch.optim.Adam(model.parameters(), lr=0.01),\n    metrics=[Accuracy],\n)\n
trainer = BayesianTrainer( model, objective=\"binary\", optimizer=torch.optim.Adam(model.parameters(), lr=0.01), metrics=[Accuracy], ) In\u00a0[8]: Copied!
trainer.fit(\n    X_tab=X_tab,\n    target=target,\n    val_split=0.2,\n    n_epochs=2,\n    batch_size=256,\n)\n
trainer.fit( X_tab=X_tab, target=target, val_split=0.2, n_epochs=2, batch_size=256, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 123/123 [00:00<00:00, 124.32it/s, loss=163, metrics={'acc': 0.7813}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 31/31 [00:00<00:00, 238.67it/s, loss=141, metrics={'acc': 0.8219}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 123/123 [00:00<00:00, 132.81it/s, loss=140, metrics={'acc': 0.8285}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 31/31 [00:00<00:00, 190.16it/s, loss=140, metrics={'acc': 0.8298}]\n
In\u00a0[9]: Copied!
tab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols\n)\nX_tab = tab_preprocessor.fit_transform(train)\n
tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols ) X_tab = tab_preprocessor.fit_transform(train)
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[10]: Copied!
model = BayesianTabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=continuous_cols,\n    #     embed_continuous_method = \"standard\",\n    #     cont_embed_activation=\"leaky_relu\",\n    #     cont_embed_dim = 8,\n    mlp_hidden_dims=[128, 64],\n    prior_sigma_1=1.0,\n    prior_sigma_2=0.002,\n    prior_pi=0.8,\n    posterior_mu_init=0,\n    posterior_rho_init=-7.0,\n    pred_dim=1,\n)\n
model = BayesianTabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=continuous_cols, # embed_continuous_method = \"standard\", # cont_embed_activation=\"leaky_relu\", # cont_embed_dim = 8, mlp_hidden_dims=[128, 64], prior_sigma_1=1.0, prior_sigma_2=0.002, prior_pi=0.8, posterior_mu_init=0, posterior_rho_init=-7.0, pred_dim=1, ) In\u00a0[11]: Copied!
trainer = BayesianTrainer(\n    model,\n    objective=\"binary\",\n    optimizer=torch.optim.Adam(model.parameters(), lr=0.01),\n    metrics=[Accuracy],\n)\n
trainer = BayesianTrainer( model, objective=\"binary\", optimizer=torch.optim.Adam(model.parameters(), lr=0.01), metrics=[Accuracy], ) In\u00a0[12]: Copied!
trainer.fit(\n    X_tab=X_tab,\n    target=target,\n    val_split=0.2,\n    n_epochs=2,\n    batch_size=256,\n)\n
trainer.fit( X_tab=X_tab, target=target, val_split=0.2, n_epochs=2, batch_size=256, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 123/123 [00:04<00:00, 28.74it/s, loss=2e+3, metrics={'acc': 0.8007}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 31/31 [00:00<00:00, 136.89it/s, loss=1.75e+3, metrics={'acc': 0.8418}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 123/123 [00:04<00:00, 29.41it/s, loss=1.73e+3, metrics={'acc': 0.8596}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 31/31 [00:00<00:00, 143.87it/s, loss=1.71e+3, metrics={'acc': 0.8569}]\n

Beyond the headline metrics, these models are powerful because they give us a sense of the uncertainty attached to each prediction. Let's have a look

In\u00a0[13]: Copied!
X_tab_test = tab_preprocessor.transform(test)\n
X_tab_test = tab_preprocessor.transform(test) In\u00a0[14]: Copied!
preds = trainer.predict(X_tab_test, return_samples=True, n_samples=5)\n
preds = trainer.predict(X_tab_test, return_samples=True, n_samples=5)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:01<00:00, 33.92it/s]\n
In\u00a0[15]: Copied!
preds.shape\n
preds.shape Out[15]:
(5, 9769)

As we can see, the predictions have shape (5, 9769): one set of predictions for each time predict is run internally (i.e. the network is sampled and a prediction is produced, as defined by the parameter n_samples). This gives us an idea of how certain the model is about a given prediction.

Similarly, we could obtain the probabilities

In\u00a0[16]: Copied!
probs = trainer.predict_proba(X_tab_test, return_samples=True, n_samples=5)\n
probs = trainer.predict_proba(X_tab_test, return_samples=True, n_samples=5)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:01<00:00, 32.79it/s]\n
In\u00a0[17]: Copied!
probs.shape\n
probs.shape Out[17]:
(5, 9769, 2)
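A minimal sketch (ours, not part of the original notebook) of how these sampled probabilities can be collapsed into a single prediction plus a simple uncertainty measure: average over the samples and look at the spread of the positive-class probability.

mean_probs = probs.mean(axis=0)            # shape (9769, 2): average over the 5 samples
pos_prob_std = probs[:, :, 1].std(axis=0)  # per-observation spread of the positive-class probability
final_pred = mean_probs.argmax(axis=1)     # class prediction from the averaged probabilities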

And we can see how the model performs each time we sample the network

In\u00a0[18]: Copied!
for p in preds:\n    print(accuracy_score(p, test[\"income_label\"].values))\n
for p in preds: print(accuracy_score(p, test[\"income_label\"].values))
0.8559729757395844\n0.8564847988535162\n0.8567918927218753\n0.8562800696079435\n0.8558706111167981\n
In\u00a0[\u00a0]: Copied!
\n
"},{"location":"examples/14_bayesian_models.html#the-bayesian-models","title":"The Bayesian Models\u00b6","text":"

Perhaps one of the most interesting functionalities in the library is access to fully Bayesian models, used in almost exactly the same way as any of the other models in the library.

Note however that the Bayesian models are ONLY available for tabular data and, at the moment, we do not support combining them to form a Wide and Deep model.

The implementation in this library is based on the publication Weight Uncertainty in Neural Networks, by Blundell et al., 2015. Code-wise, our implementation is inspired by a number of sources:

  1. https://joshfeldman.net/WeightUncertainty/
  2. https://www.nitarshan.com/bayes-by-backprop/
  3. https://github.com/piEsposito/blitz-bayesian-deep-learning
  4. https://github.com/zackchase/mxnet-the-straight-dope/tree/master/chapter18_variational-methods-and-uncertainty

The two Bayesian models available in the library are:

  1. BayesianWide: a linear model where the non-linearities are captured via crossed columns
  2. BayesianTabMlp: a standard MLP that receives categorical embeddings and continuous columns (embedded or not), which are then passed through a series of dense layers. All parameters in the model are probabilistic.
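To make "all parameters are probabilistic" concrete, here is a minimal sketch (our own illustration, not the library's internal code) of the two ingredients from Blundell et al., 2015 that the constructor arguments used below map to: a scale-mixture Gaussian prior parameterised by prior_sigma_1, prior_sigma_2 and prior_pi, and a posterior where each weight is sampled from a Gaussian whose mean is mu and whose scale is obtained from rho via a softplus (so that posterior_rho_init = -7 yields a very small initial scale).

import torch

def scale_mixture_log_prior(w, prior_pi=0.8, prior_sigma_1=1.0, prior_sigma_2=0.002):
    # p(w) = pi * N(w; 0, sigma_1^2) + (1 - pi) * N(w; 0, sigma_2^2), summed over all weights
    n1 = torch.distributions.Normal(0.0, prior_sigma_1)
    n2 = torch.distributions.Normal(0.0, prior_sigma_2)
    prob = prior_pi * n1.log_prob(w).exp() + (1.0 - prior_pi) * n2.log_prob(w).exp()
    return torch.log(prob).sum()

def sample_weight(mu, rho):
    # the posterior scale is kept positive via a softplus: sigma = log(1 + exp(rho))
    sigma = torch.log1p(torch.exp(rho))
    return mu + sigma * torch.randn_like(mu)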
"},{"location":"examples/14_bayesian_models.html#1-bayesianwide","title":"1. BayesianWide\u00b6","text":""},{"location":"examples/14_bayesian_models.html#2-bayesiantabmlp","title":"2. BayesianTabMlp\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html","title":"15_DIR-LDS_and_FDS","text":"In\u00a0[1]: Copied!
import numpy as np\nimport pandas as pd\nimport torch\nfrom torch.optim import SGD, lr_scheduler\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom sklearn.metrics import mean_squared_error\nfrom pytorch_widedeep.initializers import XavierNormal\nfrom pytorch_widedeep.datasets import load_california_housing\n\nfrom sklearn.model_selection import train_test_split\n\nimport matplotlib.pyplot as plt\nfrom scipy.ndimage import convolve1d\nfrom scipy.ndimage import gaussian_filter1d\nfrom scipy.signal.windows import triang\nfrom pytorch_widedeep.utils.deeptabular_utils import get_kernel_window, find_bin\nfrom pytorch_widedeep.models import fds_layer\n\n# increase displayed columns in jupyter notebook\npd.set_option(\"display.max_columns\", 200)\npd.set_option(\"display.max_rows\", 300)\n
import numpy as np import pandas as pd import torch from torch.optim import SGD, lr_scheduler from pytorch_widedeep import Trainer from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.models import TabMlp, WideDeep from sklearn.metrics import mean_squared_error from pytorch_widedeep.initializers import XavierNormal from pytorch_widedeep.datasets import load_california_housing from sklearn.model_selection import train_test_split import matplotlib.pyplot as plt from scipy.ndimage import convolve1d from scipy.ndimage import gaussian_filter1d from scipy.signal.windows import triang from pytorch_widedeep.utils.deeptabular_utils import get_kernel_window, find_bin from pytorch_widedeep.models import fds_layer # increase displayed columns in jupyter notebook pd.set_option(\"display.max_columns\", 200) pd.set_option(\"display.max_rows\", 300) In\u00a0[2]: Copied!
df = load_california_housing(as_frame=True)\ndf.head()\n
df = load_california_housing(as_frame=True) df.head() Out[2]: MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude Longitude MedHouseVal 0 8.3252 41.0 6.984127 1.023810 322.0 2.555556 37.88 -122.23 4.526 1 8.3014 21.0 6.238137 0.971880 2401.0 2.109842 37.86 -122.22 3.585 2 7.2574 52.0 8.288136 1.073446 496.0 2.802260 37.85 -122.24 3.521 3 5.6431 52.0 5.817352 1.073059 558.0 2.547945 37.85 -122.25 3.413 4 3.8462 52.0 6.281853 1.081081 565.0 2.181467 37.85 -122.25 3.422 In\u00a0[3]: Copied!
ks = 5\nsigma = 2\nhalf_ks = (ks - 1) // 2\nbase_kernel = [0.0] * half_ks + [1.0] + [0.0] * half_ks\nkernel_window = gaussian_filter1d(base_kernel, sigma=sigma)\nplt.plot(kernel_window)\n
ks = 5 sigma = 2 half_ks = (ks - 1) // 2 base_kernel = [0.0] * half_ks + [1.0] + [0.0] * half_ks kernel_window = gaussian_filter1d(base_kernel, sigma=sigma) plt.plot(kernel_window) Out[3]:
[<matplotlib.lines.Line2D at 0x16a0a43a0>]
In\u00a0[4]: Copied!
lds = True\nkernel = \"gaussian\"\nks = 5\nsigma = 2\nreweight = \"sqrt\"\nY = df[\"MedHouseVal\"].values\nlds_y_max = None\nlds_y_min = None\ngranularity = 100\n\nfig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1)\n\ny_max = max(Y) if lds_y_max is None else lds_y_max\ny_min = min(Y) if lds_y_min is None else lds_y_min\nbin_edges = np.linspace(y_min, y_max, num=granularity, endpoint=True)\nvalue_dict = dict(zip(bin_edges[:-1], np.histogram(Y, bin_edges)[0]))\n\nax1.set_title(\"Histogram of values in the Y\")\nax1.bar(\n    value_dict.keys(),\n    value_dict.values(),\n    width=list(value_dict.keys())[1] - list(value_dict.keys())[0],\n)\n\nif reweight:\n    value_dict = dict(zip(value_dict.keys(), np.sqrt(list(value_dict.values()))))\n\nif kernel is not None:\n    lds_kernel_window = get_kernel_window(kernel, ks, sigma)\n    smoothed_values = convolve1d(\n        list(value_dict.values()), weights=lds_kernel_window, mode=\"constant\"\n    )\n    weigths = sum(smoothed_values) / (len(smoothed_values) * smoothed_values)\nelse:\n    values = list(value_dict.values())\n    weigths = sum(values) / (len(values) * values)  # type: ignore[operator]\nvalue_dict = dict(zip(value_dict.keys(), weigths))\n\nleft_bin_edges = find_bin(bin_edges, Y)\nweights = np.array([value_dict[edge] for edge in left_bin_edges], dtype=\"float32\")\n\n\nax2.set_title(\"Bar plot with inverse-balanced weights for each bin from histogram\")\nax2.bar(\n    value_dict.keys(),\n    value_dict.values(),\n    width=list(value_dict.keys())[1] - list(value_dict.keys())[0],\n)\nfig.tight_layout()\n
lds = True kernel = \"gaussian\" ks = 5 sigma = 2 reweight = \"sqrt\" Y = df[\"MedHouseVal\"].values lds_y_max = None lds_y_min = None granularity = 100 fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1) y_max = max(Y) if lds_y_max is None else lds_y_max y_min = min(Y) if lds_y_min is None else lds_y_min bin_edges = np.linspace(y_min, y_max, num=granularity, endpoint=True) value_dict = dict(zip(bin_edges[:-1], np.histogram(Y, bin_edges)[0])) ax1.set_title(\"Histogram of values in the Y\") ax1.bar( value_dict.keys(), value_dict.values(), width=list(value_dict.keys())[1] - list(value_dict.keys())[0], ) if reweight: value_dict = dict(zip(value_dict.keys(), np.sqrt(list(value_dict.values())))) if kernel is not None: lds_kernel_window = get_kernel_window(kernel, ks, sigma) smoothed_values = convolve1d( list(value_dict.values()), weights=lds_kernel_window, mode=\"constant\" ) weigths = sum(smoothed_values) / (len(smoothed_values) * smoothed_values) else: values = list(value_dict.values()) weigths = sum(values) / (len(values) * values) # type: ignore[operator] value_dict = dict(zip(value_dict.keys(), weigths)) left_bin_edges = find_bin(bin_edges, Y) weights = np.array([value_dict[edge] for edge in left_bin_edges], dtype=\"float32\") ax2.set_title(\"Bar plot with inverse-balanced weights for each bin from histogram\") ax2.bar( value_dict.keys(), value_dict.values(), width=list(value_dict.keys())[1] - list(value_dict.keys())[0], ) fig.tight_layout() In\u00a0[5]: Copied!
features = torch.tensor(df.drop(columns=[\"MedHouseVal\"]).values)\nlabels = torch.tensor(np.vstack(df[\"MedHouseVal\"].values))\nFDS = fds_layer.FDSLayer(feature_dim=features.size(1))\n
features = torch.tensor(df.drop(columns=[\"MedHouseVal\"]).values) labels = torch.tensor(np.vstack(df[\"MedHouseVal\"].values)) FDS = fds_layer.FDSLayer(feature_dim=features.size(1)) In\u00a0[6]: Copied!
for epoch in range(3):\n    FDS.update_last_epoch_stats(epoch)\n    FDS.update_running_stats(torch.clone(features).detach(), labels, epoch)\n
for epoch in range(3): FDS.update_last_epoch_stats(epoch) FDS.update_running_stats(torch.clone(features).detach(), labels, epoch) In\u00a0[7]: Copied!
pd.DataFrame(FDS.running_mean_last_epoch.numpy()).iloc[:, 7].plot(\n    title=\"Running mean bin values for 'Longitude' feature\"\n);\n
pd.DataFrame(FDS.running_mean_last_epoch.numpy()).iloc[:, 7].plot( title=\"Running mean bin values for 'Longitude' feature\" ); In\u00a0[8]: Copied!
pd.DataFrame(FDS.smoothed_mean_last_epoch.numpy()).iloc[:, 7].plot(\n    title=\"Smoothed mean bin values for 'Longitude' feature\"\n);\n
pd.DataFrame(FDS.smoothed_mean_last_epoch.numpy()).iloc[:, 7].plot( title=\"Smoothed mean bin values for 'Longitude' feature\" ); In\u00a0[9]: Copied!
smoothed_features = FDS._smooth(torch.clone(features).detach(), labels, epoch).numpy()\nleft_bin_edges_indices = find_bin(\n    FDS.bin_edges, labels.squeeze(), ret_value=False\n).numpy()\ncontinuous_cols = df.drop(columns=[\"MedHouseVal\"]).columns.values.tolist()\n\ndf_w_bins = df.copy()\ndf_w_bins[\"MedHouseVal_bins\"] = left_bin_edges_indices\ndf_smoothed_w_bins = df_w_bins.copy()\ndf_smoothed_w_bins[continuous_cols] = smoothed_features\n
smoothed_features = FDS._smooth(torch.clone(features).detach(), labels, epoch).numpy() left_bin_edges_indices = find_bin( FDS.bin_edges, labels.squeeze(), ret_value=False ).numpy() continuous_cols = df.drop(columns=[\"MedHouseVal\"]).columns.values.tolist() df_w_bins = df.copy() df_w_bins[\"MedHouseVal_bins\"] = left_bin_edges_indices df_smoothed_w_bins = df_w_bins.copy() df_smoothed_w_bins[continuous_cols] = smoothed_features In\u00a0[10]: Copied!
df_w_bins[df_w_bins[\"MedHouseVal_bins\"] == 5][\"Longitude\"].plot(\n    title=\"Longitude feature values before calibration\"\n);\n
df_w_bins[df_w_bins[\"MedHouseVal_bins\"] == 5][\"Longitude\"].plot( title=\"Longitude feature values before calibration\" ); In\u00a0[11]: Copied!
df_smoothed_w_bins[df_w_bins[\"MedHouseVal_bins\"] == 5][\"Longitude\"].plot(\n    title=\"Longitude feature values after calibration\\n(only slight change in values)\"\n);\n
df_smoothed_w_bins[df_w_bins[\"MedHouseVal_bins\"] == 5][\"Longitude\"].plot( title=\"Longitude feature values after calibration\\n(only slight change in values)\" ); In\u00a0[12]: Copied!
df_train, df_valid = train_test_split(df, test_size=0.2, random_state=1)\ndf_valid, df_test = train_test_split(df_valid, test_size=0.5, random_state=1)\n
df_train, df_valid = train_test_split(df, test_size=0.2, random_state=1) df_valid, df_test = train_test_split(df_valid, test_size=0.5, random_state=1) In\u00a0[13]: Copied!
continuous_cols = df.drop(columns=[\"MedHouseVal\"]).columns.values.tolist()\n
continuous_cols = df.drop(columns=[\"MedHouseVal\"]).columns.values.tolist() In\u00a0[14]: Copied!
# deeptabular\ntab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True)\nX_tab_train = tab_preprocessor.fit_transform(df_train)\nX_tab_valid = tab_preprocessor.transform(df_valid)\nX_tab_test = tab_preprocessor.transform(df_test)\n\n# target\ny_train = df_train[\"MedHouseVal\"].values\ny_valid = df_valid[\"MedHouseVal\"].values\ny_test = df_test[\"MedHouseVal\"].values\n\nX_train = {\"X_tab\": X_tab_train, \"target\": y_train}\nX_val = {\"X_tab\": X_tab_valid, \"target\": y_valid}\n
# deeptabular tab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True) X_tab_train = tab_preprocessor.fit_transform(df_train) X_tab_valid = tab_preprocessor.transform(df_valid) X_tab_test = tab_preprocessor.transform(df_test) # target y_train = df_train[\"MedHouseVal\"].values y_valid = df_valid[\"MedHouseVal\"].values y_test = df_test[\"MedHouseVal\"].values X_train = {\"X_tab\": X_tab_train, \"target\": y_train} X_val = {\"X_tab\": X_tab_valid, \"target\": y_valid}
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:295: DeprecationWarning: 'scale' and 'already_standard' will be deprecated in the next release. Please use 'cols_to_scale' instead\n  self._check_inputs(cat_embed_cols)\n
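Following the deprecation message above, the same preprocessor could presumably be written with the newer argument; something along these lines (assuming all the continuous columns should be scaled):

tab_preprocessor = TabPreprocessor(
    continuous_cols=continuous_cols, cols_to_scale=continuous_cols
)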
In\u00a0[15]: Copied!
deeptabular = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\nmodel = WideDeep(deeptabular=deeptabular, with_fds=True)\nmodel\n
deeptabular = TabMlp( column_idx=tab_preprocessor.column_idx, continuous_cols=tab_preprocessor.continuous_cols, ) model = WideDeep(deeptabular=deeptabular, with_fds=True) model Out[15]:
WideDeep(\n  (deeptabular): TabMlp(\n    (cont_norm): Identity()\n    (encoder): MLP(\n      (mlp): Sequential(\n        (dense_layer_0): Sequential(\n          (0): Linear(in_features=8, out_features=200, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n        (dense_layer_1): Sequential(\n          (0): Linear(in_features=200, out_features=100, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n      )\n    )\n  )\n  (fds_layer): FDSLayer(\n    (pred_layer): Linear(in_features=100, out_features=1, bias=True)\n  )\n)
In\u00a0[16]: Copied!
# Optimizers\ndeep_opt = SGD(model.deeptabular.parameters(), lr=0.1)\n# LR Scheduler\ndeep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)\n# Hyperparameters\ntrainer = Trainer(\n    model,\n    objective=\"huber\",\n    lr_schedulers={\"deeptabular\": deep_sch},\n    initializers={\n        \"deeptabular\": XavierNormal,\n        \"fds_layer\": XavierNormal,\n        # \"FDS_dropout\": XavierNormal,\n        # \"pred_layer\": XavierNormal,\n    },\n    optimizers={\"deeptabular\": deep_opt},\n    metrics=[],\n    with_lds=True,\n    lds_kernel=\"gaussian\",\n    lds_ks=5,\n    lds_sigma=2,\n    lds_granularity=100,\n    lds_reweight=False,\n    lds_y_max=None,\n    lds_y_min=None,\n)\n\ntrainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50)\n\nprint(\n    f\"test RMSE: {mean_squared_error(y_test, trainer.predict(X_tab=X_tab_test), squared=False)}\"\n)\n
# Optimizers deep_opt = SGD(model.deeptabular.parameters(), lr=0.1) # LR Scheduler deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3) # Hyperparameters trainer = Trainer( model, objective=\"huber\", lr_schedulers={\"deeptabular\": deep_sch}, initializers={ \"deeptabular\": XavierNormal, \"fds_layer\": XavierNormal, # \"FDS_dropout\": XavierNormal, # \"pred_layer\": XavierNormal, }, optimizers={\"deeptabular\": deep_opt}, metrics=[], with_lds=True, lds_kernel=\"gaussian\", lds_ks=5, lds_sigma=2, lds_granularity=100, lds_reweight=False, lds_y_max=None, lds_y_min=None, ) trainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50) print( f\"test RMSE: {mean_squared_error(y_test, trainer.predict(X_tab=X_tab_test), squared=False)}\" )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 165.40it/s, loss=0.591, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 218.64it/s, loss=0.479, metrics={}]\nFDS update: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:00<00:00, 366.86it/s]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:01<00:00, 182.78it/s, loss=0.497, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 214.30it/s, loss=0.47, metrics={}]\nFDS update: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:00<00:00, 350.68it/s]\nepoch 3: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:04<00:00, 81.28it/s, loss=0.52, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 146.50it/s, loss=0.452, metrics={}]\nFDS update: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 122.88it/s]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:03<00:00, 99.77it/s, loss=0.508, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 157.48it/s, loss=0.45, metrics={}]\nFDS update: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 147.92it/s]\nepoch 5: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:03<00:00, 93.21it/s, loss=0.591, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 154.60it/s, loss=0.45, metrics={}]\nFDS update: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 145.73it/s]\npredict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 243.46it/s]
test RMSE: 0.7417540528440087\n
\n
In\u00a0[17]: Copied!
deeptabular = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\nmodel = WideDeep(deeptabular=deeptabular, fds=False)\n\n# Optimizers\ndeep_opt = SGD(model.deeptabular.parameters(), lr=0.1)\n# LR Scheduler\ndeep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)\n# Hyperparameters\ntrainer = Trainer(\n    model,\n    objective=\"huber\",\n    lr_schedulers={\"deeptabular\": deep_sch},\n    initializers={\"deeptabular\": XavierNormal},\n    optimizers={\"deeptabular\": deep_opt},\n    metrics=[],\n    with_lds=True,\n    lds_kernel=\"gaussian\",\n    lds_ks=5,\n    lds_sigma=2,\n    lds_granularity=100,\n    lds_reweight=False,\n    lds_y_max=None,\n    lds_y_min=None,\n)\n\ntrainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50)\n\nprint(\n    f\"test RMSE: {mean_squared_error(y_test, trainer.predict(X_tab=X_tab_test), squared=False)}\"\n)\n
deeptabular = TabMlp( column_idx=tab_preprocessor.column_idx, continuous_cols=tab_preprocessor.continuous_cols, ) model = WideDeep(deeptabular=deeptabular, fds=False) # Optimizers deep_opt = SGD(model.deeptabular.parameters(), lr=0.1) # LR Scheduler deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3) # Hyperparameters trainer = Trainer( model, objective=\"huber\", lr_schedulers={\"deeptabular\": deep_sch}, initializers={\"deeptabular\": XavierNormal}, optimizers={\"deeptabular\": deep_opt}, metrics=[], with_lds=True, lds_kernel=\"gaussian\", lds_ks=5, lds_sigma=2, lds_granularity=100, lds_reweight=False, lds_y_max=None, lds_y_min=None, ) trainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50) print( f\"test RMSE: {mean_squared_error(y_test, trainer.predict(X_tab=X_tab_test), squared=False)}\" )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 135.40it/s, loss=0.449, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 158.01it/s, loss=0.386, metrics={}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 137.79it/s, loss=0.377, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 156.84it/s, loss=0.399, metrics={}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 138.69it/s, loss=0.358, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 150.62it/s, loss=0.41, metrics={}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 131.96it/s, loss=0.339, metrics={}]\nvalid: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 146.01it/s, loss=0.321, metrics={}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 136.04it/s, loss=0.331, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 173.22it/s, loss=0.32, metrics={}]\npredict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 296.77it/s]\n
test RMSE: 0.6000006967500053\n
In\u00a0[18]: Copied!
deeptabular = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\nmodel = WideDeep(deeptabular=deeptabular, fds=False)\n\n# Optimizers\ndeep_opt = SGD(model.deeptabular.parameters(), lr=0.1)\n# LR Scheduler\ndeep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)\n# Hyperparameters\ntrainer = Trainer(\n    model,\n    objective=\"huber\",\n    lr_schedulers={\"deeptabular\": deep_sch},\n    initializers={\"deeptabular\": XavierNormal},\n    optimizers={\"deeptabular\": deep_opt},\n    metrics=[],\n    with_lds=False,\n)\n\ntrainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50)\n\nprint(\n    f\"test RMSE: {mean_squared_error(y_test, trainer.predict(X_tab=X_tab_test), squared=False)}\"\n)\n
deeptabular = TabMlp( column_idx=tab_preprocessor.column_idx, continuous_cols=tab_preprocessor.continuous_cols, ) model = WideDeep(deeptabular=deeptabular, fds=False) # Optimizers deep_opt = SGD(model.deeptabular.parameters(), lr=0.1) # LR Scheduler deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3) # Hyperparameters trainer = Trainer( model, objective=\"huber\", lr_schedulers={\"deeptabular\": deep_sch}, initializers={\"deeptabular\": XavierNormal}, optimizers={\"deeptabular\": deep_opt}, metrics=[], with_lds=False, ) trainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50) print( f\"test RMSE: {mean_squared_error(y_test, trainer.predict(X_tab=X_tab_test), squared=False)}\" )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 129.54it/s, loss=0.445, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 137.64it/s, loss=0.427, metrics={}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 135.98it/s, loss=0.374, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 148.50it/s, loss=0.389, metrics={}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 127.72it/s, loss=0.359, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 147.63it/s, loss=0.383, metrics={}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 136.54it/s, loss=0.339, metrics={}]\nvalid: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 146.55it/s, loss=0.323, metrics={}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 331/331 [00:02<00:00, 131.18it/s, loss=0.331, metrics={}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 174.87it/s, loss=0.318, metrics={}]\npredict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 42/42 [00:00<00:00, 287.25it/s]\n
test RMSE: 0.6014019159826868\n
In\u00a0[\u00a0]: Copied!
\n
"},{"location":"examples/15_DIR-LDS_and_FDS.html#label-and-feature-distribution-smoothing-for-deep-imbalanced-regression","title":"Label and Feature Distribution Smoothing for Deep Imbalanced Regression\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html#initial-imports","title":"Initial imports\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html#load-dataset","title":"Load dataset\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html#effects-of-ks-and-sigma-paramaters-on-kernel-function","title":"Effects of ks and sigma paramaters on kernel function\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html#label-distribution-smoothing-visualization","title":"Label Distribution Smoothing - visualization\u00b6","text":"
  • visualization of pytorch_widedeep.training._wd_dataset.WideDeepDataset._prepare_weights(...)

Assign weight to each sample by following procedure:

  1. creating a histogram from the label values with number of bins = granularity; [OPTIONAL] reweighting the label frequencies by sqrt; [OPTIONAL] smoothing the label frequencies by convolving a kernel function window with the frequency list
  2. inverting the values by n_samples / (n_classes * np.bincount(y))
  3. assigning to each sample the weight of its closest bin value
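
The snippet below is a minimal numpy/scipy sketch of that procedure. It is only an illustration, not the library's _prepare_weights implementation; the function name lds_weights and the default parameter values are assumptions:

import numpy as np\nfrom scipy.ndimage import convolve1d\n\n\ndef lds_weights(labels, granularity=100, reweight_sqrt=True, kernel=None):\n    # 1. histogram of the label values with `granularity` bins\n    freqs, bin_edges = np.histogram(labels, bins=granularity)\n    freqs = freqs.astype(float)\n    # 2. [OPTIONAL] reweight the label frequencies by sqrt\n    if reweight_sqrt:\n        freqs = np.sqrt(freqs)\n    # 3. [OPTIONAL] smooth the frequencies by convolving a kernel window with them\n    if kernel is not None:\n        freqs = convolve1d(freqs, weights=kernel, mode=\"constant\")\n    # invert the (smoothed) frequencies so that rare labels get larger weights\n    weights_per_bin = len(labels) / (granularity * np.clip(freqs, 1e-8, None))\n    # assign to each sample the weight of its closest bin\n    bin_idx = np.clip(np.digitize(labels, bin_edges[1:-1]), 0, granularity - 1)\n    return weights_per_bin[bin_idx]\n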
"},{"location":"examples/15_DIR-LDS_and_FDS.html#feature-distribution-smoothing","title":"Feature Distribution Smoothing\u00b6","text":"

We use the dataset feature values in this example, but during the training process the feature tensors are the output of the last layer before the FDS layer.

  • labels are np.vstack-ed to reflect the normal training scenario
"},{"location":"examples/15_DIR-LDS_and_FDS.html#data-preparation","title":"Data preparation\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html#model-with-lds-fds","title":"Model with LDS & FDS\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html#model-with-lds-only","title":"Model with LDS only\u00b6","text":""},{"location":"examples/15_DIR-LDS_and_FDS.html#model-without-fds-or-lds","title":"Model without FDS or LDS\u00b6","text":""},{"location":"examples/16_Self_Supervised_Pretraning_pt1.html","title":"16_Self-Supervised Pre-Training pt 1","text":"In\u00a0[1]: Copied!
import torch\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.model_selection import train_test_split\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.models import TabMlp, WideDeep\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep.datasets import load_adult\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.self_supervised_training import EncoderDecoderTrainer\n
import torch from sklearn.metrics import accuracy_score from sklearn.model_selection import train_test_split from pytorch_widedeep import Trainer from pytorch_widedeep.models import TabMlp, WideDeep from pytorch_widedeep.metrics import Accuracy from pytorch_widedeep.datasets import load_adult from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.self_supervised_training import EncoderDecoderTrainer In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\n
df = load_adult(as_frame=True) df.columns = [c.replace(\"-\", \"_\") for c in df.columns] df[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop(\"income\", axis=1, inplace=True) In\u00a0[3]: Copied!
# one could choose to use a validation set for early stopping, hyperparam\n# optimization, etc. This is just an example, so we simply use train/test\n# split\ndf_tr, df_te = train_test_split(df, test_size=0.2, stratify=df.income_label)\n
# one could choose to use a validation set for early stopping, hyperparam # optimization, etc. This is just an example, so we simply use train/test # split df_tr, df_te = train_test_split(df, test_size=0.2, stratify=df.income_label) In\u00a0[4]: Copied!
df_tr.head(2)\n
df_tr.head(2) Out[4]: age workclass fnlwgt education educational_num marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country income_label 9042 26 Local-gov 250551 HS-grad 9 Married-civ-spouse Craft-repair Own-child Black Male 0 0 40 United-States 0 25322 50 Private 34832 Bachelors 13 Married-civ-spouse Tech-support Husband White Male 15024 0 40 United-States 1 In\u00a0[5]: Copied!
# As always, we need to define which cols will be represented as embeddings\n# and which one will be continuous features\ncat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital_gain\",\n    \"capital_loss\",\n    \"native_country\",\n]\ncontinuous_cols = [\"age\", \"hours_per_week\"]\ntarget_col = \"income_label\"\n
# As always, we need to define which cols will be represented as embeddings # and which one will be continuous features cat_embed_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"capital_gain\", \"capital_loss\", \"native_country\", ] continuous_cols = [\"age\", \"hours_per_week\"] target_col = \"income_label\" In\u00a0[6]: Copied!
# We prepare the data to be passed to the model\ntab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols\n)\nX_tab = tab_preprocessor.fit_transform(df_tr)\ntarget = df_tr[target_col].values\n
# We prepare the data to be passed to the model tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols ) X_tab = tab_preprocessor.fit_transform(df_tr) target = df_tr[target_col].values
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[7]: Copied!
X_tab[:5]\n
X_tab[:5] Out[7]:
array([[ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 26, 40],\n       [ 2,  2,  1,  2,  2,  2,  1,  2,  1,  1, 50, 40],\n       [ 2,  1,  1,  3,  2,  2,  1,  1,  2,  1, 39, 46],\n       [ 2,  3,  2,  4,  1,  2,  2,  1,  1,  1, 17, 10],\n       [ 3,  4,  2,  1,  1,  2,  1,  1,  1,  1, 32, 20]])
In\u00a0[8]: Copied!
# We define a model that will act as the encoder in the encoder/decoder\n# architecture. This could be any of: TabMlp, TabResnet or TabNet\ntab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=tab_preprocessor.continuous_cols,\n)\n
# We define a model that will act as the encoder in the encoder/decoder # architecture. This could be any of: TabMlp, TabResnet or TabNet tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=tab_preprocessor.continuous_cols, ) In\u00a0[9]: Copied!
tab_mlp\n
tab_mlp Out[9]:
TabMlp(\n  (cat_embed): DiffSizeCatEmbeddings(\n    (embed_layers): ModuleDict(\n      (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n      (emb_layer_education): Embedding(17, 8, padding_idx=0)\n      (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n      (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n      (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n      (emb_layer_race): Embedding(6, 4, padding_idx=0)\n      (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n      (emb_layer_capital_gain): Embedding(124, 24, padding_idx=0)\n      (emb_layer_capital_loss): Embedding(98, 21, padding_idx=0)\n      (emb_layer_native_country): Embedding(42, 13, padding_idx=0)\n    )\n    (embedding_dropout): Dropout(p=0.0, inplace=False)\n  )\n  (cont_norm): Identity()\n  (encoder): MLP(\n    (mlp): Sequential(\n      (dense_layer_0): Sequential(\n        (0): Linear(in_features=95, out_features=200, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n      (dense_layer_1): Sequential(\n        (0): Linear(in_features=200, out_features=100, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n    )\n  )\n)
In\u00a0[10]: Copied!
# If we do not pass a custom decoder, which is perfectly possible via the\n# decoder param,  the EncoderDecoderTrainer will automatically build a\n# decoder which will be the 'mirror' image of the encoder\nencoder_decoder_trainer = EncoderDecoderTrainer(encoder=tab_mlp)\n
# If we do not pass a custom decoder, which is perfectly possible via the # decoder param, the EncoderDecoderTrainer will automatically build a # decoder which will be the 'mirror' image of the encoder encoder_decoder_trainer = EncoderDecoderTrainer(encoder=tab_mlp) In\u00a0[11]: Copied!
# let's have a look at the encoder_decoder_model (aka ed_model)\nencoder_decoder_trainer.ed_model\n
# let's have a look at the encoder_decoder_model (aka ed_model) encoder_decoder_trainer.ed_model Out[11]:
EncoderDecoderModel(\n  (encoder): TabMlp(\n    (cat_embed): DiffSizeCatEmbeddings(\n      (embed_layers): ModuleDict(\n        (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n        (emb_layer_education): Embedding(17, 8, padding_idx=0)\n        (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n        (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n        (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n        (emb_layer_race): Embedding(6, 4, padding_idx=0)\n        (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n        (emb_layer_capital_gain): Embedding(124, 24, padding_idx=0)\n        (emb_layer_capital_loss): Embedding(98, 21, padding_idx=0)\n        (emb_layer_native_country): Embedding(42, 13, padding_idx=0)\n      )\n      (embedding_dropout): Dropout(p=0.0, inplace=False)\n    )\n    (cont_norm): Identity()\n    (encoder): MLP(\n      (mlp): Sequential(\n        (dense_layer_0): Sequential(\n          (0): Linear(in_features=95, out_features=200, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n        (dense_layer_1): Sequential(\n          (0): Linear(in_features=200, out_features=100, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n      )\n    )\n  )\n  (decoder): TabMlpDecoder(\n    (decoder): MLP(\n      (mlp): Sequential(\n        (dense_layer_0): Sequential(\n          (0): Linear(in_features=100, out_features=200, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n        (dense_layer_1): Sequential(\n          (0): Linear(in_features=200, out_features=95, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n      )\n    )\n  )\n  (masker): RandomObfuscator()\n)

Ignoring the masker, which just...well...masks, the ed_model consists of:

  1. An encoder model that is a TabMlp model, itself composed of an Embedding layer (or rather a collection of them, referred to as cat_and_cont_embed) and an encoder (a simple MLP, referred to as encoder)
  2. A decoder, which is just an \"inverted\" MLP (referred to as decoder)
In\u00a0[12]: Copied!
# And we just...pretrain\nencoder_decoder_trainer.pretrain(X_tab, n_epochs=5, batch_size=256)\n
# And we just...pretrain encoder_decoder_trainer.pretrain(X_tab, n_epochs=5, batch_size=256)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 82.90it/s, loss=4.07]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 89.87it/s, loss=3.09]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 92.86it/s, loss=2.53]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 91.24it/s, loss=2.09]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 91.38it/s, loss=1.78]\n

At this point we have two options: we could either save the model for later use or continue with supervised training. The latter is rather simple; after running:

encoder_decoder_trainer.pretrain(X_tab, n_epochs=5, batch_size=256)\n

you just have to

model = WideDeep(deeptabular=tab_mlp)\ntrainer = Trainer(model=model, objective=\"binary\", metrics=[Accuracy])\n\ntrainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256)\n\n# And, you know...we get a test metric\nX_tab_te = tab_preprocessor.transform(df_te)\ntarget_te = df_te[target_col].values\n\npreds = trainer.predict(X_tab=X_tab_te)\ntest_acc = accuracy_score(target_te, preds)\n

Let's say that in any case, we are 'decent' scientists/people and we want to save the model:

In\u00a0[13]: Copied!
encoder_decoder_trainer.save(\n    path=\"pretrained_weights\", model_filename=\"encoder_decoder_model.pt\"\n)\n
encoder_decoder_trainer.save( path=\"pretrained_weights\", model_filename=\"encoder_decoder_model.pt\" )

some time has passed...

In\u00a0[14]: Copied!
encoder_decoder_model = torch.load(\"pretrained_weights/encoder_decoder_model.pt\")\n
encoder_decoder_model = torch.load(\"pretrained_weights/encoder_decoder_model.pt\")

Now, AND THIS IS IMPORTANT: we have loaded the encoder AND the decoder. To proceed with the supervised training we ONLY need the encoder.

In\u00a0[15]: Copied!
pretrained_encoder = encoder_decoder_model.encoder\n
pretrained_encoder = encoder_decoder_model.encoder In\u00a0[16]: Copied!
pretrained_encoder\n
pretrained_encoder Out[16]:
TabMlp(\n  (cat_embed): DiffSizeCatEmbeddings(\n    (embed_layers): ModuleDict(\n      (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n      (emb_layer_education): Embedding(17, 8, padding_idx=0)\n      (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n      (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n      (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n      (emb_layer_race): Embedding(6, 4, padding_idx=0)\n      (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n      (emb_layer_capital_gain): Embedding(124, 24, padding_idx=0)\n      (emb_layer_capital_loss): Embedding(98, 21, padding_idx=0)\n      (emb_layer_native_country): Embedding(42, 13, padding_idx=0)\n    )\n    (embedding_dropout): Dropout(p=0.0, inplace=False)\n  )\n  (cont_norm): Identity()\n  (encoder): MLP(\n    (mlp): Sequential(\n      (dense_layer_0): Sequential(\n        (0): Linear(in_features=95, out_features=200, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n      (dense_layer_1): Sequential(\n        (0): Linear(in_features=200, out_features=100, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n    )\n  )\n)
In\u00a0[17]: Copied!
# and as always, ANY supervised model in this library has to go through the WideDeep class:\nmodel = WideDeep(deeptabular=pretrained_encoder)\ntrainer = Trainer(model=model, objective=\"binary\", metrics=[Accuracy])\n\ntrainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256)\n\nX_tab_te = tab_preprocessor.transform(df_te)\ntarget_te = df_te[target_col].values\n\npreds = trainer.predict(X_tab=X_tab_te)\ntest_acc = accuracy_score(target_te, preds)\nprint(test_acc)\n
# and as always, ANY supervised model in this library has to go through the WideDeep class: model = WideDeep(deeptabular=pretrained_encoder) trainer = Trainer(model=model, objective=\"binary\", metrics=[Accuracy]) trainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256) X_tab_te = tab_preprocessor.transform(df_te) target_te = df_te[target_col].values preds = trainer.predict(X_tab=X_tab_te) test_acc = accuracy_score(target_te, preds) print(test_acc)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 88.04it/s, loss=0.374, metrics={'acc': 0.8253}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 85.63it/s, loss=0.324, metrics={'acc': 0.8491}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 87.56it/s, loss=0.301, metrics={'acc': 0.8608}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 73.38it/s, loss=0.29, metrics={'acc': 0.8655}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:01<00:00, 78.68it/s, loss=0.284, metrics={'acc': 0.8686}]\npredict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:00<00:00, 173.02it/s]\n
0.8730678677449074\n

As we mentioned before, we can also use a TabResNet or TabNet model and a custom decoder. Let's have a look:

In\u00a0[18]: Copied!
from pytorch_widedeep.models import TabResnet as TabResnetEncoder, TabResnetDecoder\n
from pytorch_widedeep.models import TabResnet as TabResnetEncoder, TabResnetDecoder In\u00a0[19]: Copied!
resnet_encoder = TabResnetEncoder(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=continuous_cols,\n    blocks_dims=[200, 100, 100],\n)\n
resnet_encoder = TabResnetEncoder( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=continuous_cols, blocks_dims=[200, 100, 100], )

let's have a look at the model

In\u00a0[20]: Copied!
resnet_encoder\n
resnet_encoder Out[20]:
TabResnet(\n  (cat_embed): DiffSizeCatEmbeddings(\n    (embed_layers): ModuleDict(\n      (emb_layer_workclass): Embedding(10, 5, padding_idx=0)\n      (emb_layer_education): Embedding(17, 8, padding_idx=0)\n      (emb_layer_marital_status): Embedding(8, 5, padding_idx=0)\n      (emb_layer_occupation): Embedding(16, 7, padding_idx=0)\n      (emb_layer_relationship): Embedding(7, 4, padding_idx=0)\n      (emb_layer_race): Embedding(6, 4, padding_idx=0)\n      (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n      (emb_layer_capital_gain): Embedding(124, 24, padding_idx=0)\n      (emb_layer_capital_loss): Embedding(98, 21, padding_idx=0)\n      (emb_layer_native_country): Embedding(42, 13, padding_idx=0)\n    )\n    (embedding_dropout): Dropout(p=0.0, inplace=False)\n  )\n  (cont_norm): Identity()\n  (encoder): DenseResnet(\n    (dense_resnet): Sequential(\n      (lin_inp): Linear(in_features=95, out_features=200, bias=False)\n      (bn_inp): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      (block_0): BasicBlock(\n        (resize): Sequential(\n          (0): Linear(in_features=200, out_features=100, bias=False)\n          (1): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        )\n        (lin1): Linear(in_features=200, out_features=100, bias=False)\n        (bn1): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n        (dp): Dropout(p=0.1, inplace=False)\n        (lin2): Linear(in_features=100, out_features=100, bias=False)\n        (bn2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n      (block_1): BasicBlock(\n        (lin1): Linear(in_features=100, out_features=100, bias=False)\n        (bn1): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n        (dp): Dropout(p=0.1, inplace=False)\n        (lin2): Linear(in_features=100, out_features=100, bias=False)\n        (bn2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n    )\n  )\n)

As we can see, the tensor we are trying to reconstruct, the embeddings, is of size 95 (this number is stored in the attribute resnet_encoder.cat_and_cont_embed.output_dim). With that information we could build our own decoder as:

In\u00a0[21]: Copied!
# for all possible params see the docs\nresnet_decoder = TabResnetDecoder(\n    embed_dim=resnet_encoder.cat_out_dim + resnet_encoder.cont_out_dim,\n    blocks_dims=[100, 100, 200],\n)\n
# for all possible params see the docs resnet_decoder = TabResnetDecoder( embed_dim=resnet_encoder.cat_out_dim + resnet_encoder.cont_out_dim, blocks_dims=[100, 100, 200], ) In\u00a0[22]: Copied!
resnet_decoder\n
resnet_decoder Out[22]:
TabResnetDecoder(\n  (decoder): DenseResnet(\n    (dense_resnet): Sequential(\n      (block_0): BasicBlock(\n        (lin1): Linear(in_features=100, out_features=100, bias=False)\n        (bn1): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n        (dp): Dropout(p=0.1, inplace=False)\n        (lin2): Linear(in_features=100, out_features=100, bias=False)\n        (bn2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n      (block_1): BasicBlock(\n        (resize): Sequential(\n          (0): Linear(in_features=100, out_features=200, bias=False)\n          (1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        )\n        (lin1): Linear(in_features=100, out_features=200, bias=False)\n        (bn1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n        (leaky_relu): LeakyReLU(negative_slope=0.01, inplace=True)\n        (dp): Dropout(p=0.1, inplace=False)\n        (lin2): Linear(in_features=200, out_features=200, bias=False)\n        (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n    )\n  )\n  (reconstruction_layer): Linear(in_features=200, out_features=95, bias=False)\n)

and now:

In\u00a0[23]: Copied!
ec_trainer = EncoderDecoderTrainer(\n    encoder=resnet_encoder,\n    decoder=resnet_decoder,\n    masked_prob=0.2,\n)\nec_trainer.pretrain(X_tab, n_epochs=5, batch_size=256)\n
ec_trainer = EncoderDecoderTrainer( encoder=resnet_encoder, decoder=resnet_decoder, masked_prob=0.2, ) ec_trainer.pretrain(X_tab, n_epochs=5, batch_size=256)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:03<00:00, 46.89it/s, loss=1.52]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:03<00:00, 46.78it/s, loss=0.81]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:03<00:00, 39.82it/s, loss=0.56]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:03<00:00, 46.73it/s, loss=0.417]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:03<00:00, 46.24it/s, loss=0.329]\n
In\u00a0[24]: Copied!
# and as always, ANY supervised model in this library has to go through the WideDeep class:\nmodel = WideDeep(deeptabular=resnet_encoder)\ntrainer = Trainer(model=model, objective=\"binary\", metrics=[Accuracy])\n\ntrainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256)\n\nX_tab_te = tab_preprocessor.transform(df_te)\ntarget_te = df_te[target_col].values\n\npreds = trainer.predict(X_tab=X_tab_te)\ntest_acc = accuracy_score(target_te, preds)\nprint(test_acc)\n
# and as always, ANY supervised model in this library has to go through the WideDeep class: model = WideDeep(deeptabular=resnet_encoder) trainer = Trainer(model=model, objective=\"binary\", metrics=[Accuracy]) trainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256) X_tab_te = tab_preprocessor.transform(df_te) target_te = df_te[target_col].values preds = trainer.predict(X_tab=X_tab_te) test_acc = accuracy_score(target_te, preds) print(test_acc)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 58.63it/s, loss=0.335, metrics={'acc': 0.8442}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 58.02it/s, loss=0.296, metrics={'acc': 0.864}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 55.91it/s, loss=0.283, metrics={'acc': 0.8687}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 55.00it/s, loss=0.276, metrics={'acc': 0.871}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:02<00:00, 51.95it/s, loss=0.272, metrics={'acc': 0.8732}]\npredict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:00<00:00, 120.15it/s]\n
0.8725560446309756\n
In\u00a0[\u00a0]: Copied!
\n
"},{"location":"examples/16_Self_Supervised_Pretraning_pt1.html#self-supervised-pretraining-for-tabular-data","title":"Self Supervised Pretraining for Tabular Data\u00b6","text":"

We have implemented two Self Supervised Pre-training routines that allow the user to pre-train all tabular models in the library with the exception of the TabPerceiver (which is a special monster).

The two routines implemented are illustrated in the figures below. The 1st is from TabNet: Attentive Interpretable Tabular Learning and is designed for models that do not use transformer-based architectures, while the second is from SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training, and is designed for models that use transformer-based architectures.

Fig 1. Figure 2 in their paper. I have included the original caption in case it is useful, although the figure itself is pretty self-explanatory

Fig 2. Figure 1 in their paper. Here the caption is necessary \ud83d\ude0f

It is beyond the scope of this notebook to explain those implementations in detail. Therefore, we strongly recommend that the user go and read the papers if this functionality is of interest to them.

One thing is worth noticing, however. As seen in Fig 1 (the TabNet paper's Fig 2), the masking of the input features happens in the feature space. However, the implementation in this library is inspired by that at the dreamquark-ai repo, which is in itself inspired by the original implementation (by the way, at this point I will write it once again: all TabNet related things in this library are inspired by, when not directly based on, the code in that repo; therefore, ALL CREDIT TO THE GUYS AT dreamquark-ai).

In that implementation the masking happens in the embedding space, and currently it does not mask the entire embedding (i.e. the categorical feature). We decided to release it as is in this version, and we will implement the exact same process described in the paper in future releases.
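
To make the difference concrete, below is a tiny, self-contained sketch of what masking in the embedding space means. It only illustrates the idea behind the RandomObfuscator module shown earlier; it is not the library's actual implementation, and the function name and default masking probability are made up:

import torch\n\n\ndef mask_in_embedding_space(embeddings: torch.Tensor, p: float = 0.2):\n    # embeddings: a float tensor of already-embedded features, e.g. (batch_size, embed_dim)\n    # each individual entry is dropped with probability p, so a categorical feature's\n    # embedding is, in general, only partially masked (as described above)\n    mask = (torch.rand_like(embeddings) > p).float()\n    return embeddings * mask, mask\n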

Having said all of the above, let's see how to use self-supervision for tabular data with pytorch-widedeep. In this notebook we will concentrate on the 1st of the two approaches (the 'TabNet approach'). For details on the second approach please see 16_Self_Supervised_Pretraning_pt2.

"},{"location":"examples/16_Self_Supervised_Pretraning_pt1.html#self-supervision-for-non-transformer-based-models","title":"Self Supervision for non-transformer-based models..\u00b6","text":"

...or in general, for models where the embeddings can all have different dimensions. In this library, these are: TabMlp, TabResNet and TabNet

As shown in the figure, this is an encoder-decoder approach where we learn to predict values in the incoming data that have been masked. However, as I mentioned before, our implementation is a bit different, and the masking occurs in the embedding space.

Nonetheless, the code below illustrates how to use this encoder-decoder approach with pytorch-widedeep.

"},{"location":"examples/16_Self_Supervised_Pretraning_pt2.html","title":"16_Self-Supervised Pre-Training pt 2","text":"In\u00a0[1]: Copied!
import torch\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.model_selection import train_test_split\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.models import WideDeep, FTTransformer\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep.datasets import load_adult\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\nfrom pytorch_widedeep.self_supervised_training import (\n    ContrastiveDenoisingTrainer,\n)\n
import torch from sklearn.metrics import accuracy_score from sklearn.model_selection import train_test_split from pytorch_widedeep import Trainer from pytorch_widedeep.models import WideDeep, FTTransformer from pytorch_widedeep.metrics import Accuracy from pytorch_widedeep.datasets import load_adult from pytorch_widedeep.preprocessing import TabPreprocessor from pytorch_widedeep.self_supervised_training import ( ContrastiveDenoisingTrainer, ) In\u00a0[2]: Copied!
df = load_adult(as_frame=True)\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop(\"income\", axis=1, inplace=True)\n\n# one could chose to use a validation set for early stopping, hyperparam\n# optimization, etc. This is just an example, so we simply use train/test\n# split\ndf_tr, df_te = train_test_split(df, test_size=0.2, stratify=df.income_label)\n\ncat_embed_cols = [\n    \"workclass\",\n    \"education\",\n    \"marital_status\",\n    \"occupation\",\n    \"relationship\",\n    \"race\",\n    \"gender\",\n    \"capital_gain\",\n    \"capital_loss\",\n    \"native_country\",\n]\ncontinuous_cols = [\"age\", \"hours_per_week\"]\ntarget_col = \"income_label\"\n\ntab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_embed_cols,\n    continuous_cols=continuous_cols,\n    with_attention=True,\n    with_cls_token=True,  # this is optional\n)\nX_tab = tab_preprocessor.fit_transform(df_tr)\ntarget = df_tr[target_col].values\n
df = load_adult(as_frame=True) df.columns = [c.replace(\"-\", \"_\") for c in df.columns] df[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop(\"income\", axis=1, inplace=True) # one could chose to use a validation set for early stopping, hyperparam # optimization, etc. This is just an example, so we simply use train/test # split df_tr, df_te = train_test_split(df, test_size=0.2, stratify=df.income_label) cat_embed_cols = [ \"workclass\", \"education\", \"marital_status\", \"occupation\", \"relationship\", \"race\", \"gender\", \"capital_gain\", \"capital_loss\", \"native_country\", ] continuous_cols = [\"age\", \"hours_per_week\"] target_col = \"income_label\" tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_embed_cols, continuous_cols=continuous_cols, with_attention=True, with_cls_token=True, # this is optional ) X_tab = tab_preprocessor.fit_transform(df_tr) target = df_tr[target_col].values
/Users/javierrodriguezzaurin/Projects/pytorch-widedeep/pytorch_widedeep/preprocessing/tab_preprocessor.py:358: UserWarning: Continuous columns will not be normalised\n  warnings.warn(\"Continuous columns will not be normalised\")\n
In\u00a0[3]: Copied!
ft_transformer = FTTransformer(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=tab_preprocessor.continuous_cols,\n    embed_continuous_method=\"standard\",\n    input_dim=32,\n    kv_compression_factor=0.5,\n    n_blocks=3,\n    n_heads=4,\n)\n
ft_transformer = FTTransformer( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=tab_preprocessor.continuous_cols, embed_continuous_method=\"standard\", input_dim=32, kv_compression_factor=0.5, n_blocks=3, n_heads=4, ) In\u00a0[4]: Copied!
# for a full list of the params for the the ContrastiveDenoisingTrainer (which are many) please see the docs.\n# Note that using these params involves some knowledge of the routine and the architecture of the model used\ncontrastive_denoising_trainer = ContrastiveDenoisingTrainer(\n    model=ft_transformer,\n    preprocessor=tab_preprocessor,\n)\ncontrastive_denoising_trainer.pretrain(X_tab, n_epochs=5, batch_size=256)\n
# for a full list of the params for the the ContrastiveDenoisingTrainer (which are many) please see the docs. # Note that using these params involves some knowledge of the routine and the architecture of the model used contrastive_denoising_trainer = ContrastiveDenoisingTrainer( model=ft_transformer, preprocessor=tab_preprocessor, ) contrastive_denoising_trainer.pretrain(X_tab, n_epochs=5, batch_size=256)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:13<00:00, 11.73it/s, loss=579]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:12<00:00, 12.56it/s, loss=143]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:12<00:00, 12.49it/s, loss=141]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:11<00:00, 12.77it/s, loss=138]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:11<00:00, 13.29it/s, loss=137]\n
In\u00a0[5]: Copied!
contrastive_denoising_trainer.save(\n    path=\"pretrained_weights\", model_filename=\"contrastive_denoising_model.pt\"\n)\n
contrastive_denoising_trainer.save( path=\"pretrained_weights\", model_filename=\"contrastive_denoising_model.pt\" )

some time has passed

In\u00a0[6]: Copied!
# some time has passed, we load the model with torch as usual:\ncontrastive_denoising_model = torch.load(\n    \"pretrained_weights/contrastive_denoising_model.pt\"\n)\n
# some time has passed, we load the model with torch as usual: contrastive_denoising_model = torch.load( \"pretrained_weights/contrastive_denoising_model.pt\" )

NOW, AND THIS IS IMPORTANT! We have loaded the entire contrastive-denoising model. To proceed with the supervised training we ONLY need the attention-based model, which is the 'model' attribute of the trainer. Let's have a look:

In\u00a0[7]: Copied!
contrastive_denoising_model.model\n
contrastive_denoising_model.model Out[7]:
FTTransformer(\n  (cat_embed): SameSizeCatEmbeddings(\n    (embed): Embedding(323, 32, padding_idx=0)\n    (dropout): Dropout(p=0.0, inplace=False)\n  )\n  (cont_norm): Identity()\n  (cont_embed): ContEmbeddings(\n    INFO: [ContLinear = weight(n_cont_cols, embed_dim) + bias(n_cont_cols, embed_dim)]\n    (linear): ContLinear(n_cont_cols=2, embed_dim=32, embed_dropout=0.0)\n    (dropout): Dropout(p=0.0, inplace=False)\n  )\n  (encoder): Sequential(\n    (fttransformer_block0): FTTransformerEncoder(\n      (attn): LinearAttentionLinformer(\n        (dropout): Dropout(p=0.2, inplace=False)\n        (qkv_proj): Linear(in_features=32, out_features=96, bias=False)\n        (out_proj): Linear(in_features=32, out_features=32, bias=False)\n      )\n      (ff): FeedForward(\n        (w_1): Linear(in_features=32, out_features=84, bias=True)\n        (w_2): Linear(in_features=42, out_features=32, bias=True)\n        (dropout): Dropout(p=0.1, inplace=False)\n        (activation): REGLU()\n      )\n      (attn_normadd): NormAdd(\n        (dropout): Dropout(p=0.2, inplace=False)\n        (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n      )\n      (ff_normadd): NormAdd(\n        (dropout): Dropout(p=0.1, inplace=False)\n        (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n      )\n    )\n    (fttransformer_block1): FTTransformerEncoder(\n      (attn): LinearAttentionLinformer(\n        (dropout): Dropout(p=0.2, inplace=False)\n        (qkv_proj): Linear(in_features=32, out_features=96, bias=False)\n        (out_proj): Linear(in_features=32, out_features=32, bias=False)\n      )\n      (ff): FeedForward(\n        (w_1): Linear(in_features=32, out_features=84, bias=True)\n        (w_2): Linear(in_features=42, out_features=32, bias=True)\n        (dropout): Dropout(p=0.1, inplace=False)\n        (activation): REGLU()\n      )\n      (attn_normadd): NormAdd(\n        (dropout): Dropout(p=0.2, inplace=False)\n        (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n      )\n      (ff_normadd): NormAdd(\n        (dropout): Dropout(p=0.1, inplace=False)\n        (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n      )\n    )\n    (fttransformer_block2): FTTransformerEncoder(\n      (attn): LinearAttentionLinformer(\n        (dropout): Dropout(p=0.2, inplace=False)\n        (qkv_proj): Linear(in_features=32, out_features=96, bias=False)\n        (out_proj): Linear(in_features=32, out_features=32, bias=False)\n      )\n      (ff): FeedForward(\n        (w_1): Linear(in_features=32, out_features=84, bias=True)\n        (w_2): Linear(in_features=42, out_features=32, bias=True)\n        (dropout): Dropout(p=0.1, inplace=False)\n        (activation): REGLU()\n      )\n      (attn_normadd): NormAdd(\n        (dropout): Dropout(p=0.2, inplace=False)\n        (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n      )\n      (ff_normadd): NormAdd(\n        (dropout): Dropout(p=0.1, inplace=False)\n        (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n      )\n    )\n  )\n)
In\u00a0[8]: Copied!
pretrained_model = contrastive_denoising_model.model\n
pretrained_model = contrastive_denoising_model.model In\u00a0[9]: Copied!
# and as always, ANY supervised model in this library has to go through the WideDeep class:\nmodel = WideDeep(deeptabular=pretrained_model)\ntrainer = Trainer(model=model, objective=\"binary\", metrics=[Accuracy])\n\ntrainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256)\n\n# And, you know...we get a test metric\nX_tab_te = tab_preprocessor.transform(df_te)\ntarget_te = df_te[target_col].values\n\npreds = trainer.predict(X_tab=X_tab_te)\ntest_acc = accuracy_score(target_te, preds)\nprint(test_acc)\n
# and as always, ANY supervised model in this library has to go through the WideDeep class: model = WideDeep(deeptabular=pretrained_model) trainer = Trainer(model=model, objective=\"binary\", metrics=[Accuracy]) trainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256) # And, you know...we get a test metric X_tab_te = tab_preprocessor.transform(df_te) target_te = df_te[target_col].values preds = trainer.predict(X_tab=X_tab_te) test_acc = accuracy_score(target_te, preds) print(test_acc)
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:05<00:00, 27.19it/s, loss=0.383, metrics={'acc': 0.8176}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:05<00:00, 26.08it/s, loss=0.325, metrics={'acc': 0.8502}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:05<00:00, 26.56it/s, loss=0.306, metrics={'acc': 0.8601}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:05<00:00, 27.41it/s, loss=0.295, metrics={'acc': 0.8641}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 153/153 [00:06<00:00, 24.70it/s, loss=0.289, metrics={'acc': 0.8656}]\npredict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:00<00:00, 97.26it/s]
0.8695874705701709\n
\n
In\u00a0[\u00a0]: Copied!
\n
"},{"location":"examples/16_Self_Supervised_Pretraning_pt2.html#self-supervised-pretraining-for-tabular-data","title":"Self Supervised Pretraining for Tabular Data\u00b6","text":"

We have implemented two Self Supervised Pre-training routines that allow the user to pre-train all tabular models in the library with the exception of the TabPerceiver (which is a special monster).

The two routines implemented are illustrated in the figures below. The 1st is from TabNet: Attentive Interpretable Tabular Learning and is designed for models that do not use transformer-based architectures, while the second is from SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training, and is designed for models that use transformer-based architectures.

Fig 1. Figure 2 in their paper. I have included the original caption in case it is useful, although the figure itself is pretty self-explanatory

Fig 2. Figure 1 in their paper. Here the caption is necessary \ud83d\ude0f

It is beyond the scope of this notebook to explain those implementations in detail. Therefore, we strongly recommend that the user go and read the papers if this functionality is of interest to them.

One thing is worth noticing, however. As seen in Fig 1 (the TabNet paper's Fig 2), the masking of the input features happens in the feature space. However, the implementation in this library is inspired by that at the dreamquark-ai repo, which is in itself inspired by the original implementation (by the way, at this point I will write it once again: all TabNet related things in this library are inspired by, when not directly based on, the code in that repo; therefore, ALL CREDIT TO THE GUYS AT dreamquark-ai).

In that implementation the masking happens in the embedding space, and currently it does not mask the entire embedding (i.e. the categorical feature). We decided to release it as is in this version, and we will implement the exact same process described in the paper in future releases.

Having said all of the above, let's see how to use self-supervision for tabular data with pytorch-widedeep. In this notebook we will concentrate on the 2nd of the two approaches (the 'SAINT approach'). For details on the 1st approach (the 'TabNet' approach) please see 16_Self_Supervised_Pretraning_pt1.

"},{"location":"examples/16_Self_Supervised_Pretraning_pt2.html#self-supervision-transformer-based-models","title":"Self Supervision transformer-based models..\u00b6","text":"

...or in general, for models where the embeddings all have the same dimension. In this library, these are:

  • TabTransformer
  • FTTransformer
  • SAINT
  • TabFastFormer

Note that there is one additional Transformer-based model, the TabPerceiver. However, this is a \"particular\" model and at the moment we do not support self-supervision for it, but it will come.

Let's see an example using the FTTransformer.

"},{"location":"examples/17_Usign_a_hugging_face_model.html","title":"17_Using_a_huggingface_model","text":"In\u00a0[1]: Copied!
import numpy as np\nimport torch\nimport lightgbm as lgb\nfrom lightgbm import Dataset as lgbDataset\nfrom scipy.sparse import hstack, csr_matrix\nfrom sklearn.metrics import (\n    f1_score,\n    recall_score,\n    accuracy_score,\n    precision_score,\n    confusion_matrix,\n)\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.feature_extraction.text import TfidfVectorizer\n\nfrom torch import Tensor, nn\nfrom transformers import DistilBertModel, DistilBertTokenizer\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.models import TabMlp, BasicRNN, WideDeep\nfrom pytorch_widedeep.metrics import F1Score, Accuracy\nfrom pytorch_widedeep.utils import Tokenizer, LabelEncoder\nfrom pytorch_widedeep.preprocessing import TextPreprocessor, TabPreprocessor\nfrom pytorch_widedeep.datasets import load_womens_ecommerce\nfrom pytorch_widedeep.utils.fastai_transforms import (\n    fix_html,\n    spec_add_spaces,\n    rm_useless_spaces,\n)\n
import numpy as np import torch import lightgbm as lgb from lightgbm import Dataset as lgbDataset from scipy.sparse import hstack, csr_matrix from sklearn.metrics import ( f1_score, recall_score, accuracy_score, precision_score, confusion_matrix, ) from sklearn.model_selection import train_test_split from sklearn.feature_extraction.text import TfidfVectorizer from torch import Tensor, nn from transformers import DistilBertModel, DistilBertTokenizer from pytorch_widedeep import Trainer from pytorch_widedeep.models import TabMlp, BasicRNN, WideDeep from pytorch_widedeep.metrics import F1Score, Accuracy from pytorch_widedeep.utils import Tokenizer, LabelEncoder from pytorch_widedeep.preprocessing import TextPreprocessor, TabPreprocessor from pytorch_widedeep.datasets import load_womens_ecommerce from pytorch_widedeep.utils.fastai_transforms import ( fix_html, spec_add_spaces, rm_useless_spaces, )

Let's load the data and have a look:

In\u00a0[2]: Copied!
df = load_womens_ecommerce(as_frame=True)\n\ndf.columns = [c.replace(\" \", \"_\").lower() for c in df.columns]\n\n# classes from [0,num_class)\ndf[\"rating\"] = (df[\"rating\"] - 1).astype(\"int64\")\n\n# group reviews with 1 and 2 scores into one class\ndf.loc[df.rating == 0, \"rating\"] = 1\n\n# and back again to [0,num_class)\ndf[\"rating\"] = (df[\"rating\"] - 1).astype(\"int64\")\n\n# drop short reviews\ndf = df[~df.review_text.isna()]\ndf[\"review_length\"] = df.review_text.apply(lambda x: len(x.split(\" \")))\ndf = df[df.review_length >= 5]\ndf = df.drop(\"review_length\", axis=1).reset_index(drop=True)\n
df = load_womens_ecommerce(as_frame=True) df.columns = [c.replace(\" \", \"_\").lower() for c in df.columns] # classes from [0,num_class) df[\"rating\"] = (df[\"rating\"] - 1).astype(\"int64\") # group reviews with 1 and 2 scores into one class df.loc[df.rating == 0, \"rating\"] = 1 # and back again to [0,num_class) df[\"rating\"] = (df[\"rating\"] - 1).astype(\"int64\") # drop short reviews df = df[~df.review_text.isna()] df[\"review_length\"] = df.review_text.apply(lambda x: len(x.split(\" \"))) df = df[df.review_length >= 5] df = df.drop(\"review_length\", axis=1).reset_index(drop=True) In\u00a0[3]: Copied!
df.head()\n
df.head() Out[3]: clothing_id age title review_text rating recommended_ind positive_feedback_count division_name department_name class_name 0 767 33 None Absolutely wonderful - silky and sexy and comf... 2 1 0 Initmates Intimate Intimates 1 1080 34 None Love this dress! it's sooo pretty. i happene... 3 1 4 General Dresses Dresses 2 1077 60 Some major design flaws I had such high hopes for this dress and reall... 1 0 0 General Dresses Dresses 3 1049 50 My favorite buy! I love, love, love this jumpsuit. it's fun, fl... 3 1 0 General Petite Bottoms Pants 4 847 47 Flattering shirt This shirt is very flattering to all due to th... 3 1 6 General Tops Blouses

So, we will use the review_text column to predict the rating. Later on, we will try to combine it with some other columns (like division_name and age) to see if these help.

Let's first have a look at the distribution of ratings

In\u00a0[4]: Copied!
df.rating.value_counts()\n
df.rating.value_counts() Out[4]:
rating\n3    12515\n2     4904\n1     2820\n0     2369\nName: count, dtype: int64

This shows that we could perhaps have grouped rating scores of 1, 2 and 3 into one class... but anyway, let's just move on with those 4 classes.

We are not going to carry out any hyperparameter optimization here, so we will only need a train and a test set (i.e. there is no need for a validation set for the example in this notebook)

In\u00a0[5]: Copied!
train, test = train_test_split(df, train_size=0.8, random_state=1, stratify=df.rating)\n
train, test = train_test_split(df, train_size=0.8, random_state=1, stratify=df.rating)

Let's see what we have to beat. What metrics would we obtain if we always predicted the most common rating (3)?

In\u00a0[6]: Copied!
most_common_pred = [train.rating.value_counts().index[0]] * len(test)\n\nmost_common_acc = accuracy_score(test.rating, most_common_pred)\nmost_common_f1 = f1_score(test.rating, most_common_pred, average=\"weighted\")\n
most_common_pred = [train.rating.value_counts().index[0]] * len(test) most_common_acc = accuracy_score(test.rating, most_common_pred) most_common_f1 = f1_score(test.rating, most_common_pred, average=\"weighted\") In\u00a0[7]: Copied!
print(f\"Accuracy: {most_common_acc}. F1 Score: {most_common_f1}\")\n
print(f\"Accuracy: {most_common_acc}. F1 Score: {most_common_f1}\")
Accuracy: 0.553516143299425. F1 Score: 0.3944344218301668\n

OK, these are our \"baseline\" metrics.

Let's start by simply using tf-idf + LightGBM

In\u00a0[8]: Copied!
# ?Tokenizer\n
# ?Tokenizer In\u00a0[9]: Copied!
# this Tokenizer is part of our utils module but of course, any valid tokenizer can be used here.\n\n# When using notebooks there seems to be an issue related with multiprocessing (and sometimes tqdm)\n# that can only be solved by using only one CPU\ntok = Tokenizer(n_cpus=1)\ntok_reviews_tr = tok.process_all(train.review_text.tolist())\ntok_reviews_te = tok.process_all(test.review_text.tolist())\n
# this Tokenizer is part of our utils module but of course, any valid tokenizer can be used here. # When using notebooks there seems to be an issue related with multiprocessing (and sometimes tqdm) # that can only be solved by using only one CPU tok = Tokenizer(n_cpus=1) tok_reviews_tr = tok.process_all(train.review_text.tolist()) tok_reviews_te = tok.process_all(test.review_text.tolist()) In\u00a0[10]: Copied!
vectorizer = TfidfVectorizer(\n    max_features=5000, preprocessor=lambda x: x, tokenizer=lambda x: x, min_df=5\n)\n\nX_text_tr = vectorizer.fit_transform(tok_reviews_tr)\nX_text_te = vectorizer.transform(tok_reviews_te)\n
vectorizer = TfidfVectorizer( max_features=5000, preprocessor=lambda x: x, tokenizer=lambda x: x, min_df=5 ) X_text_tr = vectorizer.fit_transform(tok_reviews_tr) X_text_te = vectorizer.transform(tok_reviews_te)
/Users/javierrodriguezzaurin/.pyenv/versions/3.10.13/envs/widedeep310/lib/python3.10/site-packages/sklearn/feature_extraction/text.py:525: UserWarning: The parameter 'token_pattern' will not be used since 'tokenizer' is not None'\n  warnings.warn(\n
In\u00a0[11]: Copied!
X_text_tr\n
X_text_tr Out[11]:
<18086x4566 sparse matrix of type '<class 'numpy.float64'>'\n\twith 884074 stored elements in Compressed Sparse Row format>

We now move our matrices to the LightGBM Dataset format

In\u00a0[12]: Copied!
lgbtrain_text = lgbDataset(\n    X_text_tr,\n    train.rating.values,\n    free_raw_data=False,\n)\n\nlgbtest_text = lgbDataset(\n    X_text_te,\n    test.rating.values,\n    reference=lgbtrain_text,\n    free_raw_data=False,\n)\n
lgbtrain_text = lgbDataset( X_text_tr, train.rating.values, free_raw_data=False, ) lgbtest_text = lgbDataset( X_text_te, test.rating.values, reference=lgbtrain_text, free_raw_data=False, )

and off we go. By the way, as we run the next cell, it is worth appreciating how fast LightGBM trains. Yes, the input is a sparse matrix, but still, it trains on an 18086x4566 matrix in a matter of seconds

In\u00a0[\u00a0]: Copied!
lgb_text_model = lgb.train(\n    {\"objective\": \"multiclass\", \"num_classes\": 4},\n    lgbtrain_text,\n    valid_sets=[lgbtest_text, lgbtrain_text],\n    valid_names=[\"test\", \"train\"],\n)\n
lgb_text_model = lgb.train( {\"objective\": \"multiclass\", \"num_classes\": 4}, lgbtrain_text, valid_sets=[lgbtest_text, lgbtrain_text], valid_names=[\"test\", \"train\"], ) In\u00a0[14]: Copied!
preds_text = lgb_text_model.predict(X_text_te)\npred_text_class = np.argmax(preds_text, 1)\n
preds_text = lgb_text_model.predict(X_text_te) pred_text_class = np.argmax(preds_text, 1) In\u00a0[15]: Copied!
acc_text = accuracy_score(lgbtest_text.label, pred_text_class)\nf1_text = f1_score(lgbtest_text.label, pred_text_class, average=\"weighted\")\ncm_text = confusion_matrix(lgbtest_text.label, pred_text_class)\n
acc_text = accuracy_score(lgbtest_text.label, pred_text_class) f1_text = f1_score(lgbtest_text.label, pred_text_class, average=\"weighted\") cm_text = confusion_matrix(lgbtest_text.label, pred_text_class) In\u00a0[16]: Copied!
print(f\"LightGBM Accuracy: {acc_text}. LightGBM F1 Score: {f1_text}\")\n
print(f\"LightGBM Accuracy: {acc_text}. LightGBM F1 Score: {f1_text}\")
LightGBM Accuracy: 0.6444051304732419. LightGBM F1 Score: 0.617154488246181\n
In\u00a0[17]: Copied!
print(f\"LightGBM Confusion Matrix: \\n {cm_text}\")\n
print(f\"LightGBM Confusion Matrix: \\n {cm_text}\")
LightGBM Confusion Matrix: \n [[ 199  135   61   79]\n [ 123  169  149  123]\n [  30   94  279  578]\n [  16   30  190 2267]]\n

OK, so with no hyperparameter optimization LightGBM gets an accuracy of 0.64 and an F1 score of 0.62. This is significantly better than always predicting the most popular rating.
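
Since recall_score and precision_score were already imported at the top of the notebook, a quick optional check is to look at the per-class figures, which complement the confusion matrix shown above. This is only a hedged sketch and was not run as part of the original notebook:

per_class_recall = recall_score(lgbtest_text.label, pred_text_class, average=None)  # recall for each of the 4 classes
per_class_precision = precision_score(lgbtest_text.label, pred_text_class, average=None)  # precision for each of the 4 classes
print(per_class_recall, per_class_precision)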

Let's see if, in this implementation, some additional features, like age or class_name, are of any help

In\u00a0[18]: Copied!
tab_cols = [\n    \"age\",\n    \"division_name\",\n    \"department_name\",\n    \"class_name\",\n]\n\nfor tab_df in [train, test]:\n    for c in [\"division_name\", \"department_name\", \"class_name\"]:\n        tab_df[c] = tab_df[c].str.lower()\n        tab_df[c].fillna(\"missing\", inplace=True)\n
tab_cols = [ \"age\", \"division_name\", \"department_name\", \"class_name\", ] for tab_df in [train, test]: for c in [\"division_name\", \"department_name\", \"class_name\"]: tab_df[c] = tab_df[c].str.lower() tab_df[c].fillna(\"missing\", inplace=True) In\u00a0[19]: Copied!
# This is our LabelEncoder. A class that is designed to work with the models in this library but\n# can be used for general purposes\nle = LabelEncoder(columns_to_encode=[\"division_name\", \"department_name\", \"class_name\"])\ntrain_tab_le = le.fit_transform(train)\ntest_tab_le = le.transform(test)\n
# This is our LabelEncoder. A class that is designed to work with the models in this library but # can be used for general purposes le = LabelEncoder(columns_to_encode=[\"division_name\", \"department_name\", \"class_name\"]) train_tab_le = le.fit_transform(train) test_tab_le = le.transform(test) In\u00a0[20]: Copied!
train_tab_le.head()\n
train_tab_le.head() Out[20]: clothing_id age title review_text rating recommended_ind positive_feedback_count division_name department_name class_name 4541 836 35 None Bought this on sale in my reg size- 10. im 5'9... 2 1 2 1 1 1 18573 1022 25 Look like \"mom jeans\" Maybe i just have the wrong body type for thes... 1 0 0 2 2 2 1058 815 39 Ig brought me here Love the way this top layers under my jackets ... 2 1 0 1 1 1 12132 984 47 Runs small especially the arms I love this jacket. it's the prettiest and mos... 3 1 0 1 3 3 20756 1051 42 True red, true beauty. These pants are gorgeous--the fabric has a sat... 3 1 0 2 2 4

Let's, for example, have a look at the encodings for the categorical feature class_name

In\u00a0[21]: Copied!
le.encoding_dict[\"class_name\"]\n
le.encoding_dict[\"class_name\"] Out[21]:
{'blouses': 1,\n 'jeans': 2,\n 'jackets': 3,\n 'pants': 4,\n 'knits': 5,\n 'dresses': 6,\n 'skirts': 7,\n 'sweaters': 8,\n 'fine gauge': 9,\n 'legwear': 10,\n 'lounge': 11,\n 'shorts': 12,\n 'outerwear': 13,\n 'intimates': 14,\n 'swim': 15,\n 'trend': 16,\n 'sleep': 17,\n 'layering': 18,\n 'missing': 19,\n 'casual bottoms': 20,\n 'chemises': 21}
In\u00a0[22]: Copied!
# tabular training and test sets\nX_tab_tr = csr_matrix(train_tab_le[tab_cols].values)\nX_tab_te = csr_matrix(test_tab_le[tab_cols].values)\n\n# text + tabular training and test sets\nX_tab_text_tr = hstack((X_tab_tr, X_text_tr))\nX_tab_text_te = hstack((X_tab_te, X_text_te))\n
# tabular training and test sets X_tab_tr = csr_matrix(train_tab_le[tab_cols].values) X_tab_te = csr_matrix(test_tab_le[tab_cols].values) # text + tabular training and test sets X_tab_text_tr = hstack((X_tab_tr, X_text_tr)) X_tab_text_te = hstack((X_tab_te, X_text_te)) In\u00a0[23]: Copied!
X_tab_tr\n
X_tab_tr Out[23]:
<18086x4 sparse matrix of type '<class 'numpy.int64'>'\n\twith 72344 stored elements in Compressed Sparse Row format>
In\u00a0[24]: Copied!
X_tab_text_tr\n
X_tab_text_tr Out[24]:
<18086x4570 sparse matrix of type '<class 'numpy.float64'>'\n\twith 956418 stored elements in Compressed Sparse Row format>
In\u00a0[25]: Copied!
lgbtrain_tab_text = lgbDataset(\n    X_tab_text_tr,\n    train.rating.values,\n    categorical_feature=[0, 1, 2, 3],\n    free_raw_data=False,\n)\n\nlgbtest_tab_text = lgbDataset(\n    X_tab_text_te,\n    test.rating.values,\n    reference=lgbtrain_tab_text,\n    free_raw_data=False,\n)\n
lgbtrain_tab_text = lgbDataset( X_tab_text_tr, train.rating.values, categorical_feature=[0, 1, 2, 3], free_raw_data=False, ) lgbtest_tab_text = lgbDataset( X_tab_text_te, test.rating.values, reference=lgbtrain_tab_text, free_raw_data=False, ) In\u00a0[26]: Copied!
lgb_tab_text_model = lgb.train(\n    {\"objective\": \"multiclass\", \"num_classes\": 4},\n    lgbtrain_tab_text,\n    valid_sets=[lgbtrain_tab_text, lgbtest_tab_text],\n    valid_names=[\"train\", \"test\"],\n    verbose_eval=False,\n)\n
lgb_tab_text_model = lgb.train( {\"objective\": \"multiclass\", \"num_classes\": 4}, lgbtrain_tab_text, valid_sets=[lgbtrain_tab_text, lgbtest_tab_text], valid_names=[\"train\", \"test\"], verbose_eval=False, )
/opt/conda/envs/wd38/lib/python3.8/site-packages/lightgbm/basic.py:2065: UserWarning: Using categorical_feature in Dataset.\n  _log_warning('Using categorical_feature in Dataset.')\n/opt/conda/envs/wd38/lib/python3.8/site-packages/lightgbm/basic.py:2068: UserWarning: categorical_feature in Dataset is overridden.\nNew categorical_feature is [0, 1, 2, 3]\n  _log_warning('categorical_feature in Dataset is overridden.\\n'\n/opt/conda/envs/wd38/lib/python3.8/site-packages/lightgbm/engine.py:239: UserWarning: 'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n  _log_warning(\"'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. \"\n
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.138280 seconds.\nYou can set `force_col_wise=true` to remove the overhead.\n[LightGBM] [Info] Total Bins 143432\n[LightGBM] [Info] Number of data points in the train set: 18086, number of used features: 2289\n[LightGBM] [Info] Start training from score -2.255919\n[LightGBM] [Info] Start training from score -2.081545\n[LightGBM] [Info] Start training from score -1.528281\n[LightGBM] [Info] Start training from score -0.591354\n
/opt/conda/envs/wd38/lib/python3.8/site-packages/lightgbm/basic.py:1780: UserWarning: Overriding the parameters from Reference Dataset.\n  _log_warning('Overriding the parameters from Reference Dataset.')\n/opt/conda/envs/wd38/lib/python3.8/site-packages/lightgbm/basic.py:1513: UserWarning: categorical_column in param dict is overridden.\n  _log_warning(f'{cat_alias} in param dict is overridden.')\n
In\u00a0[27]: Copied!
preds_tab_text = lgb_tab_text_model.predict(X_tab_text_te)\npreds_tab_text_class = np.argmax(preds_tab_text, 1)\n\nacc_tab_text = accuracy_score(lgbtest_tab_text.label, preds_tab_text_class)\nf1_tab_text = f1_score(lgbtest_tab_text.label, preds_tab_text_class, average=\"weighted\")\ncm_tab_text = confusion_matrix(lgbtest_tab_text.label, preds_tab_text_class)\n
preds_tab_text = lgb_tab_text_model.predict(X_tab_text_te) preds_tab_text_class = np.argmax(preds_tab_text, 1) acc_tab_text = accuracy_score(lgbtest_tab_text.label, preds_tab_text_class) f1_tab_text = f1_score(lgbtest_tab_text.label, preds_tab_text_class, average=\"weighted\") cm_tab_text = confusion_matrix(lgbtest_tab_text.label, preds_tab_text_class) In\u00a0[28]: Copied!
print(\n    f\"LightGBM text + tabular Accuracy: {acc_tab_text}. LightGBM text + tabular F1 Score: {f1_tab_text}\"\n)\n
print( f\"LightGBM text + tabular Accuracy: {acc_tab_text}. LightGBM text + tabular F1 Score: {f1_tab_text}\" )
LightGBM text + tabular Accuracy: 0.6382131800088456. LightGBM text + tabular F1 Score: 0.6080251307242649\n
In\u00a0[29]: Copied!
print(f\"LightGBM text + tabular Confusion Matrix:\\n {cm_tab_text}\")\n
print(f\"LightGBM text + tabular Confusion Matrix:\\n {cm_tab_text}\")
LightGBM text + tabular Confusion Matrix:\n [[ 193  123   68   90]\n [ 123  146  157  138]\n [  37   90  272  582]\n [  16   37  175 2275]]\n

So, in this setup, the additional tabular columns do not help performance.
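
Before moving on, one optional sanity check (a hedged sketch, not executed in the original notebook) is to ask the booster how much gain it attributes to the first four columns of the stacked matrix, which are exactly the tabular features:

# positions 0-3 of X_tab_text_tr/X_tab_text_te are the tabular columns
gains = lgb_tab_text_model.feature_importance(importance_type=\"gain\")
print(gains[:4], gains.sum())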

In\u00a0[30]: Copied!
text_preprocessor = TextPreprocessor(\n    text_col=\"review_text\", max_vocab=5000, min_freq=5, maxlen=90, n_cpus=1\n)\n\nwd_X_text_tr = text_preprocessor.fit_transform(train)\nwd_X_text_te = text_preprocessor.transform(test)\n
text_preprocessor = TextPreprocessor( text_col=\"review_text\", max_vocab=5000, min_freq=5, maxlen=90, n_cpus=1 ) wd_X_text_tr = text_preprocessor.fit_transform(train) wd_X_text_te = text_preprocessor.transform(test)
The vocabulary contains 4328 tokens\n
In\u00a0[31]: Copied!
basic_rnn = BasicRNN(\n    vocab_size=len(text_preprocessor.vocab.itos),\n    embed_dim=300,\n    hidden_dim=64,\n    n_layers=3,\n    rnn_dropout=0.2,\n    head_hidden_dims=[32],\n)\n\n\nwd_text_model = WideDeep(deeptext=basic_rnn, pred_dim=4)\n
basic_rnn = BasicRNN( vocab_size=len(text_preprocessor.vocab.itos), embed_dim=300, hidden_dim=64, n_layers=3, rnn_dropout=0.2, head_hidden_dims=[32], ) wd_text_model = WideDeep(deeptext=basic_rnn, pred_dim=4) In\u00a0[32]: Copied!
wd_text_model\n
wd_text_model Out[32]:
WideDeep(\n  (deeptext): Sequential(\n    (0): BasicRNN(\n      (word_embed): Embedding(4328, 300, padding_idx=1)\n      (rnn): LSTM(300, 64, num_layers=3, batch_first=True, dropout=0.2)\n      (rnn_mlp): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=64, out_features=32, bias=True)\n            (1): ReLU(inplace=True)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=32, out_features=4, bias=True)\n  )\n)
In\u00a0[33]: Copied!
text_trainer = Trainer(\n    wd_text_model,\n    objective=\"multiclass\",\n    metrics=[Accuracy, F1Score(average=True)],\n    num_workers=0,  # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work\n)\n
text_trainer = Trainer( wd_text_model, objective=\"multiclass\", metrics=[Accuracy, F1Score(average=True)], num_workers=0, # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work ) In\u00a0[34]: Copied!
text_trainer.fit(\n    X_text=wd_X_text_tr,\n    target=train.rating.values,\n    n_epochs=5,\n    batch_size=256,\n)\n
text_trainer.fit( X_text=wd_X_text_tr, target=train.rating.values, n_epochs=5, batch_size=256, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 52.39it/s, loss=1.16, metrics={'acc': 0.5349, 'f1': 0.2011}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 70.35it/s, loss=0.964, metrics={'acc': 0.5827, 'f1': 0.3005}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 70.33it/s, loss=0.845, metrics={'acc': 0.6252, 'f1': 0.4133}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 69.99it/s, loss=0.765, metrics={'acc': 0.6575, 'f1': 0.4875}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 69.55it/s, loss=0.709, metrics={'acc': 0.6879, 'f1': 0.5423}]\n
In\u00a0[35]: Copied!
wd_pred_text = text_trainer.predict_proba(X_text=wd_X_text_te)\nwd_pred_text_class = np.argmax(wd_pred_text, 1)\n
wd_pred_text = text_trainer.predict_proba(X_text=wd_X_text_te) wd_pred_text_class = np.argmax(wd_pred_text, 1)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 18/18 [00:00<00:00, 211.51it/s]\n
In\u00a0[36]: Copied!
wd_acc_text = accuracy_score(test.rating, wd_pred_text_class)\nwd_f1_text = f1_score(test.rating, wd_pred_text_class, average=\"weighted\")\nwd_cm_text = confusion_matrix(test.rating, wd_pred_text_class)\n
wd_acc_text = accuracy_score(test.rating, wd_pred_text_class) wd_f1_text = f1_score(test.rating, wd_pred_text_class, average=\"weighted\") wd_cm_text = confusion_matrix(test.rating, wd_pred_text_class) In\u00a0[37]: Copied!
print(f\"Basic RNN Accuracy: {wd_acc_text}. Basic RNN F1 Score: {wd_f1_text}\")\n
print(f\"Basic RNN Accuracy: {wd_acc_text}. Basic RNN F1 Score: {wd_f1_text}\")
Basic RNN Accuracy: 0.6076957098628926. Basic RNN F1 Score: 0.6017335854471788\n
In\u00a0[38]: Copied!
print(f\"Basic RNN Confusion Matrix:\\n {wd_cm_text}\")\n
print(f\"Basic RNN Confusion Matrix:\\n {wd_cm_text}\")
Basic RNN Confusion Matrix:\n [[ 327   76   62    9]\n [ 285  115  117   47]\n [ 131  122  315  413]\n [  42   69  401 1991]]\n

The performance is very similar to that obtained using simply tf-idf and LightGBM. Let's see if adding tabular features helps when using pytorch-widedeep

In\u00a0[39]: Copied!
# ?TabPreprocessor\n
# ?TabPreprocessor In\u00a0[40]: Copied!
tab_preprocessor = TabPreprocessor(cat_embed_cols=tab_cols)\n\nwd_X_tab_tr = tab_preprocessor.fit_transform(train)\nwd_X_tab_te = tab_preprocessor.transform(test)\n
tab_preprocessor = TabPreprocessor(cat_embed_cols=tab_cols) wd_X_tab_tr = tab_preprocessor.fit_transform(train) wd_X_tab_te = tab_preprocessor.transform(test) In\u00a0[41]: Copied!
# ?TabMlp\n
# ?TabMlp In\u00a0[42]: Copied!
tab_model = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    mlp_hidden_dims=[100, 50],\n)\n
tab_model = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, mlp_hidden_dims=[100, 50], ) In\u00a0[43]: Copied!
tab_model\n
tab_model Out[43]:
TabMlp(\n  (cat_and_cont_embed): DiffSizeCatAndContEmbeddings(\n    (cat_embed): DiffSizeCatEmbeddings(\n      (embed_layers): ModuleDict(\n        (emb_layer_age): Embedding(78, 18, padding_idx=0)\n        (emb_layer_division_name): Embedding(5, 3, padding_idx=0)\n        (emb_layer_department_name): Embedding(8, 5, padding_idx=0)\n        (emb_layer_class_name): Embedding(22, 9, padding_idx=0)\n      )\n      (embedding_dropout): Dropout(p=0.1, inplace=False)\n    )\n  )\n  (encoder): MLP(\n    (mlp): Sequential(\n      (dense_layer_0): Sequential(\n        (0): Dropout(p=0.1, inplace=False)\n        (1): Linear(in_features=35, out_features=100, bias=True)\n        (2): ReLU(inplace=True)\n      )\n      (dense_layer_1): Sequential(\n        (0): Dropout(p=0.1, inplace=False)\n        (1): Linear(in_features=100, out_features=50, bias=True)\n        (2): ReLU(inplace=True)\n      )\n    )\n  )\n)
In\u00a0[44]: Copied!
text_model = BasicRNN(\n    vocab_size=len(text_preprocessor.vocab.itos),\n    embed_dim=300,\n    hidden_dim=64,\n    n_layers=3,\n    rnn_dropout=0.2,\n    head_hidden_dims=[32],\n)\n
text_model = BasicRNN( vocab_size=len(text_preprocessor.vocab.itos), embed_dim=300, hidden_dim=64, n_layers=3, rnn_dropout=0.2, head_hidden_dims=[32], ) In\u00a0[45]: Copied!
wd_tab_and_text_model = WideDeep(deeptabular=tab_model, deeptext=text_model, pred_dim=4)\n
wd_tab_and_text_model = WideDeep(deeptabular=tab_model, deeptext=text_model, pred_dim=4) In\u00a0[46]: Copied!
wd_tab_and_text_model\n
wd_tab_and_text_model Out[46]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cat_and_cont_embed): DiffSizeCatAndContEmbeddings(\n        (cat_embed): DiffSizeCatEmbeddings(\n          (embed_layers): ModuleDict(\n            (emb_layer_age): Embedding(78, 18, padding_idx=0)\n            (emb_layer_division_name): Embedding(5, 3, padding_idx=0)\n            (emb_layer_department_name): Embedding(8, 5, padding_idx=0)\n            (emb_layer_class_name): Embedding(22, 9, padding_idx=0)\n          )\n          (embedding_dropout): Dropout(p=0.1, inplace=False)\n        )\n      )\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Dropout(p=0.1, inplace=False)\n            (1): Linear(in_features=35, out_features=100, bias=True)\n            (2): ReLU(inplace=True)\n          )\n          (dense_layer_1): Sequential(\n            (0): Dropout(p=0.1, inplace=False)\n            (1): Linear(in_features=100, out_features=50, bias=True)\n            (2): ReLU(inplace=True)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=50, out_features=4, bias=True)\n  )\n  (deeptext): Sequential(\n    (0): BasicRNN(\n      (word_embed): Embedding(4328, 300, padding_idx=1)\n      (rnn): LSTM(300, 64, num_layers=3, batch_first=True, dropout=0.2)\n      (rnn_mlp): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=64, out_features=32, bias=True)\n            (1): ReLU(inplace=True)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=32, out_features=4, bias=True)\n  )\n)
In\u00a0[47]: Copied!
tab_and_text_trainer = Trainer(\n    wd_tab_and_text_model,\n    objective=\"multiclass\",\n    metrics=[Accuracy, F1Score(average=True)],\n    num_workers=0,  # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work\n)\n
tab_and_text_trainer = Trainer( wd_tab_and_text_model, objective=\"multiclass\", metrics=[Accuracy, F1Score(average=True)], num_workers=0, # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work ) In\u00a0[48]: Copied!
tab_and_text_trainer.fit(\n    X_tab=wd_X_tab_tr,\n    X_text=wd_X_text_tr,\n    target=train.rating.values,\n    n_epochs=5,\n    batch_size=256,\n)\n
tab_and_text_trainer.fit( X_tab=wd_X_tab_tr, X_text=wd_X_text_tr, target=train.rating.values, n_epochs=5, batch_size=256, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 52.04it/s, loss=1.13, metrics={'acc': 0.538, 'f1': 0.1911}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 52.28it/s, loss=0.936, metrics={'acc': 0.5887, 'f1': 0.3507}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 52.26it/s, loss=0.825, metrics={'acc': 0.6394, 'f1': 0.4545}]\nepoch 4: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 51.33it/s, loss=0.757, metrics={'acc': 0.6696, 'f1': 0.5214}]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:01<00:00, 50.39it/s, loss=0.702, metrics={'acc': 0.6963, 'f1': 0.5654}]\n
In\u00a0[49]: Copied!
wd_pred_tab_and_text = tab_and_text_trainer.predict_proba(\n    X_tab=wd_X_tab_te, X_text=wd_X_text_te\n)\nwd_pred_tab_and_text_class = np.argmax(wd_pred_tab_and_text, 1)\n
wd_pred_tab_and_text = tab_and_text_trainer.predict_proba( X_tab=wd_X_tab_te, X_text=wd_X_text_te ) wd_pred_tab_and_text_class = np.argmax(wd_pred_tab_and_text, 1)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 18/18 [00:00<00:00, 136.94it/s]\n
In\u00a0[50]: Copied!
wd_acc_tab_and_text = accuracy_score(test.rating, wd_pred_tab_and_text_class)\nwd_f1_tab_and_text = f1_score(\n    test.rating, wd_pred_tab_and_text_class, average=\"weighted\"\n)\nwd_cm_tab_and_text = confusion_matrix(test.rating, wd_pred_tab_and_text_class)\n
wd_acc_tab_and_text = accuracy_score(test.rating, wd_pred_tab_and_text_class) wd_f1_tab_and_text = f1_score( test.rating, wd_pred_tab_and_text_class, average=\"weighted\" ) wd_cm_tab_and_text = confusion_matrix(test.rating, wd_pred_tab_and_text_class) In\u00a0[51]: Copied!
print(\n    f\"Basic RNN + Tabular  Accuracy: {wd_acc_tab_and_text}. Basic RNN + TabularF1 Score: {wd_f1_tab_and_text}\"\n)\nprint(f\"Basic RNN + Tabular  Confusion Matrix:\\n {wd_cm_tab_and_text}\")\n
print( f\"Basic RNN + Tabular Accuracy: {wd_acc_tab_and_text}. Basic RNN + TabularF1 Score: {wd_f1_tab_and_text}\" ) print(f\"Basic RNN + Tabular Confusion Matrix:\\n {wd_cm_tab_and_text}\")
Basic RNN + Tabular  Accuracy: 0.6333480760725343. Basic RNN + TabularF1 Score: 0.6332310089593208\nBasic RNN + Tabular  Confusion Matrix:\n [[ 267  132   65   10]\n [ 198  168  159   39]\n [  57  113  410  401]\n [  12   58  414 2019]]\n

We are going to \"manually\" code the Tokenizer and the model and see how they can be used as part of the process along with the pytorch-widedeep library.

Tokenizer:

In\u00a0[52]: Copied!
class BertTokenizer(object):\n    def __init__(\n        self,\n        pretrained_tokenizer=\"distilbert-base-uncased\",\n        do_lower_case=True,\n        max_length=90,\n    ):\n        super(BertTokenizer, self).__init__()\n        self.pretrained_tokenizer = pretrained_tokenizer\n        self.do_lower_case = do_lower_case\n        self.max_length = max_length\n\n    def fit(self, texts):\n        self.tokenizer = DistilBertTokenizer.from_pretrained(\n            self.pretrained_tokenizer, do_lower_case=self.do_lower_case\n        )\n\n        return self\n\n    def transform(self, texts):\n        input_ids = []\n        for text in texts:\n            encoded_sent = self.tokenizer.encode_plus(\n                text=self._pre_rules(text),\n                add_special_tokens=True,\n                max_length=self.max_length,\n                padding=\"max_length\",\n                truncation=True,\n            )\n\n            input_ids.append(encoded_sent.get(\"input_ids\"))\n        return np.stack(input_ids)\n\n    def fit_transform(self, texts):\n        return self.fit(texts).transform(texts)\n\n    @staticmethod\n    def _pre_rules(text):\n        return fix_html(rm_useless_spaces(spec_add_spaces(text)))\n
class BertTokenizer(object): def __init__( self, pretrained_tokenizer=\"distilbert-base-uncased\", do_lower_case=True, max_length=90, ): super(BertTokenizer, self).__init__() self.pretrained_tokenizer = pretrained_tokenizer self.do_lower_case = do_lower_case self.max_length = max_length def fit(self, texts): self.tokenizer = DistilBertTokenizer.from_pretrained( self.pretrained_tokenizer, do_lower_case=self.do_lower_case ) return self def transform(self, texts): input_ids = [] for text in texts: encoded_sent = self.tokenizer.encode_plus( text=self._pre_rules(text), add_special_tokens=True, max_length=self.max_length, padding=\"max_length\", truncation=True, ) input_ids.append(encoded_sent.get(\"input_ids\")) return np.stack(input_ids) def fit_transform(self, texts): return self.fit(texts).transform(texts) @staticmethod def _pre_rules(text): return fix_html(rm_useless_spaces(spec_add_spaces(text)))

Model:

In\u00a0[53]: Copied!
class BertModel(nn.Module):\n    def __init__(\n        self,\n        model_name: str = \"distilbert-base-uncased\",\n        freeze_bert: bool = False,\n    ):\n        super(BertModel, self).__init__()\n\n        self.bert = DistilBertModel.from_pretrained(\n            model_name,\n        )\n\n        if freeze_bert:\n            for param in self.bert.parameters():\n                param.requires_grad = False\n\n    def forward(self, X_inp: Tensor) -> Tensor:\n        attn_mask = (X_inp != 0).type(torch.int8)\n        outputs = self.bert(input_ids=X_inp, attention_mask=attn_mask)\n        return outputs[0][:, 0, :]\n\n    @property\n    def output_dim(self) -> int:\n        # This is THE ONLY requirement for any model to work with pytorch-widedeep. Must\n        # have a 'output_dim' property so the WideDeep class knows the incoming dims\n        # from the custom model. in this case, I hardcoded it\n        return 768\n
class BertModel(nn.Module): def __init__( self, model_name: str = \"distilbert-base-uncased\", freeze_bert: bool = False, ): super(BertModel, self).__init__() self.bert = DistilBertModel.from_pretrained( model_name, ) if freeze_bert: for param in self.bert.parameters(): param.requires_grad = False def forward(self, X_inp: Tensor) -> Tensor: attn_mask = (X_inp != 0).type(torch.int8) outputs = self.bert(input_ids=X_inp, attention_mask=attn_mask) return outputs[0][:, 0, :] @property def output_dim(self) -> int: # This is THE ONLY requirement for any model to work with pytorch-widedeep. Must # have a 'output_dim' property so the WideDeep class knows the incoming dims # from the custom model. in this case, I hardcoded it return 768 In\u00a0[54]: Copied!
bert_tokenizer = BertTokenizer()\nX_bert_tr = bert_tokenizer.fit_transform(train[\"review_text\"].tolist())\nX_bert_te = bert_tokenizer.transform(test[\"review_text\"].tolist())\n
bert_tokenizer = BertTokenizer() X_bert_tr = bert_tokenizer.fit_transform(train[\"review_text\"].tolist()) X_bert_te = bert_tokenizer.transform(test[\"review_text\"].tolist())

As I have mentioned a number of times in the documentation and examples, pytorch-widedeep is designed for flexibility. For any of the data modes (tabular, text and images) there are components/models available in the library. However, the user can choose to use any model they want, with the only requirement that such a model must have an output_dim property.

With that in mind, the BertModel class defined above can be used by pytorch-widedeep like any other of the internal components. In other words, simply pass it to the WideDeep class. In this case we are going to add an FC head as part of the classifier.
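
For reference, the following is a minimal sketch (an illustration with a made-up class name, not part of the original notebook) of the contract such a custom text component has to fulfil: a forward pass over the tokenized text tensor plus an output_dim property:

class MeanPoolTextEncoder(nn.Module):
    # toy custom 'deeptext' component: embed the token ids and mean-pool them
    def __init__(self, vocab_size, embed_dim=64):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=0)

    def forward(self, X_inp: Tensor) -> Tensor:
        return self.embed(X_inp).mean(dim=1)

    @property
    def output_dim(self) -> int:
        # the only requirement: tell WideDeep the size of the output vector
        return self.embed.embedding_dim

# it would then be passed to WideDeep just like BertModel is below, e.g.
# WideDeep(deeptext=MeanPoolTextEncoder(vocab_size=30522), pred_dim=4)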

In\u00a0[55]: Copied!
bert_model = BertModel(freeze_bert=True)\nwd_bert_model = WideDeep(\n    deeptext=bert_model,\n    head_hidden_dims=[256, 128, 64],\n    pred_dim=4,\n)\n
bert_model = BertModel(freeze_bert=True) wd_bert_model = WideDeep( deeptext=bert_model, head_hidden_dims=[256, 128, 64], pred_dim=4, )
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight']\n- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n
In\u00a0[56]: Copied!
wd_bert_model\n
wd_bert_model Out[56]:
WideDeep(\n  (deeptext): BertModel(\n    (bert): DistilBertModel(\n      (embeddings): Embeddings(\n        (word_embeddings): Embedding(30522, 768, padding_idx=0)\n        (position_embeddings): Embedding(512, 768)\n        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n        (dropout): Dropout(p=0.1, inplace=False)\n      )\n      (transformer): Transformer(\n        (layer): ModuleList(\n          (0): TransformerBlock(\n            (attention): MultiHeadSelfAttention(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (q_lin): Linear(in_features=768, out_features=768, bias=True)\n              (k_lin): Linear(in_features=768, out_features=768, bias=True)\n              (v_lin): Linear(in_features=768, out_features=768, bias=True)\n              (out_lin): Linear(in_features=768, out_features=768, bias=True)\n            )\n            (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n            (ffn): FFN(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (lin1): Linear(in_features=768, out_features=3072, bias=True)\n              (lin2): Linear(in_features=3072, out_features=768, bias=True)\n              (activation): GELUActivation()\n            )\n            (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n          )\n          (1): TransformerBlock(\n            (attention): MultiHeadSelfAttention(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (q_lin): Linear(in_features=768, out_features=768, bias=True)\n              (k_lin): Linear(in_features=768, out_features=768, bias=True)\n              (v_lin): Linear(in_features=768, out_features=768, bias=True)\n              (out_lin): Linear(in_features=768, out_features=768, bias=True)\n            )\n            (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n            (ffn): FFN(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (lin1): Linear(in_features=768, out_features=3072, bias=True)\n              (lin2): Linear(in_features=3072, out_features=768, bias=True)\n              (activation): GELUActivation()\n            )\n            (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n          )\n          (2): TransformerBlock(\n            (attention): MultiHeadSelfAttention(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (q_lin): Linear(in_features=768, out_features=768, bias=True)\n              (k_lin): Linear(in_features=768, out_features=768, bias=True)\n              (v_lin): Linear(in_features=768, out_features=768, bias=True)\n              (out_lin): Linear(in_features=768, out_features=768, bias=True)\n            )\n            (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n            (ffn): FFN(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (lin1): Linear(in_features=768, out_features=3072, bias=True)\n              (lin2): Linear(in_features=3072, out_features=768, bias=True)\n              (activation): GELUActivation()\n            )\n            (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n          )\n          (3): TransformerBlock(\n            (attention): MultiHeadSelfAttention(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (q_lin): Linear(in_features=768, out_features=768, bias=True)\n              (k_lin): Linear(in_features=768, out_features=768, bias=True)\n      
        (v_lin): Linear(in_features=768, out_features=768, bias=True)\n              (out_lin): Linear(in_features=768, out_features=768, bias=True)\n            )\n            (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n            (ffn): FFN(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (lin1): Linear(in_features=768, out_features=3072, bias=True)\n              (lin2): Linear(in_features=3072, out_features=768, bias=True)\n              (activation): GELUActivation()\n            )\n            (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n          )\n          (4): TransformerBlock(\n            (attention): MultiHeadSelfAttention(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (q_lin): Linear(in_features=768, out_features=768, bias=True)\n              (k_lin): Linear(in_features=768, out_features=768, bias=True)\n              (v_lin): Linear(in_features=768, out_features=768, bias=True)\n              (out_lin): Linear(in_features=768, out_features=768, bias=True)\n            )\n            (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n            (ffn): FFN(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (lin1): Linear(in_features=768, out_features=3072, bias=True)\n              (lin2): Linear(in_features=3072, out_features=768, bias=True)\n              (activation): GELUActivation()\n            )\n            (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n          )\n          (5): TransformerBlock(\n            (attention): MultiHeadSelfAttention(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (q_lin): Linear(in_features=768, out_features=768, bias=True)\n              (k_lin): Linear(in_features=768, out_features=768, bias=True)\n              (v_lin): Linear(in_features=768, out_features=768, bias=True)\n              (out_lin): Linear(in_features=768, out_features=768, bias=True)\n            )\n            (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n            (ffn): FFN(\n              (dropout): Dropout(p=0.1, inplace=False)\n              (lin1): Linear(in_features=768, out_features=3072, bias=True)\n              (lin2): Linear(in_features=3072, out_features=768, bias=True)\n              (activation): GELUActivation()\n            )\n            (output_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n          )\n        )\n      )\n    )\n  )\n  (deephead): Sequential(\n    (0): MLP(\n      (mlp): Sequential(\n        (dense_layer_0): Sequential(\n          (0): Linear(in_features=768, out_features=256, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n        (dense_layer_1): Sequential(\n          (0): Linear(in_features=256, out_features=128, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n        (dense_layer_2): Sequential(\n          (0): Linear(in_features=128, out_features=64, bias=True)\n          (1): ReLU(inplace=True)\n          (2): Dropout(p=0.1, inplace=False)\n        )\n      )\n    )\n    (1): Linear(in_features=64, out_features=4, bias=True)\n  )\n)
In\u00a0[57]: Copied!
wd_bert_trainer = Trainer(\n    wd_bert_model,\n    objective=\"multiclass\",\n    metrics=[Accuracy, F1Score(average=True)],\n    num_workers=0,  # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work\n)\n\nwd_bert_trainer.fit(\n    X_text=X_bert_tr,\n    target=train.rating.values,\n    n_epochs=3,\n    batch_size=64,\n)\n
wd_bert_trainer = Trainer( wd_bert_model, objective=\"multiclass\", metrics=[Accuracy, F1Score(average=True)], num_workers=0, # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work ) wd_bert_trainer.fit( X_text=X_bert_tr, target=train.rating.values, n_epochs=3, batch_size=64, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 283/283 [00:14<00:00, 19.68it/s, loss=0.968, metrics={'acc': 0.5879, 'f1': 0.3591}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 283/283 [00:14<00:00, 19.63it/s, loss=0.884, metrics={'acc': 0.6178, 'f1': 0.4399}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 283/283 [00:14<00:00, 19.55it/s, loss=0.87, metrics={'acc': 0.6234, 'f1': 0.4527}]\n
In\u00a0[58]: Copied!
wd_bert_pred_text = wd_bert_trainer.predict_proba(X_text=X_bert_te)\nwd_bert_pred_text_class = np.argmax(wd_bert_pred_text, 1)\n
wd_bert_pred_text = wd_bert_trainer.predict_proba(X_text=X_bert_te) wd_bert_pred_text_class = np.argmax(wd_bert_pred_text, 1)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:03<00:00, 21.97it/s]\n
In\u00a0[59]: Copied!
wd_bert_acc = accuracy_score(test.rating, wd_bert_pred_text_class)\nwd_bert_f1 = f1_score(test.rating, wd_bert_pred_text_class, average=\"weighted\")\nwd_bert_cm = confusion_matrix(test.rating, wd_bert_pred_text_class)\n
wd_bert_acc = accuracy_score(test.rating, wd_bert_pred_text_class) wd_bert_f1 = f1_score(test.rating, wd_bert_pred_text_class, average=\"weighted\") wd_bert_cm = confusion_matrix(test.rating, wd_bert_pred_text_class) In\u00a0[60]: Copied!
print(f\"Distilbert Accuracy: {wd_bert_acc}. Distilbert F1 Score: {wd_bert_f1}\")\nprint(f\"Distilbert Confusion Matrix:\\n {wd_bert_cm}\")\n
print(f\"Distilbert Accuracy: {wd_bert_acc}. Distilbert F1 Score: {wd_bert_f1}\") print(f\"Distilbert Confusion Matrix:\\n {wd_bert_cm}\")
Distilbert Accuracy: 0.6326846528084918. Distilbert F1 Score: 0.5796652991272998\nDistilbert Confusion Matrix:\n [[ 287   75   22   90]\n [ 197  136   62  169]\n [  68  119  123  671]\n [  40   64   84 2315]]\n

Now, adding a tabular model follows the exact same process as the one described in section 2.

In\u00a0[61]: Copied!
tab_model = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    mlp_hidden_dims=[100, 50],\n)\n
tab_model = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, mlp_hidden_dims=[100, 50], ) In\u00a0[62]: Copied!
wd_tab_bert_model = WideDeep(\n    deeptabular=tab_model,\n    deeptext=bert_model,\n    head_hidden_dims=[256, 128, 64],\n    pred_dim=4,\n)\n
wd_tab_bert_model = WideDeep( deeptabular=tab_model, deeptext=bert_model, head_hidden_dims=[256, 128, 64], pred_dim=4, ) In\u00a0[63]: Copied!
wd_tab_bert_trainer = Trainer(\n    wd_tab_bert_model,\n    objective=\"multiclass\",\n    metrics=[Accuracy, F1Score(average=True)],\n    num_workers=0,  # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work\n)\n
wd_tab_bert_trainer = Trainer( wd_tab_bert_model, objective=\"multiclass\", metrics=[Accuracy, F1Score(average=True)], num_workers=0, # As in the case of the tokenizer, in notebook I need to set this to 0 for the Trainer to work ) In\u00a0[64]: Copied!
wd_tab_bert_trainer.fit(\n    X_tab=wd_X_tab_tr,\n    X_text=X_bert_tr,\n    target=train.rating.values,\n    n_epochs=3,\n    batch_size=64,\n)\n
wd_tab_bert_trainer.fit( X_tab=wd_X_tab_tr, X_text=X_bert_tr, target=train.rating.values, n_epochs=3, batch_size=64, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 283/283 [00:15<00:00, 18.15it/s, loss=0.974, metrics={'acc': 0.5838, 'f1': 0.3404}]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 283/283 [00:15<00:00, 18.38it/s, loss=0.885, metrics={'acc': 0.618, 'f1': 0.4378}]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 283/283 [00:15<00:00, 18.40it/s, loss=0.868, metrics={'acc': 0.6252, 'f1': 0.4575}]\n
In\u00a0[65]: Copied!
wd_tab_bert_pred_text = wd_tab_bert_trainer.predict_proba(\n    X_tab=wd_X_tab_te, X_text=X_bert_te\n)\nwd_tab_bert_pred_text_class = np.argmax(wd_tab_bert_pred_text, 1)\n
wd_tab_bert_pred_text = wd_tab_bert_trainer.predict_proba( X_tab=wd_X_tab_te, X_text=X_bert_te ) wd_tab_bert_pred_text_class = np.argmax(wd_tab_bert_pred_text, 1)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 71/71 [00:03<00:00, 21.32it/s]\n
In\u00a0[66]: Copied!
wd_tab_bert_acc = accuracy_score(test.rating, wd_tab_bert_pred_text_class)\nwd_tab_bert_f1 = f1_score(test.rating, wd_tab_bert_pred_text_class, average=\"weighted\")\nwd_tab_bert_cm = confusion_matrix(test.rating, wd_tab_bert_pred_text_class)\n
wd_tab_bert_acc = accuracy_score(test.rating, wd_tab_bert_pred_text_class) wd_tab_bert_f1 = f1_score(test.rating, wd_tab_bert_pred_text_class, average=\"weighted\") wd_tab_bert_cm = confusion_matrix(test.rating, wd_tab_bert_pred_text_class) In\u00a0[67]: Copied!
print(\n    f\"Distilbert + Tabular Accuracy: {wd_tab_bert_acc}. Distilbert+ Tabular F1 Score: {wd_tab_bert_f1}\"\n)\nprint(f\"Distilbert + Tabular Confusion Matrix:\\n {wd_tab_bert_cm}\")\n
print( f\"Distilbert + Tabular Accuracy: {wd_tab_bert_acc}. Distilbert+ Tabular F1 Score: {wd_tab_bert_f1}\" ) print(f\"Distilbert + Tabular Confusion Matrix:\\n {wd_tab_bert_cm}\")
Distilbert + Tabular Accuracy: 0.6242812914639541. Distilbert+ Tabular F1 Score: 0.5508351761564895\nDistilbert + Tabular Confusion Matrix:\n [[ 297   56   11  110]\n [ 229   91   38  206]\n [  86   90   71  734]\n [  49   48   42 2364]]\n
"},{"location":"examples/17_Usign_a_hugging_face_model.html#using-a-hugginface-model","title":"Using a Hugginface model\u00b6","text":"

In this notebook we will show how to use an \"external\" Hugging Face model along with any other model in the library. In particular, we will show how to combine it with a tabular DL model.

Since we are here, we will also compare the performance of a few models on a text classification problem.

The notebook will go as follows:

  1. Text classification using tf-idf + LightGBM
  2. Text classification using a basic RNN
  3. Text classification using Distilbert

In all 3 cases we will add some tabular features to see if these help.

In general, I would not pay much attention to the results, since I have put no effort into getting the best possible results (i.e. no hyperparameter optimization or trying different architectures, for example).

Let's go

"},{"location":"examples/17_Usign_a_hugging_face_model.html#1-text-classification-using-tf-idf-lightgbm","title":"1. Text classification using tf-idf + LightGBM\u00b6","text":""},{"location":"examples/17_Usign_a_hugging_face_model.html#2-text-classification-using-pytorch-widedeeps-built-in-models-a-basic-rnn","title":"2. Text classification using pytorch-widedeep's built-in models (a basic RNN)\u00b6","text":"

Moving on now to fully using pytorch-widedeep on this dataset, let's have a look at how one could use a simple RNN to predict the ratings with the library.

"},{"location":"examples/17_Usign_a_hugging_face_model.html#3-text-classification-using-a-hugginface-model-as-a-custom-model-in-pytorch-widedeeps","title":"3. Text classification using a Hugginface model as a custom model in pytorch-widedeep's\u00b6","text":""},{"location":"examples/18_feature_importance_via_attention_weights.html","title":"18_feature_importance_via_attention_weights","text":"In\u00a0[1]: Copied!
import torch\n\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import accuracy_score\n\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.models import TabTransformer, ContextAttentionMLP, WideDeep\nfrom pytorch_widedeep.callbacks import EarlyStopping\nfrom pytorch_widedeep.metrics import Accuracy\nfrom pytorch_widedeep.datasets import load_adult\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\n
import torch import numpy as np from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score from pytorch_widedeep import Trainer from pytorch_widedeep.models import TabTransformer, ContextAttentionMLP, WideDeep from pytorch_widedeep.callbacks import EarlyStopping from pytorch_widedeep.metrics import Accuracy from pytorch_widedeep.datasets import load_adult from pytorch_widedeep.preprocessing import TabPreprocessor In\u00a0[2]: Copied!
# use_cuda = torch.cuda.is_available()\ndf = load_adult(as_frame=True)\ndf.columns = [c.replace(\"-\", \"_\") for c in df.columns]\ndf[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\ndf.drop([\"income\", \"fnlwgt\", \"educational_num\"], axis=1, inplace=True)\ntarget_colname = \"income_label\"\n
# use_cuda = torch.cuda.is_available() df = load_adult(as_frame=True) df.columns = [c.replace(\"-\", \"_\") for c in df.columns] df[\"income_label\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int) df.drop([\"income\", \"fnlwgt\", \"educational_num\"], axis=1, inplace=True) target_colname = \"income_label\" In\u00a0[3]: Copied!
df.head()\n
df.head() Out[3]: age workclass education marital_status occupation relationship race gender capital_gain capital_loss hours_per_week native_country income_label 0 25 Private 11th Never-married Machine-op-inspct Own-child Black Male 0 0 40 United-States 0 1 38 Private HS-grad Married-civ-spouse Farming-fishing Husband White Male 0 0 50 United-States 0 2 28 Local-gov Assoc-acdm Married-civ-spouse Protective-serv Husband White Male 0 0 40 United-States 1 3 44 Private Some-college Married-civ-spouse Machine-op-inspct Husband Black Male 7688 0 40 United-States 1 4 18 ? Some-college Never-married ? Own-child White Female 0 0 30 United-States 0 In\u00a0[4]: Copied!
cat_embed_cols = []\nfor col in df.columns:\n    if (df[col].dtype == \"O\" or df[col].nunique() < 200) and col != target_colname:\n        cat_embed_cols.append(col)\n
cat_embed_cols = [] for col in df.columns: if (df[col].dtype == \"O\" or df[col].nunique() < 200) and col != target_colname: cat_embed_cols.append(col) In\u00a0[5]: Copied!
# all cols will be categorical\nassert len(cat_embed_cols) == df.shape[1] - 1\n
# all cols will be categorical assert len(cat_embed_cols) == df.shape[1] - 1 In\u00a0[6]: Copied!
train, test = train_test_split(\n    df, test_size=0.1, random_state=1, stratify=df[[target_colname]]\n)\n
train, test = train_test_split( df, test_size=0.1, random_state=1, stratify=df[[target_colname]] ) In\u00a0[7]: Copied!
tab_preprocessor = TabPreprocessor(cat_embed_cols=cat_embed_cols, with_attention=True)\n
tab_preprocessor = TabPreprocessor(cat_embed_cols=cat_embed_cols, with_attention=True) In\u00a0[8]: Copied!
X_tab_train = tab_preprocessor.fit_transform(train)\nX_tab_test = tab_preprocessor.transform(test)\ntarget = train[target_colname].values\n
X_tab_train = tab_preprocessor.fit_transform(train) X_tab_test = tab_preprocessor.transform(test) target = train[target_colname].values In\u00a0[9]: Copied!
tab_transformer = TabTransformer(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    embed_continuous_method=\"standard\",\n    input_dim=8,\n    n_heads=2,\n    n_blocks=1,\n    attn_dropout=0.1,\n    transformer_activation=\"relu\",\n)\n
tab_transformer = TabTransformer( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, embed_continuous_method=\"standard\", input_dim=8, n_heads=2, n_blocks=1, attn_dropout=0.1, transformer_activation=\"relu\", ) In\u00a0[10]: Copied!
model = WideDeep(deeptabular=tab_transformer)\n
model = WideDeep(deeptabular=tab_transformer) In\u00a0[11]: Copied!
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0)\n
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0) In\u00a0[12]: Copied!
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(\n    optimizer,\n    threshold=0.001,\n    threshold_mode=\"abs\",\n    patience=10,\n)\n
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, threshold=0.001, threshold_mode=\"abs\", patience=10, ) In\u00a0[13]: Copied!
early_stopping = EarlyStopping(\n    min_delta=0.001, patience=30, restore_best_weights=True, verbose=True\n)\n
early_stopping = EarlyStopping( min_delta=0.001, patience=30, restore_best_weights=True, verbose=True ) In\u00a0[14]: Copied!
trainer = Trainer(\n    model,\n    objective=\"binary\",\n    optimizers=optimizer,\n    lr_schedulers=lr_scheduler,\n    reducelronplateau_criterion=\"loss\",\n    callbacks=[early_stopping],\n    metrics=[Accuracy],\n)\n
trainer = Trainer( model, objective=\"binary\", optimizers=optimizer, lr_schedulers=lr_scheduler, reducelronplateau_criterion=\"loss\", callbacks=[early_stopping], metrics=[Accuracy], )

The feature importances will be computed after training, using a sample of the training dataset of size feature_importance_sample_size

In\u00a0[15]: Copied!
trainer.fit(\n    X_tab=X_tab_train,\n    target=target,\n    val_split=0.2,\n    n_epochs=100,\n    batch_size=128,\n    validation_freq=1,\n    feature_importance_sample_size=1000,\n)\n
trainer.fit( X_tab=X_tab_train, target=target, val_split=0.2, n_epochs=100, batch_size=128, validation_freq=1, feature_importance_sample_size=1000, )
epoch 1:  train loss=0.334, acc=0.847  | valid loss=0.294, acc=0.8669
epoch 2:  train loss=0.293, acc=0.8656 | valid loss=0.283, acc=0.8678
epoch 3:  train loss=0.282, acc=0.8703 | valid loss=0.279, acc=0.8717
epoch 4:  train loss=0.277, acc=0.8718 | valid loss=0.277, acc=0.8731
epoch 5:  train loss=0.275, acc=0.8727 | valid loss=0.276, acc=0.8727
...
epoch 38: train loss=0.248, acc=0.8837 | valid loss=0.283, acc=0.8692
epoch 39: train loss=0.248, acc=0.8842 | valid loss=0.283, acc=0.8695
epoch 40: train loss=0.247, acc=0.8855 | valid loss=0.283, acc=0.8692
Best Epoch: 10. Best val_loss: 0.27451\nRestoring model weights from the end of the best epoch\n
In\u00a0[16]: Copied!
trainer.feature_importance\n
trainer.feature_importance Out[16]:
{'age': 0.09718182,\n 'workclass': 0.090637445,\n 'education': 0.08910798,\n 'marital_status': 0.08971319,\n 'occupation': 0.12546304,\n 'relationship': 0.086381145,\n 'race': 0.050686445,\n 'gender': 0.05116429,\n 'capital_gain': 0.08165918,\n 'capital_loss': 0.07702667,\n 'hours_per_week': 0.08205996,\n 'native_country': 0.07891885}
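As a quick aside, if you want to see these global importances sorted from largest to smallest, one option is to load the dictionary into a pandas Series. This is only an illustrative sketch (pandas is already used throughout this example; the commented-out plot line additionally assumes matplotlib is installed):

import pandas as pd

# sort the global feature importances computed during training, largest first
global_imps = pd.Series(trainer.feature_importance).sort_values(ascending=False)
print(global_imps)

# optional quick visualisation (requires matplotlib):
# global_imps.plot.barh()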
In\u00a0[17]: Copied!
preds = trainer.predict(X_tab=X_tab_test)\n
preds = trainer.predict(X_tab=X_tab_test)
predict: 100%| 39/39 [00:00<00:00, 199.63it/s]\n
In\u00a0[18]: Copied!
accuracy_score(preds, test.income_label)\n
accuracy_score(preds, test.income_label) Out[18]:
0.8685772773797339
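Since the income classes in this dataset are imbalanced, accuracy alone can look flattering, so it may also be worth checking precision and recall on the test set. Below is a minimal sketch using scikit-learn, which this example already relies on for accuracy_score. Note that, unlike accuracy_score, classification_report is not symmetric in its arguments, so the true labels go first:

from sklearn.metrics import classification_report

# per-class precision, recall and F1 on the held-out test set
print(classification_report(test.income_label, preds))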
In\u00a0[19]: Copied!
test.reset_index(drop=True, inplace=True)\n
test.reset_index(drop=True, inplace=True) In\u00a0[20]: Copied!
test[test.income_label == 0].head(1)\n
test[test.income_label == 0].head(1)

Out[20]:

   age  workclass  education     marital_status  occupation       relationship   race   gender  capital_gain  capital_loss  hours_per_week  native_country  income_label
0  26   Private    Some-college  Never-married   Exec-managerial  Not-in-family  White  Male    0             0             60              United-States   0

In\u00a0[21]: Copied!
test[test.income_label == 1].head(1)\n
test[test.income_label == 1].head(1)

Out[21]:

   age  workclass  education  marital_status      occupation      relationship  race   gender  capital_gain  capital_loss  hours_per_week  native_country  income_label
3  36   Local-gov  Doctorate  Married-civ-spouse  Prof-specialty  Husband       White  Male    0             1887          50              United-States   1

To get the feature importances for a test dataset, simply use the explain method, which returns one importance vector per sample

In\u00a0[22]: Copied!
feat_imp_per_sample = trainer.explain(X_tab_test, save_step_masks=False)\n
feat_imp_per_sample = trainer.explain(X_tab_test, save_step_masks=False) In\u00a0[23]: Copied!
list(test.iloc[0].index[np.argsort(-feat_imp_per_sample[0])])\n
list(test.iloc[0].index[np.argsort(-feat_imp_per_sample[0])]) Out[23]:
['hours_per_week',\n 'education',\n 'relationship',\n 'occupation',\n 'workclass',\n 'capital_gain',\n 'native_country',\n 'marital_status',\n 'capital_loss',\n 'age',\n 'race',\n 'gender']
In\u00a0[24]: Copied!
list(test.iloc[3].index[np.argsort(-feat_imp_per_sample[3])])\n
list(test.iloc[3].index[np.argsort(-feat_imp_per_sample[3])]) Out[24]:
['age',\n 'capital_loss',\n 'hours_per_week',\n 'marital_status',\n 'native_country',\n 'relationship',\n 'race',\n 'education',\n 'occupation',\n 'capital_gain',\n 'gender',\n 'workclass']
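If you would rather work with the raw per-sample importance values than with the rankings above, you can wrap them in a DataFrame. This is only a sketch and assumes the columns of feat_imp_per_sample come back in the same order used for the rankings (i.e. the order of cat_embed_cols):

import pandas as pd

# per-sample importances with named columns, assuming the same column order
# used by the ranking snippets above
feat_imp_df = pd.DataFrame(feat_imp_per_sample, columns=cat_embed_cols)
feat_imp_df.head()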

We could do the same with the ContextAttentionMLP

In\u00a0[25]: Copied!
context_attn_mlp = ContextAttentionMLP(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    cat_embed_dropout=0.0,\n    input_dim=16,\n    attn_dropout=0.1,\n    attn_activation=\"relu\",\n)\n
context_attn_mlp = ContextAttentionMLP( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, cat_embed_dropout=0.0, input_dim=16, attn_dropout=0.1, attn_activation=\"relu\", ) In\u00a0[26]: Copied!
mlp_model = WideDeep(deeptabular=context_attn_mlp)\n
mlp_model = WideDeep(deeptabular=context_attn_mlp) In\u00a0[27]: Copied!
mlp_optimizer = torch.optim.Adam(mlp_model.parameters(), lr=0.01, weight_decay=0.0)\n
mlp_optimizer = torch.optim.Adam(mlp_model.parameters(), lr=0.01, weight_decay=0.0) In\u00a0[28]: Copied!
mlp_lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(\n    mlp_optimizer,\n    threshold=0.001,\n    threshold_mode=\"abs\",\n    patience=10,\n)\n
mlp_lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( mlp_optimizer, threshold=0.001, threshold_mode=\"abs\", patience=10, ) In\u00a0[29]: Copied!
mlp_early_stopping = EarlyStopping(\n    min_delta=0.001, patience=30, restore_best_weights=True, verbose=True\n)\n
mlp_early_stopping = EarlyStopping( min_delta=0.001, patience=30, restore_best_weights=True, verbose=True ) In\u00a0[30]: Copied!
mlp_trainer = Trainer(\n    mlp_model,\n    objective=\"binary\",\n    optimizers=mlp_optimizer,\n    lr_schedulers=mlp_lr_scheduler,\n    reducelronplateau_criterion=\"loss\",\n    callbacks=[mlp_early_stopping],\n    metrics=[Accuracy],\n)\n
mlp_trainer = Trainer( mlp_model, objective=\"binary\", optimizers=mlp_optimizer, lr_schedulers=mlp_lr_scheduler, reducelronplateau_criterion=\"loss\", callbacks=[mlp_early_stopping], metrics=[Accuracy], ) In\u00a0[31]: Copied!
mlp_trainer.fit(\n    X_tab=X_tab_train,\n    target=target,\n    val_split=0.2,\n    n_epochs=100,\n    batch_size=128,\n    validation_freq=1,\n    feature_importance_sample_size=1000,\n)\n
mlp_trainer.fit( X_tab=X_tab_train, target=target, val_split=0.2, n_epochs=100, batch_size=128, validation_freq=1, feature_importance_sample_size=1000, )
epoch 1:  train loss=0.405, acc=0.8094 | valid loss=0.309, acc=0.8583
epoch 2:  train loss=0.332, acc=0.8447 | valid loss=0.293, acc=0.8646
epoch 3:  train loss=0.319, acc=0.8505 | valid loss=0.293, acc=0.8654
epoch 4:  train loss=0.312, acc=0.8554 | valid loss=0.291, acc=0.8661
epoch 5:  train loss=0.308, acc=0.8583 | valid loss=0.287, acc=0.8669
...
epoch 26: train loss=0.282, acc=0.8686 | valid loss=0.276, acc=0.8712
epoch 27: train loss=0.283, acc=0.8691 | valid loss=0.277, acc=0.8716
epoch 28: train loss=0.281, acc=0.8696 | valid loss=0.277, acc=0.8712
epoch 29: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.39it/s, loss=0.281, metrics={'acc': 0.8696}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 115.83it/s, loss=0.277, metrics={'acc': 0.872}]\nepoch 30: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 74.93it/s, loss=0.28, metrics={'acc': 0.8706}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 113.07it/s, loss=0.275, metrics={'acc': 0.8714}]\nepoch 31: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.35it/s, loss=0.281, metrics={'acc': 0.8697}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 112.68it/s, loss=0.276, metrics={'acc': 0.872}]\nepoch 32: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 76.72it/s, loss=0.28, metrics={'acc': 0.8693}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 122.50it/s, loss=0.276, metrics={'acc': 0.8709}]\nepoch 33: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 76.68it/s, loss=0.28, metrics={'acc': 0.8716}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 110.07it/s, loss=0.277, metrics={'acc': 0.8709}]\nepoch 34: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 73.58it/s, loss=0.279, metrics={'acc': 0.8704}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 111.07it/s, loss=0.274, metrics={'acc': 0.8719}]\nepoch 35: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 74.03it/s, loss=0.28, metrics={'acc': 0.8687}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 110.15it/s, loss=0.276, metrics={'acc': 0.871}]\nepoch 36: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 73.11it/s, loss=0.279, metrics={'acc': 0.8706}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 111.10it/s, loss=0.278, metrics={'acc': 0.8705}]\nepoch 37: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 74.30it/s, loss=0.279, metrics={'acc': 0.869}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 110.75it/s, loss=0.279, metrics={'acc': 0.8702}]\nepoch 38: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 72.34it/s, loss=0.28, metrics={'acc': 0.8691}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 117.79it/s, loss=0.277, metrics={'acc': 0.8698}]\nepoch 39: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 75.49it/s, loss=0.279, metrics={'acc': 0.8694}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 115.25it/s, loss=0.279, metrics={'acc': 0.87}]\nepoch 40: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 75.51it/s, loss=0.28, metrics={'acc': 0.8694}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 110.90it/s, loss=0.277, metrics={'acc': 0.8694}]\nepoch 41: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 75.35it/s, loss=0.278, metrics={'acc': 0.8716}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 115.74it/s, loss=0.28, metrics={'acc': 0.8675}]\nepoch 42: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 76.35it/s, loss=0.279, metrics={'acc': 0.8695}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 122.76it/s, loss=0.277, metrics={'acc': 0.8699}]\nepoch 43: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:04<00:00, 66.14it/s, loss=0.279, metrics={'acc': 0.8681}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 106.20it/s, loss=0.277, metrics={'acc': 0.8714}]\nepoch 44: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 73.37it/s, loss=0.279, metrics={'acc': 0.8704}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 115.18it/s, loss=0.277, metrics={'acc': 0.8716}]\nepoch 45: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 76.23it/s, loss=0.278, metrics={'acc': 0.8702}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 124.83it/s, loss=0.278, metrics={'acc': 0.8707}]\nepoch 46: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.12it/s, loss=0.278, metrics={'acc': 0.8704}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 126.62it/s, loss=0.279, metrics={'acc': 0.8693}]\nepoch 47: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 78.55it/s, loss=0.276, metrics={'acc': 0.8713}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 117.99it/s, loss=0.279, metrics={'acc': 0.8691}]\nepoch 48: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.25it/s, loss=0.278, metrics={'acc': 0.8719}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 124.52it/s, loss=0.278, metrics={'acc': 0.8695}]\nepoch 49: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 78.35it/s, loss=0.277, metrics={'acc': 0.8721}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 119.82it/s, loss=0.279, metrics={'acc': 0.8691}]\nepoch 50: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.15it/s, loss=0.277, metrics={'acc': 0.8717}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 122.62it/s, loss=0.278, metrics={'acc': 0.8699}]\nepoch 51: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 78.55it/s, loss=0.277, metrics={'acc': 0.8713}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 117.63it/s, loss=0.278, metrics={'acc': 0.87}]\nepoch 52: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.04it/s, loss=0.276, metrics={'acc': 0.8721}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 126.39it/s, loss=0.278, metrics={'acc': 0.8697}]\nepoch 53: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 78.15it/s, loss=0.277, metrics={'acc': 0.8721}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 127.56it/s, loss=0.278, metrics={'acc': 0.8699}]\nepoch 54: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.41it/s, loss=0.277, metrics={'acc': 0.8711}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 118.95it/s, loss=0.278, metrics={'acc': 0.8698}]\nepoch 55: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 76.35it/s, loss=0.277, metrics={'acc': 0.8718}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 126.90it/s, loss=0.278, metrics={'acc': 0.8699}]\nepoch 56: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.83it/s, loss=0.277, metrics={'acc': 0.8707}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 115.13it/s, loss=0.279, metrics={'acc': 0.8691}]\nepoch 57: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 75.20it/s, loss=0.277, metrics={'acc': 0.8722}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 123.16it/s, loss=0.279, metrics={'acc': 0.8691}]\nepoch 58: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 73.33it/s, loss=0.276, metrics={'acc': 0.871}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 123.37it/s, loss=0.278, metrics={'acc': 0.8691}]\nepoch 59: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 78.41it/s, loss=0.277, metrics={'acc': 0.8714}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 125.17it/s, loss=0.278, metrics={'acc': 0.8695}]\nepoch 60: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 79.26it/s, loss=0.276, metrics={'acc': 0.8721}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 120.60it/s, loss=0.278, metrics={'acc': 0.869}]\nepoch 61: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 75.88it/s, loss=0.278, metrics={'acc': 0.8703}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 124.47it/s, loss=0.278, metrics={'acc': 0.8692}]\nepoch 62: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.13it/s, loss=0.276, metrics={'acc': 0.8711}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 125.80it/s, loss=0.278, metrics={'acc': 0.8691}]\nepoch 63: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 77.20it/s, loss=0.277, metrics={'acc': 0.8715}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 118.50it/s, loss=0.278, metrics={'acc': 0.8695}]\nepoch 64: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 275/275 [00:03<00:00, 78.11it/s, loss=0.276, metrics={'acc': 0.8719}]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 69/69 [00:00<00:00, 114.52it/s, loss=0.278, metrics={'acc': 0.869}]\n
Best Epoch: 34. Best val_loss: 0.27449\nRestoring model weights from the end of the best epoch\n
In\u00a0[32]: Copied!
mlp_trainer.feature_importance\n
mlp_trainer.feature_importance Out[32]:
{'age': 0.116632804,\n 'workclass': 0.050255153,\n 'education': 0.094621316,\n 'marital_status': 0.12328919,\n 'occupation': 0.107893184,\n 'relationship': 0.11747801,\n 'race': 0.054717205,\n 'gender': 0.07514235,\n 'capital_gain': 0.059732802,\n 'capital_loss': 0.06738944,\n 'hours_per_week': 0.0610674,\n 'native_country': 0.07178114}
In\u00a0[33]: Copied!
mlp_preds = mlp_trainer.predict(X_tab=X_tab_test)\n
mlp_preds = mlp_trainer.predict(X_tab=X_tab_test)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 39/39 [00:00<00:00, 212.38it/s]\n
In\u00a0[34]: Copied!
accuracy_score(mlp_preds, test.income_label)\n
accuracy_score(mlp_preds, test.income_label) Out[34]:
0.8726714431934494
"},{"location":"examples/18_feature_importance_via_attention_weights.html#feature-importance-via-the-attention-weights","title":"Feature Importance via the attention weights\u00b6","text":"

I will start by saying that I consider this feature of the library purely experimental. First of all, I think there are multiple ways one could go about computing feature importances for these models. More importantly, one has to bear in mind that even tree-based algorithms trained on the same dataset produce different feature importances. This becomes more \"dramatic\" if one uses different techniques, such as shap or feature permutation (see for example this and the references therein). All this to say that, sometimes, feature importance is just a measure that only holds within a given experiment run, and for the model used.
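To make that point concrete, here is a small, self-contained illustration (using scikit-learn and one of its toy datasets, not this library or the dataset used in this notebook): the impurity-based and the permutation-based importances of the very same fitted model typically rank the features differently.

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

rf = RandomForestClassifier(n_estimators=200, random_state=0).fit(X_tr, y_tr)

# ranking according to the impurity-based importances computed at fit time
impurity_rank = np.argsort(rf.feature_importances_)[::-1][:5]

# ranking according to permutation importance on held-out data
perm = permutation_importance(rf, X_te, y_te, n_repeats=10, random_state=0)
permutation_rank = np.argsort(perm.importances_mean)[::-1][:5]

print(impurity_rank)
print(permutation_rank)  # the two top-5 rankings usually do not coincide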

With that in mind, each instantiation of a deep tabular model, with its millions of trainable parameters, will potentially produce a different set of feature importances, even if the architecture is the same. Moreover, this effect becomes more apparent if the dataset is relatively easy and contains dependent/related columns, so that one can reach the same success metric with different parameters.

In summary, feature importances are implemented in this library for all attention-based models for tabular data, with the exception of the TabPerceiver. However, this functionality has to be used and interpreted with care, and considered of value only within the 'universe' (or context) of the model with which these importances were produced.

Nonetheless, let's have a look at how one would access the feature importances when using this library.
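As a quick reminder of what that access looks like, here is a minimal sketch, assuming trainer is a Trainer that wraps an already fitted attention-based tabular model (as with the mlp_trainer used earlier in this notebook):

# minimal sketch: after fit, the Trainer exposes the (normalised) importances
# as a dictionary keyed by column name
feat_imps = trainer.feature_importance
top_features = sorted(feat_imps.items(), key=lambda kv: kv[1], reverse=True)
for name, imp in top_features[:5]:
    print(f'{name}: {imp:.3f}')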

"},{"location":"examples/19_wide_and_deep_for_recsys_pt1.html","title":"19_wide_and_deep_for_recsys_pt1","text":"

The goal of this notebook, and of its companion (part 2), is to illustrate how one could use this library in the context of recommendation systems. In particular, this notebook and the scripts in the wide_deep_for_recsys dir are a response to this issue. Therefore, we will use the Kaggle notebook referred to in that issue here.

In order to keep the length of the notebook tractable, we will split this exercise in two. In this first notebook, we will prepare the data in almost exactly the same way as it is done in the Kaggle notebook, and we will also show how one could use pytorch-widedeep to build a model almost identical to the one in that notebook.

In a second notebook, we will show how one could use this library to implement other models, still following the same problem formulation.

In\u00a0[1]: Copied!
from pathlib import Path\nimport warnings\n\nimport pandas as pd\nfrom sklearn.model_selection import train_test_split\n\nfrom pytorch_widedeep.datasets import load_movielens100k\n
from pathlib import Path import warnings import pandas as pd from sklearn.model_selection import train_test_split from pytorch_widedeep.datasets import load_movielens100k In\u00a0[2]: Copied!
warnings.filterwarnings(\"ignore\")\n
warnings.filterwarnings(\"ignore\") In\u00a0[3]: Copied!
save_path = Path(\"prepared_data\")\nif not save_path.exists():\n    save_path.mkdir(parents=True, exist_ok=True)\n
save_path = Path(\"prepared_data\") if not save_path.exists(): save_path.mkdir(parents=True, exist_ok=True) In\u00a0[4]: Copied!
data, users, items = load_movielens100k(as_frame=True)\n
data, users, items = load_movielens100k(as_frame=True) In\u00a0[5]: Copied!
# Alternatively, as specified in the docs: 'The last 19 fields are the genres' so:\n# list_of_genres = items.columns.tolist()[-19:]\nlist_of_genres = [\n    \"unknown\",\n    \"Action\",\n    \"Adventure\",\n    \"Animation\",\n    \"Children's\",\n    \"Comedy\",\n    \"Crime\",\n    \"Documentary\",\n    \"Drama\",\n    \"Fantasy\",\n    \"Film-Noir\",\n    \"Horror\",\n    \"Musical\",\n    \"Mystery\",\n    \"Romance\",\n    \"Sci-Fi\",\n    \"Thriller\",\n    \"War\",\n    \"Western\",\n]\n
# Alternatively, as specified in the docs: 'The last 19 fields are the genres' so: # list_of_genres = items.columns.tolist()[-19:] list_of_genres = [ \"unknown\", \"Action\", \"Adventure\", \"Animation\", \"Children's\", \"Comedy\", \"Crime\", \"Documentary\", \"Drama\", \"Fantasy\", \"Film-Noir\", \"Horror\", \"Musical\", \"Mystery\", \"Romance\", \"Sci-Fi\", \"Thriller\", \"War\", \"Western\", ]

Let's start by loading the interaction, user and item data

In\u00a0[6]: Copied!
data.head()\n
data.head() Out[6]: user_id movie_id rating timestamp 0 196 242 3 881250949 1 186 302 3 891717742 2 22 377 1 878887116 3 244 51 2 880606923 4 166 346 1 886397596 In\u00a0[7]: Copied!
users.head()\n
users.head() Out[7]: user_id age gender occupation zip_code 0 1 24 M technician 85711 1 2 53 F other 94043 2 3 23 M writer 32067 3 4 24 M technician 43537 4 5 33 F other 15213 In\u00a0[8]: Copied!
items.head()\n
items.head() Out[8]: movie_id movie_title release_date video_release_date IMDb_URL unknown Action Adventure Animation Children's ... Fantasy Film-Noir Horror Musical Mystery Romance Sci-Fi Thriller War Western 0 1 Toy Story (1995) 01-Jan-1995 NaN http://us.imdb.com/M/title-exact?Toy%20Story%2... 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0 1 2 GoldenEye (1995) 01-Jan-1995 NaN http://us.imdb.com/M/title-exact?GoldenEye%20(... 0 1 1 0 0 ... 0 0 0 0 0 0 0 1 0 0 2 3 Four Rooms (1995) 01-Jan-1995 NaN http://us.imdb.com/M/title-exact?Four%20Rooms%... 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0 3 4 Get Shorty (1995) 01-Jan-1995 NaN http://us.imdb.com/M/title-exact?Get%20Shorty%... 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 4 5 Copycat (1995) 01-Jan-1995 NaN http://us.imdb.com/M/title-exact?Copycat%20(1995) 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0

5 rows \u00d7 24 columns

In\u00a0[9]: Copied!
# adding a column with the number of movies watched per user\ndataset = data.sort_values([\"user_id\", \"timestamp\"]).reset_index(drop=True)\ndataset[\"one\"] = 1\ndataset[\"num_watched\"] = dataset.groupby(\"user_id\")[\"one\"].cumsum()\ndataset.drop(\"one\", axis=1, inplace=True)\ndataset.head()\n
# adding a column with the number of movies watched per user dataset = data.sort_values([\"user_id\", \"timestamp\"]).reset_index(drop=True) dataset[\"one\"] = 1 dataset[\"num_watched\"] = dataset.groupby(\"user_id\")[\"one\"].cumsum() dataset.drop(\"one\", axis=1, inplace=True) dataset.head() Out[9]: user_id movie_id rating timestamp num_watched 0 1 168 5 874965478 1 1 1 172 5 874965478 2 2 1 165 5 874965518 3 3 1 156 4 874965556 4 4 1 196 5 874965677 5 In\u00a0[10]: Copied!
# adding a column with the mean rating at a point in time per user\ndataset[\"mean_rate\"] = (\n    dataset.groupby(\"user_id\")[\"rating\"].cumsum() / dataset[\"num_watched\"]\n)\ndataset.head()\n
# adding a column with the mean rating at a point in time per user dataset[\"mean_rate\"] = ( dataset.groupby(\"user_id\")[\"rating\"].cumsum() / dataset[\"num_watched\"] ) dataset.head() Out[10]: user_id movie_id rating timestamp num_watched mean_rate 0 1 168 5 874965478 1 5.00 1 1 172 5 874965478 2 5.00 2 1 165 5 874965518 3 5.00 3 1 156 4 874965556 4 4.75 4 1 196 5 874965677 5 4.80 In\u00a0[11]: Copied!
dataset[\"target\"] = dataset.groupby(\"user_id\")[\"movie_id\"].shift(-1)\n
dataset[\"target\"] = dataset.groupby(\"user_id\")[\"movie_id\"].shift(-1)

Following the same processing used by the author in the aforementioned Kaggle notebook, we build sequences of previously watched movies

In\u00a0[12]: Copied!
# Here the author builds the sequences\ndataset[\"prev_movies\"] = dataset[\"movie_id\"].apply(lambda x: str(x))\ndataset[\"prev_movies\"] = (\n    dataset.groupby(\"user_id\")[\"prev_movies\"]\n    .apply(lambda x: (x + \" \").cumsum().str.strip())\n    .reset_index(drop=True)\n)\ndataset[\"prev_movies\"] = dataset[\"prev_movies\"].apply(lambda x: x.split())\ndataset.head()\n
# Here the author builds the sequences dataset[\"prev_movies\"] = dataset[\"movie_id\"].apply(lambda x: str(x)) dataset[\"prev_movies\"] = ( dataset.groupby(\"user_id\")[\"prev_movies\"] .apply(lambda x: (x + \" \").cumsum().str.strip()) .reset_index(drop=True) ) dataset[\"prev_movies\"] = dataset[\"prev_movies\"].apply(lambda x: x.split()) dataset.head() Out[12]: user_id movie_id rating timestamp num_watched mean_rate target prev_movies 0 1 168 5 874965478 1 5.00 172.0 [168] 1 1 172 5 874965478 2 5.00 165.0 [168, 172] 2 1 165 5 874965518 3 5.00 156.0 [168, 172, 165] 3 1 156 4 874965556 4 4.75 196.0 [168, 172, 165, 156] 4 1 196 5 874965677 5 4.80 166.0 [168, 172, 165, 156, 196]

And now we add, for every genre, a genre_rate column: the mean rating that each user has given, up to that point in time, to the movies of that genre

In\u00a0[13]: Copied!
dataset = dataset.merge(items[[\"movie_id\"] + list_of_genres], on=\"movie_id\", how=\"left\")\nfor genre in list_of_genres:\n    dataset[f\"{genre}_rate\"] = dataset[genre] * dataset[\"rating\"]\n    dataset[genre] = dataset.groupby(\"user_id\")[genre].cumsum()\n    dataset[f\"{genre}_rate\"] = (\n        dataset.groupby(\"user_id\")[f\"{genre}_rate\"].cumsum() / dataset[genre]\n    )\ndataset[list_of_genres] = dataset[list_of_genres].apply(\n    lambda x: x / dataset[\"num_watched\"]\n)\ndataset.head()\n
dataset = dataset.merge(items[[\"movie_id\"] + list_of_genres], on=\"movie_id\", how=\"left\") for genre in list_of_genres: dataset[f\"{genre}_rate\"] = dataset[genre] * dataset[\"rating\"] dataset[genre] = dataset.groupby(\"user_id\")[genre].cumsum() dataset[f\"{genre}_rate\"] = ( dataset.groupby(\"user_id\")[f\"{genre}_rate\"].cumsum() / dataset[genre] ) dataset[list_of_genres] = dataset[list_of_genres].apply( lambda x: x / dataset[\"num_watched\"] ) dataset.head() Out[13]: user_id movie_id rating timestamp num_watched mean_rate target prev_movies unknown Action ... Fantasy_rate Film-Noir_rate Horror_rate Musical_rate Mystery_rate Romance_rate Sci-Fi_rate Thriller_rate War_rate Western_rate 0 1 168 5 874965478 1 5.00 172.0 [168] 0.0 0.000000 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 1 1 172 5 874965478 2 5.00 165.0 [168, 172] 0.0 0.500000 ... NaN NaN NaN NaN NaN 5.0 5.0 NaN 5.0 NaN 2 1 165 5 874965518 3 5.00 156.0 [168, 172, 165] 0.0 0.333333 ... NaN NaN NaN NaN NaN 5.0 5.0 NaN 5.0 NaN 3 1 156 4 874965556 4 4.75 196.0 [168, 172, 165, 156] 0.0 0.250000 ... NaN NaN NaN NaN NaN 5.0 5.0 4.0 5.0 NaN 4 1 196 5 874965677 5 4.80 166.0 [168, 172, 165, 156, 196] 0.0 0.200000 ... NaN NaN NaN NaN NaN 5.0 5.0 4.0 5.0 NaN

5 rows \u00d7 46 columns

Adding user features

In\u00a0[14]: Copied!
dataset = dataset.merge(users, on=\"user_id\", how=\"left\")\ndataset.head()\n
dataset = dataset.merge(users, on=\"user_id\", how=\"left\") dataset.head() Out[14]: user_id movie_id rating timestamp num_watched mean_rate target prev_movies unknown Action ... Mystery_rate Romance_rate Sci-Fi_rate Thriller_rate War_rate Western_rate age gender occupation zip_code 0 1 168 5 874965478 1 5.00 172.0 [168] 0.0 0.000000 ... NaN NaN NaN NaN NaN NaN 24 M technician 85711 1 1 172 5 874965478 2 5.00 165.0 [168, 172] 0.0 0.500000 ... NaN 5.0 5.0 NaN 5.0 NaN 24 M technician 85711 2 1 165 5 874965518 3 5.00 156.0 [168, 172, 165] 0.0 0.333333 ... NaN 5.0 5.0 NaN 5.0 NaN 24 M technician 85711 3 1 156 4 874965556 4 4.75 196.0 [168, 172, 165, 156] 0.0 0.250000 ... NaN 5.0 5.0 4.0 5.0 NaN 24 M technician 85711 4 1 196 5 874965677 5 4.80 166.0 [168, 172, 165, 156, 196] 0.0 0.200000 ... NaN 5.0 5.0 4.0 5.0 NaN 24 M technician 85711

5 rows \u00d7 50 columns

Again, we use the same settings as those in the Kaggle notebook, but COLD_START_TRESH is pretty aggressive

In\u00a0[15]: Copied!
COLD_START_TRESH = 5\n\nfiltred_data = dataset[\n    (dataset[\"num_watched\"] >= COLD_START_TRESH) & ~(dataset[\"target\"].isna())\n].sort_values(\"timestamp\")\ntrain_data, _test_data = train_test_split(filtred_data, test_size=0.2, shuffle=False)\nvalid_data, test_data = train_test_split(_test_data, test_size=0.5, shuffle=False)\n
COLD_START_TRESH = 5 filtred_data = dataset[ (dataset[\"num_watched\"] >= COLD_START_TRESH) & ~(dataset[\"target\"].isna()) ].sort_values(\"timestamp\") train_data, _test_data = train_test_split(filtred_data, test_size=0.2, shuffle=False) valid_data, test_data = train_test_split(_test_data, test_size=0.5, shuffle=False) In\u00a0[16]: Copied!
cols_to_drop = [\n    # \"rating\",\n    \"timestamp\",\n    \"num_watched\",\n]\n\ndf_train = train_data.drop(cols_to_drop, axis=1)\ndf_valid = valid_data.drop(cols_to_drop, axis=1)\ndf_test = test_data.drop(cols_to_drop, axis=1)\n\ndf_train.to_pickle(save_path / \"df_train.pkl\")\ndf_valid.to_pickle(save_path / \"df_valid.pkl\")\ndf_test.to_pickle(save_path / \"df_test.pkl\")\n
cols_to_drop = [ # \"rating\", \"timestamp\", \"num_watched\", ] df_train = train_data.drop(cols_to_drop, axis=1) df_valid = valid_data.drop(cols_to_drop, axis=1) df_test = test_data.drop(cols_to_drop, axis=1) df_train.to_pickle(save_path / \"df_train.pkl\") df_valid.to_pickle(save_path / \"df_valid.pkl\") df_test.to_pickle(save_path / \"df_test.pkl\")

Let's now build a model that is nearly identical to the one used in the Kaggle notebook

In\u00a0[17]: Copied!
import numpy as np\nimport torch\nfrom torch import nn\nfrom scipy.sparse import coo_matrix\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.models import TabMlp, BasicRNN, WideDeep\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\n
import numpy as np import torch from torch import nn from scipy.sparse import coo_matrix from pytorch_widedeep import Trainer from pytorch_widedeep.models import TabMlp, BasicRNN, WideDeep from pytorch_widedeep.preprocessing import TabPreprocessor In\u00a0[18]: Copied!
device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n\nsave_path = Path(\"prepared_data\")\n\nPAD_IDX = 0\n
device = \"cuda\" if torch.cuda.is_available() else \"cpu\" save_path = Path(\"prepared_data\") PAD_IDX = 0

Let's use some of the functions that the author of the Kaggle notebook uses to prepare the data

In\u00a0[19]: Copied!
def get_coo_indexes(lil):\n    rows = []\n    cols = []\n    for i, el in enumerate(lil):\n        if type(el) != list:\n            el = [el]\n        for j in el:\n            rows.append(i)\n            cols.append(j)\n    return rows, cols\n\n\ndef get_sparse_features(series, shape):\n    coo_indexes = get_coo_indexes(series.tolist())\n    sparse_df = coo_matrix(\n        (np.ones(len(coo_indexes[0])), (coo_indexes[0], coo_indexes[1])), shape=shape\n    )\n    return sparse_df\n\n\ndef sparse_to_idx(data, pad_idx=-1):\n    indexes = data.nonzero()\n    indexes_df = pd.DataFrame()\n    indexes_df[\"rows\"] = indexes[0]\n    indexes_df[\"cols\"] = indexes[1]\n    mdf = indexes_df.groupby(\"rows\").apply(lambda x: x[\"cols\"].tolist())\n    max_len = mdf.apply(lambda x: len(x)).max()\n    return mdf.apply(lambda x: pd.Series(x + [pad_idx] * (max_len - len(x)))).values\n
def get_coo_indexes(lil): rows = [] cols = [] for i, el in enumerate(lil): if type(el) != list: el = [el] for j in el: rows.append(i) cols.append(j) return rows, cols def get_sparse_features(series, shape): coo_indexes = get_coo_indexes(series.tolist()) sparse_df = coo_matrix( (np.ones(len(coo_indexes[0])), (coo_indexes[0], coo_indexes[1])), shape=shape ) return sparse_df def sparse_to_idx(data, pad_idx=-1): indexes = data.nonzero() indexes_df = pd.DataFrame() indexes_df[\"rows\"] = indexes[0] indexes_df[\"cols\"] = indexes[1] mdf = indexes_df.groupby(\"rows\").apply(lambda x: x[\"cols\"].tolist()) max_len = mdf.apply(lambda x: len(x)).max() return mdf.apply(lambda x: pd.Series(x + [pad_idx] * (max_len - len(x)))).values
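To make what these helpers do a bit more concrete, here is a tiny, purely illustrative example (the toy data below is not part of the original pipeline): get_sparse_features turns lists of watched movies into a sparse indicator matrix, and sparse_to_idx turns that matrix back into padded sequences of movie indices.

toy = pd.Series([[1, 3], [2]])  # user 0 watched movies 1 and 3; user 1 watched movie 2
toy_sparse = get_sparse_features(toy, shape=(2, 4))
print(toy_sparse.todense())
# [[0. 1. 0. 1.]
#  [0. 0. 1. 0.]]
print(sparse_to_idx(toy_sparse, pad_idx=PAD_IDX))
# [[1 3]
#  [2 0]]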

For the time being, we will not use a validation set for hyperparameter optimization, and we will simply concatenate the validation and the test sets into one test set. I only split the data into train/valid/test in case the reader wants to actually do hyperparameter optimization (and because I know that in the future I will).

There is another caveat worth mentioning, related to the indexing of the movies. To build the matrices of movies watched, we use the entire dataset. A more realistic (and correct) approach would be to use ONLY the movies that appear in the training set, and to treat as unknown or unseen those movies in the test set that were not seen during training (a minimal sketch of that approach is shown below). Nonetheless, this does not affect the purpose of this notebook, which is to illustrate how one could use pytorch-widedeep to build a recommendation algorithm. However, if one wanted to explore the performance of different algorithms in a \"proper\" way, these \"details\" would need to be accounted for.
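For completeness, a minimal sketch of that more careful indexing (the names UNKNOWN_IDX, movie2idx and encode_movies are mine, not part of the original notebook, and the rest of this notebook does not take this route):

# build the movie vocabulary from the training set only; movies that only
# appear in the test set are mapped to a single UNKNOWN index
UNKNOWN_IDX = 1  # assumption: index 0 stays reserved for padding (PAD_IDX above)
movie2idx = {m: i + 2 for i, m in enumerate(sorted(df_train.movie_id.unique()))}

def encode_movies(prev_movies):
    # prev_movies is a list of movie ids stored as strings
    return [movie2idx.get(int(m), UNKNOWN_IDX) for m in prev_movies]

df_test['prev_movies_idx'] = df_test['prev_movies'].apply(encode_movies)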

In\u00a0[20]: Copied!
df_test = pd.concat([df_valid, df_test], ignore_index=True)\n
df_test = pd.concat([df_valid, df_test], ignore_index=True) In\u00a0[21]: Copied!
id_cols = [\"user_id\", \"movie_id\"]\nmax_movie_index = max(df_train.movie_id.max(), df_test.movie_id.max())\n
id_cols = [\"user_id\", \"movie_id\"] max_movie_index = max(df_train.movie_id.max(), df_test.movie_id.max()) In\u00a0[22]: Copied!
X_train = df_train.drop(id_cols + [\"rating\", \"prev_movies\", \"target\"], axis=1)\ny_train = np.array(df_train.target.values, dtype=\"int64\")\ntrain_movies_watched = get_sparse_features(\n    df_train[\"prev_movies\"], (len(df_train), max_movie_index + 1)\n)\n\nX_test = df_test.drop(id_cols + [\"rating\", \"prev_movies\", \"target\"], axis=1)\ny_test = np.array(df_test.target.values, dtype=\"int64\")\ntest_movies_watched = get_sparse_features(\n    df_test[\"prev_movies\"], (len(df_test), max_movie_index + 1)\n)\n
X_train = df_train.drop(id_cols + [\"rating\", \"prev_movies\", \"target\"], axis=1) y_train = np.array(df_train.target.values, dtype=\"int64\") train_movies_watched = get_sparse_features( df_train[\"prev_movies\"], (len(df_train), max_movie_index + 1) ) X_test = df_test.drop(id_cols + [\"rating\", \"prev_movies\", \"target\"], axis=1) y_test = np.array(df_test.target.values, dtype=\"int64\") test_movies_watched = get_sparse_features( df_test[\"prev_movies\"], (len(df_test), max_movie_index + 1) )

Let's have a look at the information in each dataset.

In\u00a0[23]: Copied!
X_train.head()\n
X_train.head() Out[23]: mean_rate unknown Action Adventure Animation Children's Comedy Crime Documentary Drama ... Mystery_rate Romance_rate Sci-Fi_rate Thriller_rate War_rate Western_rate age gender occupation zip_code 25423 4.000000 0.0 0.400000 0.200000 0.0 0.0 0.400000 0.0 0.0 0.200000 ... NaN 4.0 4.0 4.000000 4.0 NaN 21 M student 48823 25425 4.000000 0.0 0.285714 0.142857 0.0 0.0 0.428571 0.0 0.0 0.285714 ... NaN 4.0 4.0 4.000000 4.0 NaN 21 M student 48823 25424 4.000000 0.0 0.333333 0.166667 0.0 0.0 0.333333 0.0 0.0 0.333333 ... NaN 4.0 4.0 4.000000 4.0 NaN 21 M student 48823 25426 3.875000 0.0 0.250000 0.125000 0.0 0.0 0.375000 0.0 0.0 0.250000 ... NaN 4.0 4.0 3.666667 4.0 NaN 21 M student 48823 25427 3.888889 0.0 0.222222 0.111111 0.0 0.0 0.333333 0.0 0.0 0.333333 ... NaN 4.0 4.0 3.666667 4.0 NaN 21 M student 48823

5 rows \u00d7 43 columns

In\u00a0[24]: Copied!
y_train\n
y_train Out[24]:
array([772, 288, 108, ..., 183, 432, 509])
In\u00a0[25]: Copied!
train_movies_watched\n
train_movies_watched Out[25]:
<76228x1683 sparse matrix of type '<class 'numpy.float64'>'\n\twith 7957390 stored elements in COOrdinate format>
In\u00a0[26]: Copied!
sorted(df_train.prev_movies.tolist()[0])\n
sorted(df_train.prev_movies.tolist()[0]) Out[26]:
['173', '185', '255', '286', '298']
In\u00a0[27]: Copied!
np.where(train_movies_watched.todense()[0])\n
np.where(train_movies_watched.todense()[0]) Out[27]:
(array([0, 0, 0, 0, 0]), array([173, 185, 255, 286, 298]))

From now on the specifics of this library start to appear. The only component that is going to be a bit different is the so-called tabular component, referred to as continuous in the notebook.

In the case of pytorch-widedeep we have the TabPreprocessor, which allows a lot of flexibility as to how we process the tabular component of this Wide and Deep model. In other words, our tabular component here is a bit more elaborate than the one in the notebook, just a bit...

In\u00a0[28]: Copied!
cat_cols = [\"gender\", \"occupation\", \"zip_code\"]\ncont_cols = [c for c in X_train if c not in cat_cols]\ntab_preprocessor = TabPreprocessor(\n    cat_embed_cols=cat_cols,\n    continuous_cols=cont_cols,\n)\n
cat_cols = [\"gender\", \"occupation\", \"zip_code\"] cont_cols = [c for c in X_train if c not in cat_cols] tab_preprocessor = TabPreprocessor( cat_embed_cols=cat_cols, continuous_cols=cont_cols, ) In\u00a0[29]: Copied!
X_train_tab = tab_preprocessor.fit_transform(X_train.fillna(0))\nX_test_tab = tab_preprocessor.transform(X_test.fillna(0))\n
X_train_tab = tab_preprocessor.fit_transform(X_train.fillna(0)) X_test_tab = tab_preprocessor.transform(X_test.fillna(0))

Now, in the notebook, the author moves the sparse matrices to sparse tensors and then turns them into dense tensors. In reality this is not necessary: one could feed sparse tensors to nn.Linear layers in PyTorch. Nonetheless, this is not the most efficient implementation, which is why in our library the wide, linear component is implemented as an embedding layer.

Nonetheless, to reproduce the notebook as closely as we can, and because the Wide model in pytorch-widedeep is currently not designed to receive sparse tensors (we might consider implementing this functionality), we will turn the sparse COO matrices into dense arrays. We will then code a fairly simple, custom Wide component.
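Just to illustrate the point about sparse tensors, here is a side sketch (not part of the reproduction; it reuses the objects and imports defined above) of keeping the matrix sparse on the PyTorch side:

# convert the scipy COO matrix into a torch sparse tensor and multiply it by a
# dense weight matrix; this is the core operation a sparse-aware linear layer
# would perform (bias omitted for brevity)
coo = train_movies_watched.tocoo()
indices = torch.tensor(np.vstack([coo.row, coo.col]), dtype=torch.long)
values = torch.tensor(coo.data, dtype=torch.float32)
sparse_X = torch.sparse_coo_tensor(indices, values, size=coo.shape)

weight = torch.randn(coo.shape[1], max_movie_index + 1)  # (n_movies, pred_dim)
out = torch.sparse.mm(sparse_X, weight)  # dense output of shape (n_rows, pred_dim)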

In\u00a0[30]: Copied!
X_train_wide = np.array(train_movies_watched.todense())\nX_test_wide = np.array(test_movies_watched.todense())\n
X_train_wide = np.array(train_movies_watched.todense()) X_test_wide = np.array(test_movies_watched.todense())

Finally, the author of the notebook uses a simple Embedding layer to encode the sequences of movies watched, the prev_movies column. In my opinion, there is an element of information redundancy here, because the wide and text components implicitly carry the same information, just in a different form. Moreover, both of the models used for these two components ignore the sequential element in the data. Nonetheless, we want to reproduce the Kaggle notebook as closely as possible, AND, as one can explore later (by performing simple ablation studies), the wide component seems to carry most of the predictive power.

In\u00a0[31]: Copied!
X_train_text = sparse_to_idx(train_movies_watched, pad_idx=PAD_IDX)\nX_test_text = sparse_to_idx(test_movies_watched, pad_idx=PAD_IDX)\n
X_train_text = sparse_to_idx(train_movies_watched, pad_idx=PAD_IDX) X_test_text = sparse_to_idx(test_movies_watched, pad_idx=PAD_IDX)

Let's now build the models

In\u00a0[32]: Copied!
class Wide(nn.Module):\n    def __init__(self, input_dim: int, pred_dim: int):\n        super().__init__()\n\n        self.input_dim = input_dim\n        self.pred_dim = pred_dim\n\n        # When I coded the library I never though that someone would want to code\n        # their own wide component. However, if you do, the wide component must have\n        # a 'wide_linear' attribute. In other words, the linear layer must be\n        # called 'wide_linear'\n        self.wide_linear = nn.Linear(input_dim, pred_dim)\n\n    def forward(self, X):\n        out = self.wide_linear(X.type(torch.float32))\n        return out\n\n\nwide = Wide(X_train_wide.shape[1], max_movie_index + 1)\n
class Wide(nn.Module): def __init__(self, input_dim: int, pred_dim: int): super().__init__() self.input_dim = input_dim self.pred_dim = pred_dim # When I coded the library I never though that someone would want to code # their own wide component. However, if you do, the wide component must have # a 'wide_linear' attribute. In other words, the linear layer must be # called 'wide_linear' self.wide_linear = nn.Linear(input_dim, pred_dim) def forward(self, X): out = self.wide_linear(X.type(torch.float32)) return out wide = Wide(X_train_wide.shape[1], max_movie_index + 1) In\u00a0[33]: Copied!
wide\n
wide Out[33]:
Wide(\n  (wide_linear): Linear(in_features=1683, out_features=1683, bias=True)\n)
In\u00a0[34]: Copied!
class SimpleEmbed(nn.Module):\n    def __init__(self, vocab_size: int, embed_dim: int, pad_idx: int):\n        super().__init__()\n\n        self.vocab_size = vocab_size\n        self.embed_dim = embed_dim\n        self.pad_idx = pad_idx\n\n        # The sequences of movies watched are simply embedded in the Kaggle\n        # notebook. No RNN, Transformer or any model is used\n        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx)\n\n    def forward(self, X):\n        embed = self.embed(X)\n        embed_mean = torch.mean(embed, dim=1)\n        return embed_mean\n\n    @property\n    def output_dim(self) -> int:\n        # All deep components in a custom 'pytorch-widedeep' model must have\n        # an output_dim property\n        return self.embed_dim\n\n\n#  In the notebook the author uses simply embeddings\nsimple_embed = SimpleEmbed(max_movie_index + 1, 16, 0)\n
class SimpleEmbed(nn.Module): def __init__(self, vocab_size: int, embed_dim: int, pad_idx: int): super().__init__() self.vocab_size = vocab_size self.embed_dim = embed_dim self.pad_idx = pad_idx # The sequences of movies watched are simply embedded in the Kaggle # notebook. No RNN, Transformer or any model is used self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx) def forward(self, X): embed = self.embed(X) embed_mean = torch.mean(embed, dim=1) return embed_mean @property def output_dim(self) -> int: # All deep components in a custom 'pytorch-widedeep' model must have # an output_dim property return self.embed_dim # In the notebook the author uses simply embeddings simple_embed = SimpleEmbed(max_movie_index + 1, 16, 0) In\u00a0[35]: Copied!
simple_embed\n
simple_embed Out[35]:
SimpleEmbed(\n  (embed): Embedding(1683, 16, padding_idx=0)\n)

Maybe one would like to use an RNN to account for the sequential nature of the problem. If that were the case, it would be as easy as:

In\u00a0[36]: Copied!
basic_rnn = BasicRNN(\n    vocab_size=max_movie_index + 1,\n    embed_dim=16,\n    hidden_dim=32,\n    n_layers=2,\n    rnn_type=\"gru\",\n)\n
basic_rnn = BasicRNN( vocab_size=max_movie_index + 1, embed_dim=16, hidden_dim=32, n_layers=2, rnn_type=\"gru\", )

And finally, the tabular component, which in the notebook is simply a stack of linear + ReLU layers. In our case we have embedding layers before the linear layers to encode the categorical and numerical columns.

In\u00a0[37]: Copied!
tab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=tab_preprocessor.continuous_cols,\n    cont_norm_layer=None,\n    mlp_hidden_dims=[1024, 512, 256],\n    mlp_activation=\"relu\",\n)\n
tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=tab_preprocessor.continuous_cols, cont_norm_layer=None, mlp_hidden_dims=[1024, 512, 256], mlp_activation=\"relu\", ) In\u00a0[38]: Copied!
tab_mlp\n
tab_mlp Out[38]:
TabMlp(\n  (cat_embed): DiffSizeCatEmbeddings(\n    (embed_layers): ModuleDict(\n      (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n      (emb_layer_occupation): Embedding(22, 9, padding_idx=0)\n      (emb_layer_zip_code): Embedding(648, 60, padding_idx=0)\n    )\n    (embedding_dropout): Dropout(p=0.0, inplace=False)\n  )\n  (cont_norm): Identity()\n  (encoder): MLP(\n    (mlp): Sequential(\n      (dense_layer_0): Sequential(\n        (0): Linear(in_features=111, out_features=1024, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n      (dense_layer_1): Sequential(\n        (0): Linear(in_features=1024, out_features=512, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n      (dense_layer_2): Sequential(\n        (0): Linear(in_features=512, out_features=256, bias=True)\n        (1): ReLU(inplace=True)\n        (2): Dropout(p=0.1, inplace=False)\n      )\n    )\n  )\n)

Finally, we simply wrap up all models with the WideDeep 'collector' class and we are ready to train.

In\u00a0[39]: Copied!
wide_deep_model = WideDeep(\n    wide=wide, deeptabular=tab_mlp, deeptext=simple_embed, pred_dim=max_movie_index + 1\n)\n
wide_deep_model = WideDeep( wide=wide, deeptabular=tab_mlp, deeptext=simple_embed, pred_dim=max_movie_index + 1 ) In\u00a0[40]: Copied!
wide_deep_model\n
wide_deep_model Out[40]:
WideDeep(\n  (wide): Wide(\n    (wide_linear): Linear(in_features=1683, out_features=1683, bias=True)\n  )\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cat_embed): DiffSizeCatEmbeddings(\n        (embed_layers): ModuleDict(\n          (emb_layer_gender): Embedding(3, 2, padding_idx=0)\n          (emb_layer_occupation): Embedding(22, 9, padding_idx=0)\n          (emb_layer_zip_code): Embedding(648, 60, padding_idx=0)\n        )\n        (embedding_dropout): Dropout(p=0.0, inplace=False)\n      )\n      (cont_norm): Identity()\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=111, out_features=1024, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=1024, out_features=512, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_2): Sequential(\n            (0): Linear(in_features=512, out_features=256, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=256, out_features=1683, bias=True)\n  )\n  (deeptext): Sequential(\n    (0): SimpleEmbed(\n      (embed): Embedding(1683, 16, padding_idx=0)\n    )\n    (1): Linear(in_features=16, out_features=1683, bias=True)\n  )\n)

Note that the main difference between this wide and deep model and the Wide and Deep model in the Kaggle notebook is that in that notebook, the author concatenates the embeddings and the tabular features, then passes this concatenation through a stack of linear + ReLU layers with a final output dim of 256. He then concatenates this output with the binary features and connects this concatenation to the final linear layer (so the final linear layer receives an input of dim 256 + 1683). Our implementation follows the notation of the original paper: instead of concatenating the tabular, text and wide components and then connecting them to the output neurons, we first compute each component's output and then add them (see here: https://arxiv.org/pdf/1606.07792.pdf, their Eq 3). Note that this is effectively the same, with the caveat that while in one case one initialises a big weight matrix "at once", in our implementation we initialise different matrices for different components. Anyway, let's give it a go.
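A tiny, self-contained check of that equivalence (plain PyTorch, nothing specific to the library): a single linear layer applied to the concatenated inputs gives the same result as summing per-component linear maps whose weights are the corresponding column blocks of the big matrix.

a = torch.randn(4, 3)   # e.g. the tabular component output
b = torch.randn(4, 5)   # e.g. the text component output
W = torch.randn(2, 8)   # one "big" weight matrix over the concatenation
bias = torch.randn(2)

out_concat = torch.cat([a, b], dim=1) @ W.T + bias    # concatenate, then project
out_split = a @ W[:, :3].T + b @ W[:, 3:].T + bias    # project separately, then add

assert torch.allclose(out_concat, out_split, atol=1e-6)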

In\u00a0[41]: Copied!
trainer = Trainer(\n    model=wide_deep_model,\n    objective=\"multiclass\",\n    custom_loss_function=nn.CrossEntropyLoss(ignore_index=PAD_IDX),\n    optimizers=torch.optim.Adam(wide_deep_model.parameters(), lr=1e-3),\n)\n
trainer = Trainer( model=wide_deep_model, objective=\"multiclass\", custom_loss_function=nn.CrossEntropyLoss(ignore_index=PAD_IDX), optimizers=torch.optim.Adam(wide_deep_model.parameters(), lr=1e-3), ) In\u00a0[42]: Copied!
trainer.fit(\n    X_train={\n        \"X_wide\": X_train_wide,\n        \"X_tab\": X_train_tab,\n        \"X_text\": X_train_text,\n        \"target\": y_train,\n    },\n    X_val={\n        \"X_wide\": X_test_wide,\n        \"X_tab\": X_test_tab,\n        \"X_text\": X_test_text,\n        \"target\": y_test,\n    },\n    n_epochs=5,\n    batch_size=512,\n    shuffle=False,\n)\n
trainer.fit( X_train={ \"X_wide\": X_train_wide, \"X_tab\": X_train_tab, \"X_text\": X_train_text, \"target\": y_train, }, X_val={ \"X_wide\": X_test_wide, \"X_tab\": X_test_tab, \"X_text\": X_test_text, \"target\": y_test, }, n_epochs=5, batch_size=512, shuffle=False, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 149/149 [00:19<00:00,  7.66it/s, loss=6.66]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 38/38 [00:02<00:00, 18.75it/s, loss=6.6]\nepoch 2: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 149/149 [00:21<00:00,  6.95it/s, loss=5.97]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 38/38 [00:01<00:00, 21.03it/s, loss=6.52]\nepoch 3: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 149/149 [00:19<00:00,  7.51it/s, loss=5.65]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 38/38 [00:01<00:00, 20.16it/s, loss=6.53]\nepoch 4: 
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 149/149 [00:23<00:00,  6.29it/s, loss=5.41]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 38/38 [00:02<00:00, 13.97it/s, loss=6.57]\nepoch 5: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 149/149 [00:19<00:00,  7.58it/s, loss=5.2]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 38/38 [00:02<00:00, 18.82it/s, loss=6.63]\n

Now one could continue to the 'compare metrics' section of the Kaggle notebook. However, for the purposes of illustrating how one could use pytorch-widedeep to build recommendation algorithms, we consider this notebook complete and move on to part 2.
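For reference, the simple ablation mentioned earlier could look roughly like this (a sketch, not run here; it reuses the objects defined above and assumes a wide-only model is the comparison of interest):

# train a model that only uses the wide component, so its validation loss can
# be compared with that of the full wide + tabular + text model above
wide_only = Wide(X_train_wide.shape[1], max_movie_index + 1)
wide_only_model = WideDeep(wide=wide_only, pred_dim=max_movie_index + 1)

ablation_trainer = Trainer(
    model=wide_only_model,
    objective="multiclass",
    custom_loss_function=nn.CrossEntropyLoss(ignore_index=PAD_IDX),
)
ablation_trainer.fit(
    X_train={"X_wide": X_train_wide, "target": y_train},
    X_val={"X_wide": X_test_wide, "target": y_test},
    n_epochs=5,
    batch_size=512,
    shuffle=False,
)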

"},{"location":"examples/19_wide_and_deep_for_recsys_pt1.html#problem-formulation","title":"Problem formulation\u00b6","text":"

In this particular exercise the problem is formulated as predicting the next movie that will be watched (as a consequence, the last interactions will be discarded).

"},{"location":"examples/19_wide_and_deep_for_recsys_pt2.html","title":"19_wide_and_deep_for_recsys_pt2","text":"

This is the second of the two notebooks where we aim to illustrate how one could use this library to build recommendation algorithms, using the example in this Kaggle notebook as guidance. In the previous notebook we used pytorch-widedeep to build a model that replicated the one in that notebook almost exactly. In this shorter notebook we will show how one could use the library to explore other models, following the same problem formulation, that is: given the state of a user at a certain point in time, having watched a series of movies, our goal is to predict which movie the user will watch next.

Assuming that one has read (and run) the previous notebook, the required data will be stored in a local dir called prepared_data, so let's read it:

In\u00a0[1]: Copied!
from pathlib import Path\n\nimport numpy as np\nimport torch\nimport pandas as pd\nfrom torch import nn\n\nfrom pytorch_widedeep import Trainer\nfrom pytorch_widedeep.utils import pad_sequences\nfrom pytorch_widedeep.models import TabMlp, WideDeep, Transformer\nfrom pytorch_widedeep.preprocessing import TabPreprocessor\n
from pathlib import Path import numpy as np import torch import pandas as pd from torch import nn from pytorch_widedeep import Trainer from pytorch_widedeep.utils import pad_sequences from pytorch_widedeep.models import TabMlp, WideDeep, Transformer from pytorch_widedeep.preprocessing import TabPreprocessor In\u00a0[2]: Copied!
save_path = Path(\"prepared_data\")\n\nPAD_IDX = 0\n\nid_cols = [\"user_id\", \"movie_id\"]\n\ndf_train = pd.read_pickle(save_path / \"df_train.pkl\")\ndf_valid = pd.read_pickle(save_path / \"df_valid.pkl\")\ndf_test = pd.read_pickle(save_path / \"df_test.pkl\")\n
save_path = Path(\"prepared_data\") PAD_IDX = 0 id_cols = [\"user_id\", \"movie_id\"] df_train = pd.read_pickle(save_path / \"df_train.pkl\") df_valid = pd.read_pickle(save_path / \"df_valid.pkl\") df_test = pd.read_pickle(save_path / \"df_test.pkl\")

...remember that in the previous notebook we explained that we are not going to use a validation set here (in a real-world example, or simply a more realistic example, one should always use it).

In\u00a0[3]: Copied!
df_test = pd.concat([df_valid, df_test], ignore_index=True)\n
df_test = pd.concat([df_valid, df_test], ignore_index=True)

Also remember that in the previous notebook we discussed that the 'maxlen' and 'max_movie_index' parameters should be computed using only the train set. In particular, to tokenize properly, one would have to use ONLY train tokens and add a token for new 'unknown'/'unseen' movies in the test set. This can be done with this library or manually, so I will leave it to the reader to implement that tokenization approach.
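A rough sketch of that train-only tokenization (illustrative names such as UNK_IDX and token2idx are assumptions; this is not used below):

UNK_IDX = 1  # PAD_IDX = 0 is already reserved for padding

# build the vocabulary from the training sequences only
train_tokens = set(tok for seq in df_train.prev_movies for tok in seq)
token2idx = {tok: i + 2 for i, tok in enumerate(sorted(train_tokens))}

def tokenize(seq):
    # movies never seen during training map to the 'unknown' token
    return [token2idx.get(tok, UNK_IDX) for tok in seq]

train_sequences = [tokenize(seq) for seq in df_train.prev_movies]
test_sequences = [tokenize(seq) for seq in df_test.prev_movies]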

In\u00a0[4]: Copied!
maxlen = max(\n    df_train.prev_movies.apply(lambda x: len(x)).max(),\n    df_test.prev_movies.apply(lambda x: len(x)).max(),\n)\n\nmax_movie_index = max(df_train.movie_id.max(), df_test.movie_id.max())\n
maxlen = max( df_train.prev_movies.apply(lambda x: len(x)).max(), df_test.prev_movies.apply(lambda x: len(x)).max(), ) max_movie_index = max(df_train.movie_id.max(), df_test.movie_id.max())

From now on things are pretty simple, especially bearing in mind that in this example we are not going to use a wide component since, in principle, one would expect the information in that component to also be 'carried' by the movie sequences (although, if one performs ablation studies on the previous notebook, these suggest that most of the predictive power comes from the linear, wide model).

In the example here we are going to explore one (of many) possibilities. We are simply going to encode the triplet (user, item, rating) and use it as a deeptabular component and the sequences of previously watched movies as the deeptext component. For the deeptext component we are going to use a basic encoder-only transformer model.

Let's start with the tabular data preparation

In\u00a0[5]: Copied!
df_train_user_item = df_train[[\"user_id\", \"movie_id\", \"rating\"]]\ntrain_movies_sequences = df_train.prev_movies.apply(\n    lambda x: [int(el) for el in x]\n).to_list()\ny_train = df_train.target.values.astype(int)\n\ndf_test_user_item = df_test[[\"user_id\", \"movie_id\", \"rating\"]]\ntest_movies_sequences = df_test.prev_movies.apply(\n    lambda x: [int(el) for el in x]\n).to_list()\ny_test = df_test.target.values.astype(int)\n\ntab_preprocessor = TabPreprocessor(\n    cat_embed_cols=[\"user_id\", \"movie_id\", \"rating\"],\n)\nX_train_tab = tab_preprocessor.fit_transform(df_train_user_item)\nX_test_tab = tab_preprocessor.transform(df_test_user_item)\n
df_train_user_item = df_train[[\"user_id\", \"movie_id\", \"rating\"]] train_movies_sequences = df_train.prev_movies.apply( lambda x: [int(el) for el in x] ).to_list() y_train = df_train.target.values.astype(int) df_test_user_item = df_test[[\"user_id\", \"movie_id\", \"rating\"]] test_movies_sequences = df_test.prev_movies.apply( lambda x: [int(el) for el in x] ).to_list() y_test = df_test.target.values.astype(int) tab_preprocessor = TabPreprocessor( cat_embed_cols=[\"user_id\", \"movie_id\", \"rating\"], ) X_train_tab = tab_preprocessor.fit_transform(df_train_user_item) X_test_tab = tab_preprocessor.transform(df_test_user_item)

And now the text component, simply padding the sequences:

In\u00a0[6]: Copied!
X_train_text = np.array(\n    [\n        pad_sequences(\n            s,\n            maxlen=maxlen,\n            pad_first=False,\n            pad_idx=PAD_IDX,\n        )\n        for s in train_movies_sequences\n    ]\n)\nX_test_text = np.array(\n    [\n        pad_sequences(\n            s,\n            maxlen=maxlen,\n            pad_first=False,\n            pad_idx=0,\n        )\n        for s in test_movies_sequences\n    ]\n)\n
X_train_text = np.array( [ pad_sequences( s, maxlen=maxlen, pad_first=False, pad_idx=PAD_IDX, ) for s in train_movies_sequences ] ) X_test_text = np.array( [ pad_sequences( s, maxlen=maxlen, pad_first=False, pad_idx=0, ) for s in test_movies_sequences ] )

We now define the model components and the wide and deep model.

In\u00a0[7]: Copied!
tab_mlp = TabMlp(\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    mlp_hidden_dims=[1024, 512, 256],\n    mlp_activation=\"relu\",\n)\n\n# plenty of options here, see the docs\ntransformer = Transformer(\n    vocab_size=max_movie_index + 1,\n    embed_dim=32,\n    n_heads=2,\n    n_blocks=2,\n    seq_length=maxlen,\n)\n\nwide_deep_model = WideDeep(\n    deeptabular=tab_mlp, deeptext=transformer, pred_dim=max_movie_index + 1\n)\n
tab_mlp = TabMlp( column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, mlp_hidden_dims=[1024, 512, 256], mlp_activation=\"relu\", ) # plenty of options here, see the docs transformer = Transformer( vocab_size=max_movie_index + 1, embed_dim=32, n_heads=2, n_blocks=2, seq_length=maxlen, ) wide_deep_model = WideDeep( deeptabular=tab_mlp, deeptext=transformer, pred_dim=max_movie_index + 1 ) In\u00a0[8]: Copied!
wide_deep_model\n
wide_deep_model Out[8]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cat_embed): DiffSizeCatEmbeddings(\n        (embed_layers): ModuleDict(\n          (emb_layer_user_id): Embedding(749, 65, padding_idx=0)\n          (emb_layer_movie_id): Embedding(1612, 100, padding_idx=0)\n          (emb_layer_rating): Embedding(6, 4, padding_idx=0)\n        )\n        (embedding_dropout): Dropout(p=0.0, inplace=False)\n      )\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=169, out_features=1024, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=1024, out_features=512, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_2): Sequential(\n            (0): Linear(in_features=512, out_features=256, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=256, out_features=1683, bias=True)\n  )\n  (deeptext): Sequential(\n    (0): Transformer(\n      (embedding): Embedding(1683, 32, padding_idx=0)\n      (pos_encoder): PositionalEncoding(\n        (dropout): Dropout(p=0.1, inplace=False)\n      )\n      (encoder): Sequential(\n        (transformer_block0): TransformerEncoder(\n          (attn): MultiHeadedAttention(\n            (dropout): Dropout(p=0.1, inplace=False)\n            (q_proj): Linear(in_features=32, out_features=32, bias=False)\n            (kv_proj): Linear(in_features=32, out_features=64, bias=False)\n            (out_proj): Linear(in_features=32, out_features=32, bias=False)\n          )\n          (ff): FeedForward(\n            (w_1): Linear(in_features=32, out_features=128, bias=True)\n            (w_2): Linear(in_features=128, out_features=32, bias=True)\n            (dropout): Dropout(p=0.1, inplace=False)\n            (activation): GELU(approximate='none')\n          )\n          (attn_addnorm): AddNorm(\n            (dropout): Dropout(p=0.1, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n          (ff_addnorm): AddNorm(\n            (dropout): Dropout(p=0.1, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n        )\n        (transformer_block1): TransformerEncoder(\n          (attn): MultiHeadedAttention(\n            (dropout): Dropout(p=0.1, inplace=False)\n            (q_proj): Linear(in_features=32, out_features=32, bias=False)\n            (kv_proj): Linear(in_features=32, out_features=64, bias=False)\n            (out_proj): Linear(in_features=32, out_features=32, bias=False)\n          )\n          (ff): FeedForward(\n            (w_1): Linear(in_features=32, out_features=128, bias=True)\n            (w_2): Linear(in_features=128, out_features=32, bias=True)\n            (dropout): Dropout(p=0.1, inplace=False)\n            (activation): GELU(approximate='none')\n          )\n          (attn_addnorm): AddNorm(\n            (dropout): Dropout(p=0.1, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n          (ff_addnorm): AddNorm(\n            (dropout): Dropout(p=0.1, inplace=False)\n            (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)\n          )\n        )\n      )\n    )\n    (1): 
Linear(in_features=23552, out_features=1683, bias=True)\n  )\n)

And as in the previous notebook, let's train (you will need a GPU for this)

In\u00a0[\u00a0]: Copied!
trainer = Trainer(\n    model=wide_deep_model,\n    objective=\"multiclass\",\n    custom_loss_function=nn.CrossEntropyLoss(ignore_index=PAD_IDX),\n    optimizers=torch.optim.Adam(wide_deep_model.parameters(), lr=1e-3),\n)\n\ntrainer.fit(\n    X_train={\n        \"X_tab\": X_train_tab,\n        \"X_text\": X_train_text,\n        \"target\": y_train,\n    },\n    X_val={\n        \"X_tab\": X_test_tab,\n        \"X_text\": X_test_text,\n        \"target\": y_test,\n    },\n    n_epochs=10,\n    batch_size=521,\n    shuffle=False,\n)\n
trainer = Trainer( model=wide_deep_model, objective=\"multiclass\", custom_loss_function=nn.CrossEntropyLoss(ignore_index=PAD_IDX), optimizers=torch.optim.Adam(wide_deep_model.parameters(), lr=1e-3), ) trainer.fit( X_train={ \"X_tab\": X_train_tab, \"X_text\": X_train_text, \"target\": y_train, }, X_val={ \"X_tab\": X_test_tab, \"X_text\": X_test_text, \"target\": y_test, }, n_epochs=10, batch_size=521, shuffle=False, )
epoch 1:   0%|                                                                                                         | 0/147 [00:34<?, ?it/s]\n
In\u00a0[\u00a0]: Copied!
\n
"},{"location":"examples/20_load_from_folder_functionality.html","title":"20_load_from_folder_functionality.","text":"

In this notebook I want to illustrate how one can use our [...]FromFolder functionalities along with the [...]ChunkPreProcessors in those cases where the dataset is too big to fit in memory.

These functionalities in the library have been designed for the following scenario.

In\u00a0[1]: Copied!
import numpy as np\nimport torch\nimport pandas as pd\nfrom torch.utils.data import DataLoader\n\nfrom pytorch_widedeep.models import TabMlp, Vision, BasicRNN, WideDeep\nfrom pytorch_widedeep.training import TrainerFromFolder\nfrom pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint\nfrom pytorch_widedeep.preprocessing import (\n    TabPreprocessor,\n    TextPreprocessor,\n    ImagePreprocessor,\n    ChunkTabPreprocessor,\n    ChunkTextPreprocessor,\n)\nfrom pytorch_widedeep.load_from_folder import (\n    TabFromFolder,\n    TextFromFolder,\n    ImageFromFolder,\n    WideDeepDatasetFromFolder,\n)\n
import numpy as np import torch import pandas as pd from torch.utils.data import DataLoader from pytorch_widedeep.models import TabMlp, Vision, BasicRNN, WideDeep from pytorch_widedeep.training import TrainerFromFolder from pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint from pytorch_widedeep.preprocessing import ( TabPreprocessor, TextPreprocessor, ImagePreprocessor, ChunkTabPreprocessor, ChunkTextPreprocessor, ) from pytorch_widedeep.load_from_folder import ( TabFromFolder, TextFromFolder, ImageFromFolder, WideDeepDatasetFromFolder, ) In\u00a0[2]: Copied!
# in my case, I place the data in a folder I call tmp_data, let's see how it looks\nairbnb_data = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\")\nairbnb_data.sample(5)\n
# in my case, I place the data in a folder I call tmp_data, let's see how it looks airbnb_data = pd.read_csv(\"../tmp_data/airbnb/airbnb_sample.csv\") airbnb_data.sample(5) Out[2]: id host_id description host_listings_count host_identity_verified neighbourhood_cleansed latitude longitude is_location_exact property_type ... amenity_wide_entrance amenity_wide_entrance_for_guests amenity_wide_entryway amenity_wide_hallways amenity_wifi amenity_window_guards amenity_wine_cooler security_deposit extra_people yield 39 53242.jpg 247650 A lovely big bright bedroom in a 2 bedroom fla... 2.0 t Lambeth 51.47075 -0.12913 t apartment ... 0 0 0 0 1 0 0 250.0 5.0 9.75 214 236716.jpg 1241070 We offer a warm welcome in our quiet double ro... 1.0 t Hackney 51.56593 -0.07482 t other ... 0 0 0 0 1 0 0 200.0 10.0 76.50 400 346523.jpg 1756532 Available for you to rent is a cozy studio in ... 2.0 t Kensington and Chelsea 51.48311 -0.18428 t other ... 0 0 0 0 1 0 0 0.0 50.0 180.90 512 389627.jpg 1949299 This gorgeous studio flat is situated in the v... 1.0 t Westminster 51.51838 -0.14238 f apartment ... 0 0 0 0 1 0 0 250.0 25.0 276.90 504 388767.jpg 1945165 If you want to experience London at it's best ... 2.0 f Camden 51.54293 -0.14073 t apartment ... 0 0 0 0 1 0 0 150.0 10.0 591.10

5 rows \u00d7 223 columns

In\u00a0[3]: Copied!
# for example\nfrom IPython.display import Image\n\npil_img = Image(filename=\"../tmp_data/airbnb/property_picture/272908.jpg\")\ndisplay(pil_img)\n
# for example from IPython.display import Image pil_img = Image(filename=\"../tmp_data/airbnb/property_picture/272908.jpg\") display(pil_img) In\u00a0[4]: Copied!
# And the description for the property that that picture belongs to is:\nairbnb_data[airbnb_data.id == \"272908.jpg\"].description.tolist()\n
# And the description for the property that that picture belongs to is: airbnb_data[airbnb_data.id == \"272908.jpg\"].description.tolist() Out[4]:
[\"Bright, sunny beautiful room that will give you the perfect base to explore all of London. Come and explore one of London's best neighbourhoods - Herne Hill! As mentioned in (Website hidden by Airbnb)   (Website hidden by Airbnb)  WiFi availability with a fully stocked and clean uplifting home. Lovely sunny, airy and big double bedroom on a leafy south-London street.    Note: This room comes with a reserved Off-Street parking spot! The room is on the first floor and boasts an enormous Super King bed, gorgeous wooden floors, tall ceilings and large windows which let in the sunshine almost all day. (Yoga May or meditation cushion available on request) The flat is bright and airy and big! So lots of space for all.  Location wise you are only 10 minutes walk to either Herne Hill or West Dulwich stations, both of which will take you to Victoria and the city within minutes. You can also hop on a bus right outside the flat that will take you to Brixton tube station within 8 minutes where you \"]

OK, so we have tabular data where one column is description and another, id, points to the images stored on disk. Now, remember the following, because it will appear a few times in the notebook: our \"reference dataset\" is the tabular data.

Therefore, since I want to illustrate a \"semi-realistic\" case, if we need to split the data into training, validation and test datasets, these datasets need to be stored separately on disk. In my case I have done this, and in the tmp_data/airbnb dir I have the following:

../tmp_data/airbnb\n\u251c\u2500\u2500 airbnb_sample.csv\n\u251c\u2500\u2500 airbnb_sample_eval.csv\n\u251c\u2500\u2500 airbnb_sample_test.csv\n\u251c\u2500\u2500 airbnb_sample_train.csv\n\u2514\u2500\u2500 property_picture\n

Where airbnb_sample.csv is the full sample (1001 rows) and the train, eval and test files are the corresponding splits. In a realistic example, the full sample would be the 'gigantic' dataset and the rest the corresponding splits. One has to do this 'offline', prior to starting to code.

Also, one needs to know the total number of observations/rows, as well as the size of each split. In our case the train size is 800, and the eval and test sizes are 100 and 101 respectively.
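If one needed to produce those split files, the 'offline' step could be as simple as the following sketch (it assumes the full csv fits in memory for this one-off step, and uses the file names described above):

# one-off, offline split of the full sample into train/eval/test csv files
import pandas as pd

full = pd.read_csv("../tmp_data/airbnb/airbnb_sample.csv")
shuffled = full.sample(frac=1.0, random_state=42).reset_index(drop=True)

shuffled.iloc[:800].to_csv("../tmp_data/airbnb/airbnb_sample_train.csv", index=False)
shuffled.iloc[800:900].to_csv("../tmp_data/airbnb/airbnb_sample_eval.csv", index=False)
shuffled.iloc[900:].to_csv("../tmp_data/airbnb/airbnb_sample_test.csv", index=False)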

With all that info, let's start.

In\u00a0[5]: Copied!
# path to the tabular data and the splits\ndata_path = \"../tmp_data/airbnb/\"\ntrain_fname = \"airbnb_sample_train.csv\"\neval_fname = \"airbnb_sample_eval.csv\"\ntest_fname = \"airbnb_sample_test.csv\"\n\n# split sizes\ntrain_size = 800\neval_size = 100\ntest_size = 101\n\n# number of chunks for the Chunk Preprocessors\nchunksize = 100\nn_chunks = int(np.ceil(train_size / chunksize))\n\n# path to the image dataset and name of the image col\nimg_path = \"../tmp_data/airbnb/property_picture/\"\nimg_col = \"id\"\n\n# name of the text col\ntext_col = \"description\"\n\n# mane of the target\ntarget_col = \"yield\"\n\n# definition of the categorical and continuous cols for the TabPreprocessor\ncat_embed_cols = [\n    \"host_listings_count\",\n    \"neighbourhood_cleansed\",\n    \"is_location_exact\",\n    \"property_type\",\n    \"room_type\",\n    \"accommodates\",\n    \"bathrooms\",\n    \"bedrooms\",\n    \"beds\",\n    \"guests_included\",\n    \"minimum_nights\",\n    \"instant_bookable\",\n    \"cancellation_policy\",\n    \"has_house_rules\",\n    \"host_gender\",\n    \"accommodates_catg\",\n    \"guests_included_catg\",\n    \"minimum_nights_catg\",\n    \"host_listings_count_catg\",\n    \"bathrooms_catg\",\n    \"bedrooms_catg\",\n    \"beds_catg\",\n    \"security_deposit\",\n    \"extra_people\",\n]\ncont_cols = [\"latitude\", \"longitude\"]\n
# path to the tabular data and the splits data_path = \"../tmp_data/airbnb/\" train_fname = \"airbnb_sample_train.csv\" eval_fname = \"airbnb_sample_eval.csv\" test_fname = \"airbnb_sample_test.csv\" # split sizes train_size = 800 eval_size = 100 test_size = 101 # number of chunks for the Chunk Preprocessors chunksize = 100 n_chunks = int(np.ceil(train_size / chunksize)) # path to the image dataset and name of the image col img_path = \"../tmp_data/airbnb/property_picture/\" img_col = \"id\" # name of the text col text_col = \"description\" # mane of the target target_col = \"yield\" # definition of the categorical and continuous cols for the TabPreprocessor cat_embed_cols = [ \"host_listings_count\", \"neighbourhood_cleansed\", \"is_location_exact\", \"property_type\", \"room_type\", \"accommodates\", \"bathrooms\", \"bedrooms\", \"beds\", \"guests_included\", \"minimum_nights\", \"instant_bookable\", \"cancellation_policy\", \"has_house_rules\", \"host_gender\", \"accommodates_catg\", \"guests_included_catg\", \"minimum_nights_catg\", \"host_listings_count_catg\", \"bathrooms_catg\", \"bedrooms_catg\", \"beds_catg\", \"security_deposit\", \"extra_people\", ] cont_cols = [\"latitude\", \"longitude\"] In\u00a0[6]: Copied!
tab_preprocessor = TabPreprocessor(\n    embed_cols=cat_embed_cols,\n    continuous_cols=cont_cols,\n    default_embed_dim=8,\n    verbose=0,\n)\n\ntext_preprocessor = TextPreprocessor(\n    text_col=text_col,\n    n_cpus=1,\n)\n\nimg_preprocessor = ImagePreprocessor(\n    img_col=img_col,\n    img_path=img_path,\n)\n
tab_preprocessor = TabPreprocessor( embed_cols=cat_embed_cols, continuous_cols=cont_cols, default_embed_dim=8, verbose=0, ) text_preprocessor = TextPreprocessor( text_col=text_col, n_cpus=1, ) img_preprocessor = ImagePreprocessor( img_col=img_col, img_path=img_path, ) In\u00a0[7]: Copied!
tab_preprocessor.fit(airbnb_data)\ntext_preprocessor.fit(airbnb_data)\nimg_preprocessor.fit(airbnb_data)\n
tab_preprocessor.fit(airbnb_data) text_preprocessor.fit(airbnb_data) img_preprocessor.fit(airbnb_data)
The vocabulary contains 2192 tokens\n
Out[7]:
ImagePreprocessor(img_col=id, img_path=../tmp_data/airbnb/property_picture/, width=224, height=224, verbose=1)
In\u00a0[8]: Copied!
chunk_tab_preprocessor = ChunkTabPreprocessor(\n    embed_cols=cat_embed_cols,\n    continuous_cols=cont_cols,\n    n_chunks=n_chunks,\n    default_embed_dim=8,\n    verbose=0,\n)\n\nchunk_text_preprocessor = ChunkTextPreprocessor(\n    n_chunks=n_chunks,\n    text_col=text_col,\n    n_cpus=1,\n    verbose=0,\n)\n\nfor i, chunk in enumerate(\n    pd.read_csv(\"/\".join([data_path, train_fname]), chunksize=chunksize)\n):\n    print(f\"chunk in loop: {i + 1}\")\n    chunk_tab_preprocessor.fit(chunk)\n    chunk_text_preprocessor.fit(chunk)\n
chunk_tab_preprocessor = ChunkTabPreprocessor( embed_cols=cat_embed_cols, continuous_cols=cont_cols, n_chunks=n_chunks, default_embed_dim=8, verbose=0, ) chunk_text_preprocessor = ChunkTextPreprocessor( n_chunks=n_chunks, text_col=text_col, n_cpus=1, verbose=0, ) for i, chunk in enumerate( pd.read_csv(\"/\".join([data_path, train_fname]), chunksize=chunksize) ): print(f\"chunk in loop: {i + 1}\") chunk_tab_preprocessor.fit(chunk) chunk_text_preprocessor.fit(chunk)
chunk in loop: 1\nchunk in loop: 2\nchunk in loop: 3\nchunk in loop: 4\nchunk in loop: 5\nchunk in loop: 6\nchunk in loop: 7\nchunk in loop: 8\n
In\u00a0[9]: Copied!
train_tab_folder = TabFromFolder(\n    fname=train_fname,\n    directory=data_path,\n    target_col=target_col,\n    preprocessor=tab_preprocessor,\n    text_col=text_col,\n    img_col=img_col,\n)\n\n# Note how we can use the `train_tab_folder` as reference so we don't have to\n# define all parameters again\neval_tab_folder = TabFromFolder(fname=eval_fname, reference=train_tab_folder)\n\n# Note that for the test set we can ignore the target as no metric will be\n# computed by the `predict` method\ntest_tab_folder = TabFromFolder(\n    fname=test_fname, reference=train_tab_folder, ignore_target=True\n)\n
train_tab_folder = TabFromFolder( fname=train_fname, directory=data_path, target_col=target_col, preprocessor=tab_preprocessor, text_col=text_col, img_col=img_col, ) # Note how we can use the `train_tab_folder` as reference so we don't have to # define all parameters again eval_tab_folder = TabFromFolder(fname=eval_fname, reference=train_tab_folder) # Note that for the test set we can ignore the target as no metric will be # computed by the `predict` method test_tab_folder = TabFromFolder( fname=test_fname, reference=train_tab_folder, ignore_target=True ) In\u00a0[10]: Copied!
# for the text and image datasets we do not need to specify eval or test loaders\ntext_folder = TextFromFolder(preprocessor=text_preprocessor)\nimg_folder = ImageFromFolder(preprocessor=img_preprocessor)\n
# for the text and image datasets we do not need to specify eval or test loaders text_folder = TextFromFolder(preprocessor=text_preprocessor) img_folder = ImageFromFolder(preprocessor=img_preprocessor) In\u00a0[11]: Copied!
train_dataset_folder = WideDeepDatasetFromFolder(\n    n_samples=train_size,\n    tab_from_folder=train_tab_folder,\n    text_from_folder=text_folder,\n    img_from_folder=img_folder,\n)\n\n# Note that the eval and test loaders only need their corresponding\n# `TabFromFolder` classes. The rest of the parameters can be defined\n# via a `reference` `TabFromFolder` class\neval_dataset_folder = WideDeepDatasetFromFolder(\n    n_samples=eval_size,\n    tab_from_folder=eval_tab_folder,\n    reference=train_dataset_folder,\n)\n\ntest_dataset_folder = WideDeepDatasetFromFolder(\n    n_samples=test_size,\n    tab_from_folder=test_tab_folder,\n    reference=train_dataset_folder,\n)\n
train_dataset_folder = WideDeepDatasetFromFolder( n_samples=train_size, tab_from_folder=train_tab_folder, text_from_folder=text_folder, img_from_folder=img_folder, ) # Note that the eval and test loaders only need their corresponding # `TabFromFolder` classes. The rest of the parameters can be defined # via a `reference` `TabFromFolder` class eval_dataset_folder = WideDeepDatasetFromFolder( n_samples=eval_size, tab_from_folder=eval_tab_folder, reference=train_dataset_folder, ) test_dataset_folder = WideDeepDatasetFromFolder( n_samples=test_size, tab_from_folder=test_tab_folder, reference=train_dataset_folder, ) In\u00a0[12]: Copied!
train_loader = DataLoader(train_dataset_folder, batch_size=16, num_workers=1)\neval_loader = DataLoader(eval_dataset_folder, batch_size=16, num_workers=1)\ntest_loader = DataLoader(test_dataset_folder, batch_size=16, num_workers=1)\n
train_loader = DataLoader(train_dataset_folder, batch_size=16, num_workers=1) eval_loader = DataLoader(eval_dataset_folder, batch_size=16, num_workers=1) test_loader = DataLoader(test_dataset_folder, batch_size=16, num_workers=1)

And from here on it is business as usual:

In\u00a0[13]: Copied!
# for example\nbasic_rnn = BasicRNN(\n    vocab_size=len(text_preprocessor.vocab.itos),\n    embed_dim=32,\n    hidden_dim=64,\n    n_layers=2,\n)\n\ndeepimage = Vision()\n\ndeepdense = TabMlp(\n    mlp_hidden_dims=[32, 16],\n    column_idx=tab_preprocessor.column_idx,\n    cat_embed_input=tab_preprocessor.cat_embed_input,\n    continuous_cols=cont_cols,\n)\n\nmodel = WideDeep(\n    deeptabular=deepdense,\n    deeptext=basic_rnn,\n    deepimage=deepimage,\n)\n\nmodel\n
# for example basic_rnn = BasicRNN( vocab_size=len(text_preprocessor.vocab.itos), embed_dim=32, hidden_dim=64, n_layers=2, ) deepimage = Vision() deepdense = TabMlp( mlp_hidden_dims=[32, 16], column_idx=tab_preprocessor.column_idx, cat_embed_input=tab_preprocessor.cat_embed_input, continuous_cols=cont_cols, ) model = WideDeep( deeptabular=deepdense, deeptext=basic_rnn, deepimage=deepimage, ) model Out[13]:
WideDeep(\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cat_embed): DiffSizeCatEmbeddings(\n        (embed_layers): ModuleDict(\n          (emb_layer_host_listings_count): Embedding(28, 10, padding_idx=0)\n          (emb_layer_neighbourhood_cleansed): Embedding(33, 11, padding_idx=0)\n          (emb_layer_is_location_exact): Embedding(3, 2, padding_idx=0)\n          (emb_layer_property_type): Embedding(4, 3, padding_idx=0)\n          (emb_layer_room_type): Embedding(4, 3, padding_idx=0)\n          (emb_layer_accommodates): Embedding(14, 7, padding_idx=0)\n          (emb_layer_bathrooms): Embedding(11, 6, padding_idx=0)\n          (emb_layer_bedrooms): Embedding(7, 4, padding_idx=0)\n          (emb_layer_beds): Embedding(11, 6, padding_idx=0)\n          (emb_layer_guests_included): Embedding(11, 6, padding_idx=0)\n          (emb_layer_minimum_nights): Embedding(25, 9, padding_idx=0)\n          (emb_layer_instant_bookable): Embedding(3, 2, padding_idx=0)\n          (emb_layer_cancellation_policy): Embedding(6, 4, padding_idx=0)\n          (emb_layer_has_house_rules): Embedding(3, 2, padding_idx=0)\n          (emb_layer_host_gender): Embedding(4, 3, padding_idx=0)\n          (emb_layer_accommodates_catg): Embedding(4, 3, padding_idx=0)\n          (emb_layer_guests_included_catg): Embedding(4, 3, padding_idx=0)\n          (emb_layer_minimum_nights_catg): Embedding(4, 3, padding_idx=0)\n          (emb_layer_host_listings_count_catg): Embedding(5, 3, padding_idx=0)\n          (emb_layer_bathrooms_catg): Embedding(4, 3, padding_idx=0)\n          (emb_layer_bedrooms_catg): Embedding(5, 3, padding_idx=0)\n          (emb_layer_beds_catg): Embedding(5, 3, padding_idx=0)\n          (emb_layer_security_deposit): Embedding(53, 15, padding_idx=0)\n          (emb_layer_extra_people): Embedding(39, 12, padding_idx=0)\n        )\n        (embedding_dropout): Dropout(p=0.0, inplace=False)\n      )\n      (cont_norm): Identity()\n      (encoder): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Linear(in_features=128, out_features=32, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n          (dense_layer_1): Sequential(\n            (0): Linear(in_features=32, out_features=16, bias=True)\n            (1): ReLU(inplace=True)\n            (2): Dropout(p=0.1, inplace=False)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=16, out_features=1, bias=True)\n  )\n  (deeptext): Sequential(\n    (0): BasicRNN(\n      (word_embed): Embedding(2192, 32, padding_idx=1)\n      (rnn): LSTM(32, 64, num_layers=2, batch_first=True, dropout=0.1)\n      (rnn_mlp): Identity()\n    )\n    (1): Linear(in_features=64, out_features=1, bias=True)\n  )\n  (deepimage): Sequential(\n    (0): Vision(\n      (features): Sequential(\n        (conv_layer_0): Sequential(\n          (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n          (1): BatchNorm2d(64, eps=1e-05, momentum=0.01, affine=True, track_running_stats=True)\n          (2): LeakyReLU(negative_slope=0.1, inplace=True)\n          (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n        )\n        (conv_layer_1): Sequential(\n          (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n          (1): BatchNorm2d(128, eps=1e-05, momentum=0.01, affine=True, track_running_stats=True)\n          (2): LeakyReLU(negative_slope=0.1, inplace=True)\n     
   )\n        (conv_layer_2): Sequential(\n          (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n          (1): BatchNorm2d(256, eps=1e-05, momentum=0.01, affine=True, track_running_stats=True)\n          (2): LeakyReLU(negative_slope=0.1, inplace=True)\n        )\n        (conv_layer_3): Sequential(\n          (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n          (1): BatchNorm2d(512, eps=1e-05, momentum=0.01, affine=True, track_running_stats=True)\n          (2): LeakyReLU(negative_slope=0.1, inplace=True)\n          (adaptiveavgpool): AdaptiveAvgPool2d(output_size=(1, 1))\n        )\n      )\n    )\n    (1): Linear(in_features=512, out_features=1, bias=True)\n  )\n)
In\u00a0[14]: Copied!
trainer = TrainerFromFolder(\n    model,\n    objective=\"regression\",\n)\n\ntrainer.fit(\n    train_loader=train_loader,\n    eval_loader=eval_loader,\n)\n
trainer = TrainerFromFolder( model, objective=\"regression\", ) trainer.fit( train_loader=train_loader, eval_loader=eval_loader, )
epoch 1: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 50/50 [03:41<00:00,  4.42s/it, loss=1.64e+4]\nvalid: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 7/7 [00:23<00:00,  3.30s/it, loss=6.27e+3]\n
In\u00a0[15]: Copied!
preds = trainer.predict(test_loader=test_loader)\n
preds = trainer.predict(test_loader=test_loader)
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 7/7 [00:22<00:00,  3.26s/it]\n

Note that in the case of predict you could also choose to do the following:

In\u00a0[16]: Copied!
df_test = pd.read_csv(\"/\".join([data_path, test_fname]))\n
df_test = pd.read_csv(\"/\".join([data_path, test_fname])) In\u00a0[17]: Copied!
# if the images for the test set fit in memory\nX_tab_test = chunk_tab_preprocessor.transform(df_test)\nX_text_test = chunk_text_preprocessor.transform(df_test)\nX_img_test = img_preprocessor.transform(df_test)\n
# if the images for the test set fit in memory X_tab_test = chunk_tab_preprocessor.transform(df_test) X_text_test = chunk_text_preprocessor.transform(df_test) X_img_test = img_preprocessor.transform(df_test)
Reading Images from ../tmp_data/airbnb/property_picture/\nResizing\n
100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 101/101 [00:00<00:00, 708.23it/s]
Computing normalisation metrics\n
\n
In\u00a0[18]: Copied!
preds = trainer.predict(\n    X_tab=X_tab_test, X_text=X_text_test, X_img=X_img_test, batch_size=32\n)\n
preds = trainer.predict( X_tab=X_tab_test, X_text=X_text_test, X_img=X_img_test, batch_size=32 )
predict: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 4/4 [00:03<00:00,  1.14it/s]\n
"},{"location":"examples/20_load_from_folder_functionality.html#scenario","title":"Scenario\u00b6","text":"

We have a tabular dataset combined with images and text, and either some or all of these datasets do not fit in memory. Note that the tabular dataset MUST ALWAYS be present as it is considered the reference. That is, if we have an image dataset, the tabular dataset must contain a column that points to the image file names as stored on disk. Similarly, if we have a text dataset, then the tabular dataset must contain a column with either the texts themselves or the file names of the text files as stored on disk.
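For illustration only (the column names below are hypothetical, not required by the library), the reference tabular data could look like this:

import pandas as pd

# hypothetical layout: the tabular data is the reference and points to the
# image files on disk; here the text lives directly in a column
df = pd.DataFrame(
    {
        "accommodates": [2, 4],
        "room_type": ["Entire home/apt", "Private room"],
        "img_fname": ["property_1.jpg", "property_2.jpg"],  # images stored on disk
        "description": ["Cosy flat near the centre", "Quiet room with garden views"],
        "price": [55.0, 30.0],  # target
    }
)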

If you only have text and/or images and no tabular component, I would suggest using other libraries (such as Hugging Face, probably).

Within this setting, there are two possible scenarios that we will cover here:

  1. The tabular data itself fits in memory and it is only the images that do not: in this case you can use the 'standard' preprocessors in the library and move directly to the [...]FromFolder functionalities

  2. The tabular data is also very large and does not fit in memory, so we have to process it in chunks. For this second case I have created the so-called Chunk[...]Preprocessor classes (Wide, Tab and Text).

Note that at the moment ONLY csv format is allowed for the tabular file. More formats will be supported in the future.

Let's see a complete example to illustrate how each of these cases would be addressed with the new functionalities in the library. For this example we will use a sample of the airbnb dataset.

The airbnb dataset, which you could get from here, is too big to be included in our datasets module (when including images). Therefore, what I did was go there, download it, and use the download_images.py script to get the images and the airbnb_data_processing.py script to process the data. I did this ages ago and I believe the format of the dataset might be different now. Nonetheless, I will show samples of the dataset as we go through so you can extrapolate the content of this notebook to your particular problem.

In the future we will find better datasets\ud83d\ude42. Finally, note that here we are only using a small sample to illustrate the use, so PLEASE ignore the results, just focus on usage.

"},{"location":"examples/20_load_from_folder_functionality.html#setting-variables-and-constants","title":"Setting variables and constants\u00b6","text":""},{"location":"examples/20_load_from_folder_functionality.html#step-1-the-preprocessors","title":"Step 1: the preprocessors\u00b6","text":""},{"location":"examples/20_load_from_folder_functionality.html#scenario-1-only-the-images-do-not-fit-in-disk","title":"Scenario 1: only the images do not fit in disk\u00b6","text":"

In this case we can prepare the data in the 'standard' way

"},{"location":"examples/20_load_from_folder_functionality.html#scenario-2-the-tabular-data-is-also-huge","title":"Scenario 2: the tabular data is also huge\u00b6","text":"

Then we need to prepare it in chunks. Note that, unfortunately, the tabular and text preprocessors need to see the whole dataset once. This is because, to process tabular or text data, we need to encode values, and for those encodings to be consistent they need to have seen the whole dataset. Alternatively, one could code a solution with some streaming encoder for both datasets. However, such an implementation is not trivial for this library (and in general), and I also don't think that having to see the whole dataset once is such a big limitation. Let's see how it is done.
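As an orientation only, the chunked fitting pass might look like the sketch below. The pandas part is standard; the preprocessor objects are assumed to be already-instantiated Chunk[...]Preprocessor instances, and fitting them incrementally one chunk at a time is an assumption here, so please check the preprocessing docs for the exact constructor arguments and method names.

import pandas as pd

# chunk_tab_preprocessor and chunk_text_preprocessor are assumed to be
# ChunkTabPreprocessor / ChunkTextPreprocessor instances (see the preprocessing docs)
chunksize = 1000
for chunk in pd.read_csv("train.csv", chunksize=chunksize):
    # each iteration only holds 'chunksize' rows in memory; after a full pass
    # over the csv the encodings are consistent across the whole dataset
    # (incremental fitting per chunk is assumed, not a verified signature)
    chunk_tab_preprocessor.fit(chunk)
    chunk_text_preprocessor.fit(chunk)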

Note that I have not mentioned the image dataset. This is because processing the images does not require any form of encoding and, in consequence, can be done 'on the fly'. Therefore, no ChunkImgPreprocessor is needed.

"},{"location":"examples/20_load_from_folder_functionality.html#step-2-the-fromfolder-classes","title":"Step 2: the [...]FromFolder classes\u00b6","text":"

Once we have the preprocessors, we need to instantiate the classes that will enable us to load the data from their respective folders. From now on I am going to proceed with the chunk_tab_preprocessor, chunk_text_preprocessor and img_preprocessor, but the code would be identical if instead of the first two preprocessors we decided to use the tab_preprocessor and text_preprocessor.

Once more, our reference datasets are the tabular datasets, which we have split into train, eval and test sets before starting to code. Therefore, we will eventually need a loader for each split, as sketched below.
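A rough sketch of this step follows. The class signatures are documented in the load_from_folder section of these docs; the file and column names, and the already-fitted preprocessor objects (chunk_tab_preprocessor, chunk_text_preprocessor, img_preprocessor), are assumptions carried over from the previous steps.

from pytorch_widedeep.load_from_folder import (
    ImageFromFolder,
    TabFromFolder,
    TextFromFolder,
)

# the train object carries the directory, target, preprocessor and column info
train_tab_folder = TabFromFolder(
    fname="train.csv",
    directory="../tmp_data/airbnb/",
    target_col="target",
    preprocessor=chunk_tab_preprocessor,
    text_col="description",
    img_col="img_fname",
)
# eval and test objects simply reuse the train object's attributes via 'reference'
eval_tab_folder = TabFromFolder(fname="eval.csv", reference=train_tab_folder)
test_tab_folder = TabFromFolder(
    fname="test.csv", reference=train_tab_folder, ignore_target=True
)

text_folder = TextFromFolder(preprocessor=chunk_text_preprocessor)
img_folder = ImageFromFolder(preprocessor=img_preprocessor)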

"},{"location":"examples/20_load_from_folder_functionality.html#step-3-pytorch-datasets-and-dataloaders","title":"Step 3: pytorch datasets and dataloaders\u00b6","text":"

From here on, everything is very 'standard' if you are familiar with pytorch. One needs to define a class that inherits from pytorch's Dataset class; this is then passed to a DataLoader and we are ready to train. Our Dataset child class is WideDeepDatasetFromFolder. This class will use the tabular dataset and the corresponding text and image columns to load the adequate data in each batch

Let's do it
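A hedged sketch of this step is shown next. The exact keyword arguments of WideDeepDatasetFromFolder are not reproduced in this page, so the ones below are assumptions made for illustration; the Dataset-to-DataLoader pattern itself is standard pytorch.

from torch.utils.data import DataLoader
from pytorch_widedeep.load_from_folder import WideDeepDatasetFromFolder

# keyword names below are illustrative assumptions, not the verified signature
train_dataset_folder = WideDeepDatasetFromFolder(
    n_samples=800,  # assumed: number of rows in the train csv
    tab_from_folder=train_tab_folder,
    text_from_folder=text_folder,
    img_from_folder=img_folder,
)
train_loader = DataLoader(train_dataset_folder, batch_size=16, num_workers=1)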

"},{"location":"examples/20_load_from_folder_functionality.html#step-4-define-the-model","title":"Step 4: define the model\u00b6","text":""},{"location":"examples/20_load_from_folder_functionality.html#step-5-fit-and-predict","title":"Step 5: fit and predict\u00b6","text":""},{"location":"pytorch-widedeep/bayesian_models.html","title":"The bayesian models module","text":"

This module contains the two Bayesian Models available in this library, namely the Bayesian versions of the Wide and TabMlp models, referred to as BayesianWide and BayesianTabMlp. These models are very useful in scenarios where getting a measure of uncertainty is important.

The models in this module are based on the publication: Weight Uncertainty in Neural Networks.

"},{"location":"pytorch-widedeep/bayesian_models.html#pytorch_widedeep.bayesian_models.tabular.bayesian_linear.bayesian_wide.BayesianWide","title":"BayesianWide","text":"
BayesianWide(\n    input_dim,\n    pred_dim=1,\n    prior_sigma_1=1.0,\n    prior_sigma_2=0.002,\n    prior_pi=0.8,\n    posterior_mu_init=0.0,\n    posterior_rho_init=-7.0,\n)\n

Bases: BaseBayesianModel

Defines a Wide model. This is a linear model where the non-linearities are captured via crossed columns

Parameters:

  • input_dim (int) \u2013

    size of the Embedding layer. input_dim is the sum of the number of individual values across all the features that go through the wide component. For example, if the wide component receives 2 features with 5 individual values each, input_dim = 10

  • pred_dim (int, default: 1 ) \u2013

    size of the output tensor containing the predictions

  • prior_sigma_1 (float, default: 1.0 ) \u2013

    The prior weight distribution is a scaled mixture of two Gaussian densities:

    \\[ \\begin{aligned} P(\\mathbf{w}) = \\prod_{i=j} \\pi N (\\mathbf{w}_j | 0, \\sigma_{1}^{2}) + (1 - \\pi) N (\\mathbf{w}_j | 0, \\sigma_{2}^{2}) \\end{aligned} \\]

    prior_sigma_1 is the prior of the sigma parameter for the first of the two Gaussians that will be mixed to produce the prior weight distribution.

  • prior_sigma_2 (float, default: 0.002 ) \u2013

    Prior of the sigma parameter for the second of the two Gaussian distributions that will be mixed to produce the prior weight distribution

  • prior_pi (float, default: 0.8 ) \u2013

    Scaling factor that will be used to mix the Gaussians to produce the prior weight distribution

  • posterior_mu_init (float, default: 0.0 ) \u2013

    The posterior sample of the weights is defined as:

    \\[ \\begin{aligned} \\mathbf{w} &= \\mu + log(1 + exp(\\rho)) \\end{aligned} \\]

    where:

    \\[ \\begin{aligned} \\mathcal{N}(x\\vert \\mu, \\sigma) &= \\frac{1}{\\sqrt{2\\pi}\\sigma}e^{-\\frac{(x-\\mu)^2}{2\\sigma^2}}\\\\ \\log{\\mathcal{N}(x\\vert \\mu, \\sigma)} &= -\\log{\\sqrt{2\\pi}} -\\log{\\sigma} -\\frac{(x-\\mu)^2}{2\\sigma^2}\\\\ \\end{aligned} \\]

    \\(\\mu\\) is initialised using a normal distribution with mean posterior_mu_init and std equal to 0.1.

  • posterior_rho_init (float, default: -7.0 ) \u2013

    As in the case of \\(\\mu\\), \\(\\rho\\) is initialised using a normal distribution with mean posterior_rho_init and std equal to 0.1.

Attributes:

  • bayesian_wide_linear (Module) \u2013

    the linear layer that comprises the wide branch of the model

Examples:

>>> import torch\n>>> from pytorch_widedeep.bayesian_models import BayesianWide\n>>> X = torch.empty(4, 4).random_(6)\n>>> wide = BayesianWide(input_dim=X.unique().size(0), pred_dim=1)\n>>> out = wide(X)\n
Source code in pytorch_widedeep/bayesian_models/tabular/bayesian_linear/bayesian_wide.py
def __init__(\n    self,\n    input_dim: int,\n    pred_dim: int = 1,\n    prior_sigma_1: float = 1.0,\n    prior_sigma_2: float = 0.002,\n    prior_pi: float = 0.8,\n    posterior_mu_init: float = 0.0,\n    posterior_rho_init: float = -7.0,\n):\n    super(BayesianWide, self).__init__()\n    #  Embeddings: val + 1 because 0 is reserved for padding/unseen cateogories.\n    self.bayesian_wide_linear = bnn.BayesianEmbedding(\n        n_embed=input_dim + 1,\n        embed_dim=pred_dim,\n        padding_idx=0,\n        prior_sigma_1=prior_sigma_1,\n        prior_sigma_2=prior_sigma_2,\n        prior_pi=prior_pi,\n        posterior_mu_init=posterior_mu_init,\n        posterior_rho_init=posterior_rho_init,\n    )\n    self.bias = nn.Parameter(torch.zeros(pred_dim))\n
"},{"location":"pytorch-widedeep/bayesian_models.html#pytorch_widedeep.bayesian_models.tabular.bayesian_mlp.bayesian_tab_mlp.BayesianTabMlp","title":"BayesianTabMlp","text":"
BayesianTabMlp(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_activation=None,\n    continuous_cols=None,\n    embed_continuous=None,\n    cont_embed_dim=None,\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    use_cont_bias=None,\n    cont_norm_layer=None,\n    mlp_hidden_dims=[200, 100],\n    mlp_activation=\"leaky_relu\",\n    prior_sigma_1=1,\n    prior_sigma_2=0.002,\n    prior_pi=0.8,\n    posterior_mu_init=0.0,\n    posterior_rho_init=-7.0,\n    pred_dim=1\n)\n

Bases: BaseBayesianModel

Defines a BayesianTabMlp model.

This class combines embedding representations of the categorical features with numerical (aka continuous) features, embedded or not. These are then passed through a series of probabilistic dense layers (i.e. an MLP).

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the TabMlp model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}

  • cat_embed_input (Optional[List[Tuple[str, int, int]]], default: None ) \u2013

    List of Tuples with the column name, number of unique values and embedding dimension. e.g. [(education, 11, 32), ...]

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm', 'batchnorm' or None.

  • embed_continuous (Optional[bool], default: None ) \u2013

    Boolean indicating if the continuous columns will be embedded (i.e. passed each through a linear layer with or without activation)

  • cont_embed_dim (Optional[int], default: None ) \u2013

    Size of the continuous embeddings

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings

  • use_cont_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the continuous embeddings

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • mlp_hidden_dims (List[int], default: [200, 100] ) \u2013

    List with the number of neurons per dense layer in the mlp.

  • mlp_activation (str, default: 'leaky_relu' ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • prior_sigma_1 (float, default: 1 ) \u2013

    The prior weight distribution is a scaled mixture of two Gaussian densities:

    \\[ \\begin{aligned} P(\\mathbf{w}) = \\prod_{i=j} \\pi N (\\mathbf{w}_j | 0, \\sigma_{1}^{2}) + (1 - \\pi) N (\\mathbf{w}_j | 0, \\sigma_{2}^{2}) \\end{aligned} \\]

    prior_sigma_1 is the prior of the sigma parameter for the first of the two Gaussians that will be mixed to produce the prior weight distribution.

  • prior_sigma_2 (float, default: 0.002 ) \u2013

    Prior of the sigma parameter for the second of the two Gaussian distributions that will be mixed to produce the prior weight distribution for each Bayesian linear and embedding layer

  • prior_pi (float, default: 0.8 ) \u2013

    Scaling factor that will be used to mix the Gaussians to produce the prior weight distribution for each Bayesian linear and embedding layer

  • posterior_mu_init (float, default: 0.0 ) \u2013

    The posterior sample of the weights is defined as:

    \\[ \\begin{aligned} \\mathbf{w} &= \\mu + log(1 + exp(\\rho)) \\end{aligned} \\]

    where:

    \\[ \\begin{aligned} \\mathcal{N}(x\\vert \\mu, \\sigma) &= \\frac{1}{\\sqrt{2\\pi}\\sigma}e^{-\\frac{(x-\\mu)^2}{2\\sigma^2}}\\\\ \\log{\\mathcal{N}(x\\vert \\mu, \\sigma)} &= -\\log{\\sqrt{2\\pi}} -\\log{\\sigma} -\\frac{(x-\\mu)^2}{2\\sigma^2}\\\\ \\end{aligned} \\]

    \\(\\mu\\) is initialised using a normal distribution with mean posterior_mu_init and std equal to 0.1.

  • posterior_rho_init (float, default: -7.0 ) \u2013

    As in the case of \\(\\mu\\), \\(\\rho\\) is initialised using a normal distribution with mean posterior_rho_init and std equal to 0.1.

Attributes:

  • bayesian_cat_and_cont_embed (Module) \u2013

    This is the module that processes the categorical and continuous columns

  • bayesian_tab_mlp (Sequential) \u2013

    mlp model that will receive the concatenation of the embeddings and the continuous columns

Examples:

>>> import torch\n>>> from pytorch_widedeep.bayesian_models import BayesianTabMlp\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i,j) for u,i,j in zip(colnames[:4], [4]*4, [8]*4)]\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = BayesianTabMlp(mlp_hidden_dims=[8,4], column_idx=column_idx, cat_embed_input=cat_embed_input,\n... continuous_cols = ['e'])\n>>> out = model(X_tab)\n
Source code in pytorch_widedeep/bayesian_models/tabular/bayesian_mlp/bayesian_tab_mlp.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int, int]]] = None,\n    cat_embed_activation: Optional[str] = None,\n    continuous_cols: Optional[List[str]] = None,\n    embed_continuous: Optional[bool] = None,\n    cont_embed_dim: Optional[int] = None,\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    use_cont_bias: Optional[bool] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    mlp_hidden_dims: List[int] = [200, 100],\n    mlp_activation: str = \"leaky_relu\",\n    prior_sigma_1: float = 1,\n    prior_sigma_2: float = 0.002,\n    prior_pi: float = 0.8,\n    posterior_mu_init: float = 0.0,\n    posterior_rho_init: float = -7.0,\n    pred_dim=1,  # Bayesian models will require their own trainer and need the output layer\n):\n    super(BayesianTabMlp, self).__init__()\n\n    self.column_idx = column_idx\n    self.cat_embed_input = cat_embed_input\n    self.cat_embed_activation = cat_embed_activation\n\n    self.continuous_cols = continuous_cols\n    self.cont_norm_layer = cont_norm_layer\n    self.embed_continuous = embed_continuous\n    self.cont_embed_dim = cont_embed_dim\n    self.cont_embed_dropout = cont_embed_dropout\n    self.use_cont_bias = use_cont_bias\n    self.cont_embed_activation = cont_embed_activation\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n\n    self.prior_sigma_1 = prior_sigma_1\n    self.prior_sigma_2 = prior_sigma_2\n    self.prior_pi = prior_pi\n    self.posterior_mu_init = posterior_mu_init\n    self.posterior_rho_init = posterior_rho_init\n\n    self.pred_dim = pred_dim\n\n    allowed_activations = [\"relu\", \"leaky_relu\", \"tanh\", \"gelu\"]\n    if self.mlp_activation not in allowed_activations:\n        raise ValueError(\n            \"Currently, only the following activation functions are supported \"\n            \"for the Bayesian MLP's dense layers: {}. 
Got '{}' instead\".format(\n                \", \".join(allowed_activations),\n                self.mlp_activation,\n            )\n        )\n\n    # Categorical\n    if self.cat_embed_input is not None:\n        self.cat_embed = BayesianDiffSizeCatEmbeddings(\n            column_idx=self.column_idx,\n            embed_input=self.cat_embed_input,\n            prior_sigma_1=self.prior_sigma_1,\n            prior_sigma_2=self.prior_sigma_2,\n            prior_pi=self.prior_pi,\n            posterior_mu_init=self.posterior_mu_init,\n            posterior_rho_init=self.posterior_rho_init,\n            activation_fn=self.cat_embed_activation,\n        )\n        self.cat_out_dim = int(np.sum([embed[2] for embed in self.cat_embed_input]))\n    else:\n        self.cat_out_dim = 0\n\n    # Continuous\n    if self.continuous_cols is not None:\n        self.cont_idx = [self.column_idx[col] for col in self.continuous_cols]\n        if cont_norm_layer == \"layernorm\":\n            self.cont_norm: NormLayers = nn.LayerNorm(len(self.continuous_cols))\n        elif cont_norm_layer == \"batchnorm\":\n            self.cont_norm = nn.BatchNorm1d(len(self.continuous_cols))\n        else:\n            self.cont_norm = nn.Identity()\n        if self.embed_continuous:\n            assert self.cont_embed_dim is not None, (\n                \"If 'embed_continuous' is True, 'cont_embed_dim' must be \"\n                \"provided\"\n            )\n            self.cont_embed = BayesianContEmbeddings(\n                n_cont_cols=len(self.continuous_cols),\n                embed_dim=self.cont_embed_dim,\n                prior_sigma_1=self.prior_sigma_1,\n                prior_sigma_2=self.prior_sigma_2,\n                prior_pi=self.prior_pi,\n                posterior_mu_init=self.posterior_mu_init,\n                posterior_rho_init=self.posterior_rho_init,\n                use_bias=(\n                    False if self.use_cont_bias is None else self.use_cont_bias\n                ),\n                activation_fn=self.cont_embed_activation,\n            )\n            self.cont_out_dim = len(self.continuous_cols) * self.cont_embed_dim\n        else:\n            self.cont_out_dim = len(self.continuous_cols)\n    else:\n        self.cont_out_dim = 0\n\n    self.output_dim = self.cat_out_dim + self.cont_out_dim\n\n    mlp_hidden_dims = [self.output_dim] + mlp_hidden_dims + [pred_dim]\n    self.bayesian_tab_mlp = BayesianMLP(\n        mlp_hidden_dims,\n        mlp_activation,\n        True,  # use_bias\n        prior_sigma_1,\n        prior_sigma_2,\n        prior_pi,\n        posterior_mu_init,\n        posterior_rho_init,\n    )\n
"},{"location":"pytorch-widedeep/bayesian_trainer.html","title":"Training Deep Learning Probabilistic Models","text":""},{"location":"pytorch-widedeep/bayesian_trainer.html#pytorch_widedeep.training.BayesianTrainer","title":"BayesianTrainer","text":"
BayesianTrainer(\n    model,\n    objective,\n    custom_loss_function=None,\n    optimizer=None,\n    lr_scheduler=None,\n    callbacks=None,\n    metrics=None,\n    verbose=1,\n    seed=1,\n    **kwargs\n)\n

Bases: BaseBayesianTrainer

Class to set the attributes that will be used during the training process.

Both the Bayesian models and the Trainer in this repo are based on the paper: Weight Uncertainty in Neural Networks.

Parameters:

  • model (BaseBayesianModel) \u2013

    An object of class BaseBayesianModel. See the Model Components section here in the docs.

  • objective (str) \u2013

    Defines the objective, loss or cost function. Param aliases: loss_function, loss_fn, loss, cost_function, cost_fn, cost. Possible values are: 'binary', 'multiclass' or 'regression'

  • custom_loss_function (Optional[Module], default: None ) \u2013

    If none of the loss functions available suits the user, it is possible to pass a custom loss function. See for example pytorch_widedeep.losses.FocalLoss for the required structure of the object or the Examples folder in the repo.

  • optimizer (Optional[Optimizer], default: None ) \u2013

    An instance of Pytorch's Optimizer object (e.g. torch.optim.Adam()). If no optimizer is passed it will default to AdamW.

  • lr_scheduler (Optional[LRScheduler], default: None ) \u2013

    An instance of Pytorch's LRScheduler object (e.g. torch.optim.lr_scheduler.StepLR(opt, step_size=5)).

  • callbacks (Optional[List[Callback]], default: None ) \u2013

    List with Callback objects. The three callbacks available in pytorch-widedeep are: LRHistory, ModelCheckpoint and EarlyStopping. This can also be a custom callback. See pytorch_widedeep.callbacks.Callback or the Examples folder in the repo.

  • metrics (Optional[Union[List[Metric], List[TorchMetric]]], default: None ) \u2013
    • List of objects of type Metric. Metrics available are: Accuracy, Precision, Recall, FBetaScore, F1Score and R2Score. This can also be a custom metric as long as it is an object of type Metric. See pytorch_widedeep.metrics.Metric or the Examples folder in the repo
    • List of objects of type torchmetrics.Metric. This can be any metric from the torchmetrics library. It can also be a custom torchmetrics metric, as long as it is an object of type Metric. See the torchmetrics instructions
  • verbose (int, default: 1 ) \u2013

    Setting it to 0 will print nothing during training.

  • seed (int, default: 1 ) \u2013

    Random seed to be used internally for train_test_split

Other Parameters:

  • **kwargs \u2013

    Other infrequently used arguments that can also be passed as kwargs are:

    • device: str string indicating the device. One of 'cpu' or 'gpu'

    • num_workers: int number of workers to be used internally by the data loaders

    • class_weight: List[float] This is the weight or pos_weight parameter in CrossEntropyLoss and BCEWithLogitsLoss, depending on whether the objective is 'multiclass' or 'binary', respectively

    • reducelronplateau_criterion: str This sets the criterion that the lr scheduler will use to take a step: one of 'loss' or 'metric'. The ReduceLROnPlateau learning rate scheduler is a bit particular.

Attributes:

  • cyclic_lr (bool) \u2013

    Attribute that indicates if the lr_scheduler is cyclic (i.e. CyclicLR or OneCycleLR). See the Pytorch schedulers documentation: https://pytorch.org/docs/stable/optim.html.

Source code in pytorch_widedeep/training/bayesian_trainer.py
@alias(  # noqa: C901\n    \"objective\",\n    [\"loss_function\", \"loss_fn\", \"loss\", \"cost_function\", \"cost_fn\", \"cost\"],\n)\ndef __init__(\n    self,\n    model: BaseBayesianModel,\n    objective: str,\n    custom_loss_function: Optional[Module] = None,\n    optimizer: Optional[Optimizer] = None,\n    lr_scheduler: Optional[LRScheduler] = None,\n    callbacks: Optional[List[Callback]] = None,\n    metrics: Optional[Union[List[Metric], List[TorchMetric]]] = None,\n    verbose: int = 1,\n    seed: int = 1,\n    **kwargs,\n):\n    super().__init__(\n        model=model,\n        objective=objective,\n        custom_loss_function=custom_loss_function,\n        optimizer=optimizer,\n        lr_scheduler=lr_scheduler,\n        callbacks=callbacks,\n        metrics=metrics,\n        verbose=verbose,\n        seed=seed,\n        **kwargs,\n    )\n
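To make the class-level description concrete, here is a minimal, illustrative sketch that trains a BayesianTabMlp on toy data with this trainer and then predicts. The fit and predict methods are documented below; the data and hyperparameters are purely illustrative.

import numpy as np
import torch
from pytorch_widedeep.bayesian_models import BayesianTabMlp
from pytorch_widedeep.metrics import Accuracy
from pytorch_widedeep.training import BayesianTrainer

# toy data: 4 categorical columns with 4 levels each plus 1 continuous column
X_tab = torch.cat((torch.empty(32, 4).random_(4), torch.rand(32, 1)), axis=1).numpy()
target = np.random.choice(2, 32)

colnames = ["a", "b", "c", "d", "e"]
cat_embed_input = [(u, i, j) for u, i, j in zip(colnames[:4], [4] * 4, [8] * 4)]
column_idx = {k: v for v, k in enumerate(colnames)}
model = BayesianTabMlp(
    column_idx=column_idx,
    cat_embed_input=cat_embed_input,
    continuous_cols=["e"],
    mlp_hidden_dims=[8, 4],
)

trainer = BayesianTrainer(model, objective="binary", metrics=[Accuracy()])
trainer.fit(X_tab=X_tab, target=target, n_epochs=2, batch_size=16)
preds = trainer.predict(X_tab=X_tab, n_samples=5)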
"},{"location":"pytorch-widedeep/bayesian_trainer.html#pytorch_widedeep.training.BayesianTrainer.fit","title":"fit","text":"
fit(\n    X_tab,\n    target,\n    X_tab_val=None,\n    target_val=None,\n    val_split=None,\n    n_epochs=1,\n    validation_freq=1,\n    batch_size=32,\n    n_train_samples=2,\n    n_val_samples=2,\n)\n

Fit method.

Parameters:

  • X_tab (ndarray) \u2013

    tabular dataset

  • target (ndarray) \u2013

    target values

  • X_tab_val (Optional[ndarray], default: None ) \u2013

    validation data

  • target_val (Optional[ndarray], default: None ) \u2013

    validation target values

  • val_split (Optional[float], default: None ) \u2013

    An alternative to passing the validation set is to use a train/val split fraction via val_split

  • n_epochs (int, default: 1 ) \u2013

    number of epochs

  • validation_freq (int, default: 1 ) \u2013

    epochs validation frequency

  • batch_size (int, default: 32 ) \u2013

    batch size

  • n_train_samples (int, default: 2 ) \u2013

    number of samples to average over during the training process. See Weight Uncertainty in Neural Networks for details.

  • n_val_samples (int, default: 2 ) \u2013

    number of samples to average over during the validation process. See Weight Uncertainty in Neural Networks for details.

Source code in pytorch_widedeep/training/bayesian_trainer.py
def fit(  # noqa: C901\n    self,\n    X_tab: np.ndarray,\n    target: np.ndarray,\n    X_tab_val: Optional[np.ndarray] = None,\n    target_val: Optional[np.ndarray] = None,\n    val_split: Optional[float] = None,\n    n_epochs: int = 1,\n    validation_freq: int = 1,\n    batch_size: int = 32,\n    n_train_samples: int = 2,\n    n_val_samples: int = 2,\n):\n    r\"\"\"Fit method.\n\n    Parameters\n    ----------\n    X_tab: np.ndarray,\n        tabular dataset\n    target: np.ndarray\n        target values\n    X_tab_val: np.ndarray, Optional, default = None\n        validation data\n    target_val: np.ndarray, Optional, default = None\n        validation target values\n    val_split: float, Optional. default=None\n        An alterative to passing the validation set is to use a train/val\n        split fraction via `val_split`\n    n_epochs: int, default=1\n        number of epochs\n    validation_freq: int, default=1\n        epochs validation frequency\n    batch_size: int, default=32\n        batch size\n    n_train_samples: int, default=2\n        number of samples to average over during the training process.\n        See [Weight Uncertainty in Neural Networks](https://arxiv.org/pdf/1505.05424.pdf) for details.\n    n_val_samples: int, default=2\n        number of samples to average over during the validation process.\n        See [Weight Uncertainty in Neural Networks](https://arxiv.org/pdf/1505.05424.pdf) for details.\n    \"\"\"\n\n    self.batch_size = batch_size\n\n    train_set, eval_set = tabular_train_val_split(\n        self.seed, self.objective, X_tab, target, X_tab_val, target_val, val_split\n    )\n    train_loader = DataLoader(\n        dataset=train_set, batch_size=batch_size, num_workers=self.num_workers\n    )\n    train_steps = len(train_loader)\n\n    if eval_set is not None:\n        eval_loader = DataLoader(\n            dataset=eval_set,\n            batch_size=batch_size,\n            num_workers=self.num_workers,\n            shuffle=False,\n        )\n        eval_steps = len(eval_loader)\n\n    self.callback_container.on_train_begin(\n        {\n            \"batch_size\": batch_size,\n            \"train_steps\": train_steps,\n            \"n_epochs\": n_epochs,\n        }\n    )\n    for epoch in range(n_epochs):\n        epoch_logs: Dict[str, float] = {}\n        self.callback_container.on_epoch_begin(epoch, logs=epoch_logs)\n\n        self.train_running_loss = 0.0\n        with trange(train_steps, disable=self.verbose != 1) as t:\n            for batch_idx, (X, y) in zip(t, train_loader):\n                t.set_description(\"epoch %i\" % (epoch + 1))\n                train_score, train_loss = self._train_step(\n                    X, y, n_train_samples, train_steps, batch_idx\n                )\n                print_loss_and_metric(t, train_loss, train_score)\n                self.callback_container.on_batch_end(batch=batch_idx)\n        epoch_logs = save_epoch_logs(epoch_logs, train_loss, train_score, \"train\")\n\n        on_epoch_end_metric = None\n        if eval_set is not None and epoch % validation_freq == (\n            validation_freq - 1\n        ):\n            self.callback_container.on_eval_begin()\n            self.valid_running_loss = 0.0\n            with trange(eval_steps, disable=self.verbose != 1) as v:\n                for i, (X, y) in zip(v, eval_loader):\n                    v.set_description(\"valid\")\n                    val_score, val_loss = self._eval_step(\n                        X, y, n_val_samples, train_steps, i\n          
          )\n                    print_loss_and_metric(v, val_loss, val_score)\n            epoch_logs = save_epoch_logs(epoch_logs, val_loss, val_score, \"val\")\n\n            if self.reducelronplateau:\n                if self.reducelronplateau_criterion == \"loss\":\n                    on_epoch_end_metric = val_loss\n                else:\n                    on_epoch_end_metric = val_score[\n                        self.reducelronplateau_criterion\n                    ]\n\n        self.callback_container.on_epoch_end(epoch, epoch_logs, on_epoch_end_metric)\n\n        if self.early_stop:\n            self.callback_container.on_train_end(epoch_logs)\n            break\n\n    self.callback_container.on_train_end(epoch_logs)\n    self._restore_best_weights()\n    self.model.train()\n
"},{"location":"pytorch-widedeep/bayesian_trainer.html#pytorch_widedeep.training.BayesianTrainer.predict","title":"predict","text":"
predict(\n    X_tab, n_samples=5, return_samples=False, batch_size=256\n)\n

Returns the predictions

Parameters:

  • X_tab (ndarray) \u2013

    tabular dataset

  • n_samples (int, default: 5 ) \u2013

    number of samples that will be either returned or averaged to produce an overall prediction

  • return_samples (bool, default: False ) \u2013

    Boolean indicating whether the n samples will be averaged or directly returned

  • batch_size (int, default: 256 ) \u2013

    batch size

Returns:

  • np.ndarray: \u2013

    array with the predictions

Source code in pytorch_widedeep/training/bayesian_trainer.py
def predict(  # type: ignore[return]\n    self,\n    X_tab: np.ndarray,\n    n_samples: int = 5,\n    return_samples: bool = False,\n    batch_size: int = 256,\n) -> np.ndarray:\n    r\"\"\"Returns the predictions\n\n    Parameters\n    ----------\n    X_tab: np.ndarray,\n        tabular dataset\n    n_samples: int, default=5\n        number of samples that will be either returned or averaged to\n        produce an overal prediction\n    return_samples: bool, default = False\n        Boolean indicating whether the n samples will be averaged or directly returned\n    batch_size: int, default = 256\n        batch size\n\n    Returns\n    -------\n    np.ndarray:\n        array with the predictions\n    \"\"\"\n\n    preds_l = self._predict(X_tab, n_samples, return_samples, batch_size)\n    preds = np.hstack(preds_l) if return_samples else np.vstack(preds_l)\n    axis = 2 if return_samples else 1\n\n    if self.objective == \"regression\":\n        return preds.squeeze(axis)\n    if self.objective == \"binary\":\n        return (preds.squeeze(axis) > 0.5).astype(\"int\")\n    if self.objective == \"multiclass\":\n        return np.argmax(preds, axis)\n
"},{"location":"pytorch-widedeep/bayesian_trainer.html#pytorch_widedeep.training.BayesianTrainer.predict_proba","title":"predict_proba","text":"
predict_proba(\n    X_tab, n_samples=5, return_samples=False, batch_size=256\n)\n

Returns the predicted probabilities

Parameters:

  • X_tab (ndarray) \u2013

    tabular dataset

  • n_samples (int, default: 5 ) \u2013

    number of samples that will be either returned or averaged to produce an overall prediction

  • return_samples (bool, default: False ) \u2013

    Boolean indicating whether the n samples will be averaged or directly returned

  • batch_size (int, default: 256 ) \u2013

    batch size

Returns:

  • ndarray \u2013

    array with the probabilities per class

Source code in pytorch_widedeep/training/bayesian_trainer.py
def predict_proba(  # type: ignore[return]\n    self,\n    X_tab: np.ndarray,\n    n_samples: int = 5,\n    return_samples: bool = False,\n    batch_size: int = 256,\n) -> np.ndarray:\n    r\"\"\"Returns the predicted probabilities\n\n    Parameters\n    ----------\n    X_tab: np.ndarray,\n        tabular dataset\n    n_samples: int, default=5\n        number of samples that will be either returned or averaged to\n        produce an overal prediction\n    return_samples: bool, default = False\n        Boolean indicating whether the n samples will be averaged or directly returned\n    batch_size: int, default = 256\n        batch size\n\n    Returns\n    -------\n    np.ndarray\n        array with the probabilities per class\n    \"\"\"\n    preds_l = self._predict(X_tab, n_samples, return_samples, batch_size)\n    preds = np.hstack(preds_l) if return_samples else np.vstack(preds_l)\n\n    if self.objective == \"binary\":\n        if return_samples:\n            preds = preds.squeeze(2)\n            probs = np.zeros([n_samples, preds.shape[1], 2])\n            for i in range(n_samples):\n                probs[i, :, 0] = 1 - preds[i]\n                probs[i, :, 1] = preds[i]\n        else:\n            preds = preds.squeeze(1)\n            probs = np.zeros([preds.shape[0], 2])\n            probs[:, 0] = 1 - preds\n            probs[:, 1] = preds\n        return probs\n    if self.objective == \"multiclass\":\n        return preds\n
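Since these models are motivated by uncertainty estimation, a short illustrative follow-up: with return_samples=True the individual posterior samples are returned (for a binary objective, an array of shape (n_samples, n_observations, 2)), and their spread can be used as a rough uncertainty measure. This assumes a trainer already fitted on a binary problem, as in the sketch above.

# posterior samples of the class probabilities
probs_samples = trainer.predict_proba(X_tab=X_tab, n_samples=20, return_samples=True)

mean_probs = probs_samples.mean(axis=0)  # averaged probabilities per observation
std_probs = probs_samples.std(axis=0)    # spread across samples ~ predictive uncertainty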
"},{"location":"pytorch-widedeep/bayesian_trainer.html#pytorch_widedeep.training.BayesianTrainer.save","title":"save","text":"
save(\n    path,\n    save_state_dict=False,\n    model_filename=\"bayesian_model.pt\",\n)\n

Saves the model, training and evaluation history to disk

The Trainer class is built so that it 'just' trains a model. With that in mind, all the torch related parameters (such as optimizers or learning rate schedulers) have to be defined externally and then passed to the Trainer. As a result, the Trainer does not generate any attribute or additional data products that need to be saved other than the model object itself, which can be saved as any other torch model (e.g. torch.save(model, path)).

Parameters:

  • path (str) \u2013

    path to the directory where the model and the feature importance attribute will be saved.

  • save_state_dict (bool, default: False ) \u2013

    Boolean indicating whether to save directly the model or the model's state dictionary

  • model_filename (str, default: 'bayesian_model.pt' ) \u2013

    filename where the model weights will be stored
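For instance (the directory name is illustrative):

# saves the training history under 'model_weights/history/' and the weights as
# 'model_weights/bayesian_model.pt'
trainer.save(path="model_weights", save_state_dict=True)

# alternatively, since the model is a regular torch module, it can be saved directly:
# torch.save(trainer.model.state_dict(), "model_weights/bayesian_model.pt")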

Source code in pytorch_widedeep/training/bayesian_trainer.py
def save(\n    self,\n    path: str,\n    save_state_dict: bool = False,\n    model_filename: str = \"bayesian_model.pt\",\n):\n    r\"\"\"Saves the model, training and evaluation history to disk\n\n    The `Trainer` class is built so that it 'just' trains a model. With\n    that in mind, all the torch related parameters (such as optimizers or\n    learning rate schedulers) have to be defined externally and then\n    passed to the `Trainer`. As a result, the `Trainer` does not\n    generate any attribute or additional data products that need to be\n    saved other than the `model` object itself, which can be saved as\n    any other torch model (e.g. `torch.save(model, path)`).\n\n    Parameters\n    ----------\n    path: str\n        path to the directory where the model and the feature importance\n        attribute will be saved.\n    save_state_dict: bool, default = False\n        Boolean indicating whether to save directly the model or the\n        model's state dictionary\n    model_filename: str, Optional, default = \"wd_model.pt\"\n        filename where the model weights will be store\n    \"\"\"\n\n    save_dir = Path(path)\n    history_dir = save_dir / \"history\"\n    history_dir.mkdir(exist_ok=True, parents=True)\n\n    # the trainer is run with the History Callback by default\n    with open(history_dir / \"train_eval_history.json\", \"w\") as teh:\n        json.dump(self.history, teh)  # type: ignore[attr-defined]\n\n    has_lr_history = any(\n        [clbk.__class__.__name__ == \"LRHistory\" for clbk in self.callbacks]\n    )\n    if self.lr_scheduler is not None and has_lr_history:\n        with open(history_dir / \"lr_history.json\", \"w\") as lrh:\n            json.dump(self.lr_history, lrh)  # type: ignore[attr-defined]\n\n    model_path = save_dir / model_filename\n    if save_state_dict:\n        torch.save(self.model.state_dict(), model_path)\n    else:\n        torch.save(self.model, model_path)\n
"},{"location":"pytorch-widedeep/callbacks.html","title":"Callbacks","text":"

Here are the 4 callbacks available to the user in pytorch-widedeep: LRHistory, ModelCheckpoint, EarlyStopping and RayTuneReporter.

NOTE: other callbacks, like History, always run by default. In particular, the History callback saves the metrics in the history attribute of the Trainer.

"},{"location":"pytorch-widedeep/callbacks.html#pytorch_widedeep.callbacks.LRHistory","title":"LRHistory","text":"
LRHistory(n_epochs)\n

Bases: Callback

Saves the learning rates during training in the lr_history attribute of the Trainer.

Callbacks are passed as input parameters to the Trainer class. See pytorch_widedeep.trainer.Trainer

Parameters:

  • n_epochs (int) \u2013

    number of training epochs

Examples:

>>> from pytorch_widedeep.callbacks import LRHistory\n>>> from pytorch_widedeep.models import TabMlp, Wide, WideDeep\n>>> from pytorch_widedeep.training import Trainer\n>>>\n>>> embed_input = [(u, i, j) for u, i, j in zip([\"a\", \"b\", \"c\"][:4], [4] * 3, [8] * 3)]\n>>> column_idx = {k: v for v, k in enumerate([\"a\", \"b\", \"c\"])}\n>>> wide = Wide(10, 1)\n>>> deep = TabMlp(mlp_hidden_dims=[8, 4], column_idx=column_idx, cat_embed_input=embed_input)\n>>> model = WideDeep(wide, deep)\n>>> trainer = Trainer(model, objective=\"regression\", callbacks=[LRHistory(n_epochs=10)])\n
Source code in pytorch_widedeep/callbacks.py
def __init__(self, n_epochs: int):\n    super(LRHistory, self).__init__()\n    self.n_epochs = n_epochs\n
"},{"location":"pytorch-widedeep/callbacks.html#pytorch_widedeep.callbacks.ModelCheckpoint","title":"ModelCheckpoint","text":"
ModelCheckpoint(\n    filepath=None,\n    monitor=\"val_loss\",\n    min_delta=0.0,\n    verbose=0,\n    save_best_only=False,\n    mode=\"auto\",\n    period=1,\n    max_save=-1,\n)\n

Bases: Callback

Saves the model after every epoch.

This class is almost identical to the corresponding keras class. Therefore, credit to the Keras Team.

Callbacks are passed as input parameters to the Trainer class. See pytorch_widedeep.trainer.Trainer

Parameters:

  • filepath (Optional[str], default: None ) \u2013

    Full path to save the output weights. It must contain only the root of the filenames. The epoch number and the .pt extension (for pytorch) will be added, e.g. filepath=\"path/to/output_weights/weights_out\", and the saved files in that directory will be named 'weights_out_1.pt', 'weights_out_2.pt', etc. If set to None the class just reports the best metric and best_epoch.

  • monitor (str, default: 'val_loss' ) \u2013

    quantity to monitor. Typically 'val_loss' or metric name (e.g. 'val_acc')

  • min_delta (float, default: 0.0 ) \u2013

    minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement.

  • verbose (int, default: 0 ) \u2013

    verbosity mode

  • save_best_only (bool, default: False ) \u2013

    If True, the latest best model according to the quantity monitored will not be overwritten.

  • mode (str, default: 'auto' ) \u2013

    If save_best_only=True, the decision to overwrite the current save file is made based on either the maximization or the minimization of the monitored quantity. For 'acc', this should be 'max', for 'loss' this should be 'min', etc. In 'auto' mode, the direction is automatically inferred from the name of the monitored quantity.

  • period (int, default: 1 ) \u2013

    Interval (number of epochs) between checkpoints.

  • max_save (int, default: -1 ) \u2013

    Maximum number of outputs to save. If -1, all outputs will be saved

Attributes:

  • best (float) \u2013

    best metric

  • best_epoch (int) \u2013

    best epoch

  • best_state_dict (dict) \u2013

    best model state dictionary. To restore the model to its best state use Trainer.model.load_state_dict(model_checkpoint.best_state_dict), where model_checkpoint is an instance of the class ModelCheckpoint. See the Examples folder in the repo or the Examples section in this documentation for details

Examples:

>>> from pytorch_widedeep.callbacks import ModelCheckpoint\n>>> from pytorch_widedeep.models import TabMlp, Wide, WideDeep\n>>> from pytorch_widedeep.training import Trainer\n>>>\n>>> embed_input = [(u, i, j) for u, i, j in zip([\"a\", \"b\", \"c\"][:4], [4] * 3, [8] * 3)]\n>>> column_idx = {k: v for v, k in enumerate([\"a\", \"b\", \"c\"])}\n>>> wide = Wide(10, 1)\n>>> deep = TabMlp(mlp_hidden_dims=[8, 4], column_idx=column_idx, cat_embed_input=embed_input)\n>>> model = WideDeep(wide, deep)\n>>> trainer = Trainer(model, objective=\"regression\", callbacks=[ModelCheckpoint(filepath='checkpoints/weights_out')])\n
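As noted above for best_state_dict, once training has finished the best weights can be restored along these lines (a sketch; it assumes trainer.fit has already been run):

>>> model_checkpoint = ModelCheckpoint(filepath="checkpoints/weights_out", save_best_only=True)
>>> trainer = Trainer(model, objective="regression", callbacks=[model_checkpoint])
>>> # ... after trainer.fit(...) ...
>>> trainer.model.load_state_dict(model_checkpoint.best_state_dict)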
Source code in pytorch_widedeep/callbacks.py
def __init__(\n    self,\n    filepath: Optional[str] = None,\n    monitor: str = \"val_loss\",\n    min_delta: float = 0.0,\n    verbose: int = 0,\n    save_best_only: bool = False,\n    mode: str = \"auto\",\n    period: int = 1,\n    max_save: int = -1,\n):\n    super(ModelCheckpoint, self).__init__()\n\n    self.filepath = filepath\n    self.monitor = monitor\n    self.min_delta = min_delta\n    self.verbose = verbose\n    self.save_best_only = save_best_only\n    self.mode = mode\n    self.period = period\n    self.max_save = max_save\n\n    self.epochs_since_last_save = 0\n\n    if self.filepath:\n        if len(self.filepath.split(\"/\")[:-1]) == 0:\n            raise ValueError(\n                \"'filepath' must be the full path to save the output weights,\"\n                \" including the root of the filenames. e.g. 'checkpoints/weights_out'\"\n            )\n\n        root_dir = (\"/\").join(self.filepath.split(\"/\")[:-1])\n        if not os.path.exists(root_dir):\n            os.makedirs(root_dir)\n\n    if self.max_save > 0:\n        self.old_files: List[str] = []\n\n    if self.mode not in [\"auto\", \"min\", \"max\"]:\n        warnings.warn(\n            \"ModelCheckpoint mode %s is unknown, \"\n            \"fallback to auto mode.\" % (self.mode),\n            RuntimeWarning,\n        )\n        self.mode = \"auto\"\n    if self.mode == \"min\":\n        self.monitor_op = np.less\n        self.best = np.Inf\n    elif self.mode == \"max\":\n        self.monitor_op = np.greater  # type: ignore[assignment]\n        self.best = -np.Inf\n    else:\n        if _is_metric(self.monitor):\n            self.monitor_op = np.greater  # type: ignore[assignment]\n            self.best = -np.Inf\n        else:\n            self.monitor_op = np.less\n            self.best = np.Inf\n\n    if self.monitor_op == np.greater:\n        self.min_delta *= 1\n    else:\n        self.min_delta *= -1\n
"},{"location":"pytorch-widedeep/callbacks.html#pytorch_widedeep.callbacks.EarlyStopping","title":"EarlyStopping","text":"
EarlyStopping(\n    monitor=\"val_loss\",\n    min_delta=0.0,\n    patience=10,\n    verbose=0,\n    mode=\"auto\",\n    baseline=None,\n    restore_best_weights=False,\n)\n

Bases: Callback

Stop training when a monitored quantity has stopped improving.

This class is almost identical to the corresponding keras class. Therefore, credit to the Keras Team.

Callbacks are passed as input parameters to the Trainer class. See pytorch_widedeep.trainer.Trainer

Parameters:

  • monitor (str, default: 'val_loss' ) \u2013

    Quantity to monitor. Typically 'val_loss' or metric name (e.g. 'val_acc')

  • min_delta (float, default: 0.0 ) \u2013

    minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement.

  • patience (int, default: 10 ) \u2013

    Number of epochs that produced the monitored quantity with no improvement after which training will be stopped.

  • verbose (int, default: 0 ) \u2013

    verbosity mode.

  • mode (str, default: 'auto' ) \u2013

    one of {'auto', 'min', 'max'}. In 'min' mode, training will stop when the quantity monitored has stopped decreasing; in 'max' mode it will stop when the quantity monitored has stopped increasing; in 'auto' mode, the direction is automatically inferred from the name of the monitored quantity.

  • baseline (Optional[float], default: None ) \u2013

    Baseline value for the monitored quantity to reach. Training will stop if the model does not show improvement over the baseline.

  • restore_best_weights (bool, default: False ) \u2013

    Whether to restore model weights from the epoch with the best value of the monitored quantity. If False, the model weights obtained at the last step of training are used.

Attributes:

  • best (float) \u2013

    best metric

  • stopped_epoch (int) \u2013

    epoch when the training stopped

Examples:

>>> from pytorch_widedeep.callbacks import EarlyStopping\n>>> from pytorch_widedeep.models import TabMlp, Wide, WideDeep\n>>> from pytorch_widedeep.training import Trainer\n>>>\n>>> embed_input = [(u, i, j) for u, i, j in zip([\"a\", \"b\", \"c\"][:4], [4] * 3, [8] * 3)]\n>>> column_idx = {k: v for v, k in enumerate([\"a\", \"b\", \"c\"])}\n>>> wide = Wide(10, 1)\n>>> deep = TabMlp(mlp_hidden_dims=[8, 4], column_idx=column_idx, cat_embed_input=embed_input)\n>>> model = WideDeep(wide, deep)\n>>> trainer = Trainer(model, objective=\"regression\", callbacks=[EarlyStopping(patience=10)])\n
Source code in pytorch_widedeep/callbacks.py
def __init__(\n    self,\n    monitor: str = \"val_loss\",\n    min_delta: float = 0.0,\n    patience: int = 10,\n    verbose: int = 0,\n    mode: str = \"auto\",\n    baseline: Optional[float] = None,\n    restore_best_weights: bool = False,\n):\n    super(EarlyStopping, self).__init__()\n\n    self.monitor = monitor\n    self.min_delta = min_delta\n    self.patience = patience\n    self.verbose = verbose\n    self.mode = mode\n    self.baseline = baseline\n    self.restore_best_weights = restore_best_weights\n\n    self.wait = 0\n    self.stopped_epoch = 0\n    self.state_dict = None\n\n    if self.mode not in [\"auto\", \"min\", \"max\"]:\n        warnings.warn(\n            \"EarlyStopping mode %s is unknown, \"\n            \"fallback to auto mode.\" % self.mode,\n            RuntimeWarning,\n        )\n        self.mode = \"auto\"\n\n    if self.mode == \"min\":\n        self.monitor_op = np.less\n    elif self.mode == \"max\":\n        self.monitor_op = np.greater  # type: ignore[assignment]\n    else:\n        if _is_metric(self.monitor):\n            self.monitor_op = np.greater  # type: ignore[assignment]\n        else:\n            self.monitor_op = np.less\n\n    if self.monitor_op == np.greater:\n        self.min_delta *= 1\n    else:\n        self.min_delta *= -1\n
"},{"location":"pytorch-widedeep/dataloaders.html","title":"Dataloaders","text":"

NOTE: This module should contain custom dataloaders that the user might want to implement. At the moment pytorch-widedeep offers one custom dataloader, DataLoaderImbalanced.

"},{"location":"pytorch-widedeep/dataloaders.html#pytorch_widedeep.dataloaders.DataLoaderImbalanced","title":"DataLoaderImbalanced","text":"
DataLoaderImbalanced(\n    dataset, batch_size, num_workers, **kwargs\n)\n

Bases: DataLoader

Class to load and shuffle batches with adjusted weights for imbalanced datasets. If the classes do not begin at 0, remapping is necessary. See here.

Parameters:

  • dataset (WideDeepDataset) \u2013

    see pytorch_widedeep.training._wd_dataset

  • batch_size (int) \u2013

    size of batch

  • num_workers (int) \u2013

    number of workers

Other Parameters:

  • **kwargs \u2013

    This can include any parameter that can be passed to the 'standard' pytorch DataLoader and that is not already explicitly passed to the class. In addition, the dictionary can include the extra parameter oversample_mul, which will multiply the number of samples of the minority class to be drawn by the WeightedRandomSampler.

    In other words, the num_samples param in WeightedRandomSampler will be defined as:

    \\[ minority \\space class \\space count \\times number \\space of \\space classes \\times oversample\\_mul \\]
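A short usage sketch (it assumes wd_train_set is a WideDeepDataset instance with a target array Y, built elsewhere; how the resulting loader is then used for training is covered in the Trainer docs):

from pytorch_widedeep.dataloaders import DataLoaderImbalanced

# oversample_mul multiplies the number of samples drawn by the WeightedRandomSampler
imbalanced_loader = DataLoaderImbalanced(
    dataset=wd_train_set, batch_size=32, num_workers=1, oversample_mul=2
)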
Source code in pytorch_widedeep/dataloaders.py
def __init__(\n    self, dataset: WideDeepDataset, batch_size: int, num_workers: int, **kwargs\n):\n    assert dataset.Y is not None, (\n        \"The 'dataset' instance of WideDeepDataset must contain a \"\n        \"target array 'Y'\"\n    )\n\n    self.with_lds = dataset.with_lds\n    if \"oversample_mul\" in kwargs:\n        oversample_mul = kwargs[\"oversample_mul\"]\n        del kwargs[\"oversample_mul\"]\n    else:\n        oversample_mul = 1\n    weights, minor_cls_cnt, num_clss = get_class_weights(dataset)\n    num_samples = int(minor_cls_cnt * num_clss * oversample_mul)\n    samples_weight = list(np.array([weights[i] for i in dataset.Y]))\n    sampler = WeightedRandomSampler(samples_weight, num_samples, replacement=True)\n    super().__init__(\n        dataset, batch_size, num_workers=num_workers, sampler=sampler, **kwargs\n    )\n
"},{"location":"pytorch-widedeep/load_from_folder.html","title":"The load_from_folder module","text":"

The load_from_folder module contains the classes that are necessary to load data from disk and these are inspired by the ImageFolder class in the torchvision library. This module is designed with one specific case in mind. Such case is the following: given a multi-modal dataset with tabular data, images and text, the images do not fit in memory, and therefore, they have to be loaded from disk. However, as any other functionality in this library, there is some flexibility and some additional cases can also be addressed using this module.

For this module to be used, the datasets must be prepared in a certain way:

  1. the tabular data must contain a column with the image names as stored on disk, including the extension (.jpg, .png, etc...).

  2. Regarding the text dataset, the tabular data can contain a column with the texts themselves or the names of the files containing the texts as stored on disk.

The tabular data itself might or might not fit in memory. If it does not, please see the ChunkPreprocessor utilities in the preprocessing module and the examples folder in the repo, which illustrate such a case. Finally, note that only csv format is currently supported in that case (more formats coming soon).

"},{"location":"pytorch-widedeep/load_from_folder.html#pytorch_widedeep.load_from_folder.tabular.tabular_from_folder.TabFromFolder","title":"TabFromFolder","text":"
TabFromFolder(\n    fname,\n    directory=None,\n    target_col=None,\n    preprocessor=None,\n    text_col=None,\n    img_col=None,\n    ignore_target=False,\n    reference=None,\n    verbose=1,\n)\n

This class is used to load tabular data from disk. The current constraints are:

  1. The only file format supported right now is csv
  2. The csv file must contain headers

For examples, please, see the examples folder in the repo.

Parameters:

  • fname (str) \u2013

    the name of the csv file

  • directory (Optional[str], default: None ) \u2013

    the path to the directory where the csv file is located. If None, a TabFromFolder reference object must be provided

  • target_col (Optional[str], default: None ) \u2013

    the name of the target column. If None, a TabFromFolder reference object must be provided

  • preprocessor (Optional[TabularPreprocessor], default: None ) \u2013

    a fitted TabularPreprocessor object. If None, a TabFromFolder reference object must be provided

  • text_col (Optional[str], default: None ) \u2013

    the name of the column with the texts themselves or the names of the files that contain the text dataset. If None, either there is no text column or a TabFromFolder reference object must be provided

  • img_col (Optional[str], default: None ) \u2013

    the name of the column with the names of the images. If None, either there is no image column or a TabFromFolder reference object must be provided

  • ignore_target (bool, default: False ) \u2013

    whether to ignore the target column. This is normally set to True when this class is used for a test dataset.

  • reference (Optional[Any], default: None ) \u2013

    a reference TabFromFolder object. If provided, the TabFromFolder object will be created using the attributes of the reference object. This is useful to instantiate a TabFromFolder object for evaluation or test purposes

  • verbose (Optional[int], default: 1 ) \u2013

    verbosity. If 0, no output will be printed during the process.

Source code in pytorch_widedeep/load_from_folder/tabular/tabular_from_folder.py
def __init__(\n    self,\n    fname: str,\n    directory: Optional[str] = None,\n    target_col: Optional[str] = None,\n    preprocessor: Optional[TabularPreprocessor] = None,\n    text_col: Optional[str] = None,\n    img_col: Optional[str] = None,\n    ignore_target: bool = False,\n    reference: Optional[Any] = None,  # is Type[\"TabFromFolder\"],\n    verbose: Optional[int] = 1,\n):\n    self.fname = fname\n    self.ignore_target = ignore_target\n    self.verbose = verbose\n\n    if reference is not None:\n        (\n            self.directory,\n            self.target_col,\n            self.preprocessor,\n            self.text_col,\n            self.img_col,\n        ) = self._set_from_reference(reference, preprocessor)\n    else:\n        assert (\n            directory is not None\n            and (target_col is not None and not ignore_target)\n            and preprocessor is not None\n        ), (\n            \"if no reference is provided, 'directory', 'target_col' and 'preprocessor' \"\n            \"must be provided\"\n        )\n\n        self.directory = directory\n        self.target_col = target_col\n        self.preprocessor = preprocessor\n        self.text_col = text_col\n        self.img_col = img_col\n\n    assert (\n        self.preprocessor.is_fitted\n    ), \"The preprocessor must be fitted before passing it to this class\"\n
"},{"location":"pytorch-widedeep/load_from_folder.html#pytorch_widedeep.load_from_folder.tabular.tabular_from_folder.WideFromFolder","title":"WideFromFolder","text":"
WideFromFolder(\n    fname,\n    directory=None,\n    target_col=None,\n    preprocessor=None,\n    text_col=None,\n    img_col=None,\n    ignore_target=False,\n    reference=None,\n    verbose=1,\n)\n

Bases: TabFromFolder

This class is mostly identical to TabFromFolder but exists because we want to separate the treatment of the wide and the deep tabular components

Parameters:

  • fname (str) \u2013

    the name of the csv file

  • directory (Optional[str], default: None ) \u2013

    the path to the directory where the csv file is located. If None, a WideFromFolder reference object must be provided

  • target_col (Optional[str], default: None ) \u2013

    the name of the target column. If None, a WideFromFolder reference object must be provided

  • preprocessor (Optional[TabularPreprocessor], default: None ) \u2013

    a fitted TabularPreprocessor object. If None, a WideFromFolder reference object must be provided

  • text_col (Optional[str], default: None ) \u2013

    the name of the column with the texts themselves or the names of the files that contain the text dataset. If None, either there is no text column or a WideFromFolder reference object must be provided

  • img_col (Optional[str], default: None ) \u2013

    the name of the column with the names of the images. If None, either there is no image column or a WideFromFolder reference object must be provided

  • ignore_target (bool, default: False ) \u2013

    whether to ignore the target column. This is normally set to True when this class is used for a test dataset.

  • reference (Optional[Any], default: None ) \u2013

    a reference WideFromFolder object. If provided, the WideFromFolder object will be created using the attributes of the reference object. This is useful to instantiate a WideFromFolder object for evaluation or test purposes

  • verbose (int, default: 1 ) \u2013

    verbosity. If 0, no output will be printed during the process.

Source code in pytorch_widedeep/load_from_folder/tabular/tabular_from_folder.py
def __init__(\n    self,\n    fname: str,\n    directory: Optional[str] = None,\n    target_col: Optional[str] = None,\n    preprocessor: Optional[TabularPreprocessor] = None,\n    text_col: Optional[str] = None,\n    img_col: Optional[str] = None,\n    ignore_target: bool = False,\n    reference: Optional[Any] = None,  # is Type[\"WideFromFolder\"],\n    verbose: int = 1,\n):\n    super(WideFromFolder, self).__init__(\n        fname=fname,\n        directory=directory,\n        target_col=target_col,\n        preprocessor=preprocessor,\n        text_col=text_col,\n        img_col=img_col,\n        reference=reference,\n        ignore_target=ignore_target,\n        verbose=verbose,\n    )\n
"},{"location":"pytorch-widedeep/load_from_folder.html#pytorch_widedeep.load_from_folder.text.text_from_folder.TextFromFolder","title":"TextFromFolder","text":"
TextFromFolder(preprocessor)\n

This class is used to load the text dataset (i.e. the text files) from a folder, or to retrieve the texts given a text column specified within the preprocessor object.

For examples, please, see the examples folder in the repo.

Parameters:

  • preprocessor (Union[TextPreprocessor, ChunkTextPreprocessor]) \u2013

    The preprocessor used to process the text. It must be fitted before using this class

Source code in pytorch_widedeep/load_from_folder/text/text_from_folder.py
def __init__(\n    self,\n    preprocessor: Union[TextPreprocessor, ChunkTextPreprocessor],\n):\n    assert (\n        preprocessor.is_fitted\n    ), \"The preprocessor must be fitted before using this class\"\n\n    self.preprocessor = preprocessor\n
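A short sketch, assuming a TextPreprocessor with a hypothetical column name, a training dataframe train_df, and that the classes are importable as shown; the preprocessor is fitted before being passed to TextFromFolder:

from pytorch_widedeep.preprocessing import TextPreprocessor\nfrom pytorch_widedeep.load_from_folder import TextFromFolder\n\ntext_preprocessor = TextPreprocessor(text_col='text_fname')  # hypothetical column name\ntext_preprocessor.fit(train_df)  # must be fitted before being passed to TextFromFolder\ntext_folder = TextFromFolder(preprocessor=text_preprocessor)\n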
"},{"location":"pytorch-widedeep/load_from_folder.html#pytorch_widedeep.load_from_folder.image.image_from_folder.ImageFromFolder","title":"ImageFromFolder","text":"
ImageFromFolder(\n    directory=None,\n    preprocessor=None,\n    loader=default_loader,\n    extensions=None,\n    transforms=None,\n)\n

This class is used to load the image dataset from disk. It is inspired by the ImageFolder class in the torchvision library. Here, we have simply adapted it to work within the context of a Wide and Deep multi-modal model.

For examples, please, see the examples folder in the repo.

Parameters:

  • directory (Optional[str], default: None ) \u2013

    the path to the directory where the images are located. If None, a preprocessor must be provided.

  • preprocessor (Optional[ImagePreprocessor], default: None ) \u2013

    a fitted ImagePreprocessor object.

  • loader (Callable[[str], Any], default: default_loader ) \u2013

    a function to load a sample given its path.

  • extensions (Optional[Tuple[str, ...]], default: None ) \u2013

    a tuple with the allowed extensions. If None, IMG_EXTENSIONS will be used where IMG_EXTENSIONS =\".jpg\", \".jpeg\", \".png\", \".ppm\", \".bmp\", \".pgm\", \".tif\", \".tiff\", \".webp\"

  • transforms (Optional[Any], default: None ) \u2013

    a torchvision.transforms object. If None, this class will simply return an array representation of the PIL Image

Source code in pytorch_widedeep/load_from_folder/image/image_from_folder.py
def __init__(\n    self,\n    directory: Optional[str] = None,\n    preprocessor: Optional[ImagePreprocessor] = None,\n    loader: Callable[[str], Any] = default_loader,\n    extensions: Optional[Tuple[str, ...]] = None,\n    transforms: Optional[Any] = None,\n) -> None:\n    assert (\n        directory is not None or preprocessor is not None\n    ), \"Either a directory or an instance of ImagePreprocessor must be provided\"\n\n    if directory is not None and preprocessor is not None:  # pragma: no cover\n        assert directory == preprocessor.img_path, (\n            \"If both 'directory' and 'preprocessor' are provided, the 'img_path' \"\n            \"attribute of the 'preprocessor' must be the same as the 'directory'\"\n        )\n\n    if directory is not None:\n        self.directory = directory\n    else:\n        assert (\n            preprocessor is not None\n        ), \"Either a directory or an instance of ImagePreprocessor must be provided\"\n        self.directory = preprocessor.img_path\n\n    self.preprocessor = preprocessor\n    self.loader = loader\n    self.extensions = extensions if extensions is not None else IMG_EXTENSIONS\n    self.transforms = transforms\n    if self.transforms:\n        self.transforms_names = [\n            tr.__class__.__name__ for tr in self.transforms.transforms\n        ]\n    else:\n        self.transforms_names = []\n\n        self.transpose = True\n
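For instance (the directory path is hypothetical, and the import path from pytorch_widedeep.load_from_folder is assumed), one could pass the image folder together with a torchvision Compose of transforms:

from torchvision import transforms\nfrom pytorch_widedeep.load_from_folder import ImageFromFolder\n\nimg_folder = ImageFromFolder(\n    directory='data/images',  # hypothetical path\n    transforms=transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()]),\n)\n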
"},{"location":"pytorch-widedeep/load_from_folder.html#pytorch_widedeep.load_from_folder.wd_dataset_from_folder.WideDeepDatasetFromFolder","title":"WideDeepDatasetFromFolder","text":"
WideDeepDatasetFromFolder(\n    n_samples,\n    tab_from_folder=None,\n    wide_from_folder=None,\n    text_from_folder=None,\n    img_from_folder=None,\n    reference=None,\n)\n

Bases: Dataset

This class is the Dataset counterpart of the WideDeepDataset class.

Given a reference tabular dataset, with columns that indicate the path to the images and to the text files or the texts themselves, it will use the [...]FromFolder classes to load the data consistently from disk per batch.

For examples, please, see the examples folder in the repo.

Parameters:

  • n_samples (int) \u2013

    Number of samples in the dataset

  • tab_from_folder (Optional[TabFromFolder], default: None ) \u2013

    Instance of the TabFromFolder class

  • wide_from_folder (Optional[WideFromFolder], default: None ) \u2013

    Instance of the WideFromFolder class

  • text_from_folder (Optional[TextFromFolder], default: None ) \u2013

    Instance of the TextFromFolder class

  • img_from_folder (Optional[ImageFromFolder], default: None ) \u2013

    Instance of the ImageFromFolder class

  • reference (Optional[Any], default: None ) \u2013

    If not None, the 'text_from_folder' and 'img_from_folder' objects will be retrieved from the reference object. This is useful when a WideDeepDatasetFromFolder object created for the training dataset is re-used as a reference for the validation and test datasets. In that case, the text_from_folder and img_from_folder objects are the same for all three datasets, so there is no need to create a new instance for each of them.

Source code in pytorch_widedeep/load_from_folder/wd_dataset_from_folder.py
def __init__(\n    self,\n    n_samples: int,\n    tab_from_folder: Optional[TabFromFolder] = None,\n    wide_from_folder: Optional[WideFromFolder] = None,\n    text_from_folder: Optional[TextFromFolder] = None,\n    img_from_folder: Optional[ImageFromFolder] = None,\n    reference: Optional[Any] = None,  # is Type[\"WideDeepDatasetFromFolder\"],\n):\n    super(WideDeepDatasetFromFolder, self).__init__()\n\n    if tab_from_folder is None and wide_from_folder is None:\n        raise ValueError(\n            \"Either 'tab_from_folder' or 'wide_from_folder' must be not None\"\n        )\n\n    if reference is not None:\n        assert (\n            img_from_folder is None and text_from_folder is None\n        ), \"If reference is not None, 'img_from_folder' and 'text_from_folder' left as None\"\n        self.text_from_folder, self.img_from_folder = self._get_from_reference(\n            reference\n        )\n    else:\n        assert (\n            text_from_folder is not None and img_from_folder is not None\n        ), \"If reference is None, 'img_from_folder' and 'text_from_folder' must be not None\"\n        self.text_from_folder = text_from_folder\n        self.img_from_folder = img_from_folder\n\n    self.n_samples = n_samples\n    self.tab_from_folder = tab_from_folder\n    self.wide_from_folder = wide_from_folder\n
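Putting it all together, a minimal sketch (re-using the hypothetical train_tab_folder, text_folder and img_folder objects from the snippets above, and train_df as the training dataframe) wraps everything in a WideDeepDatasetFromFolder and a standard PyTorch DataLoader:

from torch.utils.data import DataLoader\nfrom pytorch_widedeep.load_from_folder import WideDeepDatasetFromFolder\n\ntrain_dataset_folder = WideDeepDatasetFromFolder(\n    n_samples=len(train_df),\n    tab_from_folder=train_tab_folder,\n    text_from_folder=text_folder,\n    img_from_folder=img_folder,\n)\ntrain_loader = DataLoader(train_dataset_folder, batch_size=32)\n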
"},{"location":"pytorch-widedeep/losses.html","title":"Losses","text":"

pytorch-widedeep accepts a number of losses and objectives that can be passed to the Trainer class via the parameter objective (see pytorch-widedeep.training.Trainer). In most cases, the loss function that pytorch-widedeep uses internally is already implemented in PyTorch.

In addition, pytorch-widedeep implements a series of \"custom\" loss functions. These are described below for completeness since, as mentioned before, they are used internally by the Trainer. Of course, one could always use them on their own; they can be imported as:

from pytorch_widedeep.losses import FocalLoss

NOTE: Losses in this module expect the predictions and ground truth to have the same dimensions for regression and binary classification problems \\((N_{samples}, 1)\\). In the case of multiclass classification problems the ground truth is expected to be a 1D tensor with the corresponding classes. See Examples below
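As an illustration of this convention (values chosen arbitrarily):

import torch\n\n# regression / binary classification: predictions and ground truth are both (N, 1)\ny_true_binary = torch.tensor([0.0, 1.0, 0.0, 1.0]).view(-1, 1)\ny_pred_binary = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n\n# multiclass classification: predictions are (N, n_classes), ground truth is a 1D tensor of classes\ny_true_multi = torch.tensor([1, 0, 2])\ny_pred_multi = torch.tensor([[0.2, 0.5, 0.3], [0.8, 0.1, 0.1], [0.7, 0.2, 0.1]])\n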

"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.MSELoss","title":"MSELoss","text":"
MSELoss()\n

Bases: Module

Mean squared error loss with the option of using Label Distribution Smoothing (LDS)

LDS is based on Delving into Deep Imbalanced Regression.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.MSELoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions

  • target (Tensor) \u2013

    Target tensor with the actual values

  • lds_weight (Optional[Tensor], default: None ) \u2013

    Tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import MSELoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n>>> loss = MSELoss()(input, target, lds_weight)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self, input: Tensor, target: Tensor, lds_weight: Optional[Tensor] = None\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions\n    target: Tensor\n        Target tensor with the actual values\n    lds_weight: Tensor, Optional\n        Tensor of weights that will multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import MSELoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n    >>> loss = MSELoss()(input, target, lds_weight)\n    \"\"\"\n    loss = (input - target) ** 2\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.MSLELoss","title":"MSLELoss","text":"
MSLELoss()\n

Bases: Module

Mean squared log error loss with the option of using Label Distribution Smoothing (LDS)

LDS is based on Delving into Deep Imbalanced Regression.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.MSLELoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual classes

  • lds_weight (Optional[Tensor], default: None ) \u2013

    Tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import MSLELoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n>>> loss = MSLELoss()(input, target, lds_weight)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self, input: Tensor, target: Tensor, lds_weight: Optional[Tensor] = None\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n    lds_weight: Tensor, Optional\n        Tensor of weights that will multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import MSLELoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n    >>> loss = MSLELoss()(input, target, lds_weight)\n    \"\"\"\n    assert (\n        input.min() >= 0\n    ), \"\"\"All input values must be >=0, if your model is predicting\n        values <0 try to enforce positive values by activation function\n        on last layer with `trainer.enforce_positive_output=True`\"\"\"\n    assert target.min() >= 0, \"All target values must be >=0\"\n\n    loss = (torch.log(input + 1) - torch.log(target + 1)) ** 2\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.RMSELoss","title":"RMSELoss","text":"
RMSELoss()\n

Bases: Module

Root mean squared error loss adjusted for the possibility of using Label Distribution Smoothing (LDS)

LDS is based on Delving into Deep Imbalanced Regression.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.RMSELoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual classes

  • lds_weight (Optional[Tensor], default: None ) \u2013

    Tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import RMSELoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n>>> loss = RMSELoss()(input, target, lds_weight)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self, input: Tensor, target: Tensor, lds_weight: Optional[Tensor] = None\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n    lds_weight: Tensor, Optional\n        Tensor of weights that will multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import RMSELoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n    >>> loss = RMSELoss()(input, target, lds_weight)\n    \"\"\"\n    loss = (input - target) ** 2\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.sqrt(torch.mean(loss))\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.RMSLELoss","title":"RMSLELoss","text":"
RMSLELoss()\n

Bases: Module

Root mean squared log error loss adjusted for the possibility of using Label Distribution Smoothing (LDS)

LDS is based on Delving into Deep Imbalanced Regression.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.RMSLELoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual classes

  • lds_weight (Optional[Tensor], default: None ) \u2013

    Tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import RMSLELoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n>>> loss = RMSLELoss()(input, target, lds_weight)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self, input: Tensor, target: Tensor, lds_weight: Optional[Tensor] = None\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n    lds_weight: Tensor, Optional\n        Tensor of weights that will multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import RMSLELoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n    >>> loss = RMSLELoss()(input, target, lds_weight)\n    \"\"\"\n    assert (\n        input.min() >= 0\n    ), \"\"\"All input values must be >=0, if your model is predicting\n        values <0 try to enforce positive values by activation function\n        on last layer with `trainer.enforce_positive_output=True`\"\"\"\n    assert target.min() >= 0, \"All target values must be >=0\"\n\n    loss = (torch.log(input + 1) - torch.log(target + 1)) ** 2\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.sqrt(torch.mean(loss))\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.QuantileLoss","title":"QuantileLoss","text":"
QuantileLoss(\n    quantiles=[0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]\n)\n

Bases: Module

Quantile loss defined as:

\\[ Loss = max(q \\times (y-y_{pred}), (1-q) \\times (y_{pred}-y)) \\]

All credits go to the implementation at pytorch-forecasting.

Parameters:

  • quantiles (List[float], default: [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98] ) \u2013

    List of quantiles

Source code in pytorch_widedeep/losses.py
def __init__(\n    self,\n    quantiles: List[float] = [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98],\n):\n    super().__init__()\n    self.quantiles = quantiles\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.QuantileLoss.forward","title":"forward","text":"
forward(input, target)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions

  • target (Tensor) \u2013

    Target tensor with the actual values

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.losses import QuantileLoss\n>>>\n>>> # REGRESSION\n>>> target = torch.tensor([[0.6, 1.5]]).view(-1, 1)\n>>> input = torch.tensor([[.1, .2,], [.4, .5]])\n>>> qloss = QuantileLoss([0.25, 0.75])\n>>> loss = qloss(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(self, input: Tensor, target: Tensor) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions\n    target: Tensor\n        Target tensor with the actual values\n\n    Examples\n    --------\n    >>> import torch\n    >>>\n    >>> from pytorch_widedeep.losses import QuantileLoss\n    >>>\n    >>> # REGRESSION\n    >>> target = torch.tensor([[0.6, 1.5]]).view(-1, 1)\n    >>> input = torch.tensor([[.1, .2,], [.4, .5]])\n    >>> qloss = QuantileLoss([0.25, 0.75])\n    >>> loss = qloss(input, target)\n    \"\"\"\n\n    assert input.shape == torch.Size([target.shape[0], len(self.quantiles)]), (\n        \"The input and target have inconsistent shape. The dimension of the prediction \"\n        \"of the model that is using QuantileLoss must be equal to number of quantiles, \"\n        f\"i.e. {len(self.quantiles)}.\"\n    )\n    target = target.view(-1, 1).float()\n    losses = []\n    for i, q in enumerate(self.quantiles):\n        errors = target - input[..., i]\n        losses.append(torch.max((q - 1) * errors, q * errors).unsqueeze(-1))\n\n    loss = torch.cat(losses, dim=2)\n\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalLoss","title":"FocalLoss","text":"
FocalLoss(alpha=0.25, gamma=1.0)\n

Bases: Module

Implementation of the Focal loss for both binary and multiclass classification:

\\[ FL(p_t) = \\alpha (1 - p_t)^{\\gamma} log(p_t) \\]

where, for a case of a binary classification problem

\\[ \\begin{equation} p_t= \\begin{cases}p, & \\text{if $y=1$}.\\\\1-p, & \\text{otherwise}. \\end{cases} \\end{equation} \\]
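For example, with \(\gamma = 2\), a well-classified sample with \(p_t = 0.9\) contributes a modulating factor of \((1 - 0.9)^2 = 0.01\), i.e. its loss is down-weighted by a factor of 100 relative to the unmodulated cross-entropy, while hard samples with small \(p_t\) remain almost unaffected.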

Parameters:

  • alpha (float, default: 0.25 ) \u2013

    Focal Loss alpha parameter

  • gamma (float, default: 1.0 ) \u2013

    Focal Loss gamma parameter

Source code in pytorch_widedeep/losses.py
def __init__(self, alpha: float = 0.25, gamma: float = 1.0):\n    super().__init__()\n    self.alpha = alpha\n    self.gamma = gamma\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalLoss.forward","title":"forward","text":"
forward(input, target)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual classes

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.losses import FocalLoss\n>>>\n>>> # BINARY\n>>> target = torch.tensor([0, 1, 0, 1]).view(-1, 1)\n>>> input = torch.tensor([[0.6, 0.7, 0.3, 0.8]]).t()\n>>> loss = FocalLoss()(input, target)\n>>>\n>>> # MULTICLASS\n>>> target = torch.tensor([1, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([[0.2, 0.5, 0.3], [0.8, 0.1, 0.1], [0.7, 0.2, 0.1]])\n>>> loss = FocalLoss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(self, input: Tensor, target: Tensor) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n\n    Examples\n    --------\n    >>> import torch\n    >>>\n    >>> from pytorch_widedeep.losses import FocalLoss\n    >>>\n    >>> # BINARY\n    >>> target = torch.tensor([0, 1, 0, 1]).view(-1, 1)\n    >>> input = torch.tensor([[0.6, 0.7, 0.3, 0.8]]).t()\n    >>> loss = FocalLoss()(input, target)\n    >>>\n    >>> # MULTICLASS\n    >>> target = torch.tensor([1, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([[0.2, 0.5, 0.3], [0.8, 0.1, 0.1], [0.7, 0.2, 0.1]])\n    >>> loss = FocalLoss()(input, target)\n    \"\"\"\n    input_prob = torch.sigmoid(input)\n    if input.size(1) == 1:\n        input_prob = torch.cat([1 - input_prob, input_prob], axis=1)  # type: ignore\n        num_class = 2\n    else:\n        num_class = input_prob.size(1)\n    binary_target = torch.eye(num_class)[target.squeeze().cpu().long()]\n    if use_cuda:\n        binary_target = binary_target.cuda()\n    binary_target = binary_target.contiguous()\n    weight = self._get_weight(input_prob, binary_target)\n\n    return F.binary_cross_entropy(\n        input_prob, binary_target, weight, reduction=\"mean\"\n    )\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.BayesianSELoss","title":"BayesianSELoss","text":"
BayesianSELoss()\n

Bases: Module

Squared Loss (log Gaussian) for the case of regression, as specified in the original publication Weight Uncertainty in Neural Networks.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.BayesianSELoss.forward","title":"forward","text":"
forward(input, target)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual classes

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import BayesianSELoss\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> loss = BayesianSELoss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(self, input: Tensor, target: Tensor) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import BayesianSELoss\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> loss = BayesianSELoss()(input, target)\n    \"\"\"\n    return (0.5 * (input - target) ** 2).sum()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.TweedieLoss","title":"TweedieLoss","text":"
TweedieLoss()\n

Bases: Module

Tweedie loss for extremely unbalanced zero-inflated data

All credits go to Wenbo Shi. See this post and the original publication for details.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.TweedieLoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None, p=1.5)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions

  • target (Tensor) \u2013

    Target tensor with the actual values

  • lds_weight (Optional[Tensor], default: None ) \u2013

    If we choose to use LDS this is the tensor of weights that will multiply the loss value.

  • p (float, default: 1.5 ) \u2013

    the power to be used to compute the loss. See the original publication for details

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import TweedieLoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n>>> loss = TweedieLoss()(input, target, lds_weight)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self,\n    input: Tensor,\n    target: Tensor,\n    lds_weight: Optional[Tensor] = None,\n    p: float = 1.5,\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions\n    target: Tensor\n        Target tensor with the actual values\n    lds_weight: Tensor, Optional\n        If we choose to use LDS this is the tensor of weights that will\n        multiply the loss value.\n    p: float, default = 1.5\n        the power to be used to compute the loss. See the original\n        publication for details\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import TweedieLoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> lds_weight = torch.tensor([0.1, 0.2, 0.3, 0.4]).view(-1, 1)\n    >>> loss = TweedieLoss()(input, target, lds_weight)\n    \"\"\"\n\n    assert (\n        input.min() > 0\n    ), \"\"\"All input values must be >=0, if your model is predicting\n        values <0 try to enforce positive values by activation function\n        on last layer with `trainer.enforce_positive_output=True`\"\"\"\n    assert target.min() >= 0, \"All target values must be >=0\"\n    loss = -target * torch.pow(input, 1 - p) / (1 - p) + torch.pow(input, 2 - p) / (\n        2 - p\n    )\n    if lds_weight is not None:\n        loss *= lds_weight\n\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.ZILNLoss","title":"ZILNLoss","text":"
ZILNLoss()\n

Bases: Module

Adjusted implementation of the Zero Inflated LogNormal Loss

See A Deep Probabilistic Model for Customer Lifetime Value Prediction and the corresponding code.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.ZILNLoss.forward","title":"forward","text":"
forward(input, target)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions with shape (N, 3), where N is the batch size

  • target (Tensor) \u2013

    Target tensor with the actual target values

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import ZILNLoss\n>>>\n>>> target = torch.tensor([[0., 1.5]]).view(-1, 1)\n>>> input = torch.tensor([[.1, .2, .3], [.4, .5, .6]])\n>>> loss = ZILNLoss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(self, input: Tensor, target: Tensor) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions with spape (N,3), where N is the batch size\n    target: Tensor\n        Target tensor with the actual target values\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import ZILNLoss\n    >>>\n    >>> target = torch.tensor([[0., 1.5]]).view(-1, 1)\n    >>> input = torch.tensor([[.1, .2, .3], [.4, .5, .6]])\n    >>> loss = ZILNLoss()(input, target)\n    \"\"\"\n    positive = target > 0\n    positive = positive.float()\n\n    assert input.shape == torch.Size([target.shape[0], 3]), (\n        \"Wrong shape of the 'input' tensor. The pred_dim of the \"\n        \"model that is using ZILNLoss must be equal to 3.\"\n    )\n\n    positive_input = input[..., :1]\n\n    classification_loss = F.binary_cross_entropy_with_logits(\n        positive_input, positive, reduction=\"none\"\n    ).flatten()\n\n    loc = input[..., 1:2]\n\n    # when using max the two input tensors (input and other) have to be of\n    # the same type\n    max_input = F.softplus(input[..., 2:])\n    max_other = torch.sqrt(torch.Tensor([torch.finfo(torch.double).eps])).type(\n        max_input.type()\n    )\n    scale = torch.max(max_input, max_other)\n    safe_labels = positive * target + (1 - positive) * torch.ones_like(target)\n\n    regression_loss = -torch.mean(\n        positive\n        * torch.distributions.log_normal.LogNormal(loc=loc, scale=scale).log_prob(\n            safe_labels\n        ),\n        dim=-1,\n    )\n\n    return torch.mean(classification_loss + regression_loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.L1Loss","title":"L1Loss","text":"
L1Loss()\n

Bases: Module

L1 loss adjusted for the possibility of using Label Distribution Smoothing (LDS)

LDS is based on Delving into Deep Imbalanced Regression.

Source code in pytorch_widedeep/losses.py
def __init__(self):\n    super().__init__()\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.L1Loss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions

  • target (Tensor) \u2013

    Target tensor with the actual values

  • lds_weight (Optional[Tensor], default: None ) \u2013

    If we choose to use LDS this is the tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.losses import L1Loss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> loss = L1Loss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self, input: Tensor, target: Tensor, lds_weight: Optional[Tensor] = None\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions\n    target: Tensor\n        Target tensor with the actual values\n    lds_weight: Tensor, Optional\n        If we choose to use LDS this is the tensor of weights that will\n        multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>>\n    >>> from pytorch_widedeep.losses import L1Loss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> loss = L1Loss()(input, target)\n    \"\"\"\n    loss = F.l1_loss(input, target, reduction=\"none\")\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalR_L1Loss","title":"FocalR_L1Loss","text":"
FocalR_L1Loss(beta=0.2, gamma=1.0, activation_fn='sigmoid')\n

Bases: Module

Focal-R L1 loss

Based on Delving into Deep Imbalanced Regression.

Parameters:

  • beta (float, default: 0.2 ) \u2013

    Focal Loss beta parameter in their implementation

  • gamma (float, default: 1.0 ) \u2013

    Focal Loss gamma parameter

  • activation_fn (Literal[sigmoid, tanh], default: 'sigmoid' ) \u2013

    Activation function to be used during the computation of the loss. Possible values are 'sigmoid' and 'tanh'. See the original publication for details.

Source code in pytorch_widedeep/losses.py
def __init__(\n    self,\n    beta: float = 0.2,\n    gamma: float = 1.0,\n    activation_fn: Literal[\"sigmoid\", \"tanh\"] = \"sigmoid\",\n):\n    super().__init__()\n    self.beta = beta\n    self.gamma = gamma\n    self.activation_fn = activation_fn\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalR_L1Loss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual classes

  • lds_weight (Optional[Tensor], default: None ) \u2013

    If we choose to use LDS this is the tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.losses import FocalR_L1Loss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> loss = FocalR_L1Loss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self,\n    input: Tensor,\n    target: Tensor,\n    lds_weight: Optional[Tensor] = None,\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n    lds_weight: Tensor, Optional\n        If we choose to use LDS this is the tensor of weights that will\n        multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>>\n    >>> from pytorch_widedeep.losses import FocalR_L1Loss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> loss = FocalR_L1Loss()(input, target)\n    \"\"\"\n    loss = F.l1_loss(input, target, reduction=\"none\")\n    if self.activation_fn == \"tanh\":\n        loss *= (torch.tanh(self.beta * torch.abs(input - target))) ** self.gamma\n    elif self.activation_fn == \"sigmoid\":\n        loss *= (\n            2 * torch.sigmoid(self.beta * torch.abs(input - target)) - 1\n        ) ** self.gamma\n    else:\n        ValueError(\n            \"Incorrect activation function value - must be in ['sigmoid', 'tanh']\"\n        )\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalR_MSELoss","title":"FocalR_MSELoss","text":"
FocalR_MSELoss(\n    beta=0.2, gamma=1.0, activation_fn=\"sigmoid\"\n)\n

Bases: Module

Focal-R MSE loss

Based on Delving into Deep Imbalanced Regression.

Parameters:

  • beta (float, default: 0.2 ) \u2013

    Focal Loss beta parameter in their implementation

  • gamma (float, default: 1.0 ) \u2013

    Focal Loss gamma parameter

  • activation_fn (Literal[sigmoid, tanh], default: 'sigmoid' ) \u2013

    Activation function to be used during the computation of the loss. Possible values are 'sigmoid' and 'tanh'. See the original publication for details.

Source code in pytorch_widedeep/losses.py
def __init__(\n    self,\n    beta: float = 0.2,\n    gamma: float = 1.0,\n    activation_fn: Literal[\"sigmoid\", \"tanh\"] = \"sigmoid\",\n):\n    super().__init__()\n    self.beta = beta\n    self.gamma = gamma\n    self.activation_fn = activation_fn\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalR_MSELoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual classes

  • lds_weight (Optional[Tensor], default: None ) \u2013

    If we choose to use LDS this is the tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.losses import FocalR_MSELoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> loss = FocalR_MSELoss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self,\n    input: Tensor,\n    target: Tensor,\n    lds_weight: Optional[Tensor] = None,\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n    lds_weight: Tensor, Optional\n        If we choose to use LDS this is the tensor of weights that will\n        multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>>\n    >>> from pytorch_widedeep.losses import FocalR_MSELoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> loss = FocalR_MSELoss()(input, target)\n    \"\"\"\n    loss = (input - target) ** 2\n    if self.activation_fn == \"tanh\":\n        loss *= (torch.tanh(self.beta * torch.abs(input - target))) ** self.gamma\n    elif self.activation_fn == \"sigmoid\":\n        loss *= (\n            2 * torch.sigmoid(self.beta * torch.abs((input - target) ** 2)) - 1\n        ) ** self.gamma\n    else:\n        ValueError(\n            \"Incorrect activation function value - must be in ['sigmoid', 'tanh']\"\n        )\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalR_RMSELoss","title":"FocalR_RMSELoss","text":"
FocalR_RMSELoss(\n    beta=0.2, gamma=1.0, activation_fn=\"sigmoid\"\n)\n

Bases: Module

Focal-R RMSE loss

Based on Delving into Deep Imbalanced Regression.

Parameters:

  • beta (float, default: 0.2 ) \u2013

    Focal Loss beta parameter in their implementation

  • gamma (float, default: 1.0 ) \u2013

    Focal Loss gamma parameter

  • activation_fn (Literal[sigmoid, tanh], default: 'sigmoid' ) \u2013

    Activation function to be used during the computation of the loss. Possible values are 'sigmoid' and 'tanh'. See the original publication for details.

Source code in pytorch_widedeep/losses.py
def __init__(\n    self,\n    beta: float = 0.2,\n    gamma: float = 1.0,\n    activation_fn: Literal[\"sigmoid\", \"tanh\"] = \"sigmoid\",\n):\n    super().__init__()\n    self.beta = beta\n    self.gamma = gamma\n    self.activation_fn = activation_fn\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.FocalR_RMSELoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual classes

  • lds_weight (Optional[Tensor], default: None ) \u2013

    If we choose to use LDS this is the tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.losses import FocalR_RMSELoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> loss = FocalR_RMSELoss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self,\n    input: Tensor,\n    target: Tensor,\n    lds_weight: Optional[Tensor] = None,\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n    lds_weight: Tensor, Optional\n        If we choose to use LDS this is the tensor of weights that will\n        multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>>\n    >>> from pytorch_widedeep.losses import FocalR_RMSELoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> loss = FocalR_RMSELoss()(input, target)\n    \"\"\"\n    loss = (input - target) ** 2\n    if self.activation_fn == \"tanh\":\n        loss *= (torch.tanh(self.beta * torch.abs(input - target))) ** self.gamma\n    elif self.activation_fn == \"sigmoid\":\n        loss *= (\n            2 * torch.sigmoid(self.beta * torch.abs((input - target) ** 2)) - 1\n        ) ** self.gamma\n    else:\n        ValueError(\n            \"Incorrect activation function value - must be in ['sigmoid', 'tanh']\"\n        )\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.sqrt(torch.mean(loss))\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.HuberLoss","title":"HuberLoss","text":"
HuberLoss(beta=0.2)\n

Bases: Module

Huber Loss

Based on Delving into Deep Imbalanced Regression.
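Writing \(e = y_{pred} - y\), the piecewise form implemented in the source code below, with beta acting as the threshold between the quadratic and the linear regime, is: \[ \begin{equation} L(e)= \begin{cases}\frac{e^2}{2\beta}, & \text{if $|e| < \beta$}.\\ |e| - \frac{\beta}{2}, & \text{otherwise}. \end{cases} \end{equation} \]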

Source code in pytorch_widedeep/losses.py
def __init__(self, beta: float = 0.2):\n    super().__init__()\n    self.beta = beta\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.HuberLoss.forward","title":"forward","text":"
forward(input, target, lds_weight=None)\n

Parameters:

  • input (Tensor) \u2013

    Input tensor with predictions (not probabilities)

  • target (Tensor) \u2013

    Target tensor with the actual classes

  • lds_weight (Optional[Tensor], default: None ) \u2013

    If we choose to use LDS this is the tensor of weights that will multiply the loss value.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.losses import HuberLoss\n>>>\n>>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n>>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n>>> loss = HuberLoss()(input, target)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self,\n    input: Tensor,\n    target: Tensor,\n    lds_weight: Optional[Tensor] = None,\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    input: Tensor\n        Input tensor with predictions (not probabilities)\n    target: Tensor\n        Target tensor with the actual classes\n    lds_weight: Tensor, Optional\n        If we choose to use LDS this is the tensor of weights that will\n        multiply the loss value.\n\n    Examples\n    --------\n    >>> import torch\n    >>>\n    >>> from pytorch_widedeep.losses import HuberLoss\n    >>>\n    >>> target = torch.tensor([1, 1.2, 0, 2]).view(-1, 1)\n    >>> input = torch.tensor([0.6, 0.7, 0.3, 0.8]).view(-1, 1)\n    >>> loss = HuberLoss()(input, target)\n    \"\"\"\n    l1_loss = torch.abs(input - target)\n    cond = l1_loss < self.beta\n    loss = torch.where(\n        cond, 0.5 * l1_loss**2 / self.beta, l1_loss - 0.5 * self.beta\n    )\n    if lds_weight is not None:\n        loss *= lds_weight\n    return torch.mean(loss)\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.InfoNCELoss","title":"InfoNCELoss","text":"
InfoNCELoss(temperature=0.1, reduction='mean')\n

Bases: Module

InfoNCE Loss. Loss applied during the Contrastive Denoising Self Supervised Pre-training routine available in this library

NOTE: This loss is in principle not exposed to the user, as it is used internally in the library, but it is included here for completeness.

See SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training and references therein

Partially inspired by the code in this repo

"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.InfoNCELoss--parameters","title":"Parameters:","text":"

  • temperature (float, default: 0.1 ) \u2013

    The logits are divided by the temperature before computing the loss value

  • reduction (str, default: 'mean' ) \u2013

    Loss reduction method

Source code in pytorch_widedeep/losses.py
def __init__(self, temperature: float = 0.1, reduction: str = \"mean\"):\n    super(InfoNCELoss, self).__init__()\n\n    self.temperature = temperature\n    self.reduction = reduction\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.InfoNCELoss.forward","title":"forward","text":"
forward(g_projs)\n

Parameters:

  • g_projs (Tuple[Tensor, Tensor]) \u2013

    Tuple with the two tensors corresponding to the output of the two projection heads, as described in 'SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training'.

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import InfoNCELoss\n>>> g_projs = (torch.rand(3, 5, 16), torch.rand(3, 5, 16))\n>>> loss = InfoNCELoss()\n>>> res = loss(g_projs)\n
Source code in pytorch_widedeep/losses.py
def forward(self, g_projs: Tuple[Tensor, Tensor]) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    g_projs: Tuple\n        Tuple with the two tensors corresponding to the output of the two\n        projection heads, as described 'SAINT: Improved Neural Networks\n        for Tabular Data via Row Attention and Contrastive Pre-Training'.\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import InfoNCELoss\n    >>> g_projs = (torch.rand(3, 5, 16), torch.rand(3, 5, 16))\n    >>> loss = InfoNCELoss()\n    >>> res = loss(g_projs)\n    \"\"\"\n    z, z_ = g_projs[0], g_projs[1]\n\n    norm_z = F.normalize(z, dim=-1).flatten(1)\n    norm_z_ = F.normalize(z_, dim=-1).flatten(1)\n\n    logits = (norm_z @ norm_z_.t()) / self.temperature\n    logits_ = (norm_z_ @ norm_z.t()) / self.temperature\n\n    # the target/labels are the entries on the diagonal\n    target = torch.arange(len(norm_z), device=norm_z.device)\n\n    loss = F.cross_entropy(logits, target, reduction=self.reduction)\n    loss_ = F.cross_entropy(logits_, target, reduction=self.reduction)\n\n    return (loss + loss_) / 2.0\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.DenoisingLoss","title":"DenoisingLoss","text":"
DenoisingLoss(\n    lambda_cat=1.0, lambda_cont=1.0, reduction=\"mean\"\n)\n

Bases: Module

Denoising Loss. Loss applied during the Contrastive Denoising Self Supervised Pre-training routine available in this library

NOTE: This loss is in principle not exposed to the user, as it is used internally in the library, but it is included here for completeness.

See SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training and references therein

"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.DenoisingLoss--parameters","title":"Parameters:","text":"

  • lambda_cat (float, default: 1.0 ) \u2013

    Multiplicative factor that will be applied to the loss associated with the categorical features

  • lambda_cont (float, default: 1.0 ) \u2013

    Multiplicative factor that will be applied to the loss associated with the continuous features

  • reduction (str, default: 'mean' ) \u2013

    Loss reduction method

Source code in pytorch_widedeep/losses.py
def __init__(\n    self, lambda_cat: float = 1.0, lambda_cont: float = 1.0, reduction: str = \"mean\"\n):\n    super(DenoisingLoss, self).__init__()\n\n    self.lambda_cat = lambda_cat\n    self.lambda_cont = lambda_cont\n    self.reduction = reduction\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.DenoisingLoss.forward","title":"forward","text":"
forward(x_cat_and_cat_, x_cont_and_cont_)\n

Parameters:

  • x_cat_and_cat_ (Optional[Union[List[Tuple[Tensor, Tensor]], Tuple[Tensor, Tensor]]]) \u2013

    Tuple of tensors containing the raw input features and their encodings, referred to in the SAINT paper as \(x\) and \(x''\) respectively. If one denoising MLP is used per categorical feature, x_cat_and_cat_ will be a list of tuples, one per categorical feature

  • x_cont_and_cont_ (Optional[Union[List[Tuple[Tensor, Tensor]], Tuple[Tensor, Tensor]]]) \u2013

    same as x_cat_and_cat_ but for continuous columns

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import DenoisingLoss\n>>> x_cat_and_cat_ = (torch.empty(3).random_(3).long(), torch.randn(3, 3))\n>>> x_cont_and_cont_ = (torch.randn(3, 1), torch.randn(3, 1))\n>>> loss = DenoisingLoss()\n>>> res = loss(x_cat_and_cat_, x_cont_and_cont_)\n
Source code in pytorch_widedeep/losses.py
def forward(\n    self,\n    x_cat_and_cat_: Optional[\n        Union[List[Tuple[Tensor, Tensor]], Tuple[Tensor, Tensor]]\n    ],\n    x_cont_and_cont_: Optional[\n        Union[List[Tuple[Tensor, Tensor]], Tuple[Tensor, Tensor]]\n    ],\n) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    x_cat_and_cat_: tuple of Tensors or lists of tuples\n        Tuple of tensors containing the raw input features and their\n        encodings, referred in the SAINT paper as $x$ and $x''$\n        respectively. If one denoising MLP is used per categorical\n        feature `x_cat_and_cat_` will be a list of tuples, one per\n        categorical feature\n    x_cont_and_cont_: tuple of Tensors or lists of tuples\n        same as `x_cat_and_cat_` but for continuous columns\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import DenoisingLoss\n    >>> x_cat_and_cat_ = (torch.empty(3).random_(3).long(), torch.randn(3, 3))\n    >>> x_cont_and_cont_ = (torch.randn(3, 1), torch.randn(3, 1))\n    >>> loss = DenoisingLoss()\n    >>> res = loss(x_cat_and_cat_, x_cont_and_cont_)\n    \"\"\"\n\n    loss_cat = (\n        self._compute_cat_loss(x_cat_and_cat_)\n        if x_cat_and_cat_ is not None\n        else torch.tensor(0.0)\n    )\n    loss_cont = (\n        self._compute_cont_loss(x_cont_and_cont_)\n        if x_cont_and_cont_ is not None\n        else torch.tensor(0.0)\n    )\n\n    return self.lambda_cat * loss_cat + self.lambda_cont * loss_cont\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.EncoderDecoderLoss","title":"EncoderDecoderLoss","text":"
EncoderDecoderLoss(eps=1e-09)\n

Bases: Module

'Standard' Encoder-Decoder Loss. Loss applied during the Encoder-Decoder Self-Supervised Pre-Training routine available in this library

NOTE: This loss is in principle not exposed to the user, as it is used internally in the library, but it is included here for completeness.

The implementation of this loss is based on that at the tabnet repo, which is itself an adaptation of the one in the original paper TabNet: Attentive Interpretable Tabular Learning.

"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.EncoderDecoderLoss--parameters","title":"Parameters:","text":"

  • eps (float, default: 1e-09 ) \u2013

    Simply a small number to avoid dividing by zero

Source code in pytorch_widedeep/losses.py
def __init__(self, eps: float = 1e-9):\n    super(EncoderDecoderLoss, self).__init__()\n    self.eps = eps\n
"},{"location":"pytorch-widedeep/losses.html#pytorch_widedeep.losses.EncoderDecoderLoss.forward","title":"forward","text":"
forward(x_true, x_pred, mask)\n

Parameters:

  • x_true (Tensor) \u2013

    Embeddings of the input data

  • x_pred (Tensor) \u2013

    Reconstructed embeddings

  • mask (Tensor) \u2013

    Mask with 1s indicating that the reconstruction, and therefore the loss, is computed over those features.

Examples:

>>> import torch\n>>> from pytorch_widedeep.losses import EncoderDecoderLoss\n>>> x_true = torch.rand(3, 3)\n>>> x_pred = torch.rand(3, 3)\n>>> mask = torch.empty(3, 3).random_(2)\n>>> loss = EncoderDecoderLoss()\n>>> res = loss(x_true, x_pred, mask)\n
Source code in pytorch_widedeep/losses.py
def forward(self, x_true: Tensor, x_pred: Tensor, mask: Tensor) -> Tensor:\n    r\"\"\"\n    Parameters\n    ----------\n    x_true: Tensor\n        Embeddings of the input data\n    x_pred: Tensor\n        Reconstructed embeddings\n    mask: Tensor\n        Mask with 1s indicated that the reconstruction, and therefore the\n        loss, is based on those features.\n\n    Examples\n    --------\n    >>> import torch\n    >>> from pytorch_widedeep.losses import EncoderDecoderLoss\n    >>> x_true = torch.rand(3, 3)\n    >>> x_pred = torch.rand(3, 3)\n    >>> mask = torch.empty(3, 3).random_(2)\n    >>> loss = EncoderDecoderLoss()\n    >>> res = loss(x_true, x_pred, mask)\n    \"\"\"\n\n    errors = x_pred - x_true\n\n    reconstruction_errors = torch.mul(errors, mask) ** 2\n\n    x_true_means = torch.mean(x_true, dim=0)\n    x_true_means[x_true_means == 0] = 1\n\n    x_true_stds = torch.std(x_true, dim=0) ** 2\n    x_true_stds[x_true_stds == 0] = x_true_means[x_true_stds == 0]\n\n    features_loss = torch.matmul(reconstruction_errors, 1 / x_true_stds)\n    nb_reconstructed_variables = torch.sum(mask, dim=1)\n    features_loss_norm = features_loss / (nb_reconstructed_variables + self.eps)\n\n    loss = torch.mean(features_loss_norm)\n\n    return loss\n
"},{"location":"pytorch-widedeep/metrics.html","title":"Metrics","text":"

NOTE: metrics in this module expect the predictions and ground truth to have the same dimensions for regression and binary classification problems: \\((N_{samples}, 1)\\). In the case of multiclass classification problems the ground truth is expected to be a 1D tensor with the corresponding classes. See Examples below

We have added the possibility of using the metrics available at the torchmetrics library. Note that this library is still in its early versions and therefore this option should be used with caution. To use torchmetrics, simply import the metrics and use them as you would any of the pytorch-widedeep metrics described below.

from torchmetrics import Accuracy, Precision\n\naccuracy = Accuracy(average=None, num_classes=2)\nprecision = Precision(average='micro', num_classes=2)\n\ntrainer = Trainer(model, objective=\"binary\", metrics=[accuracy, precision])\n

A functioning example for pytorch-widedeep using torchmetrics can be found in the Examples folder

NOTE: the forward method for all metrics in this module takes two tensors, y_pred and y_true (in that order). Therefore, we do not include the method in the documentation.

"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.Accuracy","title":"Accuracy","text":"
Accuracy(top_k=1)\n

Bases: Metric

Class to calculate the accuracy for both binary and categorical problems

Parameters:

  • top_k (int, default: 1 ) \u2013

    Accuracy will be computed using the top k most likely classes in multiclass problems

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.metrics import Accuracy\n>>>\n>>> acc = Accuracy()\n>>> y_true = torch.tensor([0, 1, 0, 1]).view(-1, 1)\n>>> y_pred = torch.tensor([[0.3, 0.2, 0.6, 0.7]]).view(-1, 1)\n>>> acc(y_pred, y_true)\narray(0.5)\n>>>\n>>> acc = Accuracy(top_k=2)\n>>> y_true = torch.tensor([0, 1, 2])\n>>> y_pred = torch.tensor([[0.3, 0.5, 0.2], [0.1, 0.1, 0.8], [0.1, 0.5, 0.4]])\n>>> acc(y_pred, y_true)\narray(0.66666667)\n
Source code in pytorch_widedeep/metrics.py
def __init__(self, top_k: int = 1):\n    super(Accuracy, self).__init__()\n\n    self.top_k = top_k\n    self.correct_count = 0\n    self.total_count = 0\n    self._name = \"acc\"\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.Accuracy.reset","title":"reset","text":"
reset()\n

resets counters to 0

Source code in pytorch_widedeep/metrics.py
def reset(self):\n    \"\"\"\n    resets counters to 0\n    \"\"\"\n    self.correct_count = 0\n    self.total_count = 0\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.Precision","title":"Precision","text":"
Precision(average=True)\n

Bases: Metric

Class to calculate the precision for both binary and categorical problems

Parameters:

  • average (bool, default: True ) \u2013

    This applies only to multiclass problems. If True, calculate precision for each label and find their unweighted mean.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.metrics import Precision\n>>>\n>>> prec = Precision()\n>>> y_true = torch.tensor([0, 1, 0, 1]).view(-1, 1)\n>>> y_pred = torch.tensor([[0.3, 0.2, 0.6, 0.7]]).view(-1, 1)\n>>> prec(y_pred, y_true)\narray(0.5)\n>>>\n>>> prec = Precision(average=True)\n>>> y_true = torch.tensor([0, 1, 2])\n>>> y_pred = torch.tensor([[0.7, 0.1, 0.2], [0.1, 0.1, 0.8], [0.1, 0.5, 0.4]])\n>>> prec(y_pred, y_true)\narray(0.33333334)\n
Source code in pytorch_widedeep/metrics.py
def __init__(self, average: bool = True):\n    super(Precision, self).__init__()\n\n    self.average = average\n    self.true_positives = 0\n    self.all_positives = 0\n    self.eps = 1e-20\n    self._name = \"prec\"\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.Precision.reset","title":"reset","text":"
reset()\n

resets counters to 0

Source code in pytorch_widedeep/metrics.py
def reset(self):\n    \"\"\"\n    resets counters to 0\n    \"\"\"\n    self.true_positives = 0\n    self.all_positives = 0\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.Recall","title":"Recall","text":"
Recall(average=True)\n

Bases: Metric

Class to calculate the recall for both binary and categorical problems

Parameters:

  • average (bool, default: True ) \u2013

    This applies only to multiclass problems. If True, calculate recall for each label and find their unweighted mean.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.metrics import Recall\n>>>\n>>> rec = Recall()\n>>> y_true = torch.tensor([0, 1, 0, 1]).view(-1, 1)\n>>> y_pred = torch.tensor([[0.3, 0.2, 0.6, 0.7]]).view(-1, 1)\n>>> rec(y_pred, y_true)\narray(0.5)\n>>>\n>>> rec = Recall(average=True)\n>>> y_true = torch.tensor([0, 1, 2])\n>>> y_pred = torch.tensor([[0.7, 0.1, 0.2], [0.1, 0.1, 0.8], [0.1, 0.5, 0.4]])\n>>> rec(y_pred, y_true)\narray(0.33333334)\n
Source code in pytorch_widedeep/metrics.py
def __init__(self, average: bool = True):\n    super(Recall, self).__init__()\n\n    self.average = average\n    self.true_positives = 0\n    self.actual_positives = 0\n    self.eps = 1e-20\n    self._name = \"rec\"\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.Recall.reset","title":"reset","text":"
reset()\n

resets counters to 0

Source code in pytorch_widedeep/metrics.py
def reset(self):\n    \"\"\"\n    resets counters to 0\n    \"\"\"\n    self.true_positives = 0\n    self.actual_positives = 0\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.FBetaScore","title":"FBetaScore","text":"
FBetaScore(beta, average=True)\n

Bases: Metric

Class to calculate the fbeta score for both binary and categorical problems

\\[ F_{\\beta} = ((1 + {\\beta}^2) * \\frac{(precision * recall)}{({\\beta}^2 * precision + recall)} \\]

Parameters:

  • beta (int) \u2013

    Coefficient to control the balance between precision and recall

  • average (bool, default: True ) \u2013

    This applies only to multiclass problems. If True, calculate fbeta for each label and find their unweighted mean.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.metrics import FBetaScore\n>>>\n>>> fbeta = FBetaScore(beta=2)\n>>> y_true = torch.tensor([0, 1, 0, 1]).view(-1, 1)\n>>> y_pred = torch.tensor([[0.3, 0.2, 0.6, 0.7]]).view(-1, 1)\n>>> fbeta(y_pred, y_true)\narray(0.5)\n>>>\n>>> fbeta = FBetaScore(beta=2)\n>>> y_true = torch.tensor([0, 1, 2])\n>>> y_pred = torch.tensor([[0.7, 0.1, 0.2], [0.1, 0.1, 0.8], [0.1, 0.5, 0.4]])\n>>> fbeta(y_pred, y_true)\narray(0.33333334)\n
Source code in pytorch_widedeep/metrics.py
def __init__(self, beta: int, average: bool = True):\n    super(FBetaScore, self).__init__()\n\n    self.beta = beta\n    self.average = average\n    self.precision = Precision(average=False)\n    self.recall = Recall(average=False)\n    self.eps = 1e-20\n    self._name = \"\".join([\"f\", str(self.beta)])\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.FBetaScore.reset","title":"reset","text":"
reset()\n

resets precision and recall

Source code in pytorch_widedeep/metrics.py
def reset(self):\n    \"\"\"\n    resets precision and recall\n    \"\"\"\n    self.precision.reset()\n    self.recall.reset()\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.F1Score","title":"F1Score","text":"
F1Score(average=True)\n

Bases: Metric

Class to calculate the f1 score for both binary and categorical problems

Parameters:

  • average (bool, default: True ) \u2013

    This applies only to multiclass problems. If True, calculate f1 for each label and find their unweighted mean.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.metrics import F1Score\n>>>\n>>> f1 = F1Score()\n>>> y_true = torch.tensor([0, 1, 0, 1]).view(-1, 1)\n>>> y_pred = torch.tensor([[0.3, 0.2, 0.6, 0.7]]).view(-1, 1)\n>>> f1(y_pred, y_true)\narray(0.5)\n>>>\n>>> f1 = F1Score()\n>>> y_true = torch.tensor([0, 1, 2])\n>>> y_pred = torch.tensor([[0.7, 0.1, 0.2], [0.1, 0.1, 0.8], [0.1, 0.5, 0.4]])\n>>> f1(y_pred, y_true)\narray(0.33333334)\n
Source code in pytorch_widedeep/metrics.py
def __init__(self, average: bool = True):\n    super(F1Score, self).__init__()\n\n    self.average = average\n    self.f1 = FBetaScore(beta=1, average=self.average)\n    self._name = self.f1._name\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.F1Score.reset","title":"reset","text":"
reset()\n

resets counters to 0

Source code in pytorch_widedeep/metrics.py
def reset(self):\n    \"\"\"\n    resets counters to 0\n    \"\"\"\n    self.f1.reset()\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.R2Score","title":"R2Score","text":"
R2Score()\n

Bases: Metric

Calculates R-Squared, the coefficient of determination:

\\[ R^2 = 1 - \\frac{\\sum_{j=1}^n(y_j - \\hat{y_j})^2}{\\sum_{j=1}^n(y_j - \\bar{y})^2} \\]

where \\(\\hat{y_j}\\) is the ground truth, \\(y_j\\) is the predicted value and \\(\\bar{y}\\) is the mean of the ground truth.

Examples:

>>> import torch\n>>>\n>>> from pytorch_widedeep.metrics import R2Score\n>>>\n>>> r2 = R2Score()\n>>> y_true = torch.tensor([3, -0.5, 2, 7]).view(-1, 1)\n>>> y_pred = torch.tensor([2.5, 0.0, 2, 8]).view(-1, 1)\n>>> r2(y_pred, y_true)\narray(0.94860814)\n
Source code in pytorch_widedeep/metrics.py
def __init__(self):\n    self.numerator = 0\n    self.denominator = 0\n    self.num_examples = 0\n    self.y_true_sum = 0\n\n    self._name = \"r2\"\n
"},{"location":"pytorch-widedeep/metrics.html#pytorch_widedeep.metrics.R2Score.reset","title":"reset","text":"
reset()\n

resets counters to 0

Source code in pytorch_widedeep/metrics.py
def reset(self):\n    \"\"\"\n    resets counters to 0\n    \"\"\"\n    self.numerator = 0\n    self.denominator = 0\n    self.num_examples = 0\n    self.y_true_sum = 0\n
"},{"location":"pytorch-widedeep/model_components.html","title":"The models module","text":"

This module contains the models that can be used as the four main components of a Wide and Deep model (wide, deeptabular, deeptext, deepimage), as well as the WideDeep \"constructor\" class. Note that each of the four components can be used independently. It also contains all the documentation for the models that can be used for self-supervised pre-training with tabular data.

NOTE: when we started developing the library we thought that combining Deep Learning architectures for tabular data with CNN-based architectures (pretrained or not) for images and Transformer-based architectures for text would be overkill (also, pretrained transformer-based models were not as readily available as they are today). Therefore, at that time we decided to include simple RNN-based architectures for the text dataset in the library. A lot has changed since then and it is our intention to integrate this library with Hugging Face's Transformers library in the near future. Nonetheless, note that it is still possible to use any custom model as the deeptext component with this library; a minimal sketch is shown below. Please see the example section in this documentation for details.
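As a rough illustration of that last point, below is a minimal, hedged sketch of a custom text encoder (the class name and its internals are entirely hypothetical) that could act as the deeptext component. The only structural assumptions made here are the ones stated in this documentation: the module returns a 2D tensor and exposes the output_dim property required to build the WideDeep class.

import torch\nfrom torch import nn\n\n\nclass MyTextEncoder(nn.Module):\n    \"\"\"Hypothetical custom 'deeptext' component: embedding + mean pooling\"\"\"\n\n    def __init__(self, vocab_size: int, embed_dim: int = 32, out_dim: int = 16):\n        super().__init__()\n        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=0)\n        self.proj = nn.Linear(embed_dim, out_dim)\n        self._out_dim = out_dim\n\n    @property\n    def output_dim(self) -> int:\n        # required so that WideDeep can size the prediction head\n        return self._out_dim\n\n    def forward(self, X: torch.Tensor) -> torch.Tensor:\n        # X: (batch_size, seq_len) tensor of token indices\n        return self.proj(self.embed(X.long()).mean(dim=1))\n\n\n# e.g. WideDeep(wide=wide, deeptabular=tab_mlp, deeptext=MyTextEncoder(vocab_size=100))\n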

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.linear.wide.Wide","title":"Wide","text":"
Wide(input_dim, pred_dim=1)\n

Bases: Module

Defines a Wide (linear) model where the non-linearities are captured via the so-called crossed-columns. This can be used as the wide component of a Wide & Deep model.

Parameters:

  • input_dim (int) \u2013

    size of the Linear layer (implemented via an Embedding layer). input_dim is the total number of individual values across all the features that go through the wide model. For example, if the wide model receives 2 features with 5 individual values each, input_dim = 10

  • pred_dim (int, default: 1 ) \u2013

    size of the output tensor containing the predictions. Note that, unlike all the other models, the wide model is connected directly to the output neuron(s) when used to build a Wide and Deep model. Therefore, it requires the pred_dim parameter.

Attributes:

  • wide_linear (Module) \u2013

    the linear layer that comprises the wide branch of the model

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import Wide\n>>> X = torch.empty(4, 4).random_(4)\n>>> wide = Wide(input_dim=X.unique().size(0), pred_dim=1)\n>>> out = wide(X)\n
Source code in pytorch_widedeep/models/tabular/linear/wide.py
@alias(\"pred_dim\", [\"pred_size\", \"num_class\"])\ndef __init__(self, input_dim: int, pred_dim: int = 1):\n    super(Wide, self).__init__()\n\n    self.input_dim = input_dim\n    self.pred_dim = pred_dim\n\n    # Embeddings: val + 1 because 0 is reserved for padding/unseen cateogories.\n    self.wide_linear = nn.Embedding(input_dim + 1, pred_dim, padding_idx=0)\n    # (Sum(Embedding) + bias) is equivalent to (OneHotVector + Linear)\n    self.bias = nn.Parameter(torch.zeros(pred_dim))\n    self._reset_parameters()\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.linear.wide.Wide.forward","title":"forward","text":"
forward(X)\n

Forward pass. Simply connecting the Embedding layer with the output neuron(s)

Source code in pytorch_widedeep/models/tabular/linear/wide.py
def forward(self, X: Tensor) -> Tensor:\n    r\"\"\"Forward pass. Simply connecting the Embedding layer with the output\n    neuron(s)\"\"\"\n    out = self.wide_linear(X.long()).sum(dim=1) + self.bias\n    return out\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.tab_mlp.TabMlp","title":"TabMlp","text":"
TabMlp(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous=None,\n    embed_continuous_method=None,\n    cont_embed_dim=None,\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    mlp_hidden_dims=[200, 100],\n    mlp_activation=\"relu\",\n    mlp_dropout=0.1,\n    mlp_batchnorm=False,\n    mlp_batchnorm_last=False,\n    mlp_linear_first=True\n)\n

Bases: BaseTabularModelWithoutAttention

Defines a TabMlp model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

This class combines embedding representations of the categorical features with numerical (aka continuous) features, embedded or not. These are then passed through a series of dense layers (i.e. an MLP).

Most of the parameters for this class are Optional since the use of categorical or continuous features is in fact optional (i.e. one can use categorical features only, continuous features only or both).

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the TabMlp model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int, int]]], default: None ) \u2013

    List of Tuples with the column name, number of unique values and embedding dimension. e.g. [(education, 11, 32), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous (Optional[bool], default: None ) \u2013

    Boolean indicating if the continuous columns will be embedded using one of the available methods: 'standard', 'periodic' or 'piecewise'. If None, it will default to 'False'. NOTE: This parameter is deprecated and it will be removed in future releases. Please, use the embed_continuous_method parameter instead.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: None ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details (a brief usage sketch is also included after the Examples below).

  • cont_embed_dim (Optional[int], default: None ) \u2013

    Size of the continuous embeddings. If the continuous columns are embedded, cont_embed_dim must be passed.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the aforementioned paper but it is implemented in the official repo. If True, the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False, a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropped out. If None, it will default to False.

  • mlp_hidden_dims (List[int], default: [200, 100] ) \u2013

    List with the number of neurons per dense layer in the mlp.

  • mlp_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • mlp_dropout (Union[float, List[float]], default: 0.1 ) \u2013

    float or List of floats with the dropout between the dense layers. e.g: [0.5,0.5]

  • mlp_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers

  • mlp_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers

  • mlp_linear_first (bool, default: True ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]

Attributes:

  • encoder (Module) \u2013

    mlp model that will receive the concatenation of the embeddings and the continuous columns

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabMlp\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n>>> cat_embed_input = [(u, i, j) for u, i, j in zip(colnames[:4], [4] * 4, [8] * 4)]\n>>> column_idx = {k: v for v, k in enumerate(colnames)}\n>>> model = TabMlp(mlp_hidden_dims=[8, 4], column_idx=column_idx, cat_embed_input=cat_embed_input,\n... continuous_cols=[\"e\"])\n>>> out = model(X_tab)\n
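The example above uses the continuous column without embedding it. Below is a hedged sketch of the 'periodic' continuous-embedding setup described in the parameters above; the chosen values for n_frequencies, sigma, share_last_layer and cont_embed_dim are purely illustrative.

>>> import torch\n>>> from pytorch_widedeep.models import TabMlp\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n>>> cat_embed_input = [(u, i, j) for u, i, j in zip(colnames[:4], [4] * 4, [8] * 4)]\n>>> column_idx = {k: v for v, k in enumerate(colnames)}\n>>> model = TabMlp(mlp_hidden_dims=[8, 4], column_idx=column_idx, cat_embed_input=cat_embed_input,\n... continuous_cols=[\"e\"], embed_continuous_method=\"periodic\", n_frequencies=4,\n... sigma=0.1, share_last_layer=False, cont_embed_dim=8)\n>>> out = model(X_tab)\n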
Source code in pytorch_widedeep/models/tabular/mlp/tab_mlp.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous: Optional[bool] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = None,\n    cont_embed_dim: Optional[int] = None,\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    mlp_hidden_dims: List[int] = [200, 100],\n    mlp_activation: str = \"relu\",\n    mlp_dropout: Union[float, List[float]] = 0.1,\n    mlp_batchnorm: bool = False,\n    mlp_batchnorm_last: bool = False,\n    mlp_linear_first: bool = True,\n):\n    super(TabMlp, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=embed_continuous,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dim=cont_embed_dim,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    # Embeddings are instantiated at the base model\n    # Mlp\n    mlp_input_dim = self.cat_out_dim + self.cont_out_dim\n    mlp_hidden_dims = [mlp_input_dim] + mlp_hidden_dims\n    self.encoder = MLP(\n        mlp_hidden_dims,\n        mlp_activation,\n        mlp_dropout,\n        mlp_batchnorm,\n        mlp_batchnorm_last,\n        mlp_linear_first,\n    )\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.tab_mlp.TabMlp.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property necessary to build the WideDeep class.

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.tab_mlp.TabMlpDecoder","title":"TabMlpDecoder","text":"
TabMlpDecoder(\n    embed_dim,\n    mlp_hidden_dims=[100, 200],\n    mlp_activation=\"relu\",\n    mlp_dropout=0.1,\n    mlp_batchnorm=False,\n    mlp_batchnorm_last=False,\n    mlp_linear_first=True,\n)\n

Bases: Module

Companion decoder model for the TabMlp model (which can be considered an encoder itself).

This class is designed to be used with the EncoderDecoderTrainer when using self-supervised pre-training (see the corresponding section in the docs). The TabMlpDecoder will receive the output from the MLP and 'reconstruct' the embeddings.

Parameters:

  • embed_dim (int) \u2013

    Size of the embeddings tensor that needs to be reconstructed.

  • mlp_hidden_dims (List[int], default: [100, 200] ) \u2013

    List with the number of neurons per dense layer in the mlp.

  • mlp_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • mlp_dropout (Union[float, List[float]], default: 0.1 ) \u2013

    float or List of floats with the dropout between the dense layers. e.g: [0.5,0.5]

  • mlp_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers

  • mlp_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers

  • mlp_linear_first (bool, default: True ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]

Attributes:

  • decoder (Module) \u2013

    mlp model that will receive the output of the encoder

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabMlpDecoder\n>>> x_inp = torch.rand(3, 8)\n>>> decoder = TabMlpDecoder(embed_dim=32, mlp_hidden_dims=[8,16])\n>>> res = decoder(x_inp)\n>>> res.shape\ntorch.Size([3, 32])\n
Source code in pytorch_widedeep/models/tabular/mlp/tab_mlp.py
def __init__(\n    self,\n    embed_dim: int,\n    mlp_hidden_dims: List[int] = [100, 200],\n    mlp_activation: str = \"relu\",\n    mlp_dropout: Union[float, List[float]] = 0.1,\n    mlp_batchnorm: bool = False,\n    mlp_batchnorm_last: bool = False,\n    mlp_linear_first: bool = True,\n):\n    super(TabMlpDecoder, self).__init__()\n\n    self.embed_dim = embed_dim\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    self.decoder = MLP(\n        mlp_hidden_dims + [self.embed_dim],\n        mlp_activation,\n        mlp_dropout,\n        mlp_batchnorm,\n        mlp_batchnorm_last,\n        mlp_linear_first,\n    )\n
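To make the encoder/decoder pairing concrete, the following is a minimal, hedged sketch that chains a TabMlp encoder with a mirrored TabMlpDecoder at the tensor level. In practice the masking, the loss and this wiring are handled by the EncoderDecoderTrainer mentioned above; the dimensions are illustrative and assume 4 categorical columns embedded with 8 dimensions each (i.e. an embedding space of size 32).

>>> import torch\n>>> from pytorch_widedeep.models import TabMlp, TabMlpDecoder\n>>> colnames = [\"a\", \"b\", \"c\", \"d\"]\n>>> cat_embed_input = [(c, 4, 8) for c in colnames]\n>>> column_idx = {k: v for v, k in enumerate(colnames)}\n>>> X_tab = torch.empty(5, 4).random_(4)\n>>> encoder = TabMlp(column_idx=column_idx, cat_embed_input=cat_embed_input, mlp_hidden_dims=[16, 8])\n>>> decoder = TabMlpDecoder(embed_dim=32, mlp_hidden_dims=[8, 16])\n>>> x_embed_rec = decoder(encoder(X_tab))\n>>> x_embed_rec.shape\ntorch.Size([5, 32])\n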
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.resnet.tab_resnet.TabResnet","title":"TabResnet","text":"
TabResnet(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous=None,\n    embed_continuous_method=None,\n    cont_embed_dim=None,\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    blocks_dims=[200, 100, 100],\n    blocks_dropout=0.1,\n    simplify_blocks=False,\n    mlp_hidden_dims=None,\n    mlp_activation=None,\n    mlp_dropout=None,\n    mlp_batchnorm=None,\n    mlp_batchnorm_last=None,\n    mlp_linear_first=None\n)\n

Bases: BaseTabularModelWithoutAttention

Defines a TabResnet model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

This class combines embedding representations of the categorical features with numerical (aka continuous) features, embedded or not. These are then passed through a series of Resnet blocks. See pytorch_widedeep.models.tab_resnet._layers for details on the structure of each block.

Most of the parameters for this class are Optional since the use of categorical or continuous features is in fact optional (i.e. one can use categorical features only, continuous features only or both).

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the TabResnet model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int, int]]], default: None ) \u2013

    List of Tuples with the column name, number of unique values and embedding dimension. e.g. [(education, 11, 32), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous (Optional[bool], default: None ) \u2013

    Boolean indicating if the continuous columns will be embedded using one of the available methods: 'standard', 'periodic' or 'piecewise'. If None, it will default to 'False'. NOTE: This parameter is deprecated and it will be removed in future releases. Please, use the embed_continuous_method parameter instead.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: None ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dim (Optional[int], default: None ) \u2013

    Size of the continuous embeddings. If the continuous columns are embedded, cont_embed_dim must be passed.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the aforementioned paper but it is implemented in the official repo. If True, the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False, a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropped out. If None, it will default to False.

  • blocks_dims (List[int], default: [200, 100, 100] ) \u2013

    List of integers that define the input and output units of each block. For example: [200, 100, 100] will generate 2 blocks. The first will receive a tensor of size 200 and output a tensor of size 100, and the second will receive a tensor of size 100 and output a tensor of size 100. See pytorch_widedeep.models.tab_resnet._layers for details on the structure of each block.

  • blocks_dropout (float, default: 0.1 ) \u2013

    Block's internal dropout.

  • simplify_blocks (bool, default: False ) \u2013

    Boolean indicating if the simplest possible residual blocks (X -> [ [LIN, BN, ACT] + X ]) will be used instead of a standard one (X -> [ [LIN1, BN1, ACT1] -> [LIN2, BN2] + X ]).

  • mlp_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the MLP. e.g: [64, 32]. If None the output of the Resnet Blocks will be connected directly to the output neuron(s).

  • mlp_activation (Optional[str], default: None ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 'relu'.

  • mlp_dropout (Optional[float], default: None ) \u2013

    float with the dropout between the dense layers of the MLP. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 0.0.

  • mlp_batchnorm (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_batchnorm_last (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_linear_first (Optional[bool], default: None ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to True.

Attributes:

  • encoder (Module) \u2013

    deep dense Resnet model that will receive the concatenation of the embeddings and the continuous columns

  • mlp (Module) \u2013

    if mlp_hidden_dims is not None, this attribute will be an MLP model that will receive the output of the Resnet blocks.

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabResnet\n>>> X_deep = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i,j) for u,i,j in zip(colnames[:4], [4]*4, [8]*4)]\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = TabResnet(blocks_dims=[16,4], column_idx=column_idx, cat_embed_input=cat_embed_input,\n... continuous_cols = ['e'])\n>>> out = model(X_deep)\n
Source code in pytorch_widedeep/models/tabular/resnet/tab_resnet.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous: Optional[bool] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = None,\n    cont_embed_dim: Optional[int] = None,\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    blocks_dims: List[int] = [200, 100, 100],\n    blocks_dropout: float = 0.1,\n    simplify_blocks: bool = False,\n    mlp_hidden_dims: Optional[List[int]] = None,\n    mlp_activation: Optional[str] = None,\n    mlp_dropout: Optional[float] = None,\n    mlp_batchnorm: Optional[bool] = None,\n    mlp_batchnorm_last: Optional[bool] = None,\n    mlp_linear_first: Optional[bool] = None,\n):\n    super(TabResnet, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=embed_continuous,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dim=cont_embed_dim,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    if len(blocks_dims) < 2:\n        raise ValueError(\n            \"'blocks' must contain at least two elements, e.g. 
[256, 128]\"\n        )\n\n    self.blocks_dims = blocks_dims\n    self.blocks_dropout = blocks_dropout\n    self.simplify_blocks = simplify_blocks\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    # Embeddings are instantiated at the base model\n\n    # Resnet\n    dense_resnet_input_dim = self.cat_out_dim + self.cont_out_dim\n    self.encoder = DenseResnet(\n        dense_resnet_input_dim, blocks_dims, blocks_dropout, self.simplify_blocks\n    )\n\n    # Mlp: adding an MLP on top of the Resnet blocks is optional and\n    # therefore all related params are optional\n    if self.mlp_hidden_dims is not None:\n        self.mlp = MLP(\n            d_hidden=[self.blocks_dims[-1]] + self.mlp_hidden_dims,\n            activation=(\n                \"relu\" if self.mlp_activation is None else self.mlp_activation\n            ),\n            dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout,\n            batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm,\n            batchnorm_last=(\n                False\n                if self.mlp_batchnorm_last is None\n                else self.mlp_batchnorm_last\n            ),\n            linear_first=(\n                True if self.mlp_linear_first is None else self.mlp_linear_first\n            ),\n        )\n    else:\n        self.mlp = None\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.resnet.tab_resnet.TabResnet.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property necessary to build the WideDeep class.

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.resnet.tab_resnet.TabResnetDecoder","title":"TabResnetDecoder","text":"
TabResnetDecoder(\n    embed_dim,\n    blocks_dims=[100, 100, 200],\n    blocks_dropout=0.1,\n    simplify_blocks=False,\n    mlp_hidden_dims=None,\n    mlp_activation=None,\n    mlp_dropout=None,\n    mlp_batchnorm=None,\n    mlp_batchnorm_last=None,\n    mlp_linear_first=None,\n)\n

Bases: Module

Companion decoder model for the TabResnet model (which can be considered an encoder itself)

This class is designed to be used with the EncoderDecoderTrainer when using self-supervised pre-training (see the corresponding section in the docs). This class will receive the output from the ResNet blocks or the MLP (if present) and 'reconstruct' the embeddings.

Parameters:

  • embed_dim (int) \u2013

    Size of the embeddings tensor to be reconstructed.

  • blocks_dims (List[int], default: [100, 100, 200] ) \u2013

    List of integers that define the input and output units of each block. For example: [200, 100, 100] will generate 2 blocks. The first will receive a tensor of size 200 and output a tensor of size 100, and the second will receive a tensor of size 100 and output a tensor of size 100. See pytorch_widedeep.models.tab_resnet._layers for details on the structure of each block.

  • blocks_dropout (float, default: 0.1 ) \u2013

    Block's internal dropout.

  • simplify_blocks (bool, default: False ) \u2013

    Boolean indicating if the simplest possible residual blocks (X -> [ [LIN, BN, ACT] + X ]) will be used instead of a standard one (X -> [ [LIN1, BN1, ACT1] -> [LIN2, BN2] + X ]).

  • mlp_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the MLP. e.g: [64, 32]. If None the output of the Resnet Blocks will be connected directly to the output neuron(s).

  • mlp_activation (Optional[str], default: None ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 'relu'.

  • mlp_dropout (Optional[float], default: None ) \u2013

    float with the dropout between the dense layers of the MLP. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 0.0.

  • mlp_batchnorm (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_batchnorm_last (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_linear_first (Optional[bool], default: None ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to True.

Attributes:

  • decoder (Module) \u2013

    deep dense Resnet model that will receive the output of the encoder IF mlp_hidden_dims is None

  • mlp (Module) \u2013

    if mlp_hidden_dims is not None, the overall decoder will consist of an MLP that will receive the output of the encoder, followed by the deep dense Resnet.

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabResnetDecoder\n>>> x_inp = torch.rand(3, 8)\n>>> decoder = TabResnetDecoder(embed_dim=32, blocks_dims=[8, 16, 16])\n>>> res = decoder(x_inp)\n>>> res.shape\ntorch.Size([3, 32])\n
Source code in pytorch_widedeep/models/tabular/resnet/tab_resnet.py
def __init__(\n    self,\n    embed_dim: int,\n    blocks_dims: List[int] = [100, 100, 200],\n    blocks_dropout: float = 0.1,\n    simplify_blocks: bool = False,\n    mlp_hidden_dims: Optional[List[int]] = None,\n    mlp_activation: Optional[str] = None,\n    mlp_dropout: Optional[float] = None,\n    mlp_batchnorm: Optional[bool] = None,\n    mlp_batchnorm_last: Optional[bool] = None,\n    mlp_linear_first: Optional[bool] = None,\n):\n    super(TabResnetDecoder, self).__init__()\n\n    if len(blocks_dims) < 2:\n        raise ValueError(\n            \"'blocks' must contain at least two elements, e.g. [256, 128]\"\n        )\n\n    self.embed_dim = embed_dim\n\n    self.blocks_dims = blocks_dims\n    self.blocks_dropout = blocks_dropout\n    self.simplify_blocks = simplify_blocks\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    if self.mlp_hidden_dims is not None:\n        self.mlp = MLP(\n            d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim,\n            activation=(\n                \"relu\" if self.mlp_activation is None else self.mlp_activation\n            ),\n            dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout,\n            batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm,\n            batchnorm_last=(\n                False\n                if self.mlp_batchnorm_last is None\n                else self.mlp_batchnorm_last\n            ),\n            linear_first=(\n                True if self.mlp_linear_first is None else self.mlp_linear_first\n            ),\n        )\n        self.decoder = DenseResnet(\n            self.mlp_hidden_dims[-1],\n            blocks_dims,\n            blocks_dropout,\n            self.simplify_blocks,\n        )\n    else:\n        self.mlp = None\n        self.decoder = DenseResnet(\n            blocks_dims[0], blocks_dims, blocks_dropout, self.simplify_blocks\n        )\n\n    self.reconstruction_layer = nn.Linear(blocks_dims[-1], embed_dim, bias=False)\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.tabnet.tab_net.TabNet","title":"TabNet","text":"
TabNet(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous=None,\n    embed_continuous_method=None,\n    cont_embed_dim=None,\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    n_steps=3,\n    step_dim=8,\n    attn_dim=8,\n    dropout=0.0,\n    n_glu_step_dependent=2,\n    n_glu_shared=2,\n    ghost_bn=True,\n    virtual_batch_size=128,\n    momentum=0.02,\n    gamma=1.3,\n    epsilon=1e-15,\n    mask_type=\"sparsemax\"\n)\n

Bases: BaseTabularModelWithoutAttention

Defines a TabNet model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

The implementation in this library is fully based on that here by the dreamquark-ai team, simply adapted so that it can work within the WideDeep framework. Therefore, ALL CREDIT TO THE DREAMQUARK-AI TEAM.

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the TabNet model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int, int]]], default: None ) \u2013

    List of Tuples with the column name, number of unique values and embedding dimension. e.g. [(education, 11, 32), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous (Optional[bool], default: None ) \u2013

    Boolean indicating if the continuous columns will be embedded using one of the available methods: 'standard', 'periodic' or 'piecewise'. If None, it will default to 'False'. NOTE: This parameter is deprecated and it will be removed in future releases. Please, use the embed_continuous_method parameter instead.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: None ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dim (Optional[int], default: None ) \u2013

    Size of the continuous embeddings. If the continuous columns are embedded, cont_embed_dim must be passed.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the aforementioned paper but it is implemented in the official repo. If True, the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False, a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropped out. If None, it will default to False.

  • n_steps (int, default: 3 ) \u2013

    number of decision steps. For a better understanding of the function of n_steps and the upcoming parameters, please see the paper.

  • step_dim (int, default: 8 ) \u2013

    Step's output dimension. This is the output dimension that WideDeep will collect and connect to the output neuron(s).

  • attn_dim (int, default: 8 ) \u2013

    Attention dimension

  • dropout (float, default: 0.0 ) \u2013

    GLU block's internal dropout

  • n_glu_step_dependent (int, default: 2 ) \u2013

    number of GLU Blocks ([FC -> BN -> GLU]) that are step dependent

  • n_glu_shared (int, default: 2 ) \u2013

    number of GLU Blocks ([FC -> BN -> GLU]) that will be shared across decision steps

  • ghost_bn (bool, default: True ) \u2013

    Boolean indicating if Ghost Batch Normalization will be used.

  • virtual_batch_size (int, default: 128 ) \u2013

    Batch size when using Ghost Batch Normalization

  • momentum (float, default: 0.02 ) \u2013

    Ghost Batch Normalization's momentum. The dreamquark-ai team advises very low values. However, high values are used in the original publication. During our tests, higher values led to better results

  • gamma (float, default: 1.3 ) \u2013

    Relaxation parameter in the paper. When gamma = 1, a feature is enforced to be used only at one decision step. As gamma increases, more flexibility is provided to use a feature at multiple decision steps

  • epsilon (float, default: 1e-15 ) \u2013

    Float to avoid log(0). Always keep low

  • mask_type (str, default: 'sparsemax' ) \u2013

    Mask function to use. Either 'sparsemax' or 'entmax'

Attributes:

  • encoder (Module) \u2013

    the TabNet encoder. For details see the original publication.

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabNet\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = [\"a\", \"b\", \"c\", \"d\", \"e\"]\n>>> cat_embed_input = [(u, i, j) for u, i, j in zip(colnames[:4], [4] * 4, [8] * 4)]\n>>> column_idx = {k: v for v, k in enumerate(colnames)}\n>>> model = TabNet(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols=[\"e\"])\n>>> out = model(X_tab)\n
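For completeness, here is a hedged configuration sketch that touches the TabNet-specific parameters described above, reusing the tensors from the previous example; the values are illustrative rather than recommendations.

>>> model = TabNet(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols=[\"e\"],\n...                n_steps=5, step_dim=16, attn_dim=16, ghost_bn=False, mask_type=\"entmax\")\n>>> out = model(X_tab)\n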
Source code in pytorch_widedeep/models/tabular/tabnet/tab_net.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous: Optional[bool] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = None,\n    cont_embed_dim: Optional[int] = None,\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    n_steps: int = 3,\n    step_dim: int = 8,\n    attn_dim: int = 8,\n    dropout: float = 0.0,\n    n_glu_step_dependent: int = 2,\n    n_glu_shared: int = 2,\n    ghost_bn: bool = True,\n    virtual_batch_size: int = 128,\n    momentum: float = 0.02,\n    gamma: float = 1.3,\n    epsilon: float = 1e-15,\n    mask_type: str = \"sparsemax\",\n):\n    super(TabNet, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=embed_continuous,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dim=cont_embed_dim,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.n_steps = n_steps\n    self.step_dim = step_dim\n    self.attn_dim = attn_dim\n    self.dropout = dropout\n    self.n_glu_step_dependent = n_glu_step_dependent\n    self.n_glu_shared = n_glu_shared\n    self.ghost_bn = ghost_bn\n    self.virtual_batch_size = virtual_batch_size\n    self.momentum = momentum\n    self.gamma = gamma\n    self.epsilon = epsilon\n    self.mask_type = mask_type\n\n    # Embeddings are instantiated at the base model\n    self.embed_out_dim = self.cat_out_dim + self.cont_out_dim\n\n    # TabNet\n    self.encoder = TabNetEncoder(\n        self.embed_out_dim,\n        n_steps,\n        step_dim,\n        attn_dim,\n        dropout,\n        n_glu_step_dependent,\n        n_glu_shared,\n        ghost_bn,\n        virtual_batch_size,\n        momentum,\n        gamma,\n        epsilon,\n        mask_type,\n    )\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.tabnet.tab_net.TabNet.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property necessary to build the WideDeep class.

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.tabnet.tab_net.TabNetDecoder","title":"TabNetDecoder","text":"
TabNetDecoder(\n    embed_dim,\n    n_steps=3,\n    step_dim=8,\n    dropout=0.0,\n    n_glu_step_dependent=2,\n    n_glu_shared=2,\n    ghost_bn=True,\n    virtual_batch_size=128,\n    momentum=0.02,\n)\n

Bases: Module

Companion decoder model for the TabNet model (which can be considered an encoder itself)

This class is designed to be used with the EncoderDecoderTrainer when using self-supervised pre-training (see the corresponding section in the docs). This class will receive the output from the TabNet encoder (i.e. the output from the so called 'steps') and 'reconstruct' the embeddings.

Parameters:

  • embed_dim (int) \u2013

    Size of the embeddings tensor to be reconstructed.

  • n_steps (int, default: 3 ) \u2013

    number of decision steps. For a better understanding of the function of n_steps and the upcoming parameters, please see the paper.

  • step_dim (int, default: 8 ) \u2013

    Step's output dimension. This is the output dimension that WideDeep will collect and connect to the output neuron(s).

  • dropout (float, default: 0.0 ) \u2013

    GLU block's internal dropout

  • n_glu_step_dependent (int, default: 2 ) \u2013

    number of GLU Blocks ([FC -> BN -> GLU]) that are step dependent

  • n_glu_shared (int, default: 2 ) \u2013

    number of GLU Blocks ([FC -> BN -> GLU]) that will be shared across decision steps

  • ghost_bn (bool, default: True ) \u2013

    Boolean indicating if Ghost Batch Normalization will be used.

  • virtual_batch_size (int, default: 128 ) \u2013

    Batch size when using Ghost Batch Normalization

  • momentum (float, default: 0.02 ) \u2013

    Ghost Batch Normalization's momentum. The dreamquark-ai team advises very low values. However, high values are used in the original publication. During our tests, higher values led to better results

Attributes:

  • decoder (Module) \u2013

    decoder that will receive the output from the encoder's steps and will reconstruct the embeddings

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabNetDecoder\n>>> x_inp = [torch.rand(3, 8), torch.rand(3, 8), torch.rand(3, 8)]\n>>> decoder = TabNetDecoder(embed_dim=32, ghost_bn=False)\n>>> res = decoder(x_inp)\n>>> res.shape\ntorch.Size([3, 32])\n
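In practice the decoder is rarely built by hand: it is meant to be driven by the EncoderDecoderTrainer. The snippet below is a hedged sketch of that workflow; column_idx, cat_embed_input and X_tab are assumed to come from a TabPreprocessor, and the exact trainer arguments should be checked against the self-supervised pre-training section of the docs:

>>> from pytorch_widedeep.models import TabNet
>>> from pytorch_widedeep.self_supervised_training import EncoderDecoderTrainer
>>> tabnet = TabNet(column_idx=column_idx, cat_embed_input=cat_embed_input)
>>> ed_trainer = EncoderDecoderTrainer(encoder=tabnet)
>>> ed_trainer.pretrain(X_tab, n_epochs=1, batch_size=32)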
Source code in pytorch_widedeep/models/tabular/tabnet/tab_net.py
def __init__(\n    self,\n    embed_dim: int,\n    n_steps: int = 3,\n    step_dim: int = 8,\n    dropout: float = 0.0,\n    n_glu_step_dependent: int = 2,\n    n_glu_shared: int = 2,\n    ghost_bn: bool = True,\n    virtual_batch_size: int = 128,\n    momentum: float = 0.02,\n):\n    super(TabNetDecoder, self).__init__()\n\n    self.n_steps = n_steps\n    self.step_dim = step_dim\n    self.dropout = dropout\n    self.n_glu_step_dependent = n_glu_step_dependent\n    self.n_glu_shared = n_glu_shared\n    self.ghost_bn = ghost_bn\n    self.virtual_batch_size = virtual_batch_size\n    self.momentum = momentum\n\n    shared_layers = nn.ModuleList()\n    for i in range(n_glu_shared):\n        if i == 0:\n            shared_layers.append(nn.Linear(step_dim, 2 * step_dim, bias=False))\n        else:\n            shared_layers.append(nn.Linear(step_dim, 2 * step_dim, bias=False))\n\n    self.decoder = nn.ModuleList()\n    for step in range(n_steps):\n        transformer = FeatTransformer(\n            step_dim,\n            step_dim,\n            dropout,\n            shared_layers,\n            n_glu_step_dependent,\n            ghost_bn,\n            virtual_batch_size,\n            momentum=momentum,\n        )\n        self.decoder.append(transformer)\n\n    self.reconstruction_layer = nn.Linear(step_dim, embed_dim, bias=False)\n    initialize_non_glu(self.reconstruction_layer, step_dim, embed_dim)\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.context_attention_mlp.ContextAttentionMLP","title":"ContextAttentionMLP","text":"
ContextAttentionMLP(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    shared_embed=None,\n    add_shared_embed=None,\n    frac_shared_embed=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous_method=\"standard\",\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    input_dim=32,\n    attn_dropout=0.2,\n    with_addnorm=False,\n    attn_activation=\"leaky_relu\",\n    n_blocks=3\n)\n

Bases: BaseTabularModelWithAttention

Defines a ContextAttentionMLP model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

This class combines embedding representations of the categorical features with numerical (aka continuous) features that are also embedded. These are then passed through a series of attention blocks, each consisting of a ContextAttentionEncoder. This encoder is in part inspired by the attention mechanism described in Hierarchical Attention Networks for Document Classification. See pytorch_widedeep.models.tabular.mlp._attention_layers for details.

Most of the parameters for this class are Optional since the use of categorical or continuous features is in fact optional (i.e. one can use categorical features only, continuous features only or both).

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the TabMlp model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int]]], default: None ) \u2013

    List of Tuples with the column name and number of unique values. e.g. [(education, 11), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • shared_embed (Optional[bool], default: None ) \u2013

    Boolean indicating if the embeddings will be \"shared\". The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • add_shared_embed (Optional[bool], default: None ) \u2013

    The two embedding sharing strategies are: 1) add the shared embeddings to the column embeddings or 2) to replace the first frac_shared_embed with the shared embeddings. See pytorch_widedeep.models.embeddings_layers.SharedEmbeddings If 'None' is passed, it will default to 'False'.

  • frac_shared_embed (Optional[float], default: None ) \u2013

    The fraction of embeddings that will be shared (if add_shared_embed = False) by all the different categories for one particular column. If 'None' is passed, it will default to 0.0.

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: 'standard' ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the aforementioned paper but it is implemented in the official repo. If True, the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False, a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropout. If None, it will default to False.

  • input_dim (int, default: 32 ) \u2013

    The so-called dimension of the model. It is the dimension of the embeddings used to encode the categorical and/or continuous columns

  • attn_dropout (float, default: 0.2 ) \u2013

    Dropout for each attention block

  • with_addnorm (bool, default: False ) \u2013

    Boolean indicating if residual connections will be used in the attention blocks

  • attn_activation (str, default: 'leaky_relu' ) \u2013

    String indicating the activation function to be applied to the dense layer in each attention encoder. 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported.

  • n_blocks (int, default: 3 ) \u2013

    Number of attention blocks

Attributes:

  • encoder (Module) \u2013

    Sequence of attention encoders.

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import ContextAttentionMLP\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i,j) for u,i,j in zip(colnames[:4], [4]*4, [8]*4)]\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = ContextAttentionMLP(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols = ['e'])\n>>> out = model(X_tab)\n
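The quantization_setup parameter mentioned above can be illustrated with a hedged sketch that reuses the objects from the example (the bin boundaries are arbitrary, chosen only to show the expected format):

>>> quantization_setup = {'e': [0.0, 0.25, 0.5, 0.75, 1.0]}
>>> model_pw = ContextAttentionMLP(
...     column_idx=column_idx,
...     cat_embed_input=cat_embed_input,
...     continuous_cols=['e'],
...     embed_continuous_method='piecewise',
...     quantization_setup=quantization_setup,
... )
>>> out_pw = model_pw(X_tab)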
Source code in pytorch_widedeep/models/tabular/mlp/context_attention_mlp.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    shared_embed: Optional[bool] = None,\n    add_shared_embed: Optional[bool] = None,\n    frac_shared_embed: Optional[float] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = \"standard\",\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    input_dim: int = 32,\n    attn_dropout: float = 0.2,\n    with_addnorm: bool = False,\n    attn_activation: str = \"leaky_relu\",\n    n_blocks: int = 3,\n):\n    super(ContextAttentionMLP, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        shared_embed=shared_embed,\n        add_shared_embed=add_shared_embed,\n        frac_shared_embed=frac_shared_embed,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=None,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        input_dim=input_dim,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.attn_dropout = attn_dropout\n    self.with_addnorm = with_addnorm\n    self.attn_activation = attn_activation\n    self.n_blocks = n_blocks\n\n    self.with_cls_token = \"cls_token\" in column_idx\n    self.n_cat = len(cat_embed_input) if cat_embed_input is not None else 0\n    self.n_cont = len(continuous_cols) if continuous_cols is not None else 0\n\n    # Embeddings are instantiated at the base model\n    # Attention Blocks\n    self.encoder = nn.Sequential()\n    for i in range(n_blocks):\n        self.encoder.add_module(\n            \"attention_block\" + str(i),\n            ContextAttentionEncoder(\n                input_dim,\n                attn_dropout,\n                with_addnorm,\n                attn_activation,\n            ),\n        )\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.context_attention_mlp.ContextAttentionMLP.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property, needed to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.context_attention_mlp.ContextAttentionMLP.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights per block

The shape of the attention weights is \\((N, F)\\), where \\(N\\) is the batch size and \\(F\\) is the number of features/columns in the dataset

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.self_attention_mlp.SelfAttentionMLP","title":"SelfAttentionMLP","text":"
SelfAttentionMLP(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    shared_embed=None,\n    add_shared_embed=None,\n    frac_shared_embed=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous_method=\"standard\",\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    input_dim=32,\n    attn_dropout=0.2,\n    n_heads=8,\n    use_bias=False,\n    with_addnorm=False,\n    attn_activation=\"leaky_relu\",\n    n_blocks=3\n)\n

Bases: BaseTabularModelWithAttention

Defines a SelfAttentionMLP model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

This class combines embedding representations of the categorical features with numerical (aka continuous) features that are also embedded. These are then passed through a series of attention blocks, each consisting of what we would refer to as a simplified SelfAttentionEncoder. See pytorch_widedeep.models.tabular.mlp._attention_layers for details. The reason for using a simplified version of self-attention is that we observed that the 'standard' attention mechanism used in the TabTransformer has a notable tendency to overfit.

In more detail, this model only uses Q and K (and not V). If we think about it in terms of text (and intuitively), the Softmax(QK^T) is the attention mechanism that tells us how much, at each position in the input sentence, each word is represented or 'expressed'. We refer to that as the 'attention weights'. These attention weights are normally multiplied by a Value matrix to further strengthen the focus on the words that each word should be attending to (again, intuitively).

In this implementation we skip this last multiplication and instead we multiply the attention weights directly by the input tensor. This is a simplification that we expect is beneficial in terms of avoiding overfitting for tabular data.
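As a rough, framework-agnostic illustration of that simplification (this is not the library's actual layer, just the idea; the shapes and the scaling factor below are assumptions):

>>> import torch
>>> import torch.nn.functional as F
>>> X = torch.rand(5, 4, 32)                       # (batch size, n features, input_dim)
>>> W_q, W_k = torch.rand(32, 32), torch.rand(32, 32)
>>> Q, K = X @ W_q, X @ W_k
>>> attn_weights = F.softmax(Q @ K.transpose(1, 2) / 32 ** 0.5, dim=-1)
>>> out = attn_weights @ X                         # weights applied directly to the input, no V projection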

Most of the parameters for this class are Optional since the use of categorical or continuous features is in fact optional (i.e. one can use categorical features only, continuous features only or both).

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the TabMlp model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int]]], default: None ) \u2013

    List of Tuples with the column name and number of unique values. e.g. [(education, 11), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • shared_embed (Optional[bool], default: None ) \u2013

    Boolean indicating if the embeddings will be \"shared\". The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • add_shared_embed (Optional[bool], default: None ) \u2013

    The two embedding sharing strategies are: 1) add the shared embeddings to the column embeddings or 2) to replace the first frac_shared_embed with the shared embeddings. See pytorch_widedeep.models.embeddings_layers.SharedEmbeddings If 'None' is passed, it will default to 'False'.

  • frac_shared_embed (Optional[float], default: None ) \u2013

    The fraction of embeddings that will be shared (if add_shared_embed = False) by all the different categories for one particular column. If 'None' is passed, it will default to 0.0.

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: 'standard' ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the aforementioned paper but it is implemented in the official repo. If True, the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False, a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropout. If None, it will default to False.

  • input_dim (int, default: 32 ) \u2013

    The so-called dimension of the model. It is the dimension of the embeddings used to encode the categorical and/or continuous columns

  • attn_dropout (float, default: 0.2 ) \u2013

    Dropout for each attention block

  • n_heads (int, default: 8 ) \u2013

    Number of attention heads per attention block.

  • use_bias (bool, default: False ) \u2013

    Boolean indicating whether or not to use bias in the Q, K projection layers.

  • with_addnorm (bool, default: False ) \u2013

    Boolean indicating if residual connections will be used in the attention blocks

  • attn_activation (str, default: 'leaky_relu' ) \u2013

    String indicating the activation function to be applied to the dense layer in each attention encoder. 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported.

  • n_blocks (int, default: 3 ) \u2013

    Number of attention blocks

Attributes:

  • cat_and_cont_embed (Module) \u2013

    This is the module that processes the categorical and continuous columns

  • encoder (Module) \u2013

    Sequence of attention encoders.

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import SelfAttentionMLP\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i,j) for u,i,j in zip(colnames[:4], [4]*4, [8]*4)]\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = SelfAttentionMLP(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols = ['e'])\n>>> out = model(X_tab)\n
Source code in pytorch_widedeep/models/tabular/mlp/self_attention_mlp.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    shared_embed: Optional[bool] = None,\n    add_shared_embed: Optional[bool] = None,\n    frac_shared_embed: Optional[float] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = \"standard\",\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    input_dim: int = 32,\n    attn_dropout: float = 0.2,\n    n_heads: int = 8,\n    use_bias: bool = False,\n    with_addnorm: bool = False,\n    attn_activation: str = \"leaky_relu\",\n    n_blocks: int = 3,\n):\n    super(SelfAttentionMLP, self).__init__(\n        column_idx=column_idx,\n        input_dim=input_dim,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        shared_embed=shared_embed,\n        add_shared_embed=add_shared_embed,\n        frac_shared_embed=frac_shared_embed,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=None,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.attn_dropout = attn_dropout\n    self.n_heads = n_heads\n    self.use_bias = use_bias\n    self.with_addnorm = with_addnorm\n    self.attn_activation = attn_activation\n    self.n_blocks = n_blocks\n\n    self.with_cls_token = \"cls_token\" in column_idx\n    self.n_cat = len(cat_embed_input) if cat_embed_input is not None else 0\n    self.n_cont = len(continuous_cols) if continuous_cols is not None else 0\n\n    # Embeddings are instantiated at the base model\n    # Attention Blocks\n    self.encoder = nn.Sequential()\n    for i in range(n_blocks):\n        self.encoder.add_module(\n            \"attention_block\" + str(i),\n            SelfAttentionEncoder(\n                input_dim,\n                attn_dropout,\n                use_bias,\n                n_heads,\n                with_addnorm,\n                attn_activation,\n            ),\n        )\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.self_attention_mlp.SelfAttentionMLP.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property, needed to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.mlp.self_attention_mlp.SelfAttentionMLP.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights per block

The shape of the attention weights is \\((N, H, F, F)\\), where \\(N\\) is the batch size, \\(H\\) is the number of attention heads and \\(F\\) is the number of features/columns in the dataset
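As a small usage sketch, reusing the model and X_tab from the Examples section above (the weights are populated during the forward pass):

>>> out = model(X_tab)
>>> attn_per_block = model.attention_weights  # list of length n_blocks
>>> # each element has shape (N, H, F, F), i.e. (5, 8, 5, 5) for the toy example above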

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_transformer.TabTransformer","title":"TabTransformer","text":"
TabTransformer(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    shared_embed=None,\n    add_shared_embed=None,\n    frac_shared_embed=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous=None,\n    embed_continuous_method=None,\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    input_dim=32,\n    n_heads=8,\n    use_qkv_bias=False,\n    n_blocks=4,\n    attn_dropout=0.2,\n    ff_dropout=0.1,\n    ff_factor=4,\n    transformer_activation=\"gelu\",\n    use_linear_attention=False,\n    use_flash_attention=False,\n    mlp_hidden_dims=None,\n    mlp_activation=\"relu\",\n    mlp_dropout=0.1,\n    mlp_batchnorm=False,\n    mlp_batchnorm_last=False,\n    mlp_linear_first=True\n)\n

Bases: BaseTabularModelWithAttention

Defines our adaptation of the TabTransformer model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

Most of the parameters for this class are Optional since the use of categorical or continuous features is in fact optional (i.e. one can use categorical features only, continuous features only or both).

NOTE: This is an enhanced adaptation of the model described in the paper. It can be considered the flagship of our transformer family of models for tabular data and offers multiple additional features relative to the original publication (and to some other models in the library)

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the TabMlp model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int]]], default: None ) \u2013

    List of Tuples with the column name and number of unique values. e.g. [(education, 11), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • shared_embed (Optional[bool], default: None ) \u2013

    Boolean indicating if the embeddings will be \"shared\". The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • add_shared_embed (Optional[bool], default: None ) \u2013

    The two embedding sharing strategies are: 1) add the shared embeddings to the column embeddings or 2) to replace the first frac_shared_embed with the shared embeddings. See pytorch_widedeep.models.embeddings_layers.SharedEmbeddings If 'None' is passed, it will default to 'False'.

  • frac_shared_embed (Optional[float], default: None ) \u2013

    The fraction of embeddings that will be shared (if add_shared_embed = False) by all the different categories for one particular column. If 'None' is passed, it will default to 0.0.

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: None ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the aforementioned paper but it is implemented in the official repo. If True, the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False, a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropout. If None, it will default to False.

  • input_dim (int, default: 32 ) \u2013

    The so-called dimension of the model. It is the dimension of the embeddings used to encode the categorical and/or continuous columns

  • n_heads (int, default: 8 ) \u2013

    Number of attention heads per Transformer block

  • use_qkv_bias (bool, default: False ) \u2013

    Boolean indicating whether or not to use bias in the Q, K, and V projection layers.

  • n_blocks (int, default: 4 ) \u2013

    Number of Transformer blocks

  • attn_dropout (float, default: 0.2 ) \u2013

    Dropout that will be applied to the Multi-Head Attention layers

  • ff_dropout (float, default: 0.1 ) \u2013

    Dropout that will be applied to the FeedForward network

  • ff_factor (int, default: 4 ) \u2013

    Multiplicative factor applied to the first layer of the FF network in each Transformer block. This is normally set to 4.

  • transformer_activation (str, default: 'gelu' ) \u2013

    Transformer Encoder activation function. 'tanh', 'relu', 'leaky_relu', 'gelu', 'geglu' and 'reglu' are supported

  • use_linear_attention (bool, default: False ) \u2013

    Boolean indicating if Linear Attention (from Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention) will be used. The inclusion of this mode of attention is inspired by this post, where the Uber team finds that this attention mechanism leads to the best results for their tabular data.

  • use_flash_attention (bool, default: False ) \u2013

    Boolean indicating if Flash Attention will be used.

  • mlp_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the MLP, e.g.: [64, 32]. If not provided, no MLP on top of the final Transformer block will be used.

  • mlp_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 'relu'.

  • mlp_dropout (float, default: 0.1 ) \u2013

    float with the dropout between the dense layers of the MLP. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 0.0.

  • mlp_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_linear_first (bool, default: True ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to True.

Attributes:

  • encoder (Module) \u2013

    Sequence of Transformer blocks

  • mlp (Module) \u2013

    MLP component in the model

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabTransformer\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i) for u,i in zip(colnames[:4], [4]*4)]\n>>> continuous_cols = ['e']\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = TabTransformer(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols=continuous_cols)\n>>> out = model(X_tab)\n
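Continuing the example above, a hedged sketch of the 'periodic' continuous-embedding setup (the values of n_frequencies and sigma are purely illustrative, not recommendations):

>>> model_per = TabTransformer(
...     column_idx=column_idx,
...     cat_embed_input=cat_embed_input,
...     continuous_cols=continuous_cols,
...     embed_continuous=True,
...     embed_continuous_method='periodic',
...     n_frequencies=4,
...     sigma=0.1,
...     share_last_layer=False,
... )
>>> out_per = model_per(X_tab)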
Source code in pytorch_widedeep/models/tabular/transformers/tab_transformer.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    shared_embed: Optional[bool] = None,\n    add_shared_embed: Optional[bool] = None,\n    frac_shared_embed: Optional[float] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous: Optional[bool] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = None,\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    input_dim: int = 32,\n    n_heads: int = 8,\n    use_qkv_bias: bool = False,\n    n_blocks: int = 4,\n    attn_dropout: float = 0.2,\n    ff_dropout: float = 0.1,\n    ff_factor: int = 4,\n    transformer_activation: str = \"gelu\",\n    use_linear_attention: bool = False,\n    use_flash_attention: bool = False,\n    mlp_hidden_dims: Optional[List[int]] = None,\n    mlp_activation: str = \"relu\",\n    mlp_dropout: float = 0.1,\n    mlp_batchnorm: bool = False,\n    mlp_batchnorm_last: bool = False,\n    mlp_linear_first: bool = True,\n):\n    super(TabTransformer, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        shared_embed=shared_embed,\n        add_shared_embed=add_shared_embed,\n        frac_shared_embed=frac_shared_embed,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=embed_continuous,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        input_dim=input_dim,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.n_heads = n_heads\n    self.use_qkv_bias = use_qkv_bias\n    self.n_blocks = n_blocks\n    self.attn_dropout = attn_dropout\n    self.ff_dropout = ff_dropout\n    self.transformer_activation = transformer_activation\n    self.use_linear_attention = use_linear_attention\n    self.use_flash_attention = use_flash_attention\n    self.ff_factor = ff_factor\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    self.with_cls_token = \"cls_token\" in column_idx\n    self.n_cat = len(cat_embed_input) if cat_embed_input is not None else 0\n    self.n_cont = len(continuous_cols) if continuous_cols is not None else 0\n\n    if self.n_cont and not self.n_cat and not self.embed_continuous:\n        raise ValueError(\n            \"If only continuous features are used 'embed_continuous' must be set to 'True'\"\n        )\n\n    # Embeddings are 
instantiated at the base model\n    # Transformer blocks\n    self.encoder = nn.Sequential()\n    for i in range(n_blocks):\n        self.encoder.add_module(\n            \"transformer_block\" + str(i),\n            TransformerEncoder(\n                input_dim,\n                n_heads,\n                use_qkv_bias,\n                attn_dropout,\n                ff_dropout,\n                ff_factor,\n                transformer_activation,\n                use_linear_attention,\n                use_flash_attention,\n            ),\n        )\n\n    self.mlp_first_hidden_dim = self._mlp_first_hidden_dim()\n\n    if self.mlp_hidden_dims is not None:\n        self.mlp = MLP(\n            d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim,\n            activation=(\n                \"relu\" if self.mlp_activation is None else self.mlp_activation\n            ),\n            dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout,\n            batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm,\n            batchnorm_last=(\n                False\n                if self.mlp_batchnorm_last is None\n                else self.mlp_batchnorm_last\n            ),\n            linear_first=(\n                False if self.mlp_linear_first is None else self.mlp_linear_first\n            ),\n        )\n    else:\n        self.mlp = None\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_transformer.TabTransformer.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property, needed to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_transformer.TabTransformer.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights per block

The shape of the attention weights is \\((N, H, F, F)\\), where \\(N\\) is the batch size, \\(H\\) is the number of attention heads and \\(F\\) is the number of features/columns in the dataset

NOTE: if flash attention or linear attention is used, no attention weights are saved during the training process and calling this property will throw a ValueError

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.saint.SAINT","title":"SAINT","text":"
SAINT(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    shared_embed=None,\n    add_shared_embed=None,\n    frac_shared_embed=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous_method=\"standard\",\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    input_dim=32,\n    use_qkv_bias=False,\n    n_heads=8,\n    n_blocks=2,\n    attn_dropout=0.1,\n    ff_dropout=0.2,\n    ff_factor=4,\n    transformer_activation=\"gelu\",\n    mlp_hidden_dims=None,\n    mlp_activation=None,\n    mlp_dropout=None,\n    mlp_batchnorm=None,\n    mlp_batchnorm_last=None,\n    mlp_linear_first=None\n)\n

Bases: BaseTabularModelWithAttention

Defines a SAINT model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

Most of the parameters for this class are Optional since the use of categorical or continuous features is in fact optional (i.e. one can use categorical features only, continuous features only or both).

NOTE: This is a slightly modified and enhanced version of the model described in the paper.

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the TabMlp model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int]]], default: None ) \u2013

    List of Tuples with the column name and number of unique values. e.g. [(education, 11), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • shared_embed (Optional[bool], default: None ) \u2013

    Boolean indicating if the embeddings will be \"shared\". The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • add_shared_embed (Optional[bool], default: None ) \u2013

    The two embedding sharing strategies are: 1) add the shared embeddings to the column embeddings or 2) to replace the first frac_shared_embed with the shared embeddings. See pytorch_widedeep.models.embeddings_layers.SharedEmbeddings If 'None' is passed, it will default to 'False'.

  • frac_shared_embed (Optional[float], default: None ) \u2013

    The fraction of embeddings that will be shared (if add_shared_embed = False) by all the different categories for one particular column. If 'None' is passed, it will default to 0.0.

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. If None, no normalization layer will be used.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: 'standard' ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the aforementioned paper but it is implemented in the official repo. If True, the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False, a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropout. If None, it will default to False.

  • input_dim (int, default: 32 ) \u2013

    The so-called dimension of the model. It is the dimension of the embeddings used to encode the categorical and/or continuous columns

  • n_heads (int, default: 8 ) \u2013

    Number of attention heads per Transformer block

  • use_qkv_bias (bool, default: False ) \u2013

    Boolean indicating whether or not to use bias in the Q, K, and V projection layers

  • n_blocks (int, default: 2 ) \u2013

    Number of SAINT-Transformer blocks.

  • attn_dropout (float, default: 0.1 ) \u2013

    Dropout that will be applied to the Multi-Head Attention column and row layers

  • ff_dropout (float, default: 0.2 ) \u2013

    Dropout that will be applied to the FeedForward network

  • ff_factor (int, default: 4 ) \u2013

    Multiplicative factor applied to the first layer of the FF network in each Transformer block. This is normally set to 4.

  • transformer_activation (str, default: 'gelu' ) \u2013

    Transformer Encoder activation function. 'tanh', 'relu', 'leaky_relu', 'gelu', 'geglu' and 'reglu' are supported

  • mlp_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the MLP, e.g.: [64, 32]. If not provided, no MLP on top of the final Transformer block will be used.

  • mlp_activation (Optional[str], default: None ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 'relu'.

  • mlp_dropout (Optional[float], default: None ) \u2013

    float with the dropout between the dense layers of the MLP. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 0.0.

  • mlp_batchnorm (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_batchnorm_last (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_linear_first (Optional[bool], default: None ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to True.

Attributes:

  • encoder (Module) \u2013

    Sequence of SAINT-Transformer blocks

  • mlp (Module) \u2013

    MLP component in the model

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import SAINT\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i) for u,i in zip(colnames[:4], [4]*4)]\n>>> continuous_cols = ['e']\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = SAINT(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols=continuous_cols)\n>>> out = model(X_tab)\n
Source code in pytorch_widedeep/models/tabular/transformers/saint.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    shared_embed: Optional[bool] = None,\n    add_shared_embed: Optional[bool] = None,\n    frac_shared_embed: Optional[float] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = \"standard\",\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    input_dim: int = 32,\n    use_qkv_bias: bool = False,\n    n_heads: int = 8,\n    n_blocks: int = 2,\n    attn_dropout: float = 0.1,\n    ff_dropout: float = 0.2,\n    ff_factor: int = 4,\n    transformer_activation: str = \"gelu\",\n    mlp_hidden_dims: Optional[List[int]] = None,\n    mlp_activation: Optional[str] = None,\n    mlp_dropout: Optional[float] = None,\n    mlp_batchnorm: Optional[bool] = None,\n    mlp_batchnorm_last: Optional[bool] = None,\n    mlp_linear_first: Optional[bool] = None,\n):\n    super(SAINT, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        shared_embed=shared_embed,\n        add_shared_embed=add_shared_embed,\n        frac_shared_embed=frac_shared_embed,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=None,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        input_dim=input_dim,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.use_qkv_bias = use_qkv_bias\n    self.n_heads = n_heads\n    self.n_blocks = n_blocks\n    self.attn_dropout = attn_dropout\n    self.ff_dropout = ff_dropout\n    self.ff_factor = ff_factor\n    self.transformer_activation = transformer_activation\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    self.with_cls_token = \"cls_token\" in column_idx\n    self.n_cat = len(cat_embed_input) if cat_embed_input is not None else 0\n    self.n_cont = len(continuous_cols) if continuous_cols is not None else 0\n    self.n_feats = self.n_cat + self.n_cont\n\n    # Embeddings are instantiated at the base model\n    # Transformer blocks\n    self.encoder = nn.Sequential()\n    for i in range(n_blocks):\n        self.encoder.add_module(\n            \"saint_block\" + str(i),\n            SaintEncoder(\n                input_dim,\n                n_heads,\n                use_qkv_bias,\n                attn_dropout,\n                
ff_dropout,\n                ff_factor,\n                transformer_activation,\n                self.n_feats,\n            ),\n        )\n\n    self.mlp_first_hidden_dim = (\n        self.input_dim if self.with_cls_token else (self.n_feats * self.input_dim)\n    )\n\n    # Mlp: adding an MLP on top of the Resnet blocks is optional and\n    # therefore all related params are optional\n    if self.mlp_hidden_dims is not None:\n        self.mlp = MLP(\n            d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim,\n            activation=(\n                \"relu\" if self.mlp_activation is None else self.mlp_activation\n            ),\n            dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout,\n            batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm,\n            batchnorm_last=(\n                False\n                if self.mlp_batchnorm_last is None\n                else self.mlp_batchnorm_last\n            ),\n            linear_first=(\n                False if self.mlp_linear_first is None else self.mlp_linear_first\n            ),\n        )\n    else:\n        self.mlp = None\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.saint.SAINT.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property, necessary to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.saint.SAINT.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights. Each element of the list is a tuple whose first and second elements are the column and row attention weights, respectively

The shape of the attention weights is:

  • column attention: \\((N, H, F, F)\\)

  • row attention: \\((1, H, N, N)\\)

where \\(N\\) is the batch size, \\(H\\) is the number of heads and \\(F\\) is the number of features/columns in the dataset

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.ft_transformer.FTTransformer","title":"FTTransformer","text":"
FTTransformer(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    shared_embed=None,\n    add_shared_embed=None,\n    frac_shared_embed=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous_method=\"standard\",\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    input_dim=64,\n    kv_compression_factor=0.5,\n    kv_sharing=False,\n    use_qkv_bias=False,\n    n_heads=8,\n    n_blocks=4,\n    attn_dropout=0.2,\n    ff_dropout=0.1,\n    ff_factor=1.33,\n    transformer_activation=\"reglu\",\n    mlp_hidden_dims=None,\n    mlp_activation=None,\n    mlp_dropout=None,\n    mlp_batchnorm=None,\n    mlp_batchnorm_last=None,\n    mlp_linear_first=None\n)\n

Bases: BaseTabularModelWithAttention

Defines a FTTransformer model that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

Most of the parameters for this class are Optional since the use of categorical or continuous features is in fact optional (i.e. one can use categorical features only, continuous features only, or both).

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int]]], default: None ) \u2013

    List of Tuples with the column name and number of unique values for each categorical column. e.g. [(education, 11), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • shared_embed (Optional[bool], default: None ) \u2013

    Boolean indicating if the embeddings will be \"shared\". The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • add_shared_embed (Optional[bool], default: None ) \u2013

    The two embedding sharing strategies are: 1) add the shared embeddings to the column embeddings or 2) to replace the first frac_shared_embed with the shared embeddings. See pytorch_widedeep.models.embeddings_layers.SharedEmbeddings If 'None' is passed, it will default to 'False'.

  • frac_shared_embed (Optional[float], default: None ) \u2013

    The fraction of embeddings that will be shared (if add_shared_embed = False) by all the different categories for one particular column. If 'None' is passed, it will default to 0.0.

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. if None, no normalization layer will be used.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: 'standard' ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the aforementioned paper but it is implemented in the official repo. If True, the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False, a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropout. If None, it will default to False.

  • input_dim (int, default: 64 ) \u2013

    The so-called dimension of the model. This is the dimension of the embeddings used to encode the categorical and/or continuous columns.

  • kv_compression_factor (float, default: 0.5 ) \u2013

    By default, the FTTransformer uses Linear Attention (see Linformer: Self-Attention with Linear Complexity). This is the compression factor used to reduce the input sequence length, so that the resulting sequence length is \(k = int(kv_{compression \space factor} \times s)\), where \(s\) is the input sequence length.

  • kv_sharing (bool, default: False ) \u2013

    Boolean indicating if the \\(E\\) and \\(F\\) projection matrices will share weights. See Linformer: Self-Attention with Linear Complexity for details

  • n_heads (int, default: 8 ) \u2013

    Number of attention heads per FTTransformer block

  • use_qkv_bias (bool, default: False ) \u2013

    Boolean indicating whether or not to use bias in the Q, K, and V projection layers

  • n_blocks (int, default: 4 ) \u2013

    Number of FTTransformer blocks

  • attn_dropout (float, default: 0.2 ) \u2013

    Dropout that will be applied to the Linear-Attention layers

  • ff_dropout (float, default: 0.1 ) \u2013

    Dropout that will be applied to the FeedForward network

  • ff_factor (float, default: 1.33 ) \u2013

    Multiplicative factor applied to the first layer of the FF network in each Transformer block. This is normally set to 4, but the authors use 4/3 in the paper.

  • transformer_activation (str, default: 'reglu' ) \u2013

    Transformer Encoder activation function. 'tanh', 'relu', 'leaky_relu', 'gelu', 'geglu' and 'reglu' are supported

  • mlp_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the MLP. e.g: [64, 32]. If not provided no MLP on top of the final FTTransformer block will be used.

  • mlp_activation (Optional[str], default: None ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 'relu'.

  • mlp_dropout (Optional[float], default: None ) \u2013

    float with the dropout between the dense layers of the MLP. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 0.0.

  • mlp_batchnorm (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_batchnorm_last (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_linear_first (Optional[bool], default: None ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to True.

Attributes:

  • encoder (Module) \u2013

    Sequence of FTTransformer blocks

  • mlp (Module) \u2013

    MLP component in the model

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import FTTransformer\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i) for u,i in zip(colnames[:4], [4]*4)]\n>>> continuous_cols = ['e']\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = FTTransformer(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols=continuous_cols)\n>>> out = model(X_tab)\n
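Building on the example above, a hedged sketch of the 'piecewise' continuous embedding method (the bin boundaries for column 'e' below are purely illustrative):

>>> quantization_setup = {'e': [0.0, 0.25, 0.5, 0.75, 1.0]}  # illustrative boundaries for the continuous column 'e'
>>> model_pw = FTTransformer(
...     column_idx=column_idx,
...     cat_embed_input=cat_embed_input,
...     continuous_cols=continuous_cols,
...     embed_continuous_method='piecewise',
...     quantization_setup=quantization_setup,
... )
>>> out = model_pw(X_tab)

The same pattern applies to the 'periodic' method, in which case n_frequencies, sigma and share_last_layer are passed instead of quantization_setup.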
Source code in pytorch_widedeep/models/tabular/transformers/ft_transformer.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    shared_embed: Optional[bool] = None,\n    add_shared_embed: Optional[bool] = None,\n    frac_shared_embed: Optional[float] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = \"standard\",\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    input_dim: int = 64,\n    kv_compression_factor: float = 0.5,\n    kv_sharing: bool = False,\n    use_qkv_bias: bool = False,\n    n_heads: int = 8,\n    n_blocks: int = 4,\n    attn_dropout: float = 0.2,\n    ff_dropout: float = 0.1,\n    ff_factor: float = 1.33,\n    transformer_activation: str = \"reglu\",\n    mlp_hidden_dims: Optional[List[int]] = None,\n    mlp_activation: Optional[str] = None,\n    mlp_dropout: Optional[float] = None,\n    mlp_batchnorm: Optional[bool] = None,\n    mlp_batchnorm_last: Optional[bool] = None,\n    mlp_linear_first: Optional[bool] = None,\n):\n    super(FTTransformer, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        shared_embed=shared_embed,\n        add_shared_embed=add_shared_embed,\n        frac_shared_embed=frac_shared_embed,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=None,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        input_dim=input_dim,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.kv_compression_factor = kv_compression_factor\n    self.kv_sharing = kv_sharing\n    self.use_qkv_bias = use_qkv_bias\n    self.n_heads = n_heads\n    self.n_blocks = n_blocks\n    self.attn_dropout = attn_dropout\n    self.ff_dropout = ff_dropout\n    self.ff_factor = ff_factor\n    self.transformer_activation = transformer_activation\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    self.with_cls_token = \"cls_token\" in column_idx\n    self.n_cat = len(cat_embed_input) if cat_embed_input is not None else 0\n    self.n_cont = len(continuous_cols) if continuous_cols is not None else 0\n    self.n_feats = self.n_cat + self.n_cont\n\n    # Embeddings are instantiated at the base model\n    # Transformer blocks\n    is_first = True\n    self.encoder = nn.Sequential()\n    for i in range(n_blocks):\n        self.encoder.add_module(\n            
\"fttransformer_block\" + str(i),\n            FTTransformerEncoder(\n                input_dim,\n                self.n_feats,\n                n_heads,\n                use_qkv_bias,\n                attn_dropout,\n                ff_dropout,\n                ff_factor,\n                kv_compression_factor,\n                kv_sharing,\n                transformer_activation,\n                is_first,\n            ),\n        )\n        is_first = False\n\n    self.mlp_first_hidden_dim = (\n        self.input_dim if self.with_cls_token else (self.n_feats * self.input_dim)\n    )\n\n    # Mlp: adding an MLP on top of the Resnet blocks is optional and\n    # therefore all related params are optional\n    if self.mlp_hidden_dims is not None:\n        self.mlp = MLP(\n            d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim,\n            activation=(\n                \"relu\" if self.mlp_activation is None else self.mlp_activation\n            ),\n            dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout,\n            batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm,\n            batchnorm_last=(\n                False\n                if self.mlp_batchnorm_last is None\n                else self.mlp_batchnorm_last\n            ),\n            linear_first=(\n                False if self.mlp_linear_first is None else self.mlp_linear_first\n            ),\n        )\n    else:\n        self.mlp = None\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.ft_transformer.FTTransformer.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property, necessary to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.ft_transformer.FTTransformer.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights per block

The shape of the attention weights is: \\((N, H, F, k)\\), where \\(N\\) is the batch size, \\(H\\) is the number of attention heads, \\(F\\) is the number of features/columns and \\(k\\) is the reduced sequence length or dimension, i.e. \\(k = int(kv_{compression \\space factor} \\times s)\\)
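For instance, with the small model from the Examples above (no cls_token, \(N = 5\), \(H = 8\), \(F = 5\) and the default kv_compression_factor of 0.5, so \(k = int(0.5 \times 5) = 2\)), each element of model.attention_weights would be expected to have shape \((5, 8, 5, 2)\) once a forward pass has been run.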

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_perceiver.TabPerceiver","title":"TabPerceiver","text":"
TabPerceiver(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    shared_embed=None,\n    add_shared_embed=None,\n    frac_shared_embed=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous_method=\"standard\",\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    input_dim=32,\n    n_cross_attns=1,\n    n_cross_attn_heads=4,\n    n_latents=16,\n    latent_dim=128,\n    n_latent_heads=4,\n    n_latent_blocks=4,\n    n_perceiver_blocks=4,\n    share_weights=False,\n    attn_dropout=0.1,\n    ff_dropout=0.1,\n    ff_factor=4,\n    transformer_activation=\"geglu\",\n    mlp_hidden_dims=None,\n    mlp_activation=None,\n    mlp_dropout=None,\n    mlp_batchnorm=None,\n    mlp_batchnorm_last=None,\n    mlp_linear_first=None\n)\n

Bases: BaseTabularModelWithAttention

Defines an adaptation of a Perceiver that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

Most of the parameters for this class are Optional since the use of categorical or continuous features is in fact optional (i.e. one can use categorical features only, continuous features only, or both).

NOTE: while there are scientific publications for the TabTransformer, SAINT and FTTransformer, the TabPerceiver and the TabFastFormer are our own adaptations of the Perceiver and the FastFormer for tabular data.

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int]]], default: None ) \u2013

    List of Tuples with the column name and number of unique values for each categorical column. e.g. [(education, 11), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • shared_embed (Optional[bool], default: None ) \u2013

    Boolean indicating if the embeddings will be \"shared\". The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • add_shared_embed (Optional[bool], default: None ) \u2013

    The two embedding sharing strategies are: 1) add the shared embeddings to the column embeddings or 2) to replace the first frac_shared_embed with the shared embeddings. See pytorch_widedeep.models.embeddings_layers.SharedEmbeddings If 'None' is passed, it will default to 'False'.

  • frac_shared_embed (Optional[float], default: None ) \u2013

    The fraction of embeddings that will be shared (if add_shared_embed = False) by all the different categories for one particular column. If 'None' is passed, it will default to 0.0.

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. if None, no normalization layer will be used.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: 'standard' ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the aforementioned paper but it is implemented in the official repo. If True, the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False, a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropout. If None, it will default to False.

  • input_dim (int, default: 32 ) \u2013

    The so-called dimension of the model. This is the dimension of the embeddings used to encode the categorical and/or continuous columns.

  • n_cross_attns (int, default: 1 ) \u2013

    Number of times each perceiver block will cross attend to the input data (i.e. the number of cross attention components per perceiver block). This should normally be 1. However, the paper describes some architectures (normally computer vision-related problems) where the Perceiver attends multiple times to the input array, so attending multiple times to the input array might also be useful in some cases for tabular data.

  • n_cross_attn_heads (int, default: 4 ) \u2013

    Number of attention heads for the cross attention component

  • n_latents (int, default: 16 ) \u2013

    Number of latents. This is the \(N\) parameter in the paper. As indicated in the paper, this number should be significantly lower than \(M\) (the number of columns in the dataset). Setting \(N\) closer to \(M\) defeats the main purpose of the Perceiver, which is to overcome the transformer quadratic bottleneck.

  • latent_dim (int, default: 128 ) \u2013

    Latent dimension.

  • n_latent_heads (int, default: 4 ) \u2013

    Number of attention heads per Latent Transformer

  • n_latent_blocks (int, default: 4 ) \u2013

    Number of transformer encoder blocks (normalised MHA + normalised FF) per Latent Transformer

  • n_perceiver_blocks (int, default: 4 ) \u2013

    Number of Perceiver blocks defined as [Cross Attention + Latent Transformer]

  • share_weights (bool, default: False ) \u2013

    Boolean indicating if the weights will be shared between Perceiver blocks

  • attn_dropout (float, default: 0.1 ) \u2013

    Dropout that will be applied to the Multi-Head Attention layers

  • ff_dropout (float, default: 0.1 ) \u2013

    Dropout that will be applied to the FeedForward network

  • ff_factor (int, default: 4 ) \u2013

    Multiplicative factor applied to the first layer of the FF network in each Transformer block. This is normally set to 4.

  • transformer_activation (str, default: 'geglu' ) \u2013

    Transformer Encoder activation function. 'tanh', 'relu', 'leaky_relu', 'gelu', 'geglu' and 'reglu' are supported

  • mlp_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the MLP. e.g: [64, 32]. If not provided no MLP on top of the final Transformer block will be used.

  • mlp_activation (Optional[str], default: None ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 'relu'.

  • mlp_dropout (Optional[float], default: None ) \u2013

    float with the dropout between the dense layers of the MLP. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 0.0.

  • mlp_batchnorm (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_batchnorm_last (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_linear_first (Optional[bool], default: None ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to True.

Attributes:

  • encoder (ModuleDict) \u2013

    ModuleDict with the Perceiver blocks

  • latents (Parameter) \u2013

    Latents that will be used for prediction

  • mlp (Module) \u2013

    MLP component in the model

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabPerceiver\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i) for u,i in zip(colnames[:4], [4]*4)]\n>>> continuous_cols = ['e']\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = TabPerceiver(column_idx=column_idx, cat_embed_input=cat_embed_input,\n... continuous_cols=continuous_cols, n_latents=2, latent_dim=16,\n... n_perceiver_blocks=2)\n>>> out = model(X_tab)\n
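As a complementary, illustrative sketch (the values of n_frequencies and sigma below are arbitrary choices, not recommendations), the 'periodic' continuous embedding method could be used as follows:

>>> model_periodic = TabPerceiver(
...     column_idx=column_idx,
...     cat_embed_input=cat_embed_input,
...     continuous_cols=continuous_cols,
...     embed_continuous_method='periodic',
...     n_frequencies=8,
...     sigma=0.1,
...     share_last_layer=True,
...     n_latents=2,
...     latent_dim=16,
...     n_perceiver_blocks=2,
... )
>>> out = model_periodic(X_tab)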
Source code in pytorch_widedeep/models/tabular/transformers/tab_perceiver.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    shared_embed: Optional[bool] = None,\n    add_shared_embed: Optional[bool] = None,\n    frac_shared_embed: Optional[float] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = \"standard\",\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    input_dim: int = 32,\n    n_cross_attns: int = 1,\n    n_cross_attn_heads: int = 4,\n    n_latents: int = 16,\n    latent_dim: int = 128,\n    n_latent_heads: int = 4,\n    n_latent_blocks: int = 4,\n    n_perceiver_blocks: int = 4,\n    share_weights: bool = False,\n    attn_dropout: float = 0.1,\n    ff_dropout: float = 0.1,\n    ff_factor: int = 4,\n    transformer_activation: str = \"geglu\",\n    mlp_hidden_dims: Optional[List[int]] = None,\n    mlp_activation: Optional[str] = None,\n    mlp_dropout: Optional[float] = None,\n    mlp_batchnorm: Optional[bool] = None,\n    mlp_batchnorm_last: Optional[bool] = None,\n    mlp_linear_first: Optional[bool] = None,\n):\n    super(TabPerceiver, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        shared_embed=shared_embed,\n        add_shared_embed=add_shared_embed,\n        frac_shared_embed=frac_shared_embed,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=None,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        input_dim=input_dim,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.n_cross_attns = n_cross_attns\n    self.n_cross_attn_heads = n_cross_attn_heads\n    self.n_latents = n_latents\n    self.latent_dim = latent_dim\n    self.n_latent_heads = n_latent_heads\n    self.n_latent_blocks = n_latent_blocks\n    self.n_perceiver_blocks = n_perceiver_blocks\n    self.share_weights = share_weights\n    self.attn_dropout = attn_dropout\n    self.ff_dropout = ff_dropout\n    self.ff_factor = ff_factor\n    self.transformer_activation = transformer_activation\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    # Embeddings are instantiated at the base model\n    # Transformer blocks\n    self.latents = nn.init.trunc_normal_(\n        nn.Parameter(torch.empty(n_latents, latent_dim))\n    )\n\n    self.encoder = nn.ModuleDict()\n    
first_perceiver_block = self._build_perceiver_block()\n    self.encoder[\"perceiver_block0\"] = first_perceiver_block\n\n    if share_weights:\n        for n in range(1, n_perceiver_blocks):\n            self.encoder[\"perceiver_block\" + str(n)] = first_perceiver_block\n    else:\n        for n in range(1, n_perceiver_blocks):\n            self.encoder[\"perceiver_block\" + str(n)] = self._build_perceiver_block()\n\n    self.mlp_first_hidden_dim = self.latent_dim\n\n    # Mlp\n    if self.mlp_hidden_dims is not None:\n        self.mlp = MLP(\n            d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim,\n            activation=(\n                \"relu\" if self.mlp_activation is None else self.mlp_activation\n            ),\n            dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout,\n            batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm,\n            batchnorm_last=(\n                False\n                if self.mlp_batchnorm_last is None\n                else self.mlp_batchnorm_last\n            ),\n            linear_first=(\n                False if self.mlp_linear_first is None else self.mlp_linear_first\n            ),\n        )\n    else:\n        self.mlp = None\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_perceiver.TabPerceiver.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property, necessary to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_perceiver.TabPerceiver.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights. If the weights are not shared between perceiver blocks, each element of the list will itself be a list containing the Cross Attention and Latent Transformer attention weights, respectively

The shape of the attention weights is:

  • Cross Attention: \\((N, C, L, F)\\)

  • Latent Attention: \\((N, T, L, L)\\)

Where \(N\) is the batch size, \(C\) is the number of Cross Attention heads, \(L\) is the number of Latents, \(F\) is the number of features/columns in the dataset and \(T\) is the number of Latent Attention heads

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_fastformer.TabFastFormer","title":"TabFastFormer","text":"
TabFastFormer(\n    column_idx,\n    *,\n    cat_embed_input=None,\n    cat_embed_dropout=None,\n    use_cat_bias=None,\n    cat_embed_activation=None,\n    shared_embed=None,\n    add_shared_embed=None,\n    frac_shared_embed=None,\n    continuous_cols=None,\n    cont_norm_layer=None,\n    embed_continuous_method=\"standard\",\n    cont_embed_dropout=None,\n    cont_embed_activation=None,\n    quantization_setup=None,\n    n_frequencies=None,\n    sigma=None,\n    share_last_layer=None,\n    full_embed_dropout=None,\n    input_dim=32,\n    n_heads=8,\n    use_bias=False,\n    n_blocks=4,\n    attn_dropout=0.1,\n    ff_dropout=0.2,\n    ff_factor=4,\n    share_qv_weights=False,\n    share_weights=False,\n    transformer_activation=\"relu\",\n    mlp_hidden_dims=None,\n    mlp_activation=None,\n    mlp_dropout=None,\n    mlp_batchnorm=None,\n    mlp_batchnorm_last=None,\n    mlp_linear_first=None\n)\n

Bases: BaseTabularModelWithAttention

Defines an adaptation of a FastFormer that can be used as the deeptabular component of a Wide & Deep model or independently by itself.

Most of the parameters for this class are Optional since the use of categorical or continuous features is in fact optional (i.e. one can use categorical features only, continuous features only, or both).

NOTE: while there are scientific publications for the TabTransformer, SAINT and FTTransformer, the TabPerceiver and the TabFastFormer are our own adaptations of the Perceiver and the FastFormer for tabular data.

Parameters:

  • column_idx (Dict[str, int]) \u2013

    Dict containing the index of the columns that will be passed through the model. Required to slice the tensors. e.g. {'education': 0, 'relationship': 1, 'workclass': 2, ...}.

  • cat_embed_input (Optional[List[Tuple[str, int]]], default: None ) \u2013

    List of Tuples with the column name and number of unique values for each categorical column. e.g. [(education, 11), ...]

  • cat_embed_dropout (Optional[float], default: None ) \u2013

    Categorical embeddings dropout. If None, it will default to 0.

  • use_cat_bias (Optional[bool], default: None ) \u2013

    Boolean indicating if bias will be used for the categorical embeddings. If None, it will default to 'False'.

  • cat_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the categorical embeddings, if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • shared_embed (Optional[bool], default: None ) \u2013

    Boolean indicating if the embeddings will be \"shared\". The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • add_shared_embed (Optional[bool], default: None ) \u2013

    The two embedding sharing strategies are: 1) add the shared embeddings to the column embeddings or 2) to replace the first frac_shared_embed with the shared embeddings. See pytorch_widedeep.models.embeddings_layers.SharedEmbeddings If 'None' is passed, it will default to 'False'.

  • frac_shared_embed (Optional[float], default: None ) \u2013

    The fraction of embeddings that will be shared (if add_shared_embed = False) by all the different categories for one particular column. If 'None' is passed, it will default to 0.0.

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the numeric (aka continuous) columns

  • cont_norm_layer (Optional[Literal[batchnorm, layernorm]], default: None ) \u2013

    Type of normalization layer applied to the continuous features. Options are: 'layernorm' and 'batchnorm'. if None, no normalization layer will be used.

  • embed_continuous_method (Optional[Literal[standard, piecewise, periodic]], default: 'standard' ) \u2013

    Method to use to embed the continuous features. Options are: 'standard', 'periodic' or 'piecewise'. The 'standard' embedding method is based on the FT-Transformer implementation presented in the paper: Revisiting Deep Learning Models for Tabular Data. The 'periodic' and 'piecewise' methods were presented in the paper: On Embeddings for Numerical Features in Tabular Deep Learning. Please read the papers for details.

  • cont_embed_dropout (Optional[float], default: None ) \u2013

    Dropout for the continuous embeddings. If None, it will default to 0.0

  • cont_embed_activation (Optional[str], default: None ) \u2013

    Activation function for the continuous embeddings if any. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If None, no activation function will be applied.

  • quantization_setup (Optional[Dict[str, List[float]]], default: None ) \u2013

    This parameter is used when the 'piecewise' method is used to embed the continuous cols. It is a dict where keys are the names of the continuous columns and values are lists with the boundaries for the quantization of the continuous_cols. See the examples for details. If the 'piecewise' method is used, this parameter is required.

  • n_frequencies (Optional[int], default: None ) \u2013

    This is the so called 'k' in their paper On Embeddings for Numerical Features in Tabular Deep Learning, and is the number of 'frequencies' that will be used to represent each continuous column. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • sigma (Optional[float], default: None ) \u2013

    This is the sigma parameter in the paper mentioned when describing the previous parameters and it is used to initialise the 'frequency weights'. See their Eq 2 in the paper for details. If the 'periodic' method is used, this parameter is required.

  • share_last_layer (Optional[bool], default: None ) \u2013

    This parameter is not present in the aforementioned paper but it is implemented in the official repo. If True, the linear layer that turns the frequencies into embeddings will be shared across the continuous columns. If False, a different linear layer will be used for each continuous column. If the 'periodic' method is used, this parameter is required.

  • full_embed_dropout (Optional[bool], default: None ) \u2013

    If True, the full embedding corresponding to a column will be masked out/dropout. If None, it will default to False.

  • input_dim (int, default: 32 ) \u2013

    The so-called dimension of the model. This is the dimension of the embeddings used to encode the categorical and/or continuous columns.

  • n_heads (int, default: 8 ) \u2013

    Number of attention heads per FastFormer block

  • use_bias (bool, default: False ) \u2013

    Boolean indicating whether or not to use bias in the Q, K, and V projection layers

  • n_blocks (int, default: 4 ) \u2013

    Number of FastFormer blocks

  • attn_dropout (float, default: 0.1 ) \u2013

    Dropout that will be applied to the Additive Attention layers

  • ff_dropout (float, default: 0.2 ) \u2013

    Dropout that will be applied to the FeedForward network

  • ff_factor (int, default: 4 ) \u2013

    Multiplicative factor applied to the first layer of the FF network in each Transformer block. This is normally set to 4.

  • share_qv_weights (bool, default: False ) \u2013

    Following the paper, this is a boolean indicating if the Value (\\(V\\)) and the Query (\\(Q\\)) transformation parameters will be shared.

  • share_weights (bool, default: False ) \u2013

    In addition to sharing the \(V\) and \(Q\) transformation parameters, the parameters across different Fastformer layers can also be shared. Please see pytorch_widedeep/models/tabular/transformers/tab_fastformer.py for details.

  • transformer_activation (str, default: 'relu' ) \u2013

    Transformer Encoder activation function. 'tanh', 'relu', 'leaky_relu', 'gelu', 'geglu' and 'reglu' are supported

  • mlp_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the MLP. e.g: [64, 32]. If not provided, no MLP on top of the final Transformer block will be used.

  • mlp_activation (Optional[str], default: None ) \u2013

    Activation function for the dense layers of the MLP. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 'relu'.

  • mlp_dropout (Optional[float], default: None ) \u2013

    float with the dropout between the dense layers of the MLP. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to 0.0.

  • mlp_batchnorm (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_batchnorm_last (Optional[bool], default: None ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to False.

  • mlp_linear_first (Optional[bool], default: None ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]. If 'mlp_hidden_dims' is not None and this parameter is None, it will default to True.

Attributes:

  • encoder (Module) \u2013

    Sequence of FastFormer blocks.

  • mlp (Module) \u2013

    MLP component in the model

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import TabFastFormer\n>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)\n>>> colnames = ['a', 'b', 'c', 'd', 'e']\n>>> cat_embed_input = [(u,i) for u,i in zip(colnames[:4], [4]*4)]\n>>> continuous_cols = ['e']\n>>> column_idx = {k:v for v,k in enumerate(colnames)}\n>>> model = TabFastFormer(column_idx=column_idx, cat_embed_input=cat_embed_input, continuous_cols=continuous_cols)\n>>> out = model(X_tab)\n
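For illustration, a hedged sketch of the two weight-sharing options described above, reusing the inputs from the example (whether sharing helps is dataset-dependent):

>>> model_shared = TabFastFormer(
...     column_idx=column_idx,
...     cat_embed_input=cat_embed_input,
...     continuous_cols=continuous_cols,
...     share_qv_weights=True,  # share the Q and V transformations within each block
...     share_weights=True,     # additionally share parameters across FastFormer blocks
... )
>>> out = model_shared(X_tab)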
Source code in pytorch_widedeep/models/tabular/transformers/tab_fastformer.py
def __init__(\n    self,\n    column_idx: Dict[str, int],\n    *,\n    cat_embed_input: Optional[List[Tuple[str, int]]] = None,\n    cat_embed_dropout: Optional[float] = None,\n    use_cat_bias: Optional[bool] = None,\n    cat_embed_activation: Optional[str] = None,\n    shared_embed: Optional[bool] = None,\n    add_shared_embed: Optional[bool] = None,\n    frac_shared_embed: Optional[float] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cont_norm_layer: Optional[Literal[\"batchnorm\", \"layernorm\"]] = None,\n    embed_continuous_method: Optional[\n        Literal[\"standard\", \"piecewise\", \"periodic\"]\n    ] = \"standard\",\n    cont_embed_dropout: Optional[float] = None,\n    cont_embed_activation: Optional[str] = None,\n    quantization_setup: Optional[Dict[str, List[float]]] = None,\n    n_frequencies: Optional[int] = None,\n    sigma: Optional[float] = None,\n    share_last_layer: Optional[bool] = None,\n    full_embed_dropout: Optional[bool] = None,\n    input_dim: int = 32,\n    n_heads: int = 8,\n    use_bias: bool = False,\n    n_blocks: int = 4,\n    attn_dropout: float = 0.1,\n    ff_dropout: float = 0.2,\n    ff_factor: int = 4,\n    share_qv_weights: bool = False,\n    share_weights: bool = False,\n    transformer_activation: str = \"relu\",\n    mlp_hidden_dims: Optional[List[int]] = None,\n    mlp_activation: Optional[str] = None,\n    mlp_dropout: Optional[float] = None,\n    mlp_batchnorm: Optional[bool] = None,\n    mlp_batchnorm_last: Optional[bool] = None,\n    mlp_linear_first: Optional[bool] = None,\n):\n    super(TabFastFormer, self).__init__(\n        column_idx=column_idx,\n        cat_embed_input=cat_embed_input,\n        cat_embed_dropout=cat_embed_dropout,\n        use_cat_bias=use_cat_bias,\n        cat_embed_activation=cat_embed_activation,\n        shared_embed=shared_embed,\n        add_shared_embed=add_shared_embed,\n        frac_shared_embed=frac_shared_embed,\n        continuous_cols=continuous_cols,\n        cont_norm_layer=cont_norm_layer,\n        embed_continuous=None,\n        embed_continuous_method=embed_continuous_method,\n        cont_embed_dropout=cont_embed_dropout,\n        cont_embed_activation=cont_embed_activation,\n        input_dim=input_dim,\n        quantization_setup=quantization_setup,\n        n_frequencies=n_frequencies,\n        sigma=sigma,\n        share_last_layer=share_last_layer,\n        full_embed_dropout=full_embed_dropout,\n    )\n\n    self.n_heads = n_heads\n    self.use_bias = use_bias\n    self.n_blocks = n_blocks\n    self.attn_dropout = attn_dropout\n    self.ff_dropout = ff_dropout\n    self.ff_factor = ff_factor\n    self.share_qv_weights = share_qv_weights\n    self.share_weights = share_weights\n    self.transformer_activation = transformer_activation\n\n    self.mlp_hidden_dims = mlp_hidden_dims\n    self.mlp_activation = mlp_activation\n    self.mlp_dropout = mlp_dropout\n    self.mlp_batchnorm = mlp_batchnorm\n    self.mlp_batchnorm_last = mlp_batchnorm_last\n    self.mlp_linear_first = mlp_linear_first\n\n    self.with_cls_token = \"cls_token\" in column_idx\n    self.n_cat = len(cat_embed_input) if cat_embed_input is not None else 0\n    self.n_cont = len(continuous_cols) if continuous_cols is not None else 0\n    self.n_feats = self.n_cat + self.n_cont\n\n    # Embeddings are instantiated at the base model\n    # Transformer blocks\n    self.encoder = nn.Sequential()\n    first_fastformer_block = FastFormerEncoder(\n        input_dim,\n        n_heads,\n        use_bias,\n        
attn_dropout,\n        ff_dropout,\n        ff_factor,\n        share_qv_weights,\n        transformer_activation,\n    )\n    self.encoder.add_module(\"fastformer_block0\", first_fastformer_block)\n    for i in range(1, n_blocks):\n        if share_weights:\n            self.encoder.add_module(\n                \"fastformer_block\" + str(i), first_fastformer_block\n            )\n        else:\n            self.encoder.add_module(\n                \"fastformer_block\" + str(i),\n                FastFormerEncoder(\n                    input_dim,\n                    n_heads,\n                    use_bias,\n                    attn_dropout,\n                    ff_dropout,\n                    ff_factor,\n                    share_qv_weights,\n                    transformer_activation,\n                ),\n            )\n\n    self.mlp_first_hidden_dim = (\n        self.input_dim if self.with_cls_token else (self.n_feats * self.input_dim)\n    )\n\n    # Mlp: adding an MLP on top of the Resnet blocks is optional and\n    # therefore all related params are optional\n    if self.mlp_hidden_dims is not None:\n        self.mlp = MLP(\n            d_hidden=[self.mlp_first_hidden_dim] + self.mlp_hidden_dim,\n            activation=(\n                \"relu\" if self.mlp_activation is None else self.mlp_activation\n            ),\n            dropout=0.0 if self.mlp_dropout is None else self.mlp_dropout,\n            batchnorm=False if self.mlp_batchnorm is None else self.mlp_batchnorm,\n            batchnorm_last=(\n                False\n                if self.mlp_batchnorm_last is None\n                else self.mlp_batchnorm_last\n            ),\n            linear_first=(\n                False if self.mlp_linear_first is None else self.mlp_linear_first\n            ),\n        )\n    else:\n        self.mlp = None\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_fastformer.TabFastFormer.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property, necessary to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.tabular.transformers.tab_fastformer.TabFastFormer.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights. Each element of the list is a tuple where the first and second elements are the \\(\\alpha\\) and \\(\\beta\\) attention weights in the paper.

The shape of the attention weights is \\((N, H, F)\\) where \\(N\\) is the batch size, \\(H\\) is the number of attention heads and \\(F\\) is the number of features/columns in the dataset

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.attentive_rnn.BasicRNN","title":"BasicRNN","text":"
BasicRNN(\n    vocab_size,\n    embed_dim=None,\n    embed_matrix=None,\n    embed_trainable=True,\n    rnn_type=\"lstm\",\n    hidden_dim=64,\n    n_layers=3,\n    rnn_dropout=0.1,\n    bidirectional=False,\n    use_hidden_state=True,\n    padding_idx=1,\n    head_hidden_dims=None,\n    head_activation=\"relu\",\n    head_dropout=None,\n    head_batchnorm=False,\n    head_batchnorm_last=False,\n    head_linear_first=False,\n)\n

Bases: BaseWDModelComponent

Standard text classifier/regressor composed of a stack of RNNs (LSTMs or GRUs) that can be used as the deeptext component of a Wide & Deep model or independently by itself.

In addition, there is the option to add a Fully Connected (FC) set of dense layers on top of the stack of RNNs

Parameters:

  • vocab_size (int) \u2013

    Number of words in the vocabulary

  • embed_dim (Optional[int], default: None ) \u2013

    Dimension of the word embeddings if non-pretrained word vectors are used

  • embed_matrix (Optional[ndarray], default: None ) \u2013

    Pretrained word embeddings

  • embed_trainable (bool, default: True ) \u2013

    Boolean indicating if the pretrained embeddings are trainable

  • rnn_type (str, default: 'lstm' ) \u2013

    String indicating the type of RNN to use. One of 'lstm' or 'gru'

  • hidden_dim (int, default: 64 ) \u2013

    Hidden dim of the RNN

  • n_layers (int, default: 3 ) \u2013

    Number of recurrent layers

  • rnn_dropout (float, default: 0.1 ) \u2013

    Dropout for each RNN layer except the last layer

  • bidirectional (bool, default: False ) \u2013

    Boolean indicating whether the stacked RNNs are bidirectional

  • use_hidden_state (bool, default: True ) \u2013

    Boolean indicating whether to use the final hidden state or the RNN's output as predicting features. Typically the former is used.

  • padding_idx (int, default: 1 ) \u2013

    index of the padding token in the padded-tokenised sequences. The TextPreprocessor class within this library uses fastai's tokenizer where the token index 0 is reserved for the 'unknown' word token. Therefore, the default value is set to 1.

  • head_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the sizes of the dense layers in the head e.g: [128, 64]

  • head_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers in the head. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • head_dropout (Optional[float], default: None ) \u2013

    Dropout of the dense layers in the head

  • head_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not to include batch normalization in the dense layers that form the 'rnn_mlp'

  • head_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not to apply batch normalization to the last of the dense layers in the head

  • head_linear_first (bool, default: False ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT].

Attributes:

  • word_embed (Module) \u2013

    word embedding matrix

  • rnn (Module) \u2013

    Stack of RNNs

  • rnn_mlp (Module) \u2013

    Stack of dense layers on top of the RNN. This will only exist if head_hidden_dims is not None

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import BasicRNN\n>>> X_text = torch.cat((torch.zeros([5,1]), torch.empty(5, 4).random_(1,4)), axis=1)\n>>> model = BasicRNN(vocab_size=4, hidden_dim=4, n_layers=2, padding_idx=0, embed_dim=4)\n>>> out = model(X_text)\n
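As an additional, illustrative sketch (the random matrix below simply stands in for real pretrained vectors), pretrained word embeddings can be passed via embed_matrix, in which case the embedding dimension is taken from the matrix itself:

>>> import numpy as np
>>> embed_matrix = np.random.rand(4, 8).astype('float32')  # illustrative matrix of shape (vocab_size, embed_dim)
>>> model = BasicRNN(vocab_size=4, embed_matrix=embed_matrix, embed_trainable=False,
...                  hidden_dim=4, n_layers=2, padding_idx=0)
>>> out = model(X_text)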
Source code in pytorch_widedeep/models/text/basic_rnn.py
def __init__(\n    self,\n    vocab_size: int,\n    embed_dim: Optional[int] = None,\n    embed_matrix: Optional[np.ndarray] = None,\n    embed_trainable: bool = True,\n    rnn_type: str = \"lstm\",\n    hidden_dim: int = 64,\n    n_layers: int = 3,\n    rnn_dropout: float = 0.1,\n    bidirectional: bool = False,\n    use_hidden_state: bool = True,\n    padding_idx: int = 1,\n    head_hidden_dims: Optional[List[int]] = None,\n    head_activation: str = \"relu\",\n    head_dropout: Optional[float] = None,\n    head_batchnorm: bool = False,\n    head_batchnorm_last: bool = False,\n    head_linear_first: bool = False,\n):\n    super(BasicRNN, self).__init__()\n\n    if embed_dim is None and embed_matrix is None:\n        raise ValueError(\n            \"If no 'embed_matrix' is passed, the embedding dimension must\"\n            \"be specified with 'embed_dim'\"\n        )\n\n    if rnn_type.lower() not in [\"lstm\", \"gru\"]:\n        raise ValueError(\n            f\"'rnn_type' must be 'lstm' or 'gru', got {rnn_type} instead\"\n        )\n\n    if (\n        embed_dim is not None\n        and embed_matrix is not None\n        and not embed_dim == embed_matrix.shape[1]\n    ):\n        warnings.warn(\n            \"the input embedding dimension {} and the dimension of the \"\n            \"pretrained embeddings {} do not match. The pretrained embeddings \"\n            \"dimension ({}) will be used\".format(\n                embed_dim, embed_matrix.shape[1], embed_matrix.shape[1]\n            ),\n            UserWarning,\n        )\n\n    self.vocab_size = vocab_size\n    self.embed_trainable = embed_trainable\n    self.embed_dim = embed_dim\n\n    self.rnn_type = rnn_type\n    self.hidden_dim = hidden_dim\n    self.n_layers = n_layers\n    self.rnn_dropout = rnn_dropout\n    self.bidirectional = bidirectional\n    self.use_hidden_state = use_hidden_state\n    self.padding_idx = padding_idx\n\n    self.head_hidden_dims = head_hidden_dims\n    self.head_activation = head_activation\n    self.head_dropout = head_dropout\n    self.head_batchnorm = head_batchnorm\n    self.head_batchnorm_last = head_batchnorm_last\n    self.head_linear_first = head_linear_first\n\n    # Embeddings\n    if embed_matrix is not None:\n        self.word_embed, self.embed_dim = self._set_embeddings(embed_matrix)\n    else:\n        self.word_embed = nn.Embedding(\n            self.vocab_size, self.embed_dim, padding_idx=self.padding_idx\n        )\n\n    # RNN\n    rnn_params = {\n        \"input_size\": self.embed_dim,\n        \"hidden_size\": hidden_dim,\n        \"num_layers\": n_layers,\n        \"bidirectional\": bidirectional,\n        \"dropout\": rnn_dropout,\n        \"batch_first\": True,\n    }\n    if self.rnn_type.lower() == \"lstm\":\n        self.rnn: Union[nn.LSTM, nn.GRU] = nn.LSTM(**rnn_params)\n    elif self.rnn_type.lower() == \"gru\":\n        self.rnn = nn.GRU(**rnn_params)\n\n    self.rnn_output_dim = hidden_dim * 2 if bidirectional else hidden_dim\n\n    # FC-Head (Mlp)\n    if self.head_hidden_dims is not None:\n        head_hidden_dims = [self.rnn_output_dim] + self.head_hidden_dims\n        self.rnn_mlp: Union[MLP, nn.Identity] = MLP(\n            head_hidden_dims,\n            head_activation,\n            head_dropout,\n            head_batchnorm,\n            head_batchnorm_last,\n            head_linear_first,\n        )\n    else:\n        # simple hack to add readability in the forward pass\n        self.rnn_mlp = nn.Identity()\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.attentive_rnn.BasicRNN.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property necessary to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.attentive_rnn.AttentiveRNN","title":"AttentiveRNN","text":"
AttentiveRNN(\n    vocab_size,\n    embed_dim=None,\n    embed_matrix=None,\n    embed_trainable=True,\n    rnn_type=\"lstm\",\n    hidden_dim=64,\n    n_layers=3,\n    rnn_dropout=0.1,\n    bidirectional=False,\n    use_hidden_state=True,\n    padding_idx=1,\n    attn_concatenate=True,\n    attn_dropout=0.1,\n    head_hidden_dims=None,\n    head_activation=\"relu\",\n    head_dropout=None,\n    head_batchnorm=False,\n    head_batchnorm_last=False,\n    head_linear_first=False,\n)\n

Bases: BasicRNN

Text classifier/regressor composed of a stack of RNNs (LSTMs or GRUs) plus an attention layer. This model can be used as the deeptext component of a Wide & Deep model or independently by itself.

In addition, there is the option to add a Fully Connected (FC) set of dense layers on top of the attention layer

Parameters:

  • vocab_size (int) \u2013

    Number of words in the vocabulary

  • embed_dim (Optional[int], default: None ) \u2013

    Dimension of the word embeddings if non-pretrained word vectors are used

  • embed_matrix (Optional[ndarray], default: None ) \u2013

    Pretrained word embeddings

  • embed_trainable (bool, default: True ) \u2013

    Boolean indicating if the pretrained embeddings are trainable

  • rnn_type (str, default: 'lstm' ) \u2013

    String indicating the type of RNN to use. One of 'lstm' or 'gru'

  • hidden_dim (int, default: 64 ) \u2013

    Hidden dim of the RNN

  • n_layers (int, default: 3 ) \u2013

    Number of recurrent layers

  • rnn_dropout (float, default: 0.1 ) \u2013

    Dropout for each RNN layer except the last layer

  • bidirectional (bool, default: False ) \u2013

    Boolean indicating whether the stacked RNNs are bidirectional

  • use_hidden_state (bool, default: True ) \u2013

    Boolean indicating whether to use the final hidden state or the RNN's output as predicting features. Typically the former is used.

  • padding_idx (int, default: 1 ) \u2013

    index of the padding token in the padded-tokenised sequences. The TextPreprocessor class within this library uses fastai's tokenizer where the token index 0 is reserved for the 'unknown' word token. Therefore, the default value is set to 1.

  • attn_concatenate (bool, default: True ) \u2013

    Boolean indicating if the input to the attention mechanism will be the output of the RNN or the output of the RNN concatenated with the last hidden state.

  • attn_dropout (float, default: 0.1 ) \u2013

    Internal dropout for the attention mechanism

  • head_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the sizes of the dense layers in the head e.g: [128, 64]

  • head_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers in the head. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • head_dropout (Optional[float], default: None ) \u2013

    Dropout of the dense layers in the head

  • head_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not to include batch normalization in the dense layers that form the 'rnn_mlp'

  • head_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not to apply batch normalization to the last of the dense layers in the head

  • head_linear_first (bool, default: False ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]

Attributes:

  • word_embed (Module) \u2013

    word embedding matrix

  • rnn (Module) \u2013

    Stack of RNNs

  • rnn_mlp (Module) \u2013

    Stack of dense layers on top of the RNN. This will only exist if head_hidden_dims is not None

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import AttentiveRNN\n>>> X_text = torch.cat((torch.zeros([5,1]), torch.empty(5, 4).random_(1,4)), axis=1)\n>>> model = AttentiveRNN(vocab_size=4, hidden_dim=4, n_layers=2, padding_idx=0, embed_dim=4)\n>>> out = model(X_text)\n
Source code in pytorch_widedeep/models/text/attentive_rnn.py
def __init__(\n    self,\n    vocab_size: int,\n    embed_dim: Optional[int] = None,\n    embed_matrix: Optional[np.ndarray] = None,\n    embed_trainable: bool = True,\n    rnn_type: str = \"lstm\",\n    hidden_dim: int = 64,\n    n_layers: int = 3,\n    rnn_dropout: float = 0.1,\n    bidirectional: bool = False,\n    use_hidden_state: bool = True,\n    padding_idx: int = 1,\n    attn_concatenate: bool = True,\n    attn_dropout: float = 0.1,\n    head_hidden_dims: Optional[List[int]] = None,\n    head_activation: str = \"relu\",\n    head_dropout: Optional[float] = None,\n    head_batchnorm: bool = False,\n    head_batchnorm_last: bool = False,\n    head_linear_first: bool = False,\n):\n    super(AttentiveRNN, self).__init__(\n        vocab_size=vocab_size,\n        embed_dim=embed_dim,\n        embed_matrix=embed_matrix,\n        embed_trainable=embed_trainable,\n        rnn_type=rnn_type,\n        hidden_dim=hidden_dim,\n        n_layers=n_layers,\n        rnn_dropout=rnn_dropout,\n        bidirectional=bidirectional,\n        use_hidden_state=use_hidden_state,\n        padding_idx=padding_idx,\n        head_hidden_dims=head_hidden_dims,\n        head_activation=head_activation,\n        head_dropout=head_dropout,\n        head_batchnorm=head_batchnorm,\n        head_batchnorm_last=head_batchnorm_last,\n        head_linear_first=head_linear_first,\n    )\n\n    # Embeddings and RNN defined in the BasicRNN inherited class\n\n    # Attention\n    self.attn_concatenate = attn_concatenate\n    self.attn_dropout = attn_dropout\n\n    if bidirectional and attn_concatenate:\n        self.rnn_output_dim = hidden_dim * 4\n    elif bidirectional or attn_concatenate:\n        self.rnn_output_dim = hidden_dim * 2\n    else:\n        self.rnn_output_dim = hidden_dim\n    self.attn = ContextAttention(\n        self.rnn_output_dim, attn_dropout, sum_along_seq=True\n    )\n\n    # FC-Head (Mlp)\n    if self.head_hidden_dims is not None:\n        head_hidden_dims = [self.rnn_output_dim] + self.head_hidden_dims\n        self.rnn_mlp = MLP(\n            head_hidden_dims,\n            head_activation,\n            head_dropout,\n            head_batchnorm,\n            head_batchnorm_last,\n            head_linear_first,\n        )\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.attentive_rnn.AttentiveRNN.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights

The shape of the attention weights is \\((N, S)\\), where \\(N\\) is the batch size and \\(S\\) is the length of the sequence
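
As a minimal, hedged sketch (reusing the model and X_text from the AttentiveRNN example above), the weights can be inspected after a forward pass; per the description above they have shape \\((N, S)\\):

>>> out = model(X_text)  # the forward pass populates the attention weights\n>>> attn_weights = model.attention_weights  # attention weights of shape (N, S)\n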

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.stacked_attentive_rnn.StackedAttentiveRNN","title":"StackedAttentiveRNN","text":"
StackedAttentiveRNN(\n    vocab_size,\n    embed_dim=None,\n    embed_matrix=None,\n    embed_trainable=True,\n    rnn_type=\"lstm\",\n    hidden_dim=64,\n    bidirectional=False,\n    padding_idx=1,\n    n_blocks=3,\n    attn_concatenate=False,\n    attn_dropout=0.1,\n    with_addnorm=False,\n    head_hidden_dims=None,\n    head_activation=\"relu\",\n    head_dropout=None,\n    head_batchnorm=False,\n    head_batchnorm_last=False,\n    head_linear_first=False,\n)\n

Bases: BaseWDModelComponent

Text classifier/regressor composed of a stack of blocks: [RNN + Attention]. This can be used as the deeptext component of a Wide & Deep model or independently by itself.

In addition, there is the option to add a Fully Connected (FC) set of dense layers on top of the attention blocks

Parameters:

  • vocab_size (int) \u2013

    Number of words in the vocabulary

  • embed_dim (Optional[int], default: None ) \u2013

    Dimension of the word embeddings if non-pretrained word vectors are used

  • embed_matrix (Optional[ndarray], default: None ) \u2013

    Pretrained word embeddings

  • embed_trainable (bool, default: True ) \u2013

    Boolean indicating if the pretrained embeddings are trainable

  • rnn_type (str, default: 'lstm' ) \u2013

    String indicating the type of RNN to use. One of 'lstm' or 'gru'

  • hidden_dim (int, default: 64 ) \u2013

    Hidden dim of the RNN

  • bidirectional (bool, default: False ) \u2013

    Boolean indicating whether the stacked RNNs are bidirectional

  • padding_idx (int, default: 1 ) \u2013

    index of the padding token in the padded-tokenised sequences. The TextPreprocessor class within this library uses fastai's tokenizer where the token index 0 is reserved for the 'unknown' word token. Therefore, the default value is set to 1.

  • n_blocks (int, default: 3 ) \u2013

    Number of attention blocks. Each block is composed of an RNN and a Context Attention Encoder

  • attn_concatenate (bool, default: False ) \u2013

    Boolean indicating if the input to the attention mechanism will be the output of the RNN or the output of the RNN concatenated with the last hidden state.

  • attn_dropout (float, default: 0.1 ) \u2013

    Internal dropout for the attention mechanism

  • with_addnorm (bool, default: False ) \u2013

    Boolean indicating if the output of each block will be added to the input and normalised

  • head_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the sizes of the dense layers in the head e.g: [128, 64]

  • head_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers in the head. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • head_dropout (Optional[float], default: None ) \u2013

    Dropout of the dense layers in the head

  • head_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not to include batch normalization in the dense layers that form the 'rnn_mlp'

  • head_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not to apply batch normalization to the last of the dense layers in the head

  • head_linear_first (bool, default: False ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]

Attributes:

  • word_embed (Module) \u2013

    word embedding matrix

  • rnn (Module) \u2013

    Stack of RNNs

  • rnn_mlp (Module) \u2013

    Stack of dense layers on top of the RNN. This will only exist if head_hidden_dims is not None

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import StackedAttentiveRNN\n>>> X_text = torch.cat((torch.zeros([5,1]), torch.empty(5, 4).random_(1,4)), axis=1)\n>>> model = StackedAttentiveRNN(vocab_size=4, hidden_dim=4, padding_idx=0, embed_dim=4)\n>>> out = model(X_text)\n
Source code in pytorch_widedeep/models/text/stacked_attentive_rnn.py
def __init__(\n    self,\n    vocab_size: int,\n    embed_dim: Optional[int] = None,\n    embed_matrix: Optional[np.ndarray] = None,\n    embed_trainable: bool = True,\n    rnn_type: str = \"lstm\",\n    hidden_dim: int = 64,\n    bidirectional: bool = False,\n    padding_idx: int = 1,\n    n_blocks: int = 3,\n    attn_concatenate: bool = False,\n    attn_dropout: float = 0.1,\n    with_addnorm: bool = False,\n    head_hidden_dims: Optional[List[int]] = None,\n    head_activation: str = \"relu\",\n    head_dropout: Optional[float] = None,\n    head_batchnorm: bool = False,\n    head_batchnorm_last: bool = False,\n    head_linear_first: bool = False,\n):\n    super(StackedAttentiveRNN, self).__init__()\n\n    if (\n        embed_dim is not None\n        and embed_matrix is not None\n        and not embed_dim == embed_matrix.shape[1]\n    ):\n        warnings.warn(\n            \"the input embedding dimension {} and the dimension of the \"\n            \"pretrained embeddings {} do not match. The pretrained embeddings \"\n            \"dimension ({}) will be used\".format(\n                embed_dim, embed_matrix.shape[1], embed_matrix.shape[1]\n            ),\n            UserWarning,\n        )\n\n    if rnn_type.lower() not in [\"lstm\", \"gru\"]:\n        raise ValueError(\n            f\"'rnn_type' must be 'lstm' or 'gru', got {rnn_type} instead\"\n        )\n\n    self.vocab_size = vocab_size\n    self.embed_trainable = embed_trainable\n    self.embed_dim = embed_dim\n\n    self.rnn_type = rnn_type\n    self.hidden_dim = hidden_dim\n    self.bidirectional = bidirectional\n    self.padding_idx = padding_idx\n\n    self.n_blocks = n_blocks\n    self.attn_concatenate = attn_concatenate\n    self.attn_dropout = attn_dropout\n    self.with_addnorm = with_addnorm\n\n    self.head_hidden_dims = head_hidden_dims\n    self.head_activation = head_activation\n    self.head_dropout = head_dropout\n    self.head_batchnorm = head_batchnorm\n    self.head_batchnorm_last = head_batchnorm_last\n    self.head_linear_first = head_linear_first\n\n    # Embeddings\n    self.word_embed, self.embed_dim = self._set_embeddings(embed_matrix)\n\n    # Linear Projection: if embed_dim is different that the input of the\n    # attention blocks we add a linear projection\n    if bidirectional and attn_concatenate:\n        self.rnn_output_dim = hidden_dim * 4\n    elif bidirectional or attn_concatenate:\n        self.rnn_output_dim = hidden_dim * 2\n    else:\n        self.rnn_output_dim = hidden_dim\n\n    if self.rnn_output_dim != self.embed_dim:\n        self.embed_proj: Union[nn.Linear, nn.Identity] = nn.Linear(\n            self.embed_dim, self.rnn_output_dim\n        )\n    else:\n        self.embed_proj = nn.Identity()\n\n    # RNN\n    rnn_params = {\n        \"input_size\": self.rnn_output_dim,\n        \"hidden_size\": hidden_dim,\n        \"bidirectional\": bidirectional,\n        \"batch_first\": True,\n    }\n    if self.rnn_type.lower() == \"lstm\":\n        self.rnn: Union[nn.LSTM, nn.GRU] = nn.LSTM(**rnn_params)\n    elif self.rnn_type.lower() == \"gru\":\n        self.rnn = nn.GRU(**rnn_params)\n\n    # FC-Head (Mlp)\n    self.attention_blks = nn.ModuleList()\n    for i in range(n_blocks):\n        self.attention_blks.append(\n            ContextAttentionEncoder(\n                self.rnn,\n                self.rnn_output_dim,\n                attn_dropout,\n                attn_concatenate,\n                with_addnorm=with_addnorm if i != n_blocks - 1 else False,\n                sum_along_seq=i 
== n_blocks - 1,\n            )\n        )\n\n    # Mlp\n    if self.head_hidden_dims is not None:\n        head_hidden_dims = [self.rnn_output_dim] + self.head_hidden_dims\n        self.rnn_mlp: Union[MLP, nn.Identity] = MLP(\n            head_hidden_dims,\n            head_activation,\n            head_dropout,\n            head_batchnorm,\n            head_batchnorm_last,\n            head_linear_first,\n        )\n    else:\n        # simple hack to add readability in the forward pass\n        self.rnn_mlp = nn.Identity()\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.stacked_attentive_rnn.StackedAttentiveRNN.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property necessary to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.stacked_attentive_rnn.StackedAttentiveRNN.attention_weights","title":"attention_weights property","text":"
attention_weights\n

List with the attention weights per block

The shape of the attention weights is \\((N, S)\\), where \\(N\\) is the batch size and \\(S\\) is the length of the sequence
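
As a similar hedged sketch, reusing the StackedAttentiveRNN example above, after a forward pass the property returns one set of \\((N, S)\\) weights per attention block:

>>> out = model(X_text)  # the forward pass populates the attention weights\n>>> attn_weights_per_block = model.attention_weights  # one (N, S) set of weights per block\n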

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.text.basic_transformer.Transformer","title":"Transformer","text":"
Transformer(\n    vocab_size,\n    seq_length,\n    input_dim,\n    n_heads,\n    n_blocks,\n    attn_dropout=0.1,\n    ff_dropout=0.1,\n    ff_factor=4,\n    activation=\"gelu\",\n    use_linear_attention=False,\n    use_flash_attention=False,\n    padding_idx=0,\n    with_cls_token=False,\n    *,\n    with_pos_encoding=True,\n    pos_encoding_dropout=0.1,\n    pos_encoder=None\n)\n

Bases: Module

Basic Encoder-Only Transformer Model for text classification/regression. As with all other models in the library, this model can be used as the deeptext component of a Wide & Deep model or independently by itself.

NOTE: This model is introduced in the context of recommendation systems and is intended for sequences of any nature (e.g. items). It can, of course, still be used for text. However, at this stage, we have decided not to include the possibility of loading pretrained word vectors since we aim to integrate the library with Huggingface in the (hopefully) near future

Parameters:

  • vocab_size (int) \u2013

    Number of words in the vocabulary

  • input_dim (int) \u2013

    Dimension of the token embeddings

    Param aliases: embed_dim, d_model.

Attributes:

  • embedding (Module) \u2013

    Standard token embedding layer

  • pos_encoder (Module) \u2013

    Positional Encoder

  • encoder (Module) \u2013

    Sequence of Transformer blocks

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import Transformer\n>>> X_text = torch.cat((torch.zeros([5,1]), torch.empty(5, 4).random_(1,4)), axis=1)\n>>> model = Transformer(vocab_size=4, seq_length=5, input_dim=8, n_heads=1, n_blocks=1)\n>>> out = model(X_text)\n
Source code in pytorch_widedeep/models/text/basic_transformer.py
@alias(\"input_dim\", [\"embed_dim\", \"d_model\"])\n@alias(\"seq_length\", [\"max_length\", \"maxlen\"])\ndef __init__(\n    self,\n    vocab_size: int,\n    seq_length: int,\n    input_dim: int,\n    n_heads: int,\n    n_blocks: int,\n    attn_dropout: float = 0.1,\n    ff_dropout: float = 0.1,\n    ff_factor: int = 4,\n    activation: str = \"gelu\",\n    use_linear_attention: bool = False,\n    use_flash_attention: bool = False,\n    padding_idx: int = 0,\n    with_cls_token: bool = False,\n    *,  # from here on pos encoding args\n    with_pos_encoding: bool = True,\n    pos_encoding_dropout: float = 0.1,\n    pos_encoder: Optional[nn.Module] = None,\n):\n    super().__init__()\n\n    self.input_dim = input_dim\n    self.seq_length = seq_length\n    self.n_heads = n_heads\n    self.n_blocks = n_blocks\n    self.attn_dropout = attn_dropout\n    self.ff_dropout = ff_dropout\n    self.ff_factor = ff_factor\n    self.activation = activation\n    self.use_linear_attention = use_linear_attention\n    self.use_flash_attention = use_flash_attention\n    self.padding_idx = padding_idx\n    self.with_cls_token = with_cls_token\n    self.with_pos_encoding = with_pos_encoding\n    self.pos_encoding_dropout = pos_encoding_dropout\n\n    self.embedding = nn.Embedding(\n        vocab_size, input_dim, padding_idx=self.padding_idx\n    )\n\n    if with_pos_encoding:\n        if pos_encoder is not None:\n            self.pos_encoder: Union[nn.Module, nn.Identity, PositionalEncoding] = (\n                pos_encoder\n            )\n        else:\n            self.pos_encoder = PositionalEncoding(\n                input_dim, pos_encoding_dropout, seq_length\n            )\n    else:\n        self.pos_encoder = nn.Identity()\n\n    self.encoder = nn.Sequential()\n    for i in range(n_blocks):\n        self.encoder.add_module(\n            \"transformer_block\" + str(i),\n            TransformerEncoder(\n                input_dim,\n                n_heads,\n                False,  # use_qkv_bias\n                attn_dropout,\n                ff_dropout,\n                ff_factor,\n                activation,\n                use_linear_attention,\n                use_flash_attention,\n            ),\n        )\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.image.vision.Vision","title":"Vision","text":"
Vision(\n    pretrained_model_setup=None,\n    n_trainable=None,\n    trainable_params=None,\n    channel_sizes=[64, 128, 256, 512],\n    kernel_sizes=[7, 3, 3, 3],\n    strides=[2, 1, 1, 1],\n    head_hidden_dims=None,\n    head_activation=\"relu\",\n    head_dropout=0.1,\n    head_batchnorm=False,\n    head_batchnorm_last=False,\n    head_linear_first=False,\n)\n

Bases: BaseWDModelComponent

Defines a standard image classifier/regressor using a pretrained network or a sequence of convolution layers that can be used as the deepimage component of a Wide & Deep model or independently by itself.

NOTE: this class represents the integration between pytorch-widedeep and torchvision. New architectures will be available as they are added to torchvision. In the more distant future we aim to bring in transformer-based architectures as well. However, simple CNN-based architectures (and even MLP-based ones) seem to produce SoTA results. For the time being, we describe below the options available through this class.

Parameters:

  • pretrained_model_setup (Union[str, Dict[str, Union[str, WeightsEnum]]], default: None ) \u2013

    Name of the pretrained model. Should be a variant of the following architectures: 'resnet', 'shufflenet', 'resnext', 'wide_resnet', 'regnet', 'densenet', 'mobilenetv3', 'mobilenetv2', 'mnasnet', 'efficientnet' and 'squeezenet'. If pretrained_model_setup = None a basic, fully trainable CNN will be used. Alternatively, since Torchvision 0.13 one can use pretrained models with different weights. Therefore, pretrained_model_setup can also be a dictionary with the name of the model and the weights (e.g. {'resnet50': ResNet50_Weights.DEFAULT} or {'resnet50': \"IMAGENET1K_V2\"}). Aliased as pretrained_model_name.

  • n_trainable (Optional[int], default: None ) \u2013

    Number of trainable layers starting from the layer closer to the output neuron(s). Note that this number DOES NOT take into account the so-called 'head' which is ALWAYS trainable. If trainable_params is not None this parameter will be ignored

  • trainable_params (Optional[List[str]], default: None ) \u2013

    List of strings containing the names (or substring within the name) of the parameters that will be trained. For example, if we use a 'resnet18' pretrained model and we set trainable_params = ['layer4'] only the parameters of 'layer4' of the network (and the head, as mentioned before) will be trained. Note that setting this or the previous parameter involves some knowledge of the architecture used.

  • channel_sizes (List[int], default: [64, 128, 256, 512] ) \u2013

    List of integers with the channel sizes of a CNN in case we choose not to use a pretrained model

  • kernel_sizes (Union[int, List[int]], default: [7, 3, 3, 3] ) \u2013

    List of integers with the kernel sizes of a CNN in case we choose not to use a pretrained model. Must be of length equal to len(channel_sizes) - 1.

  • strides (Union[int, List[int]], default: [2, 1, 1, 1] ) \u2013

    List of integers with the stride sizes of a CNN in case we choose not to use a pretrained model. Must be of length equal to len(channel_sizes) - 1.

  • head_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the number of neurons per dense layer in the head. e.g: [64,32]

  • head_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers in the head. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • head_dropout (Union[float, List[float]], default: 0.1 ) \u2013

    float indicating the dropout between the dense layers.

  • head_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the dense layers

  • head_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not batch normalization will be applied to the last of the dense layers

  • head_linear_first (bool, default: False ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]

Attributes:

  • features (Module) \u2013

    The pretrained model or Standard CNN plus the optional head

Examples:

>>> import torch\n>>> from pytorch_widedeep.models import Vision\n>>> X_img = torch.rand((2,3,224,224))\n>>> model = Vision(channel_sizes=[64, 128], kernel_sizes = [3, 3], strides=[1, 1], head_hidden_dims=[32, 8])\n>>> out = model(X_img)\n
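
The example above builds the small, fully trainable CNN route. As a hedged sketch of the pretrained route described under pretrained_model_setup (illustrative only, since it assumes torchvision >= 0.13 and downloads the corresponding weights when executed):

>>> # illustrative only: downloads torchvision weights when executed\n>>> pretrained_vision = Vision(pretrained_model_setup={'resnet50': 'IMAGENET1K_V2'}, head_hidden_dims=[32, 8])\n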
Source code in pytorch_widedeep/models/image/vision.py
@alias(\"pretrained_model_setup\", [\"pretrained_model_name\"])\ndef __init__(\n    self,\n    pretrained_model_setup: Union[str, Dict[str, Union[str, WeightsEnum]]] = None,\n    n_trainable: Optional[int] = None,\n    trainable_params: Optional[List[str]] = None,\n    channel_sizes: List[int] = [64, 128, 256, 512],\n    kernel_sizes: Union[int, List[int]] = [7, 3, 3, 3],\n    strides: Union[int, List[int]] = [2, 1, 1, 1],\n    head_hidden_dims: Optional[List[int]] = None,\n    head_activation: str = \"relu\",\n    head_dropout: Union[float, List[float]] = 0.1,\n    head_batchnorm: bool = False,\n    head_batchnorm_last: bool = False,\n    head_linear_first: bool = False,\n):\n    super(Vision, self).__init__()\n\n    self._check_pretrained_model_setup(\n        pretrained_model_setup, n_trainable, trainable_params\n    )\n\n    self.pretrained_model_setup = pretrained_model_setup\n    self.n_trainable = n_trainable\n    self.trainable_params = trainable_params\n    self.channel_sizes = channel_sizes\n    self.kernel_sizes = kernel_sizes\n    self.strides = strides\n    self.head_hidden_dims = head_hidden_dims\n    self.head_activation = head_activation\n    self.head_dropout = head_dropout\n    self.head_batchnorm = head_batchnorm\n    self.head_batchnorm_last = head_batchnorm_last\n    self.head_linear_first = head_linear_first\n\n    self.features, self.backbone_output_dim = self._get_features()\n\n    if pretrained_model_setup is not None:\n        self._freeze(self.features)\n\n    if self.head_hidden_dims is not None:\n        head_hidden_dims = [self.backbone_output_dim] + self.head_hidden_dims\n        self.vision_mlp = MLP(\n            head_hidden_dims,\n            self.head_activation,\n            self.head_dropout,\n            self.head_batchnorm,\n            self.head_batchnorm_last,\n            self.head_linear_first,\n        )\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.image.vision.Vision.output_dim","title":"output_dim property","text":"
output_dim\n

The output dimension of the model. This is a required property necessary to build the WideDeep class

"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.wide_deep.WideDeep","title":"WideDeep","text":"
WideDeep(\n    wide=None,\n    deeptabular=None,\n    deeptext=None,\n    deepimage=None,\n    deephead=None,\n    head_hidden_dims=None,\n    head_activation=\"relu\",\n    head_dropout=0.1,\n    head_batchnorm=False,\n    head_batchnorm_last=False,\n    head_linear_first=True,\n    enforce_positive=False,\n    enforce_positive_activation=\"softplus\",\n    pred_dim=1,\n    with_fds=False,\n    **fds_config\n)\n

Bases: Module

Main collector class that combines all of the wide, deeptabular, deeptext and deepimage models.

Note that all models described so far in this library must be passed to the WideDeep class once constructed. This is because these models output the last layer of activations before the prediction layer; the prediction layer itself is added by the WideDeep class as it collects the components for every data mode.

There are two options to combine these models that correspond to the two main architectures that pytorch-widedeep can build.

  • Directly connecting the output of the model components to the output neuron(s).

  • Adding a Fully-Connected Head (FC-Head) on top of the deep models. This FC-Head will combine the output from the deeptabular, deeptext and deepimage components and will then be connected to the output neuron(s).

Parameters:

  • wide (Optional[Module], default: None ) \u2013

    Wide model. This is a linear model where the non-linearities are captured via crossed-columns.

  • deeptabular (Optional[BaseWDModelComponent], default: None ) \u2013

    Currently this library implements a number of possible architectures for the deeptabular component. See the documentation of the package.

  • deeptext (Optional[BaseWDModelComponent], default: None ) \u2013

    Currently this library implements a number of possible architectures for the deeptext component. See the documentation of the package.

  • deepimage (Optional[BaseWDModelComponent], default: None ) \u2013

    Currently this library uses torchvision and implements a number of possible architectures for the deepimage component. See the documentation of the package.

  • deephead (Optional[BaseWDModelComponent], default: None ) \u2013

    Alternatively, the user can pass a custom model that will receive the output of the deep component. If deephead is not None all the previous fc-head parameters will be ignored

  • head_hidden_dims (Optional[List[int]], default: None ) \u2013

    List with the sizes of the dense layers in the head e.g: [128, 64]

  • head_activation (str, default: 'relu' ) \u2013

    Activation function for the dense layers in the head. Currently 'tanh', 'relu', 'leaky_relu' and 'gelu' are supported

  • head_dropout (float, default: 0.1 ) \u2013

    Dropout of the dense layers in the head

  • head_batchnorm (bool, default: False ) \u2013

    Boolean indicating whether or not to include batch normalization in the dense layers that form the head

  • head_batchnorm_last (bool, default: False ) \u2013

    Boolean indicating whether or not to apply batch normalization to the last of the dense layers in the head

  • head_linear_first (bool, default: True ) \u2013

    Boolean indicating the order of the operations in the dense layer. If True: [LIN -> ACT -> BN -> DP]. If False: [BN -> DP -> LIN -> ACT]

  • enforce_positive (bool, default: False ) \u2013

    Boolean indicating if the output from the final layer must be positive. This is important if you are using loss functions with non-negative input restrictions, e.g. RMSLE, or if you know your predictions are bounded between 0 and inf

  • enforce_positive_activation (str, default: 'softplus' ) \u2013

    Activation function to enforce that the final layer has a positive output. 'softplus' or 'relu' are supported.

  • pred_dim (int, default: 1 ) \u2013

    Size of the final wide and deep output layer containing the predictions. 1 for regression and binary classification or number of classes for multiclass classification.

  • with_fds (bool, default: False ) \u2013

    Boolean indicating if Feature Distribution Smoothing (FDS) will be applied before the final prediction layer. Only available for regression problems. See Delving into Deep Imbalanced Regression for details.

Other Parameters:

  • **fds_config \u2013

    Dictionary with the parameters to be used when using Feature Distribution Smoothing. Please, see the docs for the FDSLayer. NOTE: Feature Distribution Smoothing is available when using ONLY a deeptabular component. NOTE: We consider this feature absolutely experimental and we recommend not using it unless the corresponding publication is well understood

Examples:

>>> from pytorch_widedeep.models import TabResnet, Vision, BasicRNN, Wide, WideDeep\n>>> embed_input = [(u, i, j) for u, i, j in zip([\"a\", \"b\", \"c\"][:4], [4] * 3, [8] * 3)]\n>>> column_idx = {k: v for v, k in enumerate([\"a\", \"b\", \"c\"])}\n>>> wide = Wide(10, 1)\n>>> deeptabular = TabResnet(blocks_dims=[8, 4], column_idx=column_idx, cat_embed_input=embed_input)\n>>> deeptext = BasicRNN(vocab_size=10, embed_dim=4, padding_idx=0)\n>>> deepimage = Vision()\n>>> model = WideDeep(wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage)\n
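
Reusing the components built in the example above, the second architecture (an FC-Head combining the deep components before the output neuron(s)) is obtained by additionally passing head_hidden_dims; a brief sketch:

>>> model_with_head = WideDeep(wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage, head_hidden_dims=[64, 32])\n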

NOTE: It is possible to use custom components to build Wide & Deep models. Simply build them and pass them as the corresponding parameters. Note that the custom models MUST return a last layer of activations (i.e. not the final prediction) so that these activations are collected by WideDeep and combined accordingly. In addition, the models MUST also contain an attribute output_dim with the size of these last layers of activations. See for example pytorch_widedeep.models.tab_mlp.TabMlp
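
A rough, hypothetical sketch of such a custom component follows (the class name and its internals are illustrative only and not part of the library; the sketch assumes, as the note above describes, that a module returning its last layer of activations and exposing output_dim can be plugged in):

>>> import torch\n>>> from torch import nn\n>>> from pytorch_widedeep.models import WideDeep\n>>> class MyDeepText(nn.Module):  # hypothetical custom 'deeptext' component\n...     def __init__(self, vocab_size=10, embed_dim=4, hidden_dim=8):\n...         super().__init__()\n...         self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=0)\n...         self.rnn = nn.GRU(embed_dim, hidden_dim, batch_first=True)\n...     @property\n...     def output_dim(self):  # required so WideDeep can add the prediction layer\n...         return self.rnn.hidden_size\n...     def forward(self, X):  # returns activations, NOT the final prediction\n...         _, h = self.rnn(self.embed(X.long()))\n...         return h[-1]\n>>> custom_model = WideDeep(deeptext=MyDeepText())\n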

Source code in pytorch_widedeep/models/wide_deep.py
@alias(  # noqa: C901\n    \"pred_dim\",\n    [\"num_class\", \"pred_size\"],\n)\ndef __init__(\n    self,\n    wide: Optional[nn.Module] = None,\n    deeptabular: Optional[BaseWDModelComponent] = None,\n    deeptext: Optional[BaseWDModelComponent] = None,\n    deepimage: Optional[BaseWDModelComponent] = None,\n    deephead: Optional[BaseWDModelComponent] = None,\n    head_hidden_dims: Optional[List[int]] = None,\n    head_activation: str = \"relu\",\n    head_dropout: float = 0.1,\n    head_batchnorm: bool = False,\n    head_batchnorm_last: bool = False,\n    head_linear_first: bool = True,\n    enforce_positive: bool = False,\n    enforce_positive_activation: str = \"softplus\",\n    pred_dim: int = 1,\n    with_fds: bool = False,\n    **fds_config,\n):\n    super(WideDeep, self).__init__()\n\n    self._check_inputs(\n        wide,\n        deeptabular,\n        deeptext,\n        deepimage,\n        deephead,\n        head_hidden_dims,\n        pred_dim,\n        with_fds,\n    )\n\n    # this attribute will be eventually over-written by the Trainer's\n    # device. Acts here as a 'placeholder'.\n    self.wd_device: Optional[str] = None\n\n    # required as attribute just in case we pass a deephead\n    self.pred_dim = pred_dim\n\n    self.with_fds = with_fds\n    self.enforce_positive = enforce_positive\n\n    # The main 5 components of the wide and deep assemble: wide,\n    # deeptabular, deeptext, deepimage and deephead\n    self.with_deephead = deephead is not None or head_hidden_dims is not None\n    if deephead is None and head_hidden_dims is not None:\n        self.deephead = self._build_deephead(\n            deeptabular,\n            deeptext,\n            deepimage,\n            head_hidden_dims,\n            head_activation,\n            head_dropout,\n            head_batchnorm,\n            head_batchnorm_last,\n            head_linear_first,\n        )\n    elif deephead is not None:\n        self.deephead = nn.Sequential(\n            deephead, nn.Linear(deephead.output_dim, self.pred_dim)\n        )\n    else:\n        # for consistency with other components we default to None\n        self.deephead = None\n\n    self.wide = wide\n    self.deeptabular, self.deeptext, self.deepimage = self._set_model_components(\n        deeptabular, deeptext, deepimage, self.with_deephead\n    )\n\n    if self.with_fds:\n        self.fds_layer = FDSLayer(feature_dim=self.deeptabular.output_dim, **fds_config)  # type: ignore[arg-type]\n\n    if self.enforce_positive:\n        self.enf_pos = get_activation_fn(enforce_positive_activation)\n
"},{"location":"pytorch-widedeep/model_components.html#pytorch_widedeep.models.fds_layer.FDSLayer","title":"FDSLayer","text":"
FDSLayer(\n    feature_dim,\n    granularity=100,\n    y_max=None,\n    y_min=None,\n    start_update=0,\n    start_smooth=2,\n    kernel=\"gaussian\",\n    ks=5,\n    sigma=2,\n    momentum=0.9,\n    clip_min=None,\n    clip_max=None,\n)\n

Bases: Module

Feature Distribution Smoothing layer. Please, see Delving into Deep Imbalanced Regression for details.

NOTE: this is NOT an available model per se, but rather a utility that can be used as we run a WideDeep model. The parameters of this extra layer can be set when the class WideDeep is instantiated, via the keyword arguments fds_config.

NOTE: Feature Distribution Smoothing is available when using ONLY a deeptabular component

NOTE: We consider this feature absolutely experimental and we recommend not using it unless the corresponding publication is well understood

The code here is based on the code at the official repo
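
As a hedged sketch of how this layer is typically activated for a regression problem (the TabMlp set-up below is illustrative only; the keyword arguments after with_fds=True are simply forwarded to this FDSLayer by WideDeep):

>>> from pytorch_widedeep.models import TabMlp, WideDeep\n>>> column_idx = {'a': 0, 'b': 1, 'c': 2}\n>>> cat_embed_input = [('a', 4, 8), ('b', 4, 8), ('c', 4, 8)]\n>>> deeptabular = TabMlp(column_idx=column_idx, cat_embed_input=cat_embed_input, mlp_hidden_dims=[16, 8])\n>>> model = WideDeep(deeptabular=deeptabular, with_fds=True, granularity=100, kernel='gaussian', ks=5, sigma=2)\n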

Parameters:

  • feature_dim (int) \u2013

    input dimension size, i.e. output size of previous layer. This will be the dimension of the output from the deeptabular component

  • granularity (int, default: 100 ) \u2013

    number of bins that the target \\(y\\) is divided into and that will be used to compute the features' statistics (mean and variance)

  • y_max (Optional[float], default: None ) \u2013

    \\(y\\) upper limit to be considered when binning

  • y_min (Optional[float], default: None ) \u2013

    \\(y\\) lower limit to be considered when binning

  • start_update (int, default: 0 ) \u2013

    number of 'waiting epochs' after which the FDS layer will start to update its statistics

  • start_smooth (int, default: 2 ) \u2013

    number of 'waiting epochs' after which the FDS layer will start smoothing the feature distributions

  • kernel (Literal[gaussian, triang, laplace], default: 'gaussian' ) \u2013

    choice of smoothing kernel

  • ks (int, default: 5 ) \u2013

    kernel window size

  • sigma (float, default: 2 ) \u2013

    if a 'gaussian' or 'laplace' kernel is used, this is the corresponding standard deviation

  • momentum (Optional[float], default: 0.9 ) \u2013

    to train the layer the authors used a momentum update of the running statistics across each epoch. Set to 0.9 in the paper.

  • clip_min (Optional[float], default: None ) \u2013

    this parameter is used to clip the ratio between the so-called running variance and the smoothed variance, and is introduced for numerical stability. We leave it as optional as we did not find a notable improvement in our experiments. The authors used a value of 0.1

  • clip_max (Optional[float], default: None ) \u2013

    same as clip_min but for the upper limit. We leave it as optional as we did not find a notable improvement in our experiments. The authors used a value of 10.

Source code in pytorch_widedeep/models/fds_layer.py
def __init__(\n    self,\n    feature_dim: int,\n    granularity: int = 100,\n    y_max: Optional[float] = None,\n    y_min: Optional[float] = None,\n    start_update: int = 0,\n    start_smooth: int = 2,\n    kernel: Literal[\"gaussian\", \"triang\", \"laplace\"] = \"gaussian\",\n    ks: int = 5,\n    sigma: float = 2,\n    momentum: Optional[float] = 0.9,\n    clip_min: Optional[float] = None,\n    clip_max: Optional[float] = None,\n):\n    \"\"\"\n    Feature Distribution Smoothing layer. Please, see\n    [Delving into Deep Imbalanced Regression](https:/arxiv.org/abs/2102.09554)\n    for details.\n\n    :information_source: **NOTE**: this is NOT an available model per se,\n     but more a utility that can be used as we run a `WideDeep` model.\n     The parameters of this extra layers can be set as the class\n     `WideDeep` is instantiated via the keyword arguments `fds_config`.\n\n    :information_source: **NOTE**: Feature Distribution Smoothing is\n     available when using ONLY a `deeptabular` component\n\n    :information_source: **NOTE**: We consider this feature absolutely\n    experimental and we recommend the user to not use it unless the\n    corresponding [publication](https://arxiv.org/abs/2102.09554) is\n    well understood\n\n    The code here is based on the code at the\n    [official repo](https://github.com/YyzHarry/imbalanced-regression)\n\n    Parameters\n    ----------\n    feature_dim: int,\n        input dimension size, i.e. output size of previous layer. This\n        will be the dimension of the output from the `deeptabular`\n        component\n    granularity: int = 100,\n        number of bins that the target $y$ is divided into and that will\n        be used to compute the features' statistics (mean and variance)\n    y_max: Optional[float] = None,\n        $y$ upper limit to be considered when binning\n    y_min: Optional[float] = None,\n        $y$ lower limit to be considered when binning\n    start_update: int = 0,\n        number of _'waiting epochs' after which the FDS layer will start\n        to update its statistics\n    start_smooth: int = 1,\n        number of _'waiting epochs' after which the FDS layer will start\n        smoothing the feature distributions\n    kernel: Literal[\"gaussian\", \"triang\", \"laplace\", None] = \"gaussian\",\n        choice of smoothing kernel\n    ks: int = 5,\n        kernel window size\n    sigma: Union[int, float] = 2,\n        if a _'gaussian'_ or _'laplace'_ kernels are used, this is the\n        corresponding standard deviation\n    momentum: float = 0.9,\n        to train the layer the authors used a momentum update of the running\n        statistics across each epoch. Set to 0.9 in the paper.\n    clip_min: Optional[float] = None,\n        this parameter is used to clip the ratio between the so called\n        running variance and the smoothed variance, and is introduced for\n        numerical stability. We leave it as optional as we did not find a\n        notable improvement in our experiments. The authors used a value\n        of 0.1\n    clip_max: Optional[float] = None,\n        same as `clip_min` but for the upper limit.We leave it as optional\n        as we did not find a notable improvement in our experiments. 
The\n        authors used a value of 10.\n    \"\"\"\n    super(FDSLayer, self).__init__()\n    assert (\n        start_update + 1 < start_smooth\n    ), \"initial update must start at least 2 epoch before smoothing\"\n\n    self.feature_dim = feature_dim\n    self.granularity = granularity\n    self.y_max = y_max\n    self.y_min = y_min\n    self.kernel_window = torch.tensor(\n        get_kernel_window(kernel, ks, sigma), dtype=torch.float32\n    )\n    self.half_ks = (ks - 1) // 2\n    self.momentum = momentum\n    self.start_update = start_update\n    self.start_smooth = start_smooth\n    self.clip_min = clip_min\n    self.clip_max = clip_max\n\n    self.pred_layer = nn.Linear(feature_dim, 1)\n\n    self._register_buffers()\n
"},{"location":"pytorch-widedeep/preprocessing.html","title":"The preprocessing module","text":"

This module contains the classes that are used to prepare the data before being passed to the models. There is one Preprocessor per data mode or model component: wide, deeptabular, deepimage and deeptext.

"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.WidePreprocessor","title":"WidePreprocessor","text":"
WidePreprocessor(wide_cols, crossed_cols=None)\n

Bases: BasePreprocessor

Preprocessor to prepare the wide input dataset

This Preprocessor prepares the data for the wide, linear component. This linear model is implemented via an Embedding layer that is connected to the output neuron. WidePreprocessor numerically encodes all the unique values of all categorical columns wide_cols + crossed_cols. See the Example below.

Parameters:

  • wide_cols (List[str]) \u2013

    List of strings with the name of the columns that will be label encoded and passed through the wide component

  • crossed_cols (Optional[List[Tuple[str, str]]], default: None ) \u2013

    List of Tuples with the name of the columns that will be 'crossed' and then label encoded. e.g. [('education', 'occupation'), ...]. For binary features, a cross-product transformation is 1 if and only if the constituent features are all 1, and 0 otherwise.

Attributes:

  • wide_crossed_cols (List) \u2013

    List with the names of all columns that will be label encoded

  • encoding_dict (Dict) \u2013

    Dictionary where the keys are the result of pasting colname + '_' + column value and the values are the corresponding mapped integer.

  • inverse_encoding_dict (Dict) \u2013

    the inverse encoding dictionary

  • wide_dim (int) \u2013

    Dimension of the wide model (i.e. dim of the linear layer)

Examples:

>>> import pandas as pd\n>>> from pytorch_widedeep.preprocessing import WidePreprocessor\n>>> df = pd.DataFrame({'color': ['r', 'b', 'g'], 'size': ['s', 'n', 'l']})\n>>> wide_cols = ['color']\n>>> crossed_cols = [('color', 'size')]\n>>> wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\n>>> X_wide = wide_preprocessor.fit_transform(df)\n>>> X_wide\narray([[1, 4],\n       [2, 5],\n       [3, 6]])\n>>> wide_preprocessor.encoding_dict\n{'color_r': 1, 'color_b': 2, 'color_g': 3, 'color_size_r-s': 4, 'color_size_b-n': 5, 'color_size_g-l': 6}\n>>> wide_preprocessor.inverse_transform(X_wide)\n  color color_size\n0     r        r-s\n1     b        b-n\n2     g        g-l\n
Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def __init__(\n    self, wide_cols: List[str], crossed_cols: Optional[List[Tuple[str, str]]] = None\n):\n    super(WidePreprocessor, self).__init__()\n\n    self.wide_cols = wide_cols\n    self.crossed_cols = crossed_cols\n\n    self.is_fitted = False\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.WidePreprocessor.fit","title":"fit","text":"
fit(df)\n

Fits the Preprocessor and creates required attributes

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • WidePreprocessor \u2013

    WidePreprocessor fitted object

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def fit(self, df: pd.DataFrame) -> \"WidePreprocessor\":\n    r\"\"\"Fits the Preprocessor and creates required attributes\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    WidePreprocessor\n        `WidePreprocessor` fitted object\n    \"\"\"\n    df_wide = self._prepare_wide(df)\n    self.wide_crossed_cols = df_wide.columns.tolist()\n    glob_feature_list = self._make_global_feature_list(\n        df_wide[self.wide_crossed_cols]\n    )\n    # leave 0 for padding/\"unseen\" categories\n    self.encoding_dict = {v: i + 1 for i, v in enumerate(glob_feature_list)}\n    self.wide_dim = len(self.encoding_dict)\n    self.inverse_encoding_dict = {k: v for v, k in self.encoding_dict.items()}\n    self.inverse_encoding_dict[0] = \"unseen\"\n\n    self.is_fitted = True\n\n    return self\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.WidePreprocessor.transform","title":"transform","text":"
transform(df)\n

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ndarray \u2013

    transformed input dataframe
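
A short illustration of how unseen values are handled, reusing the wide_preprocessor fitted in the example further above: as the source below shows, categories (or crossed categories) not seen during fit are mapped to 0, the index reserved for padding/unseen values:

>>> df_new = pd.DataFrame({'color': ['r', 'k'], 'size': ['s', 's']})  # 'k' was never seen during fit\n>>> wide_preprocessor.transform(df_new)\narray([[1, 4],\n       [0, 0]])\n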

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def transform(self, df: pd.DataFrame) -> np.ndarray:\n    r\"\"\"\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    np.ndarray\n        transformed input dataframe\n    \"\"\"\n    check_is_fitted(self, attributes=[\"encoding_dict\"])\n    df_wide = self._prepare_wide(df)\n    encoded = np.zeros([len(df_wide), len(self.wide_crossed_cols)])\n    for col_i, col in enumerate(self.wide_crossed_cols):\n        encoded[:, col_i] = df_wide[col].apply(\n            lambda x: (\n                self.encoding_dict[col + \"_\" + str(x)]\n                if col + \"_\" + str(x) in self.encoding_dict\n                else 0\n            )\n        )\n    return encoded.astype(\"int64\")\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.WidePreprocessor.inverse_transform","title":"inverse_transform","text":"
inverse_transform(encoded)\n

Takes as input the output from the transform method and returns the original values.

Parameters:

  • encoded (ndarray) \u2013

    numpy array with the encoded values that are the output from the transform method

Returns:

  • DataFrame \u2013

    Pandas dataframe with the original values

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def inverse_transform(self, encoded: np.ndarray) -> pd.DataFrame:\n    r\"\"\"Takes as input the output from the `transform` method and it will\n    return the original values.\n\n    Parameters\n    ----------\n    encoded: np.ndarray\n        numpy array with the encoded values that are the output from the\n        `transform` method\n\n    Returns\n    -------\n    pd.DataFrame\n        Pandas dataframe with the original values\n    \"\"\"\n    decoded = pd.DataFrame(encoded, columns=self.wide_crossed_cols)\n\n    if pd.__version__ >= \"2.1.0\":\n        decoded = decoded.map(lambda x: self.inverse_encoding_dict[x])\n    else:\n        decoded = decoded.applymap(lambda x: self.inverse_encoding_dict[x])\n\n    for col in decoded.columns:\n        rm_str = \"\".join([col, \"_\"])\n        decoded[col] = decoded[col].apply(lambda x: x.replace(rm_str, \"\"))\n    return decoded\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.WidePreprocessor.fit_transform","title":"fit_transform","text":"
fit_transform(df)\n

Combines fit and transform

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ndarray \u2013

    transformed input dataframe

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def fit_transform(self, df: pd.DataFrame) -> np.ndarray:\n    \"\"\"Combines `fit` and `transform`\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    np.ndarray\n        transformed input dataframe\n    \"\"\"\n    return self.fit(df).transform(df)\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.tab_preprocessor.TabPreprocessor","title":"TabPreprocessor","text":"
TabPreprocessor(\n    cat_embed_cols=None,\n    continuous_cols=None,\n    quantization_setup=None,\n    cols_to_scale=None,\n    auto_embed_dim=True,\n    embedding_rule=\"fastai_new\",\n    default_embed_dim=16,\n    with_attention=False,\n    with_cls_token=False,\n    shared_embed=False,\n    verbose=1,\n    *,\n    scale=False,\n    already_standard=None,\n    **kwargs\n)\n

Bases: BasePreprocessor

Preprocessor to prepare the deeptabular component input dataset

Parameters:

  • cat_embed_cols (Optional[Union[List[str], List[Tuple[str, int]]]], default: None ) \u2013

    List containing the name of the categorical columns that will be represented by embeddings (e.g. ['education', 'relationship', ...]) or a Tuple with the name and the embedding dimension (e.g.: [ ('education',32), ('relationship',16), ...])

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the continuous cols

  • quantization_setup (Optional[Union[int, Dict[str, Union[int, List[float]]]]], default: None ) \u2013

    Continuous columns can be turned into categorical via pd.cut. If quantization_setup is an int, all continuous columns will be quantized using this value as the number of bins. Alternatively, a dictionary where the keys are the column names to quantize and the values are either integers indicating the number of bins or a list of scalars indicating the bin edges can also be used.

  • cols_to_scale (Optional[Union[List[str], str]], default: None ) \u2013

    List with the names of the columns that will be standardised via sklearn's StandardScaler. It can also be the string 'all', in which case all the continuous cols will be scaled.

  • auto_embed_dim (bool, default: True ) \u2013

    Boolean indicating whether the embedding dimensions will be automatically defined via rule of thumb. See embedding_rule below.

  • embedding_rule (Literal[google, fastai_old, fastai_new], default: 'fastai_new' ) \u2013

    If auto_embed_dim=True, this is the choice of embedding rule of thumb. Choices are:

    • fastai_new: \\(min(600, round(1.6 \\times n_{cat}^{0.56}))\\)

    • fastai_old: \\(min(50, (n_{cat}//{2})+1)\\)

    • google: \\(min(600, round(n_{cat}^{0.24}))\\)

  • default_embed_dim (int, default: 16 ) \u2013

    Dimension for the embeddings if the embedding dimension is not provided in the cat_embed_cols parameter and auto_embed_dim is set to False.

  • with_attention (bool, default: False ) \u2013

    Boolean indicating whether the preprocessed data will be passed to an attention-based model (more precisely, a model where all embeddings must have the same dimension). If True, the param cat_embed_cols must be a list containing just the categorical column names: e.g. ['education', 'relationship', ...]. This is because they will all be encoded using embeddings of the same dim, which will be specified later when the model is defined. Param alias: for_transformer

  • with_cls_token (bool, default: False ) \u2013

    Boolean indicating if a '[CLS]' token will be added to the dataset when using attention-based models. The final hidden state corresponding to this token is used as the aggregated representation for classification and regression tasks. If not, the categorical and/or continuous embeddings will be concatenated before being passed to the final MLP (if present).

  • shared_embed (bool, default: False ) \u2013

    Boolean indicating if the embeddings will be \"shared\" when using attention-based models. The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • verbose (int, default: 1 ) \u2013

    Verbosity level. Currently this is only used to control informative warnings raised during fit (e.g. when continuous columns will not be normalised)

  • scale (bool, default: False ) \u2013

    note: this arg will be removed in upcoming releases. Please use cols_to_scale instead. Bool indicating whether or not to scale/standardise continuous cols. It is important to emphasize that all the DL models for tabular data in the library also include the possibility of normalising the input continuous features via a BatchNorm or a LayerNorm. Param alias: scale_cont_cols.

  • already_standard (Optional[List[str]], default: None ) \u2013

    note: this arg will be removed in upcoming releases. Please use cols_to_scale instead. List with the name of the continuous cols that do not need to be scaled/standardised.

Other Parameters:

  • **kwargs \u2013

    pd.cut and StandardScaler related args

Attributes:

  • embed_dim (Dict) \u2013

    Dictionary where keys are the embed cols and values are the embedding dimensions. If with_attention is set to True this attribute is not generated during the fit process

  • label_encoder (LabelEncoder) \u2013

    see pytorch_widedeep.utils.dense_utils.LabelEncoder

  • cat_embed_input (List) \u2013

    List of Tuples with the column name, the number of individual values for that column and, if with_attention is set to False, the corresponding embedding dim, e.g. [('education', 16, 10), ('relationship', 6, 8), ...].

  • standardize_cols (List) \u2013

    List of the columns that will be standardised

  • scaler (StandardScaler) \u2013

    an instance of sklearn.preprocessing.StandardScaler

  • column_idx (Dict) \u2013

    Dictionary where keys are column names and values are column indexes. This is necessary to slice tensors

  • quantizer (Quantizer) \u2013

    an instance of Quantizer

Examples:

>>> import pandas as pd\n>>> import numpy as np\n>>> from pytorch_widedeep.preprocessing import TabPreprocessor\n>>> df = pd.DataFrame({'color': ['r', 'b', 'g'], 'size': ['s', 'n', 'l'], 'age': [25, 40, 55]})\n>>> cat_embed_cols = [('color',5), ('size',5)]\n>>> cont_cols = ['age']\n>>> deep_preprocessor = TabPreprocessor(cat_embed_cols=cat_embed_cols, continuous_cols=cont_cols)\n>>> X_tab = deep_preprocessor.fit_transform(df)\n>>> deep_preprocessor.cat_embed_cols\n[('color', 5), ('size', 5)]\n>>> deep_preprocessor.column_idx\n{'color': 0, 'size': 1, 'age': 2}\n>>> cont_df = pd.DataFrame({\"col1\": np.random.rand(10), \"col2\": np.random.rand(10) + 1})\n>>> cont_cols = [\"col1\", \"col2\"]\n>>> tab_preprocessor = TabPreprocessor(continuous_cols=cont_cols, quantization_setup=3)\n>>> ft_cont_df = tab_preprocessor.fit_transform(cont_df)\n>>> # or...\n>>> quantization_setup = {'col1': [0., 0.4, 1.], 'col2': [1., 1.4, 2.]}\n>>> tab_preprocessor2 = TabPreprocessor(continuous_cols=cont_cols, quantization_setup=quantization_setup)\n>>> ft_cont_df2 = tab_preprocessor2.fit_transform(cont_df)\n
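
As an additional hedged sketch, reusing the first df from the example above, the attention-based set-up described under with_attention and with_cls_token would look roughly like this (categorical columns passed as plain names, with a '[CLS]' token column prepended):

>>> tab_preprocessor_attn = TabPreprocessor(cat_embed_cols=['color', 'size'], continuous_cols=['age'], with_attention=True, with_cls_token=True)\n>>> X_tab_attn = tab_preprocessor_attn.fit_transform(df)\n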
Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py
@alias(\"with_attention\", [\"for_transformer\"])\n@alias(\"cat_embed_cols\", [\"embed_cols\"])\n@alias(\"scale\", [\"scale_cont_cols\"])\n@alias(\"quantization_setup\", [\"cols_and_bins\"])\ndef __init__(\n    self,\n    cat_embed_cols: Optional[Union[List[str], List[Tuple[str, int]]]] = None,\n    continuous_cols: Optional[List[str]] = None,\n    quantization_setup: Optional[\n        Union[int, Dict[str, Union[int, List[float]]]]\n    ] = None,\n    cols_to_scale: Optional[Union[List[str], str]] = None,\n    auto_embed_dim: bool = True,\n    embedding_rule: Literal[\"google\", \"fastai_old\", \"fastai_new\"] = \"fastai_new\",\n    default_embed_dim: int = 16,\n    with_attention: bool = False,\n    with_cls_token: bool = False,\n    shared_embed: bool = False,\n    verbose: int = 1,\n    *,\n    scale: bool = False,\n    already_standard: Optional[List[str]] = None,\n    **kwargs,\n):\n    super(TabPreprocessor, self).__init__()\n\n    self.continuous_cols = continuous_cols\n    self.quantization_setup = quantization_setup\n    self.cols_to_scale = cols_to_scale\n    self.scale = scale\n    self.already_standard = already_standard\n    self.auto_embed_dim = auto_embed_dim\n    self.embedding_rule = embedding_rule\n    self.default_embed_dim = default_embed_dim\n    self.with_attention = with_attention\n    self.with_cls_token = with_cls_token\n    self.shared_embed = shared_embed\n    self.verbose = verbose\n\n    self.quant_args = {\n        k: v for k, v in kwargs.items() if k in pd.cut.__code__.co_varnames\n    }\n    self.scale_args = {\n        k: v for k, v in kwargs.items() if k in StandardScaler().get_params()\n    }\n\n    self._check_inputs(cat_embed_cols)\n\n    if with_cls_token:\n        self.cat_embed_cols = (\n            [\"cls_token\"] + cat_embed_cols  # type: ignore[operator]\n            if cat_embed_cols is not None\n            else [\"cls_token\"]\n        )\n    else:\n        self.cat_embed_cols = cat_embed_cols  # type: ignore[assignment]\n\n    self.is_fitted = False\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.tab_preprocessor.TabPreprocessor.fit","title":"fit","text":"
fit(df)\n

Fits the Preprocessor and creates required attributes

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • TabPreprocessor \u2013

    TabPreprocessor fitted object

Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py
def fit(self, df: pd.DataFrame) -> BasePreprocessor:  # noqa: C901\n    \"\"\"Fits the Preprocessor and creates required attributes\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    TabPreprocessor\n        `TabPreprocessor` fitted object\n    \"\"\"\n\n    df_adj = self._insert_cls_token(df) if self.with_cls_token else df.copy()\n\n    self.column_idx: Dict[str, int] = {}\n\n    # Categorical embeddings logic\n    if self.cat_embed_cols is not None or self.quantization_setup is not None:\n        self.cat_embed_input: List[Union[Tuple[str, int], Tuple[str, int, int]]] = (\n            []\n        )\n\n    if self.cat_embed_cols is not None:\n        df_cat, cat_embed_dim = self._prepare_categorical(df_adj)\n\n        self.label_encoder = LabelEncoder(\n            columns_to_encode=df_cat.columns.tolist(),\n            shared_embed=self.shared_embed,\n            with_attention=self.with_attention,\n        )\n        self.label_encoder.fit(df_cat)\n\n        for k, v in self.label_encoder.encoding_dict.items():\n            if self.with_attention:\n                self.cat_embed_input.append((k, len(v)))\n            else:\n                self.cat_embed_input.append((k, len(v), cat_embed_dim[k]))\n\n        self.column_idx.update({k: v for v, k in enumerate(df_cat.columns)})\n\n    # Continuous columns logic\n    if self.continuous_cols is not None:\n        df_cont, cont_embed_dim = self._prepare_continuous(df_adj)\n\n        # Standardization logic\n        if self.standardize_cols is not None:\n            self.scaler = StandardScaler(**self.scale_args).fit(\n                df_cont[self.standardize_cols].values\n            )\n        elif self.verbose:\n            warnings.warn(\"Continuous columns will not be normalised\")\n\n        # Quantization logic\n        if self.cols_and_bins is not None:\n            # we do not run 'Quantizer.fit' here since in the wild case\n            # someone wants standardization and quantization for the same\n            # columns, the Quantizer will run on the scaled data\n            self.quantizer = Quantizer(self.cols_and_bins, **self.quant_args)\n\n            if self.with_attention:\n                for col, n_cat, _ in cont_embed_dim:\n                    self.cat_embed_input.append((col, n_cat))\n            else:\n                self.cat_embed_input.extend(cont_embed_dim)\n\n        self.column_idx.update(\n            {k: v + len(self.column_idx) for v, k in enumerate(df_cont)}\n        )\n\n    self.is_fitted = True\n\n    return self\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.tab_preprocessor.TabPreprocessor.transform","title":"transform","text":"
transform(df)\n

Returns the processed dataframe as a np.ndarray

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ndarray \u2013

    transformed input dataframe

Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py
def transform(self, df: pd.DataFrame) -> np.ndarray:  # noqa: C901\n    \"\"\"Returns the processed `dataframe` as a np.ndarray\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    np.ndarray\n        transformed input dataframe\n    \"\"\"\n    check_is_fitted(self, condition=self.is_fitted)\n\n    df_adj = self._insert_cls_token(df) if self.with_cls_token else df.copy()\n\n    if self.cat_embed_cols is not None:\n        df_cat = df_adj[self.cat_cols]\n        df_cat = self.label_encoder.transform(df_cat)\n    if self.continuous_cols is not None:\n        df_cont = df_adj[self.continuous_cols]\n        # Standardization logic\n        if self.standardize_cols:\n            df_cont[self.standardize_cols] = self.scaler.transform(\n                df_cont[self.standardize_cols].values\n            )\n        # Quantization logic\n        if self.cols_and_bins is not None:\n            # Adjustment so I don't have to override the method\n            # in 'ChunkTabPreprocessor'\n            if self.quantizer.is_fitted:\n                df_cont = self.quantizer.transform(df_cont)\n            else:\n                df_cont = self.quantizer.fit_transform(df_cont)\n    try:\n        df_deep = pd.concat([df_cat, df_cont], axis=1)\n    except NameError:\n        try:\n            df_deep = df_cat.copy()\n        except NameError:\n            df_deep = df_cont.copy()\n\n    return df_deep.values\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.tab_preprocessor.TabPreprocessor.inverse_transform","title":"inverse_transform","text":"
inverse_transform(encoded)\n

Takes the output of the transform method as input and returns the original values.

Parameters:

  • encoded (ndarray) \u2013

    array with the output of the transform method

Returns:

  • DataFrame \u2013

    Pandas dataframe with the original values
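
As a short sketch, assuming the deep_preprocessor and X_tab from the TabPreprocessor example earlier on this page (quantized columns, if any, are recovered as the mid point of their corresponding bin):

>>> original_df = deep_preprocessor.inverse_transform(X_tab)
>>> # original_df contains the original 'color', 'size' and 'age' values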

Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py
def inverse_transform(self, encoded: np.ndarray) -> pd.DataFrame:  # noqa: C901\n    r\"\"\"Takes as input the output from the `transform` method and it will\n    return the original values.\n\n    Parameters\n    ----------\n    encoded: np.ndarray\n        array with the output of the `transform` method\n\n    Returns\n    -------\n    pd.DataFrame\n        Pandas dataframe with the original values\n    \"\"\"\n    decoded = pd.DataFrame(encoded, columns=list(self.column_idx.keys()))\n    # embeddings back to original category\n    if self.cat_embed_cols is not None:\n        decoded = self.label_encoder.inverse_transform(decoded)\n    if self.continuous_cols is not None:\n        # quantized cols to the mid point\n        if self.cols_and_bins is not None:\n            if self.verbose:\n                print(\n                    \"Note that quantized cols will be turned into the mid point of \"\n                    \"the corresponding bin\"\n                )\n            for k, v in self.quantizer.inversed_bins.items():\n                decoded[k] = decoded[k].map(v)\n        # continuous_cols back to non-standarised\n        try:\n            decoded[self.standardize_cols] = self.scaler.inverse_transform(\n                decoded[self.standardize_cols]\n            )\n        except Exception:  # KeyError:\n            pass\n\n    if \"cls_token\" in decoded.columns:\n        decoded.drop(\"cls_token\", axis=1, inplace=True)\n\n    return decoded\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.tab_preprocessor.TabPreprocessor.fit_transform","title":"fit_transform","text":"
fit_transform(df)\n

Combines fit and transform

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ndarray \u2013

    transformed input dataframe

Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py
def fit_transform(self, df: pd.DataFrame) -> np.ndarray:\n    \"\"\"Combines `fit` and `transform`\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    np.ndarray\n        transformed input dataframe\n    \"\"\"\n    return self.fit(df).transform(df)\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.tab_preprocessor.Quantizer","title":"Quantizer","text":"
Quantizer(quantization_setup, **kwargs)\n

Helper class to perform the quantization of continuous columns. It is included in these docs for completeness, since depending on the value of the parameter 'quantization_setup' of the TabPreprocessor class, that class might have an attribute of type Quantizer. However, this class is designed to always run internally within the TabPreprocessor class.

Parameters:

  • quantization_setup (Dict[str, Union[int, List[float]]]) \u2013

    Dictionary where the keys are the column names to quantize and the values are either integers indicating the number of bins or a list of scalars indicating the bin edges.
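
The following is a minimal, illustrative sketch of using Quantizer on its own (note that it is normally run internally by TabPreprocessor; the dataframe and column name below are made up for illustration):

>>> import numpy as np
>>> import pandas as pd
>>> from pytorch_widedeep.preprocessing.tab_preprocessor import Quantizer
>>> cont_df = pd.DataFrame({"col1": np.random.rand(10)})
>>> # quantize 'col1' into 3 bins (internally this relies on pd.cut)
>>> quantizer = Quantizer(quantization_setup={"col1": 3})
>>> quantized_df = quantizer.fit_transform(cont_df)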

Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py
def __init__(\n    self,\n    quantization_setup: Dict[str, Union[int, List[float]]],\n    **kwargs,\n):\n    self.quantization_setup = quantization_setup\n    self.quant_args = kwargs\n\n    self.is_fitted = False\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.text_preprocessor.TextPreprocessor","title":"TextPreprocessor","text":"
TextPreprocessor(\n    text_col,\n    max_vocab=30000,\n    min_freq=5,\n    maxlen=80,\n    pad_first=True,\n    pad_idx=1,\n    already_processed=False,\n    word_vectors_path=None,\n    n_cpus=None,\n    verbose=1,\n)\n

Bases: BasePreprocessor

Preprocessor to prepare the deeptext input dataset

Parameters:

  • text_col (str) \u2013

    column in the input dataframe containing the texts

  • max_vocab (int, default: 30000 ) \u2013

    Maximum number of tokens in the vocabulary

  • min_freq (int, default: 5 ) \u2013

    Minimum frequency for a token to be part of the vocabulary

  • maxlen (int, default: 80 ) \u2013

    Maximum length of the tokenized sequences

  • pad_first (bool, default: True ) \u2013

    Indicates whether the padding index will be added at the beginning or the end of the sequences

  • pad_idx (int, default: 1 ) \u2013

    padding index. Fastai's Tokenizer leaves 0 for the 'unknown' token.

  • already_processed (Optional[bool], default: False ) \u2013

    Boolean indicating if the sequence of elements is already processed or prepared. If this is the case, this Preprocessor will simply tokenize and pad the sequence.

    Param alias: not_text.

    This parameter is intended for those cases where the input sequences are already fully processed or are directly not text (e.g. IDs)

  • word_vectors_path (Optional[str], default: None ) \u2013

    Path to the pretrained word vectors

  • n_cpus (Optional[int], default: None ) \u2013

    number of CPUs to use during the tokenization process

  • verbose (int, default: 1 ) \u2013

    Enable verbose output.

Attributes:

  • vocab (Vocab) \u2013

    an instance of pytorch_widedeep.utils.fastai_transforms.Vocab

  • embedding_matrix (ndarray) \u2013

    Array with the pretrained embeddings

Examples:

>>> import pandas as pd\n>>> from pytorch_widedeep.preprocessing import TextPreprocessor\n>>> df_train = pd.DataFrame({'text_column': [\"life is like a box of chocolates\",\n... \"You never know what you're gonna get\"]})\n>>> text_preprocessor = TextPreprocessor(text_col='text_column', max_vocab=25, min_freq=1, maxlen=10)\n>>> text_preprocessor.fit_transform(df_train)\nThe vocabulary contains 24 tokens\narray([[ 1,  1,  1,  1, 10, 11, 12, 13, 14, 15],\n       [ 5,  9, 16, 17, 18,  9, 19, 20, 21, 22]], dtype=int32)\n>>> df_te = pd.DataFrame({'text_column': ['you never know what is in the box']})\n>>> text_preprocessor.transform(df_te)\narray([[ 1,  1,  9, 16, 17, 18, 11,  0,  0, 13]], dtype=int32)\n
Source code in pytorch_widedeep/preprocessing/text_preprocessor.py
@alias(\"already_processed\", [\"not_text\"])\ndef __init__(\n    self,\n    text_col: str,\n    max_vocab: int = 30000,\n    min_freq: int = 5,\n    maxlen: int = 80,\n    pad_first: bool = True,\n    pad_idx: int = 1,\n    already_processed: Optional[bool] = False,\n    word_vectors_path: Optional[str] = None,\n    n_cpus: Optional[int] = None,\n    verbose: int = 1,\n):\n    super(TextPreprocessor, self).__init__()\n\n    self.text_col = text_col\n    self.max_vocab = max_vocab\n    self.min_freq = min_freq\n    self.maxlen = maxlen\n    self.pad_first = pad_first\n    self.pad_idx = pad_idx\n    self.already_processed = already_processed\n    self.word_vectors_path = word_vectors_path\n    self.verbose = verbose\n    self.n_cpus = n_cpus if n_cpus is not None else os.cpu_count()\n\n    self.is_fitted = False\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.text_preprocessor.TextPreprocessor.fit","title":"fit","text":"
fit(df)\n

Builds the vocabulary

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • TextPreprocessor \u2013

    TextPreprocessor fitted object

Source code in pytorch_widedeep/preprocessing/text_preprocessor.py
def fit(self, df: pd.DataFrame) -> BasePreprocessor:\n    \"\"\"Builds the vocabulary\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    TextPreprocessor\n        `TextPreprocessor` fitted object\n    \"\"\"\n    texts = self._read_texts(df)\n\n    tokens = get_texts(texts, self.already_processed, self.n_cpus)\n\n    self.vocab: TVocab = Vocab(\n        max_vocab=self.max_vocab,\n        min_freq=self.min_freq,\n        pad_idx=self.pad_idx,\n    ).fit(\n        tokens,\n    )\n\n    if self.verbose:\n        print(\"The vocabulary contains {} tokens\".format(len(self.vocab.stoi)))\n    if self.word_vectors_path is not None:\n        self.embedding_matrix = build_embeddings_matrix(\n            self.vocab, self.word_vectors_path, self.min_freq\n        )\n\n    self.is_fitted = True\n\n    return self\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.text_preprocessor.TextPreprocessor.transform","title":"transform","text":"
transform(df)\n

Returns the padded, 'numericalised' sequences

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ndarray \u2013

    Padded, 'numericalised' sequences

Source code in pytorch_widedeep/preprocessing/text_preprocessor.py
def transform(self, df: pd.DataFrame) -> np.ndarray:\n    \"\"\"Returns the padded, _'numericalised'_ sequences\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    np.ndarray\n        Padded, _'numericalised'_ sequences\n    \"\"\"\n    check_is_fitted(self, attributes=[\"vocab\"])\n    texts = self._read_texts(df)\n    tokens = get_texts(texts, self.already_processed, self.n_cpus)\n    return self._pad_sequences(tokens)\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.text_preprocessor.TextPreprocessor.transform_sample","title":"transform_sample","text":"
transform_sample(text)\n

Returns the padded, 'numericalised' sequence

Parameters:

  • text (str) \u2013

    text to be tokenized and padded

Returns:

  • ndarray \u2013

    Padded, 'numericalised' sequence
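
As a short usage sketch, assuming the fitted text_preprocessor from the TextPreprocessor example earlier on this page (the exact integer values returned depend on the fitted vocabulary):

>>> padded_sample = text_preprocessor.transform_sample("you never know what is in the box")
>>> # padded_sample is a 1d array of length maxlen (10 in that example)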

Source code in pytorch_widedeep/preprocessing/text_preprocessor.py
def transform_sample(self, text: str) -> np.ndarray:\n    \"\"\"Returns the padded, _'numericalised'_ sequence\n\n    Parameters\n    ----------\n    text: str\n        text to be tokenized and padded\n\n    Returns\n    -------\n    np.ndarray\n        Padded, _'numericalised'_ sequence\n    \"\"\"\n    check_is_fitted(self, attributes=[\"vocab\"])\n    tokens = get_texts([text], self.already_processed, self.n_cpus)\n    return self._pad_sequences(tokens)[0]\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.text_preprocessor.TextPreprocessor.fit_transform","title":"fit_transform","text":"
fit_transform(df)\n

Combines fit and transform

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ndarray \u2013

    Padded, 'numericalised' sequences

Source code in pytorch_widedeep/preprocessing/text_preprocessor.py
def fit_transform(self, df: pd.DataFrame) -> np.ndarray:\n    \"\"\"Combines `fit` and `transform`\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    np.ndarray\n        Padded, _'numericalised'_ sequences\n    \"\"\"\n    return self.fit(df).transform(df)\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.text_preprocessor.TextPreprocessor.inverse_transform","title":"inverse_transform","text":"
inverse_transform(padded_seq)\n

Returns the original text plus the added 'special' tokens

Parameters:

  • padded_seq (ndarray) \u2013

    array with the output of the transform method

Returns:

  • DataFrame \u2013

    Pandas dataframe with the original text plus the added 'special' tokens

Source code in pytorch_widedeep/preprocessing/text_preprocessor.py
def inverse_transform(self, padded_seq: np.ndarray) -> pd.DataFrame:\n    \"\"\"Returns the original text plus the added 'special' tokens\n\n    Parameters\n    ----------\n    padded_seq: np.ndarray\n        array with the output of the `transform` method\n\n    Returns\n    -------\n    pd.DataFrame\n        Pandas dataframe with the original text plus the added 'special' tokens\n    \"\"\"\n    texts = [self.vocab.inverse_transform(num) for num in padded_seq]\n    return pd.DataFrame({self.text_col: texts})\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.image_preprocessor.ImagePreprocessor","title":"ImagePreprocessor","text":"
ImagePreprocessor(\n    img_col, img_path, width=224, height=224, verbose=1\n)\n

Bases: BasePreprocessor

Preprocessor to prepare the deepimage input dataset.

The preprocessing consists simply of resizing the images according to their aspect ratio

Parameters:

  • img_col (str) \u2013

    name of the column with the image filenames

  • img_path (str) \u2013

    path to the directory where the images are stored

  • width (int, default: 224 ) \u2013

    width of the resulting processed image.

  • height (int, default: 224 ) \u2013

    height of the resulting processed image.

  • verbose (int, default: 1 ) \u2013

    Enable verbose output.

Attributes:

  • aap (AspectAwarePreprocessor) \u2013

    an instance of pytorch_widedeep.utils.image_utils.AspectAwarePreprocessor

  • spp (SimplePreprocessor) \u2013

    an instance of pytorch_widedeep.utils.image_utils.SimplePreprocessor

  • normalise_metrics (Dict) \u2013

    Dict containing the normalisation metrics of the image dataset, i.e. mean and std for the R, G and B channels

Examples:

>>> import pandas as pd\n>>>\n>>> from pytorch_widedeep.preprocessing import ImagePreprocessor\n>>>\n>>> path_to_image1 = 'tests/test_data_utils/images/galaxy1.png'\n>>> path_to_image2 = 'tests/test_data_utils/images/galaxy2.png'\n>>>\n>>> df_train = pd.DataFrame({'images_column': [path_to_image1]})\n>>> df_test = pd.DataFrame({'images_column': [path_to_image2]})\n>>> img_preprocessor = ImagePreprocessor(img_col='images_column', img_path='.', verbose=0)\n>>> resized_images = img_preprocessor.fit_transform(df_train)\n>>> new_resized_images = img_preprocessor.transform(df_train)\n

NOTE: Normalising metrics will only be computed when the fit_transform method is run. Running transform only will not change the computed metrics and running fit only simply instantiates the resizing functions.

Source code in pytorch_widedeep/preprocessing/image_preprocessor.py
def __init__(\n    self,\n    img_col: str,\n    img_path: str,\n    width: int = 224,\n    height: int = 224,\n    verbose: int = 1,\n):\n    super(ImagePreprocessor, self).__init__()\n\n    self.img_col = img_col\n    self.img_path = img_path\n    self.width = width\n    self.height = height\n    self.verbose = verbose\n\n    self.aap = AspectAwarePreprocessor(self.width, self.height)\n    self.spp = SimplePreprocessor(self.width, self.height)\n\n    self.compute_normalising_computed = False\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.image_preprocessor.ImagePreprocessor.transform","title":"transform","text":"
transform(df)\n

Resizes the images to the input height and width.

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe with the img_col

Returns:

  • ndarray \u2013

    Resized images to the input height and width

Source code in pytorch_widedeep/preprocessing/image_preprocessor.py
def transform(self, df: pd.DataFrame) -> np.ndarray:\n    \"\"\"Resizes the images to the input height and width.\n\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe with the `img_col`\n\n    Returns\n    -------\n    np.ndarray\n        Resized images to the input height and width\n    \"\"\"\n    image_list = df[self.img_col].tolist()\n    if self.verbose:\n        print(\"Reading Images from {}\".format(self.img_path))\n    imgs = [cv2.imread(\"/\".join([self.img_path, img])) for img in image_list]\n\n    # finding images with different height and width\n    aspect = [(im.shape[0], im.shape[1]) for im in imgs]\n    aspect_r = [a[0] / a[1] for a in aspect]\n    diff_idx = [i for i, r in enumerate(aspect_r) if r != 1.0]\n\n    if self.verbose:\n        print(\"Resizing\")\n    resized_imgs = []\n    for i, img in tqdm(enumerate(imgs), total=len(imgs), disable=self.verbose != 1):\n        if i in diff_idx:\n            resized_imgs.append(self.aap.preprocess(img))\n        else:\n            # if aspect ratio is 1:1, no need for AspectAwarePreprocessor\n            resized_imgs.append(self.spp.preprocess(img))\n\n    if not self.compute_normalising_computed:\n        if self.verbose:\n            print(\"Computing normalisation metrics\")\n        # mean and std deviation will only be computed when the fit method\n        # is called\n        mean_R, mean_G, mean_B = [], [], []\n        std_R, std_G, std_B = [], [], []\n        for rsz_img in resized_imgs:\n            (mean_b, mean_g, mean_r), (std_b, std_g, std_r) = cv2.meanStdDev(\n                rsz_img\n            )\n            mean_R.append(mean_r)\n            mean_G.append(mean_g)\n            mean_B.append(mean_b)\n            std_R.append(std_r)\n            std_G.append(std_g)\n            std_B.append(std_b)\n        self.normalise_metrics = dict(\n            mean={\n                \"R\": np.mean(mean_R) / 255.0,\n                \"G\": np.mean(mean_G) / 255.0,\n                \"B\": np.mean(mean_B) / 255.0,\n            },\n            std={\n                \"R\": np.mean(std_R) / 255.0,\n                \"G\": np.mean(std_G) / 255.0,\n                \"B\": np.mean(std_B) / 255.0,\n            },\n        )\n        self.compute_normalising_computed = True\n    return np.asarray(resized_imgs)\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.image_preprocessor.ImagePreprocessor.fit_transform","title":"fit_transform","text":"
fit_transform(df)\n

Combines fit and transform

Parameters:

  • df (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ndarray \u2013

    Resized images to the input height and width

Source code in pytorch_widedeep/preprocessing/image_preprocessor.py
def fit_transform(self, df: pd.DataFrame) -> np.ndarray:\n    \"\"\"Combines `fit` and `transform`\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    np.ndarray\n        Resized images to the input height and width\n    \"\"\"\n    return self.fit(df).transform(df)\n
"},{"location":"pytorch-widedeep/preprocessing.html#chunked-versions","title":"Chunked versions","text":"

Chunked versions of the preprocessors are also available. These are useful when the data is too big to fit in memory. See also the load_from_folder module in the library and the corresponding section here in the documentation.

Note that there is no ChunkImagePreprocessor. This is because the processing of the images will occur inside the ImageFromFolder class in the load_from_folder module.

"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.ChunkWidePreprocessor","title":"ChunkWidePreprocessor","text":"
ChunkWidePreprocessor(\n    wide_cols, n_chunks, crossed_cols=None\n)\n

Bases: WidePreprocessor

Preprocessor to prepare the wide input dataset

This Preprocessor prepares the data for the wide, linear component. This linear model is implemented via an Embedding layer that is connected to the output neuron. ChunkWidePreprocessor numerically encodes all the unique values of all categorical columns wide_cols + crossed_cols. See the Example below.

Parameters:

  • wide_cols (List[str]) \u2013

    List of strings with the name of the columns that will be label encoded and passed through the wide component

  • crossed_cols (Optional[List[Tuple[str, str]]], default: None ) \u2013

    List of Tuples with the name of the columns that will be 'crossed' and then label encoded. e.g. [('education', 'occupation'), ...]. For binary features, a cross-product transformation is 1 if and only if the constituent features are all 1, and 0 otherwise.

Attributes:

  • wide_crossed_cols (List) \u2013

    List with the names of all columns that will be label encoded

  • encoding_dict (Dict) \u2013

    Dictionary where the keys are the result of pasting colname + '_' + column value and the values are the corresponding mapped integer.

  • inverse_encoding_dict (Dict) \u2013

    the inverse encoding dictionary

  • wide_dim (int) \u2013

    Dimension of the wide model (i.e. dim of the linear layer)

Examples:

>>> import pandas as pd\n>>> from pytorch_widedeep.preprocessing import ChunkWidePreprocessor\n>>> chunk = pd.DataFrame({'color': ['r', 'b', 'g'], 'size': ['s', 'n', 'l']})\n>>> wide_cols = ['color']\n>>> crossed_cols = [('color', 'size')]\n>>> chunk_wide_preprocessor = ChunkWidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols,\n... n_chunks=1)\n>>> X_wide = chunk_wide_preprocessor.fit_transform(chunk)\n
Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def __init__(\n    self,\n    wide_cols: List[str],\n    n_chunks: int,\n    crossed_cols: Optional[List[Tuple[str, str]]] = None,\n):\n    super(ChunkWidePreprocessor, self).__init__(wide_cols, crossed_cols)\n\n    self.n_chunks = n_chunks\n\n    self.chunk_counter = 0\n\n    self.is_fitted = False\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.ChunkWidePreprocessor.partial_fit","title":"partial_fit","text":"
partial_fit(chunk)\n

Fits the Preprocessor and creates required attributes

Parameters:

  • chunk (DataFrame) \u2013

    Input pandas dataframe

Returns:

  • ChunkWidePreprocessor \u2013

    ChunkWidePreprocessor fitted object
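
A hedged sketch of the intended chunked workflow follows (the csv file, chunk size and column names are hypothetical; n_chunks must match the number of chunks that will be passed to partial_fit so the preprocessor knows when it has seen the full dataset):

>>> import pandas as pd
>>> from pytorch_widedeep.preprocessing import ChunkWidePreprocessor
>>> wide_preprocessor = ChunkWidePreprocessor(
...     wide_cols=["color"], crossed_cols=[("color", "size")], n_chunks=10
... )
>>> for chunk in pd.read_csv("large_dataset.csv", chunksize=1000):  # hypothetical file
...     wide_preprocessor.partial_fit(chunk)
>>> # once the last chunk has been seen the preprocessor is fitted and each
>>> # chunk can be transformed (here, the last one from the loop)
>>> X_wide = wide_preprocessor.transform(chunk)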

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def partial_fit(self, chunk: pd.DataFrame) -> \"ChunkWidePreprocessor\":\n    r\"\"\"Fits the Preprocessor and creates required attributes\n\n    Parameters\n    ----------\n    chunk: pd.DataFrame\n        Input pandas dataframe\n\n    Returns\n    -------\n    ChunkWidePreprocessor\n        `ChunkWidePreprocessor` fitted object\n    \"\"\"\n    df_wide = self._prepare_wide(chunk)\n    self.wide_crossed_cols = df_wide.columns.tolist()\n\n    if self.chunk_counter == 0:\n        self.glob_feature_set = set(\n            self._make_global_feature_list(df_wide[self.wide_crossed_cols])\n        )\n    else:\n        self.glob_feature_set.update(\n            self._make_global_feature_list(df_wide[self.wide_crossed_cols])\n        )\n\n    self.chunk_counter += 1\n\n    if self.chunk_counter == self.n_chunks:\n        self.encoding_dict = {v: i + 1 for i, v in enumerate(self.glob_feature_set)}\n        self.wide_dim = len(self.encoding_dict)\n        self.inverse_encoding_dict = {k: v for v, k in self.encoding_dict.items()}\n        self.inverse_encoding_dict[0] = \"unseen\"\n\n        self.is_fitted = True\n\n    return self\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.wide_preprocessor.ChunkWidePreprocessor.fit","title":"fit","text":"
fit(df)\n

Runs partial_fit. This simply overrides the fit method in the base class; this class is not designed to be run via fit directly.

Source code in pytorch_widedeep/preprocessing/wide_preprocessor.py
def fit(self, df: pd.DataFrame) -> \"ChunkWidePreprocessor\":\n    \"\"\"\n    Runs `partial_fit`. This is just to override the fit method in the base\n    class. This class is not designed or thought to run fit\n    \"\"\"\n    return self.partial_fit(df)\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.tab_preprocessor.ChunkTabPreprocessor","title":"ChunkTabPreprocessor","text":"
ChunkTabPreprocessor(\n    n_chunks,\n    cat_embed_cols=None,\n    continuous_cols=None,\n    cols_and_bins=None,\n    cols_to_scale=None,\n    default_embed_dim=16,\n    with_attention=False,\n    with_cls_token=False,\n    shared_embed=False,\n    verbose=1,\n    *,\n    scale=False,\n    already_standard=None,\n    **kwargs\n)\n

Bases: TabPreprocessor

Preprocessor to prepare the deeptabular component input dataset

Parameters:

  • n_chunks (int) \u2013

    Number of chunks into which the tabular dataset is divided.

  • cat_embed_cols (Optional[Union[List[str], List[Tuple[str, int]]]], default: None ) \u2013

    List containing the name of the categorical columns that will be represented by embeddings (e.g. ['education', 'relationship', ...]) or a Tuple with the name and the embedding dimension (e.g.: [ ('education',32), ('relationship',16), ...])

  • continuous_cols (Optional[List[str]], default: None ) \u2013

    List with the name of the continuous cols

  • cols_and_bins (Optional[Dict[str, List[float]]], default: None ) \u2013

    Continuous columns can be turned into categorical via pd.cut. 'cols_and_bins' is a dictionary where the keys are the column names to quantize and the values are a list of scalars indicating the bin edges.

  • cols_to_scale (Optional[Union[List[str], str]], default: None ) \u2013

    List with the names of the columns that will be standardised via sklearn's StandardScaler

  • default_embed_dim (int, default: 16 ) \u2013

    Dimension for the embeddings if the embed_dim is not provided in the cat_embed_cols parameter and auto_embed_dim is set to False.

  • with_attention (bool, default: False ) \u2013

    Boolean indicating whether the preprocessed data will be passed to an attention-based model (more precisely, a model where all embeddings must have the same dimensions). If True, the param cat_embed_cols must be a list containing just the categorical column names, e.g. ['education', 'relationship', ...]. This is because they will all be encoded using embeddings of the same dim, which will be specified later when the model is defined. Param alias: for_transformer

  • with_cls_token (bool, default: False ) \u2013

    Boolean indicating if a '[CLS]' token will be added to the dataset when using attention-based models. The final hidden state corresponding to this token is used as the aggregated representation for classification and regression tasks. If not, the categorical (and continuous embeddings if present) will be concatenated before being passed to the final MLP (if present).

  • shared_embed (bool, default: False ) \u2013

    Boolean indicating if the embeddings will be \"shared\" when using attention-based models. The idea behind shared_embed is described in the Appendix A in the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is embedded at the time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

  • verbose (int, default: 1 ) \u2013

    Enable verbose output.

  • scale (bool, default: False ) \u2013

    note: this arg will be removed in upcoming releases. Please use cols_to_scale instead. Bool indicating whether or not to scale/standardise continuous cols. It is important to emphasize that all the DL models for tabular data in the library also include the possibility of normalising the input continuous features via a BatchNorm or a LayerNorm. Param alias: scale_cont_cols.

  • already_standard (Optional[List[str]], default: None ) \u2013

    note: this arg will be removed in upcoming releases. Please use cols_to_scale instead. List with the name of the continuous cols that do not need to be scaled/standardised.

Other Parameters:

  • **kwargs \u2013

    pd.cut and StandardScaler related args

Attributes:

  • embed_dim (Dict) \u2013

    Dictionary where keys are the embed cols and values are the embedding dimensions. If with_attention is set to True this attribute is not generated during the fit process

  • label_encoder (LabelEncoder) \u2013

    see pytorch_widedeep.utils.deeptabular_utils.LabelEncoder

  • cat_embed_input (List) \u2013

    List of Tuples with the column name, number of individual values for that column and, If with_attention is set to False, the corresponding embeddings dim, e.g. [('education', 16, 10), ('relationship', 6, 8), ...].

  • standardize_cols (List) \u2013

    List of the columns that will be standardized

  • scaler (StandardScaler) \u2013

    an instance of sklearn.preprocessing.StandardScaler if 'cols_to_scale' is not None or 'scale' is 'True'

  • column_idx (Dict) \u2013

    Dictionary where keys are column names and values are column indexes. This is necessary to slice tensors

  • quantizer (Quantizer) \u2013

    an instance of Quantizer

Examples:

>>> import pandas as pd\n>>> import numpy as np\n>>> from pytorch_widedeep.preprocessing import ChunkTabPreprocessor\n>>> np.random.seed(42)\n>>> chunk_df = pd.DataFrame({'cat_col': np.random.choice(['A', 'B', 'C'], size=8),\n... 'cont_col': np.random.uniform(1, 100, size=8)})\n>>> cat_embed_cols = [('cat_col',4)]\n>>> cont_cols = ['cont_col']\n>>> tab_preprocessor = ChunkTabPreprocessor(\n... n_chunks=1, cat_embed_cols=cat_embed_cols, continuous_cols=cont_cols\n... )\n>>> X_tab = tab_preprocessor.fit_transform(chunk_df)\n>>> tab_preprocessor.cat_embed_cols\n[('cat_col', 4)]\n>>> tab_preprocessor.column_idx\n{'cat_col': 0, 'cont_col': 1}\n
Source code in pytorch_widedeep/preprocessing/tab_preprocessor.py
@alias(\"with_attention\", [\"for_transformer\"])\n@alias(\"cat_embed_cols\", [\"embed_cols\"])\n@alias(\"scale\", [\"scale_cont_cols\"])\n@alias(\"cols_and_bins\", [\"quantization_setup\"])\ndef __init__(\n    self,\n    n_chunks: int,\n    cat_embed_cols: Optional[Union[List[str], List[Tuple[str, int]]]] = None,\n    continuous_cols: Optional[List[str]] = None,\n    cols_and_bins: Optional[Dict[str, List[float]]] = None,\n    cols_to_scale: Optional[Union[List[str], str]] = None,\n    default_embed_dim: int = 16,\n    with_attention: bool = False,\n    with_cls_token: bool = False,\n    shared_embed: bool = False,\n    verbose: int = 1,\n    *,\n    scale: bool = False,\n    already_standard: Optional[List[str]] = None,\n    **kwargs,\n):\n    super(ChunkTabPreprocessor, self).__init__(\n        cat_embed_cols=cat_embed_cols,\n        continuous_cols=continuous_cols,\n        quantization_setup=None,\n        cols_to_scale=cols_to_scale,\n        auto_embed_dim=False,\n        embedding_rule=\"google\",  # does not matter, irrelevant\n        default_embed_dim=default_embed_dim,\n        with_attention=with_attention,\n        with_cls_token=with_cls_token,\n        shared_embed=shared_embed,\n        verbose=verbose,\n        scale=scale,\n        already_standard=already_standard,\n        **kwargs,\n    )\n\n    self.n_chunks = n_chunks\n    self.chunk_counter = 0\n\n    self.cols_and_bins = cols_and_bins  # type: ignore[assignment]\n    if self.cols_and_bins is not None:\n        self.quantizer = Quantizer(self.cols_and_bins, **self.quant_args)\n\n    self.embed_prepared = False\n    self.continuous_prepared = False\n
"},{"location":"pytorch-widedeep/preprocessing.html#pytorch_widedeep.preprocessing.text_preprocessor.ChunkTextPreprocessor","title":"ChunkTextPreprocessor","text":"
ChunkTextPreprocessor(\n    text_col,\n    n_chunks,\n    root_dir=None,\n    max_vocab=30000,\n    min_freq=5,\n    maxlen=80,\n    pad_first=True,\n    pad_idx=1,\n    already_processed=False,\n    word_vectors_path=None,\n    n_cpus=None,\n    verbose=1,\n)\n

Bases: TextPreprocessor

Preprocessor to prepare the deeptext input dataset

Parameters:

  • text_col (str) \u2013

    column in the input dataframe containing either the texts or the filenames where the text documents are stored

  • n_chunks (int) \u2013

    Number of chunks into which the text dataset is divided.

  • root_dir (Optional[str], default: None ) \u2013

    If 'text_col' contains the filenames with the text documents, this is the path to the directory where those documents are stored.

  • max_vocab (int, default: 30000 ) \u2013

    Maximum number of tokens in the vocabulary

  • min_freq (int, default: 5 ) \u2013

    Minimum frequency for a token to be part of the vocabulary

  • maxlen (int, default: 80 ) \u2013

    Maximum length of the tokenized sequences

  • pad_first (bool, default: True ) \u2013

    Indicates whether the padding index will be added at the beginning or the end of the sequences

  • pad_idx (int, default: 1 ) \u2013

    padding index. Fastai's Tokenizer leaves 0 for the 'unknown' token.

  • word_vectors_path (Optional[str], default: None ) \u2013

    Path to the pretrained word vectors

  • n_cpus (Optional[int], default: None ) \u2013

    number of CPUs to use during the tokenization process

  • verbose (int, default: 1 ) \u2013

    Enable verbose output.

Attributes:

  • vocab (Vocab) \u2013

    an instance of pytorch_widedeep.utils.fastai_transforms.ChunkVocab

  • embedding_matrix (ndarray) \u2013

    Array with the pretrained embeddings if word_vectors_path is not None

Examples:

>>> import pandas as pd\n>>> from pytorch_widedeep.preprocessing import ChunkTextPreprocessor\n>>> chunk_df = pd.DataFrame({'text_column': [\"life is like a box of chocolates\",\n... \"You never know what you're gonna get\"]})\n>>> chunk_text_preprocessor = ChunkTextPreprocessor(text_col='text_column', n_chunks=1,\n... max_vocab=25, min_freq=1, maxlen=10, verbose=0, n_cpus=1)\n>>> processed_chunk = chunk_text_preprocessor.fit_transform(chunk_df)\n
Source code in pytorch_widedeep/preprocessing/text_preprocessor.py
def __init__(\n    self,\n    text_col: str,\n    n_chunks: int,\n    root_dir: Optional[str] = None,\n    max_vocab: int = 30000,\n    min_freq: int = 5,\n    maxlen: int = 80,\n    pad_first: bool = True,\n    pad_idx: int = 1,\n    already_processed: Optional[bool] = False,\n    word_vectors_path: Optional[str] = None,\n    n_cpus: Optional[int] = None,\n    verbose: int = 1,\n):\n    super(ChunkTextPreprocessor, self).__init__(\n        text_col=text_col,\n        max_vocab=max_vocab,\n        min_freq=min_freq,\n        maxlen=maxlen,\n        pad_first=pad_first,\n        pad_idx=pad_idx,\n        already_processed=already_processed,\n        word_vectors_path=word_vectors_path,\n        n_cpus=n_cpus,\n        verbose=verbose,\n    )\n\n    self.n_chunks = n_chunks\n    self.root_dir = root_dir\n\n    self.chunk_counter = 0\n\n    self.is_fitted = False\n
"},{"location":"pytorch-widedeep/self_supervised_pretraining.html","title":"Self Supervised Pre-training for tabular data","text":"

In this library we have implemented two methods or routines that allow the user to use self-supervised pre-training for all tabular models in the library, with the exception of the TabPerceiver (this is a rather particular model and self-supervised pre-training requires some adjustments that will be implemented in future versions). Please see the examples folder in the repo or the examples section in the docs for details on how to use self-supervised pre-training with this library.

The two routines implemented are illustrated in the figures below. The first is from TabNet: Attentive Interpretable Tabular Learning. It is a 'standard' encoder-decoder architecture and is designed here for models that do not use transformer-based architectures (or when the embeddings can all have different dimensions). The second is from SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training. It is based on contrastive and denoising learning and is designed for models that use transformer-based architectures (or when the embeddings all need to have the same dimension):

Figure 1. Figure 2 in their paper. The caption of the original paper is included in case it is useful.

Figure 2. Figure 1 in their paper. The caption of the original paper is included in case it is useful.

Note that the self-supervised pre-trainers described below focus, of course, on the self-supervised pre-training phase, i.e. the left side in Figure 1 and the upper part in Figure 2. When combined with the Trainer described earlier in the documentation, one can reproduce the full process illustrated in the figures above.

Also note that it is beyond the scope of these docs to explain these routines in detail. In addition, to fully utilise the self-supervised trainers implemented in this library a minimum understanding of the processes as described in the papers is required. Therefore, we strongly encourage the users to have a look at the papers.
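
To make this a bit more concrete, below is a minimal sketch of the full workflow for a non-attention-based model (a hedged example: it assumes that X_tab, target and a fitted tab_preprocessor already exist, and all parameter values are illustrative):

>>> from pytorch_widedeep import Trainer
>>> from pytorch_widedeep.models import TabMlp, WideDeep
>>> from pytorch_widedeep.self_supervised_training import EncoderDecoderTrainer
>>> tab_mlp = TabMlp(
...     column_idx=tab_preprocessor.column_idx,
...     cat_embed_input=tab_preprocessor.cat_embed_input,
...     continuous_cols=tab_preprocessor.continuous_cols,
... )
>>> # self-supervised pre-training phase (left side of Figure 1)
>>> ed_trainer = EncoderDecoderTrainer(encoder=tab_mlp)
>>> ed_trainer.pretrain(X_tab, n_epochs=5, batch_size=256)
>>> # supervised fine-tuning phase with the standard Trainer
>>> model = WideDeep(deeptabular=tab_mlp)
>>> trainer = Trainer(model, objective="binary")
>>> trainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256)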

"},{"location":"pytorch-widedeep/self_supervised_pretraining.html#pytorch_widedeep.self_supervised_training.EncoderDecoderTrainer","title":"EncoderDecoderTrainer","text":"
EncoderDecoderTrainer(\n    encoder,\n    decoder=None,\n    masked_prob=0.2,\n    optimizer=None,\n    lr_scheduler=None,\n    callbacks=None,\n    verbose=1,\n    seed=1,\n    **kwargs\n)\n

Bases: BaseEncoderDecoderTrainer

This class implements an Encoder-Decoder self-supervised 'routine' inspired by TabNet: Attentive Interpretable Tabular Learning. See Figure 1 above.

Parameters:

  • encoder (ModelWithoutAttention) \u2013

    An instance of a TabMlp, TabResNet or TabNet model

  • decoder (Optional[DecoderWithoutAttention], default: None ) \u2013

    An instance of a TabMlpDecoder, TabResNetDecoder or TabNetDecoder model. If None, the decoder will be automatically built as a 'symmetric' model to the Encoder

  • masked_prob (float, default: 0.2 ) \u2013

    Indicates the fraction of elements in the embedding tensor that will be masked and hence used for reconstruction

  • optimizer (Optional[Optimizer], default: None ) \u2013

    An instance of Pytorch's Optimizer object (e.g. torch.optim.Adam()). If no optimizer is passed it will default to AdamW.

  • lr_scheduler (Optional[LRScheduler], default: None ) \u2013

    An instance of Pytorch's LRScheduler object (e.g torch.optim.lr_scheduler.StepLR(opt, step_size=5)).

  • callbacks (Optional[List[Callback]], default: None ) \u2013

    List with Callback objects. The three callbacks available in pytorch-widedeep are: LRHistory, ModelCheckpoint and EarlyStopping. This can also be a custom callback. See pytorch_widedeep.callbacks.Callback or the Examples folder in the repo.

  • verbose (int, default: 1 ) \u2013

    Setting it to 0 will print nothing during training.

  • seed (int, default: 1 ) \u2013

    Random seed to be used internally for train_test_split

Other Parameters:

  • **kwargs \u2013

    Other infrequently used arguments that can also be passed as kwargs are:

    • device: str string indicating the device. One of 'cpu' or 'gpu'

    • num_workers: int number of workers to be used internally by the data loaders

    • reducelronplateau_criterion: str This sets the criterion that will be used by the lr scheduler to take a step: one of 'loss' or 'metric'. The ReduceLROnPlateau learning rate scheduler is a bit particular.

Source code in pytorch_widedeep/self_supervised_training/encoder_decoder_trainer.py
def __init__(\n    self,\n    encoder: ModelWithoutAttention,\n    decoder: Optional[DecoderWithoutAttention] = None,\n    masked_prob: float = 0.2,\n    optimizer: Optional[Optimizer] = None,\n    lr_scheduler: Optional[LRScheduler] = None,\n    callbacks: Optional[List[Callback]] = None,\n    verbose: int = 1,\n    seed: int = 1,\n    **kwargs,\n):\n    super().__init__(\n        encoder=encoder,\n        decoder=decoder,\n        masked_prob=masked_prob,\n        optimizer=optimizer,\n        lr_scheduler=lr_scheduler,\n        callbacks=callbacks,\n        verbose=verbose,\n        seed=seed,\n        **kwargs,\n    )\n
"},{"location":"pytorch-widedeep/self_supervised_pretraining.html#pytorch_widedeep.self_supervised_training.EncoderDecoderTrainer.pretrain","title":"pretrain","text":"
pretrain(\n    X_tab,\n    X_tab_val=None,\n    val_split=None,\n    validation_freq=1,\n    n_epochs=1,\n    batch_size=32,\n)\n

Pretrain method. Can also be called using .fit(<same_args>)

Parameters:

  • X_tab (ndarray) \u2013

    tabular dataset

  • X_tab_val (Optional[ndarray], default: None ) \u2013

    validation data

  • val_split (Optional[float], default: None ) \u2013

    An alternative to passing the validation set is to use a train/val split fraction via val_split

  • validation_freq (int, default: 1 ) \u2013

    epochs validation frequency

  • n_epochs (int, default: 1 ) \u2013

    number of epochs

  • batch_size (int, default: 32 ) \u2013

    batch size

Source code in pytorch_widedeep/self_supervised_training/encoder_decoder_trainer.py
def pretrain(\n    self,\n    X_tab: np.ndarray,\n    X_tab_val: Optional[np.ndarray] = None,\n    val_split: Optional[float] = None,\n    validation_freq: int = 1,\n    n_epochs: int = 1,\n    batch_size: int = 32,\n):\n    r\"\"\"Pretrain method. Can also be called using `.fit(<same_args>)`\n\n    Parameters\n    ----------\n    X_tab: np.ndarray,\n        tabular dataset\n    X_tab_val: np.ndarray, Optional, default = None\n        validation data\n    val_split: float, Optional. default=None\n        An alterative to passing the validation set is to use a train/val\n        split fraction via `val_split`\n    validation_freq: int, default=1\n        epochs validation frequency\n    n_epochs: int, default=1\n        number of epochs\n    batch_size: int, default=32\n        batch size\n    \"\"\"\n\n    self.batch_size = batch_size\n\n    train_set, eval_set = self._train_eval_split(X_tab, X_tab_val, val_split)\n    train_loader = DataLoader(\n        dataset=train_set, batch_size=batch_size, num_workers=self.num_workers\n    )\n    train_steps = len(train_loader)\n    if eval_set is not None:\n        eval_loader = DataLoader(\n            dataset=eval_set,\n            batch_size=batch_size,\n            num_workers=self.num_workers,\n            shuffle=False,\n        )\n        eval_steps = len(eval_loader)\n\n    self.callback_container.on_train_begin(\n        {\n            \"batch_size\": batch_size,\n            \"train_steps\": train_steps,\n            \"n_epochs\": n_epochs,\n        }\n    )\n    for epoch in range(n_epochs):\n        epoch_logs: Dict[str, float] = {}\n        self.callback_container.on_epoch_begin(epoch, logs=epoch_logs)\n\n        self.train_running_loss = 0.0\n        with trange(train_steps, disable=self.verbose != 1) as t:\n            for batch_idx, X in zip(t, train_loader):\n                t.set_description(\"epoch %i\" % (epoch + 1))\n                train_loss = self._train_step(X[0], batch_idx)\n                self.callback_container.on_batch_end(batch=batch_idx)\n                print_loss_and_metric(t, train_loss)\n\n        epoch_logs = save_epoch_logs(epoch_logs, train_loss, None, \"train\")\n\n        on_epoch_end_metric = None\n        if eval_set is not None and epoch % validation_freq == (\n            validation_freq - 1\n        ):\n            self.callback_container.on_eval_begin()\n            self.valid_running_loss = 0.0\n            with trange(eval_steps, disable=self.verbose != 1) as v:\n                for batch_idx, X in zip(v, eval_loader):\n                    v.set_description(\"valid\")\n                    val_loss = self._eval_step(X[0], batch_idx)\n                    print_loss_and_metric(v, val_loss)\n            epoch_logs = save_epoch_logs(epoch_logs, val_loss, None, \"val\")\n            on_epoch_end_metric = val_loss\n        else:\n            if self.reducelronplateau:\n                raise NotImplementedError(\n                    \"ReduceLROnPlateau scheduler can be used only with validation data.\"\n                )\n\n        self.callback_container.on_epoch_end(epoch, epoch_logs, on_epoch_end_metric)\n\n        if self.early_stop:\n            self.callback_container.on_train_end(epoch_logs)\n            break\n\n    self.callback_container.on_train_end(epoch_logs)\n    self._restore_best_weights()\n    self.ed_model.train()\n
"},{"location":"pytorch-widedeep/self_supervised_pretraining.html#pytorch_widedeep.self_supervised_training.EncoderDecoderTrainer.save","title":"save","text":"
save(\n    path,\n    save_state_dict=False,\n    model_filename=\"ed_model.pt\",\n)\n

Saves the model, training and evaluation history (if any) to disk

Parameters:

  • path (str) \u2013

    path to the directory where the model and the feature importance attribute will be saved.

  • save_state_dict (bool, default: False ) \u2013

    Boolean indicating whether to save directly the model or the model's state dictionary

  • model_filename (str, default: 'ed_model.pt' ) \u2013

    filename where the model weights will be stored
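
For example, a short usage sketch (the directory name is arbitrary and ed_trainer is assumed to be an already pretrained EncoderDecoderTrainer):

>>> # saves the training history and the model's state dict under './pretrained_model'
>>> ed_trainer.save(path="pretrained_model", save_state_dict=True, model_filename="ed_model.pt")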

Source code in pytorch_widedeep/self_supervised_training/encoder_decoder_trainer.py
def save(\n    self,\n    path: str,\n    save_state_dict: bool = False,\n    model_filename: str = \"ed_model.pt\",\n):\n    r\"\"\"Saves the model, training and evaluation history (if any) to disk\n\n    Parameters\n    ----------\n    path: str\n        path to the directory where the model and the feature importance\n        attribute will be saved.\n    save_state_dict: bool, default = False\n        Boolean indicating whether to save directly the model or the\n        model's state dictionary\n    model_filename: str, Optional, default = \"ed_model.pt\"\n        filename where the model weights will be store\n    \"\"\"\n    save_dir = Path(path)\n    history_dir = save_dir / \"history\"\n    history_dir.mkdir(exist_ok=True, parents=True)\n\n    # the trainer is run with the History Callback by default\n    with open(history_dir / \"train_eval_history.json\", \"w\") as teh:\n        json.dump(self.history, teh)  # type: ignore[attr-defined]\n\n    has_lr_history = any(\n        [clbk.__class__.__name__ == \"LRHistory\" for clbk in self.callbacks]\n    )\n    if self.lr_scheduler is not None and has_lr_history:\n        with open(history_dir / \"lr_history.json\", \"w\") as lrh:\n            json.dump(self.lr_history, lrh)  # type: ignore[attr-defined]\n\n    model_path = save_dir / model_filename\n    if save_state_dict:\n        torch.save(self.ed_model.state_dict(), model_path)\n    else:\n        torch.save(self.ed_model, model_path)\n
"},{"location":"pytorch-widedeep/self_supervised_pretraining.html#pytorch_widedeep.self_supervised_training.ContrastiveDenoisingTrainer","title":"ContrastiveDenoisingTrainer","text":"
ContrastiveDenoisingTrainer(\n    model,\n    preprocessor,\n    optimizer=None,\n    lr_scheduler=None,\n    callbacks=None,\n    loss_type=\"both\",\n    projection_head1_dims=None,\n    projection_head2_dims=None,\n    projection_heads_activation=\"relu\",\n    cat_mlp_type=\"multiple\",\n    cont_mlp_type=\"multiple\",\n    denoise_mlps_activation=\"relu\",\n    verbose=1,\n    seed=1,\n    **kwargs\n)\n

Bases: BaseContrastiveDenoisingTrainer

This class trains a Contrastive, Denoising Self Supervised 'routine' that is based on the one described in SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training, their Figure 1.

Parameters:

  • model (ModelWithAttention) \u2013

    An instance of a TabTransformer, SAINT, FTTransformer, TabFastFormer, TabPerceiver, ContextAttentionMLP or SelfAttentionMLP model.

  • preprocessor (TabPreprocessor) \u2013

    A fitted TabPreprocessor object. See pytorch_widedeep.preprocessing.tab_preprocessor.TabPreprocessor

  • optimizer (Optional[Optimizer], default: None ) \u2013

    An instance of Pytorch's Optimizer object (e.g. torch.optim.Adam()). If no optimizer is passed it will default to AdamW.

  • lr_scheduler (Optional[LRScheduler], default: None ) \u2013

    An instance of Pytorch's LRScheduler object (e.g torch.optim.lr_scheduler.StepLR(opt, step_size=5)).

  • callbacks (Optional[List[Callback]], default: None ) \u2013

    List with Callback objects. The three callbacks available in pytorch-widedeep are: LRHistory, ModelCheckpoint and EarlyStopping. This can also be a custom callback. See pytorch_widedeep.callbacks.Callback or the Examples folder in the repo.

  • loss_type (Literal[contrastive, denoising, both], default: 'both' ) \u2013

    One of 'contrastive', 'denoising' or 'both'. See SAINT: Improved Neural Networks for Tabular Data via Row Attention and Contrastive Pre-Training, their figure (1) and their equation (5).

  • projection_head1_dims (Optional[List[int]], default: None ) \u2013

    The projection heads are simply MLPs. This parameter is a list of integers with the dimensions of the MLP hidden layers. See the paper for details. Note that setting up this parameter requires some knowledge of the architecture one is using. For example, if we are representing the features with embeddings of dim 32 (i.e. the so called dimension of the model is 32), then the first dimension of the projection head must be 32 (e.g. [32, 16])

  • projection_head2_dims (Optional[List[int]], default: None ) \u2013

    Same as 'projection_head1_dims' for the second head

  • projection_heads_activation (str, default: 'relu' ) \u2013

    Activation function for the projection heads

  • cat_mlp_type (Literal[single, multiple], default: 'multiple' ) \u2013

    If 'denoising' loss is used, one can choose two types of 'stacked' MLPs to process the output from the transformer-based encoder that receives 'corrupted' (cut-mixed and mixed-up) features. These are 'single' or 'multiple'. The former approach will apply a single MLP to all the categorical features while the latter will use one MLP per categorical feature

  • cont_mlp_type (Literal[single, multiple], default: 'multiple' ) \u2013

    Same as 'cat_mlp_type' but for the continuous features

  • denoise_mlps_activation (str, default: 'relu' ) \u2013

    activation function for the so called 'denoising mlps'.

  • verbose (int, default: 1 ) \u2013

    Setting it to 0 will print nothing during training.

  • seed (int, default: 1 ) \u2013

    Random seed to be used internally for train_test_split

Other Parameters:

  • **kwargs \u2013

    Other infrequently used arguments that can also be passed as kwargs are:

    • device: str string indicating the device. One of 'cpu' or 'gpu'

    • num_workers: int number of workers to be used internally by the data loaders

    • reducelronplateau_criterion: str This sets the criterion that will be used by the lr scheduler to take a step: one of 'loss' or 'metric'. The ReduceLROnPlateau learning rate scheduler is a bit particular.
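
A minimal sketch of how this trainer might be used with an attention-based model follows (it assumes X_tab and a tab_preprocessor fitted with with_attention=True; all parameter values are illustrative):

>>> from pytorch_widedeep.models import TabTransformer
>>> from pytorch_widedeep.self_supervised_training import ContrastiveDenoisingTrainer
>>> tab_transformer = TabTransformer(
...     column_idx=tab_preprocessor.column_idx,
...     cat_embed_input=tab_preprocessor.cat_embed_input,
...     continuous_cols=tab_preprocessor.continuous_cols,
... )
>>> cd_trainer = ContrastiveDenoisingTrainer(model=tab_transformer, preprocessor=tab_preprocessor)
>>> cd_trainer.pretrain(X_tab, n_epochs=2, batch_size=256)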

Source code in pytorch_widedeep/self_supervised_training/contrastive_denoising_trainer.py
def __init__(\n    self,\n    model: ModelWithAttention,\n    preprocessor: TabPreprocessor,\n    optimizer: Optional[Optimizer] = None,\n    lr_scheduler: Optional[LRScheduler] = None,\n    callbacks: Optional[List[Callback]] = None,\n    loss_type: Literal[\"contrastive\", \"denoising\", \"both\"] = \"both\",\n    projection_head1_dims: Optional[List[int]] = None,\n    projection_head2_dims: Optional[List[int]] = None,\n    projection_heads_activation: str = \"relu\",\n    cat_mlp_type: Literal[\"single\", \"multiple\"] = \"multiple\",\n    cont_mlp_type: Literal[\"single\", \"multiple\"] = \"multiple\",\n    denoise_mlps_activation: str = \"relu\",\n    verbose: int = 1,\n    seed: int = 1,\n    **kwargs,\n):\n    super().__init__(\n        model=model,\n        preprocessor=preprocessor,\n        loss_type=loss_type,\n        optimizer=optimizer,\n        lr_scheduler=lr_scheduler,\n        callbacks=callbacks,\n        projection_head1_dims=projection_head1_dims,\n        projection_head2_dims=projection_head2_dims,\n        projection_heads_activation=projection_heads_activation,\n        cat_mlp_type=cat_mlp_type,\n        cont_mlp_type=cont_mlp_type,\n        denoise_mlps_activation=denoise_mlps_activation,\n        verbose=verbose,\n        seed=seed,\n        **kwargs,\n    )\n
"},{"location":"pytorch-widedeep/self_supervised_pretraining.html#pytorch_widedeep.self_supervised_training.ContrastiveDenoisingTrainer.pretrain","title":"pretrain","text":"
pretrain(\n    X_tab,\n    X_tab_val=None,\n    val_split=None,\n    validation_freq=1,\n    n_epochs=1,\n    batch_size=32,\n)\n

Pretrain method. Can also be called using .fit(<same_args>)

Parameters:

  • X_tab (ndarray) \u2013

    tabular dataset

  • X_tab_val (Optional[ndarray], default: None ) \u2013

    validation data. Note that, although it is possible to use contrastive-denoising training with a validation set, such set must include feature values that are all seen in the training set in the case of the categorical columns. This is because the values of the columns themselves will be used as targets when computing the loss. Therefore, if a new category is present in the validation set that was not seen in training this will effectively be like trying to predict a new, never seen category (and Pytorch will throw an error)

  • val_split (Optional[float], default: None ) \u2013

    An alternative to passing the validation set is to use a train/val split fraction via val_split

  • validation_freq (int, default: 1 ) \u2013

    epochs validation frequency

  • n_epochs (int, default: 1 ) \u2013

    number of epochs

  • batch_size (int, default: 32 ) \u2013

    batch size

Source code in pytorch_widedeep/self_supervised_training/contrastive_denoising_trainer.py
def pretrain(\n    self,\n    X_tab: np.ndarray,\n    X_tab_val: Optional[np.ndarray] = None,\n    val_split: Optional[float] = None,\n    validation_freq: int = 1,\n    n_epochs: int = 1,\n    batch_size: int = 32,\n):\n    r\"\"\"Pretrain method. Can also be called using `.fit(<same_args>)`\n\n    Parameters\n    ----------\n    X_tab: np.ndarray,\n        tabular dataset\n    X_tab_val: np.ndarray, Optional, default = None\n        validation data. Note that, although it is possible to use\n        contrastive-denoising training with a validation set, such set\n        must include feature values that are _all_ seen in the training\n        set in the case of the categorical columns. This is because the\n        values of the columns themselves will be used as targets when\n        computing the loss. Therefore, if a new category is present in\n        the validation set that was not seen in training this will\n        effectively be like trying to predict a new, never seen category\n        (and Pytorch will throw an error)\n    val_split: float, Optional. default=None\n        An alterative to passing the validation set is to use a train/val\n        split fraction via `val_split`\n    validation_freq: int, default=1\n        epochs validation frequency\n    n_epochs: int, default=1\n        number of epochs\n    batch_size: int, default=32\n        batch size\n    \"\"\"\n\n    self.batch_size = batch_size\n\n    train_set, eval_set = self._train_eval_split(X_tab, X_tab_val, val_split)\n    train_loader = DataLoader(\n        dataset=train_set, batch_size=batch_size, num_workers=self.num_workers\n    )\n    train_steps = len(train_loader)\n    if eval_set is not None:\n        eval_loader = DataLoader(\n            dataset=eval_set,\n            batch_size=batch_size,\n            num_workers=self.num_workers,\n            shuffle=False,\n        )\n        eval_steps = len(eval_loader)\n\n    self.callback_container.on_train_begin(\n        {\n            \"batch_size\": batch_size,\n            \"train_steps\": train_steps,\n            \"n_epochs\": n_epochs,\n        }\n    )\n    for epoch in range(n_epochs):\n        epoch_logs: Dict[str, float] = {}\n        self.callback_container.on_epoch_begin(epoch, logs=epoch_logs)\n\n        self.train_running_loss = 0.0\n        with trange(train_steps, disable=self.verbose != 1) as t:\n            for batch_idx, X in zip(t, train_loader):\n                t.set_description(\"epoch %i\" % (epoch + 1))\n                train_loss = self._train_step(X[0], batch_idx)\n                self.callback_container.on_batch_end(batch=batch_idx)\n                print_loss_and_metric(t, train_loss)\n\n        epoch_logs = save_epoch_logs(epoch_logs, train_loss, None, \"train\")\n\n        on_epoch_end_metric = None\n        if eval_set is not None and epoch % validation_freq == (\n            validation_freq - 1\n        ):\n            self.callback_container.on_eval_begin()\n            self.valid_running_loss = 0.0\n            with trange(eval_steps, disable=self.verbose != 1) as v:\n                for batch_idx, X in zip(v, eval_loader):\n                    v.set_description(\"valid\")\n                    val_loss = self._eval_step(X[0], batch_idx)\n                    print_loss_and_metric(v, val_loss)\n            epoch_logs = save_epoch_logs(epoch_logs, val_loss, None, \"val\")\n            on_epoch_end_metric = val_loss\n        else:\n            if self.reducelronplateau:\n                raise NotImplementedError(\n                   
 \"ReduceLROnPlateau scheduler can be used only with validation data.\"\n                )\n\n        self.callback_container.on_epoch_end(epoch, epoch_logs, on_epoch_end_metric)\n\n        if self.early_stop:\n            self.callback_container.on_train_end(epoch_logs)\n            break\n\n    self.callback_container.on_train_end(epoch_logs)\n    self._restore_best_weights()\n    self.cd_model.train()\n
"},{"location":"pytorch-widedeep/self_supervised_pretraining.html#pytorch_widedeep.self_supervised_training.ContrastiveDenoisingTrainer.save","title":"save","text":"
save(\n    path,\n    save_state_dict=False,\n    model_filename=\"cd_model.pt\",\n)\n

Saves the model, training and evaluation history (if any) to disk

Parameters:

  • path (str) \u2013

    path to the directory where the model and the feature importance attribute will be saved.

  • save_state_dict (bool, default: False ) \u2013

    Boolean indicating whether to save directly the model or the model's state dictionary

  • model_filename (str, default: 'cd_model.pt' ) \u2013

    filename where the model weights will be stored

Source code in pytorch_widedeep/self_supervised_training/contrastive_denoising_trainer.py
def save(\n    self,\n    path: str,\n    save_state_dict: bool = False,\n    model_filename: str = \"cd_model.pt\",\n):\n    r\"\"\"Saves the model, training and evaluation history (if any) to disk\n\n    Parameters\n    ----------\n    path: str\n        path to the directory where the model and the feature importance\n        attribute will be saved.\n    save_state_dict: bool, default = False\n        Boolean indicating whether to save directly the model or the\n        model's state dictionary\n    model_filename: str, Optional, default = \"cd_model.pt\"\n        filename where the model weights will be store\n    \"\"\"\n    save_dir = Path(path)\n    history_dir = save_dir / \"history\"\n    history_dir.mkdir(exist_ok=True, parents=True)\n\n    # the trainer is run with the History Callback by default\n    with open(history_dir / \"train_eval_history.json\", \"w\") as teh:\n        json.dump(self.history, teh)  # type: ignore[attr-defined]\n\n    has_lr_history = any(\n        [clbk.__class__.__name__ == \"LRHistory\" for clbk in self.callbacks]\n    )\n    if self.lr_scheduler is not None and has_lr_history:\n        with open(history_dir / \"lr_history.json\", \"w\") as lrh:\n            json.dump(self.lr_history, lrh)  # type: ignore[attr-defined]\n\n    model_path = save_dir / model_filename\n    if save_state_dict:\n        torch.save(self.cd_model.state_dict(), model_path)\n    else:\n        torch.save(self.cd_model, model_path)\n
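
For instance, continuing the sketch above (the directory and filename are illustrative):

    # save the training history and the pretrained weights to disk
    cd_trainer.save(path="cd_pretrained", save_state_dict=True, model_filename="cd_model.pt")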
"},{"location":"pytorch-widedeep/tab2vec.html","title":"Tab2Vec","text":""},{"location":"pytorch-widedeep/tab2vec.html#pytorch_widedeep.tab2vec.Tab2Vec","title":"Tab2Vec","text":"
Tab2Vec(\n    tab_preprocessor,\n    model,\n    return_dataframe=False,\n    verbose=False,\n)\n

Class to transform an input dataframe into vectorized form.

This class will take an input dataframe in the form of the dataframe used for training, and it will turn it into a vectorised form based on the processing applied by the model to the categorical and continuous columns.

NOTE: Currently this class is only implemented for the deeptabular component. Therefore, if the input dataframe has a text column or a column with the path to images, these will be ignored. We will be adding these functionalities in future versions

Parameters:

  • model (Union[WideDeep, BayesianWide, BayesianTabMlp]) \u2013

    WideDeep, BayesianWide or BayesianTabMlp model. Must be trained.

  • tab_preprocessor (TabPreprocessor) \u2013

    TabPreprocessor object. Must be fitted.

  • return_dataframe (bool, default: False ) \u2013

    Boolean indicating whether the returned object(s) will be array(s) or pandas dataframe(s)

Attributes:

  • vectorizer (Module) \u2013

    Torch module with the categorical and continuous encoding process

Examples:

>>> import string\n>>> from random import choices\n>>> import numpy as np\n>>> import pandas as pd\n>>> from pytorch_widedeep import Tab2Vec\n>>> from pytorch_widedeep.models import TabMlp, WideDeep\n>>> from pytorch_widedeep.preprocessing import TabPreprocessor\n>>>\n>>> colnames = list(string.ascii_lowercase)[:4]\n>>> cat_col1_vals = [\"a\", \"b\", \"c\"]\n>>> cat_col2_vals = [\"d\", \"e\", \"f\"]\n>>>\n>>> # Create the toy input dataframe and a toy dataframe to be vectorised\n>>> cat_inp = [np.array(choices(c, k=5)) for c in [cat_col1_vals, cat_col2_vals]]\n>>> cont_inp = [np.round(np.random.rand(5), 2) for _ in range(2)]\n>>> df_inp = pd.DataFrame(np.vstack(cat_inp + cont_inp).transpose(), columns=colnames)\n>>> cat_t2v = [np.array(choices(c, k=5)) for c in [cat_col1_vals, cat_col2_vals]]\n>>> cont_t2v = [np.round(np.random.rand(5), 2) for _ in range(2)]\n>>> df_t2v = pd.DataFrame(np.vstack(cat_t2v + cont_t2v).transpose(), columns=colnames)\n>>>\n>>> # fit the TabPreprocessor\n>>> embed_cols = [(\"a\", 2), (\"b\", 4)]\n>>> cont_cols = [\"c\", \"d\"]\n>>> tab_preprocessor = TabPreprocessor(cat_embed_cols=embed_cols, continuous_cols=cont_cols)\n>>> X_tab = tab_preprocessor.fit_transform(df_inp)\n>>>\n>>> # define the model (and let's assume we train it)\n>>> tabmlp = TabMlp(\n... column_idx=tab_preprocessor.column_idx,\n... cat_embed_input=tab_preprocessor.cat_embed_input,\n... continuous_cols=tab_preprocessor.continuous_cols,\n... mlp_hidden_dims=[8, 4])\n>>> model = WideDeep(deeptabular=tabmlp)\n>>> # ...train the model...\n>>>\n>>> # vectorise the dataframe\n>>> t2v = Tab2Vec(tab_preprocessor, model)\n>>> X_vec = t2v.transform(df_t2v)\n
Source code in pytorch_widedeep/tab2vec.py
def __init__(\n    self,\n    tab_preprocessor: TabPreprocessor,\n    model: Union[WideDeep, BayesianWide, BayesianTabMlp],\n    return_dataframe: bool = False,\n    verbose: bool = False,\n):\n    super(Tab2Vec, self).__init__()\n\n    self._check_inputs(tab_preprocessor, model, verbose)\n\n    self.tab_preprocessor = tab_preprocessor\n    self.return_dataframe = return_dataframe\n    self.verbose = verbose\n\n    self.vectorizer = self._set_vectorizer(model)\n\n    self._set_dim_attributes(tab_preprocessor, model)\n
"},{"location":"pytorch-widedeep/tab2vec.html#pytorch_widedeep.tab2vec.Tab2Vec.fit","title":"fit","text":"
fit(df, target_col=None)\n

This is an empty method, i.e. it returns the unchanged object itself. It is only included for consistency in case Tab2Vec is used as part of a Pipeline

Parameters:

  • df (DataFrame) \u2013

    DataFrame to be vectorised, i.e. the categorical and continuous columns will be encoded based on the processing applied within the model

  • target_col (Optional[str], default: None ) \u2013

    Column name of the target_col variable. If None only the array of predictors will be returned

Returns:

  • Tab2Vec \u2013
Source code in pytorch_widedeep/tab2vec.py
def fit(self, df: pd.DataFrame, target_col: Optional[str] = None) -> \"Tab2Vec\":\n    r\"\"\"This is an empty method i.e. Returns the unchanged object itself. Is\n    only included for consistency in case `Tab2Vec` is used as part of a\n    Pipeline\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        DataFrame to be vectorised, i.e. the categorical and continuous\n        columns will be encoded based on the processing applied within\n        the model\n    target_col: str, Optional\n        Column name of the target_col variable. If `None` only the array of\n        predictors will be returned\n\n    Returns\n    -------\n    Tab2Vec\n    \"\"\"\n\n    return self\n
"},{"location":"pytorch-widedeep/tab2vec.html#pytorch_widedeep.tab2vec.Tab2Vec.transform","title":"transform","text":"
transform(df, target_col=None)\n

Transforms the input dataframe into vectorized form. If a target column name is passed the target values will be returned separately in their corresponding type (np.ndarray or pd.DataFrame)

Parameters:

  • df (DataFrame) \u2013

    DataFrame to be vectorised, i.e. the categorical and continuous columns will be encoded based on the processing applied within the model

  • target_col (Optional[str], default: None ) \u2013

    Column name of the target_col variable. If None only the array of predictors will be returned

Returns:

  • Union[np.ndarray, Tuple[np.ndarray, np.ndarray], pd.DataFrame, Tuple[pd.DataFrame, pd.Series]] \u2013

    Returns either a numpy array with the vectorised values, or a Tuple of numpy arrays with the vectorised values and the target. The same applies to dataframes if we choose to set return_dataframe = True

Source code in pytorch_widedeep/tab2vec.py
def transform(\n    self,\n    df: pd.DataFrame,\n    target_col: Optional[str] = None,\n) -> Union[\n    np.ndarray,\n    Tuple[np.ndarray, np.ndarray],\n    pd.DataFrame,\n    Tuple[pd.DataFrame, pd.Series],\n]:\n    r\"\"\"Transforms the input dataframe into vectorized form. If a target\n    column name is passed the target values will be returned separately\n    in their corresponding type (np.ndarray or pd.DataFrame)\n\n    Parameters\n    ----------\n    df: pd.DataFrame\n        DataFrame to be vectorised, i.e. the categorical and continuous\n        columns will be encoded based on the processing applied within\n        the model\n    target_col: str, Optional\n        Column name of the target_col variable. If `None` only the array of\n        predictors will be returned\n\n    Returns\n    -------\n    Union[np.ndarray, Tuple[np.ndarray, np.ndarray], pd.DataFrame, Tuple[pd.DataFrame, pd.Series]\n        Returns eiter a numpy array with the vectorised values, or a Tuple\n        of numpy arrays with the vectorised values and the target. The\n        same applies to dataframes in case we choose to set\n        `return_dataframe = True`\n    \"\"\"\n\n    X_tab = self.tab_preprocessor.transform(df)\n    X = torch.from_numpy(X_tab.astype(\"float\")).to(device)\n\n    with torch.no_grad():\n        if self.is_tab_transformer:\n            x_vec, x_cont_not_embed = self.vectorizer(X)\n        else:\n            x_vec = self.vectorizer(X)\n            x_cont_not_embed = None\n\n    if self.tab_preprocessor.with_cls_token:\n        x_vec = x_vec[:, 1:, :]\n\n    if self.tab_preprocessor.with_attention:\n        x_vec = einops.rearrange(x_vec, \"s c e -> s (c e)\")\n\n    if x_cont_not_embed is not None:\n        x_vec = torch.cat([x_vec, x_cont_not_embed], 1).detach().cpu().numpy()\n    else:\n        x_vec = x_vec.detach().cpu().numpy()\n\n    if self.return_dataframe:\n        new_colnames = self._new_colnames()\n        if target_col:\n            return pd.DataFrame(data=x_vec, columns=new_colnames), df[[target_col]]\n        else:\n            return pd.DataFrame(data=x_vec, columns=new_colnames)\n    else:\n        if target_col:\n            return x_vec, df[target_col].values\n        else:\n            return x_vec\n
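
Continuing the class-level example above, the two return modes would look as follows (the "target" column is hypothetical, i.e. it is not part of the toy dataframe built in that example):

    # without a target column: a numpy array with the vectorised predictors
    X_vec = t2v.transform(df_t2v)

    # with a (hypothetical) target column: the target is returned separately
    # X_vec, y = t2v.transform(df_with_target, target_col="target")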
"},{"location":"pytorch-widedeep/tab2vec.html#pytorch_widedeep.tab2vec.Tab2Vec.fit_transform","title":"fit_transform","text":"
fit_transform(df, target_col=None)\n

Combines fit and transform

Source code in pytorch_widedeep/tab2vec.py
def fit_transform(\n    self, df: pd.DataFrame, target_col: Optional[str] = None\n) -> Union[\n    np.ndarray,\n    Tuple[np.ndarray, np.ndarray],\n    pd.DataFrame,\n    Tuple[pd.DataFrame, pd.Series],\n]:\n    r\"\"\"Combines `fit` and `transform`\"\"\"\n    return self.fit(df, target_col).transform(df, target_col)\n
"},{"location":"pytorch-widedeep/trainer.html","title":"Training multimodal Deep Learning Models","text":"

Here is the documentation for the Trainer class, which will do all the heavy lifting.

Trainer is also available from pytorch-widedeep directly, for example, one could do:

    from pytorch_widedeep.training import Trainer\n

or also:

    from pytorch_widedeep import Trainer\n
"},{"location":"pytorch-widedeep/trainer.html#pytorch_widedeep.training.Trainer","title":"Trainer","text":"
Trainer(\n    model,\n    objective,\n    custom_loss_function=None,\n    optimizers=None,\n    lr_schedulers=None,\n    initializers=None,\n    transforms=None,\n    callbacks=None,\n    metrics=None,\n    verbose=1,\n    seed=1,\n    **kwargs\n)\n

Bases: BaseTrainer

Class to set the attributes that will be used during the training process.

Parameters:

  • model (WideDeep) \u2013

    An object of class WideDeep

  • objective (str) \u2013

    Defines the objective, loss or cost function.

    Param aliases: loss_function, loss_fn, loss, cost_function, cost_fn, cost.

    Possible values are:

    • binary, aliases: logistic, binary_logloss, binary_cross_entropy

    • binary_focal_loss

    • multiclass, aliases: multi_logloss, cross_entropy, categorical_cross_entropy

    • multiclass_focal_loss

    • regression, aliases: mse, l2, mean_squared_error

    • mean_absolute_error, aliases: mae, l1

    • mean_squared_log_error, aliases: msle

    • root_mean_squared_error, aliases: rmse

    • root_mean_squared_log_error, aliases: rmsle

    • zero_inflated_lognormal, aliases: ziln

    • quantile

    • tweedie

  • custom_loss_function (Optional[Module], default: None ) \u2013

    It is possible to pass a custom loss function. See, for example, pytorch_widedeep.losses.FocalLoss for the required structure of the object, or the Examples section in this documentation and in the repo. Note that if custom_loss_function is not None, objective must be 'binary', 'multiclass' or 'regression', consistent with the loss function. A minimal sketch of such a loss is also included after the Examples section below.

  • optimizers (Optional[Union[Optimizer, Dict[str, Optimizer]]], default: None ) \u2013
    • An instance of Pytorch's Optimizer object (e.g. torch.optim.Adam()) or
    • a dictionary where the keys are the model components (i.e. 'wide', 'deeptabular', 'deeptext', 'deepimage' and/or 'deephead') and the values are the corresponding optimizers. If multiple optimizers are used the dictionary MUST contain an optimizer per model component.

    If no optimizers are passed, it will default to Adam for all model components

  • lr_schedulers (Optional[Union[LRScheduler, Dict[str, LRScheduler]]], default: None ) \u2013
    • An instance of Pytorch's LRScheduler object (e.g torch.optim.lr_scheduler.StepLR(opt, step_size=5)) or
    • a dictionary where the keys are the model components (i.e. 'wide', 'deeptabular', 'deeptext', 'deepimage' and/or 'deephead') and the values are the corresponding learning rate schedulers.
  • initializers (Optional[Union[Initializer, Dict[str, Initializer]]], default: None ) \u2013
    • An instance of an Initializer object see pytorch-widedeep.initializers or
    • a dictionary where the keys are the model components (i.e. 'wide', 'deeptabular', 'deeptext', 'deepimage' and/or 'deephead') and the values are the corresponding initializers.
  • transforms (Optional[List[Transforms]], default: None ) \u2013

    List with torchvision.transforms to be applied to the image component of the model (i.e. deepimage). See torchvision transforms.

  • callbacks (Optional[List[Callback]], default: None ) \u2013

    List with Callback objects. The three callbacks available in pytorch-widedeep are: LRHistory, ModelCheckpoint and EarlyStopping. The History and the LRShedulerCallback callbacks are used by default. This can also be a custom callback as long as it is an object of type Callback. See pytorch_widedeep.callbacks.Callback or the examples folder in the repo.

  • metrics (Optional[Union[List[Metric], List[TorchMetric]]], default: None ) \u2013
    • List of objects of type Metric. Metrics available are: Accuracy, Precision, Recall, FBetaScore, F1Score and R2Score. This can also be a custom metric as long as it is an object of type Metric. See pytorch_widedeep.metrics.Metric or the examples folder in the repo
    • List of objects of type torchmetrics.Metric. This can be any metric from torchmetrics library Examples. This can also be a custom metric as long as it is an object of type Metric. See the instructions.
  • verbose (int, default: 1 ) \u2013

    Verbosity level. If set to 0 nothing will be printed during training

  • seed (int, default: 1 ) \u2013

    Random seed to be used internally for train/test split

Other Parameters:

  • **kwargs \u2013

    Other infrequently used arguments that can also be passed as kwargs are:

    • device: str string indicating the device. One of 'cpu' or 'gpu'

    • num_workers: int number of workers to be used internally by the data loaders

    • lambda_sparse: float lambda sparse parameter in case the deeptabular component is TabNet

    • class_weight: List[float] This is the weight or pos_weight parameter in CrossEntropyLoss and BCEWithLogitsLoss, depending on whether the objective is multiclass or binary, respectively.

    • reducelronplateau_criterion: str This sets the criterion that will be used by the lr scheduler to take a step: one of 'loss' or 'metric'. The ReduceLROnPlateau learning rate scheduler is a bit particular.

Attributes:

  • cyclic_lr (bool) \u2013

    Attribute that indicates if any of the lr_schedulers is cyclic_lr (i.e. CyclicLR or OneCycleLR). See Pytorch schedulers.

  • feature_importance (dict) \u2013

    dict where the keys are the column names and the values are the corresponding feature importances. This attribute will only exist if the deeptabular component is a Tabnet model.

Examples:

>>> import torch\n>>> from torchvision.transforms import ToTensor\n>>>\n>>> # wide deep imports\n>>> from pytorch_widedeep.callbacks import EarlyStopping, LRHistory\n>>> from pytorch_widedeep.initializers import KaimingNormal, KaimingUniform, Normal, Uniform\n>>> from pytorch_widedeep.models import TabResnet, Vision, BasicRNN, Wide, WideDeep\n>>> from pytorch_widedeep import Trainer\n>>>\n>>> embed_input = [(u, i, j) for u, i, j in zip([\"a\", \"b\", \"c\"][:4], [4] * 3, [8] * 3)]\n>>> column_idx = {k: v for v, k in enumerate([\"a\", \"b\", \"c\"])}\n>>> wide = Wide(10, 1)\n>>>\n>>> # build the model\n>>> deeptabular = TabResnet(blocks_dims=[8, 4], column_idx=column_idx, cat_embed_input=embed_input)\n>>> deeptext = BasicRNN(vocab_size=10, embed_dim=4, padding_idx=0)\n>>> deepimage = Vision()\n>>> model = WideDeep(wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage)\n>>>\n>>> # set optimizers and schedulers\n>>> wide_opt = torch.optim.Adam(model.wide.parameters())\n>>> deep_opt = torch.optim.AdamW(model.deeptabular.parameters())\n>>> text_opt = torch.optim.Adam(model.deeptext.parameters())\n>>> img_opt = torch.optim.AdamW(model.deepimage.parameters())\n>>>\n>>> wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=5)\n>>> deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=3)\n>>> text_sch = torch.optim.lr_scheduler.StepLR(text_opt, step_size=5)\n>>> img_sch = torch.optim.lr_scheduler.StepLR(img_opt, step_size=3)\n>>>\n>>> optimizers = {\"wide\": wide_opt, \"deeptabular\": deep_opt, \"deeptext\": text_opt, \"deepimage\": img_opt}\n>>> schedulers = {\"wide\": wide_sch, \"deeptabular\": deep_sch, \"deeptext\": text_sch, \"deepimage\": img_sch}\n>>>\n>>> # set initializers and callbacks\n>>> initializers = {\"wide\": Uniform, \"deeptabular\": Normal, \"deeptext\": KaimingNormal, \"deepimage\": KaimingUniform}\n>>> transforms = [ToTensor]\n>>> callbacks = [LRHistory(n_epochs=4), EarlyStopping]\n>>>\n>>> # set the trainer\n>>> trainer = Trainer(model, objective=\"regression\", initializers=initializers, optimizers=optimizers,\n... lr_schedulers=schedulers, callbacks=callbacks, transforms=transforms)\n
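
As a complement to the example above, the following is a minimal sketch of a custom loss function (the class name and its scaling factor are made up; the only requirement, as with pytorch_widedeep.losses.FocalLoss, is an nn.Module whose forward takes the predictions and the target and returns a scalar tensor):

    import torch
    from torch import nn

    class ScaledMSELoss(nn.Module):
        # hypothetical custom loss: plain MSE multiplied by a constant factor
        def __init__(self, scale: float = 1.0):
            super().__init__()
            self.scale = scale

        def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
            return self.scale * torch.mean((input.view(-1) - target.view(-1)) ** 2)

    # passed alongside a consistent objective, e.g.:
    # trainer = Trainer(model, objective="regression", custom_loss_function=ScaledMSELoss())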
Source code in pytorch_widedeep/training/trainer.py
@alias(  # noqa: C901\n    \"objective\",\n    [\"loss_function\", \"loss_fn\", \"loss\", \"cost_function\", \"cost_fn\", \"cost\"],\n)\ndef __init__(\n    self,\n    model: WideDeep,\n    objective: str,\n    custom_loss_function: Optional[nn.Module] = None,\n    optimizers: Optional[Union[Optimizer, Dict[str, Optimizer]]] = None,\n    lr_schedulers: Optional[Union[LRScheduler, Dict[str, LRScheduler]]] = None,\n    initializers: Optional[Union[Initializer, Dict[str, Initializer]]] = None,\n    transforms: Optional[List[Transforms]] = None,\n    callbacks: Optional[List[Callback]] = None,\n    metrics: Optional[Union[List[Metric], List[TorchMetric]]] = None,\n    verbose: int = 1,\n    seed: int = 1,\n    **kwargs,\n):\n    super().__init__(\n        model=model,\n        objective=objective,\n        custom_loss_function=custom_loss_function,\n        optimizers=optimizers,\n        lr_schedulers=lr_schedulers,\n        initializers=initializers,\n        transforms=transforms,\n        callbacks=callbacks,\n        metrics=metrics,\n        verbose=verbose,\n        seed=seed,\n        **kwargs,\n    )\n
"},{"location":"pytorch-widedeep/trainer.html#pytorch_widedeep.training.Trainer.fit","title":"fit","text":"
fit(\n    X_wide=None,\n    X_tab=None,\n    X_text=None,\n    X_img=None,\n    X_train=None,\n    X_val=None,\n    val_split=None,\n    target=None,\n    n_epochs=1,\n    validation_freq=1,\n    batch_size=32,\n    custom_dataloader=None,\n    feature_importance_sample_size=None,\n    finetune=False,\n    with_lds=False,\n    **kwargs\n)\n

Fit method.

The input datasets can be passed either directly via numpy arrays (X_wide, X_tab, X_text or X_img) or alternatively, in dictionaries (X_train or X_val).

Parameters:

  • X_wide (Optional[ndarray], default: None ) \u2013

    Input for the wide model component. See pytorch_widedeep.preprocessing.WidePreprocessor

  • X_tab (Optional[ndarray], default: None ) \u2013

    Input for the deeptabular model component. See pytorch_widedeep.preprocessing.TabPreprocessor

  • X_text (Optional[ndarray], default: None ) \u2013

    Input for the deeptext model component. See pytorch_widedeep.preprocessing.TextPreprocessor

  • X_img (Optional[ndarray], default: None ) \u2013

    Input for the deepimage model component. See pytorch_widedeep.preprocessing.ImagePreprocessor

  • X_train (Optional[Dict[str, ndarray]], default: None ) \u2013

    The training dataset can also be passed in a dictionary. Keys are 'X_wide', 'X_tab', 'X_text', 'X_img' and 'target'. Values are the corresponding matrices.

  • X_val (Optional[Dict[str, ndarray]], default: None ) \u2013

    The validation dataset can also be passed in a dictionary. Keys are 'X_wide', 'X_tab', 'X_text', 'X_img' and 'target'. Values are the corresponding matrices.

  • val_split (Optional[float], default: None ) \u2013

    train/val split fraction

  • target (Optional[ndarray], default: None ) \u2013

    target values

  • n_epochs (int, default: 1 ) \u2013

    number of epochs

  • validation_freq (int, default: 1 ) \u2013

    epochs validation frequency

  • batch_size (int, default: 32 ) \u2013

    batch size

  • custom_dataloader (Optional[DataLoader], default: None ) \u2013

    object of class torch.utils.data.DataLoader. Available predefined dataloaders are in pytorch-widedeep.dataloaders. If None, a standard torch DataLoader is used.

  • finetune (bool, default: False ) \u2013

    fine-tune individual model components. This functionality can also be used to 'warm-up' individual components before the joint training starts (hence the alias warmup). See the Examples folder in the repo for more details

    pytorch_widedeep implements 3 fine-tune routines.

    • fine-tune all trainable layers at once. This routine is inspired by the work of Howard & Sebastian Ruder 2018 in their ULMfit paper. Using a Slanted Triangular learning rate (see the Leslie N. Smith paper), the process is the following: i) the learning rate will gradually increase for 10% of the training steps from max_lr/10 to max_lr. ii) It will then gradually decrease to max_lr/10 for the remaining 90% of the steps. The optimizer used in the process is Adam.

    and two gradual fine-tune routines, where only certain layers are trained at a time.

    • The so-called Felbo gradual fine-tune routine, based on the Felbo et al., 2017 DeepEmoji paper.
    • The Howard routine based on the work of Howard & Sebastian Ruder 2018 in their ULMfit paper.

    For details on how these routines work, please see the Examples section in this documentation and the Examples folder in the repo. Param Alias: warmup

  • with_lds (bool, default: False ) \u2013

    Boolean indicating if Label Distribution Smoothing will be used. NOTE: We consider this feature absolutely experimental and we recommend not using it unless the corresponding publication is well understood

Other Parameters:

  • **kwargs (dict) \u2013

    Other keyword arguments are:

    • DataLoader related parameters: For example, sampler, batch_sampler, collate_fn, etc. Please, see the pytorch DataLoader docs for details.

    • Label Distribution Smoothing related parameters:

      • lds_kernel (Literal['gaussian', 'triang', 'laplace']): choice of kernel for Label Distribution Smoothing
      • lds_ks (int): LDS kernel window size
      • lds_sigma (float): standard deviation of ['gaussian','laplace'] kernel for LDS
      • lds_granularity (int): number of bins in histogram used in LDS to count occurrence of sample values
      • lds_reweight (bool): option to reweight bin frequency counts in LDS
      • lds_y_max (Optional[float]): option to restrict LDS bins by upper label limit
      • lds_y_min (Optional[float]): option to restrict LDS bins by lower label limit

      See pytorch_widedeep.trainer._wd_dataset for more details on the implications of these parameters

    • Finetune related parameters: see the source code at pytorch_widedeep._finetune. Namely, these are:

      • finetune_epochs (int): number of epochs used for fine tuning
      • finetune_max_lr (float): max lr during fine tuning
      • routine (str): one of 'howard' or 'felbo'
      • deeptabular_gradual (bool): boolean indicating if the deeptabular component will be fine tuned gradually
      • deeptabular_layers (List[nn.Module]): List of pytorch modules indicating the layers of the deeptabular that will be fine tuned
      • deeptabular_max_lr (float): max lr for the deeptabular component during fine tuning
      • deeptext_gradual (bool): same as deeptabular_gradual but for the deeptext component
      • deeptext_layers (List[nn.Module]): same as deeptabular_gradual but for the deeptext component
      • deeptext_max_lr (float): same as deeptabular_gradual but for the deeptext component
      • deepimage_gradual (bool): same as deeptabular_gradual but for the deepimage component
      • deepimage_layers (List[nn.Module]): same as deeptabular_gradual but for the deepimage component
      • deepimage_max_lr (float): same as deeptabular_gradual but for the deepimage component

Examples:

For a series of comprehensive examples on how to use the fit method, please see the Examples folder in the repo
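
As a minimal sketch (assuming X_wide, X_tab and target are the arrays produced by the corresponding preprocessors, and the trainer defined above; the numbers are illustrative):

    trainer.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        target=target,
        n_epochs=5,
        batch_size=256,
        val_split=0.2,
    )

Passing finetune=True (plus the finetune-related kwargs listed above) would first run one of the fine-tune routines before the joint training starts.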

Source code in pytorch_widedeep/training/trainer.py
@alias(\"finetune\", [\"warmup\"])\ndef fit(  # noqa: C901\n    self,\n    X_wide: Optional[np.ndarray] = None,\n    X_tab: Optional[np.ndarray] = None,\n    X_text: Optional[np.ndarray] = None,\n    X_img: Optional[np.ndarray] = None,\n    X_train: Optional[Dict[str, np.ndarray]] = None,\n    X_val: Optional[Dict[str, np.ndarray]] = None,\n    val_split: Optional[float] = None,\n    target: Optional[np.ndarray] = None,\n    n_epochs: int = 1,\n    validation_freq: int = 1,\n    batch_size: int = 32,\n    custom_dataloader: Optional[DataLoader] = None,\n    feature_importance_sample_size: Optional[int] = None,\n    finetune: bool = False,\n    with_lds: bool = False,\n    **kwargs,\n):\n    r\"\"\"Fit method.\n\n    The input datasets can be passed either directly via numpy arrays\n    (`X_wide`, `X_tab`, `X_text` or `X_img`) or alternatively, in\n    dictionaries (`X_train` or `X_val`).\n\n    Parameters\n    ----------\n    X_wide: np.ndarray, Optional. default=None\n        Input for the `wide` model component.\n        See `pytorch_widedeep.preprocessing.WidePreprocessor`\n    X_tab: np.ndarray, Optional. default=None\n        Input for the `deeptabular` model component.\n        See `pytorch_widedeep.preprocessing.TabPreprocessor`\n    X_text: np.ndarray, Optional. default=None\n        Input for the `deeptext` model component.\n        See `pytorch_widedeep.preprocessing.TextPreprocessor`\n    X_img: np.ndarray, Optional. default=None\n        Input for the `deepimage` model component.\n        See `pytorch_widedeep.preprocessing.ImagePreprocessor`\n    X_train: Dict, Optional. default=None\n        The training dataset can also be passed in a dictionary. Keys are\n        _'X_wide'_, _'X_tab'_, _'X_text'_, _'X_img'_ and _'target'_. Values\n        are the corresponding matrices.\n    X_val: Dict, Optional. default=None\n        The validation dataset can also be passed in a dictionary. Keys\n        are _'X_wide'_, _'X_tab'_, _'X_text'_, _'X_img'_ and _'target'_.\n        Values are the corresponding matrices.\n    val_split: float, Optional. default=None\n        train/val split fraction\n    target: np.ndarray, Optional. default=None\n        target values\n    n_epochs: int, default=1\n        number of epochs\n    validation_freq: int, default=1\n        epochs validation frequency\n    batch_size: int, default=32\n        batch size\n    custom_dataloader: `DataLoader`, Optional, default=None\n        object of class `torch.utils.data.DataLoader`. Available\n        predefined dataloaders are in `pytorch-widedeep.dataloaders`.If\n        `None`, a standard torch `DataLoader` is used.\n    finetune: bool, default=False\n        fine-tune individual model components. This functionality can also\n        be used to 'warm-up' (and hence the alias `warmup`) individual\n        components before the joined training starts, and hence its\n        alias. See the Examples folder in the repo for more details\n\n        `pytorch_widedeep` implements 3 fine-tune routines.\n\n        - fine-tune all trainable layers at once. This routine is\n          inspired by the work of Howard & Sebastian Ruder 2018 in their\n          [ULMfit paper](https://arxiv.org/abs/1801.06146). Using a\n          Slanted Triangular learing (see\n          [Leslie N. Smith paper](https://arxiv.org/pdf/1506.01186.pdf) ) ,\n          the process is the following: *i*) the learning rate will\n          gradually increase for 10% of the training steps from max_lr/10\n          to max_lr. 
*ii*) It will then gradually decrease to max_lr/10\n          for the remaining 90% of the steps. The optimizer used in the\n          process is `Adam`.\n\n        and two gradual fine-tune routines, where only certain layers are\n        trained at a time.\n\n        - The so called `Felbo` gradual fine-tune rourine, based on the the\n          Felbo et al., 2017 [DeepEmoji paper](https://arxiv.org/abs/1708.00524).\n        - The `Howard` routine based on the work of Howard & Sebastian Ruder 2018 in their\n          [ULMfit paper](https://arxiv.org/abs/1801.06146>).\n\n        For details on how these routines work, please see the Examples\n        section in this documentation and the Examples folder in the repo. <br/>\n        Param Alias: `warmup`\n    with_lds: bool, default=False\n        Boolean indicating if Label Distribution Smoothing will be used. <br/>\n        information_source: **NOTE**: We consider this feature absolutely\n        experimental and we recommend the user to not use it unless the\n        corresponding [publication](https://arxiv.org/abs/2102.09554) is\n        well understood\n\n    Other Parameters\n    ----------------\n    **kwargs : dict\n        Other keyword arguments are:\n\n        - **DataLoader related parameters**:<br/>\n            For example,  `sampler`, `batch_sampler`, `collate_fn`, etc.\n            Please, see the pytorch\n            [DataLoader docs](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader)\n            for details.\n\n        - **Label Distribution Smoothing related parameters**:<br/>\n\n            - lds_kernel (`Literal['gaussian', 'triang', 'laplace']`):\n                choice of kernel for Label Distribution Smoothing\n            - lds_ks (`int`):\n                LDS kernel window size\n            - lds_sigma (`float`):\n                standard deviation of ['gaussian','laplace'] kernel for LDS\n            - lds_granularity (`int`):\n                number of bins in histogram used in LDS to count occurence of sample values\n            - lds_reweight (`bool`):\n                option to reweight bin frequency counts in LDS\n            - lds_y_max (`Optional[float]`):\n                option to restrict LDS bins by upper label limit\n            - lds_y_min (`Optional[float]`):\n                option to restrict LDS bins by lower label limit\n\n            See `pytorch_widedeep.trainer._wd_dataset` for more details on\n            the implications of these parameters\n\n        - **Finetune related parameters**:<br/>\n            see the source code at `pytorch_widedeep._finetune`. 
Namely, these are:\n\n            - `finetune_epochs` (`int`):\n                number of epochs use for fine tuning\n            - `finetune_max_lr` (`float`):\n               max lr during fine tuning\n            - `routine` (`str`):\n               one of _'howard'_ or _'felbo'_\n            - `deeptabular_gradual` (`bool`):\n               boolean indicating if the `deeptabular` component will be fine tuned gradually\n            - `deeptabular_layers` (`List[nn.Module]`):\n               List of pytorch modules indicating the layers of the\n               `deeptabular` that will be fine tuned\n            - `deeptabular_max_lr` (`float`):\n               max lr for the `deeptabular` componet during fine tuning\n            - `deeptext_gradual` (`bool`):\n               same as `deeptabular_gradual` but for the `deeptext` component\n            - `deeptext_layers` (`List[nn.Module]`):\n               same as `deeptabular_gradual` but for the `deeptext` component\n            - `deeptext_max_lr` (`float`):\n               same as `deeptabular_gradual` but for the `deeptext` component\n            - `deepimage_gradual` (`bool`):\n               same as `deeptabular_gradual` but for the `deepimage` component\n            - `deepimage_layers` (`List[nn.Module]`):\n               same as `deeptabular_gradual` but for the `deepimage` component\n            - `deepimage_max_lr` (`float`):\n                same as `deeptabular_gradual` but for the `deepimage` component\n\n    Examples\n    --------\n\n    For a series of comprehensive examples on how to use the `fit` method, please see the\n    [Examples](https://github.com/jrzaurin/pytorch-widedeep/tree/master/examples)\n    folder in the repo\n    \"\"\"\n\n    lds_args, dataloader_args, finetune_args = self._extract_kwargs(kwargs)\n    lds_args[\"with_lds\"] = with_lds\n    self.with_lds = with_lds\n\n    self.batch_size = batch_size\n\n    train_set, eval_set = wd_train_val_split(\n        self.seed,\n        self.method,  # type: ignore\n        X_wide,\n        X_tab,\n        X_text,\n        X_img,\n        X_train,\n        X_val,\n        val_split,\n        target,\n        **lds_args,\n    )\n    if isinstance(custom_dataloader, type):\n        if issubclass(custom_dataloader, DataLoader):\n            train_loader = custom_dataloader(  # type: ignore[misc]\n                dataset=train_set,\n                batch_size=batch_size,\n                num_workers=self.num_workers,\n                **dataloader_args,\n            )\n        else:\n            NotImplementedError(\n                \"Custom DataLoader must be a subclass of \"\n                \"torch.utils.data.DataLoader, please see the \"\n                \"pytorch documentation or examples in \"\n                \"pytorch_widedeep.dataloaders\"\n            )\n    else:\n        train_loader = DataLoaderDefault(\n            dataset=train_set,\n            batch_size=batch_size,\n            num_workers=self.num_workers,\n            **dataloader_args,\n        )\n    train_steps = len(train_loader)\n    if eval_set is not None:\n        eval_loader = DataLoader(\n            dataset=eval_set,\n            batch_size=batch_size,\n            num_workers=self.num_workers,\n            shuffle=False,\n        )\n        eval_steps = len(eval_loader)\n\n    if finetune:\n        self.with_finetuning: bool = True\n        self._finetune(train_loader, **finetune_args)\n        if self.verbose:\n            print(\n                \"Fine-tuning (or warmup) of individual 
components completed. \"\n                \"Training the whole model for {} epochs\".format(n_epochs)\n            )\n    else:\n        self.with_finetuning = False\n\n    self.callback_container.on_train_begin(\n        {\"batch_size\": batch_size, \"train_steps\": train_steps, \"n_epochs\": n_epochs}\n    )\n    for epoch in range(n_epochs):\n        epoch_logs: Dict[str, float] = {}\n        self.callback_container.on_epoch_begin(epoch, logs=epoch_logs)\n\n        self.train_running_loss = 0.0\n        with trange(train_steps, disable=self.verbose != 1) as t:\n            for batch_idx, (data, targett, lds_weightt) in zip(t, train_loader):\n                t.set_description(\"epoch %i\" % (epoch + 1))\n                train_score, train_loss = self._train_step(\n                    data, targett, batch_idx, epoch, lds_weightt\n                )\n                print_loss_and_metric(t, train_loss, train_score)\n                self.callback_container.on_batch_end(batch=batch_idx)\n        epoch_logs = save_epoch_logs(epoch_logs, train_loss, train_score, \"train\")\n\n        on_epoch_end_metric = None\n        if eval_set is not None and epoch % validation_freq == (\n            validation_freq - 1\n        ):\n            self.callback_container.on_eval_begin()\n            self.valid_running_loss = 0.0\n            with trange(eval_steps, disable=self.verbose != 1) as v:\n                for i, (data, targett) in zip(v, eval_loader):\n                    v.set_description(\"valid\")\n                    val_score, val_loss = self._eval_step(data, targett, i)\n                    print_loss_and_metric(v, val_loss, val_score)\n            epoch_logs = save_epoch_logs(epoch_logs, val_loss, val_score, \"val\")\n\n            if self.reducelronplateau:\n                if self.reducelronplateau_criterion == \"loss\":\n                    on_epoch_end_metric = val_loss\n                else:\n                    on_epoch_end_metric = val_score[\n                        self.reducelronplateau_criterion\n                    ]\n        else:\n            if self.reducelronplateau:\n                raise NotImplementedError(\n                    \"ReduceLROnPlateau scheduler can be used only with validation data.\"\n                )\n        self.callback_container.on_epoch_end(epoch, epoch_logs, on_epoch_end_metric)\n\n        if self.early_stop:\n            # self.callback_container.on_train_end(epoch_logs)\n            break\n\n        if self.model.with_fds:\n            self._update_fds_stats(train_loader, epoch)\n\n    self.callback_container.on_train_end(epoch_logs)\n\n    if feature_importance_sample_size is not None:\n        self.feature_importance = FeatureImportance(\n            self.device, feature_importance_sample_size\n        ).feature_importance(train_loader, self.model)\n    self._restore_best_weights()\n    self.model.train()\n
"},{"location":"pytorch-widedeep/trainer.html#pytorch_widedeep.training.Trainer.predict","title":"predict","text":"
predict(\n    X_wide=None,\n    X_tab=None,\n    X_text=None,\n    X_img=None,\n    X_test=None,\n    batch_size=None,\n)\n

Returns the predictions

The input datasets can be passed either directly via numpy arrays (X_wide, X_tab, X_text or X_img) or alternatively, in a dictionary (X_test)

Parameters:

  • X_wide (Optional[ndarray], default: None ) \u2013

    Input for the wide model component. See pytorch_widedeep.preprocessing.WidePreprocessor

  • X_tab (Optional[ndarray], default: None ) \u2013

    Input for the deeptabular model component. See pytorch_widedeep.preprocessing.TabPreprocessor

  • X_text (Optional[ndarray], default: None ) \u2013

    Input for the deeptext model component. See pytorch_widedeep.preprocessing.TextPreprocessor

  • X_img (Optional[ndarray], default: None ) \u2013

    Input for the deepimage model component. See pytorch_widedeep.preprocessing.ImagePreprocessor

  • X_test (Optional[Dict[str, ndarray]], default: None ) \u2013

    The test dataset can also be passed in a dictionary. Keys are 'X_wide', 'X_tab', 'X_text', 'X_img' and 'target'. Values are the corresponding matrices.

  • batch_size (Optional[int], default: None ) \u2013

    If a trainer is used to predict after having trained a model, the batch_size needs to be defined, since it is not set when the Trainer is instantiated

Returns:

  • np.ndarray: \u2013

    array with the predictions
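
For example (a sketch; X_wide_test and X_tab_test are assumed to come from the same, already fitted preprocessors used for training):

    preds = trainer.predict(X_wide=X_wide_test, X_tab=X_tab_test, batch_size=256)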

Source code in pytorch_widedeep/training/trainer.py
def predict(  # type: ignore[return]\n    self,\n    X_wide: Optional[np.ndarray] = None,\n    X_tab: Optional[np.ndarray] = None,\n    X_text: Optional[np.ndarray] = None,\n    X_img: Optional[np.ndarray] = None,\n    X_test: Optional[Dict[str, np.ndarray]] = None,\n    batch_size: Optional[int] = None,\n) -> np.ndarray:\n    r\"\"\"Returns the predictions\n\n    The input datasets can be passed either directly via numpy arrays\n    (`X_wide`, `X_tab`, `X_text` or `X_img`) or alternatively, in\n    a dictionary (`X_test`)\n\n\n    Parameters\n    ----------\n    X_wide: np.ndarray, Optional. default=None\n        Input for the `wide` model component.\n        See `pytorch_widedeep.preprocessing.WidePreprocessor`\n    X_tab: np.ndarray, Optional. default=None\n        Input for the `deeptabular` model component.\n        See `pytorch_widedeep.preprocessing.TabPreprocessor`\n    X_text: np.ndarray, Optional. default=None\n        Input for the `deeptext` model component.\n        See `pytorch_widedeep.preprocessing.TextPreprocessor`\n    X_img: np.ndarray, Optional. default=None\n        Input for the `deepimage` model component.\n        See `pytorch_widedeep.preprocessing.ImagePreprocessor`\n    X_test: Dict, Optional. default=None\n        The test dataset can also be passed in a dictionary. Keys are\n        `X_wide`, _'X_tab'_, _'X_text'_, _'X_img'_ and _'target'_. Values\n        are the corresponding matrices.\n    batch_size: int, default = 256\n        If a trainer is used to predict after having trained a model, the\n        `batch_size` needs to be defined as it will not be defined as\n        the `Trainer` is instantiated\n\n    Returns\n    -------\n    np.ndarray:\n        array with the predictions\n    \"\"\"\n    preds_l = self._predict(X_wide, X_tab, X_text, X_img, X_test, batch_size)\n    if self.method == \"regression\":\n        return np.vstack(preds_l).squeeze(1)\n    if self.method == \"binary\":\n        preds = np.vstack(preds_l).squeeze(1)\n        return (preds > 0.5).astype(\"int\")\n    if self.method == \"qregression\":\n        return np.vstack(preds_l)\n    if self.method == \"multiclass\":\n        preds = np.vstack(preds_l)\n        return np.argmax(preds, 1)  # type: ignore[return-value]\n
"},{"location":"pytorch-widedeep/trainer.html#pytorch_widedeep.training.Trainer.predict_uncertainty","title":"predict_uncertainty","text":"
predict_uncertainty(\n    X_wide=None,\n    X_tab=None,\n    X_text=None,\n    X_img=None,\n    X_test=None,\n    batch_size=None,\n    uncertainty_granularity=1000,\n)\n

Returns the predicted uncertainty of the model for the test dataset using a Monte Carlo method during which dropout layers are activated in the evaluation/prediction phase and each sample is predicted N times (uncertainty_granularity times).

This is based on Dropout as a Bayesian Approximation: Representing Model Uncertainty in Deep Learning.

Parameters:

  • X_wide (Optional[ndarray], default: None ) \u2013

    Input for the wide model component. See pytorch_widedeep.preprocessing.WidePreprocessor

  • X_tab (Optional[ndarray], default: None ) \u2013

    Input for the deeptabular model component. See pytorch_widedeep.preprocessing.TabPreprocessor

  • X_text (Optional[ndarray], default: None ) \u2013

    Input for the deeptext model component. See pytorch_widedeep.preprocessing.TextPreprocessor

  • X_img (Optional[ndarray], default: None ) \u2013

    Input for the deepimage model component. See pytorch_widedeep.preprocessing.ImagePreprocessor

  • X_test (Optional[Dict[str, ndarray]], default: None ) \u2013

    The test dataset can also be passed in a dictionary. Keys are 'X_wide', 'X_tab', 'X_text', 'X_img' and 'target'. Values are the corresponding matrices.

  • batch_size (Optional[int], default: None ) \u2013

    If a trainer is used to predict after having trained a model, the batch_size needs to be defined, since it is not set when the Trainer is instantiated

  • uncertainty_granularity (int, default: 1000 ) \u2013

    number of times the model does prediction for each sample

Returns:

  • np.ndarray: \u2013
    • if method = regression, it will return an array with (max, min, mean, stdev) values for each sample.
    • if method = binary it will return an array with (mean_cls_0_prob, mean_cls_1_prob, predicted_cls) for each sample.
    • if method = multiclass it will return an array with (mean_cls_0_prob, mean_cls_1_prob, mean_cls_2_prob, ... , predicted_cls) values for each sample.
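
For example (a sketch; X_tab_test is assumed to come from the fitted TabPreprocessor, and 100 is an illustrative number of Monte Carlo passes):

    unc = trainer.predict_uncertainty(
        X_tab=X_tab_test, batch_size=256, uncertainty_granularity=100
    )
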
Source code in pytorch_widedeep/training/trainer.py
def predict_uncertainty(  # type: ignore[return]\n    self,\n    X_wide: Optional[np.ndarray] = None,\n    X_tab: Optional[np.ndarray] = None,\n    X_text: Optional[np.ndarray] = None,\n    X_img: Optional[np.ndarray] = None,\n    X_test: Optional[Dict[str, np.ndarray]] = None,\n    batch_size: Optional[int] = None,\n    uncertainty_granularity=1000,\n) -> np.ndarray:\n    r\"\"\"Returns the predicted ucnertainty of the model for the test dataset\n    using a Monte Carlo method during which dropout layers are activated\n    in the evaluation/prediction phase and each sample is predicted N\n    times (`uncertainty_granularity` times).\n\n    This is based on\n    [Dropout as a Bayesian Approximation: Representing\n    Model Uncertainty in Deep Learning](https://arxiv.org/abs/1506.02142?context=stat).\n\n    Parameters\n    ----------\n    X_wide: np.ndarray, Optional. default=None\n        Input for the `wide` model component.\n        See `pytorch_widedeep.preprocessing.WidePreprocessor`\n    X_tab: np.ndarray, Optional. default=None\n        Input for the `deeptabular` model component.\n        See `pytorch_widedeep.preprocessing.TabPreprocessor`\n    X_text: np.ndarray, Optional. default=None\n        Input for the `deeptext` model component.\n        See `pytorch_widedeep.preprocessing.TextPreprocessor`\n    X_img: np.ndarray, Optional. default=None\n        Input for the `deepimage` model component.\n        See `pytorch_widedeep.preprocessing.ImagePreprocessor`\n    X_test: Dict, Optional. default=None\n        The test dataset can also be passed in a dictionary. Keys are\n        _'X_wide'_, _'X_tab'_, _'X_text'_, _'X_img'_ and _'target'_. Values\n        are the corresponding matrices.\n    batch_size: int, default = 256\n        If a trainer is used to predict after having trained a model, the\n        `batch_size` needs to be defined as it will not be defined as\n        the `Trainer` is instantiated\n    uncertainty_granularity: int default = 1000\n        number of times the model does prediction for each sample\n\n    Returns\n    -------\n    np.ndarray:\n        - if `method = regression`, it will return an array with `(max, min, mean, stdev)`\n          values for each sample.\n        - if `method = binary` it will return an array with\n          `(mean_cls_0_prob, mean_cls_1_prob, predicted_cls)` for each sample.\n        - if `method = multiclass` it will return an array with\n          `(mean_cls_0_prob, mean_cls_1_prob, mean_cls_2_prob, ... 
, predicted_cls)`\n          values for each sample.\n\n    \"\"\"\n    preds_l = self._predict(\n        X_wide,\n        X_tab,\n        X_text,\n        X_img,\n        X_test,\n        batch_size,\n        uncertainty_granularity,\n        uncertainty=True,\n    )\n    preds = np.vstack(preds_l)\n    samples_num = int(preds.shape[0] / uncertainty_granularity)\n    if self.method == \"regression\":\n        preds = preds.squeeze(1)\n        preds = preds.reshape((uncertainty_granularity, samples_num))\n        return np.array(\n            (\n                preds.max(axis=0),\n                preds.min(axis=0),\n                preds.mean(axis=0),\n                preds.std(axis=0),\n            )\n        ).T\n    if self.method == \"qregression\":\n        raise ValueError(\n            \"Currently predict_uncertainty is not supported for qregression method\"\n        )\n    if self.method == \"binary\":\n        preds = preds.squeeze(1)\n        preds = preds.reshape((uncertainty_granularity, samples_num))\n        preds = preds.mean(axis=0)\n        probs = np.zeros([preds.shape[0], 3])\n        probs[:, 0] = 1 - preds\n        probs[:, 1] = preds\n        return probs\n    if self.method == \"multiclass\":\n        preds = preds.reshape(uncertainty_granularity, samples_num, preds.shape[1])\n        preds = preds.mean(axis=0)\n        preds = np.hstack((preds, np.vstack(np.argmax(preds, 1))))\n        return preds\n
"},{"location":"pytorch-widedeep/trainer.html#pytorch_widedeep.training.Trainer.predict_proba","title":"predict_proba","text":"
predict_proba(\n    X_wide=None,\n    X_tab=None,\n    X_text=None,\n    X_img=None,\n    X_test=None,\n    batch_size=None,\n)\n

Returns the predicted probabilities for the test dataset for binary and multiclass methods

The input datasets can be passed either directly via numpy arrays (X_wide, X_tab, X_text or X_img) or alternatively, in a dictionary (X_test)

Parameters:

  • X_wide (Optional[ndarray], default: None ) \u2013

    Input for the wide model component. See pytorch_widedeep.preprocessing.WidePreprocessor

  • X_tab (Optional[ndarray], default: None ) \u2013

    Input for the deeptabular model component. See pytorch_widedeep.preprocessing.TabPreprocessor

  • X_text (Optional[ndarray], default: None ) \u2013

    Input for the deeptext model component. See pytorch_widedeep.preprocessing.TextPreprocessor

  • X_img (Optional[ndarray], default: None ) \u2013

    Input for the deepimage model component. See pytorch_widedeep.preprocessing.ImagePreprocessor

  • X_test (Optional[Dict[str, ndarray]], default: None ) \u2013

    The test dataset can also be passed in a dictionary. Keys are 'X_wide', 'X_tab', 'X_text', 'X_img' and 'target'. Values are the corresponding matrices.

  • batch_size (Optional[int], default: None ) \u2013

    If a trainer is used to predict after having trained a model, the batch_size needs to be defined, since it is not set when the Trainer is instantiated

Returns:

  • ndarray \u2013

    array with the probabilities per class
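
For example (a sketch; with a binary objective the returned array has shape (n_samples, 2)):

    probs = trainer.predict_proba(X_tab=X_tab_test, batch_size=256)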

Source code in pytorch_widedeep/training/trainer.py
def predict_proba(  # type: ignore[return]\n    self,\n    X_wide: Optional[np.ndarray] = None,\n    X_tab: Optional[np.ndarray] = None,\n    X_text: Optional[np.ndarray] = None,\n    X_img: Optional[np.ndarray] = None,\n    X_test: Optional[Dict[str, np.ndarray]] = None,\n    batch_size: Optional[int] = None,\n) -> np.ndarray:\n    r\"\"\"Returns the predicted probabilities for the test dataset for  binary\n    and multiclass methods\n\n    The input datasets can be passed either directly via numpy arrays\n    (`X_wide`, `X_tab`, `X_text` or `X_img`) or alternatively, in\n    a dictionary (`X_test`)\n\n    Parameters\n    ----------\n    X_wide: np.ndarray, Optional. default=None\n        Input for the `wide` model component.\n        See `pytorch_widedeep.preprocessing.WidePreprocessor`\n    X_tab: np.ndarray, Optional. default=None\n        Input for the `deeptabular` model component.\n        See `pytorch_widedeep.preprocessing.TabPreprocessor`\n    X_text: np.ndarray, Optional. default=None\n        Input for the `deeptext` model component.\n        See `pytorch_widedeep.preprocessing.TextPreprocessor`\n    X_img: np.ndarray, Optional. default=None\n        Input for the `deepimage` model component.\n        See `pytorch_widedeep.preprocessing.ImagePreprocessor`\n    X_test: Dict, Optional. default=None\n        The test dataset can also be passed in a dictionary. Keys are\n        `X_wide`, _'X_tab'_, _'X_text'_, _'X_img'_ and _'target'_. Values\n        are the corresponding matrices.\n    batch_size: int, default = 256\n        If a trainer is used to predict after having trained a model, the\n        `batch_size` needs to be defined as it will not be defined as\n        the `Trainer` is instantiated\n\n    Returns\n    -------\n    np.ndarray\n        array with the probabilities per class\n    \"\"\"\n\n    preds_l = self._predict(X_wide, X_tab, X_text, X_img, X_test, batch_size)\n    if self.method == \"binary\":\n        preds = np.vstack(preds_l).squeeze(1)\n        probs = np.zeros([preds.shape[0], 2])\n        probs[:, 0] = 1 - preds\n        probs[:, 1] = preds\n        return probs\n    if self.method == \"multiclass\":\n        return np.vstack(preds_l)\n
"},{"location":"pytorch-widedeep/trainer.html#pytorch_widedeep.training.Trainer.save","title":"save","text":"
save(\n    path,\n    save_state_dict=False,\n    model_filename=\"wd_model.pt\",\n)\n

Saves the model, training and evaluation history, and the feature_importance attribute (if the deeptabular component is a Tabnet model) to disk

The Trainer class is built so that it 'just' trains a model. With that in mind, all the torch-related parameters (such as optimizers, learning rate schedulers, initializers, etc.) have to be defined externally and then passed to the Trainer. As a result, the Trainer does not generate any attributes or additional data products that need to be saved other than the model object itself, which can be saved like any other torch model (e.g. torch.save(model, path)).

The exception is Tabnet. If the deeptabular component is a Tabnet model, an attribute (a dict) called feature_importance will be created at the end of the training process. Therefore, a save method was created that will save the feature importance dictionary to a json file and, since we are here, the model weights, training history and learning rate history.

Parameters:

  • path (str) \u2013

    path to the directory where the model and the feature importance attribute will be saved.

  • save_state_dict (bool, default: False ) \u2013

    Boolean indicating whether to save the full model or just the model's state dictionary

  • model_filename (str, default: 'wd_model.pt' ) \u2013

    filename where the model weights will be stored
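
A short, hedged sketch (the directory name is illustrative and the trainer is assumed to have been fit). With save_state_dict=True the model file contains only the weights, so the model has to be re-built with the same architecture before loading them back:

trainer.save(path='model_checkpoints', save_state_dict=True, model_filename='wd_model.pt')\n# later, once 'model' has been re-built with the same architecture:\n# model.load_state_dict(torch.load('model_checkpoints/wd_model.pt'))\n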

Source code in pytorch_widedeep/training/trainer.py
def save(\n    self,\n    path: str,\n    save_state_dict: bool = False,\n    model_filename: str = \"wd_model.pt\",\n):\n    r\"\"\"Saves the model, training and evaluation history, and the\n    `feature_importance` attribute (if the `deeptabular` component is a\n    Tabnet model) to disk\n\n    The `Trainer` class is built so that it 'just' trains a model. With\n    that in mind, all the torch related parameters (such as optimizers,\n    learning rate schedulers, initializers, etc) have to be defined\n    externally and then passed to the `Trainer`. As a result, the\n    `Trainer` does not generate any attribute or additional data\n    products that need to be saved other than the `model` object itself,\n    which can be saved as any other torch model (e.g. `torch.save(model,\n    path)`).\n\n    The exception is Tabnet. If the `deeptabular` component is a Tabnet\n    model, an attribute (a dict) called `feature_importance` will be\n    created at the end of the training process. Therefore, a `save`\n    method was created that will save the feature importance dictionary\n    to a json file and, since we are here, the model weights, training\n    history and learning rate history.\n\n    Parameters\n    ----------\n    path: str\n        path to the directory where the model and the feature importance\n        attribute will be saved.\n    save_state_dict: bool, default = False\n        Boolean indicating whether to save directly the model or the\n        model's state dictionary\n    model_filename: str, Optional, default = \"wd_model.pt\"\n        filename where the model weights will be store\n    \"\"\"\n\n    save_dir = Path(path)\n    history_dir = save_dir / \"history\"\n    history_dir.mkdir(exist_ok=True, parents=True)\n\n    # the trainer is run with the History Callback by default\n    with open(history_dir / \"train_eval_history.json\", \"w\") as teh:\n        json.dump(self.history, teh)  # type: ignore[attr-defined]\n\n    has_lr_history = any(\n        [clbk.__class__.__name__ == \"LRHistory\" for clbk in self.callbacks]\n    )\n    if self.lr_scheduler is not None and has_lr_history:\n        with open(history_dir / \"lr_history.json\", \"w\") as lrh:\n            json.dump(self.lr_history, lrh)  # type: ignore[attr-defined]\n\n    model_path = save_dir / model_filename\n    if save_state_dict:\n        torch.save(self.model.state_dict(), model_path)\n    else:\n        torch.save(self.model, model_path)\n\n    if self.model.is_tabnet:\n        with open(save_dir / \"feature_importance.json\", \"w\") as fi:\n            json.dump(self.feature_importance, fi)\n
"},{"location":"pytorch-widedeep/utils/index.html","title":"The utils module","text":"

These are a series of utilities that might be useful for a number of preprocessing tasks, even those not directly related to pytorch-widedeep. All the classes and functions discussed here are available directly from the utils module. For example, the LabelEncoder within the deeptabular_utils submodule can be imported as:

from pytorch_widedeep.utils import LabelEncoder\n

These are classes and functions that are internally used in the library. We include them here in case the user finds them useful for other purposes.

"},{"location":"pytorch-widedeep/utils/deeptabular_utils.html","title":"deeptabular utils","text":""},{"location":"pytorch-widedeep/utils/deeptabular_utils.html#pytorch_widedeep.utils.deeptabular_utils.LabelEncoder","title":"LabelEncoder","text":"
LabelEncoder(\n    columns_to_encode=None,\n    with_attention=False,\n    shared_embed=False,\n)\n

Label-encode categorical values for multiple columns at once

NOTE: LabelEncoder reserves 0 for unseen new categories. This is convenient when defining the embedding layers, since we can just set padding idx to 0.

Parameters:

  • columns_to_encode (Optional[List[str]], default: None ) \u2013

    List of strings containing the names of the columns to encode. If None, all columns of type object in the dataframe will be label encoded.

  • with_attention (bool, default: False ) \u2013

    Boolean indicating whether the preprocessed data will be passed to an attention-based model. Aliased as for_transformer.

  • shared_embed (bool, default: False ) \u2013

    Boolean indicating if the embeddings will be \"shared\" when using attention-based models. The idea behind shared_embed is described in Appendix A of the TabTransformer paper: 'The goal of having column embedding is to enable the model to distinguish the classes in one column from those in the other columns'. In other words, the idea is to let the model learn which column is being embedded at any given time. See: pytorch_widedeep.models.transformers._layers.SharedEmbeddings.

Attributes:

  • encoding_dict (Dict) \u2013

    Dictionary containing the encoding mappings in the format, e.g. : {'colname1': {'cat1': 1, 'cat2': 2, ...}, 'colname2': {'cat1': 1, 'cat2': 2, ...}, ...}

  • inverse_encoding_dict (Dict) \u2013

    Dictionary containing the inverse encoding mappings in the format, e.g. : {'colname1': {1: 'cat1', 2: 'cat2', ...}, 'colname2': {1: 'cat1', 2: 'cat2', ...}, ...}
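
As a hedged illustration of the with_attention parameter (the dataframe is made up for this sketch): when with_attention=True and shared_embed=False the encoding index is cumulative across columns instead of restarting at 1 for each column:

import pandas as pd\nfrom pytorch_widedeep.utils import LabelEncoder\ndf = pd.DataFrame({'col1': ['a', 'b', 'a'], 'col2': ['x', 'y', 'y']})\nencoder = LabelEncoder(columns_to_encode=['col1', 'col2'], with_attention=True)\nencoder.fit(df)\n# encoder.encoding_dict is expected to look like\n# {'col1': {'a': 1, 'b': 2}, 'col2': {'x': 3, 'y': 4}}\n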

Source code in pytorch_widedeep/utils/deeptabular_utils.py
@alias(\"with_attention\", [\"for_transformer\"])\ndef __init__(\n    self,\n    columns_to_encode: Optional[List[str]] = None,\n    with_attention: bool = False,\n    shared_embed: bool = False,\n):\n    self.columns_to_encode = columns_to_encode\n\n    self.shared_embed = shared_embed\n    self.with_attention = with_attention\n\n    self.reset_embed_idx = not self.with_attention or self.shared_embed\n
"},{"location":"pytorch-widedeep/utils/deeptabular_utils.html#pytorch_widedeep.utils.deeptabular_utils.LabelEncoder.partial_fit","title":"partial_fit","text":"
partial_fit(df)\n

Main method. Creates encoding attributes.

Returns:

  • LabelEncoder \u2013

    LabelEncoder fitted object
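
A minimal sketch of chunked fitting (the csv path and chunksize are placeholders): partial_fit can be called once per chunk and the encodings accumulate, so categories first seen in later chunks are simply appended:

import pandas as pd\nfrom pytorch_widedeep.utils import LabelEncoder\nencoder = LabelEncoder(columns_to_encode=['col2'])\nfor chunk in pd.read_csv('some_large_file.csv', chunksize=100000):\n    encoder.partial_fit(chunk)\n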

Source code in pytorch_widedeep/utils/deeptabular_utils.py
def partial_fit(self, df: pd.DataFrame) -> \"LabelEncoder\":  # noqa: C901\n    \"\"\"Main method. Creates encoding attributes.\n\n    Returns\n    -------\n    LabelEncoder\n        `LabelEncoder` fitted object\n    \"\"\"\n    # here df is a chunk of the data. this is meant to be run when the\n    # data is large and we pass a chunk at a time. Therefore, we do not\n    # copy the input chunk as mutating a chunk is ok\n    if self.columns_to_encode is None:\n        self.columns_to_encode = list(df.select_dtypes(include=[\"object\"]).columns)\n    else:\n        # sanity check to make sure all categorical columns are in an adequate\n        # format\n        for col in self.columns_to_encode:\n            df[col] = df[col].astype(\"O\")\n\n    unique_column_vals: Dict[str, List[str]] = {}\n    for c in self.columns_to_encode:\n        unique_column_vals[c] = df[c].unique().tolist()\n\n    if not hasattr(self, \"encoding_dict\"):\n        # we run the method 'partial_fit' for the 1st time\n        self.encoding_dict: Dict[str, Dict[str, int]] = {}\n        if \"cls_token\" in unique_column_vals and self.shared_embed:\n            self.encoding_dict[\"cls_token\"] = {\"[CLS]\": 0}\n            del unique_column_vals[\"cls_token\"]\n\n        # leave 0 for padding/\"unseen\" categories. Also we need an\n        # attribute to keep track of the encoding in case we use\n        # attention and we do not re-start the index/counter\n        self.cum_idx: int = 1\n        for k, v in unique_column_vals.items():\n            self.encoding_dict[k] = {o: i + self.cum_idx for i, o in enumerate(v)}\n            self.cum_idx = 1 if self.reset_embed_idx else self.cum_idx + len(v)\n    else:\n        # the 'partial_fit' method has already run.\n        # \"cls_token\" will have been added already\n        if \"cls_token\" in unique_column_vals and self.shared_embed:\n            del unique_column_vals[\"cls_token\"]\n\n        # Classes in the new df/chunk of the dataset that have not been seen\n        # before\n        unseen_classes: Dict[str, List[str]] = {}\n        for c in self.columns_to_encode:\n            unseen_classes[c] = list(\n                np.setdiff1d(\n                    unique_column_vals[c], list(self.encoding_dict[c].keys())\n                )\n            )\n\n        # leave 0 for padding/\"unseen\" categories\n        for k, v in unique_column_vals.items():\n            # if we use attention we need to start encoding from the\n            # last 'overall' encoding index. Otherwise, we use the max\n            # encoding index per categorical col\n            _idx = (\n                max(self.encoding_dict[k].values()) + 1\n                if self.reset_embed_idx\n                else self.cum_idx\n            )\n            if len(unseen_classes[k]) != 0:\n                for i, o in enumerate(unseen_classes[k]):\n                    if o not in self.encoding_dict[k]:\n                        self.encoding_dict[k][o] = i + _idx\n                # if self.reset_embed_idx is True it will be 1 anyway\n                self.cum_idx = (\n                    1\n                    if self.reset_embed_idx\n                    else self.cum_idx + len(unseen_classes[k])\n                )\n\n    return self\n
"},{"location":"pytorch-widedeep/utils/deeptabular_utils.html#pytorch_widedeep.utils.deeptabular_utils.LabelEncoder.fit","title":"fit","text":"
fit(df)\n

Simply runs the partial_fit method when the data fits in memory

Returns:

  • LabelEncoder \u2013

    LabelEncoder fitted object

Source code in pytorch_widedeep/utils/deeptabular_utils.py
def fit(self, df: pd.DataFrame) -> \"LabelEncoder\":\n    \"\"\"Simply runs the `partial_fit` method when the data fits in memory\n\n    Returns\n    -------\n    LabelEncoder\n        `LabelEncoder` fitted object\n    \"\"\"\n    # this is meant to be run when the data fits in memory and therefore,\n    # we do not want to mutate the original df, so we copy it\n    self.partial_fit(df.copy())\n\n    self.inverse_encoding_dict = self.create_inverse_encoding_dict()\n\n    return self\n
"},{"location":"pytorch-widedeep/utils/deeptabular_utils.html#pytorch_widedeep.utils.deeptabular_utils.LabelEncoder.transform","title":"transform","text":"
transform(df)\n

Label-encodes the categories in columns_to_encode

Returns:

  • DataFrame \u2013

    label-encoded dataframe
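
A small, hedged example (the dataframes are made up for illustration): categories not seen during fit are encoded as 0, the index reserved for padding/unseen values:

import pandas as pd\nfrom pytorch_widedeep.utils import LabelEncoder\nencoder = LabelEncoder(columns_to_encode=['col2'])\nencoder.fit(pd.DataFrame({'col2': ['me', 'you', 'him']}))\nnew_df = pd.DataFrame({'col2': ['me', 'her']})\nencoder.transform(new_df)  # 'her' was not seen during fit, so it maps to 0\n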

Source code in pytorch_widedeep/utils/deeptabular_utils.py
def transform(self, df: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"Label Encoded the categories in `columns_to_encode`\n\n    Returns\n    -------\n    pd.DataFrame\n        label-encoded dataframe\n    \"\"\"\n    try:\n        self.encoding_dict\n    except AttributeError:\n        raise NotFittedError(\n            \"This LabelEncoder instance is not fitted yet. \"\n            \"Call 'fit' with appropriate arguments before using this LabelEncoder.\"\n        )\n\n    df_inp = df.copy()\n    # sanity check to make sure all categorical columns are in an adequate\n    # format\n    for col in self.columns_to_encode:  # type: ignore\n        df_inp[col] = df_inp[col].astype(\"O\")\n\n    for k, v in self.encoding_dict.items():\n        df_inp[k] = df_inp[k].apply(lambda x: v[x] if x in v.keys() else 0)\n\n    return df_inp\n
"},{"location":"pytorch-widedeep/utils/deeptabular_utils.html#pytorch_widedeep.utils.deeptabular_utils.LabelEncoder.fit_transform","title":"fit_transform","text":"
fit_transform(df)\n

Combines fit and transform

Examples:

>>> import pandas as pd\n>>> from pytorch_widedeep.utils import LabelEncoder\n>>> df = pd.DataFrame({'col1': [1,2,3], 'col2': ['me', 'you', 'him']})\n>>> columns_to_encode = ['col2']\n>>> encoder = LabelEncoder(columns_to_encode)\n>>> encoder.fit_transform(df)\n   col1  col2\n0     1     1\n1     2     2\n2     3     3\n>>> encoder.encoding_dict\n{'col2': {'me': 1, 'you': 2, 'him': 3}}\n

Returns:

  • DataFrame \u2013

    label-encoded dataframe

Source code in pytorch_widedeep/utils/deeptabular_utils.py
def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"Combines `fit` and `transform`\n\n    Examples\n    --------\n\n    >>> import pandas as pd\n    >>> from pytorch_widedeep.utils import LabelEncoder\n    >>> df = pd.DataFrame({'col1': [1,2,3], 'col2': ['me', 'you', 'him']})\n    >>> columns_to_encode = ['col2']\n    >>> encoder = LabelEncoder(columns_to_encode)\n    >>> encoder.fit_transform(df)\n       col1  col2\n    0     1     1\n    1     2     2\n    2     3     3\n    >>> encoder.encoding_dict\n    {'col2': {'me': 1, 'you': 2, 'him': 3}}\n\n    Returns\n    -------\n    pd.DataFrame\n        label-encoded dataframe\n    \"\"\"\n    return self.fit(df).transform(df)\n
"},{"location":"pytorch-widedeep/utils/deeptabular_utils.html#pytorch_widedeep.utils.deeptabular_utils.LabelEncoder.inverse_transform","title":"inverse_transform","text":"
inverse_transform(df)\n

Returns the original categories

Examples:

>>> import pandas as pd\n>>> from pytorch_widedeep.utils import LabelEncoder\n>>> df = pd.DataFrame({'col1': [1,2,3], 'col2': ['me', 'you', 'him']})\n>>> columns_to_encode = ['col2']\n>>> encoder = LabelEncoder(columns_to_encode)\n>>> df_enc = encoder.fit_transform(df)\n>>> encoder.inverse_transform(df_enc)\n   col1 col2\n0     1   me\n1     2  you\n2     3  him\n

Returns:

  • DataFrame \u2013

    DataFrame with original categories

Source code in pytorch_widedeep/utils/deeptabular_utils.py
def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:\n    \"\"\"Returns the original categories\n\n    Examples\n    --------\n\n    >>> import pandas as pd\n    >>> from pytorch_widedeep.utils import LabelEncoder\n    >>> df = pd.DataFrame({'col1': [1,2,3], 'col2': ['me', 'you', 'him']})\n    >>> columns_to_encode = ['col2']\n    >>> encoder = LabelEncoder(columns_to_encode)\n    >>> df_enc = encoder.fit_transform(df)\n    >>> encoder.inverse_transform(df_enc)\n       col1 col2\n    0     1   me\n    1     2  you\n    2     3  him\n\n    Returns\n    -------\n    pd.DataFrame\n        DataFrame with original categories\n    \"\"\"\n\n    if not hasattr(self, \"inverse_encoding_dict\"):\n        self.inverse_encoding_dict = self.create_inverse_encoding_dict()\n\n    for k, v in self.inverse_encoding_dict.items():\n        df[k] = df[k].apply(lambda x: v[x])\n\n    return df\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html","title":"Fastai transforms","text":"

I directly copied and pasted part of the transforms.py module from the fastai library (from an old version). The reason for doing so is that pytorch_widedeep only needs the Tokenizer and the Vocab classes there. This way I avoid extra dependencies. Credit for all the code in the fastai_transforms module in this pytorch-widedeep package goes to Jeremy Howard and the fastai team. I only include the documentation here for completeness, but I strongly advise the user to read the fastai documentation.

"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Tokenizer","title":"Tokenizer","text":"
Tokenizer(\n    tok_func=SpacyTokenizer,\n    lang=\"en\",\n    pre_rules=None,\n    post_rules=None,\n    special_cases=None,\n    n_cpus=None,\n)\n

Class to combine a series of rules and a tokenizer function to tokenize text with multiprocessing.

Setting some of the parameters of this class may require some familiarity with the source code.

Parameters:

  • tok_func (Callable, default: SpacyTokenizer ) \u2013

    Tokenizer Object. See pytorch_widedeep.utils.fastai_transforms.SpacyTokenizer

  • lang (str, default: 'en' ) \u2013

    Language of the text

  • pre_rules (Optional[ListRules], default: None ) \u2013

    Custom type: Collection[Callable[[str], str]]. These are Callable objects that will be applied to the text (str) directly, as rule(t), before it is tokenized.

  • post_rules (Optional[ListRules], default: None ) \u2013

    Custom type: Collection[Callable[[str], str]]. These are Callable objects that will be applied to the tokens as rule(tokens) after the text has been tokenized.

  • special_cases (Optional[Collection[str]], default: None ) \u2013

    special cases to be added to the tokenizer via spaCy's add_special_case method

  • n_cpus (Optional[int], default: None ) \u2013

    number of CPUs to use during the tokenization process
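
A hedged sketch of passing a custom rule (the rule itself is made up; note that passing pre_rules replaces the default fastai pre-processing rules):

from pytorch_widedeep.utils import Tokenizer\n\ndef strip_html_breaks(t):\n    # a pre-rule is just a callable str -> str applied before tokenization\n    return t.replace('<br />', ' ')\n\ntok = Tokenizer(pre_rules=[strip_html_breaks], n_cpus=1)\ntokens = tok.process_all(['first line<br />second line'])\n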

Source code in pytorch_widedeep/utils/fastai_transforms.py
def __init__(\n    self,\n    tok_func: Callable = SpacyTokenizer,\n    lang: str = \"en\",\n    pre_rules: Optional[ListRules] = None,\n    post_rules: Optional[ListRules] = None,\n    special_cases: Optional[Collection[str]] = None,\n    n_cpus: Optional[int] = None,\n):\n    self.tok_func, self.lang, self.special_cases = tok_func, lang, special_cases\n    self.pre_rules = ifnone(pre_rules, defaults.text_pre_rules)\n    self.post_rules = ifnone(post_rules, defaults.text_post_rules)\n    self.special_cases = (\n        special_cases if special_cases is not None else defaults.text_spec_tok\n    )\n    self.n_cpus = ifnone(n_cpus, defaults.cpus)\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Tokenizer.process_text","title":"process_text","text":"
process_text(t, tok)\n

Process and tokenize one text t with tokenizer tok.

Parameters:

  • t (str) \u2013

    text to be processed and tokenized

  • tok (BaseTokenizer) \u2013

    Instance of BaseTokenizer. See pytorch_widedeep.utils.fastai_transforms.BaseTokenizer

Returns:

  • List[str] \u2013

    List of tokens
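
A minimal sketch (assuming SpacyTokenizer takes the language code in its constructor, as in the original fastai transforms module):

from pytorch_widedeep.utils import Tokenizer\nfrom pytorch_widedeep.utils.fastai_transforms import SpacyTokenizer\ntok = Tokenizer()\nspacy_tok = SpacyTokenizer('en')\ntokens = tok.process_text('Machine learning is great', spacy_tok)\n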

Source code in pytorch_widedeep/utils/fastai_transforms.py
def process_text(self, t: str, tok: BaseTokenizer) -> List[str]:\n    r\"\"\"Process and tokenize one text ``t`` with tokenizer ``tok``.\n\n    Parameters\n    ----------\n    t: str\n        text to be processed and tokenized\n    tok: ``BaseTokenizer``\n        Instance of `BaseTokenizer`. See\n        `pytorch_widedeep.utils.fastai_transforms.BaseTokenizer`\n\n    Returns\n    -------\n    List[str]\n        List of tokens\n    \"\"\"\n    for rule in self.pre_rules:\n        t = rule(t)\n    toks = tok.tokenizer(t)\n    for rule in self.post_rules:\n        toks = rule(toks)\n    return toks\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Tokenizer.process_all","title":"process_all","text":"
process_all(texts)\n

Process a list of texts. Parallel execution of process_text.

Examples:

>>> from pytorch_widedeep.utils import Tokenizer\n>>> texts = ['Machine learning is great', 'but building stuff is even better']\n>>> tok = Tokenizer()\n>>> tok.process_all(texts)\n[['xxmaj', 'machine', 'learning', 'is', 'great'], ['but', 'building', 'stuff', 'is', 'even', 'better']]\n

NOTE: Note the token TK_MAJ (xxmaj), used to indicate that the next word begins with a capital in the original text. For more details on special tokens please see the fastai docs.

Returns:

  • List[List[str]] \u2013

    List containing lists of tokens. One list per \"document\"

Source code in pytorch_widedeep/utils/fastai_transforms.py
def process_all(self, texts: Collection[str]) -> List[List[str]]:\n    r\"\"\"Process a list of texts. Parallel execution of ``process_text``.\n\n    Examples\n    --------\n    >>> from pytorch_widedeep.utils import Tokenizer\n    >>> texts = ['Machine learning is great', 'but building stuff is even better']\n    >>> tok = Tokenizer()\n    >>> tok.process_all(texts)\n    [['xxmaj', 'machine', 'learning', 'is', 'great'], ['but', 'building', 'stuff', 'is', 'even', 'better']]\n\n    :information_source: **NOTE**:\n    Note the token ``TK_MAJ`` (`xxmaj`), used to indicate the\n    next word begins with a capital in the original text. For more\n    details of special tokens please see the [``fastai`` docs](https://docs.fast.ai/text.core.html#Tokenizing).\n\n    Returns\n    -------\n    List[List[str]]\n        List containing lists of tokens. One list per \"_document_\"\n\n    \"\"\"\n\n    if self.n_cpus <= 1:\n        return self._process_all_1(texts)\n    with ProcessPoolExecutor(self.n_cpus) as e:\n        return sum(\n            e.map(self._process_all_1, partition_by_cores(texts, self.n_cpus)), []\n        )\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Vocab","title":"Vocab","text":"
Vocab(max_vocab, min_freq, pad_idx=None)\n

Contains the correspondence between numbers and tokens.

Parameters:

  • max_vocab (int) \u2013

    maximum vocabulary size

  • min_freq (int) \u2013

    minimum frequency for a token to be considered

  • pad_idx (Optional[int], default: None ) \u2013

    padding index. If None, Fastai's Tokenizer leaves the 0 index for the unknown token ('xxunk') and defaults to 1 for the padding token ('xxpad').

Attributes:

  • itos (Collection) \u2013

    index to str. Collection of strings that are the tokens of the vocabulary

  • stoi (defaultdict) \u2013

    str to index. Dictionary containing the tokens of the vocabulary and their corresponding index

Source code in pytorch_widedeep/utils/fastai_transforms.py
def __init__(\n    self,\n    max_vocab: int,\n    min_freq: int,\n    pad_idx: Optional[int] = None,\n):\n    self.max_vocab = max_vocab\n    self.min_freq = min_freq\n    self.pad_idx = pad_idx\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Vocab.create","title":"create","text":"
create(tokens)\n

Create a vocabulary object from a set of tokens.

Parameters:

  • tokens (Tokens) \u2013

    Custom type: Collection[Collection[str]]; see pytorch_widedeep.wdtypes. Collection of collections of strings (e.g. a list of tokenized sentences)

Examples:

>>> from pytorch_widedeep.utils import Tokenizer, Vocab\n>>> texts = ['Machine learning is great', 'but building stuff is even better']\n>>> tokens = Tokenizer().process_all(texts)\n>>> vocab = Vocab(max_vocab=18, min_freq=1).create(tokens)\n>>> vocab.numericalize(['machine', 'learning', 'is', 'great'])\n[10, 11, 9, 12]\n>>> vocab.textify([10, 11, 9, 12])\n'machine learning is great'\n

NOTE: Note the many special tokens that fastai's tokenizer adds. These are particularly useful when building language models and/or in classification/regression tasks. Please see the fastai docs.

Returns:

  • Vocab \u2013

    An instance of a Vocab object

Source code in pytorch_widedeep/utils/fastai_transforms.py
def create(\n    self,\n    tokens: Tokens,\n) -> \"Vocab\":\n    r\"\"\"Create a vocabulary object from a set of tokens.\n\n    Parameters\n    ----------\n    tokens: Tokens\n        Custom type: ``Collection[Collection[str]]``  see\n        `pytorch_widedeep.wdtypes`. Collection of collection of\n        strings (e.g. list of tokenized sentences)\n\n    Examples\n    --------\n    >>> from pytorch_widedeep.utils import Tokenizer, Vocab\n    >>> texts = ['Machine learning is great', 'but building stuff is even better']\n    >>> tokens = Tokenizer().process_all(texts)\n    >>> vocab = Vocab(max_vocab=18, min_freq=1).create(tokens)\n    >>> vocab.numericalize(['machine', 'learning', 'is', 'great'])\n    [10, 11, 9, 12]\n    >>> vocab.textify([10, 11, 9, 12])\n    'machine learning is great'\n\n    :information_source: **NOTE**:\n    Note the many special tokens that ``fastai``'s' tokenizer adds. These\n    are particularly useful when building Language models and/or in\n    classification/Regression tasks. Please see the [``fastai`` docs](https://docs.fast.ai/text.core.html#Tokenizing).\n\n    Returns\n    -------\n    Vocab\n        An instance of a `Vocab` object\n    \"\"\"\n\n    freq = Counter(p for o in tokens for p in o)\n    itos = [o for o, c in freq.most_common(self.max_vocab) if c >= self.min_freq]\n    for o in reversed(defaults.text_spec_tok):\n        if o in itos:\n            itos.remove(o)\n        itos.insert(0, o)\n\n    if self.pad_idx is not None and self.pad_idx != 1:\n        itos.remove(PAD)\n        itos.insert(self.pad_idx, PAD)\n        # get the new 'xxunk' index\n        xxunk_idx = np.where([el == \"xxunk\" for el in itos])[0][0]\n    else:\n        xxunk_idx = 0\n\n    itos = itos[: self.max_vocab]\n    if (\n        len(itos) < self.max_vocab\n    ):  # Make sure vocab size is a multiple of 8 for fast mixed precision training\n        while len(itos) % 8 != 0:\n            itos.append(\"xxfake\")\n\n    self.itos = itos\n    self.stoi = defaultdict(\n        lambda: xxunk_idx, {v: k for k, v in enumerate(self.itos)}\n    )\n\n    return self\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Vocab.fit","title":"fit","text":"
fit(tokens)\n

Calls the create method. I simply want to honor fastai naming, but for consistency with the rest of the library I am including a fit method

Source code in pytorch_widedeep/utils/fastai_transforms.py
def fit(\n    self,\n    tokens: Tokens,\n) -> \"Vocab\":\n    \"\"\"\n    Calls the `create` method. I simply want to honor fast ai naming, but\n    for consistency with the rest of the library I am including a fit method\n    \"\"\"\n    return self.create(tokens)\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Vocab.numericalize","title":"numericalize","text":"
numericalize(t)\n

Convert a list of tokens t to their ids.

Returns:

  • List[int] \u2013

    List of 'numericalised' tokens

Source code in pytorch_widedeep/utils/fastai_transforms.py
def numericalize(self, t: Collection[str]) -> List[int]:\n    \"\"\"Convert a list of tokens ``t`` to their ids.\n\n    Returns\n    -------\n    List[int]\n        List of '_numericalsed_' tokens\n    \"\"\"\n    return [self.stoi[w] for w in t]\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Vocab.transform","title":"transform","text":"
transform(t)\n

Calls the numericalize method. I simply want to honor fastai naming, but for consistency with the rest of the library I am including a transform method

Source code in pytorch_widedeep/utils/fastai_transforms.py
def transform(self, t: Collection[str]) -> List[int]:\n    \"\"\"\n    Calls the `numericalize` method. I simply want to honor fast ai naming,\n    but for consistency with the rest of the library I am including a\n    transform method\n    \"\"\"\n    return self.numericalize(t)\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Vocab.textify","title":"textify","text":"
textify(nums, sep=' ')\n

Convert a list of nums (or indexes) to their tokens.

Returns:

  • List[str] \u2013

    List of tokens

Source code in pytorch_widedeep/utils/fastai_transforms.py
def textify(self, nums: Collection[int], sep=\" \") -> Union[str, List[str]]:\n    \"\"\"Convert a list of ``nums`` (or indexes) to their tokens.\n\n    Returns\n    -------\n    List[str]\n        List of tokens\n    \"\"\"\n    return (\n        sep.join([self.itos[i] for i in nums])\n        if sep is not None\n        else [self.itos[i] for i in nums]\n    )\n
"},{"location":"pytorch-widedeep/utils/fastai_transforms.html#pytorch_widedeep.utils.fastai_transforms.Vocab.inverse_transform","title":"inverse_transform","text":"
inverse_transform(nums, sep=' ')\n

Calls the textify method. I simply want to honor fastai naming, but for consistency with the rest of the library I am including an inverse_transform method

Source code in pytorch_widedeep/utils/fastai_transforms.py
def inverse_transform(\n    self, nums: Collection[int], sep=\" \"\n) -> Union[str, List[str]]:\n    \"\"\"\n    Calls the `textify` method. I simply want to honor fast ai naming, but\n    for consistency with the rest of the library I am including an\n    inverse_transform method\n    \"\"\"\n    # I simply want to honor fast ai naming, but for consistency with the\n    # rest of the library I am including an inverse_transform method\n    return self.textify(nums, sep)\n
"},{"location":"pytorch-widedeep/utils/image_utils.html","title":"Image utils","text":"

SimplePreprocessor and AspectAwarePreprocessor are directly taken from the great series of books Deep Learning for Computer Vision by Adrian Rosebrock. Therefore, all credit for the code in the image_utils module goes to Adrian Rosebrock.

"},{"location":"pytorch-widedeep/utils/image_utils.html#pytorch_widedeep.utils.image_utils.AspectAwarePreprocessor","title":"AspectAwarePreprocessor","text":"
AspectAwarePreprocessor(\n    width, height, inter=cv2.INTER_AREA\n)\n

Class to resize an image to a certain width and height taking into account the image aspect ratio

Parameters:

  • width (int) \u2013

    output width

  • height (int) \u2013

    output height

  • inter \u2013

    opencv interpolation method. See opencv InterpolationFlags.

Source code in pytorch_widedeep/utils/image_utils.py
def __init__(self, width: int, height: int, inter=cv2.INTER_AREA):\n    self.width = width\n    self.height = height\n    self.inter = inter\n
"},{"location":"pytorch-widedeep/utils/image_utils.html#pytorch_widedeep.utils.image_utils.AspectAwarePreprocessor.preprocess","title":"preprocess","text":"
preprocess(image)\n

Returns the resized input image taking into account the image aspect ratio

Parameters:

  • image (ndarray) \u2013

    Input image to be resized

Examples:

>>> import cv2\n>>> from pytorch_widedeep.utils import AspectAwarePreprocessor\n>>> img = cv2.imread(\"tests/test_data_utils/images/galaxy1.png\")\n>>> img.shape\n(694, 890, 3)\n>>> app = AspectAwarePreprocessor(width=224, height=224)\n>>> resized_img = app.preprocess(img)\n>>> resized_img.shape\n(224, 224, 3)\n

Returns:

  • ndarray \u2013

    Resized image according to its original aspect ratio

Source code in pytorch_widedeep/utils/image_utils.py
def preprocess(self, image: np.ndarray) -> np.ndarray:\n    r\"\"\"Returns the resized input image taking into account the image aspect ratio\n\n    Parameters\n    ----------\n    image: np.ndarray\n        Input image to be resized\n\n    Examples\n    --------\n    >>> import cv2\n    >>> from pytorch_widedeep.utils import AspectAwarePreprocessor\n    >>> img = cv2.imread(\"tests/test_data_utils/images/galaxy1.png\")\n    >>> img.shape\n    (694, 890, 3)\n    >>> app = AspectAwarePreprocessor(width=224, height=224)\n    >>> resized_img = app.preprocess(img)\n    >>> resized_img.shape\n    (224, 224, 3)\n\n    Returns\n    -------\n    np.ndarray\n        Resized image according to its original image aspect ratio\n    \"\"\"\n    (h, w) = image.shape[:2]\n    dW = 0\n    dH = 0\n\n    if w < h:\n        image = imutils.resize(image, width=self.width, inter=self.inter)\n        dH = int((image.shape[0] - self.height) / 2.0)\n    else:\n        image = imutils.resize(image, height=self.height, inter=self.inter)\n        dW = int((image.shape[1] - self.width) / 2.0)\n\n    (h, w) = image.shape[:2]\n    image = image[dH : h - dH, dW : w - dW]\n\n    resized_image = cv2.resize(\n        image, (self.width, self.height), interpolation=self.inter\n    )\n\n    return resized_image\n
"},{"location":"pytorch-widedeep/utils/image_utils.html#pytorch_widedeep.utils.image_utils.SimplePreprocessor","title":"SimplePreprocessor","text":"
SimplePreprocessor(width, height, inter=cv2.INTER_AREA)\n

Class to resize an image to a certain width and height

Parameters:

  • width (int) \u2013

    output width

  • height (int) \u2013

    output height

  • inter \u2013

    opencv interpolation method. See opencv InterpolationFlags.

Source code in pytorch_widedeep/utils/image_utils.py
def __init__(self, width: int, height: int, inter=cv2.INTER_AREA):\n    self.width = width\n    self.height = height\n    self.inter = inter\n
"},{"location":"pytorch-widedeep/utils/image_utils.html#pytorch_widedeep.utils.image_utils.SimplePreprocessor.preprocess","title":"preprocess","text":"
preprocess(image)\n

Returns the resized input image

Parameters:

  • image (ndarray) \u2013

    Input image to be resized

Returns:

  • ndarray \u2013

    Resized image
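
A short sketch mirroring the AspectAwarePreprocessor example above (assuming SimplePreprocessor is also exposed from pytorch_widedeep.utils and using the same test image):

import cv2\nfrom pytorch_widedeep.utils import SimplePreprocessor\nimg = cv2.imread('tests/test_data_utils/images/galaxy1.png')\nsp = SimplePreprocessor(width=224, height=224)\nresized_img = sp.preprocess(img)  # (224, 224, 3); the aspect ratio is not preserved\n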

Source code in pytorch_widedeep/utils/image_utils.py
def preprocess(self, image: np.ndarray) -> np.ndarray:\n    r\"\"\"Returns the resized input image\n\n    Parameters\n    ----------\n    image: np.ndarray\n        Input image to be resized\n\n    Returns\n    -------\n    np.ndarray\n        Resized image\n\n    \"\"\"\n    resized_image = cv2.resize(\n        image, (self.width, self.height), interpolation=self.inter\n    )\n\n    return resized_image\n
"},{"location":"pytorch-widedeep/utils/text_utils.html","title":"Text utils","text":"

Collection of helper functions that facilitate text processing.

"},{"location":"pytorch-widedeep/utils/text_utils.html#pytorch_widedeep.utils.text_utils.simple_preprocess","title":"simple_preprocess","text":"
simple_preprocess(\n    doc, lower=False, deacc=False, min_len=2, max_len=15\n)\n

This is Gensim's simple_preprocess with a lower param to indicate whether or not to lower-case all the tokens in the doc

For more information see: Gensim utils module

Parameters:

  • doc (str) \u2013

    Input document.

  • lower (bool, default: False ) \u2013

    Lower case tokens in the input doc

  • deacc (bool, default: False ) \u2013

    Remove accent marks from tokens using Gensim's deaccent

  • min_len (int, default: 2 ) \u2013

    Minimum length of token (inclusive). Shorter tokens are discarded.

  • max_len (int, default: 15 ) \u2013

    Maximum length of token in result (inclusive). Longer tokens are discarded.

Examples:

>>> from pytorch_widedeep.utils import simple_preprocess\n>>> simple_preprocess('Machine learning is great')\n['Machine', 'learning', 'is', 'great']\n

Returns:

  • List[str] \u2013

    List with the processed tokens

Source code in pytorch_widedeep/utils/text_utils.py
def simple_preprocess(\n    doc: str,\n    lower: bool = False,\n    deacc: bool = False,\n    min_len: int = 2,\n    max_len: int = 15,\n) -> List[str]:\n    r\"\"\"\n    This is `Gensim`'s `simple_preprocess` with a `lower` param to\n    indicate wether or not to lower case all the token in the doc\n\n    For more information see: `Gensim` [utils module](https://radimrehurek.com/gensim/utils.html)\n\n    Parameters\n    ----------\n    doc: str\n        Input document.\n    lower: bool, default = False\n        Lower case tokens in the input doc\n    deacc: bool, default = False\n        Remove accent marks from tokens using `Gensim`'s `deaccent`\n    min_len: int, default = 2\n        Minimum length of token (inclusive). Shorter tokens are discarded.\n    max_len: int, default = 15\n        Maximum length of token in result (inclusive). Longer tokens are discarded.\n\n    Examples\n    --------\n    >>> from pytorch_widedeep.utils import simple_preprocess\n    >>> simple_preprocess('Machine learning is great')\n    ['Machine', 'learning', 'is', 'great']\n\n    Returns\n    -------\n    List[str]\n        List with the processed tokens\n    \"\"\"\n    tokens = [\n        token\n        for token in tokenize(doc, lower=lower, deacc=deacc, errors=\"ignore\")\n        if min_len <= len(token) <= max_len and not token.startswith(\"_\")\n    ]\n    return tokens\n
"},{"location":"pytorch-widedeep/utils/text_utils.html#pytorch_widedeep.utils.text_utils.get_texts","title":"get_texts","text":"
get_texts(texts, already_processed=False, n_cpus=None)\n

Tokenization using Fastai's Tokenizer because it does a series of very convenient things during the tokenization process

See pytorch_widedeep.utils.fastai_transforms.Tokenizer

Parameters:

  • texts (List[str]) \u2013

    List of str with the texts (or documents). One str per document

  • already_processed (Optional[bool], default: False ) \u2013

    Boolean indicating if the text is already processed and we simply want to tokenize it. This parameter is intended for those cases where the input sequences might not be text (but IDs, or anything else) and we just want to tokenize them

  • n_cpus (Optional[int], default: None ) \u2013

    number of CPUs to use during the tokenization process

Examples:

>>> from pytorch_widedeep.utils import get_texts\n>>> texts = ['Machine learning is great', 'but building stuff is even better']\n>>> get_texts(texts)\n[['xxmaj', 'machine', 'learning', 'is', 'great'], ['but', 'building', 'stuff', 'is', 'even', 'better']]\n

Returns:

  • List[List[str]] \u2013

    List of lists, one list per 'document' containing its corresponding tokens

NOTE: get_texts uses pytorch_widedeep.utils.fastai_transforms.Tokenizer. Such a tokenizer applies a series of convenient processing steps, including the addition of some special tokens, such as TK_MAJ (xxmaj), used to indicate that the next word begins with a capital in the original text. For more details on special tokens please see the fastai docs

Source code in pytorch_widedeep/utils/text_utils.py
def get_texts(\n    texts: List[str],\n    already_processed: Optional[bool] = False,\n    n_cpus: Optional[int] = None,\n) -> List[List[str]]:\n    r\"\"\"Tokenization using `Fastai`'s `Tokenizer` because it does a\n    series of very convenients things during the tokenization process\n\n    See `pytorch_widedeep.utils.fastai_utils.Tokenizer`\n\n    Parameters\n    ----------\n    texts: List\n        List of str with the texts (or documents). One str per document\n    already_processed: bool, Optional, default = False\n        Boolean indicating if the text is already processed and we simply want\n        to tokenize it. This parameter is thought for those cases where the\n        input sequences might not be text (but IDs, or anything else) and we\n        just want to tokenize it\n    n_cpus: int, Optional, default = None\n        number of CPUs to used during the tokenization process\n\n    Examples\n    --------\n    >>> from pytorch_widedeep.utils import get_texts\n    >>> texts = ['Machine learning is great', 'but building stuff is even better']\n    >>> get_texts(texts)\n    [['xxmaj', 'machine', 'learning', 'is', 'great'], ['but', 'building', 'stuff', 'is', 'even', 'better']]\n\n    Returns\n    -------\n    List[List[str]]\n        List of lists, one list per '_document_' containing its corresponding tokens\n\n    :information_source: **NOTE**:\n    `get_texts` uses `pytorch_widedeep.utils.fastai_transforms.Tokenizer`.\n    Such tokenizer uses a series of convenient processing steps, including\n    the  addition of some special tokens, such as `TK_MAJ` (`xxmaj`), used to\n    indicate the next word begins with a capital in the original text. For more\n    details of special tokens please see the [`fastai` `docs](https://docs.fast.ai/text.core.html#Tokenizing)\n    \"\"\"\n\n    num_cpus = n_cpus if n_cpus is not None else os.cpu_count()\n\n    if not already_processed:\n        processed_texts = [\" \".join(simple_preprocess(t)) for t in texts]\n    else:\n        processed_texts = texts\n    tok = Tokenizer(n_cpus=num_cpus).process_all(processed_texts)\n    return tok\n
"},{"location":"pytorch-widedeep/utils/text_utils.html#pytorch_widedeep.utils.text_utils.pad_sequences","title":"pad_sequences","text":"
pad_sequences(seq, maxlen, pad_first=True, pad_idx=1)\n

Given a list of tokenized and numericalised sequences, it will return padded sequences according to the input parameters.

Parameters:

  • seq (List[int]) \u2013

    List of int with the numericalised tokens

  • maxlen (int) \u2013

    Maximum length of the padded sequences

  • pad_first (bool, default: True ) \u2013

    Indicates whether the padding index will be added at the beginning or the end of the sequences

  • pad_idx (int, default: 1 ) \u2013

    padding index. Fastai's Tokenizer leaves 0 for the 'unknown' token.

Examples:

>>> from pytorch_widedeep.utils import pad_sequences\n>>> seq = [1,2,3]\n>>> pad_sequences(seq, maxlen=5, pad_idx=0)\narray([0, 0, 1, 2, 3], dtype=int32)\n

Returns:

  • ndarray \u2013

    numpy array with the padded sequences

Source code in pytorch_widedeep/utils/text_utils.py
def pad_sequences(\n    seq: List[int], maxlen: int, pad_first: bool = True, pad_idx: int = 1\n) -> np.ndarray:\n    r\"\"\"\n    Given a List of tokenized and `numericalised` sequences it will return\n    padded sequences according to the input parameters.\n\n    Parameters\n    ----------\n    seq: List\n        List of int with the `numericalised` tokens\n    maxlen: int\n        Maximum length of the padded sequences\n    pad_first: bool,  default = True\n        Indicates whether the padding index will be added at the beginning or the\n        end of the sequences\n    pad_idx: int, default = 1\n        padding index. Fastai's Tokenizer leaves 0 for the 'unknown' token.\n\n    Examples\n    --------\n    >>> from pytorch_widedeep.utils import pad_sequences\n    >>> seq = [1,2,3]\n    >>> pad_sequences(seq, maxlen=5, pad_idx=0)\n    array([0, 0, 1, 2, 3], dtype=int32)\n\n    Returns\n    -------\n    np.ndarray\n        numpy array with the padded sequences\n    \"\"\"\n    if len(seq) == 0:\n        return np.zeros(maxlen, dtype=\"int32\") + pad_idx\n    elif len(seq) >= maxlen:\n        res = np.array(seq[-maxlen:]).astype(\"int32\")\n        return res\n    else:\n        res = np.zeros(maxlen, dtype=\"int32\") + pad_idx\n        if pad_first:\n            res[-len(seq) :] = seq\n        else:\n            res[: len(seq) :] = seq\n        return res\n
"},{"location":"pytorch-widedeep/utils/text_utils.html#pytorch_widedeep.utils.text_utils.build_embeddings_matrix","title":"build_embeddings_matrix","text":"
build_embeddings_matrix(\n    vocab, word_vectors_path, min_freq, verbose=1\n)\n

Build the embedding matrix using pretrained word vectors.

Returns pretrained word embeddings. If a word in our vocabulary is not among the pretrained embeddings it will be assigned the mean pretrained word-embeddings vector

Parameters:

  • vocab (Union[Vocab, ChunkVocab]) \u2013

    see pytorch_widedeep.utils.fastai_transforms.Vocab

  • word_vectors_path (str) \u2013

    path to the pretrained word embeddings

  • min_freq (int) \u2013

    minimum frequency required for a word to be in the vocabulary

  • verbose (int, default: 1 ) \u2013

    level of verbosity. Set to 0 for no verbosity

Returns:

  • ndarray \u2013

    Pretrained word embeddings
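
A hedged usage sketch (the path to the pretrained GloVe vectors is a placeholder; the vocabulary is built as in the Vocab example earlier on this page):

from pytorch_widedeep.utils import Tokenizer, Vocab\nfrom pytorch_widedeep.utils.text_utils import build_embeddings_matrix\ntokens = Tokenizer().process_all(['Machine learning is great'])\nvocab = Vocab(max_vocab=100, min_freq=1).create(tokens)\nembedding_matrix = build_embeddings_matrix(vocab, word_vectors_path='glove.6B.100d.txt', min_freq=1)\n# embedding_matrix.shape -> (len(vocab.itos), dim of the pretrained vectors)\n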

Source code in pytorch_widedeep/utils/text_utils.py
def build_embeddings_matrix(\n    vocab: Union[Vocab, ChunkVocab],\n    word_vectors_path: str,\n    min_freq: int,\n    verbose: int = 1,\n) -> np.ndarray:  # pragma: no cover\n    r\"\"\"Build the embedding matrix using pretrained word vectors.\n\n    Returns pretrained word embeddings. If a word in our vocabulary is not\n    among the pretrained embeddings it will be assigned the mean pretrained\n    word-embeddings vector\n\n    Parameters\n    ----------\n    vocab: Vocab\n        see `pytorch_widedeep.utils.fastai_utils.Vocab`\n    word_vectors_path: str\n        path to the pretrained word embeddings\n    min_freq: int\n        minimum frequency required for a word to be in the vocabulary\n    verbose: int,  default=1\n        level of verbosity. Set to 0 for no verbosity\n\n    Returns\n    -------\n    np.ndarray\n        Pretrained word embeddings\n    \"\"\"\n    if not os.path.isfile(word_vectors_path):\n        raise FileNotFoundError(\"{} not found\".format(word_vectors_path))\n    if verbose:\n        print(\"Indexing word vectors...\")\n\n    embeddings_index = {}\n    f = open(word_vectors_path)\n    for line in f:\n        values = line.split()\n        word = values[0]\n        coefs = np.asarray(values[1:], dtype=\"float32\")\n        embeddings_index[word] = coefs\n    f.close()\n\n    if verbose:\n        print(\"Loaded {} word vectors\".format(len(embeddings_index)))\n        print(\"Preparing embeddings matrix...\")\n\n    mean_word_vector = np.mean(list(embeddings_index.values()), axis=0)  # type: ignore[arg-type]\n    embedding_dim = len(list(embeddings_index.values())[0])\n    num_words = len(vocab.itos)\n    embedding_matrix = np.zeros((num_words, embedding_dim))\n    found_words = 0\n    for i, word in enumerate(vocab.itos):\n        embedding_vector = embeddings_index.get(word)\n        if embedding_vector is not None:\n            embedding_matrix[i] = embedding_vector\n            found_words += 1\n        else:\n            embedding_matrix[i] = mean_word_vector\n\n    if verbose:\n        print(\n            \"{} words in the vocabulary had {} vectors and appear more than {} times\".format(\n                found_words, word_vectors_path, min_freq\n            )\n        )\n\n    return embedding_matrix.astype(\"float32\")\n
"}]} \ No newline at end of file diff --git a/mkdocs/site/sitemap.xml b/mkdocs/site/sitemap.xml index 4345b3f1..868abff7 100644 --- a/mkdocs/site/sitemap.xml +++ b/mkdocs/site/sitemap.xml @@ -2,217 +2,217 @@ https://pytorch-widedeep.readthedocs.io/index.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/contributing.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/installation.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/quick_start.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/01_preprocessors_and_utils.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/02_model_components.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/03_binary_classification_with_defaults.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/04_regression_with_images_and_text.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/05_save_and_load_model_and_artifacts.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/06_finetune_and_warmup.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/07_custom_components.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/08_custom_dataLoader_imbalanced_dataset.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/09_extracting_embeddings.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/10_3rd_party_integration-RayTune_WnB.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/11_auc_multiclass.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/12_ZILNLoss_origkeras_vs_pytorch_widedeep.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/13_model_uncertainty_prediction.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/14_bayesian_models.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/15_DIR-LDS_and_FDS.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/16_Self_Supervised_Pretraning_pt1.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/16_Self_Supervised_Pretraning_pt2.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/17_Usign_a_hugging_face_model.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/18_feature_importance_via_attention_weights.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/19_wide_and_deep_for_recsys_pt1.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/19_wide_and_deep_for_recsys_pt2.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/examples/20_load_from_folder_functionality.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/bayesian_models.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/bayesian_trainer.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/callbacks.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/dataloaders.html - 2024-02-07 + 2024-02-17 
daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/load_from_folder.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/losses.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/metrics.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/model_components.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/preprocessing.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/self_supervised_pretraining.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/tab2vec.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/trainer.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/utils/index.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/utils/deeptabular_utils.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/utils/fastai_transforms.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/utils/image_utils.html - 2024-02-07 + 2024-02-17 daily https://pytorch-widedeep.readthedocs.io/pytorch-widedeep/utils/text_utils.html - 2024-02-07 + 2024-02-17 daily \ No newline at end of file diff --git a/mkdocs/site/sitemap.xml.gz b/mkdocs/site/sitemap.xml.gz index b2f232dec62e3c97d5250de61a056bb549b71c6a..8610128a214bcd3d99794155c9e06e1414d7c2c1 100644 GIT binary patch delta 30 mcmZ3+wv3HkzMF%CZ{~%G?3Wk|H{Nn#;@A-Qa_%if1_l70O$x{W delta 30 mcmZ3+wv3HkzMF%??)2e_?3WnpHr{e!;+Q!7zwRwX1_l77Dhl@i