Skip to content
This repository has been archived by the owner on Jul 3, 2023. It is now read-only.

Changes some columns #287

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 2 additions & 1 deletion examples/model_examples/time-series/model_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,9 @@ def training_set(
rolling_mean_t90: pd.Series,
rolling_mean_t180: pd.Series,
rolling_std_t30: pd.Series,
price_change_t1: pd.Series,
price_change_t2: pd.Series,
price_change_t365: pd.Series,
price_change_t730: pd.Series,
rolling_price_std_t7: pd.Series,
rolling_price_std_t30: pd.Series,
date: pd.Series,
Expand Down
2 changes: 1 addition & 1 deletion examples/model_examples/time-series/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def main():
"sales_train_validation_path": "m5-forecasting-accuracy/sales_train_validation.csv",
"submission_path": "m5-forecasting-accuracy/sample_submission.csv",
"load_test2": "False",
"n_fold": 3,
"n_fold": 0,
"model_params": model_params,
"num_rows_to_skip": 27500000, # for training set
}
Expand Down
86 changes: 46 additions & 40 deletions examples/model_examples/time-series/transforms.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from typing import Tuple
from typing import Optional, Tuple

import pandas as pd
from pandas.core.groupby import generic
from sklearn import preprocessing

from hamilton.function_modifiers import parameterize, source
from hamilton.function_modifiers import parameterize, source, value


def _label_encoder(col: pd.Series) -> Tuple[preprocessing.LabelEncoder, pd.Series]:
Expand Down Expand Up @@ -55,63 +55,69 @@ def grouped_demand(demand: pd.Series) -> generic.SeriesGroupBy:
return demand.groupby(level=0)


def lag_t28(grouped_demand: generic.SeriesGroupBy) -> pd.Series:
    """Demand lagged by 28 observations within each group.

    :param grouped_demand: Grouped series of demand data
    :return: Series on the original index; the first 28 rows of every group are NaN
    """
    # The built-in group-wise shift yields the same values as transforming each
    # group with a Python lambda, without one Python-level call per group.
    return grouped_demand.shift(28)


def lag_t29(grouped_demand: generic.SeriesGroupBy) -> pd.Series:
    """Demand lagged by 29 observations within each group.

    :param grouped_demand: Grouped series of demand data
    :return: Series on the original index; the first 29 rows of every group are NaN
    """
    # The built-in group-wise shift yields the same values as transforming each
    # group with a Python lambda, without one Python-level call per group.
    return grouped_demand.shift(29)


def lag_t30(grouped_demand: generic.SeriesGroupBy) -> pd.Series:
    """Demand lagged by 30 observations within each group.

    :param grouped_demand: Grouped series of demand data
    :return: Series on the original index; the first 30 rows of every group are NaN
    """
    # The built-in group-wise shift yields the same values as transforming each
    # group with a Python lambda, without one Python-level call per group.
    return grouped_demand.shift(30)


def rolling_mean_t7(grouped_demand: generic.SeriesGroupBy) -> pd.Series:
    """Per-group 7-observation rolling mean of demand lagged by 28.

    :param grouped_demand: Grouped series of demand data
    :return: Rolling mean computed independently inside every group
    """

    def _lagged_mean(group: pd.Series) -> pd.Series:
        lagged = group.shift(28)
        return lagged.rolling(7).mean()

    return grouped_demand.transform(_lagged_mean)


def rolling_std_t7(grouped_demand: generic.SeriesGroupBy) -> pd.Series:
    """Per-group 7-observation rolling standard deviation of demand lagged by 28.

    :param grouped_demand: Grouped series of demand data
    :return: Rolling standard deviation computed independently inside every group
    """

    def _lagged_std(group: pd.Series) -> pd.Series:
        lagged = group.shift(28)
        return lagged.rolling(7).std()

    return grouped_demand.transform(_lagged_std)


def rolling_mean_t30(grouped_demand: generic.SeriesGroupBy) -> pd.Series:
    """Per-group 30-observation rolling mean of demand lagged by 28.

    :param grouped_demand: Grouped series of demand data
    :return: Rolling mean computed independently inside every group
    """

    def _lagged_mean(group: pd.Series) -> pd.Series:
        lagged = group.shift(28)
        return lagged.rolling(30).mean()

    return grouped_demand.transform(_lagged_mean)


def rolling_mean_t90(grouped_demand: generic.SeriesGroupBy) -> pd.Series:
    """Per-group 90-observation rolling mean of demand lagged by 28.

    :param grouped_demand: Grouped series of demand data
    :return: Rolling mean computed independently inside every group
    """

    def _lagged_mean(group: pd.Series) -> pd.Series:
        lagged = group.shift(28)
        return lagged.rolling(90).mean()

    return grouped_demand.transform(_lagged_mean)


def rolling_mean_t180(grouped_demand: generic.SeriesGroupBy) -> pd.Series:
    """Per-group 180-observation rolling mean of demand lagged by 28.

    :param grouped_demand: Grouped series of demand data
    :return: Rolling mean computed independently inside every group
    """

    def _lagged_mean(group: pd.Series) -> pd.Series:
        lagged = group.shift(28)
        return lagged.rolling(180).mean()

    return grouped_demand.transform(_lagged_mean)


def rolling_std_t30(grouped_demand: generic.SeriesGroupBy) -> pd.Series:
    """Per-group 30-observation rolling standard deviation of demand lagged by 28.

    :param grouped_demand: Grouped series of demand data
    :return: Rolling standard deviation computed independently inside every group
    """

    def _lagged_std(group: pd.Series) -> pd.Series:
        lagged = group.shift(28)
        return lagged.rolling(30).std()

    return grouped_demand.transform(_lagged_std)
@parameterize(
    lag_t28={"lag": value(28)},
    lag_t29={"lag": value(29)},
    lag_t30={"lag": value(30)},
    rolling_mean_t7={"lag": value(28), "window": value(7), "transform": value("mean")},
    rolling_std_t7={"lag": value(28), "window": value(7), "transform": value("std")},
    rolling_mean_t30={"lag": value(28), "window": value(30), "transform": value("mean")},
    rolling_mean_t90={"lag": value(28), "window": value(90), "transform": value("mean")},
    rolling_mean_t180={"lag": value(28), "window": value(180), "transform": value("mean")},
    rolling_std_t30={"lag": value(28), "window": value(30), "transform": value("std")},
)
def transform_grouped_demand(
    grouped_demand: generic.SeriesGroupBy,
    lag: int,
    window: Optional[int] = None,
    transform: Optional[str] = None,
) -> pd.Series:
    """Lags the grouped demand and optionally applies a rolling aggregation.

    Each group is first shifted by ``lag`` observations. If ``window`` is given,
    a rolling window of that size is then taken over the lagged values of the
    same group and reduced with the method named by ``transform``.

    :param grouped_demand: Grouped series of demand data
    :param lag: How many observations to lag each group by
    :param window: The window to apply the rolling transformation over (if applicable)
    :param transform: Name of the rolling aggregation to apply, e.g. "mean" or "std"
        (required when ``window`` is set)
    :return: Transformed demand data
    :raises ValueError: If ``window`` is set but ``transform`` is not
    """
    if window is None:
        # Lag-only outputs.
        return grouped_demand.transform(lambda x: x.shift(lag))
    if transform is None:
        raise ValueError("`transform` must be provided when `window` is set.")

    def _lag_then_roll(group: pd.Series) -> pd.Series:
        rolled = group.shift(lag).rolling(window)
        return getattr(rolled, transform)()

    # Roll inside each group: rolling over the already-flattened lagged series
    # would let a window straddle the boundary between two adjacent groups.
    return grouped_demand.transform(_lag_then_roll)


def grouped_sell_price(sell_price: pd.Series) -> generic.SeriesGroupBy:
    """Group the sell-price series by the first level of its index.

    :param sell_price: Series of sell prices
    :return: The series grouped on index level 0
    """
    by_first_level = sell_price.groupby(level=0)
    return by_first_level


# price features
def lag_price_t1(grouped_sell_price: generic.SeriesGroupBy) -> pd.Series:
    """Sell price lagged by one observation within each group.

    :param grouped_sell_price: Grouped series of sell prices
    :return: Series on the original index; the first row of every group is NaN
    """
    # The built-in group-wise shift yields the same values as transforming each
    # group with a Python lambda, without one Python-level call per group.
    return grouped_sell_price.shift(1)
def lag_price_t2(grouped_sell_price: generic.SeriesGroupBy) -> pd.Series:
    """Sell price lagged by two observations within each group.

    :param grouped_sell_price: Grouped series of sell prices
    :return: Series on the original index; the first two rows of every group are NaN
    """
    # The built-in group-wise shift yields the same values as transforming each
    # group with a Python lambda, without one Python-level call per group.
    return grouped_sell_price.shift(2)


def price_change_t1(lag_price_t1: pd.Series, sell_price: pd.Series) -> pd.Series:
    """Relative difference between the lagged price and the current sell price.

    :param lag_price_t1: Lagged sell price (used as the reference value)
    :param sell_price: Current sell price
    :return: ``(lag_price_t1 - sell_price) / lag_price_t1``, element-wise
    """
    difference = lag_price_t1 - sell_price
    return difference / lag_price_t1
def price_change_t2(lag_price_t2: pd.Series, sell_price: pd.Series) -> pd.Series:
    """Relative difference between the lagged price and the current sell price.

    :param lag_price_t2: Lagged sell price (used as the reference value)
    :param sell_price: Current sell price
    :return: ``(lag_price_t2 - sell_price) / lag_price_t2``, element-wise
    """
    difference = lag_price_t2 - sell_price
    return difference / lag_price_t2


def rolling_price_max_t365(grouped_sell_price: generic.SeriesGroupBy) -> pd.Series:
    """Per-group 365-observation rolling maximum of the sell price lagged by 1.

    :param grouped_sell_price: Grouped series of sell prices
    :return: Rolling maximum computed independently inside every group
    """

    def _lagged_max(group: pd.Series) -> pd.Series:
        previous = group.shift(1)
        return previous.rolling(365).max()

    return grouped_sell_price.transform(_lagged_max)


def rolling_price_max_t730(grouped_sell_price: generic.SeriesGroupBy) -> pd.Series:
    """Per-group 730-observation rolling maximum of the sell price lagged by 1.

    :param grouped_sell_price: Grouped series of sell prices
    :return: Rolling maximum computed independently inside every group
    """

    def _lagged_max(group: pd.Series) -> pd.Series:
        previous = group.shift(1)
        return previous.rolling(730).max()

    return grouped_sell_price.transform(_lagged_max)


def price_change_t365(rolling_price_max_t365: pd.Series, sell_price: pd.Series) -> pd.Series:
    """Relative difference between the rolling maximum price and the current sell price.

    :param rolling_price_max_t365: Rolling maximum price (used as the reference value)
    :param sell_price: Current sell price
    :return: ``(rolling_price_max_t365 - sell_price) / rolling_price_max_t365``, element-wise
    """
    gap_to_max = rolling_price_max_t365 - sell_price
    return gap_to_max / rolling_price_max_t365


def price_change_t730(rolling_price_max_t730: pd.Series, sell_price: pd.Series) -> pd.Series:
    """Relative difference between the rolling maximum price and the current sell price.

    :param rolling_price_max_t730: Rolling maximum price (used as the reference value)
    :param sell_price: Current sell price
    :return: ``(rolling_price_max_t730 - sell_price) / rolling_price_max_t730``, element-wise
    """
    gap_to_max = rolling_price_max_t730 - sell_price
    return gap_to_max / rolling_price_max_t730


def rolling_price_std_t7(grouped_sell_price: generic.SeriesGroupBy) -> pd.Series:
    """Per-group 7-observation rolling standard deviation of the sell price.

    :param grouped_sell_price: Grouped series of sell prices
    :return: Rolling standard deviation computed independently inside every group
    """

    def _window_std(group: pd.Series) -> pd.Series:
        windowed = group.rolling(7)
        return windowed.std()

    return grouped_sell_price.transform(_window_std)

Expand Down