diff --git a/Databricks/clean data with spark/clean data with spark SQL.dbc b/Databricks/clean data with spark/clean data with spark SQL.dbc new file mode 100644 index 0000000..23e543a Binary files /dev/null and b/Databricks/clean data with spark/clean data with spark SQL.dbc differ diff --git a/Databricks/clean data with spark/clean data with spark python.dbc b/Databricks/clean data with spark/clean data with spark python.dbc new file mode 100644 index 0000000..1b0212c Binary files /dev/null and b/Databricks/clean data with spark/clean data with spark python.dbc differ diff --git a/Databricks/clean data with spark/orders_raw.csv b/Databricks/clean data with spark/orders_raw.csv new file mode 100644 index 0000000..7d59dc2 --- /dev/null +++ b/Databricks/clean data with spark/orders_raw.csv @@ -0,0 +1,12 @@ +order_id,order_date,customer_id,amount +1,27/09/2024,101,10 +2,,101,11 +3,29/09/2024,201,12 +4,30/09/2024,101,13 +4,30/09/2024,101,13 +5,01/10/2024,301, +6,02/10/2024,401,15 +7,03/10/2024,,16 +8,32/10/2024,501,17 +9,05/10/2024,301,AAA +10,06/10/2024,401,19