Source code for pydata.utils.transformer
import pandas as pd
[docs]
class Transformer:
    """Stateless helpers that clean and reshape a ``pandas.DataFrame``.

    Every method is a ``@staticmethod``; the class only serves as a namespace.
    Unless a method says otherwise, it modifies the frame it is given and
    returns that same object so calls can be chained.
    """

    # Category order used by ``sort_by_week``; rows are sorted Sun first, Mon last.
    # NOTE(review): this is the reverse of calendar order -- presumably
    # intentional (e.g. for a chart axis); confirm with the callers.
    _WEEKDAY_ORDER = ["Sun", "Sat", "Fri", "Thu", "Wed", "Tue", "Mon"]

    @staticmethod
    def basic_cleanup(df: pd.DataFrame) -> pd.DataFrame:
        """Strip every non-ASCII character from the string values of ``df``.

        Runs of characters outside the ``\\x00``-``\\x7F`` range are replaced
        with the empty string. The replacement is done in place.

        Args:
            df: Frame to clean; it is modified in place.

        Returns:
            The same ``df`` object that was passed in.
        """
        df.replace(r"[^\x00-\x7F]+", "", regex=True, inplace=True)
        return df

    @staticmethod
    def change_date_format(date: str) -> str:
        """Rewrite a year that starts with ``00`` so it starts with ``20``.

        The input is expected to look like ``"YYYY-MM-DD"``, optionally
        followed by a space and a time part, e.g. ``"0014-11-18 15:40:26"``
        becomes ``"2014-11-18 15:40:26"``. Strings whose year does not start
        with ``00`` are returned unchanged.

        Args:
            date: Date string to fix. Non-string values (``None``, ``NaN``)
                are returned untouched so the function can be applied to
                columns that contain missing entries.

        Returns:
            The corrected date string, or ``date`` itself if it is not a string.

        Raises:
            ValueError: If the date part does not consist of exactly three
                ``-``-separated fields.
        """
        if not isinstance(date, str):
            # Missing values reach us as None / float NaN; leave them alone.
            return date

        # partition (rather than split) tolerates a missing time part and
        # keeps anything after the first space intact.
        front, sep, end = date.partition(" ")
        year, month, day = front.split("-")
        if year.startswith("00"):
            # Rebuild from the parsed fields instead of slicing at a fixed
            # offset, so the result does not depend on the year's width.
            front = f"20{year[2:]}-{month}-{day}"
        return front + sep + end

    @staticmethod
    def reformat_datetime(df: pd.DataFrame) -> pd.DataFrame:
        """Convert the ``created``, ``ended`` and ``chargeTimeHrs`` columns.

        ``created`` and ``ended`` are first passed through
        ``change_date_format`` and then parsed with ``pd.to_datetime``.
        ``chargeTimeHrs`` is converted with ``pd.to_timedelta``: numeric
        values are interpreted as hours (per the column name); any other
        dtype is handed to ``pd.to_timedelta`` as is.

        Args:
            df: Frame containing the three columns; it is modified in place.

        Returns:
            The same ``df`` object that was passed in.
        """
        for column in ("created", "ended"):
            fixed = df[column].apply(Transformer.change_date_format)
            df[column] = pd.to_datetime(fixed)

        charge_time = df["chargeTimeHrs"]
        if pd.api.types.is_numeric_dtype(charge_time):
            # Without an explicit unit pandas reads numbers as nanoseconds,
            # which would turn 1.5 (hours) into ~1 ns.
            df["chargeTimeHrs"] = pd.to_timedelta(charge_time, unit="h")
        else:
            df["chargeTimeHrs"] = pd.to_timedelta(charge_time)
        return df

    @staticmethod
    def drop_columns_with_nulls(df: pd.DataFrame, threshold: float = 30) -> pd.DataFrame:
        """Drop columns whose share of missing values exceeds ``threshold``.

        Args:
            df: Input frame; it is not modified.
            threshold: Maximum allowed percentage (0-100) of missing values.
                Columns strictly above this percentage are dropped.

        Returns:
            A new frame without the offending columns.
        """
        # Mean of the boolean NA mask is the fraction of missing values.
        missing_perc = df.isna().mean() * 100
        columns_to_drop = missing_perc[missing_perc > threshold].index
        return df.drop(columns=columns_to_drop)

    @staticmethod
    def sort_by_week(df: pd.DataFrame) -> pd.DataFrame:
        """Sort ``df`` by its ``weekday`` column using a fixed day order.

        The ``weekday`` column is converted to an ordered categorical whose
        categories are ``Sun, Sat, Fri, Thu, Wed, Tue, Mon`` and the frame is
        then sorted on it in place, so ``Sun`` rows come first and ``Mon``
        rows last. Values that are not one of these seven abbreviations are
        not valid categories and become missing.

        Args:
            df: Frame with a ``weekday`` column; it is modified in place.

        Returns:
            The same ``df`` object that was passed in.
        """
        df["weekday"] = pd.Categorical(
            df["weekday"],
            categories=Transformer._WEEKDAY_ORDER,
            ordered=True,
        )
        df.sort_values("weekday", inplace=True)
        return df