Source code for wrangle_in_py.extracting_ymd_hms

import pandas as pd

def extracting_ymd(df, column):
    """
    Return a copy of ``df`` with year, month and day columns added.

    The three new columns are extracted from the datetime column named
    ``column`` and are stored with the nullable ``Int64`` dtype, so missing
    timestamps (``NaT``) become ``<NA>`` instead of forcing a float column.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame containing the datetime column.
    column : str
        The name of the datetime column to extract from.

    Returns
    -------
    pandas.DataFrame
        A copy of the input DataFrame with added columns
        ``'<column>_year'``, ``'<column>_month'`` and ``'<column>_day'``.
        The input DataFrame is not modified.

    Raises
    ------
    KeyError
        If ``column`` is not a column in ``df``.
    TypeError
        If ``column`` is not of datetime type.

    Examples
    --------
    >>> df = pd.DataFrame({'timestamp': ['2024-01-07 12:30:45', '2023-12-25 08:15:30']})
    >>> df['timestamp'] = pd.to_datetime(df['timestamp'])
    >>> extracting_ymd(df, 'timestamp')
                timestamp  timestamp_year  timestamp_month  timestamp_day
    0 2024-01-07 12:30:45            2024                1              7
    1 2023-12-25 08:15:30            2023               12             25
    """
    # Validate before copying so bad input fails fast and cheaply.
    if column not in df.columns:
        raise KeyError(f"Column '{column}' does not exist in the DataFrame.")

    if not pd.api.types.is_datetime64_any_dtype(df[column]):
        raise TypeError(f"Column '{column}' must be of datetime type.")

    # Work on a copy so the caller's DataFrame is never mutated.
    result = df.copy()
    timestamps = result[column]

    # Extract year, month, and day into new columns. No special case is
    # needed for a zero-row frame: the ``.dt`` accessors on an empty datetime
    # column already produce empty columns, which are then cast to Int64.
    for part in ("year", "month", "day"):
        result[f"{column}_{part}"] = getattr(timestamps.dt, part).astype("Int64")

    return result
def extracting_hms(df, column):
    """
    Return a copy of ``df`` with hour, minute and second columns added.

    The three new columns are extracted from the datetime column named
    ``column`` and are stored with the nullable ``Int64`` dtype, so missing
    timestamps (``NaT``) become ``<NA>`` instead of forcing a float column.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame containing the datetime column.
    column : str
        The name of the datetime column to extract from.

    Returns
    -------
    pandas.DataFrame
        A copy of the input DataFrame with added columns
        ``'<column>_hour'``, ``'<column>_minute'`` and ``'<column>_second'``.
        The input DataFrame is not modified.

    Raises
    ------
    KeyError
        If ``column`` is not a column in ``df``.
    TypeError
        If ``column`` is not of datetime type.

    Examples
    --------
    >>> df = pd.DataFrame({'timestamp': ['2024-01-07 12:30:45', '2023-12-25 08:15:30']})
    >>> df['timestamp'] = pd.to_datetime(df['timestamp'])
    >>> extracting_hms(df, 'timestamp')
                timestamp  timestamp_hour  timestamp_minute  timestamp_second
    0 2024-01-07 12:30:45              12                30                45
    1 2023-12-25 08:15:30               8                15                30
    """
    # Validate before copying so bad input fails fast and cheaply.
    if column not in df.columns:
        raise KeyError(f"Column '{column}' does not exist in the DataFrame.")

    if not pd.api.types.is_datetime64_any_dtype(df[column]):
        raise TypeError(f"Column '{column}' must be of datetime type.")

    # Work on a copy so the caller's DataFrame is never mutated.
    result = df.copy()
    timestamps = result[column]

    # Extract hour, minute, and second into new columns. No special case is
    # needed for a zero-row frame: the ``.dt`` accessors on an empty datetime
    # column already produce empty columns, which are then cast to Int64.
    for part in ("hour", "minute", "second"):
        result[f"{column}_{part}"] = getattr(timestamps.dt, part).astype("Int64")

    return result