Source code for twin4build.systems.utils.time_series_input_system
# Standard library imports
import datetime
import os
from typing import Any, Dict, List, Optional, Tuple, Union
# Third party imports
import pandas as pd
# Local application imports
import twin4build.core as core
import twin4build.utils.types as tps
from twin4build.utils.data_loaders.load import load_from_database, load_from_spreadsheet
from twin4build.utils.get_main_dir import get_main_dir
[docs]
class TimeSeriesInputSystem(core.System):
"""A system for reading and processing time series data from files or DataFrames.
This component provides functionality to handle time series data inputs, either from
CSV files or pandas DataFrames. It supports automatic file path resolution and
caching of processed data for improved performance.
Args:
df: Input dataframe containing time series data. Must have datetime index and value column.
filename: Path to the CSV file. Can be absolute or relative to cache_root. If relative, will try both current directory and cache_root.
datecolumn: Index of the date column (0-based). Defaults to 0.
valuecolumn: Index of the value column (0-based). Defaults to 1.
useSpreadsheet: Whether to use a spreadsheet for input. Defaults to False.
useDatabase: Whether to use a database for input. Defaults to False.
uuid: UUID for database operations.
name: Name for database operations.
dbconfig: Database configuration parameters.
**kwargs: Additional keyword arguments
"""
def __init__(
self,
df: Optional[pd.DataFrame] = None,
filename: Optional[str] = None,
datecolumn: int = 0,
valuecolumn: int = 1,
useSpreadsheet: bool = False,
useDatabase: bool = False,
uuid: Optional[str] = None,
name: Optional[str] = None,
dbconfig: Optional[Dict[str, Any]] = None,
**kwargs,
) -> None:
"""Initialize the TimeSeriesInputSystem.
Args:
df: Input dataframe containing time series data. Must have datetime index and value column.
filename: Path to the CSV file. Can be absolute or relative to cache_root. If relative, will try both current directory and cache_root.
datecolumn: Index of the date column (0-based). Defaults to 0.
valuecolumn: Index of the value column (0-based). Defaults to 1.
useSpreadsheet: Whether to use a spreadsheet for input. Defaults to False.
useDatabase: Whether to use a database for input. Defaults to False.
uuid: UUID for database operations.
name: Name for database operations.
dbconfig: Database configuration parameters.
**kwargs: Additional keyword arguments passed to parent System class.
Raises:
AssertionError: If neither df nor filename is provided.
ValueError: If the specified file cannot be found in any of the search paths.
"""
assert (
useSpreadsheet == False or useDatabase == False
), "useSpreadsheet and useDatabase cannot both be True."
super().__init__(**kwargs)
assert (
df is not None or filename is not None
), 'Either "df" or "filename" must be provided as argument.'
# Store attributes as private variables
self._df = df
self._useSpreadsheet = useSpreadsheet
self._useDatabase = useDatabase
self._filename = filename
self._datecolumn = datecolumn
self._valuecolumn = valuecolumn
self._uuid = uuid
self._name = name
self._dbconfig = dbconfig
self._cached_initialize_arguments = None
self._cache_root = get_main_dir()
# Define inputs and outputs as private variables
self._input = {}
self._output = {"value": tps.Scalar(is_leaf=True)}
if filename is not None:
if os.path.isfile(filename): # Absolute or relative was provided
self._filename = filename
else: # Check if relative path to root was provided
filename = filename.lstrip("/\\")
filename_ = os.path.join(self.cache_root, filename)
if os.path.isfile(filename_) == False:
raise (
ValueError(
f'Neither one of the following filenames exist: \n"{filename}"\n{filename_}'
)
)
self._filename = filename_
self._config = {
"parameters": {},
"spreadsheet": {
"filename": self.filename,
"datecolumn": self.datecolumn,
"valuecolumn": self.valuecolumn,
},
"database": {
"uuid": self.uuid,
"name": self.name,
"dbconfig": self.dbconfig,
},
}
@property
def config(self):
"""
Get the configuration of the TimeSeriesInputSystem.
Returns:
dict: The configuration dictionary.
"""
return self._config
@property
def input(self) -> dict:
"""
Get the input ports of the time series input system.
Returns:
dict: Dictionary containing input ports (empty for leaf systems)
"""
return self._input
@property
def output(self) -> dict:
"""
Get the output ports of the time series input system.
Returns:
dict: Dictionary containing output ports:
- "value": Time series values [units depend on data]
"""
return self._output
@property
def df(self) -> Optional[pd.DataFrame]:
"""
Get the processed input data containing time series values.
"""
return self._df
@df.setter
def df(self, value: Optional[pd.DataFrame]) -> None:
"""
Set the processed input data containing time series values.
"""
self._df = value
@property
def filename(self) -> Optional[str]:
"""
Get the path to the input CSV file (absolute or relative to root).
"""
return self._filename
@filename.setter
def filename(self, value: Optional[str]) -> None:
"""
Set the path to the input CSV file (absolute or relative to root).
"""
self._filename = value
@property
def datecolumn(self) -> int:
"""
Get the index of the date/time column (0-based).
"""
return self._datecolumn
@datecolumn.setter
def datecolumn(self, value: int) -> None:
"""
Set the index of the date/time column (0-based).
"""
self._datecolumn = value
@property
def valuecolumn(self) -> int:
"""
Get the index of the value column (0-based).
"""
return self._valuecolumn
@valuecolumn.setter
def valuecolumn(self, value: int) -> None:
"""
Set the index of the value column (0-based).
"""
self._valuecolumn = value
@property
def useSpreadsheet(self) -> bool:
"""
Get whether to use a spreadsheet for input.
"""
return self._useSpreadsheet
@useSpreadsheet.setter
def useSpreadsheet(self, value: bool) -> None:
"""
Set whether to use a spreadsheet for input.
"""
self._useSpreadsheet = value
@property
def useDatabase(self) -> bool:
"""
Get whether to use a database for input.
"""
return self._useDatabase
@useDatabase.setter
def useDatabase(self, value: bool) -> None:
"""
Set whether to use a database for input.
"""
self._useDatabase = value
@property
def uuid(self) -> Optional[str]:
"""
Get the UUID for database operations.
"""
return self._uuid
@uuid.setter
def uuid(self, value: Optional[str]) -> None:
"""
Set the UUID for database operations.
"""
self._uuid = value
@property
def name(self) -> Optional[str]:
"""
Get the name for database operations.
"""
return self._name
@name.setter
def name(self, value: Optional[str]) -> None:
"""
Set the name for database operations.
"""
self._name = value
@property
def dbconfig(self) -> Optional[Dict[str, Any]]:
"""
Get the database configuration parameters.
"""
return self._dbconfig
@dbconfig.setter
def dbconfig(self, value: Optional[Dict[str, Any]]) -> None:
"""
Set the database configuration parameters.
"""
self._dbconfig = value
[docs]
def initialize(
self,
start_time: datetime.datetime,
end_time: datetime.datetime,
step_size: int,
simulator: core.Simulator,
) -> None:
"""
Initialize the TimeSeriesInputSystem.
Args:
start_time (datetime.datetime): Start time for the simulation.
end_time (datetime.datetime): End time for the simulation.
step_size (int): Step size for the simulation.
simulator (core.Simulator): Simulator to be used for initialization.
"""
if self.df is None or (
self._cached_initialize_arguments != (start_time, end_time, step_size)
and self._cached_initialize_arguments is not None
):
if self.useSpreadsheet:
self.df = load_from_spreadsheet(
self.filename,
self.datecolumn,
self.valuecolumn,
step_size=step_size,
start_time=start_time,
end_time=end_time,
cache_root=self._cache_root,
)
elif self.useDatabase:
self.df = load_from_database(
config=self.dbconfig,
sensor_uuid=self.uuid,
sensor_name=self.name,
step_size=step_size,
start_time=start_time,
end_time=end_time,
cache_root=self._cache_root,
)
self._cached_initialize_arguments = (start_time, end_time, step_size)
[docs]
def do_step(
self,
secondTime: float,
dateTime: datetime.datetime,
step_size: int,
stepIndex: int,
simulator: Optional[core.Simulator] = None,
) -> None:
"""
Perform a single timestep for the TimeSeriesInputSystem.
Args:
secondTime (int, optional): Current simulation time in seconds.
dateTime (datetime, optional): Current simulation time as a datetime object.
step_size (int, optional): Step size for the simulation.
"""
self.output["value"].set(self.df.values[stepIndex], stepIndex)