Skip to content

Data

The data package provides a clean, unified interface for fetching, loading, and saving all market and historical data required by the optpricing library.

get_available_snapshot_dates

get_available_snapshot_dates(ticker: str) -> list[str]

Lists all available snapshot dates for a given ticker.

Scans the market data directory for saved parquet files corresponding to the ticker and extracts the date from the filenames.

Parameters:

Name Type Description Default
ticker str

The stock ticker to search for, e.g., 'SPY'.

required

Returns:

Type Description
list[str]

A sorted list of available dates in 'YYYY-MM-DD' format, from most recent to oldest.

Source code in src/optpricing/data/market_data_manager.py
def get_available_snapshot_dates(ticker: str) -> list[str]:
    """
    Lists all available snapshot dates for a given ticker.

    Scans the market data directory for saved parquet files named
    ``{ticker}_{date}.parquet`` and extracts the date part of each filename.

    Parameters
    ----------
    ticker : str
        The stock ticker to search for, e.g., 'SPY'.

    Returns
    -------
    list[str]
        The extracted date strings sorted in descending order, i.e. from
        most recent to oldest for 'YYYY-MM-DD' dates. An empty list is
        returned if the snapshot directory does not exist.
    """
    prefix = f"{ticker}_"
    suffix = ".parquet"

    # Only the directory scan can raise FileNotFoundError, so keep the
    # try body limited to it.
    try:
        names = [entry.name for entry in MARKET_SNAPSHOT_DIR.iterdir()]
    except FileNotFoundError:
        return []

    # Strip exactly one leading prefix and one trailing suffix; str.replace
    # would also remove any further occurrences inside the name.
    dates = [
        name.removeprefix(prefix).removesuffix(suffix)
        for name in names
        if name.startswith(prefix) and name.endswith(suffix)
    ]

    # Plain string sort: for ISO 'YYYY-MM-DD' dates lexicographic order
    # equals chronological order.
    return sorted(dates, reverse=True)

get_live_dividend_yield

get_live_dividend_yield(ticker: str) -> float

Fetches the live forward dividend yield for a ticker using yfinance.

Parameters:

Name Type Description Default
ticker str

The stock ticker to search for, e.g., 'SPY'.

required

Returns:

Type Description
float

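The dividend yield as a decimal fraction (the value reported by yfinance divided by 100), or 0.0 if no yield is available or the fetch fails.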

Source code in src/optpricing/data/market_data_manager.py
def get_live_dividend_yield(ticker: str) -> float:
    """
    Fetches the live forward dividend yield for a ticker using yfinance.

    Parameters
    ----------
    ticker : str
        The stock ticker to search for, e.g., 'SPY'.

    Returns
    -------
    float
        The ``dividendYield`` value reported by yfinance divided by 100, or
        0.0 if the ticker reports no dividend yield or the lookup fails.
    """
    print(f"Fetching live dividend yield for {ticker}...")
    try:
        dividend_yield = yf.Ticker(ticker).info.get("dividendYield")
        # A missing key is normal for non-dividend-paying tickers; treat it
        # as a zero yield instead of letting `None / 100` raise and be
        # reported as a fetch failure.
        if dividend_yield is None:
            return 0.0
        # NOTE(review): assumes yfinance reports the yield in percent, hence
        # the division by 100 - confirm against the installed version.
        return float(dividend_yield) / 100
    except Exception as e:
        # Handle cases where the ticker is invalid or yfinance fails
        print(f"  -> FAILED to fetch dividend yield for {ticker}. Error: {e}")
        return 0.0

get_live_option_chain

get_live_option_chain(ticker: str) -> pd.DataFrame | None

Fetches a live option chain from the configured data provider.

The data provider is determined by the live_data_provider key in the config.yaml file. The only supported provider is currently "yfinance"; any other value triggers a warning and falls back to yfinance.

Parameters:

Name Type Description Default
ticker str

The stock ticker for which to fetch the option chain, e.g., 'SPY'.

required

Returns:

Type Description
DataFrame | None

A DataFrame containing the formatted option chain data, or None if the fetch fails or no data is returned.

Source code in src/optpricing/data/market_data_manager.py
def get_live_option_chain(ticker: str) -> pd.DataFrame | None:
    """
    Fetches a live option chain from the configured data provider.

    The provider name is read from the `live_data_provider` key of the
    loaded configuration (default "yfinance") and compared
    case-insensitively. "yfinance" is the only provider handled here; any
    other value prints a warning and falls back to yfinance.

    Parameters
    ----------
    ticker : str
        The stock ticker for which to fetch the option chain, e.g., 'SPY'.

    Returns
    -------
    pd.DataFrame | None
        Whatever `_fetch_from_yfinance` returns for the ticker: a DataFrame
        with the option chain, or None if the fetch fails or no data is
        returned.
    """
    provider = _config.get("live_data_provider", "yfinance").lower()

    # Every path ends in the yfinance fetch; an unrecognised provider only
    # adds a warning first.
    if provider != "yfinance":
        print(
            f"Warning: Unknown live_data_provider '{provider}'. Defaulting to yfinance."
        )

    return _fetch_from_yfinance(ticker)

load_historical_returns

load_historical_returns(
    ticker: str, period: str = "10y"
) -> pd.Series

Loads historical log returns, fetching and saving them if not found.

Checks for a pre-saved parquet file for the given ticker and period. If the file does not exist, it calls save_historical_returns to download and save it first.

Parameters:

Name Type Description Default
ticker str

The stock ticker for which to load returns, e.g., 'SPY'.

required
period str

The time period for which to fetch data, e.g., "10y", "5y", "1mo". Defaults to "10y".

'10y'

Returns:

Type Description
Series

A pandas Series containing the historical log returns.

Raises:

Type Description
FileNotFoundError

If the data file cannot be found and also fails to be downloaded.

Source code in src/optpricing/data/historical_manager.py
def load_historical_returns(
    ticker: str,
    period: str = "10y",
) -> pd.Series:
    """
    Loads historical log returns, downloading them first when missing.

    Looks for ``{ticker}_{period}_returns.parquet`` in the historical data
    directory. When the file is absent, `save_historical_returns` is called
    for this single ticker and the file is looked up again.

    Parameters
    ----------
    ticker : str
        The stock ticker for which to load returns, e.g., 'SPY'.
    period : str, optional
        The time period for which to fetch data, e.g., "10y", "5y", "1mo".
        Defaults to "10y".

    Returns
    -------
    pd.Series
        The ``log_return`` column of the saved parquet file.

    Raises
    ------
    FileNotFoundError
        If the data file does not exist and is still missing after the
        download attempt.
    """
    parquet_path = HISTORICAL_DIR / f"{ticker}_{period}_returns.parquet"

    if not parquet_path.exists():
        print(f"No historical data found for {ticker}. Fetching and saving now...")
        save_historical_returns([ticker], period)

        # save_historical_returns reports its own failures without raising,
        # so the only reliable success signal is the file now being present.
        if not parquet_path.exists():
            raise FileNotFoundError(
                f"Could not find or save historical data for {ticker}."
            )

    returns_frame = pd.read_parquet(parquet_path)
    return returns_frame["log_return"]

load_market_snapshot

load_market_snapshot(
    ticker: str, snapshot_date: str
) -> pd.DataFrame | None

Loads a previously saved market data snapshot for a specific date.

Parameters:

Name Type Description Default
ticker str

The stock ticker of the desired snapshot, e.g., 'SPY'.

required
snapshot_date str

The date of the snapshot in 'YYYY-MM-DD' format.

required

Returns:

Type Description
DataFrame | None

A DataFrame containing the snapshot data, or None if the file is not found.

Source code in src/optpricing/data/market_data_manager.py
def load_market_snapshot(ticker: str, snapshot_date: str) -> pd.DataFrame | None:
    """
    Loads a previously saved market data snapshot for a specific date.

    The snapshot is read from ``{ticker}_{snapshot_date}.parquet`` inside
    the market snapshot directory.

    Parameters
    ----------
    ticker : str
        The stock ticker of the desired snapshot, e.g., 'SPY'.
    snapshot_date : str
        The date of the snapshot in 'YYYY-MM-DD' format.

    Returns
    -------
    pd.DataFrame | None
        The snapshot contents, or None (after printing an error message)
        if no file exists for the given ticker and date.
    """
    filename = MARKET_SNAPSHOT_DIR / f"{ticker}_{snapshot_date}.parquet"

    if filename.exists():
        print(f"Loading data from {filename}...")
        return pd.read_parquet(filename)

    # A missing snapshot is reported and signalled with None, not raised.
    print(f"Error: Snapshot file not found: {filename}")
    return None

save_historical_returns

save_historical_returns(
    tickers: list[str], period: str = "10y"
)

Fetches and saves historical log returns for a list of tickers.

This function iterates through a list of stock tickers, fetches historical price data from yfinance for the specified period, calculates the daily log returns, and saves them to a parquet file in the historical data directory.

Parameters:

Name Type Description Default
tickers list[str]

A list of stock tickers to process, e.g., ['SPY', 'AAPL'].

required
period str

The time period for which to fetch data, e.g., "10y", "5y", "1mo". Defaults to "10y".

'10y'
Source code in src/optpricing/data/historical_manager.py
def save_historical_returns(
    tickers: list[str],
    period: str = "10y",
):
    """
    Fetches and saves historical log returns for a list of tickers.

    For every ticker, the price history for `period` is requested from
    yfinance, log returns are computed from consecutive "Close" prices, and
    the result is written as a single ``log_return`` column to
    ``{ticker}_{period}_returns.parquet`` in the historical data directory.
    Tickers with no data are skipped, and a failure for one ticker is
    printed without stopping the remaining ones.

    Parameters
    ----------
    tickers : list[str]
        A list of stock tickers to process, e.g., ['SPY', 'AAPL'].
    period : str, optional
        The time period for which to fetch data, e.g., "10y", "5y", "1mo".
        Defaults to "10y".
    """
    print(f"--- Saving {period} Historical Returns ---")

    for ticker in tickers:
        try:
            print(f"Fetching data for {ticker}...")
            history = yf.Ticker(ticker).history(period=period)

            if history.empty:
                print(f"  -> No data found for {ticker}. Skipping.")
            else:
                close = history["Close"]
                # The first row has no previous close, so its ratio is NaN
                # and is dropped.
                returns = np.log(close / close.shift(1)).dropna()

                filename = HISTORICAL_DIR / f"{ticker}_{period}_returns.parquet"
                frame = returns.to_frame(name="log_return")
                frame.to_parquet(filename)

                print(f"  -> Saved to {filename}")

        except Exception as e:
            # Best-effort batch job: report the failure and move on.
            print(f"  -> FAILED to save data for {ticker}. Error: {e}")

save_market_snapshot

save_market_snapshot(tickers: list[str])

Saves a snapshot of the current market option chain for given tickers.

For each ticker, it fetches the live option chain using get_live_option_chain and saves it to a parquet file named with the ticker and the current date.

Parameters:

Name Type Description Default
tickers list[str]

A list of stock tickers to process, e.g., ['SPY', 'AAPL'].

required
Source code in src/optpricing/data/market_data_manager.py
def save_market_snapshot(tickers: list[str]):
    """
    Saves a snapshot of the current market option chain for given tickers.

    Each ticker's live option chain is obtained through
    `get_live_option_chain` and written to ``{ticker}_{date}.parquet`` in
    the market snapshot directory, where the date is today's date in
    'YYYY-MM-DD' form. Tickers with a missing or empty chain are skipped.

    Parameters
    ----------
    tickers : list[str]
        A list of stock tickers to process, e.g., ['SPY', 'AAPL'].
    """
    # date.isoformat() yields the same 'YYYY-MM-DD' text as the strftime form.
    today_str = date.today().isoformat()

    print(f"--- Saving Market Data Snapshot for {today_str} ---")
    for ticker in tickers:
        chain_df = get_live_option_chain(ticker)

        has_data = chain_df is not None and not chain_df.empty
        if has_data:
            filename = MARKET_SNAPSHOT_DIR / f"{ticker}_{today_str}.parquet"
            chain_df.to_parquet(filename)
            print(f"  -> Successfully saved {len(chain_df)} options to {filename}")
        else:
            print(f"  -> No valid option data found for {ticker}. Skipping.")