Coverage for src/qsmile/data/io.py: 100%
43 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-01 22:47 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-01 22:47 +0000
1"""Load option chain data from parquet files."""
3from __future__ import annotations
5from pathlib import Path
7import numpy as np
8import pandas as pd
10from qsmile.data.meta import SmileMetadata
11from qsmile.data.prices import OptionChain
12from qsmile.data.strikes import StrikeArray
# Columns selected from each per-side (call / put) slice of a raw chain frame.
# "strike" is used as the join index; the remaining columns receive
# "_call" / "_put" suffixes when the two sides are joined.
_CHAINS_COLS = ["strike", "bid", "ask", "volume", "openInterest"]
class SampleDataReader:
    """Read option chain parquet files from a directory.

    Parameters
    ----------
    root : str | Path | None
        Directory containing ``chains/*.parquet`` files.
        Defaults to ``<project_root>/parquet``.
    """

    def __init__(self, root: str | Path | None = None) -> None:
        """Create a reader backed by a parquet directory.

        Parameters
        ----------
        root : str | Path | None
            Directory containing ``chains/*.parquet`` files.
            Defaults to ``<project_root>/parquet``.
        """
        if root is None:
            # This module lives at src/qsmile/data/io.py, so four parents up
            # is the project root. Chained ``.parent`` is kept on purpose: it
            # saturates at the filesystem root instead of raising the way
            # ``.parents[3]`` would for a shallow path.
            root = Path(__file__).resolve().parent.parent.parent.parent / "parquet"
        self._root = Path(root)

    def get_chain(
        self,
        underlying: str,
        fetch_date: str,
        expiry_date: str,
    ) -> OptionChain:
        """Load an option chain from parquet and return an ``OptionChain``.

        Parameters
        ----------
        underlying : str
            Ticker symbol, e.g. ``"SPX"``.
        fetch_date : str
            Fetch / pricing date in ``YYYY-MM-DD`` format.
        expiry_date : str
            Expiry date in ``YYYY-MM-DD`` format.

        Returns
        -------
        OptionChain
            Fully constructed option chain with metadata and strike data.

        Raises
        ------
        FileNotFoundError
            If no parquet file exists for the requested combination.
        """
        path = self._resolve_path(underlying, fetch_date, expiry_date)
        df_raw = pd.read_parquet(path)
        return self._build_chain(df_raw)

    # ------------------------------------------------------------------

    def _resolve_path(
        self,
        underlying: str,
        fetch_date: str,
        expiry_date: str,
    ) -> Path:
        """Return the parquet path for one underlying / fetch / expiry triple.

        The file is expected at
        ``<root>/chains/<underlying>_<YYYYMMDD>_<YYYYMMDD>.parquet``, where the
        two dates are the fetch date and the expiry date respectively.

        Raises
        ------
        FileNotFoundError
            If the resolved path does not exist.
        """
        # Normalise through pd.Timestamp so any parseable date string maps to
        # the compact YYYYMMDD form used in the file names.
        fd = pd.Timestamp(fetch_date).strftime("%Y%m%d")
        ed = pd.Timestamp(expiry_date).strftime("%Y%m%d")
        filename = f"{underlying}_{fd}_{ed}.parquet"
        path = self._root / "chains" / filename
        if not path.exists():
            msg = f"parquet file not found: {path}"
            raise FileNotFoundError(msg)
        return path

    @staticmethod
    def _build_chain(df_raw: pd.DataFrame) -> OptionChain:
        """Convert a raw chain frame into an ``OptionChain``.

        The frame must provide ``fetchDate``, ``expiryDate`` and ``optionType``
        columns plus every column named in ``_CHAINS_COLS``. Calls and puts
        are inner-joined on strike, so a strike quoted on only one side is
        dropped. Missing volume / open-interest values count as zero when the
        two sides are summed.
        """
        # Only the first row is consulted for the dates.
        # NOTE(review): presumably every row of one file shares the same
        # fetchDate / expiryDate - confirm against the data producer.
        date = pd.Timestamp(df_raw["fetchDate"].iloc[0])
        expiry_date = pd.Timestamp(df_raw["expiryDate"].iloc[0])

        calls = df_raw[df_raw["optionType"] == "call"][_CHAINS_COLS].set_index("strike")
        puts = df_raw[df_raw["optionType"] == "put"][_CHAINS_COLS].set_index("strike")
        merged = calls.join(puts, lsuffix="_call", rsuffix="_put", how="inner").sort_index()

        strike_idx = pd.Index(merged.index.values.astype(np.float64), name="strike")

        def as_float_series(values) -> pd.Series:
            # Every stored series is float64 and shares the same strike index.
            return pd.Series(values.astype(np.float64), index=strike_idx)

        sd = StrikeArray()

        # Per-side quotes, in the order: call bid, call ask, put bid, put ask.
        for side in ("call", "put"):
            for quote in ("bid", "ask"):
                sd.set((side, quote), as_float_series(merged[f"{quote}_{side}"].values))

        # Market-wide totals: call + put per strike, NaN treated as zero.
        for key, column in (("volume", "volume"), ("open_interest", "openInterest")):
            total = merged[f"{column}_call"].fillna(0).values + merged[f"{column}_put"].fillna(0).values
            sd.set(("market", key), as_float_series(total))

        meta = SmileMetadata(date=date, expiry=expiry_date)
        return OptionChain(strikedata=sd, metadata=meta)