Coverage for src / qsmile / data / io.py: 100%

43 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-01 22:47 +0000

1"""Load option chain data from parquet files.""" 

2 

3from __future__ import annotations 

4 

5from pathlib import Path 

6 

7import numpy as np 

8import pandas as pd 

9 

10from qsmile.data.meta import SmileMetadata 

11from qsmile.data.prices import OptionChain 

12from qsmile.data.strikes import StrikeArray 

13 

# Columns selected from the raw option rows before calls and puts are joined.
# "strike" becomes the join index; the remaining columns receive ``_call`` /
# ``_put`` suffixes in the joined frame.  Kept as a list because it is used as
# a DataFrame column selector.
_CHAINS_COLS = ["strike", "bid", "ask", "volume", "openInterest"]

15 

16 

class SampleDataReader:
    """Read option chain parquet files from a directory.

    Files are looked up as
    ``<root>/chains/<underlying>_<fetch YYYYMMDD>_<expiry YYYYMMDD>.parquet``.

    Parameters
    ----------
    root : str | Path | None
        Directory containing ``chains/*.parquet`` files.
        Defaults to ``<project_root>/parquet``.
    """

    def __init__(self, root: str | Path | None = None) -> None:
        """Create a reader backed by a parquet directory.

        Parameters
        ----------
        root : str | Path | None
            Directory containing ``chains/*.parquet`` files.
            Defaults to ``<project_root>/parquet``.
        """
        if root is None:
            # Walk four directory levels up from this module's resolved
            # location to find the default ``parquet`` directory.
            root = Path(__file__).resolve().parent.parent.parent.parent / "parquet"
        self._root = Path(root)

    def get_chain(
        self,
        underlying: str,
        fetch_date: str,
        expiry_date: str,
    ) -> OptionChain:
        """Load an option chain from parquet and return an ``OptionChain``.

        Parameters
        ----------
        underlying : str
            Ticker symbol, e.g. ``"SPX"``.
        fetch_date : str
            Fetch / pricing date in ``YYYY-MM-DD`` format.
        expiry_date : str
            Expiry date in ``YYYY-MM-DD`` format.

        Returns
        -------
        OptionChain
            Fully constructed option chain with metadata and strike data.

        Raises
        ------
        FileNotFoundError
            If no parquet file exists for the requested combination.
        ValueError
            If the parquet file contains no rows.
        """
        path = self._resolve_path(underlying, fetch_date, expiry_date)
        df_raw = pd.read_parquet(path)
        return self._build_chain(df_raw)

    # ------------------------------------------------------------------

    def _resolve_path(
        self,
        underlying: str,
        fetch_date: str,
        expiry_date: str,
    ) -> Path:
        """Return the parquet path for one underlying / fetch / expiry triple.

        Both dates are parsed with ``pd.Timestamp`` and rendered as
        ``YYYYMMDD`` in the file name.

        Raises
        ------
        FileNotFoundError
            If the computed path does not exist.
        """
        fd = pd.Timestamp(fetch_date).strftime("%Y%m%d")
        ed = pd.Timestamp(expiry_date).strftime("%Y%m%d")
        filename = f"{underlying}_{fd}_{ed}.parquet"
        path = self._root / "chains" / filename
        if not path.exists():
            msg = f"parquet file not found: {path}"
            raise FileNotFoundError(msg)
        return path

    @staticmethod
    def _build_chain(df_raw: pd.DataFrame) -> OptionChain:
        """Assemble an ``OptionChain`` from raw per-option rows.

        The fetch and expiry dates are taken from the first row.  Calls and
        puts are inner-joined on strike, so only strikes present on both
        sides are kept, sorted ascending.  Volume and open interest are the
        call + put totals, with missing values counted as 0.

        Raises
        ------
        ValueError
            If ``df_raw`` has no rows.
        """
        # Fail early with a clear message instead of the opaque IndexError
        # that ``.iloc[0]`` raises on a frame without rows.
        if len(df_raw) == 0:
            msg = "option chain data is empty: cannot read fetchDate / expiryDate"
            raise ValueError(msg)

        date = pd.Timestamp(df_raw["fetchDate"].iloc[0])
        expiry_date = pd.Timestamp(df_raw["expiryDate"].iloc[0])

        calls = df_raw[df_raw["optionType"] == "call"][_CHAINS_COLS].set_index("strike")
        puts = df_raw[df_raw["optionType"] == "put"][_CHAINS_COLS].set_index("strike")
        merged = calls.join(puts, lsuffix="_call", rsuffix="_put", how="inner").sort_index()

        strike_idx = pd.Index(merged.index.values.astype(np.float64), name="strike")

        def quote(column: str) -> pd.Series:
            # One joined column as float64, indexed by strike.
            return pd.Series(merged[column].values.astype(np.float64), index=strike_idx)

        def call_put_total(field: str) -> pd.Series:
            # Call + put sum for ``field``; missing values count as 0.
            total = merged[f"{field}_call"].fillna(0).values + merged[f"{field}_put"].fillna(0).values
            return pd.Series(total.astype(np.float64), index=strike_idx)

        sd = StrikeArray()
        # Same insertion order as before: call bid, call ask, put bid, put ask.
        for side in ("call", "put"):
            for field in ("bid", "ask"):
                sd.set((side, field), quote(f"{field}_{side}"))
        sd.set(("market", "volume"), call_put_total("volume"))
        sd.set(("market", "open_interest"), call_put_total("openInterest"))

        meta = SmileMetadata(date=date, expiry=expiry_date)
        return OptionChain(strikedata=sd, metadata=meta)