dad_options/options_scraper.py
2025-11-27 13:37:30 -07:00

195 lines
5.9 KiB
Python

from __future__ import annotations
import argparse
import sys
from datetime import datetime, timezone
from pathlib import Path
import pandas as pd
import yfinance as yf
# Number of leading expirations exported when the command line gives no
# --expiration, no --all-expirations, and no --exp-start/--exp-end range.
DEFAULT_EXPIRATION_LIMIT = 10
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse and validate the command-line arguments.

    Args:
        argv: Argument strings to parse; ``None`` lets argparse fall back to
            ``sys.argv[1:]``.

    Returns:
        Namespace with ``symbol``, ``expiration``, ``all_expirations``,
        ``exp_start``, ``exp_end``, ``calls_only`` and ``puts_only``.

    Conflicting selectors or negative indices are reported through
    ``parser.error``, which prints a usage message and raises ``SystemExit``.
    """
    cli = argparse.ArgumentParser(
        description="Export Yahoo Finance option chains for a symbol to CSV.",
    )
    cli.add_argument("symbol", help="Ticker symbol, e.g., AAPL")
    cli.add_argument(
        "-e",
        "--expiration",
        help="Specific expiration date in YYYY-MM-DD as listed by Yahoo Finance.",
    )
    cli.add_argument(
        "--all-expirations",
        action="store_true",
        help=(
            "Download every listed expiration (default is the first 10 when"
            " no --expiration is provided)."
        ),
    )

    # Integer index bounds into the list of available expirations.
    index_options = (
        ("--exp-start", "Start index (0-based) into the available expirations list."),
        ("--exp-end", "End index (0-based, exclusive) into the available expirations list."),
    )
    for flag, help_text in index_options:
        cli.add_argument(flag, type=int, help=help_text)

    # Boolean switches restricting the export to one side of the chain.
    side_switches = (
        ("--calls-only", "Include only call options."),
        ("--puts-only", "Include only put options."),
    )
    for flag, help_text in side_switches:
        cli.add_argument(flag, action="store_true", help=help_text)

    opts = cli.parse_args(argv)
    has_range = not (opts.exp_start is None and opts.exp_end is None)

    # Checked in order; cli.error() exits on the first violated rule.
    rules = (
        (
            opts.calls_only and opts.puts_only,
            "Use only one of --calls-only or --puts-only.",
        ),
        (
            opts.expiration and has_range,
            "Use either --expiration or the exp-start/exp-end range, not both.",
        ),
        (
            opts.all_expirations and has_range,
            "Use either --all-expirations or the exp-start/exp-end range, not both.",
        ),
        (
            opts.exp_start is not None and opts.exp_start < 0,
            "--exp-start must be non-negative.",
        ),
        (
            opts.exp_end is not None and opts.exp_end < 0,
            "--exp-end must be non-negative.",
        ),
    )
    for violated, message in rules:
        if violated:
            cli.error(message)
    return opts
def pick_expirations(
    ticker: yf.Ticker,
    requested: str | None,
    include_all: bool,
    default_limit: int | None,
    range_start: int | None,
    range_end: int | None,
) -> tuple[list[str], list[str]]:
    """Choose which of the ticker's listed expirations to download.

    Selection precedence, first match wins:

    1. ``requested`` - that single expiration, which must be listed.
    2. ``range_start`` / ``range_end`` - the slice ``available[start:end]``;
       a missing start means 0, a missing end means "to the last one", and an
       end past the list is clamped to its length.
    3. ``include_all`` - every listed expiration.
    4. Otherwise the first ``default_limit`` expirations (1 when ``None``).

    Args:
        ticker: Object whose ``options`` attribute lists expiration strings.
        requested: Exact expiration string to select, or ``None``.
        include_all: Select every listed expiration.
        default_limit: How many leading expirations to take in the fallback.
        range_start: 0-based start index of the slice, or ``None``.
        range_end: 0-based exclusive end index of the slice, or ``None``.

    Returns:
        ``(targets, available)`` as two independent lists, so mutating one
        never affects the other.

    Raises:
        RuntimeError: The ticker lists no expirations.
        ValueError: ``requested`` is not listed, the start index is past the
            end of the list, or the range is empty.
    """
    available = list(ticker.options or [])
    if not available:
        raise RuntimeError("No option expirations found for that symbol.")

    if requested:
        if requested not in available:
            raise ValueError(
                f"Expiration {requested!r} not in available dates: {', '.join(available)}"
            )
        targets = [requested]
    elif range_start is not None or range_end is not None:
        start = range_start or 0
        end = range_end if range_end is not None else len(available)
        if start >= len(available):
            raise ValueError(
                f"--exp-start {start} is beyond available expirations (max index {len(available) - 1})."
            )
        # An end index past the list is tolerated and clamped.
        end = min(end, len(available))
        if start >= end:
            raise ValueError("--exp-start must be less than --exp-end.")
        targets = available[start:end]
    elif include_all:
        # Copy so the two returned lists never alias the same object.
        targets = list(available)
    else:
        limit = default_limit if default_limit is not None else 1
        targets = available[:limit]
    return targets, available
def fetch_option_frames(
    ticker: yf.Ticker, expirations: list[str], include_calls: bool, include_puts: bool
) -> pd.DataFrame:
    """Collect the option chains for *expirations* into one DataFrame.

    For every expiration ``ticker.option_chain`` is called once; the
    requested, non-empty sides are tagged with a ``type`` column (``"call"``
    or ``"put"``) and an ``expiration`` column before being combined.

    Args:
        ticker: Object providing ``option_chain(expiration)`` whose result
            exposes ``calls`` and ``puts`` DataFrames.
        expirations: Expiration strings to fetch, in order.
        include_calls: Whether call contracts are included.
        include_puts: Whether put contracts are included.

    Returns:
        All collected rows sorted by expiration, type and strike with a fresh
        0-based index, or an empty DataFrame when nothing was collected.
    """
    # Resolve once which chain attributes to read and how to label them.
    wanted_sides = [
        (attr, label)
        for enabled, attr, label in (
            (include_calls, "calls", "call"),
            (include_puts, "puts", "put"),
        )
        if enabled
    ]

    collected: list[pd.DataFrame] = []
    for expiry in expirations:
        chain = ticker.option_chain(expiry)
        for attr, label in wanted_sides:
            contracts = getattr(chain, attr)
            if contracts.empty:
                continue
            # assign() returns a new frame, leaving the chain's own data untouched.
            collected.append(contracts.assign(type=label, expiration=expiry))

    if not collected:
        return pd.DataFrame()

    return (
        pd.concat(collected, ignore_index=True)
        .sort_values(["expiration", "type", "strike"])
        .reset_index(drop=True)
    )
def main(argv: list[str] | None = None) -> int:
    """Run the exporter: parse arguments, download chains, write a CSV.

    The CSV is written to the ``Desktop`` directory under the user's home as
    ``options_<SYMBOL>_<UTC timestamp>.csv``.

    Args:
        argv: Command-line arguments; ``None`` means ``sys.argv[1:]``.

    Returns:
        ``0`` on success, ``1`` when selecting expirations, fetching data or
        writing the file fails (the reason is printed to stderr).
    """
    args = parse_args(argv)
    include_calls = not args.puts_only
    include_puts = not args.calls_only

    # The default cap only applies when the user gave no explicit selector.
    default_limit = (
        DEFAULT_EXPIRATION_LIMIT
        if args.expiration is None and not args.all_expirations
        else None
    )

    ticker = yf.Ticker(args.symbol)
    try:
        target_expirations, _available = pick_expirations(
            ticker,
            args.expiration,
            args.all_expirations,
            default_limit,
            args.exp_start,
            args.exp_end,
        )
    except (ValueError, RuntimeError) as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1

    try:
        options_df = fetch_option_frames(
            ticker, target_expirations, include_calls, include_puts
        )
    except Exception as exc:  # noqa: BLE001
        # Top-level boundary: report any fetch failure without a traceback.
        print(f"error: failed to fetch options data: {exc}", file=sys.stderr)
        return 1

    if options_df.empty:
        print("error: no option data returned.", file=sys.stderr)
        return 1

    # Compact UTC stamp (e.g. 20251127T203730Z). isoformat() would embed ':'
    # and '+', and ':' is not permitted in Windows file names.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    output_path = (
        Path.home()
        / "Desktop"
        / f"options_{args.symbol.upper()}_{timestamp}.csv"
    )
    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        options_df.to_csv(output_path, index=False)
    except OSError as exc:
        # Keep filesystem failures consistent with the other error paths.
        print(f"error: failed to write {output_path}: {exc}", file=sys.stderr)
        return 1

    print(
        f"Wrote {len(options_df)} rows across {len(target_expirations)} expiration(s) "
        f"to {output_path}"
    )
    return 0
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    sys.exit(main())