avatarLarrimer Prestosa

Free AI web copilot to create summaries, insights and extended knowledge; download it here.

4937

Abstract

nts."</span>)</span></pre></div><div id="86a0"><pre> # <span class="hljs-type">Concatenate</span> the <span class="hljs-class"><span class="hljs-keyword">data</span> to the main <span class="hljs-type">DataFrame</span></span> all_data = pd.concat([<span class="hljs-class"><span class="hljs-keyword">data</span>, all_data], axis=0)</span></pre></div><div id="a879"><pre> <span class="hljs-comment"># Decrement the end date for the next iteration</span> <span class="hljs-attr">current_end_date</span> = current_start_date - timedelta(minutes=<span class="hljs-number">1</span>) <span class="hljs-comment"># One minute before the next start date</span> <span class="hljs-comment"># Drop duplicates, if any</span> <span class="hljs-attr">all_data</span> = all_data.loc[~all_data.index.duplicated(keep=<span class="hljs-string">'first'</span>)]</pre></div><div id="653d"><pre> # Drop <span class="hljs-string">'Adj Close'</span> column <span class="hljs-keyword">if</span> <span class="hljs-string">'Adj Close'</span> <span class="hljs-keyword">in</span> all_data.columns: all_data.drop(<span class="hljs-string">'Adj Close'</span>, <span class="hljs-attribute">axis</span>=1, <span class="hljs-attribute">inplace</span>=<span class="hljs-literal">True</span>)</pre></div><div id="eeb3"><pre> # <span class="hljs-keyword">Round</span> to <span class="hljs-number">2</span> <span class="hljs-keyword">decimal</span> places all_data = all_data.<span class="hljs-keyword">round</span>(<span class="hljs-number">2</span>)</pre></div><div id="1840"><pre> <span class="hljs-meta"># Remove timezone information</span> all_data.<span class="hljs-keyword">index</span> = all_data.<span class="hljs-keyword">index</span>.tz_localize(None)</pre></div><div id="0fdd"><pre> # <span class="hljs-keyword">Filter</span> data <span class="hljs-keyword">to</span> keep <span class="hljs-keyword">only</span> <span class="hljs-number">3</span>:<span class="hljs-number">00</span>pm <span class="hljs-keyword">to</span> 
<span class="hljs-number">4</span>:<span class="hljs-number">00</span>pm <span class="hljs-keyword">and</span> <span class="hljs-number">9</span>:<span class="hljs-number">30</span>am <span class="hljs-keyword">to</span> <span class="hljs-number">10</span>:<span class="hljs-number">30</span>am filtered_data_morning = all_data.between_time(<span class="hljs-string">'09:30'</span>, <span class="hljs-string">'10:30'</span>) #filtered_data_afternoon = all_data.between_time(<span class="hljs-string">'15:00'</span>, <span class="hljs-string">'16:00'</span>) filtered_data = pd.concat([filtered_data_morning])</pre></div><div id="da25"><pre> <span class="hljs-built_in">print</span>(filtered_data.tail(120))

# Save <span class="hljs-keyword">to</span> CSV
<span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> filtered_data.empty:
    filtered_data.to_csv(<span class="hljs-string">'SPY_30d_1m.csv'</span>)
    <span class="hljs-built_in">print</span>(<span class="hljs-string">"CSV file generated: SPY_30d_1m.csv"</span>)
<span class="hljs-keyword">else</span>:
    <span class="hljs-built_in">print</span>(<span class="hljs-string">"DataFrame is empty. No CSV file generated."</span>)</pre></div><div id="b614"><pre>    return filtered_data

end_date = datetime<span class="hljs-selector-class">.now</span>()
start_date = end_date - <span class="hljs-built_in">timedelta</span>(days=<span class="hljs-number">24</span>)
all_data = <span class="hljs-built_in">download_histdata</span>(start_date, end_date)</pre></div><h1 id="cee8">Implement XGBoost</h1><p id="8210">Logic is added to determine whether to trade PUT or CALL by comparing last_close_price (10:30) and last_predicted_price:</p><div id="0899"><pre><span class="hljs-keyword">def</span> <span class="hljs-title function_">implement_xgboost</span>(<span class="hljs-params">data</span>):
<span class="hljs-comment"># Filter the DataFrame to include only the relevant time slots</span>
morning_data = data.between_time(<span class="hljs-string">'09:30'</span>, <span class="hljs-string">'10:00'</span>)
cut_off_data = data.between_time(<span class="hljs-string">'10:00'</span>, <span class="hljs-string">'10:00'</span>)    
target_data = data.between_time(<span class="hljs-string">'10:30'</span>, <span class="hljs-string">'10:30'</span>)

<span class="hljs-comment"># Merge the morning data with the target data at 10:30 based on the date</span>
morning_data[<span class="hljs-string">'Date'</span>] = morning_data.index.date
target_data[<span class="hljs-string">'Date'</span>] = target_data.index.date
merged_data = pd.merge(morning_data, target_data[[<span class="hljs-string">'Date'</span>, <span class="hljs-string">'Close'</span>]], on=<span class="hljs-string">'Date'</span>, how=<span

Options

class="hljs-string">'inner'</span>, suffixes=(<span class="hljs-string">'_morning'</span>, <span class="hljs-string">'_target'</span>))</pre></div><div id="30cd"><pre> # Prepare features <span class="hljs-keyword">and</span> target variable X = merged_data.drop(columns=[<span class="hljs-string">'Date'</span>, <span class="hljs-string">'Close_target'</span>]) y = merged_data[<span class="hljs-string">'Close_target'</span>]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, <span class="hljs-attribute">test_size</span>=0.2, <span class="hljs-attribute">random_state</span>=42)

# Initialize <span class="hljs-keyword">and</span> train the model
model = xgb.XGBRegressor(<span class="hljs-attribute">objective</span>=<span class="hljs-string">"reg:squarederror"</span>, <span class="hljs-attribute">n_estimators</span>=100)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)
<span class="hljs-built_in">print</span>(f<span class="hljs-string">'cut_off at 10:00 {cut_off_data['</span>Close<span class="hljs-string">'].tail(1)}'</span>)
<span class="hljs-built_in">print</span>(f<span class="hljs-string">'prediction at 10:30 {y_pred[-1]}'</span>)

# Calculate RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
<span class="hljs-built_in">print</span>(f<span class="hljs-string">"RMSE: {rmse}"</span>)</pre></div><div id="58f0"><pre>    <span class="hljs-comment"># Determine the option type</span>
last_close_price = cut_off_data[<span class="hljs-string">'Close'</span>].<span class="hljs-built_in">tail</span>(1).iloc[0]
last_predicted_price = y_pred[-1]

<span class="hljs-keyword">if</span> last_close_price &lt; last_predicted_price:
    option_type = <span class="hljs-string">'call'</span>
<span class="hljs-keyword">else</span>:
    option_type = <span class="hljs-string">'put'</span>

<span class="hljs-built_in">return</span> option_type</pre></div><div id="7c2c"><pre>    <span class="hljs-variable"><span class="hljs-keyword">if</span></span> <span class="hljs-variable"><span class="hljs-keyword">not</span></span> <span class="hljs-variable">all_data.empty</span>:
    <span class="hljs-function"><span class="hljs-title">print</span>(<span class="hljs-string">"Implementing XGBoost..."</span>)</span>
    <span class="hljs-variable">option_type</span> = <span class="hljs-function"><span class="hljs-title">implement_xgboost</span>(<span class="hljs-variable">all_data.copy</span>())</span></pre></div><div id="1c00"><pre>    itm_call, itm_put = get_option_chain(<span class="hljs-string">'SPY'</span>) 
<span class="hljs-keyword">if</span> option_type == <span class="hljs-string">'call'</span>:
    <span class="hljs-built_in">print</span>(<span class="hljs-string">f'In the money CALL <span class="hljs-subst">{itm_call}</span>'</span>)
<span class="hljs-keyword">else</span>:     
    <span class="hljs-built_in">print</span>(<span class="hljs-string">f'In the money PUT <span class="hljs-subst">{itm_put}</span>'</span>)</pre></div><h1 id="9116">Determine the smallest In-the-Money Option</h1><p id="8213">To automate the option selection I have added code to choose the option contract (PUT or CALL) as determined by XGBoost. I also calculated the number of shares based on $10000 capital</p><div id="8d0f"><pre><span class="hljs-keyword">def</span> <span class="hljs-title function_">calculate_contracts</span>(<span class="hljs-params">capital, option_price</span>):
<span class="hljs-comment"># Calculate the number of contracts that can be purchased with the given capital</span>
num_contracts = capital // option_price

<span class="hljs-comment"># Calculate the number of shares that can be controlled with these contracts</span>
num_shares = num_contracts * <span class="hljs-number">100</span>

<span class="hljs-keyword">return</span> num_contracts, num_shares</pre></div><div id="e15e"><pre>capital = <span class="hljs-number">1000</span> <span class="hljs-comment"># $1000 capital</span>

<span class="hljs-keyword">if</span> option_type == <span class="hljs-string">'call'</span>: option_price = itm_call[<span class="hljs-string">'lastPrice'</span>] <span class="hljs-comment"># Price of one option contract</span> <span class="hljs-keyword">else</span>: option_price = itm_put[<span class="hljs-string">'lastPrice'</span>] <span class="hljs-comment"># Price of one option contract</span> num_contracts, num_shares = calculate_contracts(capital, option_price) <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Number of contracts that can be purchased: <span class="hljs-subst">{num_contracts}</span>"</span>)</pre></div><p id="328c">There it is. I would love to hear your comments and make sure to follow me. Cheers!!</p><p id="1c2a">3</p></article></body>

Day Trade SPY options Using XGBoost Predictive Model and Python

Use the first 30 minutes of the trading day (9:30 to 10:00) and XGBoost to decide whether to buy a CALL or a PUT contract, based on the predicted price at 10:30.

Photo by Beth Macdonald on Unsplash

Photo by Vladislav Babienko on Unsplash

First 30 minutes of the trading day

The first 30 minutes of the trading day are often volatile due to a combination of factors:

  1. Overnight news affects investor sentiment.
  2. Market orders at the open create a supply-demand imbalance.
  3. Lower liquidity can exaggerate price movements.
  4. Emotional trading from retail investors can cause erratic behavior.
  5. Institutional strategies may exploit or contribute to the volatility.
  6. Unfilled orders from the previous session add to the imbalance.
  7. The opening sets the tone for the rest of the day.
  8. Reduced information at the start can make the market more unpredictable.

These factors collectively result in heightened price swings and trading volumes during the market’s initial 30 minutes.

This volatility can be a trading opportunity.

The Approach

The premise is that, by analyzing the first 30 minutes of the trading day over 30 days of historical data, one can glean a pattern in the symbol’s behavior for the rest of the day. I implemented an XGBoost predictive model to find that pattern.

features = 1-minute bars from 9:30 to 10:00: Open, High, Low, Close, Volume

target = price at 10:30

The features and target are arbitrary. You can adjust the times as you see fit.

Import the necessary packages and download

I used yfinance to download the data. Note that yfinance only allows about 7 days per request at the 1-minute interval, so the download loops over 7-day chunks.

import warnings
warnings.filterwarnings("ignore")
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Module-level default so later top-level code can always reference `all_data`,
# even before (or without) a successful download.
all_data = pd.DataFrame()


def download_histdata(start_date, end_date):
    """Download 1-minute SPY bars and keep only the 09:30-10:30 window.

    The range is fetched in chunks of at most 7 days, newest first, because
    yfinance only serves about a week of 1-minute data per request.

    Args:
        start_date: Earliest datetime to request (never exceeded).
        end_date: Latest datetime to request.

    Returns:
        A DataFrame of 1-minute bars between 09:30 and 10:30 with a
        timezone-naive index, values rounded to 2 decimals and no
        'Adj Close' column. An empty DataFrame is returned when nothing
        could be downloaded.

    Side effects:
        Prints progress, and writes 'SPY_30d_1m.csv' to the working
        directory when the filtered result is not empty.
    """
    chunks = []
    current_end_date = end_date
    while current_end_date > start_date:
        # Clamp the window so the oldest chunk never reaches back before the
        # requested start_date.
        current_start_date = max(start_date, current_end_date - timedelta(days=7))
        print(f"Fetching data from {current_start_date} to {current_end_date}")
        data = yf.download('SPY', start=current_start_date, end=current_end_date, interval='1m', progress=False)
        print(f"Fetched {data.shape[0]} data points.")
        if not data.empty:
            chunks.append(data)
        # Make the next (older) window end exactly where this one started.
        # Leaving a one-minute gap can drop the bar on the boundary; any
        # overlap is removed by the duplicate filter below.
        current_end_date = current_start_date

    if not chunks:
        # Nothing was fetched. The datetime-only steps below would raise on
        # an empty, non-datetime index, so bail out early.
        print("DataFrame is empty. No CSV file generated.")
        return pd.DataFrame()

    # Chunks were collected newest-first; reverse them so rows run oldest-first.
    all_data = pd.concat(chunks[::-1], axis=0)
    all_data = all_data.loc[~all_data.index.duplicated(keep='first')]

    # NOTE(review): newer yfinance releases appear to return (field, ticker)
    # MultiIndex columns even for a single ticker -- flatten to plain field
    # names so 'Close' etc. resolve to a single column. Confirm against the
    # installed yfinance version.
    if isinstance(all_data.columns, pd.MultiIndex):
        all_data.columns = all_data.columns.get_level_values(0)

    all_data = all_data.drop(columns='Adj Close', errors='ignore')
    all_data = all_data.round(2)
    # Strip the timezone so between_time() works on plain wall-clock times.
    all_data.index = all_data.index.tz_localize(None)

    # Keep only the 9:30am to 10:30am window.
    filtered_data = all_data.between_time('09:30', '10:30')
    print(filtered_data.tail(120))

    # Save to CSV
    if not filtered_data.empty:
        filtered_data.to_csv('SPY_30d_1m.csv')
        print("CSV file generated: SPY_30d_1m.csv")
    else:
        print("DataFrame is empty. No CSV file generated.")
    return filtered_data


# Script entry point: previously this sat indented under the function's
# `return` and therefore never ran.
if __name__ == "__main__":
    end_date = datetime.now()
    start_date = end_date - timedelta(days=24)
    all_data = download_histdata(start_date, end_date)

Implement XGBoost

Logic is added to determine whether to trade a PUT or a CALL by comparing last_close_price (the 10:00 close) with last_predicted_price (the predicted 10:30 price):

def implement_xgboost(data):
    """Train an XGBoost regressor on morning bars and choose 'call' or 'put'.

    Every 1-minute bar between 09:30 and 10:00 becomes one training row whose
    target is the 10:30 close of the same calendar day. The most recent 10:00
    bar is then fed to the fitted model, and its predicted 10:30 price is
    compared with that bar's close.

    Args:
        data: DataFrame of 1-minute bars with a DatetimeIndex and at least a
            'Close' column (between_time and index.date are used on it).

    Returns:
        'call' if the latest 10:00 close is below the predicted 10:30 price,
        otherwise 'put'.

    Raises:
        ValueError: If there is no 10:00 bar, or no earlier session has both
            morning bars and a 10:30 bar to train on.
    """
    # .copy() so adding the 'Date' column below does not write into a slice
    # of `data` (avoids pandas' SettingWithCopy behaviour).
    morning_data = data.between_time('09:30', '10:00').copy()
    cut_off_data = data.between_time('10:00', '10:00')
    target_data = data.between_time('10:30', '10:30').copy()

    if cut_off_data.empty:
        raise ValueError("No 10:00 bar found in data; cannot determine the option type.")

    # Merge the morning data with the target data at 10:30 based on the date
    morning_data['Date'] = morning_data.index.date
    target_data['Date'] = target_data.index.date
    merged_data = pd.merge(morning_data, target_data[['Date', 'Close']], on='Date', how='inner', suffixes=('_morning', '_target'))

    # Hold the latest session out of training so the prediction made for it
    # below is not a row the model has already seen with its answer.
    latest_bar = cut_off_data.tail(1)
    latest_date = latest_bar.index[-1].date()
    merged_data = merged_data[merged_data['Date'] != latest_date]
    if merged_data.empty:
        raise ValueError("No earlier session with both morning bars and a 10:30 bar to train on.")

    # Prepare features and target variable
    X = merged_data.drop(columns=['Date', 'Close_target'])
    y = merged_data['Close_target']

    # Train-test split
    # NOTE(review): rows are shuffled, so bars from the same day can land in
    # both train and test; the RMSE below is likely optimistic.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Initialize and train the model
    model = xgb.XGBRegressor(objective="reg:squarederror", n_estimators=100)
    model.fit(X_train, y_train)

    # Evaluate on the held-out rows
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    # Predict from the most recent 10:00 bar. The last element of y_pred
    # belongs to an arbitrary shuffled test row, not to the latest session,
    # so it must not drive the decision.
    latest_features = latest_bar.rename(columns={'Close': 'Close_morning'})[X.columns]
    last_close_price = latest_bar['Close'].iloc[0]
    last_predicted_price = model.predict(latest_features)[0]

    # Outer double quotes: reusing the same quote type inside an f-string is
    # a SyntaxError before Python 3.12.
    print(f"cut_off at 10:00 {cut_off_data['Close'].tail(1)}")
    print(f'prediction at 10:30 {last_predicted_price}')
    print(f"RMSE: {rmse}")

    # Determine the option type
    return 'call' if last_close_price < last_predicted_price else 'put'


# Script entry point: previously this sat indented under the function's
# `return` and therefore never ran.
if __name__ == "__main__":
    if not all_data.empty:
        print("Implementing XGBoost...")
        option_type = implement_xgboost(all_data.copy())
        # NOTE(review): get_option_chain is not defined in this listing; it
        # presumably returns the in-the-money call and put for the symbol.
        itm_call, itm_put = get_option_chain('SPY')
        if option_type == 'call':
            print(f'In the money CALL {itm_call}')
        else:
            print(f'In the money PUT {itm_put}')
    else:
        # Without data there is no option_type, so skip the option lookup.
        print("No price data available; skipping XGBoost.")

Determine the smallest In-the-Money Option

To automate the option selection, I have added code to choose the option contract (PUT or CALL) as determined by XGBoost. I also calculate the number of contracts and shares that can be bought with $1,000 of capital.

def calculate_contracts(capital, option_price, shares_per_contract=100):
    """Return how many contracts fit in `capital` and the shares they control.

    Args:
        capital: Cash available for the purchase.
        option_price: Cost of ONE contract, in the same units as `capital`.
            If the caller only has a per-share premium quote, it must be
            multiplied by the contract size before being passed in.
        shares_per_contract: Number of underlying shares one contract
            controls. Defaults to 100.

    Returns:
        A tuple (num_contracts, num_shares). Floor division is used, so the
        numeric type follows the inputs (for example, a float price gives
        float counts).
    """
    # Floor division: only whole contracts can be bought.
    num_contracts = capital // option_price

    # Each contract controls `shares_per_contract` shares of the underlying.
    num_shares = num_contracts * shares_per_contract

    return num_contracts, num_shares
# Script entry point: size the position for the option side chosen earlier.
# The if/else bodies were previously not indented, which is an
# IndentationError.
if __name__ == "__main__":
    capital = 1000  # $1000 capital

    # NOTE(review): 'lastPrice' looks like a per-share premium quote. If so,
    # one contract costs lastPrice * 100 and the count printed below is
    # overstated by that factor -- confirm against get_option_chain.
    if option_type == 'call':
        option_price = itm_call['lastPrice']  # Price of one option contract
    else:
        option_price = itm_put['lastPrice']  # Price of one option contract

    num_contracts, num_shares = calculate_contracts(capital, option_price)
    print(f"Number of contracts that can be purchased: {num_contracts}")

There it is. I would love to hear your comments and make sure to follow me. Cheers!!

3

Recommended from ReadMedium