Imports & Functions¶

In [1]:
import pandas as pd
import numpy as np

import pandas_datareader.data as web
import plotly.express as px

Finance Functions¶

In [2]:
def get_mv_weights(df_window, gamma=1, output_relative_weights=True):
    """
    Get the Minimum Variance weights for a given window
    :param df_window: Window of data
    :param output_relative_weights: Whether to output relative weights or absolute weights
    """
    # Calculate variables
    mean = df_window.mean().values
    cov = df_window.cov().values
    inv_cov = np.linalg.pinv(cov)

    # Calculate weights
    absolute_weights = (1 / gamma) * inv_cov @ mean
    relative_weights = absolute_weights / np.abs(np.sum(absolute_weights))

    return relative_weights if output_relative_weights else absolute_weights
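The absolute weights follow the standard mean-variance solution (the inverse covariance matrix times the mean vector, scaled by 1/gamma), and the relative weights simply rescale them so that they sum to ±1. A minimal usage sketch on made-up return data (the asset names and random parameters are purely illustrative):

# Illustrative only: mean-variance weights for three synthetic assets
rng = np.random.default_rng(42)
toy_returns = pd.DataFrame(rng.normal(0.01, 0.05, size=(120, 3)), columns=["A", "B", "C"])

toy_weights = get_mv_weights(toy_returns)
print(toy_weights)             # relative weights
print(abs(toy_weights.sum()))  # 1.0 by construction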
In [3]:
def sharpe_ratio(returns):
    """
    Sharpe ratio of returns. Assumes returns are already in excess of the risk-free rate.
    :param returns: DataFrame of returns.
    """
    return returns.mean() / returns.std()
In [4]:
def sortino_ratio(returns):
    """
    Sortino ratio of returns. Assumes returns are already in excess of the risk-free rate.
    :param returns: DataFrame of returns.
    """
    return returns.mean() / returns[returns < 0].std()
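Both ratios are per-period figures (monthly in this notebook). A common rule of thumb, shown below on a made-up value, is to multiply a monthly Sharpe ratio by the square root of 12 to annualize it:

# Hypothetical monthly Sharpe ratio, annualized by the square-root-of-time rule
monthly_sharpe = 0.17
print(monthly_sharpe * np.sqrt(12))  # ~0.59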
In [5]:
def annualized_returns(returns, periods_per_year=None):
    """
    Annualized return of a return series.
    :param returns: DataFrame of returns.
    :param periods_per_year: Number of periods per year. Infers the number of periods per year from the index frequency if not provided.
    """
    if periods_per_year is None:
        periods_per_year = freq_to_periods_per_year(returns.index.freq)

    return (returns + 1).prod() ** (periods_per_year / len(returns)) - 1
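As a quick check of the geometric compounding, twelve periods of a constant 1% return annualize to about 12.7% (made-up data; periods_per_year is passed explicitly here so the index frequency does not matter):

# Illustrative only: constant 1% monthly returns
toy_monthly = pd.Series([0.01] * 12)
print(annualized_returns(toy_monthly, periods_per_year=12))  # ~0.1268
print(1.01 ** 12 - 1)                                        # same value, computed directly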
In [6]:
def annualized_volatility(returns, periods_per_year=None):
    """
    Annualized volatility of returns.
    :param returns: DataFrame of returns.
    :param periods_per_year: Number of periods per year. Infers the number of periods per year from the index frequency if not provided. 
    """
    if periods_per_year is None:
        periods_per_year = freq_to_periods_per_year(returns.index.freq)

    return returns.std() * np.sqrt(periods_per_year)
In [7]:
def max_drawdown(returns):
    """
    Maximum drawdown of returns.
    :param returns: DataFrame of returns.
    """
    starting_cash = 1  # Included only for clarity; the drawdown is scale-invariant, so any starting cash gives the same result.
    wealth_index = (returns + 1).cumprod().mul(starting_cash)
    wealth_max = wealth_index.cummax()

    return (wealth_index - wealth_max).div(wealth_max).min()
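A small made-up example: a +10% period followed by a -50% period produces a maximum drawdown of -50%, independent of the starting cash:

toy = pd.Series([0.10, -0.50, 0.10])
print(max_drawdown(toy))  # -0.5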
In [8]:
def value_at_risk(returns, level=0.05):
    """
    Value at risk of returns.
    :param returns: DataFrame of returns.
    :param level: Tail probability of the VaR (e.g. 0.05 for the 5% VaR).
    """
    return returns.quantile(level)
In [9]:
def conditional_value_at_risk(returns, level=0.05):
    """
    Conditional value at risk of returns.
    :param returns: DataFrame of returns.
    :param level: Tail probability of the CVaR (e.g. 0.05 for the 5% CVaR).
    """
    var = value_at_risk(returns, level=level)
    return returns[returns < var].mean()
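On made-up normally distributed returns, the CVaR (the average loss beyond the VaR cutoff) is always at least as negative as the VaR at the same level:

# Illustrative only: 10,000 synthetic returns with 5% volatility
rng = np.random.default_rng(0)
toy = pd.Series(rng.normal(0, 0.05, size=10_000))
print(value_at_risk(toy))              # roughly -1.645 * 0.05
print(conditional_value_at_risk(toy))  # more negative, roughly -2.06 * 0.05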
In [10]:
def certainty_equivalent_return(returns, gamma=1):
    """
    Certainty Equivalent (CEQ) return, defined as the
    risk-free rate that an investor is willing to accept
    rather than adopting a particular risky portfolio strategy.
    :param returns: DataFrame of returns.
    :param gamma: Risk aversion coefficient.
    """
    return returns.mean() - ((gamma/2) * returns.var())
In [11]:
def annualized_turnover(portfolio_weights, asset_returns):
    """
    Portfolio turnover, defined as the average sum of the 
    absolute value of the trades across the N available assets.
    :param returns: DataFrame of returns.
    :param weights: DataFrame of weights.
    """
    # Make sure asset returns have the same time frame as portfolio weights
    asset_returns = asset_returns.loc[portfolio_weights.index]

    # Get a list of portfolio strategy names
    portfolio_strategies = list(portfolio_weights.columns.get_level_values("Portfolio Strategy").unique())

    # Create an empty series where annualized turnover will be stored
    turnover = pd.Series(name="Annualized Turnover", index=portfolio_strategies, dtype="float64")

    # Loop through each portfolio strategy for their respective turnover calculation
    for strategy in portfolio_strategies:
        # Get strategy weights
        weights = portfolio_weights[strategy]

        # Get portfolio weights at time t+1 before and after rebalancing
        weights_t1_before_rebal = weights * (asset_returns + 1)
        weights_t1_after_rebal = weights.shift(-1)

        # Get the absolute turnover per period
        per_period_turnover = (weights_t1_after_rebal - weights_t1_before_rebal).abs().sum(axis=1)[:-1]

        # Convert per-period turnover to annualized turnover
        strategy_turnover = per_period_turnover.mean() * freq_to_periods_per_year(weights.index.freq)

        # Add to series
        turnover[strategy] = strategy_turnover
    
    # Return annualized turnover
    return turnover
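A minimal sketch on made-up data: a single strategy holding two assets at constant 50/50 weights with zero asset returns requires no trading, so its annualized turnover is zero. Note that this relies on freq_to_periods_per_year, which is defined under "Other Functions" below:

# Illustrative only: one strategy, two assets, constant weights, zero returns
idx = pd.period_range("2022-01", periods=3, freq="M")
cols = pd.MultiIndex.from_product([["Demo"], ["A", "B"]], names=["Portfolio Strategy", "Asset"])
toy_weights = pd.DataFrame(0.5, index=idx, columns=cols)
toy_asset_returns = pd.DataFrame(0.0, index=idx, columns=["A", "B"])
print(annualized_turnover(toy_weights, toy_asset_returns))  # Demo: 0.0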
In [12]:
def summary_stats(portfolio_returns, portfolio_weights, asset_returns):
    """
    Get summary statistics for a DataFrame of returns
    :param returns: DataFrame of returns
    """
    summary = pd.DataFrame({
        "Sharpe Ratio": sharpe_ratio(portfolio_returns),
        "Sortino Ratio": sortino_ratio(portfolio_returns),
        "Annualized Returns": annualized_returns(portfolio_returns),
        "Annualized Volatility": annualized_volatility(portfolio_returns),
        "Maximum Drawdown": max_drawdown(portfolio_returns),
        "Skewness": portfolio_returns.skew(),
        "Kurtosis": portfolio_returns.kurtosis(),
        "VaR (0.05)": value_at_risk(portfolio_returns),
        "CVaR (0.05)": conditional_value_at_risk(portfolio_returns),
        "CEQ": certainty_equivalent_return(portfolio_returns),
        "Annualized Turnover": annualized_turnover(portfolio_weights, asset_returns)
    })

    return summary

Other Functions¶

In [13]:
def generate_html(dataframe: pd.DataFrame, page_length=10, paging=True, info=True):
    """
    Generate HTML with jQuery Data Tables.
    Original code found here: https://github.com/x4nth055/pythoncode-tutorials/tree/master/general/dataframe-to-html
    :param dataframe: Pandas DataFrame
    """
    # get the table HTML from the dataframe
    table_html = dataframe.to_html(table_id="table")

    # Convert boolean to string
    paging = str(paging).lower()
    info = str(info).lower()

    # construct the complete HTML with jQuery DataTables
    # Paging and the info text can be toggled via the paging and info options passed to DataTable() below
    # This comment is here to avoid templating issues when publishing to my blog.
    html = f"""
    <html>
    <header>
        <link href="https://cdn.datatables.net/1.11.5/css/jquery.dataTables.min.css" rel="stylesheet">
        <script src="https://code.jquery.com/jquery-3.6.0.slim.min.js" integrity="sha256-u7e5khyithlIdTpu22PHhENmPcRdFiHRjhAuHcs05RI=" crossorigin="anonymous"></script>
        <script type="text/javascript" src="https://cdn.datatables.net/1.11.5/js/jquery.dataTables.min.js"></script>
        <script>
            $(document).ready( function () {{
                $('#table').DataTable({{
                    searching: false,
                    lengthChange: false,
                    pageLength: {page_length},
                    paging: {paging},
                    info: {info},
                }});
            }});
        </script>

        <style>
            body {{
                background-color: white;
            }}
            table.dataTable {{
                width: 100% !important;
            }}
            table.dataTable tbody tr {{
                background-color: transparent;
            }}
        </style>
    </header>
    <body>
    {table_html}
    </body>
    </html>
    """
    # This comment is here to avoid templating issues when publishing to my blog.

    # return the html
    return html
In [14]:
def freq_to_periods_per_year(freq):
    """
    Convert a frequency object to the number of periods per year.
    :param freq: Frequency object. Must be None, D, W, M, Q, or A.
    """
    if freq is None:
        return 252
    elif freq == "D":
        return 252
    elif freq == "W":
        return 52
    elif freq == "M":
        return 12
    elif freq == "Q":
        return 4
    elif freq == "A":
        return 1
    else:
        raise ValueError("freq must be None, D, W, M, Q, or A")

Generate Optimization Example Plot¶

In [15]:
x = np.linspace(0, 100, 1000)
y = -1 * (x-50)**2
y = y - min(y)

fig = px.line(x=x, y=y)
fig.add_hline(y=max(y), line_dash="dash", line_color="red")

fig.update_xaxes(title_text="Asset Weights (Utility Function Inputs)", showticklabels=False)
fig.update_yaxes(title_text="Utility", showticklabels=False)

fig.show()
fig.write_html("parabola_max.html")

Mean-Variance Optimization¶

In [16]:
# FamaFrench 10 Industry Portfolios
industries = web.DataReader("10_Industry_Portfolios", "famafrench", start="1963-07", end="2023-01")[0].div(100)

# FamaFrench Factors
factors = web.DataReader("F-F_Research_Data_Factors", "famafrench", start="1963-07", end="2023-01")[0].div(100)

# Augment Industries to get Excess Returns
industries_ex = industries.sub(factors["RF"], axis=0)

# Add market factor
industries_ex["Mkt-RF"] = factors["Mkt-RF"]
In [17]:
# DataFrames that the weights and returns will be appended to.
# There are more efficient ways of doing this, but I'm trying to keep the code as readable and consistent as possible.
mv_weights_in_sample = pd.DataFrame(columns=industries_ex.columns)
mv_weights_out_sample = pd.DataFrame(columns=industries_ex.columns)
mv_weights_full_sample = pd.DataFrame(columns=industries_ex.columns)
ew_weights = pd.DataFrame(columns=industries_ex.columns)
df_returns = pd.DataFrame()

# How many periods to use for the window
window_size = 180

# Loop through the data to obtain mean-variance weights
for i, date in enumerate(industries_ex.index):
    if i < (window_size - 1): continue

    # Get the window of data ending at 'date' with a window size of 'window_size'
    # Note that iloc[A:B] fetches rows A to B-1
    start_i = i - (window_size - 1)
    end_i = i + 1
    df_window = industries_ex.iloc[start_i:end_i]

    # Get the mean-variance weights
    weights = get_mv_weights(df_window)

    # Append the weights to the DataFrame
    mv_weights_in_sample.loc[date] = weights

# Update necessary weight DataFrames
mv_weights_out_sample = mv_weights_in_sample.shift(1)

mv_weights_full_sample = get_mv_weights(industries_ex).reshape(1, -1)
mv_weights_full_sample = np.repeat(mv_weights_full_sample, industries_ex.shape[0], axis=0)
mv_weights_full_sample = pd.DataFrame(mv_weights_full_sample, columns=industries_ex.columns, index=industries_ex.index)

ew_weights = np.ones((industries_ex.shape[0], industries_ex.shape[1])) / industries_ex.shape[1]
ew_weights = pd.DataFrame(ew_weights, columns=industries_ex.columns, index=industries_ex.index)

# Get the 1/N portfolio returns (useful as a benchmark)
df_returns["1/N"] = industries_ex.mul(ew_weights, axis=1).sum(axis=1)

# Get the mean-variance portfolio returns (in-sample)
df_returns["MV (In-Sample)"] = industries_ex.iloc[window_size-1:].mul(mv_weights_in_sample, axis=1).sum(axis=1)

# Get the mean-variance portfolio returns (out-of-sample)
df_returns["MV (Out-Sample)"] = industries_ex.iloc[window_size-1:].mul(mv_weights_out_sample, axis=1).sum(axis=1)

# Get the mean-variance portfolio returns (full-sample)
df_returns["MV (Full-Sample)"] = industries_ex.mul(mv_weights_full_sample, axis=1).sum(axis=1)

# Drop invalid rows so that all portfolios start at the same time.
# We drop the first window_size rows: the in-sample MV weights only exist from row window_size - 1 onward (the row index starts at 0),
# and the out-of-sample MV weights are shifted by one additional period, so they only start at row window_size.
df_returns = df_returns.iloc[window_size:]
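As an optional sanity check (not part of the original analysis), every row of the relative mean-variance weights should sum to ±1, because get_mv_weights normalizes by the absolute value of the weight sum:

# Hypothetical check: absolute row sums of the relative in-sample weights equal 1
row_sums = mv_weights_in_sample.astype(float).sum(axis=1)
print(np.allclose(row_sums.abs(), 1.0))  # expect True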
In [18]:
# Drop unnecessary rows from the weight DataFrames while we're at it (this will be useful in our results section)
ew_weights = ew_weights.loc[df_returns.index]
mv_weights_in_sample = mv_weights_in_sample.loc[df_returns.index]
mv_weights_out_sample = mv_weights_out_sample.loc[df_returns.index]
mv_weights_full_sample = mv_weights_full_sample.loc[df_returns.index]

# Create a single DataFrame containing portfolio weights for every strategy (also useful for results section)
header = pd.MultiIndex.from_product([df_returns.columns, industries_ex.columns], names=["Portfolio Strategy", "Asset"])
df_portfolio_weights = pd.DataFrame(columns=header)

df_portfolio_weights["1/N"] = ew_weights
df_portfolio_weights["MV (In-Sample)"] = mv_weights_in_sample
df_portfolio_weights["MV (Out-Sample)"] = mv_weights_out_sample
df_portfolio_weights["MV (Full-Sample)"] = mv_weights_full_sample
In [19]:
# Save portfolio returns to HTML for my blog
html_returns = generate_html(df_returns)
with open("mv_returns.html", "w") as file:
    file.write(html_returns)

# Print returns here as well
df_returns
Out[19]:
| Date    | 1/N       | MV (In-Sample) | MV (Out-Sample) | MV (Full-Sample) |
|---------|-----------|----------------|-----------------|------------------|
| 1978-07 | 0.047664  | 0.012951       | -0.009586       | 0.046217         |
| 1978-08 | 0.033936  | -0.033435      | -0.053118       | 0.024945         |
| 1978-09 | -0.014209 | 0.143651       | 0.130396        | -0.027631        |
| 1978-10 | -0.115382 | 0.033149       | 0.009592        | -0.110844        |
| 1978-11 | 0.026473  | 0.093890       | 0.091734        | 0.047838         |
| ...     | ...       | ...            | ...             | ...              |
| 2022-07 | 0.093573  | 0.076109       | 0.060402        | 0.029736         |
| 2022-08 | -0.029636 | -0.017365      | -0.020830       | 0.011081         |
| 2022-09 | -0.092091 | 0.005298       | -0.016549       | -0.073278        |
| 2022-10 | 0.080955  | 0.051083       | 0.034204        | 0.126058         |
| 2022-11 | 0.035536  | 0.053048       | 0.046345        | 0.023015         |

533 rows × 4 columns

Results¶

Make sure the "Mean-Variance Optimization" section has been run to populate the df_returns DataFrame.

In [20]:
# Get cumulative returns
df_returns_cum = (df_returns + 1).cumprod().sub(1)
df_returns_cum.index = df_returns_cum.index.to_timestamp()

# Plot the cumulative returns
fig = px.line(df_returns_cum, title="Cumulative Returns Over Time")
fig.update_yaxes(title_text="Returns", tickformat=".0%")
fig.update_layout(legend_title_text="Portfolio", title_x=0.5)
fig.show()

# Save the plot to HTML for my blog
fig.write_html("cumulative_returns.html")
In [21]:
# Get the summary statistics for each strategy
stats = summary_stats(df_returns, df_portfolio_weights, industries_ex).T

# Save summary statistics to Markdown for my blog
stats.to_markdown("summary_stats.md")

# Print summary statistics here as well
stats
Out[21]:
|                       | 1/N       | MV (In-Sample) | MV (Out-Sample) | MV (Full-Sample) |
|-----------------------|-----------|----------------|-----------------|------------------|
| Sharpe Ratio          | 0.172825  | 0.228963       | 0.038555        | 0.238449         |
| Sortino Ratio         | 0.229944  | 0.307529       | 0.042100        | 0.381380         |
| Annualized Returns    | 0.080959  | 0.216877       | -0.052830       | 0.127804         |
| Annualized Volatility | 0.149322  | 0.308717       | 0.377176        | 0.162132         |
| Maximum Drawdown      | -0.496305 | -0.830373      | -0.996717       | -0.313587        |
| Skewness              | -0.621345 | 0.980511       | -1.120290       | -0.053701        |
| Kurtosis              | 2.428163  | 14.247974      | 22.282114       | 1.122496         |
| VaR (0.05)            | -0.067842 | -0.095657      | -0.125124       | -0.061210        |
| CVaR (0.05)           | -0.099081 | -0.175262      | -0.259063       | -0.093562        |
| CEQ                   | 0.006521  | 0.016434       | -0.001730       | 0.010065         |
| Annualized Turnover   | 0.483863  | 40.531512      | 45.708499       | 3.984118         |