Imports & Functions¶

In [1]:
import pandas as pd
import numpy as np

import pandas_datareader.data as web
import plotly.express as px

Finance Functions¶

In [2]:
def get_mv_weights(df_window, gamma=1, output_relative_weights=True):
    """
    Get the Minimum Variance weights for a given window
    :param df_window: Window of data
    :param output_relative_weights: Whether to output relative weights or absolute weights
    """
    # Calculate variables
    mean = df_window.mean().values
    cov = df_window.cov().values
    inv_cov = np.linalg.pinv(cov)

    # Calculate weights
    absolute_weights = (1 / gamma) * inv_cov @ mean
    relative_weights = absolute_weights / np.abs(np.sum(absolute_weights))

    return relative_weights if output_relative_weights else absolute_weights
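The absolute weights follow the standard mean-variance solution (the inverse covariance matrix times the mean vector, scaled by 1/gamma), and the relative weights simply rescale them so that they sum to ±1. A minimal usage sketch on made-up return data (the asset names and random parameters are purely illustrative):

# Illustrative only: mean-variance weights for three synthetic assets
rng = np.random.default_rng(42)
toy_returns = pd.DataFrame(rng.normal(0.01, 0.05, size=(120, 3)), columns=["A", "B", "C"])

toy_weights = get_mv_weights(toy_returns)
print(toy_weights)             # relative weights
print(abs(toy_weights.sum()))  # 1.0 by construction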
In [3]:
def sharpe_ratio(returns):
    """
    Sharpe ratio of returns. Assumes returns are already in excess of the risk-free rate.
    :param returns: DataFrame of returns.
    """
    return returns.mean() / returns.std()
In [4]:
def sortino_ratio(returns):
    """
    Sortino ratio of returns. Assumes returns are already in excess of the risk-free rate.
    :param returns: DataFrame of returns.
    """
    return returns.mean() / returns[returns < 0].std()
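Both ratios are per-period figures (monthly in this notebook). A common rule of thumb, shown below on a made-up value, is to multiply a monthly Sharpe ratio by the square root of 12 to annualize it:

# Hypothetical monthly Sharpe ratio, annualized by the square-root-of-time rule
monthly_sharpe = 0.17
print(monthly_sharpe * np.sqrt(12))  # ~0.59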
In [5]:
def annualized_returns(returns, periods_per_year=None):
    """
    Annualized return of a return series.
    :param returns: DataFrame of returns.
    :param periods_per_year: Number of periods per year. Infers the number of periods per year from the index frequency if not provided.
    """
    if periods_per_year is None:
        periods_per_year = freq_to_periods_per_year(returns.index.freq)

    return (returns + 1).prod() ** (periods_per_year / len(returns)) - 1
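As a quick check of the geometric compounding, twelve periods of a constant 1% return annualize to about 12.7% (made-up data; periods_per_year is passed explicitly here so the index frequency does not matter):

# Illustrative only: constant 1% monthly returns
toy_monthly = pd.Series([0.01] * 12)
print(annualized_returns(toy_monthly, periods_per_year=12))  # ~0.1268
print(1.01 ** 12 - 1)                                        # same value, computed directly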
In [6]:
def annualized_volatility(returns, periods_per_year=None):
    """
    Annualized volatility of returns.
    :param returns: DataFrame of returns.
    :param periods_per_year: Number of periods per year. Infers the number of periods per year from the index frequency if not provided. 
    """
    if periods_per_year is None:
        periods_per_year = freq_to_periods_per_year(returns.index.freq)

    return returns.std() * np.sqrt(periods_per_year)
In [7]:
def max_drawdown(returns):
    """
    Maximum drawdown of returns.
    :param returns: DataFrame of returns.
    """
    starting_cash = 1  # Included only for clarity; the drawdown is scale-invariant, so any starting cash gives the same result.
    wealth_index = (returns + 1).cumprod().mul(starting_cash)
    wealth_max = wealth_index.cummax()

    return (wealth_index - wealth_max).div(wealth_max).min()
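A small made-up example: a +10% period followed by a -50% period produces a maximum drawdown of -50%, independent of the starting cash:

toy = pd.Series([0.10, -0.50, 0.10])
print(max_drawdown(toy))  # -0.5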
In [8]:
def value_at_risk(returns, level=0.05):
    """
    Value at risk of returns.
    :param returns: DataFrame of returns.
    :param level: Tail probability of the VaR (e.g. 0.05 for the 5% VaR).
    """
    return returns.quantile(level)
In [9]:
def conditional_value_at_risk(returns, level=0.05):
    """
    Conditional value at risk of returns.
    :param returns: DataFrame of returns.
    :param level: Tail probability of the CVaR (e.g. 0.05 for the 5% CVaR).
    """
    var = value_at_risk(returns, level=level)
    return returns[returns < var].mean()
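On made-up normally distributed returns, the CVaR (the average loss beyond the VaR cutoff) is always at least as negative as the VaR at the same level:

# Illustrative only: 10,000 synthetic returns with 5% volatility
rng = np.random.default_rng(0)
toy = pd.Series(rng.normal(0, 0.05, size=10_000))
print(value_at_risk(toy))              # roughly -1.645 * 0.05
print(conditional_value_at_risk(toy))  # more negative, roughly -2.06 * 0.05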
In [10]:
def certainty_equivalent_return(returns, gamma=1):
    """
    Certainty Equivalent (CEQ) return, defined as the
    risk-free rate that an investor is willing to accept
    rather than adopting a particular risky portfolio strategy.
    :param returns: DataFrame of returns.
    :param gamma: Risk aversion coefficient.
    """
    return returns.mean() - ((gamma/2) * returns.var())
In [11]:
def annualized_turnover(portfolio_weights, asset_returns):
    """
    Portfolio turnover, defined as the average sum of the 
    absolute value of the trades across the N available assets.
    :param returns: DataFrame of returns.
    :param weights: DataFrame of weights.
    """
    # Make sure asset returns have the same time frame as portfolio weights
    asset_returns = asset_returns.loc[portfolio_weights.index]

    # Get a list of portfolio strategy names
    portfolio_strategies = list(portfolio_weights.columns.get_level_values("Portfolio Strategy").unique())

    # Create an empty series where annualized turnover will be stored
    turnover = pd.Series(name="Annualized Turnover", index=portfolio_strategies, dtype="float64")

    # Loop through each portfolio strategy for their respective turnover calculation
    for strategy in portfolio_strategies:
        # Get strategy weights
        weights = portfolio_weights[strategy]

        # Get portfolio weights at time t+1 before and after rebalancing
        weights_t1_before_rebal = weights * (asset_returns + 1)
        weights_t1_after_rebal = weights.shift(-1)

        # Get the absolute turnover per period
        per_period_turnover = (weights_t1_after_rebal - weights_t1_before_rebal).abs().sum(axis=1)[:-1]

        # Convert per-period turnover to annualized turnover
        strategy_turnover = per_period_turnover.mean() * freq_to_periods_per_year(weights.index.freq)

        # Add to series
        turnover[strategy] = strategy_turnover
    
    # Return annualized turnover
    return turnover
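A minimal sketch on made-up data: a single strategy holding two assets at constant 50/50 weights with zero asset returns requires no trading, so its annualized turnover is zero. Note that this relies on freq_to_periods_per_year, which is defined under "Other Functions" below:

# Illustrative only: one strategy, two assets, constant weights, zero returns
idx = pd.period_range("2022-01", periods=3, freq="M")
cols = pd.MultiIndex.from_product([["Demo"], ["A", "B"]], names=["Portfolio Strategy", "Asset"])
toy_weights = pd.DataFrame(0.5, index=idx, columns=cols)
toy_asset_returns = pd.DataFrame(0.0, index=idx, columns=["A", "B"])
print(annualized_turnover(toy_weights, toy_asset_returns))  # Demo: 0.0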
In [12]:
def summary_stats(portfolio_returns, portfolio_weights, asset_returns):
    """
    Get summary statistics for a DataFrame of returns
    :param returns: DataFrame of returns
    """
    summary = pd.DataFrame({
        "Sharpe Ratio": sharpe_ratio(portfolio_returns),
        "Sortino Ratio": sortino_ratio(portfolio_returns),
        "Annualized Returns": annualized_returns(portfolio_returns),
        "Annualized Volatility": annualized_volatility(portfolio_returns),
        "Maximum Drawdown": max_drawdown(portfolio_returns),
        "Skewness": portfolio_returns.skew(),
        "Kurtosis": portfolio_returns.kurtosis(),
        "VaR (0.05)": value_at_risk(portfolio_returns),
        "CVaR (0.05)": conditional_value_at_risk(portfolio_returns),
        "CEQ": certainty_equivalent_return(portfolio_returns),
        "Annualized Turnover": annualized_turnover(portfolio_weights, asset_returns)
    })

    return summary

Other Functions¶

In [13]:
def generate_html(dataframe: pd.DataFrame, page_length=10, paging=True, info=True):
    """
    Generate HTML with jQuery Data Tables.
    Original code found here: https://github.com/x4nth055/pythoncode-tutorials/tree/master/general/dataframe-to-html
    :param dataframe: Pandas DataFrame
    """
    # get the table HTML from the dataframe
    table_html = dataframe.to_html(table_id="table")

    # Convert boolean to string
    paging = str(paging).lower()
    info = str(info).lower()

    # construct the complete HTML with jQuery DataTables
    # Paging and the info text can be toggled via the paging and info options passed to DataTable() below
    # This comment is here to avoid templating issues when publishing to my blog.
    html = f"""
    <html>
    <header>
        <link href="https://cdn.datatables.net/1.11.5/css/jquery.dataTables.min.css" rel="stylesheet">
        <script src="https://code.jquery.com/jquery-3.6.0.slim.min.js" integrity="sha256-u7e5khyithlIdTpu22PHhENmPcRdFiHRjhAuHcs05RI=" crossorigin="anonymous"></script>
        <script type="text/javascript" src="https://cdn.datatables.net/1.11.5/js/jquery.dataTables.min.js"></script>
        <script>
            $(document).ready( function () {{
                $('#table').DataTable({{
                    searching: false,
                    lengthChange: false,
                    pageLength: {page_length},
                    paging: {paging},
                    info: {info},
                }});
            }});
        </script>

        <style>
            body {{
                background-color: white;
            }}
            table.dataTable {{
                width: 100% !important;
            }}
            table.dataTable tbody tr {{
                background-color: transparent;
            }}
        </style>
    </header>
    <body>
    {table_html}
    </body>
    </html>
    """
    # This comment is here to avoid templating issues when publishing to my blog.

    # return the html
    return html
In [14]:
def freq_to_periods_per_year(freq):
    """
    Convert a frequency object to the number of periods per year.
    :param freq: Frequency object. Must be None, D, W, M, Q, or A.
    """
    if freq is None:
        return 252
    elif freq == "D":
        return 252
    elif freq == "W":
        return 52
    elif freq == "M":
        return 12
    elif freq == "Q":
        return 4
    elif freq == "A":
        return 1
    else:
        raise ValueError("freq must be None, D, W, M, Q, or A")

Generate Optimization Example Plot¶

In [15]:
x = np.linspace(0, 100, 1000)
y = -1 * (x-50)**2
y = y - min(y)

fig = px.line(x=x, y=y)
fig.add_hline(y=max(y), line_dash="dash", line_color="red")

fig.update_xaxes(title_text="Asset Weights (Utility Function Inputs)", showticklabels=False)
fig.update_yaxes(title_text="Utility", showticklabels=False)

fig.show()
fig.write_html("parabola_max.html")

Mean-Variance Optimization¶

In [16]:
# FamaFrench 10 Industry Portfolios
industries = web.DataReader("10_Industry_Portfolios", "famafrench", start="1963-07", end="2023-01")[0].div(100)

# FamaFrench Factors
factors = web.DataReader("F-F_Research_Data_Factors", "famafrench", start="1963-07", end="2023-01")[0].div(100)

# Augment Industries to get Excess Returns
industries_ex = industries.sub(factors["RF"], axis=0)

# Add market factor
industries_ex["Mkt-RF"] = factors["Mkt-RF"]
In [17]:
# DataFrames that the weights and returns will be appended to.
# There are more efficient ways of doing this, but I'm trying to keep the code as readable and consistent as possible.
mv_weights_in_sample = pd.DataFrame(columns=industries_ex.columns)
mv_weights_out_sample = pd.DataFrame(columns=industries_ex.columns)
mv_weights_full_sample = pd.DataFrame(columns=industries_ex.columns)
ew_weights = pd.DataFrame(columns=industries_ex.columns)
df_returns = pd.DataFrame()

# How many periods to use for the window
window_size = 180

# Loop through the data to obtain mean-variance weights
for i, date in enumerate(industries_ex.index):
    if i < (window_size - 1): continue

    # Get the window of data ending at 'date' with a window size of 'window_size'
    # Note that iloc[A:B] fetches rows A to B-1
    start_i = i - (window_size - 1)
    end_i = i + 1
    df_window = industries_ex.iloc[start_i:end_i]

    # Get the mean-variance weights
    weights = get_mv_weights(df_window)

    # Append the weights to the DataFrame
    mv_weights_in_sample.loc[date] = weights

# Update necessary weight DataFrames
mv_weights_out_sample = mv_weights_in_sample.shift(1)

mv_weights_full_sample = get_mv_weights(industries_ex).reshape(1, -1)
mv_weights_full_sample = np.repeat(mv_weights_full_sample, industries_ex.shape[0], axis=0)
mv_weights_full_sample = pd.DataFrame(mv_weights_full_sample, columns=industries_ex.columns, index=industries_ex.index)

ew_weights = np.ones((industries_ex.shape[0], industries_ex.shape[1])) / industries_ex.shape[1]
ew_weights = pd.DataFrame(ew_weights, columns=industries_ex.columns, index=industries_ex.index)

# Get the 1/N portfolio returns (useful as a benchmark)
df_returns["1/N"] = industries_ex.mul(ew_weights, axis=1).sum(axis=1)

# Get the mean-variance portfolio returns (in-sample)
df_returns["MV (In-Sample)"] = industries_ex.iloc[window_size-1:].mul(mv_weights_in_sample, axis=1).sum(axis=1)

# Get the mean-variance portfolio returns (out-of-sample)
df_returns["MV (Out-Sample)"] = industries_ex.iloc[window_size-1:].mul(mv_weights_out_sample, axis=1).sum(axis=1)

# Get the mean-variance portfolio returns (full-sample)
df_returns["MV (Full-Sample)"] = industries_ex.mul(mv_weights_full_sample, axis=1).sum(axis=1)

# Drop invalid rows so that all portfolios start at the same time.
# We drop the first window_size rows: the in-sample MV weights only exist from row window_size - 1 onward (the row index starts at 0),
# and the out-of-sample MV weights are shifted by one additional period, so they only start at row window_size.
df_returns = df_returns.iloc[window_size:]
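As an optional sanity check (not part of the original analysis), every row of the relative mean-variance weights should sum to ±1, because get_mv_weights normalizes by the absolute value of the weight sum:

# Hypothetical check: absolute row sums of the relative in-sample weights equal 1
row_sums = mv_weights_in_sample.astype(float).sum(axis=1)
print(np.allclose(row_sums.abs(), 1.0))  # expect True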
In [18]:
# Drop unnecessary rows from the weight DataFrames while we're at it (this will be useful in our results section)
ew_weights = ew_weights.loc[df_returns.index]
mv_weights_in_sample = mv_weights_in_sample.loc[df_returns.index]
mv_weights_out_sample = mv_weights_out_sample.loc[df_returns.index]
mv_weights_full_sample = mv_weights_full_sample.loc[df_returns.index]

# Create a single DataFrame containing portfolio weights for every strategy (also useful for results section)
header = pd.MultiIndex.from_product([df_returns.columns, industries_ex.columns], names=["Portfolio Strategy", "Asset"])
df_portfolio_weights = pd.DataFrame(columns=header)

df_portfolio_weights["1/N"] = ew_weights
df_portfolio_weights["MV (In-Sample)"] = mv_weights_in_sample
df_portfolio_weights["MV (Out-Sample)"] = mv_weights_out_sample
df_portfolio_weights["MV (Full-Sample)"] = mv_weights_full_sample
In [19]:
# Save portfolio returns to HTML for my blog
html_returns = generate_html(df_returns)
with open("mv_returns.html", "w") as file:
    file.write(html_returns)

# Print returns here as well
df_returns
Out[19]:
| Date    | 1/N       | MV (In-Sample) | MV (Out-Sample) | MV (Full-Sample) |
|---------|-----------|----------------|-----------------|------------------|
| 1978-07 | 0.047664  | 0.012951       | -0.009586       | 0.046217         |
| 1978-08 | 0.033936  | -0.033435      | -0.053118       | 0.024945         |
| 1978-09 | -0.014209 | 0.143651       | 0.130396        | -0.027631        |
| 1978-10 | -0.115382 | 0.033149       | 0.009592        | -0.110844        |
| 1978-11 | 0.026473  | 0.093890       | 0.091734        | 0.047838         |
| ...     | ...       | ...            | ...             | ...              |
| 2022-07 | 0.093573  | 0.076109       | 0.060402        | 0.029736         |
| 2022-08 | -0.029636 | -0.017365      | -0.020830       | 0.011081         |
| 2022-09 | -0.092091 | 0.005298       | -0.016549       | -0.073278        |
| 2022-10 | 0.080955  | 0.051083       | 0.034204        | 0.126058         |
| 2022-11 | 0.035536  | 0.053048       | 0.046345        | 0.023015         |

533 rows × 4 columns

Results¶

Make sure the "Mean-Variance Optimization" section has been run to populate the df_returns DataFrame.

In [20]:
# Get cumulative returns
df_returns_cum = (df_returns + 1).cumprod().sub(1)
df_returns_cum.index = df_returns_cum.index.to_timestamp()

# Plot the cumulative returns
fig = px.line(df_returns_cum, title="Cumulative Returns Over Time")
fig.update_yaxes(title_text="Returns", tickformat=".0%")
fig.update_layout(legend_title_text="Portfolio", title_x=0.5)
fig.show()

# Save the plot to HTML for my blog
fig.write_html("cumulative_returns.html")
In [21]:
# Get the summary statistics for each strategy
stats = summary_stats(df_returns, df_portfolio_weights, industries_ex).T

# Save summary statistics to Markdown for my blog
stats.to_markdown("summary_stats.md")

# Print summary statistics here as well
stats
Out[21]:
|                       | 1/N       | MV (In-Sample) | MV (Out-Sample) | MV (Full-Sample) |
|-----------------------|-----------|----------------|-----------------|------------------|
| Sharpe Ratio          | 0.172825  | 0.228963       | 0.038555        | 0.238449         |
| Sortino Ratio         | 0.229944  | 0.307529       | 0.042100        | 0.381380         |
| Annualized Returns    | 0.080959  | 0.216877       | -0.052830       | 0.127804         |
| Annualized Volatility | 0.149322  | 0.308717       | 0.377176        | 0.162132         |
| Maximum Drawdown      | -0.496305 | -0.830373      | -0.996717       | -0.313587        |
| Skewness              | -0.621345 | 0.980511       | -1.120290       | -0.053701        |
| Kurtosis              | 2.428163  | 14.247974      | 22.282114       | 1.122496         |
| VaR (0.05)            | -0.067842 | -0.095657      | -0.125124       | -0.061210        |
| CVaR (0.05)           | -0.099081 | -0.175262      | -0.259063       | -0.093562        |
| CEQ                   | 0.006521  | 0.016434       | -0.001730       | 0.010065         |
| Annualized Turnover   | 0.483863  | 40.531512      | 45.708499       | 3.984118         |