Source code for sgfixedincome_pkg.analysis

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from sgfixedincome_pkg import equations


[docs]
def filter_df(combined_df, investment_amount=None, min_tenure=0, max_tenure=999, 
              min_rate=None, consider_tbills=True, consider_ssbs=True, consider_fd=True, 
              include_providers=None, exclude_providers=None):
    """
    Filters the combined_df based on provided criteria, including investment amount, tenure, rate, 
    product provider, product, and whether to consider T-bills and SSBs.

    Parameters:
        combined_df (pd.DataFrame): DataFrame containing columns 'Tenure', 'Rate', 'Deposit lower bound', 
                                    'Deposit upper bound', 'Required multiples', 'Product provider', 'Product'.
        investment_amount (float, optional): The investment amount to filter available rates and products for that amount.
        min_tenure (int, optional): The minimum tenure (in months) to filter. Default is 0.
        max_tenure (int, optional): The maximum tenure (in months) to filter. Default is 999.
        min_rate (float, optional): The minimum rate (% p.a.) to filter. Default is None (no filtering).
        consider_tbills (bool, optional): Whether to consider T-bills. Default is True.
        consider_ssbs (bool, optional): Whether to consider SSBs. Default is True.
        consider_fd (bool, optional): Whether to consider fixed deposits. Default is True.
        include_providers (list, optional): Exclusive list of providers to include. Default is None.
        exclude_providers (list, optional): List of providers to exclude. Default is None.

    Returns:
        pd.DataFrame: The filtered DataFrame based on the provided criteria.
    """
    filtered_df = combined_df.copy()

    # Filter based on investment amount (if provided)
    if investment_amount is not None:
        filtered_df = filtered_df[(filtered_df['Deposit lower bound'] <= investment_amount) & 
                                   (filtered_df['Deposit upper bound'] >= investment_amount)]

    # Filter based on tenure range
    filtered_df = filtered_df[(filtered_df['Tenure'] >= min_tenure) & 
                               (filtered_df['Tenure'] <= max_tenure)]
    
    # Filter based on rate range (if provided)
    if min_rate is not None:
        filtered_df = filtered_df[filtered_df['Rate'] >= min_rate]

    # Filter based on product (SSB, T-bill, Fixed Deposit)
    if not consider_tbills:
        filtered_df = filtered_df[~filtered_df['Product'].str.contains("T-bill", case=False, na=False)]
    if not consider_ssbs:
        filtered_df = filtered_df[~filtered_df['Product'].str.contains("SSB", case=False, na=False)]
    if not consider_fd:
        filtered_df = filtered_df[~filtered_df['Product'].str.contains("Fixed Deposit", case=False, na=False)]

    # Filter based on product provider (if provided)
    if include_providers:
        filtered_df = filtered_df[filtered_df['Product provider'].isin(include_providers)]
    
    # Filter based on product provider (if provided)
    if exclude_providers:
        filtered_df = filtered_df[~filtered_df['Product provider'].isin(exclude_providers)]

    # If no rows remain after filtering, raise an exception
    if filtered_df.empty:
        raise ValueError("No products found matching the provided criteria.")
    
    return filtered_df.reset_index(drop=True)



[docs]
def best_returns(combined_df, investment_amount, min_tenure=0, max_tenure=999):
    """
    Calculate the highest total dollar return achievable for each possible tenure,
    considering that the offered rates and available products differ across invested amounts. 

    This function assumes you only can select one product to invest in, and finds the highest dollar return
    attainable for each tenure. For products which only accept investment in specific multiples, we allocate the
    maximum amount of investment to them given the investment amount, and assume the remaining cash earns no return.
    
    As such, for each tenure, the product delivering the best return (our concern here) may differ from the product 
    with the highest rates. For example, product 'A' with a higher rate but which has required multiples of investment
    may produce lower total dollar return compared to product 'B' with a lower rate but no required multiples, as the 
    full amount of cash cannot be invested in product 'A' but can be fully invested into product 'B'.

    Parameters:
        combined_df (pd.DataFrame): DataFrame containing columns 'Tenure', 'Rate', 'Deposit lower bound', 
                                    'Deposit upper bound', 'Required multiples', 'Product provider', 'Product'.
        investment_amount (float): The investment amount to filter available rates and products for that amount.
        min_tenure (int, optional): The minimum tenure (in months) to consider. Default is 0.
        max_tenure (int, optional): The maximum tenure (in months) to consider. Default is 999.

    Returns:
        pd.DataFrame: A DataFrame with products that deliver the highest dollar return for each tenure, 
        product details, and total dollar return from the investment.
    """
    # Filter rows where the investment_amount and Tenure is within the allowed bounds
    valid_inv_df = filter_df(
        combined_df,
        investment_amount=investment_amount, 
        min_tenure=min_tenure, 
        max_tenure=max_tenure
        ).copy()
    
    # If no valid rows remain after filtering, raise an exception
    if valid_inv_df.empty:
        raise ValueError(f"Cannot find valid products for an investment amount of {investment_amount}.")
    
    # Calculate the maximum investable amount based on required multiples
    def calculate_invested_amount(row):
        if pd.isna(row['Required multiples']):
            return investment_amount  # No multiples restriction, use the entire amount
        max_multiple = investment_amount // row['Required multiples']
        return max_multiple * row['Required multiples']  # Maximum valid investable amount
    
    valid_inv_df['Invested amount'] = valid_inv_df.apply(
        calculate_invested_amount, axis=1
        )

    # Calculate the total dollar return for each row using the 'Invested amount'
    valid_inv_df.loc[:, 'Total Dollar Return'] = valid_inv_df.apply(
        lambda row: equations.calculate_dollar_return(
            row['Invested amount'],
            row['Rate'],
            row['Tenure']
        ),
        axis=1
    )

    # Group by tenure and get the row with the maximum total return for each tenure
    best_returns_df = valid_inv_df.loc[
        valid_inv_df.groupby('Tenure')['Total Dollar Return'].idxmax()
        ]
    best_returns_df.sort_values(by='Tenure', inplace=True) # Sort by tenure

    return best_returns_df.reset_index(drop=True)



[docs]
def best_rates(combined_df, investment_amount, min_tenure=0, max_tenure=999):
    """
    Display the highest rates offered for each possible tenure given an investment amount.

    Parameters:
        combined_df (pd.DataFrame): DataFrame containing columns 'Tenure', 'Rate', 'Deposit lower bound', 
                                    'Deposit upper bound', 'Required multiples', 'Product provider', 'Product'.
        investment_amount (float): The investment amount to filter available rates and products for that amount.
        min_tenure (int, optional): The minimum tenure (in months) to consider. Default is 0.
        max_tenure (int, optional): The maximum tenure (in months) to consider. Default is 999.

    Returns:
        pd.DataFrame: A DataFrame with the products offering the best rate (in % p.a.) for each tenure.
    """
    # Filter rows where the investment_amount and Tenure is within the allowed bounds
    valid_inv_df = filter_df(
        combined_df,
        investment_amount=investment_amount, 
        min_tenure=min_tenure, 
        max_tenure=max_tenure
        ).copy()
    
    # If no valid rows remain after filtering, raise an exception
    if valid_inv_df.empty:
        raise ValueError(f"Cannot find valid products for an investment amount of {investment_amount}.")

    # Group by tenure and get the row with the highest rate for each tenure
    best_rates_df = valid_inv_df.loc[
        valid_inv_df.groupby('Tenure')['Rate'].idxmax()
        ]
    best_rates_df.sort_values(by='Tenure', inplace=True) # Sort by tenure

    return best_rates_df.reset_index(drop=True)



[docs]
def products(combined_df):
    """
    Returns a list of unique products in the dataset by joining the 'Product provider' and 'Product' columns.
    It considers unique combinations of these joined strings.

    Parameters:
        combined_df (pd.DataFrame): DataFrame containing the columns 'Product provider' and 'Product'.

    Returns:
        list: A list of unique product combinations in the format 'Product provider - Product'.
    """
    # Combine 'Product provider' and 'Product' into a single string for each row
    product_combinations = combined_df['Product provider'] + ' - ' + combined_df['Product']
    
    # Get the unique combinations and return them as a list
    unique_products = product_combinations.unique().tolist()

    return unique_products



[docs]
def plot_rates_vs_tenure(df, investment_amount, min_tenure=0, max_tenure=999):
    """
    Plots a graph of Rate (% p.a.) vs Tenure (in months) for a given investment amount
    with optional filtering by tenure range. Each unique 'Product provider - Product' 
    pair is plotted as a separate line.

    Parameters:
        df (pd.DataFrame): DataFrame containing the data to plot. Must include columns:
                            'Tenure', 'Rate', 'Deposit lower bound', 'Deposit upper bound', 
                            'Product provider', 'Product'.
        investment_amount (float): The investment amount to filter rows for the plot.
        min_tenure (int, optional): Minimum tenure (in months) to include. Default is 0.
        max_tenure (int or float, optional): Maximum tenure (in months) to include. Default is 999.

    Raises:
        ValueError: If no valid rows remain after filtering based on the investment amount and tenure.
    """
    # Filter rows by investment amount and tenure range
    filtered_df = filter_df(
        df,
        investment_amount=investment_amount, 
        min_tenure=min_tenure, 
        max_tenure=max_tenure
        ).copy()

    # Raise an exception if no valid rows remain
    if filtered_df.empty:
        raise ValueError(f"No data available for the investment amount of {investment_amount} "
                         f"and tenure range {min_tenure}-{max_tenure} months.")

    # Create a unique identifier for each product provider-product pair
    filtered_df['Product Combination'] = (
        filtered_df['Product provider'] + ' - ' + filtered_df['Product']
    )

    # Plotting
    plt.figure(figsize=(12, 8))
    sns.lineplot(
        data=filtered_df,
        x='Tenure',
        y='Rate',
        hue='Product Combination',
        marker='o'
    )

    # Customizing the plot
    plt.title(
        f'Rate (% p.a.) vs Tenure for Investment Amount: {investment_amount}\n'
        f'Filtered by Tenure: {min_tenure} to {max_tenure} months',
        fontsize=16
    )
    plt.xlabel('Tenure (Months)', fontsize=14)
    plt.ylabel('Rate (% p.a.)', fontsize=14)
    plt.legend(title='Product Provider - Product', fontsize=12, title_fontsize=14, loc='best')
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.tight_layout()

    # Show the plot
    plt.show()



[docs]
def plot_best_rates(df, investment_amount, min_tenure=0, max_tenure=999):
    """
    Plot of best rates (% p.a.) for each tenure for a given investment amount, across
    available products. The plot color-codes the points by provider-product pair.

    Parameters:
        df (pd.DataFrame): DataFrame containing columns 'Tenure', 'Rate', 'Deposit lower bound', 
                           'Deposit upper bound', 'Required multiples', 'Product provider', 'Product'.
        investment_amount (float): The investment amount to filter available rates and products for that amount and
                                    to calculate the total return.
        min_tenure (int, optional): The minimum tenure (in months) to consider. Default is 0.
        max_tenure (int, optional): The maximum tenure (in months) to consider. Default is 999.
    """
    # Get best rates DataFrame
    best_rates_df = best_rates(df, investment_amount, min_tenure, max_tenure)
    
    # Combine 'Product provider' and 'Product' for unique identification
    best_rates_df['Provider-Product'] = best_rates_df['Product provider'] + " - " + best_rates_df['Product']
    
    # Create the plot
    plt.figure(figsize=(12, 8))
    
    # Draw a single continuous line
    plt.plot(best_rates_df['Tenure'], best_rates_df['Rate'], 
             linestyle='-', color='black', alpha=0.7)
    
    # Add points, color-coded by Provider-Product
    sns.scatterplot(
        data=best_rates_df,
        x='Tenure',
        y='Rate',
        hue='Provider-Product',
        palette='tab10',  # Customize color palette as needed
        s=100,  # Size of points
        edgecolor='black'
    )
    
    # Add labels and grid
    plt.title("Rate (% p.a.) by Tenure (Color-coded by Provider-Product)", fontsize=16)
    plt.xlabel("Tenure (months)", fontsize=14)
    plt.ylabel("Rate (% p.a.)", fontsize=14)
    plt.grid(alpha=0.3)
    plt.legend(title="Provider-Product", fontsize=14, bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()

    # Show the plot
    plt.show()



[docs]
def plot_bank_offerings_with_fuzz(df, product_provider, fuzz_factor=0.02):
    """
    Plots a graph of Rate (% p.a.) vs Tenure (in months) for a given bank, where each line represents a 
    different deposit range (created by joining 'Deposit lower bound' and 'Deposit upper bound').
    Adds small fuzz to the points to avoid overlap.

    Parameters:
        df (pd.DataFrame): DataFrame containing columns 'Tenure', 'Rate', 'Deposit lower bound', 
                            'Deposit upper bound', 'Product provider'.
        product_provider (str): The bank name (Product provider) to filter the data for.
        fuzz_factor (float, optional): The amount of fuzz (random noise) to add to the points. Default is 0.02.

    Raises:
        ValueError: If no data is available for the given product_provider.
    """
    # Filter rows for the specified product provider and create a copy
    filtered_df = df[df['Product provider'] == product_provider].copy()

    # Raise an exception if no valid rows remain after filtering
    if filtered_df.empty:
        raise ValueError(f"No data available for the product provider {product_provider}.")

    # Create a unique identifier for each deposit range and add small random fuzz
    filtered_df = filtered_df.assign(
        Deposit_Range=filtered_df['Deposit lower bound'].astype(str) + '-' + filtered_df['Deposit upper bound'].astype(str),
        Tenure_fuzzed=filtered_df['Tenure'] + np.random.uniform(-fuzz_factor, fuzz_factor, len(filtered_df)),
        Rate_fuzzed=filtered_df['Rate'] + np.random.uniform(-fuzz_factor, fuzz_factor, len(filtered_df))
    )

    # Plotting
    plt.figure(figsize=(12, 8))

    # Plot a separate line for each deposit range
    sns.lineplot(
        data=filtered_df,
        x='Tenure_fuzzed',
        y='Rate_fuzzed',
        hue='Deposit_Range',
        style='Deposit_Range',
        markers=True,
        dashes=False,
        palette='tab10'
    )

    # Customizing the plot
    plt.title(f'Rate (% p.a.) vs Tenure for {product_provider}', fontsize=16)
    plt.xlabel('Tenure (Months)', fontsize=14)
    plt.ylabel('Rate (% p.a.)', fontsize=14)
    plt.legend(title='Deposit Range', fontsize=12, title_fontsize=14, loc='best')
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.tight_layout()

    # Show the plot
    plt.show()



[docs]
def better_allocation(df, investment_amount, tenure):
    """
    Returns a better strategy to improve effective rate by allocating investment across 
    different products. 
    
    Strategy:
    
    1. First sorts all available products by interest rate in descending order
    2. For each provider-product pair, only allows investment in one deposit range 
       (the one with highest rate that we can afford given remaining funds)
    3. Allocates maximum possible amount to each product while respecting:
       deposit range bounds (lower and upper limits),
       required multiples (if any), and
       available remaining investment amount
    4. Continues allocation until either the entire investment amount is allocated, or
       no more valid products are available

    Note that while this strategy often produces returns at least as good as 
    investing in any single product, it may sometimes produce lower returns,
    and may also be different from the globally optimal allocation. For example,
    consider the case where we 'use up' a provider-product pair by allocating 
    the full amount to a low deposit range with high rates, and because of that
    forfeit the ability to invest in the same provider-product at a higher deposit
    range but a slightly lower rate. The overall return may be higher by allocating
    more to a slightly lower rate deposit range.

    Parameters:
        df (pd.DataFrame): DataFrame containing 'Tenure', 'Rate', 'Deposit lower bound', 
                            'Deposit upper bound', 'Product provider', 'Product'.
        investment_amount (float): The total investment amount to allocate across different products.
        tenure (int): The tenure in months for the investment.

    Returns:
        pd.DataFrame: A DataFrame containing:

            - Product provider: Provider offering the product
            - Product: Type of investment product
            - Allocated amount: Amount invested in this product
            - Rate (% p.a.): Annual interest rate as percentage
            - Expected return ($): Expected dollar return from this allocation
            Plus a summary row with totals and effective overall rate
    """
    
    # Filter rows by the investment amount and the given tenure
    filtered_df = df[
        (df['Deposit lower bound'] <= investment_amount) & 
        (df['Tenure'] == tenure)
    ]

    # Raise an exception if no valid rows remain after filtering
    if filtered_df.empty:
        raise ValueError(f"No data available for the investment amount of {investment_amount} and tenure of {tenure} months.")
    
    # Sort products by rate in descending order (highest rate first)
    filtered_df = filtered_df.sort_values(by='Rate', ascending=False)

    remaining_amount = investment_amount
    allocations = []
    total_return = 0
    used_provider_products = set()  # Track which provider-product pairs we've used

    # Allocate the investment to the products
    for _, product in filtered_df.iterrows():
        # Check how much we can allocate to this product
        if remaining_amount <= 0:
            break

        # Create unique identifier for provider-product pair
        provider_product = (product['Product provider'], product['Product'])
        
        # Skip if we've already allocated to this provider-product pair
        if provider_product in used_provider_products:
            continue

        max_allocatable = min(
            product['Deposit upper bound'],
            remaining_amount
        )
        
        # If we can't meet the minimum deposit, skip this product
        if max_allocatable < product['Deposit lower bound']:
            continue

        # Calculate the allocation
        allocation = max_allocatable

        # Handle required multiples if they exist
        if pd.notna(product['Required multiples']) and product['Required multiples'] > 0:
            # Calculate maximum number of multiples we can allocate
            num_multiples = int(allocation // product['Required multiples'])
            allocation = num_multiples * product['Required multiples']
            
            # If we can't meet even one multiple, skip this product
            if allocation < product['Required multiples']:
                continue
        
        # Calculate return for this allocation
        product_return = equations.calculate_dollar_return(allocation, product['Rate'], tenure)
        
        # Track the allocation
        allocations.append({
            'Product provider': product['Product provider'],
            'Product': product['Product'],
            'Allocated amount': allocation,
            'Rate (% p.a.)': product['Rate'],
            'Expected return ($)': product_return
        })

        # Mark this provider-product pair as used
        used_provider_products.add(provider_product)
        
        # Deduct the allocated amount and update total return
        remaining_amount -= allocation
        total_return += product_return

    # If we have allocations, calculate summary statistics
    if allocations:
        # Calculate per annum rate
        total_percentage_return = (total_return / investment_amount) * 100
        per_annum_rate = equations.calculate_per_annum_rate(total_percentage_return, tenure)
        
        # Convert allocations to a DataFrame
        allocation_df = pd.DataFrame(allocations)

        # Add total summary row
        summary_row = pd.DataFrame([{
            'Product provider': 'Total',
            'Product': 'All Products',
            'Allocated amount': investment_amount - remaining_amount,
            'Rate (% p.a.)': per_annum_rate,
            'Expected return ($)': total_return
        }])

        allocation_df = pd.concat([allocation_df, summary_row], ignore_index=True)
        
        return allocation_df
    else:
        raise ValueError("Could not find any valid allocations with the given constraints.")



[docs]
def plot_better_allocation_strategy(df, investment_amount, min_tenure=0, max_tenure=999):
    """
    Plot the Rate (% p.a.) against Tenure (Months) for the better allocation strategy 
    across all tenures available in the dataframe.

    Parameters:
        df (pd.DataFrame): DataFrame containing 'Tenure', 'Rate', 'Deposit lower bound', 
                            'Deposit upper bound', 'Product provider', 'Product'.
        investment_amount (float): The total investment amount to allocate across different products.
        min_tenure (int, optional): The minimum tenure (in months) to include. Default is 0.
        max_tenure (int, optional): The maximum tenure (in months) to include. Default is 999.
    """
    # Extract all unique tenures from the dataframe and sort them
    tenures = sorted(df['Tenure'].unique())
    tenures = [t for t in tenures if min_tenure <= t <= max_tenure]
    rates = []

    for tenure in tenures:
        try:
            # Get a better allocation for the given tenure
            allocation_df = better_allocation(df, investment_amount, tenure)
            # Extract the total Rate (% p.a.) from the summary row
            total_rate = allocation_df.loc[allocation_df['Product provider'] == 'Total', 'Rate (% p.a.)'].values[0]
            rates.append(total_rate)
        except ValueError:
            # Append NaN if no valid data for the tenure
            rates.append(None)

    # Plot the results
    plt.figure(figsize=(10, 6))
    plt.plot(tenures, rates, marker='o', linestyle='-', color='blue', label='Effective Rate (% p.a.)')
    plt.title('Better Allocation Strategy: Rate (% p.a.) vs. Tenure (Months)', fontsize=16)
    plt.xlabel('Tenure (Months)', fontsize=12)
    plt.ylabel('Rate (% p.a.)', fontsize=12)
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.xticks(tenures, rotation=45)
    plt.legend(fontsize=12)
    plt.tight_layout()
    plt.show()



[docs]
def plot_pure_and_better_allocation_strategy_rates(df, investment_amount, min_tenure=0, max_tenure=999):
    """
    Overlay plot for best rates (% p.a.) and effective better allocation strategy rates for each tenure.

    Parameters:
        df (pd.DataFrame): DataFrame containing 'Tenure', 'Rate', 'Deposit lower bound', 
                            'Deposit upper bound', 'Product provider', 'Product'.
        investment_amount (float): The investment amount to filter available rates and products.
        min_tenure (int, optional): The minimum tenure (in months) to include. Default is 0.
        max_tenure (int, optional): The maximum tenure (in months) to include. Default is 999.
    """
    # Get best rates DataFrame
    best_rates_df = best_rates(df, investment_amount, min_tenure, max_tenure)
    
    # Combine 'Product provider' and 'Product' for unique identification
    best_rates_df['Provider-Product'] = best_rates_df['Product provider'] + " - " + best_rates_df['Product']
    
    # Extract tenures for the better allocation strategy and filter by min/max tenure
    tenures = sorted(df['Tenure'].unique())
    tenures = [t for t in tenures if min_tenure <= t <= max_tenure]
    
    # Calculate rates for better allocation strategy
    better_allocation_rates = []
    for tenure in tenures:
        try:
            allocation_df = better_allocation(df, investment_amount, tenure)
            total_rate = allocation_df.loc[allocation_df['Product provider'] == 'Total', 'Rate (% p.a.)'].values[0]
            better_allocation_rates.append(total_rate)
        except ValueError:
            better_allocation_rates.append(None)
    
    # Create the combined plot
    plt.figure(figsize=(12, 8))
    
    # Plot the best individual rates
    sns.scatterplot(
        data=best_rates_df,
        x='Tenure',
        y='Rate',
        hue='Provider-Product',
        palette='tab10',
        s=100,
        edgecolor='black'
    )
    plt.plot(best_rates_df['Tenure'], best_rates_df['Rate'], 
             linestyle='-', color='black', linewidth=2, alpha=0.4, label='Best Individual Rates (Line)')
    
    # Plot the better allocation strategy rates as a continuous line
    plt.plot(tenures, better_allocation_rates, 
             linestyle='-', color='blue', marker='o', alpha=0.4, label='Better Allocation Strategy Rates', linewidth=2)
    
    # Add labels, legend, and grid
    plt.title("Comparison of Best Individual Rates and Better Allocation Strategy Rates", fontsize=16)
    plt.xlabel("Tenure (Months)", fontsize=14)
    plt.ylabel("Rate (% p.a.)", fontsize=14)
    plt.grid(alpha=0.3)
    plt.legend(fontsize=12, bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    
    # Show the plot
    plt.show()