
Context and Problem Statement¶

Active investing in the asset management industry aims to beat the stock market's average returns: portfolio managers track a particular index and try to outperform it with portfolios of their own construction.

Portfolio construction involves selecting stocks that have a higher probability of outperforming the tracked index, such as the S&P 500. In this project, we use network analysis to select a basket of stocks and build two portfolios. We then simulate each portfolio's value by investing a fixed amount, holding the portfolio for an entire year, and comparing the result against the S&P 500 index.

We follow the approach described in the research paper below:

Dynamic portfolio strategy using a clustering approach

Proposed Approach¶

  • Collect the price data for all S&P 500 components from 2011 to 2020
  • Compute log returns for the S&P 500 components over the same period
  • Compute the correlation matrix for those log returns
  • Identify the top n central and peripheral stocks based on the following network topological parameters:
    • Degree centrality
    • Betweenness centrality
    • Distance on degree criterion
    • Distance on correlation criterion
    • Distance on distance criterion
  • Simulate the performance of the central and peripheral portfolios against the S&P 500 for the year 2021

Loading the Libraries¶

We first need to install the yfinance library using !pip install yfinance (pandas_datareader is optional here, since we download the data with yfinance directly).

In [1]:
import tqdm
import datetime
import requests
import numpy as np
import pandas as pd
import seaborn as sns
import networkx as nx
import plotly.express as px
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import yfinance as yf

import warnings
warnings.filterwarnings('ignore')

Getting the S&P 500 Components¶

Beautiful Soup is a library that makes it easy to scrape information from web pages.

https://www.crummy.com/software/BeautifulSoup/bs4/doc/

In [2]:
#Extracting list of S&P 500 companies using BeautifulSoup.
resp = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
soup = BeautifulSoup(resp.text, 'lxml')
table = soup.find('table', {'class': 'wikitable sortable'})
tickers = []
for row in table.findAll('tr')[1:]:
    ticker = row.findAll('td')[0].text.strip('\n')
    tickers.append(ticker)
    
tickers = [ticker.replace('.', '-') for ticker in tickers] # list of S&P 500 stocks

Getting the Price Data for all the S&P 500 components in the last 10 years¶

In [3]:
# Override pandas_datareader's Yahoo downloader with yfinance (optional here, since we call yf.download directly)
yf.pdr_override()

# Define the start and end dates
startdate = datetime.datetime(2011, 1, 1)
enddate = datetime.datetime(2020, 12, 31)

# Create an empty DataFrame to store the price data
price_data = pd.DataFrame()

try:
    # Retrieve data for all tickers within the specified date range
    # We will get the Open, High, Low, Close, and Volume data for the specified tickers
    price_data = yf.download(tickers, start=startdate, end=enddate)
    
except Exception as e:
    print(f"Error fetching data: {e}")

# Keep only the "Close" column
price_data = price_data['Close']

# Save the data to a CSV file
price_data.to_csv('snp500_price_data_2011_to_2020.csv')
[*********************100%***********************]  503 of 503 completed

3 Failed downloads:
- OGN: Data doesn't exist for startDate = 1293858000, endDate = 1609390800
- CEG: Data doesn't exist for startDate = 1293858000, endDate = 1609390800
- GEHC: Data doesn't exist for startDate = 1293858000, endDate = 1609390800
In [4]:
price_data = pd.read_csv('snp500_price_data_2011_to_2020.csv', index_col=[0], parse_dates=True)  # parse dates for a proper DatetimeIndex
In [5]:
# Print the first few rows of the price data
price_data.head()
Out[5]:
A AAL AAP AAPL ABBV ABC ABT ACGL ACN ADBE ... WYNN XEL XOM XRAY XYL YUM ZBH ZBRA ZION ZTS
Date
2011-01-03 00:00:00 29.957083 10.65 65.500000 11.770357 NaN 34.320000 22.944036 9.832222 48.590000 31.290001 ... 108.989998 23.559999 74.550003 35.099998 NaN 35.291157 53.368931 38.200001 25.17 NaN
2011-01-04 00:00:00 29.678112 10.62 62.240002 11.831786 NaN 33.939999 23.159946 9.771111 48.270000 31.510000 ... 112.389999 23.660000 74.900002 34.709999 NaN 34.751976 52.660194 37.840000 24.76 NaN
2011-01-05 00:00:00 29.613733 11.10 62.320000 11.928571 NaN 34.459999 23.159946 9.785556 48.279999 32.220001 ... 113.839996 23.520000 74.699997 35.040001 NaN 34.917328 52.699028 37.799999 24.90 NaN
2011-01-06 00:00:00 29.670959 11.24 60.270000 11.918929 NaN 34.860001 23.111965 9.653333 48.459999 32.270000 ... 114.669998 23.500000 75.180000 34.840000 NaN 35.161755 51.067963 37.480000 24.83 NaN
2011-01-07 00:00:00 29.771103 11.40 61.880001 12.004286 NaN 34.930000 23.207926 9.580000 48.540001 32.040001 ... 118.730003 23.709999 75.589996 34.700001 NaN 35.600288 51.058250 37.599998 24.57 NaN

5 rows × 503 columns

Missing Data due to Index Rebalancing¶

In [6]:
figure = plt.figure(figsize=(16, 8))
sns.heatmap(price_data.T.isnull());

The missing data arises because the index is rebalanced over time: some stocks left the S&P 500 and others entered it during this timeframe, so their price histories do not cover the full period.
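
As a quick sanity check (a small sketch using the price_data DataFrame loaded above; not part of the original analysis), we can count how many tickers have a complete 2011-2020 history before dropping the incomplete ones:

# Count tickers with a full price history versus tickers with at least one gap
complete = price_data.notna().all()           # True for columns with no missing values
print(f"Tickers with full history:   {int(complete.sum())}")
print(f"Tickers with missing values: {int((~complete).sum())}")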

In [7]:
price_data_cleaned = price_data.dropna(axis=1) # drop columns (stocks) with any missing values
In [8]:
figure = plt.figure(figsize=(16, 8))
sns.heatmap(price_data_cleaned.T.isnull());

The null values are removed; the heatmap confirms that the cleaned data has no missing values.

Getting Yearwise Data¶

In [9]:
def get_year_wise_snp_500_data(data, year):
    year_wise_data = data.loc['{}-01-01'.format(year):'{}-12-31'.format(year)]
    
    return year_wise_data
In [10]:
# Getting year wise data of S&P stocks from 2011 to 2020
snp_500_2011 = get_year_wise_snp_500_data(price_data_cleaned, 2011)
snp_500_2012 = get_year_wise_snp_500_data(price_data_cleaned, 2012)
snp_500_2013 = get_year_wise_snp_500_data(price_data_cleaned, 2013)
snp_500_2014 = get_year_wise_snp_500_data(price_data_cleaned, 2014)
snp_500_2015 = get_year_wise_snp_500_data(price_data_cleaned, 2015)
snp_500_2016 = get_year_wise_snp_500_data(price_data_cleaned, 2016)
snp_500_2017 = get_year_wise_snp_500_data(price_data_cleaned, 2017)
snp_500_2018 = get_year_wise_snp_500_data(price_data_cleaned, 2018)
snp_500_2019 = get_year_wise_snp_500_data(price_data_cleaned, 2019)
snp_500_2020 = get_year_wise_snp_500_data(price_data_cleaned, 2020)
In [11]:
snp_500_2011
Out[11]:
A AAL AAP AAPL ABC ABT ACGL ACN ADBE ADI ... WTW WY WYNN XEL XOM XRAY YUM ZBH ZBRA ZION
Date
2011-01-03 00:00:00 29.957083 10.65 65.500000 11.770357 34.320000 22.944036 9.832222 48.590000 31.290001 37.950001 ... 93.139076 19.760000 108.989998 23.559999 74.550003 35.099998 35.291157 53.368931 38.200001 25.170000
2011-01-04 00:00:00 29.678112 10.62 62.240002 11.831786 33.939999 23.159946 9.771111 48.270000 31.510000 37.520000 ... 91.576157 19.549999 112.389999 23.660000 74.900002 34.709999 34.751976 52.660194 37.840000 24.760000
2011-01-05 00:00:00 29.613733 11.10 62.320000 11.928571 34.459999 23.159946 9.785556 48.279999 32.220001 37.599998 ... 92.847679 20.160000 113.839996 23.520000 74.699997 35.040001 34.917328 52.699028 37.799999 24.900000
2011-01-06 00:00:00 29.670959 11.24 60.270000 11.918929 34.860001 23.111965 9.653333 48.459999 32.270000 37.810001 ... 93.112579 20.020000 114.669998 23.500000 75.180000 34.840000 35.161755 51.067963 37.480000 24.830000
2011-01-07 00:00:00 29.771103 11.40 61.880001 12.004286 34.930000 23.207926 9.580000 48.540001 32.040001 37.590000 ... 92.953644 20.570000 118.730003 23.709999 75.589996 34.700001 35.600288 51.058250 37.599998 24.570000
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2011-12-23 00:00:00 25.293276 5.62 70.250000 14.404643 37.419998 26.878395 12.500000 52.889999 28.290001 36.209999 ... 103.337746 18.170000 110.000000 27.200001 85.220001 35.630001 42.437096 51.660194 36.520000 16.330000
2011-12-27 00:00:00 25.214592 5.40 70.919998 14.518929 37.709999 26.979155 12.550000 52.930000 28.500000 36.380001 ... 103.761589 18.360001 113.529999 27.620001 85.279999 35.529999 42.631203 51.728153 36.590000 16.400000
2011-12-28 00:00:00 24.399141 5.19 70.260002 14.380000 36.830002 26.787233 12.386667 52.860001 28.020000 35.599998 ... 102.516556 18.480000 109.220001 27.459999 84.180000 34.849998 42.401150 51.611649 35.700001 16.090000
2011-12-29 00:00:00 25.078684 5.23 70.349998 14.468571 37.330002 27.027134 12.440000 53.840000 28.309999 35.849998 ... 102.966888 18.879999 110.339996 27.709999 85.269997 35.299999 42.832497 51.883495 35.980000 16.480000
2011-12-30 00:00:00 24.985695 5.07 69.629997 14.464286 37.189999 26.979155 12.410000 53.230000 28.270000 35.779999 ... 102.781456 18.670000 110.489998 27.639999 84.760002 34.990002 42.422718 51.864079 35.779999 16.280001

252 rows × 449 columns

In [12]:
snp_500_2011.shift(1)
Out[12]:
A AAL AAP AAPL ABC ABT ACGL ACN ADBE ADI ... WTW WY WYNN XEL XOM XRAY YUM ZBH ZBRA ZION
Date
2011-01-03 00:00:00 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2011-01-04 00:00:00 29.957083 10.65 65.500000 11.770357 34.320000 22.944036 9.832222 48.590000 31.290001 37.950001 ... 93.139076 19.760000 108.989998 23.559999 74.550003 35.099998 35.291157 53.368931 38.200001 25.17
2011-01-05 00:00:00 29.678112 10.62 62.240002 11.831786 33.939999 23.159946 9.771111 48.270000 31.510000 37.520000 ... 91.576157 19.549999 112.389999 23.660000 74.900002 34.709999 34.751976 52.660194 37.840000 24.76
2011-01-06 00:00:00 29.613733 11.10 62.320000 11.928571 34.459999 23.159946 9.785556 48.279999 32.220001 37.599998 ... 92.847679 20.160000 113.839996 23.520000 74.699997 35.040001 34.917328 52.699028 37.799999 24.90
2011-01-07 00:00:00 29.670959 11.24 60.270000 11.918929 34.860001 23.111965 9.653333 48.459999 32.270000 37.810001 ... 93.112579 20.020000 114.669998 23.500000 75.180000 34.840000 35.161755 51.067963 37.480000 24.83
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2011-12-23 00:00:00 24.921316 5.70 69.379997 14.233929 37.080002 26.700871 12.300000 52.580002 27.889999 35.939999 ... 101.907288 18.299999 108.040001 27.000000 84.290001 35.549999 41.976994 51.495148 36.380001 16.15
2011-12-27 00:00:00 25.293276 5.62 70.250000 14.404643 37.419998 26.878395 12.500000 52.889999 28.290001 36.209999 ... 103.337746 18.170000 110.000000 27.200001 85.220001 35.630001 42.437096 51.660194 36.520000 16.33
2011-12-28 00:00:00 25.214592 5.40 70.919998 14.518929 37.709999 26.979155 12.550000 52.930000 28.500000 36.380001 ... 103.761589 18.360001 113.529999 27.620001 85.279999 35.529999 42.631203 51.728153 36.590000 16.40
2011-12-29 00:00:00 24.399141 5.19 70.260002 14.380000 36.830002 26.787233 12.386667 52.860001 28.020000 35.599998 ... 102.516556 18.480000 109.220001 27.459999 84.180000 34.849998 42.401150 51.611649 35.700001 16.09
2011-12-30 00:00:00 25.078684 5.23 70.349998 14.468571 37.330002 27.027134 12.440000 53.840000 28.309999 35.849998 ... 102.966888 18.879999 110.339996 27.709999 85.269997 35.299999 42.832497 51.883495 35.980000 16.48

252 rows × 449 columns

Computing the Daily Log Returns¶

Statistically, stock prices are commonly modeled as log-normally distributed, which makes log returns (approximately) normally distributed. It is therefore reasonable to apply properties of the normal distribution to log returns, but not to simple returns.

Stock return analysis is a time series analysis in which stationarity matters; log returns are typically close to stationary (unlike raw prices) and are also additive over time, which makes them convenient to work with.
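
As a small illustration (a sketch with a hypothetical four-day price series, using the numpy and pandas imports from the top of the notebook), the additivity of log returns can be checked directly:

# Toy example: daily log returns sum to the log return of the whole period
prices = pd.Series([100.0, 102.0, 101.0, 105.0])
daily_log_returns = np.log(prices / prices.shift(1))     # ln(P_t / P_{t-1})
print(daily_log_returns.sum())                           # ~0.0488
print(np.log(prices.iloc[-1] / prices.iloc[0]))          # same value: ln(105 / 100)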

In [13]:
# Daily log returns: ln(P_t) - ln(P_{t-1}), using shift(1) to access the previous day's price
log_returns_2011 = np.log(snp_500_2011) - np.log(snp_500_2011.shift(1))
log_returns_2012 = np.log(snp_500_2012) - np.log(snp_500_2012.shift(1))
log_returns_2013 = np.log(snp_500_2013) - np.log(snp_500_2013.shift(1))
log_returns_2014 = np.log(snp_500_2014) - np.log(snp_500_2014.shift(1))
log_returns_2015 = np.log(snp_500_2015) - np.log(snp_500_2015.shift(1))
log_returns_2016 = np.log(snp_500_2016) - np.log(snp_500_2016.shift(1))
log_returns_2017 = np.log(snp_500_2017) - np.log(snp_500_2017.shift(1))
log_returns_2018 = np.log(snp_500_2018) - np.log(snp_500_2018.shift(1))
log_returns_2019 = np.log(snp_500_2019) - np.log(snp_500_2019.shift(1))
log_returns_2020 = np.log(snp_500_2020) - np.log(snp_500_2020.shift(1))

Computing the Correlation of Returns¶

In [14]:
# Computing adjacency matrix:
return_correlation_2011 = log_returns_2011.corr()
return_correlation_2012 = log_returns_2012.corr()
return_correlation_2013 = log_returns_2013.corr()
return_correlation_2014 = log_returns_2014.corr()
return_correlation_2015 = log_returns_2015.corr()
return_correlation_2016 = log_returns_2016.corr()
return_correlation_2017 = log_returns_2017.corr()
return_correlation_2018 = log_returns_2018.corr()
return_correlation_2019 = log_returns_2019.corr()
return_correlation_2020 = log_returns_2020.corr()
In [15]:
figure, axes = plt.subplots(5, 2, figsize=(30, 30))
sns.heatmap(return_correlation_2011, ax=axes[0, 0]);
sns.heatmap(return_correlation_2012, ax=axes[0, 1]);
sns.heatmap(return_correlation_2013, ax=axes[1, 0]);
sns.heatmap(return_correlation_2014, ax=axes[1, 1]);
sns.heatmap(return_correlation_2015, ax=axes[2, 0]);
sns.heatmap(return_correlation_2016, ax=axes[2, 1]);
sns.heatmap(return_correlation_2017, ax=axes[3, 0]);
sns.heatmap(return_correlation_2018, ax=axes[3, 1]);
sns.heatmap(return_correlation_2019, ax=axes[4, 0]);
sns.heatmap(return_correlation_2020, ax=axes[4, 1]);

Inferences¶

The first plot, for 2011, shows high correlation among the stocks. 2011 was a volatile year (the US debt-ceiling standoff and the European sovereign debt crisis triggered a sharp sell-off in August), so most stock prices moved down together, which explains the high correlation.

In comparison, 2012, 2014 and 2017 were relatively stable years, and the correlation among stocks is correspondingly low.

In 2020, the COVID-19 pandemic caused extreme volatility, and stocks moved down (and later recovered) largely in unison, which again produces high correlation.

From this we can infer that in stable market conditions the correlation matrix contains mostly low values, whereas in stressed market conditions it contains mostly high values.
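
To put a rough number on this (a sketch that reuses the yearly correlation matrices computed above; the mean is just one possible summary), we can average the off-diagonal correlations per year:

# Mean pairwise (off-diagonal) correlation per year - a rough proxy for market stress
correlations = {2011: return_correlation_2011, 2012: return_correlation_2012,
                2013: return_correlation_2013, 2014: return_correlation_2014,
                2015: return_correlation_2015, 2016: return_correlation_2016,
                2017: return_correlation_2017, 2018: return_correlation_2018,
                2019: return_correlation_2019, 2020: return_correlation_2020}

mean_corr = {}
for yr, corr in correlations.items():
    values = corr.values
    off_diagonal = values[~np.eye(len(values), dtype=bool)]  # drop the diagonal of ones
    mean_corr[yr] = off_diagonal.mean()

pd.Series(mean_corr).plot(kind='bar', figsize=(12, 4), title='Mean pairwise correlation by year');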

Creating Graphs¶

In [16]:
graph_2011 = nx.Graph(return_correlation_2011)
In [17]:
figure = plt.figure(figsize=(22, 10))
nx.draw_networkx(graph_2011, with_labels=False)

This is a fully connected network because we built it directly from the (dense) correlation matrix.

Fully connected means every stock is linked to every other stock; since the diagonal of the correlation matrix is 1, each node also carries a self-loop.
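
A quick check (a sketch using graph_2011 from the cell above): a complete graph on n nodes should have n(n-1)/2 edges plus, here, n self-loops coming from the unit diagonal of the correlation matrix.

n = graph_2011.number_of_nodes()
print(f"Nodes: {n}")
print(f"Edges (including self-loops): {graph_2011.number_of_edges()}")
print(f"Self-loops: {nx.number_of_selfloops(graph_2011)}, complete-graph edges expected: {n * (n - 1) // 2}")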

Filtering Graphs using MST¶

MST - Minimum Spanning Tree

A minimum spanning tree (MST) or minimum weight spanning tree is a subset of the edges of a connected, edge-weighted undirected graph that connects all the vertices together, without any cycles and with the minimum possible total edge weight. That is, it is a spanning tree whose sum of edge weights is as small as possible.

The MST is a popular technique for eliminating redundancy and noise while preserving the most significant links in the network.

Note that filtering with an MST may also discard some genuine information along with the noise.

You can find more on MST here
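
Before applying it to the stock network, here is a minimal, self-contained sketch (with hypothetical weights) of what nx.minimum_spanning_tree does:

# Toy example: the MST keeps the cheapest set of edges that still connects every node,
# so the redundant, more expensive A-C edge is dropped
toy = nx.Graph()
toy.add_weighted_edges_from([('A', 'B', 1.0), ('B', 'C', 2.0),
                             ('A', 'C', 3.0), ('C', 'D', 1.5)])
toy_mst = nx.minimum_spanning_tree(toy)
print(list(toy_mst.edges(data='weight')))   # 3 edges remain; the ('A', 'C') edge is gone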

In [18]:
distance_2011 = np.sqrt(2 * (1 - return_correlation_2011))
distance_2012 = np.sqrt(2 * (1 - return_correlation_2012))
distance_2013 = np.sqrt(2 * (1 - return_correlation_2013))
distance_2014 = np.sqrt(2 * (1 - return_correlation_2014))
distance_2015 = np.sqrt(2 * (1 - return_correlation_2015))
distance_2016 = np.sqrt(2 * (1 - return_correlation_2016))
distance_2017 = np.sqrt(2 * (1 - return_correlation_2017))
distance_2018 = np.sqrt(2 * (1 - return_correlation_2018))
distance_2019 = np.sqrt(2 * (1 - return_correlation_2019))
distance_2020 = np.sqrt(2 * (1 - return_correlation_2020))

Before constructing the MST, each correlation coefficient ρ is converted into a distance d = sqrt(2 × (1 − ρ)), so that highly correlated pairs (ρ close to 1) end up close together (d close to 0) and anti-correlated pairs end up far apart (d close to 2).

In [19]:
distance_2011_graph = nx.Graph(distance_2011)
distance_2012_graph = nx.Graph(distance_2012)
distance_2013_graph = nx.Graph(distance_2013)
distance_2014_graph = nx.Graph(distance_2014)
distance_2015_graph = nx.Graph(distance_2015)
distance_2016_graph = nx.Graph(distance_2016)
distance_2017_graph = nx.Graph(distance_2017)
distance_2018_graph = nx.Graph(distance_2018)
distance_2019_graph = nx.Graph(distance_2019)
distance_2020_graph = nx.Graph(distance_2020)
In [20]:
graph_2011_filtered = nx.minimum_spanning_tree(distance_2011_graph)
graph_2012_filtered = nx.minimum_spanning_tree(distance_2012_graph)
graph_2013_filtered = nx.minimum_spanning_tree(distance_2013_graph)
graph_2014_filtered = nx.minimum_spanning_tree(distance_2014_graph)
graph_2015_filtered = nx.minimum_spanning_tree(distance_2015_graph)
graph_2016_filtered = nx.minimum_spanning_tree(distance_2016_graph)
graph_2017_filtered = nx.minimum_spanning_tree(distance_2017_graph)
graph_2018_filtered = nx.minimum_spanning_tree(distance_2018_graph)
graph_2019_filtered = nx.minimum_spanning_tree(distance_2019_graph)
graph_2020_filtered = nx.minimum_spanning_tree(distance_2020_graph)

We use the MST to filter the network in each yearly window, eliminating redundancy and noise while retaining the most significant links.
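
To see how aggressive this filtering is (a sketch using the 2011 graphs built above), compare edge counts before and after: the complete distance graph has roughly n(n-1)/2 edges, while the MST keeps only n-1.

n = distance_2011_graph.number_of_nodes()
print(f"Edges before MST filtering: {distance_2011_graph.number_of_edges()}")
print(f"Edges after MST filtering:  {graph_2011_filtered.number_of_edges()} (n - 1 = {n - 1})")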

In [21]:
figure, axes = plt.subplots(10, 1, figsize=(24, 120))
nx.draw_networkx(graph_2011_filtered, with_labels=False, ax=axes[0])
nx.draw_networkx(graph_2012_filtered, with_labels=False, ax=axes[1])
nx.draw_networkx(graph_2013_filtered, with_labels=False, ax=axes[2])
nx.draw_networkx(graph_2014_filtered, with_labels=False, ax=axes[3])
nx.draw_networkx(graph_2015_filtered, with_labels=False, ax=axes[4])
nx.draw_networkx(graph_2016_filtered, with_labels=False, ax=axes[5])
nx.draw_networkx(graph_2017_filtered, with_labels=False, ax=axes[6])
nx.draw_networkx(graph_2018_filtered, with_labels=False, ax=axes[7])
nx.draw_networkx(graph_2019_filtered, with_labels=False, ax=axes[8])
nx.draw_networkx(graph_2020_filtered, with_labels=False, ax=axes[9])

Plotting the filtered graphs shows that the network structure changes from year to year; no two yearly graphs look very similar.

Computing Graph Statistics over Time¶

In [22]:
average_shortest_path_length = []
year = [2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]

# Average shortest path length of each yearly MST
for graph in [graph_2011_filtered, graph_2012_filtered, graph_2013_filtered, graph_2014_filtered, graph_2015_filtered, 
             graph_2016_filtered, graph_2017_filtered, graph_2018_filtered, graph_2019_filtered, graph_2020_filtered]:
    average_shortest_path_length.append(nx.average_shortest_path_length(graph))
In [23]:
figure = plt.figure(figsize=(22, 8))
sns.lineplot(x='year', y='average_shortest_path_length', 
             data=pd.DataFrame({'year': year, 'average_shortest_path_length': average_shortest_path_length}));

From the plot we can see that the average shortest path length was fairly stable until 2015, increased significantly in 2016 and 2017, dropped again in 2018, and rose once more in 2020. This may be related to the prevailing market conditions in those years.

Portfolio Construction¶

In [24]:
log_returns_2011_till_2020 = np.log(price_data_cleaned) - np.log(price_data_cleaned.shift(1))  # ln(P_t) - ln(P_{t-1})
return_correlation_2011_till_2020 = log_returns_2011_till_2020.corr()
In [25]:
figure = plt.figure(figsize=(24, 8))
sns.heatmap(return_correlation_2011_till_2020);
In [26]:
distance_2011_till_2020 = np.sqrt(2 * (1 - return_correlation_2011_till_2020))
distance_2011_till_2020_graph = nx.Graph(distance_2011_till_2020)
distance_2011_till_2020_graph_filtered = nx.minimum_spanning_tree(distance_2011_till_2020_graph)
In [27]:
# Create a figure
figure = plt.figure(figsize=(24, 8))

# Compute the Kamada-Kawai layout
layout = nx.kamada_kawai_layout(distance_2011_till_2020_graph_filtered)

# Draw the graph using the computed Kamada-Kawai layout
nx.draw(distance_2011_till_2020_graph_filtered, pos=layout)
In [28]:
degree_centrality = nx.degree_centrality(distance_2011_till_2020_graph_filtered)
closeness_centrality = nx.closeness_centrality(distance_2011_till_2020_graph_filtered)
betweenness_centrality = nx.betweenness_centrality(distance_2011_till_2020_graph_filtered)
eigenvector_centrality = nx.eigenvector_centrality_numpy(distance_2011_till_2020_graph_filtered)
In [29]:
keys = []
values = []

for key, value in degree_centrality.items():
    keys.append(key)
    values.append(value)

dc_data = pd.DataFrame({'stocks': keys, 'degree_centrality': values}).sort_values('degree_centrality', ascending=False)
px.bar(data_frame=dc_data, x='stocks', y='degree_centrality', template='plotly_dark')

Degree centrality is the simplest centrality measure. It captures the relative significance of a stock in terms of the number of edges incident upon it. Stocks with high scores directly influence the behavior of the many other stocks connected to them.

Based on this measure, HON has the highest number of edges with other stocks and hence the highest degree centrality.
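
As a quick cross-check (a small sketch that reads the degree_centrality dictionary computed above; it is not used for the portfolio construction itself), the ten most connected stocks can be listed directly:

# Ten stocks with the highest degree centrality in the filtered (MST) network
top_degree = sorted(degree_centrality.items(), key=lambda kv: kv[1], reverse=True)[:10]
for stock, score in top_degree:
    print(f"{stock:6s} {score:.4f}")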

In [30]:
keys = []
values = []

for key, value in closeness_centrality.items():
    keys.append(key)
    values.append(value)

cc_data = pd.DataFrame({'stocks': keys, 'closeness_centrality': values}).sort_values('closeness_centrality', 
                                                                                       ascending=False)
px.bar(data_frame=cc_data, x='stocks', y='closeness_centrality', template='plotly_dark')

Closeness centrality is also based on shortest paths between pairs of stocks in the network.

It is defined as the reciprocal of the average shortest-path distance from a stock to all other stocks reachable from it, so a stock that is on average closer to the rest of the network gets a higher score.

In [31]:
keys = []
values = []

for key, value in betweenness_centrality.items():
    keys.append(key)
    values.append(value)

bc_data = pd.DataFrame({'stocks': keys, 'betweenness_centrality': values}).sort_values('betweenness_centrality', 
                                                                                       ascending=False)
px.bar(data_frame=bc_data, x='stocks', y='betweenness_centrality', template='plotly_dark')

The betweenness centrality of a stock is the sum, over all pairs of other stocks, of the fraction of shortest paths between that pair that pass through the stock. It quantifies how much control a stock has over the flow of information in the network.

So, the stock with the highest score plays a significant role in coordinating information flow among the other stocks.
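
A minimal illustration (a toy sketch with hypothetical nodes, using the networkx import from the top of the notebook): a single node bridging two clusters receives the highest betweenness, because every cross-cluster shortest path must pass through it.

# Toy example: 'bridge' connects two small cliques, so all shortest paths
# between the cliques pass through it -> highest betweenness centrality
toy = nx.Graph()
toy.add_edges_from([('a1', 'a2'), ('a2', 'a3'), ('a1', 'a3'),   # clique A
                    ('b1', 'b2'), ('b2', 'b3'), ('b1', 'b3'),   # clique B
                    ('a1', 'bridge'), ('bridge', 'b1')])        # the bridge
print(nx.betweenness_centrality(toy)['bridge'])                 # largest value in this toy graph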

Selecting Stocks based on Network Topological Parameters¶

In [32]:
# we already computed degree centrality above

# we already computed betweenness centrality above

# distance on degree criterion
distance_degree_criteria = {}
node_with_largest_degree_centrality = max(degree_centrality, key=degree_centrality.get)
for node in distance_2011_till_2020_graph_filtered.nodes():
    distance_degree_criteria[node] = nx.shortest_path_length(distance_2011_till_2020_graph_filtered, node, 
                                                             node_with_largest_degree_centrality)

# distance on correlation criterion
distance_correlation_criteria = {}
sum_correlation = {}

for node in distance_2011_till_2020_graph_filtered.nodes():
    neighbors = nx.neighbors(distance_2011_till_2020_graph_filtered, node)
    sum_correlation[node] = sum(return_correlation_2011_till_2020[node][neighbor] for neighbor in neighbors)

node_with_highest_correlation = max(sum_correlation, key=sum_correlation.get)
    
for node in distance_2011_till_2020_graph_filtered.nodes():
    distance_correlation_criteria[node] = nx.shortest_path_length(distance_2011_till_2020_graph_filtered, node, 
                                                             node_with_highest_correlation)    

# distance on distance criterion
distance_distance_criteria = {}
mean_distance = {}

for node in distance_2011_till_2020_graph_filtered.nodes():
    nodes = list(distance_2011_till_2020_graph_filtered.nodes())
    nodes.remove(node)
    distance_distance = [nx.shortest_path_length(distance_2011_till_2020_graph_filtered, node, ns) for ns in nodes]
    mean_distance[node] = np.mean(distance_distance)
    
node_with_minimum_mean_distance = min(mean_distance, key=mean_distance.get)

for node in distance_2011_till_2020_graph_filtered.nodes():
    distance_distance_criteria[node] = nx.shortest_path_length(distance_2011_till_2020_graph_filtered, node, 
                                                             node_with_minimum_mean_distance)

Distance here refers to the shortest-path length from a node to the central node of the network.

Three different definitions of the central node are used, to reduce the error that a single definition might introduce, which gives three types of distance:

1. Distance on degree criterion (Ddegree): the central node is the one with the largest degree.

2. Distance on correlation criterion (Dcorrelation): the central node is the one with the highest sum of correlation coefficients with its neighbors.

3. Distance on distance criterion (Ddistance): the central node is the one with the lowest mean distance to all other nodes.

In [33]:
node_stats = pd.DataFrame({'degree_centrality': pd.Series(degree_centrality)})
node_stats['betweenness_centrality'] = pd.Series(betweenness_centrality)

node_stats['average_centrality'] = 0.5 * (node_stats['degree_centrality'] + node_stats['betweenness_centrality'])

# Assigning pandas Series aligns the distance measures on the stock index
node_stats['distance_degree_criteria'] = pd.Series(distance_degree_criteria)
node_stats['distance_correlation_criteria'] = pd.Series(distance_correlation_criteria)
node_stats['distance_distance_criteria'] = pd.Series(distance_distance_criteria)
node_stats['average_distance'] = (node_stats['distance_degree_criteria'] + node_stats['distance_correlation_criteria'] +
                                  node_stats['distance_distance_criteria']) / 3
In [34]:
node_stats.head()
Out[34]:
degree_centrality betweenness_centrality average_centrality distance_degree_criteria distance_correlation_criteria distance_distance_criteria average_distance
A 0.011161 0.017797 0.014479 5 8 6 6.333333
AAL 0.002232 0.000000 0.001116 8 13 7 9.333333
AAP 0.002232 0.000000 0.001116 8 5 9 7.333333
AAPL 0.002232 0.000000 0.001116 14 19 13 15.333333
ABC 0.002232 0.000000 0.001116 16 21 15 17.333333

We use the parameters defined above to select the portfolios.

The nodes with the largest 10% of degree or betweenness centrality are chosen to be in the central portfolio.

The nodes whose degree equals 1 or whose betweenness centrality equals 0 are chosen for the peripheral portfolio.

Similarly, nodes ranking in the top 10% by distance are assigned to the peripheral portfolio, and those in the bottom 10% to the central portfolio.

The central portfolios and peripheral portfolios represent two opposite sides of correlation and agglomeration. Generally speaking, central stocks play a vital role in the market and impose a strong influence on other stocks. On the other hand, the correlations between peripheral stocks are weak and contain much more noise than those of the central stocks.
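
The cells below approximate this with a fixed top-15 cut on the averaged measures. For reference, the percentile/degree rule described above could be sketched roughly as follows (an illustrative sketch only, not the code used for the results; the distance-based percentiles could be added in the same way):

# Rough sketch of the selection rule described above
graph = distance_2011_till_2020_graph_filtered
degrees = pd.Series(dict(graph.degree()))

# Central: nodes in the top 10% by degree or betweenness centrality
central_rule = node_stats[
    (node_stats['degree_centrality'] >= node_stats['degree_centrality'].quantile(0.90)) |
    (node_stats['betweenness_centrality'] >= node_stats['betweenness_centrality'].quantile(0.90))
].index.tolist()

# Peripheral: nodes with degree 1 or betweenness centrality 0
peripheral_rule = node_stats[
    (degrees.reindex(node_stats.index) == 1) |
    (node_stats['betweenness_centrality'] == 0)
].index.tolist()

print(len(central_rule), 'central candidates,', len(peripheral_rule), 'peripheral candidates')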

In [35]:
central_stocks = node_stats.sort_values('average_centrality', ascending=False).head(15)
central_portfolio = list(central_stocks.index)
In [36]:
peripheral_stocks = node_stats.sort_values('average_distance', ascending=False).head(15)
peripheral_portfolio = list(peripheral_stocks.index)
In [37]:
central_stocks
Out[37]:
degree_centrality betweenness_centrality average_centrality distance_degree_criteria distance_correlation_criteria distance_distance_criteria average_distance
LNC 0.017857 0.651157 0.334507 1 6 0 2.333333
AMP 0.031250 0.544233 0.287742 0 5 1 2.000000
AME 0.020089 0.524938 0.272514 1 4 2 2.333333
PRU 0.011161 0.463177 0.237169 2 7 1 3.333333
GL 0.020089 0.450463 0.235276 3 8 2 4.333333
PH 0.024554 0.428122 0.226338 3 2 4 3.000000
ETN 0.017857 0.421500 0.219679 2 3 3 2.666667
TFC 0.008929 0.392607 0.200768 5 10 4 6.333333
USB 0.006696 0.385776 0.196236 4 9 3 5.333333
JPM 0.011161 0.354217 0.182689 7 12 6 8.333333
PNC 0.004464 0.350022 0.177243 6 11 5 7.333333
BRK-B 0.008929 0.336300 0.172614 8 13 7 9.333333
HST 0.006696 0.331136 0.168916 2 7 1 3.333333
ADP 0.011161 0.313559 0.162360 9 14 8 10.333333
REG 0.006696 0.292376 0.149536 4 9 3 5.333333
In [38]:
peripheral_stocks
Out[38]:
degree_centrality betweenness_centrality average_centrality distance_degree_criteria distance_correlation_criteria distance_distance_criteria average_distance
NFLX 0.002232 0.000000 0.001116 16 21 15 17.333333
ABC 0.002232 0.000000 0.001116 16 21 15 17.333333
CAG 0.002232 0.000000 0.001116 16 21 15 17.333333
WBA 0.002232 0.000000 0.001116 16 21 15 17.333333
CHD 0.002232 0.000000 0.001116 16 21 15 17.333333
DXCM 0.002232 0.000000 0.001116 15 20 14 16.333333
DLTR 0.002232 0.000000 0.001116 15 20 14 16.333333
SJM 0.002232 0.000000 0.001116 15 20 14 16.333333
MCK 0.004464 0.004464 0.004464 15 20 14 16.333333
CPB 0.004464 0.004464 0.004464 15 20 14 16.333333
K 0.002232 0.000000 0.001116 15 20 14 16.333333
AMZN 0.004464 0.004464 0.004464 15 20 14 16.333333
CLX 0.004464 0.004464 0.004464 15 20 14 16.333333
GOOGL 0.002232 0.000000 0.001116 15 20 14 16.333333
CVS 0.004464 0.004464 0.004464 15 20 14 16.333333

Selecting the Top 15 Central and Peripheral Stocks¶

In [39]:
color = []

for node in distance_2011_till_2020_graph_filtered:
    if node in central_portfolio:
        color.append('red')
        
    elif node in peripheral_portfolio:
        color.append('green')
        
    else:
        color.append('blue')
In [40]:
figure = plt.figure(figsize=(24, 8))
nx.draw_kamada_kawai(distance_2011_till_2020_graph_filtered, with_labels=False, node_color=color)

Here, the red stocks are the central portfolio stocks, and the green ones are the peripheral portfolio stocks.

Performance Evaluation¶

Here we evaluate performance by comparing the Central Portfolio, the Peripheral Portfolio, and the S&P 500 index over 2021, to find out which performs best.

In [41]:
# Define the start and end dates
startdate = datetime.datetime(2021, 1, 1)
enddate = datetime.datetime(2021, 12, 31)

# Create an empty DataFrame to store the price data
price_data_2021 = pd.DataFrame()

try:
    # Retrieve data for all tickers within the specified date range
    # We will get the Open, High, Low, Close, and Volume data for the specified tickers
    price_data_2021 = yf.download(tickers, start=startdate, end=enddate)
    
except Exception as e:
    print(f"Error fetching data: {e}")

# Keep only the "Close" column
price_data_2021 = price_data_2021['Close']

# Save the data to a CSV file
price_data_2021.to_csv('snp500_price_data_2021.csv')
[*********************100%***********************]  503 of 503 completed

2 Failed downloads:
- CEG: Data doesn't exist for startDate = 1609477200, endDate = 1640926800
- GEHC: Data doesn't exist for startDate = 1609477200, endDate = 1640926800
In [42]:
# Reading the 2021 price data for the S&P 500 stocks; parse the index as dates so it aligns with the index data below
price_data_2021 = pd.read_csv('snp500_price_data_2021.csv', index_col=[0], parse_dates=True)
In [43]:
price_data_2021.head()
Out[43]:
A AAL AAP AAPL ABBV ABC ABT ACGL ACN ADBE ... WYNN XEL XOM XRAY XYL YUM ZBH ZBRA ZION ZTS
Date
2021-01-04 00:00:00 118.639999 15.13 157.339996 129.410004 105.410004 96.500000 109.110001 34.900002 256.459991 485.339996 ... 106.900002 65.660004 41.500000 53.750000 99.690002 105.820000 148.699036 378.130005 42.930000 163.589996
2021-01-05 00:00:00 119.610001 15.43 157.169998 131.009995 106.500000 97.760002 110.459999 35.040001 257.920013 485.690002 ... 110.190002 65.019997 43.500000 55.290001 99.570000 105.830002 151.271851 380.570007 43.610001 164.729996
2021-01-06 00:00:00 122.889999 15.52 166.250000 126.599998 105.580002 106.169998 110.230003 36.580002 260.739990 466.309998 ... 110.849998 66.459999 44.610001 57.959999 103.769997 106.410004 155.582520 394.820007 48.509998 167.149994
2021-01-07 00:00:00 126.160004 15.38 167.669998 130.919998 106.709999 110.129997 111.300003 36.240002 263.200012 477.739990 ... 109.750000 65.160004 44.959999 57.660000 108.480003 105.599998 154.660187 409.100006 49.990002 167.000000
2021-01-08 00:00:00 127.059998 15.13 170.059998 132.050003 107.269997 110.029999 111.610001 36.439999 264.160004 485.100006 ... 109.029999 65.760002 45.459999 58.180000 107.379997 107.150002 154.320389 405.470001 48.720001 168.110001

5 rows × 503 columns

In [44]:
# Fetch S&P 500 index data using yfinance
snp_500_ticker = '^GSPC'  # Ticker symbol for S&P 500 index
snp_500_2021 = yf.download(snp_500_ticker, start=startdate, end=enddate)

# Keep only the "Close" column
snp_500_2021 = snp_500_2021['Close']

# Save the S&P 500 index data to a CSV file
snp_500_2021.to_csv('snp500_index_data_2021.csv')
[*********************100%***********************]  1 of 1 completed
In [45]:
# Removing NA values:
price_data_2021 = price_data_2021.dropna(axis=1)
snp_500_2021 = snp_500_2021.dropna()
In [46]:
price_data_2021 = price_data_2021.loc['2021-01-04':]
In [47]:
# Calculate the portfolio values: invest the amount by buying an equal number of shares
# of each stock, so one "unit" of the portfolio holds one share of every constituent
amount = 100000

central_portfolio_value = pd.DataFrame()
for stock in central_portfolio:
    central_portfolio_value[stock] = price_data_2021[stock]

portfolio_unit = central_portfolio_value.sum(axis=1).iloc[0]  # price of one unit on the first day
share = amount / portfolio_unit
central_portfolio_value = central_portfolio_value.sum(axis=1) * share

peripheral_portfolio_value = pd.DataFrame()
for stock in peripheral_portfolio:
    peripheral_portfolio_value[stock] = price_data_2021[stock]

portfolio_unit = peripheral_portfolio_value.sum(axis=1).iloc[0]
share = amount / portfolio_unit
peripheral_portfolio_value = peripheral_portfolio_value.sum(axis=1) * share
In [48]:
snp_500_2021_value = snp_500_2021 * (amount / snp_500_2021.iloc[0])
In [49]:
all_portfolios = pd.concat([snp_500_2021_value, central_portfolio_value, peripheral_portfolio_value], axis=1)
all_portfolios.columns = ['snp500', 'central_portfolio', 'peripheral_portfolio']
In [50]:
# Plotting
figure, ax = plt.subplots(figsize=(16, 8))
snp_500_line = ax.plot(all_portfolios['snp500'], label='S&P 500')
central_portfolio_line = ax.plot(all_portfolios['central_portfolio'], label='Central Portfolio')
peripheral_portfolio_line = ax.plot(all_portfolios['peripheral_portfolio'], label='Peripheral Portfolio')
ax.legend(loc='upper left')
plt.show()
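
To put numbers on the plot (a small sketch using the all_portfolios DataFrame built above; 252 trading days assumed for the rough annualization), we can summarise each series by its total 2021 return and the volatility of its daily returns:

# Total 2021 return and rough annualized volatility for each series
daily_returns = all_portfolios.pct_change().dropna()
summary = pd.DataFrame({
    'total_return_%': (all_portfolios.iloc[-1] / all_portfolios.iloc[0] - 1) * 100,
    'annualized_vol_%': daily_returns.std() * np.sqrt(252) * 100,
})
print(summary.round(2))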

Both portfolios have their own strengths under different market conditions.

Generally, the Central Portfolio tends to perform better in stable market conditions, whereas the Peripheral Portfolio tends to perform better in crisis conditions. This is because peripheral stocks are only weakly correlated with the rest of the network, so they are less affected when the other stocks in the network move together.

We can use this network analysis to rebalance the portfolio over time.