The goal of this project was to complete network analyses from an Excel data file with regard to NICHD and six partner organizations. This project was completed in three days under a tight deadline. Much of the code is rushed and unpolished, but it ultimately provides an example of network analysis for real work in a short timeframe.
Steps for the project:
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx
import seaborn as sns
from networkx.algorithms.approximation import clique
from networkx.algorithms.clique import find_cliques
from networkx.algorithms.centrality import degree_centrality
from networkx.algorithms.cluster import clustering
from networkx.algorithms.cluster import average_clustering
# Pandas display options: never truncate cell text or the row listing when
# inspecting the frame. `None` means "no limit"; the older `-1` spelling for
# max_colwidth was deprecated in pandas 1.0 in favour of `None`.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)

# Organizations analysed. Further down they are paired by position with data
# columns 1-7 of the spreadsheet, so the order matters.
orgs = ['AAP', 'ACOG', 'CDC', 'CPSC', 'First Candle', 'HRSA', 'NICHD']

# One colour per organization; colors[k] is used for orgs[k] in every figure.
colors = sns.color_palette("hls", len(orgs))
# Preview the palette (reuses the palette instead of building it twice).
sns.palplot(colors)
def lighten(colr, deg=.37):
    """Return a lighter copy of an RGB(A) colour.

    Adds `deg` to every channel of `colr` and clamps each result to the
    valid range [0, 1].

    colr -- iterable of numeric channels, each expected in [0, 1]
    deg  -- amount added to each channel (default .37)

    Returns a tuple with one value per input channel. The lower clamp keeps
    the result a valid colour even when `deg` is negative.
    """
    return tuple(min(max(channel + deg, 0), 1) for channel in colr)
def darken(colr, deg=.37):
    """Return a darker copy of an RGB(A) colour.

    Subtracts `deg` from every channel of `colr` and clamps each result to
    the valid range [0, 1].

    colr -- iterable of numeric channels, each expected in [0, 1]
    deg  -- amount subtracted from each channel (default .37)

    Returns a tuple with one value per input channel. The upper clamp keeps
    the result a valid colour even when `deg` is negative.
    """
    return tuple(min(max(channel - deg, 0), 1) for channel in colr)
# Title prefix shared by the figures: the campaign name on its own line, with
# the registered-trademark sign wrapped in `$^...$` (matplotlib mathtext
# superscript syntax).
sts_logo = "\nSafe to Sleep$^®$ Campaign:\n"
# Substrings checked before the organization names; first match wins.
_PRIORITY_LABELS = (
    ('Safe Kids', 'Safe Kids'),
    ('Digital Media', 'Digital media'),
)

# Substrings checked after the organization names; first match wins.
_PHRASE_LABELS = (
    ('Internal groups and committees focused on high-risk communities',
     'Internal groups for high-risk'),
    ('.org', 'Websites'),
    ('SUID/SIDS Project', 'SUID/SIDS Project'),
    ('National Fetal and Infant', 'NFIMR'),
    ('American Board of Obs', 'ABOG'),
    ('State and local', 'State/local health agencies'),
    ('National Center for Fatality', 'NCRFP'),
    ('Organizations that work with', 'Organizations that work with families'),
    ('State health offices and public health professionals',
     'State health offices'),
)

# Decorations removed from entries that are not mapped to a fixed label.
_BULLET_MARKERS = ('• ', '*')


def remove_str(s, known_orgs=None):
    """Normalise one raw spreadsheet entry into a short, canonical label.

    Checks are applied in this order, and the first hit is returned:
      1. the priority substrings ('Safe Kids', 'Digital Media');
      2. any organization name contained in `s` (the name itself is returned);
      3. the long-phrase substrings, each mapped to an abbreviation;
      4. otherwise the entry is cut at the first ' (', bullet markers are
         removed, surrounding whitespace is stripped, and a few spelling
         variants ('Grantees', 'Website', 'NAPPS-') are unified.

    s          -- the raw text of one line of a cell
    known_orgs -- organization names to match in step 2; defaults to the
                  module-level `orgs` list

    Returns the cleaned label. An entry that consists only of bullets,
    whitespace or a parenthetical yields '' instead of raising IndexError.
    """
    if known_orgs is None:
        known_orgs = orgs

    for needle, label in _PRIORITY_LABELS:
        if needle in s:
            return label
    for org in known_orgs:
        if org in s:
            return org
    for needle, label in _PHRASE_LABELS:
        if needle in s:
            return label

    # Drop a trailing parenthetical such as "Name (details)".
    if '(' in s:
        s = s.split(' (')[0]
    for marker in _BULLET_MARKERS:
        s = s.replace(marker, '')
    # strip() handles any amount of surrounding whitespace and is safe on an
    # empty string, unlike indexing s[-1] / s[0].
    s = s.strip()
    if not s:
        return s

    if 'Grantees' in s:
        return 'Grantees'
    if s in ('Website', 'website'):
        return 'Websites'
    # technical corrections
    if 'NAPPS-' in s:
        return 'NAPPSS-IIN'
    return s
def clean_data(cell):
    """Turn one multi-line spreadsheet cell into a list of cleaned labels.

    The cell text is split on newlines, empty lines are skipped, and every
    remaining line is passed through `remove_str`.

    A cell without a `.split` method (for example a non-string placeholder
    for an empty cell) produces the single-item list ['None'].
    """
    try:
        labels = []
        for line in cell.split('\n'):
            if line:
                labels.append(remove_str(line))
        return labels
    except AttributeError:
        return ['None']
# Load the workbook and normalise it into lists of cleaned labels.
excel_path = "net_an/NICHD_STS.xlsx"
sleep_df = pd.read_excel(excel_path)

# Name the first column 'Category'; every other header is kept as read.
sleep_df.columns = ['Category', *sleep_df.columns[1:]]

# Discard the last two rows of the sheet
# (presumably trailing notes rather than data -- verify against the workbook).
sleep_df = sleep_df.drop(sleep_df.index[-2:])

# Every column except 'Category' becomes a list of labels per cell.
for c in sleep_df.columns:
    if c == 'Category':
        continue
    sleep_df[c] = sleep_df[c].apply(clean_data)
def create_adjacency_matrix(vals, filename):
    """Write `vals` to ``<filename>.txt`` and load it back as a graph.

    Despite the name, the file is a '|'-delimited adjacency *list*: each
    written line is one entry of `vals` joined with '|', which
    `nx.read_adjlist` interprets as a source node followed by its neighbours.

    vals     -- iterable of lists of node-name strings; entries with fewer
                than two items are skipped
    filename -- output path without the '.txt' extension

    Returns the graph built by `nx.read_adjlist`.
    """
    path = f'{filename}.txt'
    # create adjacency.txt for easy networkx import
    # Written as UTF-8 so it matches the encoding read_adjlist is told to use,
    # regardless of the platform default.
    with open(path, 'w', encoding='utf-8') as file:
        file.writelines('|'.join(row) + '\n' for row in vals if len(row) > 1)
    # Read back only after the `with` block has closed (and flushed) the file,
    # and from the path derived from the `filename` argument rather than from
    # a module-level variable.
    return nx.read_adjlist(path, delimiter='|', encoding='utf-8')
# ---------------------------------------------------------------------------
# Figure: organizations and their partners, nodes labelled with edge counts
# ---------------------------------------------------------------------------

# create the adj_matrix list
# Row 5 of the cleaned sheet is used here (presumably the "partners" row,
# going by the figure title); data column k belongs to orgs[k-1].
partner_row = sleep_df.iloc[5].values
col_org_list = list(enumerate(orgs, start=1))
big_m = [[org, val] for col, org in col_org_list for val in partner_row[col]]

file_name = 'partners_edgecount'
G = create_adjacency_matrix(big_m, file_name)
# A DiGraph built from an undirected graph stores every edge in both
# directions, so G.edges(node) below still lists every neighbour of `node`.
G = nx.DiGraph(G)

# Not used by this figure; kept in case later code relies on the name.
main_nodes = [node for node in G.nodes if node in big_m[-1]]
main_nodes.append('NICHD')

# node_count: label drawn on each node (blank for nodes with a single edge).
# special_nodes: nodes with 2-9 edges, highlighted below.
node_count = {}
special_nodes = []
for node in G.nodes:
    n_edges = len(G.edges(node))
    if 1 < n_edges < 10:
        special_nodes.append(node)
    node_count[node] = '' if n_edges == 1 else n_edges

# Begin Visualization
fig = plt.figure(figsize=(30, 28))
# Only `fontsize` is passed: `size` is an alias of the same property and
# supplying both is rejected by current matplotlib.
plt.title(f'{sts_logo}Organizations and their partners',
          color='black', fontsize=50)

pos = nx.kamada_kawai_layout(G)
# Hand-tuned nudges so the labels of these two hubs do not collide.
pos['AAP'] = [pos['AAP'][0] - .2, pos['AAP'][1]]
# NOTE(review): HRSA's y is derived from AAP's y, not its own -- confirm.
pos['HRSA'] = [pos['HRSA'][0], pos['AAP'][1] - .25]

# Edges, coloured by the organization they start from.
for org, color in zip(orgs, colors):
    edges = list(G.edges(org))
    nx.draw_networkx_edges(G, pos=pos,
                           style='dashed',
                           width=3,
                           arrows=False,  # keyword is `arrows`, not `arrow`
                           edgelist=edges,
                           edge_color=[color for _ in edges],
                           alpha=.8)

# Halos: yellow behind multi-edge nodes, red behind nodes flagged for review.
nx.draw_networkx_nodes(G, pos, nodelist=special_nodes, node_color='yellow',
                       node_size=2200, alpha=1)
investigate_nodes = ['ASTHO', 'JPMA', 'NACCHO']
nx.draw_networkx_nodes(G, pos, nodelist=investigate_nodes, node_color='red',
                       node_size=2200, alpha=1)

# Partner nodes: black outline, white fill, then a translucent org colour.
for col, color in enumerate(colors, start=1):
    partners = partner_row[col]
    nx.draw_networkx_nodes(G, pos, nodelist=partners, node_color='black',
                           node_size=1500, alpha=1)
    nx.draw_networkx_nodes(G, pos, nodelist=partners, node_color='white',
                           node_size=1300, alpha=1)
    nx.draw_networkx_nodes(G, pos, nodelist=partners, node_color=[color],
                           node_size=1300, alpha=.5)

# NOTE(review): drawn once after the loop above; the original nesting of this
# call could not be recovered from the source -- confirm.
nx.draw_networkx_nodes(G, pos, nodelist=special_nodes, node_color='white',
                       node_size=1500, alpha=.8)

# Organization nodes: large black disc with a slightly smaller coloured one.
for org, color in zip(orgs, colors):
    nx.draw_networkx_nodes(G, pos, nodelist=[org], node_color='black',
                           node_size=3500, alpha=1)
    nx.draw_networkx_nodes(G, pos, nodelist=[org], node_color=[color],
                           node_size=3000, alpha=1)

# Edge-count labels, centred on the nodes.
nx.draw_networkx_labels(G, pos=pos, font_size=18, labels=node_count,
                        font_weight='bold', font_color='black')

# Shift every position up so the name labels sit above the nodes.
for k in pos:
    pos[k] = [pos[k][0], pos[k][1] + .038]
for col, color in enumerate(colors, start=1):
    org_labels = {val: val for val in partner_row[col]}
    nx.draw_networkx_labels(G,
                            pos=pos,
                            font_size=18,
                            labels=org_labels,
                            font_color=darken(color, .4),
                            bbox=dict(facecolor=lighten(color, .4),
                                      alpha=.5,
                                      edgecolor=darken(color, .5)))

# A little higher again for the larger organization labels.
for k in pos:
    pos[k] = [pos[k][0], pos[k][1] + .009]
for org, color in zip(orgs, colors):
    org_labels = {org: org}
    nx.draw_networkx_labels(G, pos=pos, font_size=35, labels=org_labels,
                            font_color=darken(color, .45),
                            bbox=dict(facecolor=lighten(color, .3),
                                      alpha=1,
                                      edgecolor=darken(color, .5)))

# Summary box. The text is passed positionally because the keyword was
# renamed from `s` to `text` in matplotlib 3.3.
plt.annotate(
    f"\n Metric Used: Edge Count \n\n Average Clustering: {round(average_clustering(G),3)} \n Density: {round(nx.density(G),3)} \n",
    xy=(-.9, -.9),
    va='top',
    ha='left',
    fontsize=20,
    bbox=dict(facecolor='lightgray', alpha=.8, pad=.5),
)
plt.axis('off')
plt.tight_layout()
plt.savefig(f'final_visual/{file_name}.png', dpi=300)
plt.show()
# ---------------------------------------------------------------------------
# Figure: organizations and their partners, nodes labelled with their
# clustering coefficient
# ---------------------------------------------------------------------------

# create the adj_matrix list
# Row 5 of the cleaned sheet is used here (presumably the "partners" row,
# going by the figure title); data column k belongs to orgs[k-1].
partner_row = sleep_df.iloc[5].values
col_org_list = list(enumerate(orgs, start=1))
big_m = [[org, val] for col, org in col_org_list for val in partner_row[col]]

file_name = 'partners_clusteringcoef'
G = create_adjacency_matrix(big_m, file_name)
# A DiGraph built from an undirected graph stores every edge in both
# directions, so G.edges(node) below still lists every neighbour of `node`.
G = nx.DiGraph(G)

# Not used by this figure; kept in case later code relies on the name.
main_nodes = [node for node in G.nodes if node in big_m[-1]]
main_nodes.append('NICHD')

# special_nodes: nodes with 2-9 edges, highlighted below.
# node_count is filled as well but not drawn in this figure.
node_count = {}
special_nodes = []
for node in G.nodes:
    n_edges = len(G.edges(node))
    if 1 < n_edges < 10:
        special_nodes.append(node)
    node_count[node] = '' if n_edges == 1 else n_edges

# Begin Visualization
fig = plt.figure(figsize=(30, 28))
# Only `fontsize` is passed: `size` is an alias of the same property and
# supplying both is rejected by current matplotlib.
plt.title(f'{sts_logo}Organizations and their partners',
          color='black', fontsize=50)

pos = nx.kamada_kawai_layout(G)
# Hand-tuned nudges so the labels of these two hubs do not collide.
pos['AAP'] = [pos['AAP'][0] - .2, pos['AAP'][1]]
# NOTE(review): HRSA's y is derived from AAP's y, not its own -- confirm.
pos['HRSA'] = [pos['HRSA'][0], pos['AAP'][1] - .25]

# Edges, coloured by the organization they start from.
for org, color in zip(orgs, colors):
    edges = list(G.edges(org))
    nx.draw_networkx_edges(G, pos=pos,
                           style='dashed',
                           width=3,
                           arrows=False,  # keyword is `arrows`, not `arrow`
                           edgelist=edges,
                           edge_color=[color for _ in edges],
                           alpha=.8)

# Halos: yellow behind multi-edge nodes, red behind nodes flagged for review.
nx.draw_networkx_nodes(G, pos, nodelist=special_nodes, node_color='yellow',
                       node_size=2200, alpha=1)
investigate_nodes = ['ASTHO', 'JPMA', 'NACCHO']
nx.draw_networkx_nodes(G, pos, nodelist=investigate_nodes, node_color='red',
                       node_size=2200, alpha=1)

# Partner nodes: black outline, white fill, then a translucent org colour.
for col, color in enumerate(colors, start=1):
    partners = partner_row[col]
    nx.draw_networkx_nodes(G, pos, nodelist=partners, node_color='black',
                           node_size=1500, alpha=1)
    nx.draw_networkx_nodes(G, pos, nodelist=partners, node_color='white',
                           node_size=1300, alpha=1)
    nx.draw_networkx_nodes(G, pos, nodelist=partners, node_color=[color],
                           node_size=1300, alpha=.5)

# NOTE(review): drawn once after the loop above; the original nesting of this
# call could not be recovered from the source -- confirm.
nx.draw_networkx_nodes(G, pos, nodelist=special_nodes, node_color='white',
                       node_size=1500, alpha=.8)

# Organization nodes: large black disc with a slightly smaller coloured one.
for org, color in zip(orgs, colors):
    nx.draw_networkx_nodes(G, pos, nodelist=[org], node_color='black',
                           node_size=3500, alpha=1)
    nx.draw_networkx_nodes(G, pos, nodelist=[org], node_color=[color],
                           node_size=3000, alpha=1)

# Clustering coefficient of every node, rounded for display.
clustering_labels = {node: round(coef, 3)
                     for node, coef in clustering(G).items()}
nx.draw_networkx_labels(G, pos=pos, font_size=18, labels=clustering_labels,
                        font_weight='bold', font_color='black')

# Shift every position up so the name labels sit above the nodes.
for k in pos:
    pos[k] = [pos[k][0], pos[k][1] + .038]
for col, color in enumerate(colors, start=1):
    org_labels = {val: val for val in partner_row[col]}
    nx.draw_networkx_labels(G,
                            pos=pos,
                            font_size=18,
                            labels=org_labels,
                            font_color=darken(color, .4),
                            bbox=dict(facecolor=lighten(color, .4),
                                      alpha=.5,
                                      edgecolor=darken(color, .5)))

# A little higher again for the larger organization labels.
for k in pos:
    pos[k] = [pos[k][0], pos[k][1] + .009]
for org, color in zip(orgs, colors):
    org_labels = {org: org}
    nx.draw_networkx_labels(G, pos=pos, font_size=35, labels=org_labels,
                            font_color=darken(color, .45),
                            bbox=dict(facecolor=lighten(color, .3),
                                      alpha=1,
                                      edgecolor=darken(color, .5)))

# Summary box. The text is passed positionally because the keyword was
# renamed from `s` to `text` in matplotlib 3.3.
plt.annotate(
    f"\n Metric Used: Clustering Coefficient \n\n Average Clustering: {round(average_clustering(G),3)} \n Density: {round(nx.density(G),3)} \n",
    xy=(-.9, -.9),
    va='top',
    ha='left',
    fontsize=20,
    bbox=dict(facecolor='lightgray', alpha=.8, pad=.5),
)
plt.axis('off')
plt.tight_layout()
# NOTE(review): saved at the default dpi, unlike the other figures which use
# dpi=300 -- confirm whether that is intentional.
plt.savefig(f'final_visual/{file_name}.png')
plt.show()
# ---------------------------------------------------------------------------
# Figure: which audiences each organization targets
# ---------------------------------------------------------------------------
from matplotlib.patches import Patch
from matplotlib.lines import Line2D

columns = ['Audience', 'weight', 'Organization', 'audience']

# (sheet row, marker-size weight, target type), in the order they are added.
audience_rows = (
    (2, 10, 'STS'),
    (0, 4, 'Primary'),
    (1, 1, 'Secondary'),
)
# One record per (audience, organization, target type); the k-th data column
# of each row belongs to orgs[k].
all_targets = []
for row, weight, target_type in audience_rows:
    for org, audiences in zip(orgs, sleep_df.iloc[row].values[1:]):
        for m in audiences:
            all_targets.append([m, weight, org, target_type])

viz = pd.DataFrame(all_targets, columns=columns)
# 'None' is the placeholder clean_data produces for cells it could not split.
viz = viz[viz['Audience'] != 'None']

# Explicit level -> marker / colour mappings. With positional lists the
# assignment depends on which levels survive the filter above, so the
# hand-built legend or the per-organization colours could silently drift.
target_markers = {'STS': 'o', 'Primary': 'P', 'Secondary': '^'}
org_palette = dict(zip(orgs, colors))

fig = plt.figure(figsize=(9, 12))
fig.set_tight_layout(True)
plt.title(f"{sts_logo}Organizations and target audience", size=20, loc='center', x=0.2)
ax = plt.subplot(111)
g = sns.scatterplot(data=viz,
                    x="Organization",
                    y="Audience",
                    hue='Organization',
                    size="weight",
                    style='audience',
                    markers=target_markers,
                    sizes=(80, 325),
                    alpha=.5,
                    palette=org_palette)

# Replace the automatic legend with one that only explains the marker shapes.
legend_elements = [
    Line2D([0], [0], marker=target_markers[kind], color='w',
           label=f'{kind} Target', markerfacecolor='gray', markersize=msize)
    for kind, msize in (('STS', 8), ('Primary', 10), ('Secondary', 10))
]
lgnd = ax.legend(handles=legend_elements, loc='lower right', title="Target Type:\n")

# Hide the right and top spines
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
# Only show ticks on the left and bottom spines
ax.yaxis.set_ticks_position('left')
ax.xaxis.set_ticks_position('bottom')
plt.savefig(f'final_visual/target_scatter.png', dpi=300)
plt.show()
# ---------------------------------------------------------------------------
# Figure: outreach channels per organization, labelled with degree centrality
# ---------------------------------------------------------------------------

# create the adj_matrix list
# Row 3 of the cleaned sheet is used here (presumably the "outreach" row,
# going by the figure title); data column k belongs to orgs[k-1].
outreach_row = sleep_df.iloc[3].values
col_org_list = list(enumerate(orgs, start=1))
big_m = [[org, val] for col, org in col_org_list for val in outreach_row[col]]

file_name = 'outreach_centrality'
G = create_adjacency_matrix(big_m, file_name)
print(f"Average Clustering: {average_clustering(G)}")
print(f"Density: {nx.density(G)}")

# Begin Visualization
fig = plt.figure(figsize=(20, 20))
# Only `fontsize` is passed: `size` is an alias of the same property and
# supplying both is rejected by current matplotlib.
plt.suptitle(f'{sts_logo}Outreach', color='black', fontsize=50)
plt.tight_layout()

# draw edges
pos = nx.kamada_kawai_layout(G, center=(0, 0), scale=1.5)

# Customize positions for outlier nodes:
# move the entries of data column 6 (orgs[5]) down, then park that
# organization next to the last entry that was moved.
col6_entries = outreach_row[6]
for v in col6_entries:
    pos[v] = [pos[v][0], pos[v][1] - 1.4]
if col6_entries:
    anchor = pos[col6_entries[-1]]
    pos[orgs[5]] = [anchor[0] + .25, anchor[1] + .2]
# shift the entries of data column 7 left, a few of them a little further
for v in outreach_row[7]:
    if v != 'Social media':
        pos[v] = [pos[v][0] - .35, pos[v][1]]
    if 'Information' in v or 'Partner' in v:
        pos[v] = [pos[v][0] - .12, pos[v][1]]
# NOTE(review): applied once, after the loop; the original nesting could not
# be recovered from the source -- confirm.
pos['Journals'] = [pos['Journals'][0], pos['Journals'][1] + .5]

# Edges, coloured by the organization they belong to.
for org, color in zip(orgs, colors):
    edges = list(G.edges(org))
    nx.draw_networkx_edges(G, pos=pos,
                           style='dashed',
                           width=3,
                           arrows=False,  # keyword is `arrows`, not `arrow`
                           edgelist=edges,
                           edge_color=[color for _ in edges],
                           alpha=.8)

# Outreach nodes in a translucent org colour.
for col, color in enumerate(colors, start=1):
    nx.draw_networkx_nodes(G, pos, nodelist=outreach_row[col],
                           node_color=[color], node_size=1300, alpha=.5)

# Organization nodes: black disc with a slightly smaller coloured one on top.
for org, color in zip(orgs, colors):
    nx.draw_networkx_nodes(G, pos, nodelist=[org], node_color='black',
                           node_size=2700, alpha=1)
    nx.draw_networkx_nodes(G, pos, nodelist=[org], node_color=[color],
                           node_size=2500, alpha=1)

# Edge counts per node. Not drawn in this figure; kept in case later code
# relies on these names.
node_count = {}
special_nodes = []
for node in G.nodes:
    n_edges = len(G.edges(node))
    if 1 < n_edges < 10:
        special_nodes.append(node)
    node_count[node] = '' if n_edges == 1 else n_edges

# Degree centrality of every node, rounded for display.
centrality_labels = {node: round(value, 2)
                     for node, value in degree_centrality(G).items()}
nx.draw_networkx_labels(G, pos=pos, font_size=10, labels=centrality_labels,
                        font_weight='bold', font_color='black')

# Shift every position down so the name labels sit below the nodes.
for k in pos:
    pos[k] = [pos[k][0], pos[k][1] - .09]
for col, color in enumerate(colors, start=1):
    # organizations get their own, larger label below
    org_labels = {val: val for val in outreach_row[col] if val not in orgs}
    nx.draw_networkx_labels(G, pos=pos, font_size=15, labels=org_labels,
                            font_color=darken(color, .4),
                            bbox=dict(facecolor=lighten(color, .4),
                                      alpha=.85,
                                      edgecolor=darken(color, .5)))

# A little lower again for the organization labels.
for k in pos:
    pos[k] = [pos[k][0], pos[k][1] - .02]
for org, color in zip(orgs, colors):
    org_labels = {org: org}
    nx.draw_networkx_labels(G, pos=pos, font_size=19, labels=org_labels,
                            font_color=darken(color, .45),
                            bbox=dict(facecolor=lighten(color, .3),
                                      alpha=1,
                                      edgecolor=darken(color, .5)))

# Summary box. The text is passed positionally because the keyword was
# renamed from `s` to `text` in matplotlib 3.3.
plt.annotate(
    f"\n Metric Used: Degree of Centrality \n\n Average Clustering: {round(average_clustering(G),3)} \n Density: {round(nx.density(G),3)} \n",
    xy=(-1.5, -1.5),
    va='top',
    ha='left',
    fontsize=20,
    bbox=dict(facecolor='lightgray', alpha=.8, pad=.5),
)
plt.axis('off')
plt.savefig(f'final_visual/{file_name}.png', dpi=300)
plt.show()
# ---------------------------------------------------------------------------
# Figure: assets per organization, shared assets labelled with edge counts
# ---------------------------------------------------------------------------

# Raw asset lists from row 6 of the cleaned sheet. The figure itself is built
# from the hand-written lists below, one list per entry of `orgs`.
asset_lists = list(sleep_df.iloc[6].values[1:])
asset_lists_reduced = [
    ['Education/Training',
     'Daily briefing with AAP members',
     'Hospital associations',
     'Mailing lists',
     'Publications',
     'Periodic survey',
     'Webpages'],
    ['Annual clinical meeting',
     'E-Learning',
     'Committee opinions and practice bulletins',
     'Publications',
     'Digital media',
     'Toolkits',
     'Webinars'],
    ['Clinician materials',
     'Grand Rounds',
     'Monthly presentations',
     'Publications',
     'Presentations for professional conferences',
     'SUID and SDY Case Registry/vital statistics',
     'Webpages'],
    ['Partner listserv',
     'Videos'],
    ['Education/Training',
     'Conferences',
     'Digital partnerships',
     'Educational materials',
     'Grand Rounds',
     'Guardian program',
     'PSAs',
     'Videos',
     'E-Learning',
     'Webinars'],
    ['Toolkits',
     'MCHB Challenges',
     'Educational materials'],
    ['Digital media',
     'Publications',
     'Toolkits',
     'Education/Training',
     'Videos',
     'Webinars'],
]
all_asset_lists = [asset for assets in asset_lists_reduced for asset in assets]

# organization -> its assets; nx.Graph turns this into org--asset edges.
assets_dict = dict(zip(orgs, asset_lists_reduced))
AG = nx.Graph(assets_dict)

plt.figure(figsize=(20, 20))
# Only `fontsize` is passed: `size` is an alias of the same property and
# supplying both is rejected by current matplotlib.
plt.title(f'{sts_logo}Assets', color='black', fontsize=50)
plt.tight_layout()
post = nx.kamada_kawai_layout(AG)

for org, assets, color in zip(orgs, asset_lists_reduced, colors):
    # add org nodes
    nx.draw_networkx_nodes(AG, post,
                           nodelist=[org],
                           node_color=[color],
                           node_size=2000,
                           alpha=1)
    # add asset nodes
    nx.draw_networkx_nodes(AG, post,
                           nodelist=assets,
                           node_color=[color],
                           node_size=1000,
                           alpha=0.5)
    # add all edges
    edges = list(AG.edges([org]))
    nx.draw_networkx_edges(AG, pos=post,
                           style='dashed',
                           width=3,
                           arrows=False,  # keyword is `arrows`, not `arrow`
                           edgelist=edges,
                           edge_color=[color for _ in edges],
                           alpha=.8)
    # add additional features to asset nodes:
    # black outline, white fill, then a translucent org colour
    nx.draw_networkx_nodes(AG, post,
                           nodelist=assets,
                           node_color='black',
                           node_size=1100,
                           alpha=1)
    nx.draw_networkx_nodes(AG, post,
                           nodelist=assets,
                           node_color='white',
                           node_size=1000,
                           alpha=1)
    nx.draw_networkx_nodes(AG, post,
                           nodelist=assets,
                           node_color=[color],
                           node_size=1000,
                           alpha=.5)
    # add additional features to org nodes
    nx.draw_networkx_nodes(AG, post,
                           nodelist=[org],
                           node_color='black',
                           node_size=2100,
                           alpha=1)
    nx.draw_networkx_nodes(AG, post,
                           nodelist=[org],
                           node_color=[color],
                           node_size=2000,
                           alpha=1)

# Edge-count labels, only for nodes with more than one edge.
node_count = {}
for node in AG.nodes:
    n_edges = len(AG.edges(node))
    if n_edges > 1:
        node_count[node] = n_edges
nx.draw_networkx_labels(AG, pos=post,
                        font_size=18,
                        labels=node_count,
                        font_color='black')

# Shift every position up so the name labels sit above the nodes.
for k in post:
    post[k] = [post[k][0], post[k][1] + .05]
for assets, color in zip(asset_lists_reduced, colors):
    org_labels = {val: val for val in assets}
    nx.draw_networkx_labels(AG, pos=post,
                            font_size=15,
                            labels=org_labels,
                            font_color=darken(color, .4),
                            bbox=dict(facecolor=lighten(color, .4),
                                      alpha=.85,
                                      edgecolor=darken(color, .5)))
for org, color in zip(orgs, colors):
    org_labels = {org: org}
    nx.draw_networkx_labels(AG, pos=post,
                            font_size=19,
                            labels=org_labels,
                            font_color=darken(color, .45),
                            bbox=dict(facecolor=lighten(color, .3),
                                      alpha=1,
                                      edgecolor=darken(color, .5)))

# Summary box. The metrics are computed on AG, the graph drawn in this
# figure, rather than on the graph left over from an earlier figure.
# The text is passed positionally because the keyword was renamed from `s`
# to `text` in matplotlib 3.3.
plt.annotate(
    f"\n Metric Used: Edge Count \n\n Average Clustering: {round(average_clustering(AG),3)} \n Density: {round(nx.density(AG),3)} \n",
    xy=(.4, -.5),
    va='top',
    ha='left',
    fontsize=20,
    bbox=dict(facecolor='lightgray', alpha=.8, pad=.5),
)
plt.axis('Off')
plt.savefig('final_visual/asset_edgecount.png', dpi=300)