Module BML.transform.nodes_features
Source code:
from BML.transform import Graph, BaseTransform
from BML.utils import ProcessingQueue, timeFormat, printAndLog
import multiprocessing
import networkit as nk
import networkx as nx
import numpy as np
import random
import time
import threading
from networkx import find_cliques
def my_node_clique_number(G, nodes=None, cliques=None):
    """Returns the size of the largest maximal clique containing
    each given node.
    Returns a single value if nodes is a single node, otherwise a
    dict keyed by node.
    An optional list of precomputed cliques can be supplied.
    """
    if cliques is None:
        if nodes is not None:
            # Use ego_graph to decrease the size of the graph
            if isinstance(nodes, list):
                d = {}
                for n in nodes:
                    H = nx.ego_graph(G, n)
                    d[n] = max(len(c) for c in find_cliques(H))
            else:
                H = nx.ego_graph(G, nodes)
                d = max(len(c) for c in find_cliques(H))
            return d
        # nodes is None -- find all cliques
        cliques = list(find_cliques(G))
    all_nodes = False
    if nodes is None:
        all_nodes = True
        nodes = list(G.nodes())  # None: use the entire graph
    if not isinstance(nodes, list):  # check for a list
        v = nodes
        # assume it is a single node
        d = max([len(c) for c in cliques if v in c])
    else:
        d = {}
        for v in nodes:
            d[v] = 0
        for c in cliques:
            l = len(c)
            for v in c:
                if(all_nodes or v in nodes):
                    d[v] = max(d[v], l)
    return d
def my_number_of_cliques(G, nodes=None, cliques=None):
    """Returns the number of maximal cliques containing each given node.
    Returns a single value if nodes is a single node, otherwise a
    dict keyed by node.
    An optional list of precomputed cliques can be supplied.
    """
    if cliques is None:
        cliques = list(find_cliques(G))
    all_nodes = False
    if nodes is None:
        all_nodes = True
        nodes = list(G.nodes())  # None: use the entire graph
    if not isinstance(nodes, list):  # check for a list
        v = nodes
        # assume it is a single node
        numcliq = len([1 for c in cliques if v in c])
    else:
        numcliq = {}
        for v in nodes:
            numcliq[v] = 0
        for c in cliques:
            for v in c:
                if(all_nodes or v in nodes):
                    numcliq[v] += 1
    return numcliq
def nx2cu(G):
    import cugraph, cudf
    edges = [(int(a), int(b)) for a, b in [*G.edges]]
    edgelistDF = cudf.DataFrame(edges, columns=['src', 'dst'])
    Gcu = cugraph.from_cudf_edgelist(edgelistDF, source='src', destination='dst', renumber=True)
    return(Gcu)
def nx2nk(G):
    Gnk = nk.nxadapter.nx2nk(G)
    Gnk.indexEdges()
    return(Gnk)
def dictKeys(d, keys):
    subD = {}
    keys2 = dict(d).keys()
    for k in keys:
        if(k in keys2):
            subD[k] = d[k]
    return(subD)
def valuesDict(values, keys):
    return(dict(zip(keys, values)))
def betweenness(G, nodes):
    d = nx.betweenness_centrality(G)
    return(dictKeys(d, nodes))
def betweenness_nk(G, Gnk, nodes):
    d = nk.centrality.Betweenness(Gnk, normalized=True).run().scores()
    return(dictKeys(valuesDict(d, G.nodes), nodes))
def load(G, nodes):
    d = nx.load_centrality(G)
    return(dictKeys(d, nodes))
def closeness(G, nodes):
    v = [nx.closeness_centrality(G, u=n) for n in nodes]
    return(valuesDict(v, nodes))
def closeness_nk(G, Gnk, nodes):
    d = nk.centrality.Closeness(Gnk, False, False).run().scores()
    return(dictKeys(valuesDict(d, G.nodes), nodes))
def approx_closeness_nk(G, Gnk, nodes):
    d = nk.centrality.ApproxCloseness(Gnk, len(nodes)).run().scores()
    return(dictKeys(valuesDict(d, G.nodes), nodes))
def harmonic(G, nodes):
    d = nx.harmonic_centrality(G, nbunch=nodes)
    return(d)
def harmonic_nk(G, Gnk, nodes):
    d = nk.centrality.HarmonicCloseness(Gnk, normalized=False).run().scores()
    return(dictKeys(valuesDict(d, G.nodes), nodes))
def degree(G, nodes):
    d = G.degree
    return(dictKeys(d, nodes))
def degree_centrality(G, nodes):
    d = nx.degree_centrality(G)
    return(dictKeys(d, nodes))
def degree_centrality_nk(G, Gnk, nodes):
    d = nk.centrality.DegreeCentrality(Gnk, normalized=True).run().scores()
    return(dictKeys(valuesDict(d, G.nodes), nodes))
def eigenvector(G, nodes):
    d = nx.eigenvector_centrality(G)
    return(dictKeys(d, nodes))
def eigenvector_nk(G, Gnk, nodes):
    d = nk.centrality.EigenvectorCentrality(Gnk).run().scores()
    return(dictKeys(valuesDict(d, G.nodes), nodes))
def pagerank(G, nodes):
    d = nx.pagerank(G)
    return(dictKeys(d, nodes))
def pagerank_nk(G, Gnk, nodes):
    d = nk.centrality.PageRank(Gnk).run().scores()
    return(dictKeys(valuesDict(d, G.nodes), nodes))
def number_of_cliques(G, nodes):
    if(nodes == G.nodes):
        d = my_number_of_cliques(G)
    else:
        d = my_number_of_cliques(G, nodes=list(nodes))
    return(dictKeys(d, nodes))
def number_of_cliques_nk(G, Gnk, nodes):
    cliques = nk.clique.MaximalCliques(Gnk).run().getCliques()
    d = {}
    for n, v in zip(G.nodes, Gnk.iterNodes()):
        if(n in nodes):
            d[n] = len([1 for c in cliques if v in c])
    return(d)
def node_clique_number(G, nodes):
    if(nodes == G.nodes):
        d = my_node_clique_number(G)
    else:
        d = my_node_clique_number(G, nodes=list(nodes))
    return(dictKeys(d, nodes))
def node_clique_number_nk(G, Gnk, nodes):
    cliques = nk.clique.MaximalCliques(Gnk).run().getCliques()
    v = {}
    for node in Gnk.iterNodes():
        v[node] = 0
    for clique in cliques:
        l = len(clique)
        for node in clique:
            v[node] = max(v[node], l)
    return(dictKeys(valuesDict(v.values(), G.nodes), nodes))
def clustering(G, nodes):
    d = nx.clustering(G, nodes=nodes)
    return(dictKeys(d, nodes))
def clustering_nk(G, Gnk, nodes):
    d = nk.centrality.LocalClusteringCoefficient(Gnk).run().scores()
    return(dictKeys(valuesDict(d, G.nodes), nodes))
def triangles(G, nodes):
    d = nx.triangles(G, nodes=nodes)
    return(dictKeys(d, nodes))
def triangles_nk(G, Gnk, nodes):
    # note: TriangleEdgeScore yields one score per edge; the scores are
    # zipped against node labels here
    d = nk.sparsification.TriangleEdgeScore(Gnk).run().scores()
    return(dictKeys(valuesDict(d, G.nodes), nodes))
def square_clustering(G, nodes):
    d = nx.square_clustering(G, nodes=nodes)
    return(dictKeys(d, nodes))
def average_neighbor_degree(G, nodes):
    d = nx.average_neighbor_degree(G, nodes=nodes)
    return(dictKeys(d, nodes))
def eccentricity(G, nodes):
    v = [nx.eccentricity(G, v=n) for n in nodes]
    return(valuesDict(v, nodes))
def eccentricity_nk(G, Gnk, nodes):
    d = {}
    for n, v in zip(G.nodes, Gnk.iterNodes()):
        if(n in nodes):
            _, d[n] = nk.distance.Eccentricity.getValue(Gnk, v)
    return(d)
def local_efficiency(G, nodes):
    v = [nx.global_efficiency(G.subgraph(G[n])) for n in nodes]
    return(valuesDict(v, nodes))
def global_efficiency_nk(Gnk):
    n = Gnk.numberOfNodes()
    denom = n * (n - 1)
    if denom != 0:
        g_eff = 0
        lengths = nk.distance.APSP(Gnk).run().getDistances()
        for l in lengths:
            for distance in l:
                if distance > 0:
                    g_eff += 1 / distance
        g_eff /= denom
    else:
        g_eff = 0
    return g_eff
def local_efficiency_nk(G, Gnk, nodes):
    v = [global_efficiency_nk(nx2nk(G.subgraph(G[n]))) for n in nodes]
    return(valuesDict(v, nodes))
def average_shortest_path_length(G, nodes):
    def average_shortest_path_length_node(G, n):
        return(np.mean(list(nx.single_source_shortest_path_length(G, n).values())))
    v = [average_shortest_path_length_node(G, n) for n in nodes]
    return(valuesDict(v, nodes))
def average_shortest_path_length_nk(G, Gnk, nodes):
    def average_shortest_path_length_node(Gnk, n):
        return(np.mean(nk.distance.Dijkstra(Gnk, n).run().getDistances()))
    d = {}
    for n, v in zip(G.nodes, Gnk.iterNodes()):
        if(n in nodes):
            d[n] = average_shortest_path_length_node(Gnk, v)
    return(d)
def connectivity(G, nodes):  # too slow; see networkx.algorithms.approximation for a faster variant
    v = []
    for n in nodes:
        v.append(np.mean([nx.connectivity.local_node_connectivity(G, n, t) for t in nodes]))
    return(valuesDict(v, nodes))
def run(function, results, key, logFiles, verbose, *args):
    if(verbose):
        printAndLog("Start:%s" % (key), logFiles)
    s = time.time()
    try:
        results[key] = function(*args)
    except Exception as e:
        print("Error with feature: " + key)
        raise e
    if(verbose):
        printAndLog("End:%s (%s)" % (key, timeFormat(time.time() - s)), logFiles)
def computeFeatures(features, logFiles, verbose=False):
    results = {}
    for key, value in features.items():
        function, *args = value
        run(function, results, key, logFiles, verbose, *args)
    return(results)
def computeFeaturesParallelized(features, nbProcess, logFiles, verbose=False):
    manager = multiprocessing.Manager()
    results = manager.dict()
    pq = ProcessingQueue(nbProcess=nbProcess)
    for key, value in features.items():
        function, *args = value
        pq.addProcess(target=run, args=(function, results, key, logFiles, verbose, *args))
    pq.run()
    r_copy = {}
    for k in list(features.keys()):
        r_copy[k] = results[k].copy()
        del results[k]
    return(r_copy)
def removedExcludedFeatures(features, excluded, included):
    if(len(included) > 0):
        for key in list(features.keys()):
            if(not key in included):
                del features[key]
    else:
        for key in excluded:
            if(key in features):
                del features[key]
    return(features)
class NodesFeatures(Graph):
    fileExtension = ".json"
    def __init__(self, primingFile, dataFile, params, outFolder, logFiles):
        self.params["use_networkit"] = True
        self.params["all_nodes"] = True
        self.params["nodes"] = None
        self.params["exclude_features"] = []  # excluded by default
        self.params["include_features"] = [
            'degree', 'degree_centrality', 'average_neighbor_degree', 'node_clique_number',
            'number_of_cliques', 'eigenvector', 'pagerank', 'clustering', 'triangles'
        ]
        self.params["verbose"] = False
        self.params["nbProcessFeatures"] = multiprocessing.cpu_count()
        Graph.__init__(self, primingFile, dataFile, params, outFolder, logFiles)
    def save(self):
        BaseTransform.save(self)
    def getNodes(self, G):
        nodes = self.params["nodes"]
        if(nodes is None):
            if(not self.params["all_nodes"]):
                core_number = nx.core_number(G)
                k = np.percentile(list(core_number.values()), 98)
                G = nx.k_core(G, k, core_number)
            nodes = G.nodes
        return(G, nodes)
    def getFeatures(self, G, nodes):
        features_nx = {}
        features_nk = {}
        features_nx["load"] = (load, G, nodes)
        features_nx["degree"] = (degree, G, nodes)  # already fast with nx
        features_nx["degree_centrality"] = (degree_centrality, G, nodes)  # already fast with nx
        features_nx["square_clustering"] = (square_clustering, G, nodes)
        features_nx["average_neighbor_degree"] = (average_neighbor_degree, G, nodes)
        features_nx["node_clique_number"] = (node_clique_number, G, nodes)  # fast with my fix
        features_nx["number_of_cliques"] = (number_of_cliques, G, nodes)  # fast with my fix
        if(self.params["use_networkit"]):
            Gnk = nx2nk(G)
            features_nk["closeness"] = (closeness_nk, G, Gnk, nodes)
            features_nk["betweenness"] = (betweenness_nk, G, Gnk, nodes)
            features_nk["local_efficiency"] = (local_efficiency_nk, G, Gnk, nodes)
            features_nk["harmonic"] = (harmonic_nk, G, Gnk, nodes)
            features_nk["eigenvector"] = (eigenvector_nk, G, Gnk, nodes)
            features_nk["pagerank"] = (pagerank_nk, G, Gnk, nodes)
            features_nk["clustering"] = (clustering_nk, G, Gnk, nodes)
            features_nk["triangles"] = (triangles_nk, G, Gnk, nodes)
            features_nk["eccentricity"] = (eccentricity_nk, G, Gnk, nodes)
            features_nk["average_shortest_path_length"] = (average_shortest_path_length_nk, G, Gnk, nodes)
        else:
            features_nx["closeness"] = (closeness, G, nodes)
            features_nx["betweenness"] = (betweenness, G, nodes)
            features_nx["local_efficiency"] = (local_efficiency, G, nodes)
            features_nx["harmonic"] = (harmonic, G, nodes)
            features_nx["eigenvector"] = (eigenvector, G, nodes)
            features_nx["pagerank"] = (pagerank, G, nodes)
            features_nx["clustering"] = (clustering, G, nodes)
            features_nx["triangles"] = (triangles, G, nodes)
            features_nx["eccentricity"] = (eccentricity, G, nodes)
            features_nx["average_shortest_path_length"] = (average_shortest_path_length, G, nodes)
        return(features_nx, features_nk)
    def computeFeatures(self, G, features_nx, features_nk):
        results = {}
        def func(results, features_nk, verbose):
            results.update(computeFeatures(features_nk, self.logFiles, verbose))
        # networkit features run in a background thread while the networkx
        # features run in separate processes
        thread = threading.Thread(target=func, args=(results, features_nk, self.params["verbose"]))
        thread.start()
        results.update(computeFeaturesParallelized(features_nx, self.params["nbProcessFeatures"], self.logFiles, self.params["verbose"]))
        thread.join()
        return(results)
    def transforms(self, index, G):
        G, nodes = self.getNodes(G)
        features_nx, features_nk = self.getFeatures(G, nodes)
        features_nx = removedExcludedFeatures(features_nx, self.params["exclude_features"], self.params["include_features"])
        features_nk = removedExcludedFeatures(features_nk, self.params["exclude_features"], self.params["include_features"])
        results = self.computeFeatures(G, features_nx, features_nk)
        return(results)
Functions
def approx_closeness_nk(G, Gnk, nodes)
Approximate closeness centrality via networkit's ApproxCloseness with len(nodes) samples, mapped back to networkx node labels and restricted to nodes.
def average_neighbor_degree(G, nodes)
Average degree of each node's neighbors (networkx), restricted to nodes.
def average_shortest_path_length(G, nodes)
Mean shortest-path length from each requested node to every reachable node, via networkx single-source shortest paths.
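For example (a quick check on a 4-node path graph; each value is the mean distance to all nodes, including the node itself at distance 0):

import networkx as nx
from BML.transform.nodes_features import average_shortest_path_length

G = nx.path_graph(4)  # 0-1-2-3
print(average_shortest_path_length(G, [0, 1]))
# {0: 1.5, 1: 1.0} -- node 0: mean of (0,1,2,3); node 1: mean of (1,0,1,2)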
def average_shortest_path_length_nk(G, Gnk, nodes)
networkit variant: the mean of each requested node's Dijkstra distances to all nodes.
def betweenness(G, nodes)
Exact betweenness centrality (networkx), restricted to nodes.
def betweenness_nk(G, Gnk, nodes)
Normalized betweenness centrality via networkit, mapped back to networkx node labels.
def closeness(G, nodes)
Closeness centrality computed node by node with networkx.
def closeness_nk(G, Gnk, nodes)
Closeness centrality via networkit, mapped back to networkx node labels.
def clustering(G, nodes)
Local clustering coefficient (networkx), restricted to nodes.
def clustering_nk(G, Gnk, nodes)
Local clustering coefficient via networkit, mapped back to networkx node labels.
def computeFeatures(features, logFiles, verbose=False)
Runs every feature entry of the form key -> (function, *args) sequentially and returns the results keyed by feature name.
def computeFeaturesParallelized(features, nbProcess, logFiles, verbose=False)
Parallel variant of computeFeatures: each feature runs in its own process through ProcessingQueue, with results collected in a multiprocessing-managed dict and copied out at the end.
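A minimal usage sketch of the features format shared by both variants; with verbose left False the logFiles value is never touched, so an empty list is passed here:

import networkx as nx
from BML.transform.nodes_features import computeFeatures, degree, pagerank

G = nx.karate_club_graph()
nodes = list(G.nodes)
features = {
    "degree":   (degree,   G, nodes),  # key -> (function, *args)
    "pagerank": (pagerank, G, nodes),
}
results = computeFeatures(features, logFiles=[])
print(results["degree"][0], results["pagerank"][0])

computeFeaturesParallelized takes the same features dict plus an nbProcess count; it relies on BML.utils.ProcessingQueue, so only the sequential form is shown here.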
def connectivity(G, nodes)
Mean local node connectivity between each node and every other requested node. Exact and very slow; networkx's approximation algorithms are a faster alternative.
def degree(G, nodes)
Node degree from networkx, restricted to nodes.
def degree_centrality(G, nodes)
Degree centrality (networkx), restricted to nodes.
def degree_centrality_nk(G, Gnk, nodes)
Normalized degree centrality via networkit, mapped back to networkx node labels.
def dictKeys(d, keys)
Returns the sub-dictionary of d restricted to the given keys; keys missing from d are skipped.
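Both helpers are trivial but pervasive; together they implement the dictKeys(valuesDict(scores, G.nodes), nodes) idiom used by the networkit wrappers:

from BML.transform.nodes_features import dictKeys, valuesDict

d = {"a": 1, "b": 2, "c": 3}
print(dictKeys(d, ["a", "c", "z"]))      # {'a': 1, 'c': 3} -- 'z' is skipped
print(valuesDict([10, 20], ["a", "b"]))  # {'a': 10, 'b': 20}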
def eccentricity(G, nodes)
Eccentricity computed node by node with networkx.
def eccentricity_nk(G, Gnk, nodes)
Eccentricity via networkit's Eccentricity.getValue, computed for each requested node.
def eigenvector(G, nodes)
Eigenvector centrality (networkx), restricted to nodes.
def eigenvector_nk(G, Gnk, nodes)
Eigenvector centrality via networkit, mapped back to networkx node labels.
def global_efficiency_nk(Gnk)
Global efficiency of a networkit graph: the mean inverse shortest-path distance over all ordered node pairs, computed with APSP.
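A small sanity check: in a triangle every ordered pair is at distance 1, so the efficiency is exactly 1.0:

import networkit as nk
from BML.transform.nodes_features import global_efficiency_nk

Gnk = nk.Graph(3)
Gnk.addEdge(0, 1)
Gnk.addEdge(1, 2)
Gnk.addEdge(0, 2)
print(global_efficiency_nk(Gnk))  # 1.0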
def harmonic(G, nodes)
Harmonic centrality (networkx) for the requested nodes.
def harmonic_nk(G, Gnk, nodes)
Unnormalized harmonic closeness via networkit, mapped back to networkx node labels.
def load(G, nodes)
Load centrality (networkx), restricted to nodes.
def local_efficiency(G, nodes)
Local efficiency: the global efficiency of each node's neighborhood subgraph (networkx).
def local_efficiency_nk(G, Gnk, nodes)
networkit variant: each neighborhood subgraph is converted with nx2nk and passed to global_efficiency_nk.
def my_node_clique_number(G, nodes=None, cliques=None)
Returns the size of the largest maximal clique containing each given node: a single value for a single node, or a dict keyed by node for a list of nodes. An optional list of precomputed cliques can be supplied.
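For example, on a triangle with a pendant edge:

import networkx as nx
from BML.transform.nodes_features import my_node_clique_number

G = nx.Graph([(0, 1), (1, 2), (0, 2), (2, 3)])  # triangle 0-1-2 plus edge 2-3
print(my_node_clique_number(G))     # {0: 3, 1: 3, 2: 3, 3: 2}
print(my_node_clique_number(G, 3))  # 2 -- single node, single value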
def my_number_of_cliques(G, nodes=None, cliques=None)
Returns the number of maximal cliques containing each given node: a single value for a single node, or a dict keyed by node for a list of nodes. An optional list of precomputed cliques can be supplied.
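Continuing the same example graph:

import networkx as nx
from BML.transform.nodes_features import my_number_of_cliques

G = nx.Graph([(0, 1), (1, 2), (0, 2), (2, 3)])
print(my_number_of_cliques(G))       # {0: 1, 1: 1, 2: 2, 3: 1}
print(my_number_of_cliques(G, [2]))  # {2: 2} -- node 2 sits in both maximal cliques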
def node_clique_number(G, nodes)
Largest-maximal-clique size per node, delegating to my_node_clique_number.
def node_clique_number_nk(G, Gnk, nodes)
networkit variant based on MaximalCliques, mapped back to networkx node labels.
def number_of_cliques(G, nodes)
Number of maximal cliques per node, delegating to my_number_of_cliques.
def number_of_cliques_nk(G, Gnk, nodes)
networkit variant based on MaximalCliques, computed for each requested node.
def nx2cu(G)
Converts a networkx graph to a cuGraph graph through a cuDF edge list (requires cugraph and cudf; imported lazily).
def nx2nk(G)
Converts a networkx graph to a networkit graph and indexes its edges.
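This conversion underlies every _nk wrapper: networkit returns scores as a plain list in node order, which is zipped back against G.nodes (assuming, as the wrappers do, that the adapter preserves networkx's node iteration order). A sketch:

import networkx as nx
import networkit as nk
from BML.transform.nodes_features import nx2nk, valuesDict, dictKeys

G = nx.karate_club_graph()
Gnk = nx2nk(G)
scores = nk.centrality.PageRank(Gnk).run().scores()  # one score per node
print(dictKeys(valuesDict(scores, G.nodes), [0, 33]))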
def pagerank(G, nodes)
PageRank (networkx), restricted to nodes.
def pagerank_nk(G, Gnk, nodes)
PageRank via networkit, mapped back to networkx node labels.
def removedExcludedFeatures(features, excluded, included)
Filters the feature dict in place: when included is non-empty only those keys are kept, otherwise the excluded keys are removed.
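For example:

from BML.transform.nodes_features import removedExcludedFeatures

features = {"degree": None, "pagerank": None, "load": None}
print(removedExcludedFeatures(dict(features), [], ["degree"]))
# {'degree': None} -- a non-empty include list takes precedence
print(removedExcludedFeatures(dict(features), ["load"], []))
# {'degree': None, 'pagerank': None}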
def run(function, results, key, logFiles, verbose, *args)
Runs one feature function with its arguments, stores the result in results under key, and logs start/end times when verbose is set.
def square_clustering(G, nodes)
Square clustering coefficient (networkx), restricted to nodes.
def triangles(G, nodes)
Number of triangles through each node (networkx).
def triangles_nk(G, Gnk, nodes)
Triangle scores via networkit's TriangleEdgeScore, zipped against node labels (note: TriangleEdgeScore produces one score per edge, not per node).
def valuesDict(values, keys)
Builds a dict by zipping keys with values; used throughout to map networkit score lists back to networkx node labels.
Classes
class NodesFeatures (primingFile, dataFile, params, outFolder, logFiles)
Graph transform that computes a configurable set of per-node features for each input graph and saves them as JSON. Overridable params: use_networkit, all_nodes, nodes, exclude_features, include_features, verbose, nbProcessFeatures.
Ancestors: Graph (BML.transform)
Subclasses
Class variables
var fileExtension = ".json"
Methods
def computeFeatures(self, G, features_nx, features_nk)
Runs the networkit features in a background thread while the networkx features run in parallel processes, then merges the two result dicts.
def getFeatures(self, G, nodes)
Builds the feature tables as key -> (function, *args) entries: a networkx-only set plus, depending on use_networkit, either networkit-backed or pure-networkx variants of the remaining features.
def getNodes(self, G)
Resolves the target nodes: the nodes param if set, otherwise all nodes of G; when all_nodes is False, G is first restricted to its k-core at the 98th percentile of core numbers.
def save(self)
Delegates saving to BaseTransform.save.
def transforms(self, index, G)
Transform entry point for one graph: selects nodes, builds the feature tables, applies include/exclude filtering, and computes the remaining features.
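A hypothetical configuration sketch. The params keys are taken from __init__; the file paths and the exact I/O contract of the Graph/BaseTransform base classes are assumptions, not part of this module:

from BML.transform.nodes_features import NodesFeatures

params = {
    "use_networkit": True,  # networkit backends where available
    "include_features": ["degree", "pagerank", "clustering"],
    "nbProcessFeatures": 4,
    "verbose": True,
}
# primingFile, dataFile, outFolder and logFiles are placeholder values here;
# their meaning is defined by the base classes.
nf = NodesFeatures("priming.json", "graphs.pickle", params, "out/", ["run.log"])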