зеркало из https://github.com/microsoft/topologic.git
Modularity partitions (#41)
* Commit of current status prior to actually updating the tests. The current tests are less tests and more an exploration of why our modularity calculation differed from the python-louvain modularity calculation * Updating tests and documentation * Wanted to make sure we handled disconnected nodes appropriately * Added release notes * Restricting us to the 2.x versions of Sphinx, 3.0 breaks us Co-authored-by: Dwayne Pryce <dwpryce@microsoft.com>
This commit is contained in:
Родитель
f1b43627d6
Коммит
da0440722e
|
@ -1,4 +1,6 @@
|
|||
# Release Notes
|
||||
## 0.1.3
|
||||
- Added `modularity` and `modularity_components` functions, and deprecated `q_score`.
|
||||
## 0.1.2
|
||||
- Rename `self_loop_augmentation` to `diagonal_augmentation` and use weighted degree to perform calculation instead of degree only.
|
||||
- Fix bug when getting the length of edges when performing graph augmentations.
|
||||
|
|
2
setup.py
2
setup.py
|
@ -52,7 +52,7 @@ setuptools.setup(
|
|||
'pytest',
|
||||
'flake8',
|
||||
'mypy',
|
||||
'sphinx',
|
||||
'sphinx>=2.4.4,<3.0.0',
|
||||
'sphinx-rtd-theme',
|
||||
'testfixtures',
|
||||
'recommonmark'
|
||||
|
|
|
@ -4,8 +4,24 @@
|
|||
import networkx as nx
|
||||
import numpy as np
|
||||
from topologic import PartitionedGraph
|
||||
from topologic.partition import q_score
|
||||
from topologic.partition import modularity, modularity_components, q_score
|
||||
import unittest
|
||||
from typing import Dict
|
||||
import community # python-louvain module
|
||||
|
||||
from tests.utils import data_file
|
||||
|
||||
|
||||
def _modularity_graph() -> nx.Graph:
    """
    Build the small weighted, undirected fixture graph shared by the modularity tests.

    The graph consists of two connected components: ``a - b - c`` (edge weights 4.0 and 3.0)
    and ``e - f`` (edge weight 5.0), for a total edge weight of 12.0.

    :return: A newly constructed graph on every call
    :rtype: nx.Graph
    """
    weighted_edges = [
        ("a", "b", 4.0),
        ("b", "c", 3.0),
        ("e", "f", 5.0),
    ]
    fixture = nx.Graph()
    # The weights are stored under the default "weight" edge attribute.
    fixture.add_weighted_edges_from(weighted_edges)
    return fixture
|
||||
|
||||
|
||||
# Community assignment used with the graph built by _modularity_graph(): vertices a, b and c share
# community 0, while e and f share community 1.
_PARTITIONS: Dict[str, int] = {'a': 0, 'b': 0, 'c': 0, 'e': 1, 'f': 1}
|
||||
|
||||
|
||||
class TestModularity(unittest.TestCase):
|
||||
|
@ -14,13 +30,56 @@ class TestModularity(unittest.TestCase):
|
|||
q_score("foo")
|
||||
|
||||
def test_q_score(self):
    """
    The deprecated q_score() still reports the expected modularity for the shared fixture graph.
    """
    graph = _modularity_graph()
    partition = _PARTITIONS
    part_graph = PartitionedGraph(graph, partition)

    # Deliberately not named `modularity`: that would shadow the modularity() function imported
    # at the top of this module.
    modularity_value = q_score(part_graph)

    self.assertIsInstance(modularity_value, float)
    np.testing.assert_almost_equal(0.48611111111111105, modularity_value)
|
||||
|
||||
def test_modularity(self):
    """
    modularity() returns the hand-computed score for the shared fixture graph and partitioning.
    """
    # Hand calculation for the fixture (total edge weight, "links", is 4.0 + 3.0 + 5.0 = 12.0):
    #   community 0 (a, b, c): degree sum 14.0 -> (14.0 / 24.0) - (14.0 / 24.0) ** 2 = 0.24305555555555552
    #   community 1 (e, f):    degree sum 10.0 -> (10.0 / 24.0) - (10.0 / 24.0) ** 2 = 0.24305555555555555
    # Both communities are whole connected components, so every edge is internal to its community.
    expected = 0.48611111111111105

    actual = modularity(_modularity_graph(), _PARTITIONS)

    np.testing.assert_almost_equal(expected, actual)
|
||||
|
||||
def test_modularity_components(self):
    """
    modularity_components() yields one value per community, yields 0 for a community holding only an
    isolated vertex, and sums to roughly the modularity computed by python-louvain.
    """
    graph = nx.Graph()
    with open(data_file("large-graph.csv"), "r") as edge_list_io:
        for row in edge_list_io:
            source, target, raw_weight = row.strip().split(",")
            # Rows repeating the same vertex pair are accumulated into a single edge weight.
            existing = graph.get_edge_data(source, target, {"weight": 0})["weight"]
            graph.add_edge(source, target, weight=float(raw_weight) + existing)

    with open(data_file("large-graph-partitions.csv"), "r") as communities_io:
        assignments = (row.strip().split(",") for row in communities_io)
        partitions = {vertex: int(comm) for vertex, comm in assignments}

    # Put an isolated vertex into a brand-new community id to exercise the "no edges" case.
    highest_community = max(partitions.values())
    isolated_community = highest_community + 1
    graph.add_node("disconnected_node")
    partitions["disconnected_node"] = isolated_community

    components = modularity_components(graph, partitions)

    # Reference value from python-louvain.
    reference_total = community.modularity(partitions, graph)
    component_total = sum(components.values())

    self.assertSetEqual(set(components.keys()), set(partitions.values()))
    self.assertEqual(0, components[isolated_community])

    # Agreement is only asserted to 3 decimal places. The original author attributed the gap to
    # floating point differences: python-louvain halves networkx degree values before summing and
    # uses `**` where topologic uses math.pow.
    # NOTE(review): a gap of this size seems large for rounding noise alone - confirm that the two
    # implementations accumulate community degrees identically.
    np.testing.assert_almost_equal(reference_total, component_total, decimal=3)
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -88,3 +88,14 @@ def validate_minimal_graph(
|
|||
raise ValueError("The graph provided has no edges")
|
||||
if not nx.is_weighted(graph, weight=weight_attribute):
|
||||
raise ValueError("The graph provided is not fully weighted")
|
||||
|
||||
|
||||
def assert_is_undirected(graph: nx.Graph):
    """
    Validates that the provided graph is undirected.

    :param graph: Graph to check
    :raises ValueError: If ``graph.is_directed()`` reports that the graph is directed
    """
    if not graph.is_directed():
        return
    raise ValueError("graph must be an undirected graph")
|
||||
|
|
|
@ -1,8 +1,14 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
from .modularity import q_score
|
||||
from .modularity import modularity, modularity_components, q_score
|
||||
from .louvain_stub import louvain
|
||||
from .induce import induce_graph_by_communities
|
||||
|
||||
__all__ = ['induce_graph_by_communities', 'louvain', 'q_score']
|
||||
__all__ = [
|
||||
'induce_graph_by_communities',
|
||||
'louvain',
|
||||
'modularity',
|
||||
'modularity_components',
|
||||
'q_score'
|
||||
]
|
||||
|
|
|
@ -1,7 +1,12 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import math
|
||||
import networkx as nx
|
||||
import community
|
||||
from collections import defaultdict
|
||||
from typing import Any, Dict
|
||||
import warnings
|
||||
from .. import assertions
|
||||
from ..partitioned_graph import PartitionedGraph
|
||||
|
||||
|
@ -11,13 +16,15 @@ def q_score(
|
|||
weight_column: str = 'weight'
|
||||
) -> float:
|
||||
"""
|
||||
Deprecated: See modularity() for replacement.
|
||||
|
||||
Given a topologic PartitionedGraph, return the q score - or modularity of a graph.
|
||||
|
||||
See also: https://en.wikipedia.org/wiki/Modularity_(networks)
|
||||
|
||||
:param graph: Partitioned graph contains a dictionary of all the communities in a graph, optimized for
|
||||
:param partitioned_graph: Partitioned graph contains a dictionary of all the communities in a graph, optimized for
|
||||
best modularity. This partition structure is used when computing final q_score / modularity of graph.
|
||||
:type partitioned_graph: topologic.PartitionedGraph
|
||||
:type partitioned_graph: Optional[topologic.PartitionedGraph]
|
||||
:param str weight_column: weight column to use in computing modularity.
|
||||
:raise UnweightedGraphError: if graph does not contain weight_column in edge attributes
|
||||
:raise KeyError: If the partition is not a partition of all graph nodes. This should not occur if PartitionedGraph
|
||||
|
@ -27,6 +34,10 @@ def q_score(
|
|||
:return: q_score, or modularity, of this graph using the provided partitioning scheme.
|
||||
:rtype: float
|
||||
"""
|
||||
warnings.warn(
|
||||
"topologic.partition.q_score() has been deprecated in favor of topologic.partition.modularity()",
|
||||
DeprecationWarning
|
||||
)
|
||||
if isinstance(partitioned_graph, PartitionedGraph):
|
||||
partition = partitioned_graph.community_partitions
|
||||
extracted_graph = partitioned_graph.graph
|
||||
|
@ -35,3 +46,96 @@ def q_score(
|
|||
raise TypeError('Expected type topologic.PartitionedGraph')
|
||||
|
||||
return community.modularity(partition, extracted_graph, weight_column)
|
||||
|
||||
|
||||
def modularity(
    graph: nx.Graph,
    partitions: Dict[Any, int],
    weight_attribute: str = "weight",
    resolution: float = 1.0
) -> float:
    """
    Calculates the modularity of an undirected, weighted graph for a given assignment of vertices to communities.

    The score is the sum of the per-community values returned by ``modularity_components``.

    See also: https://en.wikipedia.org/wiki/Modularity_(networks)

    :param nx.Graph graph: An undirected graph
    :param Dict[Any, int] partitions: A dictionary representing a community partitioning scheme with the keys being the
        vertex and the value being a community id. Within topologic, these community ids are required to be ints.
    :param str weight_attribute: The edge data attribute on the graph that contains a float weight for the edge.
    :param float resolution: The resolution to use when calculating the modularity.
    :return: The modularity quality score for the given network and community partition schema.
    :rtype: float
    :raise TypeError: If the graph is not a networkx Graph
    :raise ValueError: If the graph is unweighted
    :raise ValueError: If the graph is directed
    """
    # Validate the input before doing any work.
    assertions.assert_is_graph(graph)
    assertions.assert_is_weighted(graph, weight_attribute)
    assertions.assert_is_undirected(graph)

    return sum(
        modularity_components(
            graph,
            partitions,
            weight_attribute=weight_attribute,
            resolution=resolution
        ).values()
    )
|
||||
|
||||
|
||||
def _modularity_component(
|
||||
degree_sum_within_community: float,
|
||||
degree_sum: float,
|
||||
total_network_edge_weight: float,
|
||||
resolution: float
|
||||
) -> float:
|
||||
degree_within_community_ratio = degree_sum_within_community / total_network_edge_weight
|
||||
community_degree_ratio = math.pow(degree_sum / (2.0 * total_network_edge_weight), 2.0)
|
||||
|
||||
return degree_within_community_ratio - resolution * community_degree_ratio
|
||||
|
||||
|
||||
def modularity_components(
    graph: nx.Graph,
    partitions: Dict[Any, int],
    weight_attribute: str = "weight",
    resolution: float = 1.0
) -> Dict[int, float]:
    """
    Given an undirected, weighted graph and a community partition dictionary, calculates a modularity quantum for each
    community ID. The sum of these quanta is the modularity of the graph and partitions provided.

    For each community ``c`` the quantum is::

        internal_weight(c) / m - resolution * (degree(c) / (2 * m)) ** 2

    where ``m`` is the total edge weight of the graph, ``internal_weight(c)`` is the summed weight of the edges whose
    endpoints are both in ``c`` (a self-loop counts once), and ``degree(c)`` is the summed weighted degree of the
    vertices in ``c`` (each edge contributes its weight to the community of *each* of its endpoints, so a self-loop
    contributes twice its weight).

    :param nx.Graph graph: An undirected graph
    :param Dict[Any, int] partitions: A dictionary representing a community partitioning scheme with the keys being the
        vertex and the value being a community id. Within topologic, these community ids are required to be ints.
    :param str weight_attribute: The edge data attribute on the graph that contains a float weight for the edge.
    :param float resolution: The resolution to use when calculating the modularity.
    :return: A dictionary of the community id to the modularity component of that community
    :rtype: Dict[int, float]
    :raise TypeError: If the graph is not a networkx Graph
    :raise ValueError: If the graph is unweighted
    :raise ValueError: If the graph is directed
    :raise KeyError: If an endpoint of any edge in the graph is missing from ``partitions``
    """

    assertions.assert_is_graph(graph)
    assertions.assert_is_weighted(graph, weight_attribute)
    assertions.assert_is_undirected(graph)

    total_edge_weight = 0.0

    communities = set(partitions.values())

    # Communities that never appear on an edge (e.g. ones holding only isolated vertices) fall back to 0.0.
    degree_sums_within_community: Dict[int, float] = defaultdict(float)
    degree_sums_for_community: Dict[int, float] = defaultdict(float)
    for vertex, neighbor_vertex, weight in graph.edges(data=weight_attribute):
        vertex_community = partitions[vertex]
        neighbor_community = partitions[neighbor_vertex]
        if vertex_community == neighbor_community:
            # Every internal edge, self-loops included, counts exactly once here because the value is later divided
            # by the total edge weight (m) rather than by 2m.
            degree_sums_within_community[vertex_community] += weight
        # Each endpoint's community receives the edge weight. Crediting both halves to the first endpoint's
        # community would misattribute inter-community edges and make the result depend on edge orientation.
        # For a self-loop both lines hit the same community, giving the conventional 2 * weight degree.
        degree_sums_for_community[vertex_community] += weight
        degree_sums_for_community[neighbor_community] += weight
        total_edge_weight += weight

    return {
        comm: _modularity_component(
            degree_sums_within_community[comm],
            degree_sums_for_community[comm],
            total_edge_weight,
            resolution
        )
        for comm in communities
    }
|
||||
|
|
Загрузка…
Ссылка в новой задаче