refactor: migrate igraph network_p2p

2020-12-18 11:06:06 +00:00 · 2020-12-18 11:06:06 +00:00 · 8574145fc1
--- a/1
+++ b/1
@ -152,7 +152,6 @@ import(Information)
 import(dplyr)
 import(ggplot2)
 import(ggraph)
-import(network)
 import(reshape2)
 import(scales)
 import(tidyselect)
--- a/R/network_p2p.R
+++ b/R/network_p2p.R
@ -12,25 +12,40 @@
 #' @param data Data frame containing a person-to-person query.
 #' @param hrvar String containing the label for the HR attribute.
 #' @param return Character vector specifying what to return, defaults to "pdf".
-#' Other valid inputs are "plot", "network" and "table". "network" returns the `network`
-#' object used to generate the network plot. The "pdf" option is highly recommended over the "plot"
-#' option as the PDF export format has significantly higher performance. The "plot" option is highly
-#' computationally intensive and is not generally recommended.
+#' Valid inputs are:
+#'   - "pdf": saves the network plot as a PDF in the specified path. See `path`. This is the recommended
+#'   output format as large networks can be slow in other formats.
+#'   - "plot": returns a ggplot object of the network plot. It is not recommended that you run this without
+#'   assigning the output to an object as plotting to the R console can be slow for large networks.
+#'   - "table": returns the edgelist data frame used in the network.
+#'   - "network": returns the igraph object used to create the network plot.
 #' @param path File path for saving the PDF output. Defaults to "network_p2p".
 #' @param bg_fill String to specify background fill colour.
 #' @param font_col String to specify font and link colour.
 #' @param legend_pos String to specify position of legend. Defaults to "bottom". See `ggplot2::theme()`.
-#' @param palette Function for generating a colour palette with a single argument `n`. Uses `rainbow()` by default.
-#' @param ... Additional arguments to pass to `GGally::ggnet2()`.
-#' For instance, you may specify the argument `mode` to change the node placement algorithm.
+#' @param palette Function for generating a colour palette with a single argument `n`. Uses "rainbow" by default.
+#' @param node_alpha A numeric value between 0 and 1 to specify the transparency of the nodes.
+#' @param algorithm String to specify the node placement algorithm to be used. Defaults to "fr" for the force-directed
+#' algorithm of Fruchterman and Reingold. See <https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html> for a
+#' full list of options.
 #'
-#' @details
-#' For specifying the node placement algorithm, please see the `gplot.layout` documentation for the **sna**
-#' package, which provides a full list of the valid functions that can be passed into the `mode` argument.
+#' @examples
+#' ## Simulate simple P2P network
+#' sim_net <-
+#'   data.frame(TieOrigin_PersonId = sample(seq(1, 100), size = 100, replace = TRUE),
+#'              TieDestination_PersonId = sample(seq(1, 100), size = 100, replace = TRUE)) %>%
+#'   dplyr::mutate(TieOrigin_Organization = ifelse(TieOrigin_PersonId >= 50, "A", "B"),
+#'                 TieDestination_Organization = ifelse(TieDestination_PersonId >= 50, "A", "B"),
+#'                 StrongTieScore = rep(1, 100))
+#'
+#' ## Run plot
+#' ## ONLY return 'plot' instead of 'pdf' when data size is small
+#' network_p2p(data = sim_net,
+#'             hrvar = "Organization",
+#'             return = "plot")
 #'
 #' @import ggplot2
 #' @import dplyr
-#' @import network
 #' @importFrom grDevices rainbow
 #'
 #' @export
@ -42,44 +57,52 @@ network_p2p <- function(data,
                        font_col = "#FFFFFF",
                        legend_pos = "bottom",
                        palette = "rainbow",
-                        ...){
-
-  data <-
-    data %>%
-    filter(StrongTieType > 0)
+                        node_alpha = 0.7,
+                        algorithm = "fr"){

+  ## No filtering
  tieorigin_var <- paste0("TieOrigin_", hrvar)
  tiedestin_var <- paste0("TieDestination_", hrvar)

-
-  ## Extract edge list
-  strong_edgelist <-
+  ## Set edges df
+  edges <-
    data %>%
-    select(TieOrigin_PersonId, TieDestination_PersonId)
+    select(from = "TieOrigin_PersonId",
+           to = "TieDestination_PersonId",
+           weight = "StrongTieScore") %>%
+    select(-weight) # Overwrite - no info on edge

-  ## Create basic network object
-  mynet <-
-    network::network(strong_edgelist, matrix.type="edgelist")
+  ## Vertices data frame to provide meta-data
+  vert_ft <-
+    rbind(
+      # TieOrigin
+      edges %>%
+        left_join(select(data, TieOrigin_PersonId, tieorigin_var),
+                  by = c("from"  = "TieOrigin_PersonId")) %>%
+        select(node = "from", !!sym(hrvar) := tieorigin_var),

-  ## Extract list of nodes from network object
-  myedges <- data.frame(PersonId = network.vertex.names(mynet))
+      # TieDestination
+      edges %>%
+        left_join(select(data, TieDestination_PersonId, tiedestin_var),
+                  by = c("to"  = "TieDestination_PersonId")) %>%
+        select(node = "to", !!sym(hrvar) := tiedestin_var)
+    )

-  ## Extract attributes
-  totalAttributes <-
-    bind_rows(select(data, PersonId = "TieOrigin_PersonId", !!sym(hrvar) := tieorigin_var),
-              select(data, PersonId = "TieOrigin_PersonId", !!sym(hrvar) := tiedestin_var)) %>%
-    unique()
-
-  ## Merge list of nodes and attributes
-  myedges <- merge(myedges, totalAttributes, by = "PersonId", all.x = TRUE)
-
-  ## Add attributes to network object
-  mynet %v% hrvar <- myedges[[hrvar]]
+  ## Create igraph object
+  g <-
+    igraph::graph_from_data_frame(edges,
+                                  directed = FALSE, # Directed, but FALSE for visualization
+                                  vertices = unique(vert_ft)) %>% # remove duplicates
+    igraph::simplify()

  ## Palette
+  ## Create tibble
+  pal <-
+    tibble(!!sym(hrvar) := g %>%
+             igraph::get.vertex.attribute(hrvar) %>%
+             unique())

-  pal <- tibble(!!sym(hrvar) := unique(myedges[[hrvar]]))
-
+  ## Apply palette function
  col_pal <- do.call(what = palette, args = list(nrow(pal)))

  ## named character vector
@ -90,24 +113,21 @@ network_p2p <- function(data,

  if(return == "table"){

-    data
+    edges

  } else if(return == "network"){

-    mynet
+    g

  } else if(return %in% c("plot", "pdf")){

    outputPlot <-
-      mynet %>%
-      GGally::ggnet2(size = 1,
-                     color = hrvar,
-                     label = FALSE,
-                     edge.size = .01,
-                     edge.alpha = .15,
-                     node.alpha =  .8,
-                     palette = pal,
-                     ...) +
+      g %>%
+      ggraph::ggraph(layout = "igraph", algorithm = algorithm) +
+      ggraph::geom_edge_link(colour = "lightgrey", edge_width = 0.01, alpha = 0.15) +
+      ggraph::geom_node_point(aes(colour = !!sym(hrvar)), alpha = node_alpha) +
+      scale_colour_discrete(type = pal) +
+      theme_void() +
      theme(legend.position = legend_pos,
            legend.background = element_rect(fill = bg_fill),
            plot.background = element_rect(fill = bg_fill),
--- a/man/network_p2p.Rd
+++ b/man/network_p2p.Rd
@ -13,7 +13,8 @@ network_p2p(
  font_col = "#FFFFFF",
  legend_pos = "bottom",
  palette = "rainbow",
-  ...
+  node_alpha = 0.7,
+  algorithm = "fr"
 )
 }
 \arguments{
@ -22,10 +23,15 @@ network_p2p(
 \item{hrvar}{String containing the label for the HR attribute.}

 \item{return}{Character vector specifying what to return, defaults to "pdf".
-Other valid inputs are "plot", "network" and "table". "network" returns the \code{network}
-object used to generate the network plot. The "pdf" option is highly recommended over the "plot"
-option as the PDF export format has significantly higher performance. The "plot" option is highly
-computationally intensive and is not generally recommended.}
+Valid inputs are:
+\itemize{
+\item "pdf": saves the network plot as a PDF in the specified path. See \code{path}. This is the recommended
+output format as large networks can be slow in other formats.
+\item "plot": returns a ggplot object of the network plot. It is not recommended that you run this without
+assigning the output to an object as plotting to the R console can be slow for large networks.
+\item "table": returns the edgelist data frame used in the network.
+\item "network": returns the igraph object used to create the network plot.
+}}

 \item{path}{File path for saving the PDF output. Defaults to "network_p2p".}

@ -35,16 +41,31 @@ computationally intensive and is not generally recommended.}

 \item{legend_pos}{String to specify position of legend. Defaults to "bottom". See \code{ggplot2::theme()}.}

-\item{palette}{Function for generating a colour palette with a single argument \code{n}. Uses \code{rainbow()} by default.}
+\item{palette}{Function for generating a colour palette with a single argument \code{n}. Uses "rainbow" by default.}

-\item{...}{Additional arguments to pass to \code{GGally::ggnet2()}.
-For instance, you may specify the argument \code{mode} to change the node placement algorithm.}
+\item{node_alpha}{A numeric value between 0 and 1 to specify the transparency of the nodes.}
+
+\item{algorithm}{String to specify the node placement algorithm to be used. Defaults to "fr" for the force-directed
+algorithm of Fruchterman and Reingold. See \url{https://rdrr.io/cran/ggraph/man/layout_tbl_graph_igraph.html} for a
+full list of options.}
 }
 \description{
 Pass a data frame containing a person-to-person query and save a network
 plot as a PDF file.
 }
-\details{
-For specifying the node placement algorithm, please see the \code{gplot.layout} documentation for the \strong{sna}
-package, which provides a full list of the valid functions that can be passed into the \code{mode} argument.
+\examples{
+## Simulate simple P2P network
+sim_net <-
+  data.frame(TieOrigin_PersonId = sample(seq(1, 100), size = 100, replace = TRUE),
+             TieDestination_PersonId = sample(seq(1, 100), size = 100, replace = TRUE)) \%>\%
+  dplyr::mutate(TieOrigin_Organization = ifelse(TieOrigin_PersonId >= 50, "A", "B"),
+                TieDestination_Organization = ifelse(TieDestination_PersonId >= 50, "A", "B"),
+                StrongTieScore = rep(1, 100))
+
+## Run plot
+## ONLY return 'plot' instead of 'pdf' when data size is small
+network_p2p(data = sim_net,
+            hrvar = "Organization",
+            return = "plot")
+
 }