зеркало из https://github.com/microsoft/wpa.git
feat: add p2p_data_sim
New method for simulating p2p data
This commit is contained in:
Родитель
bc9b7295b8
Коммит
bee1287312
|
@ -118,6 +118,7 @@ export(one2one_rank)
|
|||
export(one2one_sum)
|
||||
export(one2one_summary)
|
||||
export(one2one_trend)
|
||||
export(p2p_data_sim)
|
||||
export(pairwise_count)
|
||||
export(period_change)
|
||||
export(personas_hclust)
|
||||
|
@ -171,6 +172,7 @@ importFrom(dplyr,mutate_if)
|
|||
importFrom(grDevices,rainbow)
|
||||
importFrom(htmltools,HTML)
|
||||
importFrom(igraph,graph_from_data_frame)
|
||||
importFrom(igraph,layout_with_mds)
|
||||
importFrom(igraph,plot.igraph)
|
||||
importFrom(magrittr,"%>%")
|
||||
importFrom(markdown,markdownToHTML)
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
#' @title Simulate a person-to-person query using a Watts-Strogatz model
#'
#' @description Generate a person-to-person query / edgelist based on the graph
#' according to the Watts-Strogatz small-world network model. Organizational data
#' fields are also simulated for `Organization`, `LevelDesignation`, and `City`.
#'
#' @param dim Integer constant, the dimension of the starting lattice.
#' @param size Integer constant, the size of the lattice along each dimension.
#' @param nei Integer constant, the neighborhood within which the vertices of
#' the lattice will be connected.
#' @param p Real constant between zero and one, the rewiring probability.
#'
#' @details
#' This is a wrapper around `igraph::watts.strogatz.game()`. See igraph documentation
#' for details on methodology. Loop edges and multiple edges are disabled. Size of the
#' network can be changed by changing the arguments `size` and `nei`.
#'
#' @return A data frame with one row per simulated edge and the columns
#' `TieOrigin_PersonId`, `TieDestination_PersonId` (vertex ids prefixed with
#' `"SIM_ID_"`), `TieOrigin_Organization`, `TieDestination_Organization`,
#' `TieOrigin_LevelDesignation`, `TieDestination_LevelDesignation`,
#' `TieOrigin_City`, `TieDestination_City`, and `StrongTieScore` (always `1`).
#'
#' @examples
#' # Simulate a p2p dataset with 800 edges
#' p2p_data_sim(size = 200, nei = 4)
#'
#' @export
p2p_data_sim <- function(dim = 1,
                         size = 300,
                         nei = 5,
                         p = 0.05){

  # Forward the caller's arguments to igraph. Previously `size`, `nei` and `p`
  # were hard-coded to their defaults here, so user-supplied values were
  # silently ignored.
  igraph::watts.strogatz.game(dim = dim,
                              size = size,
                              nei = nei,
                              p = p) %>%
    igraph::as_edgelist() %>%
    as.data.frame() %>%
    dplyr::rename(TieOrigin_PersonId = "V1",
                  TieDestination_PersonId = "V2") %>%
    # Organizational attributes are derived from the numeric vertex ids, so
    # this step must run before the ids are turned into "SIM_ID_" strings.
    dplyr::mutate(TieOrigin_Organization = add_cat(TieOrigin_PersonId, "Organization"),
                  TieDestination_Organization = add_cat(TieDestination_PersonId, "Organization"),
                  TieOrigin_LevelDesignation = add_cat(TieOrigin_PersonId, "LevelDesignation"),
                  TieDestination_LevelDesignation = add_cat(TieDestination_PersonId, "LevelDesignation"),
                  TieOrigin_City = add_cat(TieOrigin_PersonId, "City"),
                  TieDestination_City = add_cat(TieDestination_PersonId, "City")) %>%
    # Prefix both id columns so they read as simulated person identifiers.
    dplyr::mutate_at(dplyr::vars(dplyr::ends_with("PersonId")),
                     ~paste0("SIM_ID_", .)) %>%
    # Every simulated edge gets the same constant tie score.
    dplyr::mutate(StrongTieScore = 1)
}
|
||||
|
||||
#' Add organizational data to the simulated p2p data
#'
#' Derives a categorical label from numeric ids using fixed arithmetic rules,
#' so the same id always receives the same label.
#'
#' @param x Numeric vector of ids.
#' @param type String, the attribute to generate. One of `"Organization"`,
#' `"LevelDesignation"`, or `"City"`.
#'
#' @return A character vector of labels derived element-wise from `x`. An
#' error is raised if `type` is not one of the supported values.
add_cat <- function(x, type){

  if(type == "Organization"){

    # Conditions are evaluated in order and the first match wins.
    dplyr::case_when((x %% 7 == 0) ~ "Org A",
                     (x %% 6 == 0) ~ "Org B",
                     (x %% 5 == 0) ~ "Org C",
                     (x %% 4 == 0) ~ "Org D",
                     (x %% 3 == 0) ~ "Org E",
                     x < 100 ~ "Org F",
                     (x %% 2 == 0) ~ "Org G", # Even number
                     TRUE ~ "Org H")

  } else if(type == "LevelDesignation"){

    # Extract first digit (first character of the id's text representation)
    paste("Level", substr(x, 1, 1))

  } else if(type == "City"){

    dplyr::case_when((x %% 3 == 0) ~ "City A", # Divisible by 3
                     (x %% 2 == 0) ~ "City B",
                     TRUE ~ "City C")

  } else {

    # Previously an unknown `type` fell through and returned NULL, which
    # silently drops the column when used inside `dplyr::mutate()`.
    stop("Unsupported `type`: \"", type,
         "\". Expected one of \"Organization\", \"LevelDesignation\", \"City\".",
         call. = FALSE)

  }
}
|
|
@ -0,0 +1,11 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/p2p_data_sim.R
|
||||
\name{add_cat}
|
||||
\alias{add_cat}
|
||||
\title{Add organizational data to the simulated p2p data}
|
||||
\usage{
|
||||
add_cat(x, type)
|
||||
}
|
||||
\description{
|
||||
Add organizational data to the simulated p2p data
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/p2p_data_sim.R
|
||||
\name{p2p_data_sim}
|
||||
\alias{p2p_data_sim}
|
||||
\title{Simulate a person-to-person query using a Watts-Strogatz model}
|
||||
\usage{
|
||||
p2p_data_sim(dim = 1, size = 300, nei = 5, p = 0.05)
|
||||
}
|
||||
\arguments{
|
||||
\item{dim}{Integer constant, the dimension of the starting lattice.}
|
||||
|
||||
\item{size}{Integer constant, the size of the lattice along each dimension.}
|
||||
|
||||
\item{nei}{Integer constant, the neighborhood within which the vertices of
|
||||
the lattice will be connected.}
|
||||
|
||||
\item{p}{Real constant between zero and one, the rewiring probability.}
|
||||
}
|
||||
\description{
|
||||
Generate a person-to-person query / edgelist based on the graph
|
||||
according to the Watts-Strogatz small-world network model. Organizational data
|
||||
fields are also simulated for \code{Organization}, \code{LevelDesignation}, and \code{City}.
|
||||
}
|
||||
\details{
|
||||
This is a wrapper around \code{igraph::watts.strogatz.game()}. See igraph documentation
|
||||
for details on methodology. Loop edges and multiple edges are disabled. Size of the
|
||||
network can be changed by changing the arguments \code{size} and \code{nei}.
|
||||
}
|
||||
\examples{
|
||||
# Simulate a p2p dataset with 800 edges
|
||||
p2p_data_sim(size = 200, nei = 4)
|
||||
|
||||
}
|
Загрузка…
Ссылка в новой задаче