feat: add pairwise_count

A data.table implementation of the original widyr::pairwise_count() function
2020-12-18 13:45:35 +00:00 · 2020-12-18 13:45:35 +00:00 · 6386f99651
--- a/3
+++ b/3
@ -114,6 +114,7 @@ export(one2one_rank)
 export(one2one_sum)
 export(one2one_summary)
 export(one2one_trend)
+export(pairwise_count)
 export(period_change)
 export(personas_hclust)
 export(read_preamble)
@ -149,6 +150,7 @@ export(workpatterns_rank)
 export(wrap)
 import(DT)
 import(Information)
+import(data.table)
 import(dplyr)
 import(ggplot2)
 import(ggraph)
@ -190,4 +192,3 @@ importFrom(tidyselect,all_of)
 importFrom(tidytext,unnest_tokens)
 importFrom(utils,write.csv)
 importFrom(utils,write.table)
-importFrom(widyr,pairwise_count)
--- a/R/pairwise_count.R
+++ b/R/pairwise_count.R
@ -0,0 +1,48 @@
+#' @title Perform a pairwise count of words by id
+#'
+#' @description This is a **data.table** implementation that mimics the output of
+#' `widyr::pairwise_count()` to reduce package dependency. This is used internally
+#' within `tm_cooc()`.
+#'
+#' @param data Data frame output from `tm_clean()`.
+#' @param id String to represent the id variable. Defaults to "line".
+#' @param word String to represent the word variable. Defaults to "word".
+#' @return A tibble with columns `item1`, `item2` and `n`, sorted by descending `n`.
+#' @import data.table
+#'
+#' @export
+pairwise_count <- function(data,
+                           id = "line",
+                           word = "word"){
+
+  data <-
+    data %>%
+    dplyr::rename(word := !!sym(word),  # give the chosen columns fixed names so the
+                  id := !!sym(id))      # data.table code below can use `word` / `id`
+
+
+  DT <- data.table::as.data.table(data)
+
+  # coerce the word column to character (updated by reference)
+  DT[, word := as.character(word)]
+
+  # add the per-id row count `N` and keep ids with more than one row: combn(word, 2) needs at least two elements
+  DT2 <- DT[, N := .N, by = id][N>1]
+
+  # within each id, enumerate every 2-element combination of its words (in row order);
+  # each pair becomes a one-row data.table with columns `V1` and `V2`, row-bound per id,
+  # then the rows are counted per (V1, V2) key across all ids
+  out_data <-
+    DT2[, rbindlist(utils::combn(word,2,
+                        FUN = function(x) as.data.table(as.list(x)),
+                        simplify = FALSE)), by = id] %>%
+    .[, .N, by = list(V1,V2)]
+  # NOTE(review): the key is the ordered pair, so (a, b) and (b, a) are tallied separately - confirm intended
+  # convert to a tibble, rename V1/V2/N to item1/item2/n, and sort by descending count
+  out_data %>%
+    dplyr::as_tibble() %>%
+    dplyr::rename(item1 = "V1",
+                  item2 = "V2",
+                  n = "N") %>%
+    dplyr::arrange(desc(n))
+}
--- a/man/pairwise_count.Rd
+++ b/man/pairwise_count.Rd
@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pairwise_count.R
+\name{pairwise_count}
+\alias{pairwise_count}
+\title{Perform a pairwise count of words by id}
+\usage{
+pairwise_count(data, id = "line", word = "word")
+}
+\arguments{
+\item{data}{Data frame output from \code{tm_clean()}.}
+
+\item{id}{String to represent the id variable. Defaults to "line".}
+
+\item{word}{String to represent the word variable. Defaults to "word".}
+}
+\description{
+This is a \strong{data.table} implementation that mimics the output of
+\code{widyr::pairwise_count()} to reduce package dependency. This is used internally
+within \code{tm_cooc()}.
+}