зеркало из https://github.com/microsoft/wpa.git
feat: add pairwise_count
A data.table implementation of the original widyr::pairwise_count() function
This commit is contained in:
Родитель
626c95fed1
Коммит
6386f99651
|
@ -114,6 +114,7 @@ export(one2one_rank)
|
||||||
export(one2one_sum)
|
export(one2one_sum)
|
||||||
export(one2one_summary)
|
export(one2one_summary)
|
||||||
export(one2one_trend)
|
export(one2one_trend)
|
||||||
|
export(pairwise_count)
|
||||||
export(period_change)
|
export(period_change)
|
||||||
export(personas_hclust)
|
export(personas_hclust)
|
||||||
export(read_preamble)
|
export(read_preamble)
|
||||||
|
@ -149,6 +150,7 @@ export(workpatterns_rank)
|
||||||
export(wrap)
|
export(wrap)
|
||||||
import(DT)
|
import(DT)
|
||||||
import(Information)
|
import(Information)
|
||||||
|
import(data.table)
|
||||||
import(dplyr)
|
import(dplyr)
|
||||||
import(ggplot2)
|
import(ggplot2)
|
||||||
import(ggraph)
|
import(ggraph)
|
||||||
|
@ -190,4 +192,3 @@ importFrom(tidyselect,all_of)
|
||||||
importFrom(tidytext,unnest_tokens)
|
importFrom(tidytext,unnest_tokens)
|
||||||
importFrom(utils,write.csv)
|
importFrom(utils,write.csv)
|
||||||
importFrom(utils,write.table)
|
importFrom(utils,write.table)
|
||||||
importFrom(widyr,pairwise_count)
|
|
||||||
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
#' @title Perform a pairwise count of words by id
#'
#' @description This is a **data.table** implementation that mimics the output of
#' `widyr::pairwise_count()` to reduce package dependency. This is used internally
#' within `tm_cooc()`.
#'
#' @details Only ids that contain more than one word contribute pairs. Within
#' each id, pairs are generated with `utils::combn()` in the order the words
#' appear, so each pair is emitted once per id as (`item1`, `item2`) with
#' `item1` appearing before `item2` in that id. The same two words seen in the
#' opposite order under another id are therefore counted as a separate row.
#'
#' @param data Data frame output from `tm_clean()`.
#' @param id String to represent the id variable. Defaults to "line".
#' @param word String to represent the word variable. Defaults to "word".
#'
#' @return A tibble with the columns `item1` and `item2` (character) and `n`
#' (integer count of ids in which the pair occurs), sorted by descending `n`.
#' A zero-row tibble with the same columns is returned when no id holds more
#' than one word.
#'
#' @examples
#' pairwise_count(
#'   data.frame(
#'     line = c(1, 1, 2, 2),
#'     word = c("a", "b", "a", "b")
#'   )
#' )
#'
#' @import data.table
#'
#' @export
pairwise_count <- function(data,
                           id = "line",
                           word = "word") {

  # Fail early with a readable message rather than deep inside data.table
  missing_cols <- setdiff(c(id, word), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `data`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Build a fresh two-column table with fixed names. This avoids renaming
  # columns in place (which can clash with existing `id` / `word` columns)
  # and leaves the caller's object untouched.
  DT <- data.table::data.table(
    id = data[[id]],
    word = as.character(data[[word]])
  )

  # subset those with >1 per id; a single word cannot form a pair
  DT[, N := .N, by = id]
  DT2 <- DT[N > 1L]

  # Nothing to pair up: return an empty result with the usual columns
  if (nrow(DT2) == 0L) {
    return(
      dplyr::tibble(
        item1 = character(0),
        item2 = character(0),
        n = integer(0)
      )
    )
  }

  # create all combinations of 2 within each id
  # `combn()` returns a 2 x k character matrix; its rows become the
  # columns `V1` and `V2`
  pairs <- DT2[, {
    combos <- utils::combn(word, 2L)
    list(V1 = combos[1L, ], V2 = combos[2L, ])
  }, by = id]

  # then count how often each (V1, V2) pair occurs across ids
  counts <- pairs[, .N, by = list(V1, V2)]

  # format and sort
  out <- dplyr::as_tibble(counts)
  out <- dplyr::rename(out,
                       item1 = "V1",
                       item2 = "V2",
                       n = "N")
  dplyr::arrange(out, dplyr::desc(n))
}
|
|
@ -0,0 +1,20 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/pairwise_count.R
|
||||||
|
\name{pairwise_count}
|
||||||
|
\alias{pairwise_count}
|
||||||
|
\title{Perform a pairwise count of words by id}
|
||||||
|
\usage{
|
||||||
|
pairwise_count(data, id = "line", word = "word")
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{data}{Data frame output from \code{tm_clean()}.}
|
||||||
|
|
||||||
|
\item{id}{String to represent the id variable. Defaults to "line".}
|
||||||
|
|
||||||
|
\item{word}{String to represent the word variable. Defaults to "word".}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
This is a \strong{data.table} implementation that mimics the output of
|
||||||
|
\code{widyr::pairwise_count()} to reduce package dependency. This is used internally
|
||||||
|
within \code{tm_cooc()}.
|
||||||
|
}
|
Загрузка…
Ссылка в новой задаче