Merge branch 'main' into refactor/streamline-network_p2p

This commit is contained in:
Martin Chan 2023-08-16 11:22:27 +01:00
Родитель 4d261c6c44 7af58b4097
Коммит e801b49137
26 изменённых файлов: 368 добавлений и 55036 удалений

2
.github/workflows/R-CMD-check.yaml поставляемый
Просмотреть файл

@ -7,7 +7,7 @@ jobs:
runs-on: macOS-latest
steps:
- uses: actions/checkout@v2
- uses: r-lib/actions/setup-r@master
- uses: r-lib/actions/setup-r@v2-branch
- name: Install dependencies
run: |
install.packages(c("remotes", "rcmdcheck"))

Просмотреть файл

@ -1,3 +1,3 @@
Version: 1.7.0
Date: 2022-06-06 13:37:15 UTC
SHA: 7aefaa599891ef7c20db669a8f52b59e80ef48ef
Version: 1.8.1
Date: 2023-01-27 11:02:41 UTC
SHA: da6fe2da817d04f06e90017f92f82cb788d093df

Просмотреть файл

@ -1,7 +1,7 @@
Package: wpa
Type: Package
Title: Tools for Analysing and Visualising Viva Insights Data
Version: 1.7.0.9000
Version: 1.8.1
Authors@R: c(
person(given = "Martin", family = "Chan", role = c("aut", "cre"), email = "martin.chan@microsoft.com"),
person(given = "Carlos", family = "Morales", role = "aut", email = "carlos.morales@microsoft.com"),

Просмотреть файл

@ -96,6 +96,7 @@ export(hrvar_count)
export(hrvar_count_all)
export(hrvar_trend)
export(identify_churn)
export(identify_datefreq)
export(identify_holidayweeks)
export(identify_inactiveweeks)
export(identify_nkw)
@ -148,6 +149,7 @@ export(one2one_summary)
export(one2one_trend)
export(p2p_data_sim)
export(p_test)
export(pad2)
export(pairwise_count)
export(period_change)
export(personas_hclust)

Просмотреть файл

@ -1,7 +1,13 @@
# wpa (development version)
# wpa 1.8.1
- Fixed an issue caused by a 'tidyr' update (see #233).
# wpa 1.8.0
- Updated and improved output and algorithm for `workpatterns_classify()`
- Additional visual options for `workpatterns_classify()` and `flex_index()`
- Added the `external_*` family of functions to visualize the metric `Collaboration_hours_external`
- Added experimental function `identify_datefreq()` for detecting date frequency (#131)
# wpa 1.7.0

Просмотреть файл

@ -18,7 +18,7 @@
#' @inherit create_dist return
#'
#' @family Visualization
#' @family External
#' @family External Collaboration
#'
#' @examples
#' # Return plot
@ -38,13 +38,18 @@ external_dist <- function(data,
return = "plot",
cut = c(5, 10, 15)) {
# Calculate Internal / External Collaboration time
plot_data <- data %>% mutate(External_collaboration_hours = Collaboration_hours_external)
# Rename metric
plot_data <-
data %>%
mutate(External_collaboration_hours = Collaboration_hours_external)
plot_data %>% create_dist(metric = "External_collaboration_hours",
hrvar = hrvar,
mingroup = mingroup,
return = return,
cut = cut,
dist_colours = c("#3F7066", "#64B4A4", "#B1EDE1","#CBF3EB"))
plot_data %>%
create_dist(
metric = "External_collaboration_hours",
hrvar = hrvar,
mingroup = mingroup,
return = return,
cut = cut,
dist_colours = c("#3F7066", "#64B4A4", "#B1EDE1","#CBF3EB")
)
}

Просмотреть файл

@ -14,10 +14,10 @@
#' @inherit create_stacked return
#'
#' @family Visualization
#' @family External
#' @family External Collaboration
#'
#' @examples
#' # Return a plot
#' # Return a plot
#' external_sum(sq_data, hrvar = "LevelDesignation")
#'
#' # Return summary table
@ -32,9 +32,9 @@ external_sum <- function(data,
return = "plot"){
# Calculate Internal / External Collaboration time
plot_data <- data %>% mutate(Internal_hours= Collaboration_hours - Collaboration_hours_external) %>% mutate(External_hours= Collaboration_hours_external)
# Calculate Internal / External Collaboration time
plot_data <- data %>% mutate(Internal_hours= Collaboration_hours - Collaboration_hours_external) %>% mutate(External_hours= Collaboration_hours_external)
# Plot Internal / External Collaboration time by Organization
plot_data %>% create_stacked(hrvar = hrvar, metrics = c("Internal_hours", "External_hours"), plot_title = "Internal and External Collaboration Hours", stack_colours = stack_colours, mingroup = mingroup, return = return)

Просмотреть файл

@ -257,6 +257,9 @@ utils::globalVariables(
"HRAttribute",
"identifier",
"PersonasNet",
"Unique values"
"Unique values",
"Collaboration_hours_external",
".GRP",
"Id"
)
)

118
R/identify_datefreq.R Normal file
Просмотреть файл

@ -0,0 +1,118 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
#' @title Identify date frequency based on a series of dates
#'
#' @description
#' `r lifecycle::badge('experimental')`
#'
#' Takes a vector of dates and identify whether the frequency is 'daily',
#' 'weekly', or 'monthly'. The primary use case for this function is to provide
#' an accurate description of the query type used and for raising errors should
#' a wrong date grouping be used in the data input.
#'
#' @param x Vector containing a series of dates, of class `Date` or `POSIXt`.
#'   Missing values and repeated dates are dropped before classification.
#'
#' @details
#' Date frequency detection applies the following checks in order, and the
#' first check that passes determines the result:
#'   1. If every date falls in a different calendar month (year and month
#'   combined), then the series is classified as 'monthly'
#'   2. If every date in the series is a Sunday, then the series is classified
#'   as 'weekly'
#'   3. If at least three distinct days of the week are present (e.g., Monday,
#'   Wednesday, Thursday) in the series, then the series is classified as
#'   'daily'
#'
#' The 'monthly' check runs first because a monthly series usually lands on
#' several different days of the week and would otherwise be mistaken for a
#' 'daily' series.
#'
#' @section Limitations:
#' One of the assumptions made behind the classification is that weeks are
#' denoted with Sundays, hence the count of Sundays to measure the number of
#' weeks. In this case, a weekly series where a date is not a Sunday would not
#' be classified as 'weekly', and the function may return the string
#' `"Unable to identify date frequency."` instead.
#'
#' Another assumption made is that dates are evenly distributed, i.e. that the
#' gap between dates are equal. If dates are unevenly distributed, e.g. only two
#' days of the week are available for a given week, then the algorithm will fail
#' to identify the frequency as 'daily'.
#'
#' Very short series are inherently ambiguous: for instance, a single date, or
#' two dates that fall in different months, are classified as 'monthly'.
#'
#' @return
#' String describing the detected date frequency, i.e.:
#'   - 'daily'
#'   - 'weekly'
#'   - 'monthly'
#'
#' If none of the checks pass, or if `x` contains no non-missing dates, the
#' string `"Unable to identify date frequency."` is returned.
#'
#' @examples
#' start_date <- as.Date("2022/06/26")
#' end_date <- as.Date("2022/11/27")
#'
#' # Daily
#' day_seq <-
#'   seq.Date(
#'     from = start_date,
#'     to = end_date,
#'     by = "day"
#'   )
#'
#' identify_datefreq(day_seq)
#'
#' # Weekly
#' week_seq <-
#'   seq.Date(
#'     from = start_date,
#'     to = end_date,
#'     by = "week"
#'   )
#'
#' identify_datefreq(week_seq)
#'
#' # Monthly
#' month_seq <-
#'   seq.Date(
#'     from = start_date,
#'     to = end_date,
#'     by = "month"
#'   )
#' identify_datefreq(month_seq)
#'
#' @export
identify_datefreq <- function(x) {

  if (!inherits(x, c("Date", "POSIXt"))) {
    stop("`x` must be a vector of class 'Date' or 'POSIXt'.", call. = FALSE)
  }

  unable_msg <- "Unable to identify date frequency."

  # Missing and repeated dates carry no information about the spacing
  x <- unique(x[!is.na(x)])

  if (length(x) == 0) {
    return(unable_msg)
  }

  # "%w" returns the weekday as "0" (Sunday) to "6" (Saturday), which unlike
  # `weekdays()` does not depend on the session locale
  wday <- format(x, format = "%w")

  # Year is included so that the same month in two different years counts twice
  year_month <- format(x, format = "%Y-%m")

  if (length(unique(year_month)) == length(x)) {
    # Every date sits in its own calendar month
    "monthly"
  } else if (sum(wday == "0") == length(x)) {
    # Number of Sundays equals the number of dates
    "weekly"
  } else if (length(unique(wday)) >= 3) {
    # At least 3 days of the week must be present
    "daily"
  } else {
    unable_msg
  }
}

Просмотреть файл

@ -151,7 +151,7 @@ plot_flex_index <- function(data,
)
## 00, 01, 02, etc.
hours_col <- stringr::str_pad(seq(0,23), width = 2, pad = 0)
hours_col <- pad2(x = seq(0,23))
# Use `mutate()` method
# Will get 10 IDs, not 10 rows

Просмотреть файл

@ -22,7 +22,10 @@
#' @param legend_text String to be used in the bottom legend label.
#'
#' @param rows Number of rows to show in plot.
#' @noRd
#' @param title String to specify plot title.
#' @param subtitle String to specify plot subtitle.
#' @param caption String to specify plot caption.
#' @param ylab String to specify plot y-axis label.
#'
#' @export
@ -41,7 +44,7 @@ plot_hourly_pat <- function(
){
## 00, 01, 02, etc.
hours_col <- stringr::str_pad(seq(0,23), width = 2, pad = 0)
hours_col <- pad2(x = seq(0,23))
data %>%
utils::head(rows) %>%

Просмотреть файл

@ -251,3 +251,19 @@ wrap_text <- function(x, threshold = 15){
x = x
)
}
#' @title
#' Create the two-digit zero-padded format
#'
#' @description
#' Converts the input to character and prefixes a `"0"` to every element that
#' is exactly one character long, e.g. `5` becomes `"05"` while `12` stays as
#' `"12"`. Elements of any other length are returned unchanged.
#'
#' @param x numeric value or vector with maximum two characters.
#'
#' @return
#' Character vector containing the two-digit zero-padded values.
#'
#' @export
pad2 <- function(x) {
  digits <- as.character(x)

  # Only single-character values need a leading zero
  needs_pad <- nchar(digits) == 1

  ifelse(needs_pad, paste0("0", digits), digits)
}

Просмотреть файл

@ -191,14 +191,6 @@ workpatterns_area <- function(data,
mutate(Signals = sub(pattern = "_\\d.+", replacement = "", x = Signals)) %>%
spread(Signals, Value)
## Create the two-digit zero-padded format
## Used in `scale_x_continuous()`
pad2 <- function(x){
x <- as.character(x)
ifelse(nchar(x) == 1, paste0("0", x), x)
}
## Return
if(return == "data"){

Просмотреть файл

@ -234,7 +234,8 @@ workpatterns_classify_bw <- function(data,
WpA_classify <-
WpA_classify[, c("PersonId", "Date", "Active_Hours", "HourType", "sent")] %>%
.[, .(sent = sum(sent)), by = c("PersonId", "Date", "Active_Hours", "HourType")] %>%
tidyr::spread(HourType, sent)%>%
dplyr::as_tibble() %>%
tidyr::spread(HourType, sent) %>%
left_join(WpA_classify %>% ## Calculate first and last activity for day_span
filter(sent > 0)%>%
group_by(PersonId, Date)%>%
@ -242,7 +243,8 @@ workpatterns_classify_bw <- function(data,
Last_signal = max(End)),
by = c("PersonId","Date"))%>%
mutate(Day_Span = Last_signal - First_signal,
Signals_Break_hours = Day_Span - Active_Hours)
Signals_Break_hours = Day_Span - Active_Hours) %>%
data.table::as.data.table()
## Working patterns classification ---------------------------------------

Просмотреть файл

@ -295,14 +295,6 @@ plot_signal_clust <- function(data,
mutate_at("Hours", ~sub(pattern = paste0(sig_label, "_"), replacement = "", x = .)) %>%
mutate_at("Hours", ~sub(pattern = "_.+", replacement = "", x = .))
## Create the two-digit zero-padded format
## Used in `scale_x_continuous()`
pad2 <- function(x){
ifelse(nchar(x) == 1,
paste0(0, x),
x)
}
## bar plot
output_bar <-
plot_data %>%

Просмотреть файл

@ -171,7 +171,7 @@ workpatterns_rank <- function(data,
by = num_cols]
## 00, 01, 02, etc.
hours_col <- stringr::str_pad(seq(0,23), width = 2, pad = 0)
hours_col <- pad2(x = seq(0,23))
# Wide table showing proportion of signals by hour
# Ranked descending by `WeekCount`

Просмотреть файл

@ -1,7 +1,6 @@
# wpa <img src="https://raw.githubusercontent.com/microsoft/wpa/main/man/figures/logo2.png" align="right" width=15% />
[![R build status](https://github.com/microsoft/wpa/workflows/R-CMD-check/badge.svg)](https://github.com/microsoft/wpa/actions/)
[![CodeFactor](https://www.codefactor.io/repository/github/microsoft/wpa/badge/)](https://www.codefactor.io/repository/github/microsoft/wpa/)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT/)
[![lifecycle](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html)
[![CRAN status](https://www.r-pkg.org/badges/version/wpa)](https://CRAN.R-project.org/package=wpa/)
@ -9,7 +8,7 @@
## Analyze and Visualize Viva Leader Insights data
This is an R package for analyzing and visualizing data from [Microsoft Viva Advanced Insights](https://docs.microsoft.com/en-us/workplace-analytics/) (previously Microsoft Workplace Analytics).
This is an R package for analyzing and visualizing data from [Microsoft Workplace Analytics](https://docs.microsoft.com/en-us/workplace-analytics/). For analyzing data from [Microsoft Viva Insights](https://analysis.insights.viva.office.com/), please see our other package [**vivainsights**](https://microsoft.github.io/vivainsights/).
## With the **wpa** package, you can...
@ -63,9 +62,12 @@ See [NEWS.md](https://microsoft.github.io/wpa/news/index.html) for the package c
## Related repositories
- [Viva Insights R library - new implementation](https://microsoft.github.io/vivainsights/)
- [Viva Insights Python library](https://github.com/microsoft/vivainsights-py/)
- [Viva RMarkdown Report Marketplace](https://github.com/microsoft/VivaRMDReportMarketplace)
- [Viva Insights Sample Code](https://github.com/microsoft/viva-insights-sample-code)
- [Viva Insights Zoom Integration](https://github.com/microsoft/vivainsights_zoom_int)
- [Viva Insights OData Query Download](https://github.com/microsoft/vivainsights-odatadl)
---

Просмотреть файл

@ -7,6 +7,14 @@
0 errors | 0 warnings | 0 note
## Submission 1.8.1
Patch to fix bug due to an update in 'tidyr' dependency
## Submission 1.8.0
New functions and improving outputs of existing functions
## Submission 1.7.0
Bug fixes, new features, and removal of archived dependency 'portes'

Просмотреть файл

@ -139,8 +139,10 @@ Other Visualization:
\code{\link{workpatterns_area}()},
\code{\link{workpatterns_rank}()}
Other External:
Other External Collaboration:
\code{\link{external_fizz}()},
\code{\link{external_line}()},
\code{\link{external_sum}()}
}
\concept{External}
\concept{External Collaboration}
\concept{Visualization}

Просмотреть файл

@ -125,7 +125,9 @@ Other Visualization:
\code{\link{workpatterns_rank}()}
Other External Collaboration:
\code{\link{external_line}()}
\code{\link{external_dist}()},
\code{\link{external_line}()},
\code{\link{external_sum}()}
}
\concept{External Collaboration}
\concept{Visualization}

Просмотреть файл

@ -127,7 +127,9 @@ Other Visualization:
\code{\link{workpatterns_rank}()}
Other External Collaboration:
\code{\link{external_fizz}()}
\code{\link{external_dist}()},
\code{\link{external_fizz}()},
\code{\link{external_sum}()}
}
\concept{External Collaboration}
\concept{Visualization}

Просмотреть файл

@ -46,7 +46,7 @@ Returns a stacked bar plot of internal and external collaboration.
Additional options available to return a summary table.
}
\examples{
# Return a plot
# Return a plot
external_sum(sq_data, hrvar = "LevelDesignation")
# Return summary table
@ -130,8 +130,10 @@ Other Visualization:
\code{\link{workpatterns_area}()},
\code{\link{workpatterns_rank}()}
Other External:
\code{\link{external_dist}()}
Other External Collaboration:
\code{\link{external_dist}()},
\code{\link{external_fizz}()},
\code{\link{external_line}()}
}
\concept{External}
\concept{External Collaboration}
\concept{Visualization}

85
man/identify_datefreq.Rd Normal file
Просмотреть файл

@ -0,0 +1,85 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/identify_datefreq.R
\name{identify_datefreq}
\alias{identify_datefreq}
\title{Identify date frequency based on a series of dates}
\usage{
identify_datefreq(x)
}
\arguments{
\item{x}{Vector containing a series of dates.}
}
\value{
String describing the detected date frequency, i.e.:
\itemize{
\item 'daily'
\item 'weekly'
\item 'monthly'
}
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
Takes a vector of dates and identify whether the frequency is 'daily',
'weekly', or 'monthly'. The primary use case for this function is to provide
an accurate description of the query type used and for raising errors should
a wrong date grouping be used in the data input.
}
\details{
Date frequency detection works as follows:
\itemize{
\item If at least three days of the week are present (e.g., Monday, Wednesday,
Thursday) in the series, then the series is classified as 'daily'
\item If the total number of months in the series is equal to the length, then
the series is classified as 'monthly'
\item If the total number of sundays in the series is equal to the length of
the series, then the series is classified as 'weekly
}
}
\section{Limitations}{
One of the assumptions made behind the classification is that weeks are
denoted with Sundays, hence the count of sundays to measure the number of
weeks. In this case, weeks where a Sunday is missing would result in an
'unable to classify' error.
Another assumption made is that dates are evenly distributed, i.e. that the
gap between dates are equal. If dates are unevenly distributed, e.g. only two
days of the week are available for a given week, then the algorithm will fail
to identify the frequency as 'daily'.
}
\examples{
start_date <- as.Date("2022/06/26")
end_date <- as.Date("2022/11/27")
# Daily
day_seq <-
seq.Date(
from = start_date,
to = end_date,
by = "day"
)
identify_datefreq(day_seq)
# Weekly
week_seq <-
seq.Date(
from = start_date,
to = end_date,
by = "week"
)
identify_datefreq(week_seq)
# Monthly
month_seq <-
seq.Date(
from = start_date,
to = end_date,
by = "month"
)
identify_datefreq(month_seq)
}

17
man/pad2.Rd Normal file
Просмотреть файл

@ -0,0 +1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/supporting_functions.R
\name{pad2}
\alias{pad2}
\title{Create the two-digit zero-padded format}
\usage{
pad2(x)
}
\arguments{
\item{x}{numeric value or vector with maximum two characters.}
}
\value{
Numeric value containing two-digit zero-padded values.
}
\description{
Create the two-digit zero-padded format
}

56
man/plot_hourly_pat.Rd Normal file
Просмотреть файл

@ -0,0 +1,56 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plot_hourly_pat.R
\name{plot_hourly_pat}
\alias{plot_hourly_pat}
\title{Internal function for plotting the hourly activity patterns.}
\usage{
plot_hourly_pat(
data,
start_hour,
end_hour,
legend,
legend_label,
legend_text = "Observed activity",
rows,
title,
subtitle,
caption,
ylab = paste("Top", rows, "activity patterns")
)
}
\arguments{
\item{data}{Data frame containing three columns:
\itemize{
\item \code{patternRank}
\item \code{Hours}
\item \code{Freq}
}}
\item{start_hour}{Numeric value to specify expected start hour.}
\item{end_hour}{Numeric value to specify expected end hour.}
\item{legend}{Data frame containing the columns:
\itemize{
\item \code{patternRank}
\item Any column to be used in the grey label box, supplied to \code{legend_label}
}}
\item{legend_label}{String specifying column to display in the grey label
box}
\item{legend_text}{String to be used in the bottom legend label.}
\item{rows}{Number of rows to show in plot.}
\item{title}{String to specify plot title.}
\item{subtitle}{String to specify plot subtitle.}
\item{caption}{String to specify plot caption.}
\item{ylab}{String to specify plot y-axis label.}
}
\description{
This is used within \code{plot_flex_index()} and \code{workpatterns_rank()}.
}

Различия файлов скрыты, потому что одна или несколько строк слишком длинны