Skip to content

Commit

Permalink
Add error logging using futile.logger for year_sector_data
Browse files Browse the repository at this point in the history
  • Loading branch information
mammykins committed Oct 9, 2017
1 parent fdf701c commit 482b0c5
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 26 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Suggests:
Imports:
assertr (>= 1.0.2),
dplyr (>= 0.5.0),
futile.logger (>= 1.4.3),
ggplot2 (>= 2.2.0),
govstyle (>= 0.1.2),
haven (>= 1.0.0),
Expand Down
85 changes: 61 additions & 24 deletions R/year_sector_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@
#' \code{years}: an integer vector containing \code{unique(df$year)}.
#'
#' @param x Input dataframe, see details.
#' @param log_level The severity threshold at which log messages are written.
#' Levels, from least to most serious, are: TRACE, DEBUG, INFO, WARN, ERROR,
#' FATAL. The default level is WARN. See \code{?flog.threshold()} for
#' additional details.
#' @param log_appender Defaults to "console", which writes the log to the
#' console. Alternatively, provide a character string specifying a filename to
#' also write the log to. See \code{?futile.logger::appender.tee()} for
#' additional details.
#' @param log_issues Should issues with the data quality be logged to GitHub?
#' See \code{?raise_issue()} for additional details.
#'
Expand All @@ -35,9 +41,24 @@
#' @export


year_sector_data <- function(x, log_issues = FALSE) {
year_sector_data <- function(x, log_level = futile.logger::WARN,
log_appender = "console",
log_issues = FALSE) {

message('Initiating year_sector_data class.
# Set logger severity threshold, defaults to
# high level use (only flags warnings and errors)
# Set log_level argument to futile.logger::TRACE for full info
futile.logger::flog.threshold(log_level)

# Set where to write the log to
if (log_appender != "console")
{
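# If not "console", write the log to the file named by log_appender.
# NOTE(review): appender.file() writes to the file only; appender.tee() would
# write to both the console and the file, as the roxygen docs suggest - confirm
# which behaviour is intended.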
futile.logger::flog.appender(futile.logger::appender.file(log_appender))
}

# Checks
futile.logger::flog.info('Initiating year_sector_data class.
\n\nExpects a data.frame with three columns: sector, year, and measure, where
measure is one of GVA, exports, or enterprises. The data.frame should include
historical data, which is used for checks on the quality of this year\'s data,
Expand All @@ -50,35 +71,44 @@ this class is given by ?year_sector_data().')
# missing values and three columns, containing sector, year, and one
# additional column.

message('\n*** Running integrity checks on input dataframe (x):')
message('\nChecking input is properly formatted...')
message('Checking x is a data.frame...')
if (!is.data.frame(x)) stop("x must be a data.frame")
futile.logger::flog.info('\n*** Running integrity checks on input dataframe (x):')
futile.logger::flog.info('\nChecking input is properly formatted...')

message('Checking x has correct columns...')
if (length(colnames(x)) != 3) stop("x must have three columns: sector, year, and one of GVA, export, or x")
futile.logger::flog.debug('Checking x is a data.frame...')
if (!is.data.frame(x))
{
futile.logger::flog.error("x must be a data.frame",
x, capture = TRUE)
}

futile.logger::flog.debug('Checking x has correct columns...')
if (length(colnames(x)) != 3)
{
futile.logger::flog.error("x must have three columns: sector, year, and one of GVA, export, or x")
}

message('Checking x contains a year column...')
futile.logger::flog.debug('Checking x contains a year column...')
if (!'year' %in% colnames(x)) stop("x must contain year column")

message('Checking x contains a sector column...')
futile.logger::flog.debug('Checking x contains a sector column...')
if (!'sector' %in% colnames(x)) stop("x must contain sector column")

message('Checking x does not contain missing values...')
futile.logger::flog.debug('Checking x does not contain missing values...')
if (anyNA(x)) stop("x cannot contain any missing values")

message('Checking for the correct number of rows...')
futile.logger::flog.debug('Checking for the correct number of rows...')
if (nrow(x) != length(unique(x$sector)) * length(unique(x$year))) {

warning("x does not appear to be well formed. nrow(x) should equal
length(unique(x$sector)) * length(unique(x$year)). Check the of x.")
futile.logger::flog.warn("x does not appear to be well formed. nrow(x) should equal
length(unique(x$sector)) * length(unique(x$year)). Check the of x.")
}

message('...passed')


futile.logger::flog.info('...passed')

# Use assertr to run statistical tests on the data itself ----

message('\n***Running statistical checks on input dataframe (x)...\n
futile.logger::flog.info('\n***Running statistical checks on input dataframe (x)...\n
These tests are implemented using the package assertr see:
https://cran.r-project.org/web/packages/assertr for more details.')

Expand All @@ -88,13 +118,13 @@ length(unique(x$sector)) * length(unique(x$year)). Check the of x.")

# Check sensible range for year

message('Checking years in a sensible range (2000:2020)...')
futile.logger::flog.debug('Checking years in a sensible range (2000:2020)...')

assertr::assert_(x, assertr::in_set(2000:2020), ~year)

# Check that the correct levels are in sector

message('Checking sectors are correct...')
futile.logger::flog.debug('Checking sectors are correct...')

# Save sectors name lookup for use later

Expand All @@ -120,7 +150,7 @@ length(unique(x$sector)) * length(unique(x$year)). Check the of x.")
# median +- 3 * median absolute deviation, implemented in the
# assertr::within_n_mads() function.

message('Checking for outliers (x_i > median(x) + 3 * mad(x)) in each sector timeseries...')
futile.logger::flog.debug('Checking for outliers (x_i > median(x) + 3 * mad(x)) in each sector timeseries...')

# Create a list split by series containing a df in each

Expand All @@ -131,7 +161,7 @@ length(unique(x$sector)) * length(unique(x$year)). Check the of x.")
lapply(
X = series_split,
FUN = function(x) {
message('Checking sector timeseries: ', unique(x[['sector']]))
futile.logger::flog.debug('Checking sector timeseries: ', unique(x[['sector']]))
assertr::insist_(
x,
assertr::within_n_mads(3),
Expand All @@ -140,7 +170,7 @@ length(unique(x$sector)) * length(unique(x$year)). Check the of x.")
}
)

message('...passed')
futile.logger::flog.info('...passed')

# Check for outliers using mahalanobis ----

Expand All @@ -150,14 +180,21 @@ length(unique(x$sector)) * length(unique(x$year)). Check the of x.")
# outliers in this new vector of norms. Any value with a distance too great is
# flagged as an outlier.

message('Checking for outliers on a row by row basis using mahalanobis distance...')
futile.logger::flog.debug('Checking for outliers on a row by row basis using mahalanobis distance...')

lapply(
X = series_split,
FUN = maha_check
)

message('...passed')
futile.logger::flog.debug('...passed')

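### ISSUE - the resets below overwrite the logger threshold and appender with
### fixed defaults rather than restoring whatever the user had set beforehand,
### so they might be "changing the world" for the user unexpectedly!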

# Reset threshold to package default
futile.logger::flog.threshold(futile.logger::INFO)
# Reset so that log is appended to console (the package default)
futile.logger::flog.appender(futile.logger::appender.console())

# Define the class here ----

Expand Down
11 changes: 10 additions & 1 deletion man/year_sector_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 17 additions & 1 deletion packrat/packrat.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
PackratFormat: 1.4
PackratVersion: 0.4.8.1
RVersion: 3.3.2
RVersion: 3.4.1
Repos: BioCsoft=https://bioconductor.org/packages/3.4/bioc,
BioCann=https://bioconductor.org/packages/3.4/data/annotation,
BioCexp=https://bioconductor.org/packages/3.4/data/experiment,
Expand Down Expand Up @@ -139,6 +139,17 @@ Source: CRAN
Version: 1.4
Hash: b2a47a2b91737978cf50ba95825a9cd1

Package: futile.logger
Source: CRAN
Version: 1.4.3
Hash: d1cc6584b39f2c4cfbe377fd94cbdbaa
Requires: futile.options, lambda.r

Package: futile.options
Source: CRAN
Version: 1.0.0
Hash: 4f24cbf581681c263186b9425f05ba3e

Package: ggplot2
Source: CRAN
Version: 2.2.1
Expand Down Expand Up @@ -224,6 +235,11 @@ Source: CRAN
Version: 0.3
Hash: ecf589b42cd284b03a4beb9665482d3e

Package: lambda.r
Source: CRAN
Version: 1.2
Hash: a17af00010c92198f159cebf1467e111

Package: lazyeval
Source: CRAN
Version: 0.2.0
Expand Down
Binary file not shown.
Binary file not shown.
Binary file added packrat/src/lambda.r/lambda.r_1.2.tar.gz
Binary file not shown.

0 comments on commit 482b0c5

Please sign in to comment.