Skip to content

Commit

Permalink
Fixed bug in get_dct_transform. Added rolling mean via zoo.
Browse files Browse the repository at this point in the history
  • Loading branch information
mjockers committed Nov 16, 2015
1 parent 025995e commit 596a27d
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 1 deletion.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ License: GPL-3
Imports:
openNLP,
NLP,
zoo
zoo,
dtt
LazyData: true
Suggests:
knitr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2 (4.1.1): do not edit by hand

export(get_dct_transform)
export(get_nrc_sentiment)
export(get_percentage_values)
export(get_sentences)
Expand Down
37 changes: 37 additions & 0 deletions R/syuzhet.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ get_text_as_string <- function(path_to_file){
#' @description
#' Parses a string into a vector of word tokens
#' @param text_of_file A Text String
#' @param pattern A regular expression for token breaking
#' @return A Character Vector of Words
#' @export
#'
Expand Down Expand Up @@ -294,6 +295,42 @@ simple_plot <- function(raw_values, title="Syuzhet Plot", legend_pos="top"){
legend(legend_pos, c("Loess Smooth", "Rolling Mean", "Simple Syuzhet"), lty=1, lwd=1,col=c('black', 'blue', 'red'), bty='n', cex=.75)
}

#' Discrete Cosine Transformation with Reverse Transform to Time Domain
#' @description
#' Converts input values into a standardized
#' set of filtered and reverse transformed values for
#' easy plotting and/or comparison.
#' @details
#' The input is transformed with \code{dtt::dct}, only the first
#' \code{low_pass_size} coefficients are kept, the kept coefficients are
#' zero-padded to \code{x_reverse_len} and the inverse transform is applied.
#' All arguments are validated before any transform is computed.
#' @param raw_values the raw sentiment values
#' calculated for each sentence
#' @param low_pass_size The number of components
#' to retain in the low pass filtering. Default = 10. Must be at least 1 and
#' no larger than either the length of \code{raw_values} or \code{x_reverse_len}.
#' @param x_reverse_len the number of values to return via decimation. Default = 100
#' @param scale_range Logical. Determines whether or not to scale the values from -1 to +1. Default = FALSE. If set to TRUE, the lowest value in the vector will be set to -1 and the highest value set to +1 and all the values scaled accordingly in between. Cannot be combined with \code{scale_vals}.
#' @param scale_vals Logical. Determines whether or not to normalize the values using the scale function. Default = FALSE. If TRUE, values will be centered by subtracting the mean and scaled by dividing by the standard deviation. See ?scale. Cannot be combined with \code{scale_range}.
#' @return The transformed values. When \code{scale_vals} is TRUE this is the
#' object returned by \code{scale}; when \code{scale_range} is TRUE it is the
#' object returned by \code{rescale}; otherwise it is the raw inverse transform.
#' @export
#'
get_dct_transform <- function(raw_values, low_pass_size = 10, x_reverse_len = 100, scale_vals = FALSE, scale_range = FALSE){
  # Validate every argument up front so that an invalid call fails fast,
  # before either DCT pass is computed.
  if (!is.numeric(raw_values))
    stop("Input must be an numeric vector")
  # Guard against 0 (or negative) sizes: `1:0` would silently select one
  # coefficient and produce an output that is one element too long.
  if (low_pass_size < 1)
    stop("low_pass_size must be at least 1")
  if (low_pass_size > length(raw_values))
    stop("low_pass_size must be less than or equal to the length of raw_values input vector")
  # The kept coefficients are zero-padded up to x_reverse_len, so there
  # cannot be more of them than the requested output length.
  if (low_pass_size > x_reverse_len)
    stop("low_pass_size must be less than or equal to x_reverse_len")
  if (scale_vals && scale_range)
    stop("ERROR: scale_vals and scale_range cannot both be true.")

  # Forward transform, then low-pass filter by keeping the leading coefficients.
  values_dct <- dtt::dct(raw_values, variant = 2)
  keepers <- values_dct[seq_len(low_pass_size)]

  # Zero-pad to the requested output length and transform back.
  padded_keepers <- c(keepers, rep(0, x_reverse_len - low_pass_size))
  dct_out <- dtt::dct(padded_keepers, inverted = TRUE)

  if (scale_vals) {
    return(scale(dct_out))
  }
  if (scale_range) {
    return(rescale(dct_out))
  }
  dct_out
}



Expand Down
31 changes: 31 additions & 0 deletions man/get_dct_transform.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/syuzhet.R
\name{get_dct_transform}
\alias{get_dct_transform}
\title{Discrete Cosine Transformation with Reverse Transform to Time Domain}
\usage{
get_dct_transform(raw_values, low_pass_size = 10, x_reverse_len = 100,
scale_vals = FALSE, scale_range = FALSE)
}
\arguments{
\item{raw_values}{the raw sentiment values
calculated for each sentence}

\item{low_pass_size}{The number of components
to retain in the low pass filtering. Default = 10}

\item{x_reverse_len}{the number of values to return via decimation. Default = 100}

\item{scale_vals}{Logical. Determines whether or not to normalize the values using the scale function. Default = FALSE. If TRUE, values will be centered by subtracting the mean and scaled by dividing by the standard deviation. See ?scale}

\item{scale_range}{Logical. Determines whether or not to scale the values from -1 to +1. Default = FALSE. If set to TRUE, the lowest value in the vector will be set to -1 and the highest value set to +1, and all the values in between scaled accordingly.}
}
\value{
The transformed values
}
\description{
Converts input values into a standardized
set of filtered and reverse transformed values for
easy plotting and/or comparison.
}

2 changes: 2 additions & 0 deletions man/get_tokens.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ get_tokens(text_of_file, pattern = "\\\\W")
}
\arguments{
\item{text_of_file}{A Text String}

\item{pattern}{A regular expression for token breaking}
}
\value{
A Character Vector of Words
Expand Down

0 comments on commit 596a27d

Please sign in to comment.