Time Series Aggregation in R
############################################
### Bimal Adhikari                       ###
### Aggregation of 5-min time series     ###
############################################

#' Aggregate a regularly spaced time series to a coarser time step
#'
#' Sums one column of a table over consecutive, non-overlapping windows of
#' `scale / source` rows (e.g. twelve 5-min rows per 1-hr window). Every other
#' column takes its value from the last row of each window. Rows left over
#' after the last complete window are dropped. Cells equal to -999 are treated
#' as missing (`NA`) before aggregation, so a window containing a missing
#' value sums to `NA`.
#'
#' @param input_path Directory holding the input file. Only used when
#'   `readfilefrom = "path"`.
#' @param output_path Directory the aggregated table is written to when
#'   `export = TRUE`.
#' @param input_filename When `readfilefrom = "path"`: input file name without
#'   the ".txt" extension (the file is read with a header row). When
#'   `readfilefrom = "sys"`: the table itself (a data frame already in memory).
#' @param output_filename Output file name; ".txt" is always appended to it.
#'   Note that the default is built with `paste()` (space separator) and
#'   already ends in ".txt", so the default file is named
#'   "<input_filename> _agg.txt.txt", and it is only usable when
#'   `input_filename` is a file name. Pass an explicit name to avoid both.
#' @param col Single column (index or name) whose values are summed.
#' @param delcol Columns to remove from the result, given as positive indices
#'   (they are negated internally). The default keeps every column.
#' @param source Time step of the input series in hours, e.g. `5/60` for
#'   5-min data.
#' @param scale Time step to aggregate to in hours, e.g. `1` for hourly.
#' @param export `TRUE` to also write the result as a tab-separated text
#'   file, `FALSE` to only return it.
#' @param readfilefrom `"sys"` to take the table from `input_filename`,
#'   `"path"` to read it from `input_path`.
#' @return The aggregated table with one row per complete window (a plain
#'   vector if only one column remains), or `NULL` when `scale / source` is
#'   not a whole number greater than or equal to 1.
#' @keywords aggregation
#' @export
#' @examples
#' five_min <- data.frame(step = 1:24, rain = rep(0.5, 24))
#' sankalan(input_filename = five_min, col = 2, source = 5 / 60, scale = 1)
sankalan <- function(input_path = getwd(), output_path = getwd(),
                     input_filename,
                     output_filename = paste(input_filename, "_agg.txt"),
                     col = 1, delcol = -(1:col_agg), source = 1, scale = 1,
                     export = FALSE, readfilefrom = "sys") {
  # Fail early with a clear message instead of a later
  # "object 'file_temp' not found" when the mode is not recognised.
  if (length(readfilefrom) != 1L || !(readfilefrom %in% c("sys", "path"))) {
    stop("`readfilefrom` must be \"sys\" or \"path\"", call. = FALSE)
  }

  # Read file
  if (readfilefrom == "path") {
    file_temp <- read.table(
      file = file.path(input_path, paste0(input_filename, ".txt")),
      header = TRUE
    )
  } else {
    file_temp <- input_filename
  }
  file_temp[file_temp == -999] <- NA

  # Number of source rows that make up one aggregated row,
  # e.g. 1 hr / (5/60 hr) = 12.
  timestep <- scale * (1 / source)

  # Return NULL if the ratio is not usable: not finite, not a whole number
  # (within floating-point tolerance, since source is typically a fraction
  # such as 5/60), or smaller than 1.
  if (!is.numeric(timestep) || length(timestep) != 1L ||
        !is.finite(timestep)) {
    return(NULL)
  }
  nearest <- round(timestep)
  if (abs(timestep - nearest) > sqrt(.Machine$double.eps) || nearest < 1) {
    return(NULL)
  }
  timestep <- nearest

  len_series <- nrow(file_temp)
  row_agg <- floor(len_series / timestep) # number of complete windows
  # Keep this exact name: the default value of `delcol` refers to it.
  col_agg <- ncol(file_temp)

  # Sum the j-th member of every window, for j = 1..timestep.
  series <- file_temp[, col]
  agg_series <- 0
  for (j in seq_len(timestep)) {
    members <- seq(from = j, length.out = row_agg, by = timestep)
    agg_series <- agg_series + series[members]
  }

  # Output table: take the last row of each window (so the other columns,
  # e.g. a time stamp, mark the end of the window), then overwrite the
  # selected column with the window sums.
  last_rows <- seq(from = timestep, length.out = row_agg, by = timestep)
  agg <- as.data.frame(file_temp[last_rows, , drop = FALSE])
  rownames(agg) <- NULL
  agg[[col]] <- agg_series

  # Delete unnecessary columns as specified by the user. With the default
  # delcol = -(1:col_agg) this selects 1:col_agg, i.e. keeps everything.
  agg <- agg[, -delcol]

  if (export == TRUE) {
    write.table(
      x = agg,
      file = file.path(output_path, paste0(output_filename, ".txt")),
      col.names = TRUE, quote = FALSE, row.names = FALSE, sep = "\t"
    )
  }

  agg
}