library(tidyverse)
library(stringr)
library(haven)
## STEPS TO USE THESE FUNCTIONS:
## 1. DEFINE THE 'file_path' OBJECT, WHICH IS THE PATH TO THE DIRECTORY WHERE
## ALL DATA SETS ARE STORED.
## 2. APPLY THE 'get_names_labels' FUNCTION TO THAT PATH. THE FUNCTION CREATES
## A DATA FRAME NAMED 'tag_names' IN THE GLOBAL ENVIRONMENT.
## 3. THE DATA FRAME 'tag_names' LISTS THE VARIABLE NAMES, THEIR LABELS AND THE
## DATA SET EACH VARIABLE COMES FROM. VISUALLY/MANUALLY EXPLORE THIS DATA FRAME
## TO SELECT THE VARIABLES WE NEED. CREATE A VECTOR WITH THE NAMES OF THE
## VARIABLES WE NEED, AND NAME THIS VECTOR 'variables_needed'.
## 4. FROM THE DATA FRAME 'tag_names', KEEP ONLY THE ROWS FOR THE VARIABLES WE
## WILL USE (STORED IN THE VECTOR 'variables_needed').
## 5. APPLY THE 'read_and_select' FUNCTION TO EACH DATA SET THAT CONTAINS
## RELEVANT VARIABLES. THIS FUNCTION ONLY NEEDS THE FILE NAME OF THE DATA SET,
## WHICH IS STORED IN THE LAST COLUMN ('file_name') OF THE DATA FRAME 'tag_names'.
#' Catalogue the variables of every SAS data set in a directory
#'
#' Reads each `.sas7bdat` file found directly inside `file_path`, collects the
#' column names together with each column's "label" attribute, and stacks the
#' results into a single data frame with one row per variable.
#'
#' @param file_path Path to the directory where the `.sas7bdat` files are
#'   stored. A trailing slash is optional.
#' @return Invisibly, a data frame with the character columns `variable_name`,
#'   `variable_label` (`NA` when a column carries no label) and `file_name`.
#'   It has zero rows when the directory contains no `.sas7bdat` file.
#'   As a side effect the same data frame is stored in the global environment
#'   under the name `tag_names`.
get_names_labels <- function(file_path) {
  sas_files <- list.files(path = file_path, pattern = "\\.sas7bdat$")

  # Build one small data frame per file; lapply() copes with an empty
  # directory, unlike a `1:length(x)` index loop.
  per_file <- lapply(sas_files, function(sas_file) {
    print(sas_file)

    # Read the SAS file
    sas_data <- as.data.frame(haven::read_sas(file.path(file_path, sas_file)))

    # Get the names and labels of the variables. `exact = TRUE` prevents the
    # lookup from partially matching a "labels" (value labels) attribute.
    var_names <- names(sas_data)
    var_labels <- vapply(
      sas_data,
      function(column) {
        label <- attr(column, "label", exact = TRUE)
        if (is.null(label)) NA_character_ else as.character(label)[1]
      },
      character(1),
      USE.NAMES = FALSE
    )

    # Combine variable names and labels in a data frame
    data.frame(
      variable_name = var_names,
      variable_label = var_labels,
      file_name = rep(sas_file, length(var_names)),
      stringsAsFactors = FALSE
    )
  })

  if (length(per_file) == 0L) {
    # Keep the documented column layout even when nothing was read.
    results_df <- data.frame(
      variable_name = character(0),
      variable_label = character(0),
      file_name = character(0),
      stringsAsFactors = FALSE
    )
  } else {
    results_df <- do.call(rbind, per_file)
  }

  # The catalogue is published as a global object so it can be inspected,
  # filtered by hand and then used when reading the individual data sets.
  assign("tag_names", results_df, envir = .GlobalEnv)
  invisible(results_df)
}
################################################## ##############################
#' Read one SAS data set and keep only the selected variables
#'
#' Reads `df_file` from the directory `file_path`, keeps the columns listed for
#' that file in `tag_names`, and saves the result in the global environment.
#' The only input is the file name of the data set.
#'
#' @param df_file File name of the data set, a single string matching a value
#'   of `tag_names$file_name` (for example `"demo.sas7bdat"`).
#' @return Invisibly, the selected columns as a data frame. As a side effect
#'   the data frame is assigned in the global environment under the part of
#'   `df_file` that precedes its first dot.
read_and_select <- function(df_file) {
  stopifnot(is.character(df_file), length(df_file) == 1L, !is.na(df_file))

  # `file_path` and `tag_names` are not arguments: they are looked up in the
  # calling workspace, so both must exist before this function is called.
  keep <- unique(tag_names$variable_name[tag_names$file_name %in% df_file])
  if (length(keep) == 0L) {
    warning(
      "No variables are listed in 'tag_names' for file '", df_file, "'.",
      call. = FALSE
    )
  }

  df_tmp <- haven::read_sas(file.path(file_path, df_file))
  # all_of() makes it explicit that `keep` is a character vector of column
  # names and fails loudly when one of them is missing from the file.
  df_tmp <- as.data.frame(dplyr::select(df_tmp, dplyr::all_of(keep)))

  # Object name = file name up to (not including) its first dot.
  object_name <- stringr::str_extract(df_file, "[^.]+")
  assign(object_name, df_tmp, envir = .GlobalEnv)
  invisible(df_tmp)
}
################################################## ##############################