-
Notifications
You must be signed in to change notification settings - Fork 0
/
regenerate_data.R
executable file
·119 lines (111 loc) · 2.92 KB
/
regenerate_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env Rscript
library(Pint)
library(pROC)
library(dplyr)
library(foreach)
library(doMC)
library(ggplot2)
registerDoMC(cores=detectCores())
source("generation_functions.R")
#' Generate simulated data sets and save each one as an .rds file
#'
#' Looks up the simulation parameters for the preset named by `size`, calls
#' `generate_set()` `num` times and writes every result with `saveRDS()`.
#' The file name encodes the parameters used plus a random integer suffix
#' drawn with `runif()`.
#'
#' @param path Output location. It is prepended to the file name with
#'   `paste0()`, so a directory must end with a path separator.
#' @param num Number of data sets to generate; 0 generates nothing.
#' @param size Name of the parameter preset: one of "8k", "p100", "wide",
#'   "large_3way", "small_3way", "wide_3way" or "wide_10k". The default,
#'   "large", has no preset defined here and is rejected with an error.
#' @param threeway Currently unused; kept so existing callers keep working.
#' @return `NULL`, invisibly. Called for its side effect of writing files.
generate_sets <- function(path, num, size = "large", threeway = FALSE) {
  # Shared by every preset. `viol` is only used in the output file name.
  viol <- 100
  num_lethals <- 0
  snr <- 5

  presets <- list(
    "8k" = list(n = 8000, p = 4000, num_bi = 40, num_bij = 200, num_bijk = 0),
    "p100" = list(n = 1000, p = 100, num_bi = 10, num_bij = 50, num_bijk = 0),
    "wide" = list(n = 1000, p = 20000, num_bi = 100, num_bij = 500, num_bijk = 0),
    "large_3way" = list(
      n = 40000, p = 4000, num_bi = 10, num_bij = 100, num_bijk = 1000
    ),
    "small_3way" = list(
      n = 4000, p = 400, num_bi = 10, num_bij = 100, num_bijk = 1000
    ),
    "wide_3way" = list(
      n = 1000, p = 10000, num_bi = 10, num_bij = 100, num_bijk = 1000
    ),
    "wide_10k" = list(
      n = 1000, p = 10000, num_bi = 100, num_bij = 500, num_bijk = 0
    )
  )

  # Fail fast: previously an unknown size only printed a message and the
  # function carried on with undefined parameters.
  size_is_known <- is.character(size) && length(size) == 1L &&
    size %in% names(presets)
  if (!size_is_known) {
    stop(
      "invalid size: ", paste(deparse(size), collapse = ""),
      "; expected one of ", paste(names(presets), collapse = ", "),
      call. = FALSE
    )
  }

  cfg <- presets[[size]]
  n <- cfg[["n"]]
  p <- cfg[["p"]]
  num_bi <- cfg[["num_bi"]]
  num_bij <- cfg[["num_bij"]]
  num_bijk <- cfg[["num_bijk"]]

  # seq_len() rather than 1:num, so that num = 0 generates nothing
  # (1:0 would iterate twice).
  for (i in seq_len(num)) {
    dataset <- generate_set(n, p, snr, num_bi, num_bij, num_bijk, num_lethals)
    # Random suffix to tell apart repeated sets that share parameters.
    suffix <- floor(runif(1) * 1e5)
    file_name <- sprintf(
      "n%d_p%d_nbi%d_nbij%d_nbijk%d_nlethals%d_viol%d_snr%d_%d.rds",
      n, p, num_bi, num_bij, num_bijk, num_lethals, viol, snr, suffix
    )
    saveRDS(dataset, file = paste0(path, file_name))
  }
  invisible(NULL)
}
#' Make sure the "8k" simulated data directory is populated
#'
#' Creates `path` (including parent directories) when it does not exist, and
#' generates 10 data sets with the "8k" preset when `list.files()` reports no
#' entries in it.
#'
#' @param path Directory that should hold the data sets.
ensure_8k_set_exists <- function(path) {
  if (!dir.exists(path)) {
    dir.create(path, recursive = TRUE)
  }
  # Only generate when the directory is empty; existing files are left alone.
  directory_is_empty <- length(list.files(path)) == 0
  if (directory_is_empty) {
    generate_sets(path, 10, "8k")
  }
}
#' Make sure the "p100" simulated data directory is populated
#'
#' Creates `path` (including parent directories) when it does not exist, and
#' generates 50 data sets with the "p100" preset when `list.files()` reports
#' no entries in it.
#'
#' @param path Directory that should hold the data sets.
ensure_p100_set_exists <- function(path) {
  if (!dir.exists(path)) {
    dir.create(path, recursive = TRUE)
  }
  # Only generate when the directory is empty; existing files are left alone.
  found <- list.files(path)
  if (length(found) == 0) {
    generate_sets(path, 50, "p100")
  }
}
#' Make sure the wide simulated data directory is populated
#'
#' Creates `path` (including parent directories) when it does not exist, and
#' generates 10 data sets with the "wide_10k" preset when `list.files()`
#' reports no entries in it.
#'
#' @param path Directory that should hold the data sets.
ensure_wide_set_exists <- function(path) {
  if (!dir.exists(path)) {
    dir.create(path, recursive = TRUE)
  }
  # Only generate when the directory is empty; existing files are left alone.
  entry_count <- length(list.files(path))
  if (entry_count == 0) {
    generate_sets(path, 10, "wide_10k")
  }
}
#' Make sure the three-way simulated data directory is populated
#'
#' Creates `path` (including parent directories) when it does not exist, and
#' generates 10 data sets with the "small_3way" preset (passing
#' `threeway = TRUE`) when `list.files()` reports no entries in it.
#'
#' @param path Directory that should hold the data sets.
ensure_3way_set_exists <- function(path) {
  if (!dir.exists(path)) {
    dir.create(path, recursive = TRUE)
  }
  # Only generate when the directory is empty; existing files are left alone.
  nothing_there <- length(list.files(path)) == 0
  if (nothing_there) {
    generate_sets(path, 10, "small_3way", threeway = TRUE)
  }
}
# Script entry point: populate each simulated-data directory that is still
# empty. Paths are relative to the working directory the script is run from.
ensure_p100_set_exists(
  path = "./data/simulated_rerun/simulated_small_data_sample/"
)
ensure_8k_set_exists(path = "./data/simulated_rerun/8k_only/")
ensure_wide_set_exists(path = "./data/simulated_rerun/wide_only_10k/")
ensure_3way_set_exists(path = "./data/simulated_rerun/3way/")