-
Notifications
You must be signed in to change notification settings - Fork 1
/
exampleTable5.jl
77 lines (63 loc) · 5.22 KB
/
exampleTable5.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
include("DoME.jl");
include("usefulFunctions.jl");
# Experiment selection for Table 5.
# Exactly one of the assignments below should be active: uncomment the line of the dataset
# to run and comment out the others. Each line sets, in this order, the dataset name, the
# value forwarded to `dome` as `strategy`, the value forwarded as `minimumReductionMSE`,
# and the value forwarded as `maximumNodes`.
datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "1027_ESL", Strategy2, 1e-5, 100;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "1028_SWD", Strategy1, 1e-5, 60;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "1029_LEV", Strategy3, 1e-4, 45;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "1030_ERA", Strategy2, 1e-7, 100;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "1089_USCrime", Strategy3, 1e-3, 20;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "1096_FacultySalaries", Strategy1, 1e-7, 155;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "192_vineyard", Strategy4, 1e-3, 15;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "195_auto_price", Strategy1, 1e-7, 65;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "207_autoPrice", Strategy2, 1e-4, 160;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "210_cloud", Strategy4, 1e-7, 55;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "228_elusage", Strategy1, 1e-3, 15;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "230_machine_cpu", Strategy4, 1e-7, 70;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "485_analcatdata_vehicle", Strategy4, 1e-5, 145;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "519_vinnie", Strategy3, 1e-4, 5;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "522_pm10", Strategy2, 1e-4, 30;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "523_analcatdata_neavote", Strategy3, 1e-2, 10;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "527_analcatdata_election2000", Strategy3, 1e-5, 100;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "542_pollution", Strategy4, 1e-4, 30;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "547_no2", Strategy1, 1e-4, 50;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "556_analcatdata_apnea2", Strategy4, 1e-4, 115;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "557_analcatdata_apnea1", Strategy1, 1e-6, 125;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "561_cpu", Strategy3, 1e-7, 140;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "659_sleuth_ex1714", Strategy3, 1e-2, 40;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "663_rabe_266", Strategy1, 1e-7, 185;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "665_sleuth_case2002", Strategy3, 1e-6, 30;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "666_rmftsa_ladata", Strategy1, 1e-3, 70;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "678_visualizing_environmental", Strategy3, 1e-5, 30;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "687_sleuth_ex1605", Strategy2, 1e-7, 30;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "690_visualizing_galaxy", Strategy1, 1e-7, 75;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "695_chatfield_4", Strategy3, 1e-6, 30;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "706_sleuth_case1202", Strategy3, 1e-3, 70;
# datasetName, Strategy, MinimumReductionMSE, MaximumNodes = "712_chscase_geyser1", Strategy1, 1e-3, 45;
# Read the selected dataset: `inputs` holds one row per sample (it is indexed by rows below)
# and `targets` holds one value per sample.
inputs, targets = loadDataset(datasetName);
numFolds = 10;
# Fold assignment of every sample; the same cross-validation indices are used in the rest
# of the experiments
indicesKFold = crossvalidationIndices(length(targets), numFolds);
# One test MSE per fold; every slot is written inside the loop before being read
testValues = Vector{Float64}(undef, numFolds);
println("Dataset \"", datasetNameFromFile(datasetName), "\" with strategy ", Strategy, ", Min. MSE reduction ", MinimumReductionMSE, " and Maximum num. nodes ", MaximumNodes);
for numFold in 1:numFolds
    # Samples assigned to this fold are held out for testing; all the others are for training
    isTest = indicesKFold .== numFold;
    isTrain = indicesKFold .!= numFold;
    foldInputs = inputs[isTrain, :];
    foldTargets = targets[isTrain];
    # Only the fourth value returned by dome (the best tree found) is needed here
    _, _, _, bestTree = dome(
        foldInputs,
        foldTargets;
        minimumReductionMSE = MinimumReductionMSE,
        maximumNodes = MaximumNodes,
        strategy = Strategy,
        showText = false,
    );
    # Write the tree as a string of vector operations and rename its variable "X" to
    # "inputs". `eval` runs in global scope, so the renamed expression is computed over
    # the whole global `inputs` matrix (training and test rows alike).
    treeCode = replace(vectorString(bestTree), "X" => "inputs");
    predictions = eval(Meta.parse(treeCode));
    # Squared error of every sample; only the held-out samples enter the test MSE
    squaredErrors = (targets .- predictions) .^ 2;
    testMSE = mean(squaredErrors[isTest]);
    println(" Finished fold $numFold/$numFolds, MSE in test: $testMSE");
    testValues[numFold] = testMSE;
end
println("Median test MSE: ", median(testValues));