-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_augmentation.py
102 lines (79 loc) · 3 KB
/
data_augmentation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import random
from SimpleGA import load_or_create_pickle_file,save_to_pickle_file
def parse_encoding(encoding):
    """Decode a flat level descriptor string back into a 2D grid.

    The descriptor layout is ``<tag><rows><cols><cells>`` optionally followed
    by a newline: index 0 is a one-character tag, index 1 the row count,
    index 2 the column count (each a single digit), and everything from
    index 3 onward is the grid cells in row-major order.

    Args:
        encoding: The descriptor string.

    Returns:
        A list of rows, each row being a list of single-character cells.
        An encoding with no cell data yields an empty list.

    Raises:
        ValueError: If the column field is not a digit or is zero.
        IndexError: If the encoding is shorter than the three-character header.
    """
    # Only the column count is needed to re-chunk the cells; the number of
    # rows follows from the amount of cell data.
    cols = int(encoding[2])
    if cols <= 0:
        raise ValueError("column count in encoding must be a positive digit")
    # Drop the trailing newline explicitly rather than popping the last chunk:
    # popping unconditionally would discard a real row whenever the newline
    # is absent, and would fail outright on an empty grid.
    grid_data = encoding[3:].rstrip('\n')
    return [list(grid_data[i:i + cols]) for i in range(0, len(grid_data), cols)]
def generate_permutations(s, limit=None):
    """Return distinct variants of ``s`` with its 'P'/'N' characters rearranged.

    Only the positions holding 'P' or 'N' are touched; every other character
    stays where it is. The counts of 'P' and 'N' are preserved, and the
    original arrangement itself may appear in the result.

    Args:
        s: The source string.
        limit: Maximum number of distinct variants to return. They are found
            by random shuffling, giving up after ``limit * 10`` attempts, so
            fewer than ``limit`` may be returned. When ``None``, every
            distinct arrangement is enumerated deterministically instead
            (this can be very large for strings with many 'P'/'N' cells).

    Returns:
        A list of unique strings, each the same length as ``s``.
    """
    # Local import: only the exhaustive (limit=None) path needs it.
    import itertools

    pn_positions = [i for i, c in enumerate(s) if c in ('P', 'N')]
    # One mutable copy is enough: every pass overwrites all 'P'/'N' slots and
    # never touches the remaining characters.
    template = list(s)

    if limit is None:
        # A distinct arrangement is fully determined by which slots get 'P'.
        p_count = sum(1 for i in pn_positions if s[i] == 'P')
        arrangements = []
        for p_slots in itertools.combinations(pn_positions, p_count):
            chosen = set(p_slots)
            for position in pn_positions:
                template[position] = 'P' if position in chosen else 'N'
            arrangements.append(''.join(template))
        return arrangements

    pn_values = [s[i] for i in pn_positions]
    output = []
    seen = set()
    attempts = 0
    max_attempts = limit * 10
    # The attempt cap keeps the loop finite when fewer than `limit` distinct
    # arrangements exist.
    while len(output) < limit and attempts < max_attempts:
        random.shuffle(pn_values)
        for position, value in zip(pn_positions, pn_values):
            template[position] = value
        candidate = ''.join(template)
        if candidate not in seen:
            seen.add(candidate)
            output.append(candidate)
        attempts += 1
    return output
# Interactive script: pick one difficulty dataset, add shuffled 'P'/'N'
# variants of every level in it, and write the enlarged dataset back.

# Raw strings keep these backslash-separated paths byte-for-byte while
# avoiding invalid escape sequences such as "\p" and "\e".
easy_levels = r'levels_datasets\pickles\easy_levels.pickle'
medium_levels = r'levels_datasets\pickles\medium_levels.pickle'
hard_levels = r'levels_datasets\pickles\hard_levels.pickle'
# Each name is rebound from the file path to the loaded dataset.
# NOTE(review): the datasets are presumably sets of tuple-of-tuples grids
# (they are extended with set.update below) -- confirm against SimpleGA.
easy_levels = load_or_create_pickle_file(easy_levels)
medium_levels = load_or_create_pickle_file(medium_levels)
hard_levels = load_or_create_pickle_file(hard_levels)

print("Which dataset you want to augment? 1) easy 2) medium 3) hard")
answer = input().strip()
if answer == '1':
    levels = easy_levels
    difficulty = 'easy'
    short_difficulty = 'E'
elif answer == '2':
    levels = medium_levels
    difficulty = 'medium'
    short_difficulty = 'M'
elif answer == '3':
    levels = hard_levels
    difficulty = 'hard'
    short_difficulty = 'H'
else:
    # Stop with a clear message instead of failing later with a NameError
    # on the undefined `levels`.
    raise SystemExit(f"Unknown choice {answer!r}: expected 1, 2 or 3")

# Augmentations are collected separately because `levels` must not change
# size while it is being iterated.
new_levels = set()
for level in levels:
    print("Level encoding: ")
    # Get the number of rows and columns
    rows = len(level)
    cols = len(level[0]) if rows else 0
    # The descriptor stores each dimension as a single character, so larger
    # (or empty) grids cannot be round-tripped through parse_encoding.
    if not (1 <= rows <= 9 and 1 <= cols <= 9):
        print(f"skipping level with unsupported size {rows}x{cols}")
        continue
    # Convert 2D level tuple to 1D string
    level_1d = ''.join(''.join(row) for row in level)
    # Create the level descriptor string
    level_descriptor = f"{short_difficulty}{rows}{cols}{level_1d}\n"
    print(level_descriptor)
    # Shuffle the 'P'/'N' cells, then decode each variant back into a grid
    augmentations = generate_permutations(level_descriptor, limit=10)
    for augmentation in augmentations:
        augmentation_parsed = parse_encoding(augmentation)
        # Store as a tuple of tuples so the grid is hashable
        new_levels.add(tuple(tuple(row) for row in augmentation_parsed))
    print(f"added {len(augmentations)} augmentations of this level to the set")

# Update the original set with the new elements
levels.update(new_levels)
# Save the updated dataset back to the file it was loaded from
save_to_pickle_file(levels, rf'levels_datasets\pickles\{difficulty}_levels.pickle')