## Fixed marker that temporarily stands in for a comment while substitutions
## run. Each stand-in is written as: place_holder + <1-based index> + ':'.
place_holder = ':C7yae7ozv:'

## Comment texts captured by replaceComments(), in capture order.
## NOTE: to reset this between files, mutate it in place (comments.clear());
## assigning `comments = []` inside another function without `global` only
## creates a local name and leaves this list growing.
comments = []


def replaceComments(m):
    """
    Regex substitution callback: stash the matched text, return its stand-in.

    m is a regex match object; the whole match (group 0) is appended to the
    module-level comments list.

    Returns place_holder followed by the 1-based position of the stored text
    in comments and a trailing ':'.
    """
    comments.append(m.group(0))
    return '%s%d:' % (place_holder, len(comments))


def restoreComments(m):
    """
    Regex substitution callback: map a stand-in back to the stored text.

    m is a regex match object whose group 1 holds the decimal 1-based
    position emitted by replaceComments().

    Returns the stored text for that position. If the position does not refer
    to a stored entry (zero, or larger than the number of stored entries), the
    matched text is returned unchanged, so look-alike tokens that were not
    produced by replaceComments() are left as they were instead of being
    swapped for an unrelated comment or raising IndexError.
    """
    position = int(m.group(1))
    # Position 0 would otherwise become index -1 and wrap to the last entry.
    if not 1 <= position <= len(comments):
        return m.group(0)
    return comments[position - 1]