From 4601b35e57b87114dfb4118d190fdeb4b7aef204 Mon Sep 17 00:00:00 2001 From: Yifan Mai Date: Fri, 11 Aug 2023 11:09:47 -0700 Subject: [PATCH] Use HTML entities instead --- .../mechanical_turk_critique_exporter.py | 52 +------------------ .../proxy/clients/mechanical_turk_utils.py | 5 +- 2 files changed, 3 insertions(+), 54 deletions(-) diff --git a/src/helm/proxy/clients/mechanical_turk_critique_exporter.py b/src/helm/proxy/clients/mechanical_turk_critique_exporter.py index 0c88f97b7b..a8f5fe317b 100644 --- a/src/helm/proxy/clients/mechanical_turk_critique_exporter.py +++ b/src/helm/proxy/clients/mechanical_turk_critique_exporter.py @@ -26,7 +26,7 @@ def _format_template_tags(raw_text: str) -> str: def _render_template_crowd_html(task_template: CritiqueTaskTemplate) -> str: """Render the Crowd HTML for the template.""" - scripts_crowd_html = textwrap.dedent( + validation_crowd_html = textwrap.dedent( """\ - """ ) @@ -115,7 +67,7 @@ def _render_template_crowd_html(task_template: CritiqueTaskTemplate) -> str: return textwrap.dedent( f"""\ - {_indent_to_level(scripts_crowd_html, 2)} + {_indent_to_level(validation_crowd_html, 2)} {_indent_to_level(instructions_crowd_html, 3)} {_indent_to_level(divider_html, 3)} diff --git a/src/helm/proxy/clients/mechanical_turk_utils.py b/src/helm/proxy/clients/mechanical_turk_utils.py index 970a66940f..8c46aa1ae7 100644 --- a/src/helm/proxy/clients/mechanical_turk_utils.py +++ b/src/helm/proxy/clients/mechanical_turk_utils.py @@ -1,4 +1,3 @@ -import codecs import json import re import sys @@ -29,9 +28,7 @@ def _emoji_match_to_span(emoji_match): Returns: Unicode string """ - bytes = codecs.encode(emoji_match.group(), "utf-8") - bytes_as_json = json.dumps([b for b in bytearray(bytes)]) - return "" % bytes_as_json + return emoji_match.group().encode("ascii", "xmlcharrefreplace").decode() # The procedure for stripping Emoji characters is based on this # StackOverflow post: