Fix typos in multiple places (#2244)

The ACLUE BibTeX title typo was reported to the ACL Anthology and is fixed here, since the title in the paper's PDF is spelled correctly.
EleutherAI · Aug 23, 2024 · fa83764 · fa83764
1 parent 259b756
commit fa83764
Show file tree

Hide file tree

Showing 9 changed files with 60 additions and 60 deletions.
diff --git a/lm_eval/tasks/README.md b/lm_eval/tasks/README.md
@@ -69,7 +69,7 @@
 | [mgsm](mgsm/README.md) | Benchmark of multilingual grade-school math problems. | Spanish, French, German, Russian, Chinese, Japanese, Thai, Swahili, Bengali, Telugu |
 | [minerva_math](minerva_math/README.md) | Mathematics-focused tasks requiring numerical reasoning and problem-solving skills. | English |
 | mmlu | Massive Multitask Language Understanding benchmark for broad domain language evaluation. Several variants are supported. | English |
-| [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigourous. | English |
+| [mmlusr](mmlusr/README.md) | Variation of MMLU designed to be more rigorous. | English |
 | model_written_evals | Evaluation tasks auto-generated for evaluating a collection of AI Safety concerns. | |
 | [mutual](mutual/README.md) | A retrieval-based dataset for multi-turn dialogue reasoning. | English |
 | [nq_open](nq_open/README.md) | Open domain question answering tasks based on the Natural Questions dataset. | English |

diff --git a/lm_eval/tasks/__init__.py b/lm_eval/tasks/__init__.py
@@ -492,7 +492,7 @@ def _get_task_and_group(self, task_dir: str):
                                         "`group` and `group_alias` keys in tasks' configs will no longer be used in the next release of lm-eval. "
                                         "`tag` will be used to allow to call a collection of tasks just like `group`. "
                                         "`group` will be removed in order to not cause confusion with the new ConfigurableGroup "
-                                        "which will be the offical way to create groups with addition of group-wide configuations."
+                                        "which will be the official way to create groups with addition of group-wide configurations."
                                     )
                                     print_info = False
                                     # attr = "tag"

diff --git a/lm_eval/tasks/aclue/README.md b/lm_eval/tasks/aclue/README.md
@@ -14,7 +14,7 @@ Homepage: https://github.com/isen-zhang/ACLUE
 
 ```bibtex
 @inproceedings{zhang-li-2023-large,
-    title = "Can Large Langauge Model Comprehend {A}ncient {C}hinese? A Preliminary Test on {ACLUE}",
+    title = "Can Large Language Model Comprehend {A}ncient {C}hinese? A Preliminary Test on {ACLUE}",
     author = "Zhang, Yixuan  and Li, Haonan",
     booktitle = "Proceedings of the Ancient Language Processing Workshop",
     month = sep,

diff --git a/lm_eval/tasks/eq_bench/README.md b/lm_eval/tasks/eq_bench/README.md
@@ -16,8 +16,8 @@ Homepage: https://eqbench.com/
 NOTE: There are some key differences between the lm-evaluation-harness version and the implementation described in the EQ-Bench paper (These have been OK'd by the author):
 
 - The lm-eval version uses the EQ-Bench v2 test set (171 questions) and score calculation. It does not incorporate the revision part of the prompt, as per v2.1 (https://github.com/EQ-bench/EQ-Bench)
-- No retries in lm-eval version (EQ-Bench pipeline retries with successively higher temps if it encounters unparseable answers)
-- In the original implementation, unparseable answers are excluded from the final score, and 83% of answers have to be parseable or a fail is returned. The lm-eval version instead assigns 0 to unparsable answers and has no fail criteria. So for lower performing models, there may be differences with the EQ-Bench leaderboard.
+- No retries in lm-eval version (EQ-Bench pipeline retries with successively higher temps if it encounters unparsable answers)
+- In the original implementation, unparsable answers are excluded from the final score, and 83% of answers have to be parsable or a fail is returned. The lm-eval version instead assigns 0 to unparsable answers and has no fail criteria. So for lower performing models, there may be differences with the EQ-Bench leaderboard.
 
 
 ### Citation

diff --git a/lm_eval/tasks/ifeval/instructions.py b/lm_eval/tasks/ifeval/instructions.py
@@ -78,7 +78,7 @@
 # The number of highlighted sections.
 _NUM_HIGHLIGHTED_SECTIONS = 4
 
-# The section spliter.
+# The section splitter.
 _SECTION_SPLITER = ("Section", "SECTION")
 
 # The number of sections.
@@ -153,7 +153,7 @@ def build_description(self, *, language=None):
         return self._description_pattern.format(language=_LANGUAGES[self._language])
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {"language": self._language}
 
     def get_instruction_args_keys(self):
@@ -223,7 +223,7 @@ def build_description(self, *, num_sentences=None, relation=None):
         )
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {
             "num_sentences": self._num_sentences_threshold,
             "relation": self._comparison_relation,
@@ -276,7 +276,7 @@ def build_description(self, *, num_placeholders=None):
         return self._description_pattern.format(num_placeholders=self._num_placeholders)
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {"num_placeholders": self._num_placeholders}
 
     def get_instruction_args_keys(self):
@@ -323,7 +323,7 @@ def build_description(self, *, num_bullets=None):
         return self._description_pattern.format(num_bullets=self._num_bullets)
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {"num_bullets": self._num_bullets}
 
     def get_instruction_args_keys(self):
@@ -362,7 +362,7 @@ def build_description(self):
         )
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return None
 
     def get_instruction_args_keys(self):
@@ -393,7 +393,7 @@ def build_description(self, *, starter=None):
         """Build the instruction description.
 
         Args:
-          starter: A string representing the keyward that the response should start
+          starter: A string representing the keyword that the response should start
             with.
 
         Returns:
@@ -409,7 +409,7 @@ def build_description(self, *, starter=None):
         return self._description_pattern.format(starter=self._starter)
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {"starter": self._starter}
 
     def get_instruction_args_keys(self):
@@ -458,7 +458,7 @@ def build_description(self, *, num_highlights=None):
         return self._description_pattern.format(num_highlights=self._num_highlights)
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {"num_highlights": self._num_highlights}
 
     def get_instruction_args_keys(self):
@@ -469,12 +469,12 @@ def check_following(self, value):
         """Checks if the number of highlighted sections meets the requirement.
 
         Args:
-          value: a string repesenting the response. The response is expected to
+          value: a string representing the response. The response is expected to
             contain highlighted sections in the format of *highlighted*.
 
         Returns:
           True if the actual number of highlighted sections in the format of
-          *highlighed sections* meets the minimum requirement; otherwise False.
+          *highlighted sections* meets the minimum requirement; otherwise False.
         """
         num_highlights = 0
         highlights = re.findall(r"\*[^\n\*]*\*", value)
@@ -529,7 +529,7 @@ def build_description(self, *, section_spliter=None, num_sections=None):
         )
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {
             "section_spliter": self._section_spliter,
             "num_sections": self._num_sections,
@@ -582,7 +582,7 @@ def build_description(self, *, num_paragraphs=None):
         return self._description_pattern.format(num_paragraphs=self._num_paragraphs)
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {"num_paragraphs": self._num_paragraphs}
 
     def get_instruction_args_keys(self):
@@ -642,7 +642,7 @@ def build_description(self, *, postscript_marker=None):
         return self._description_pattern.format(postscript=self._postscript_marker)
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {"postscript_marker": self._postscript_marker}
 
     def get_instruction_args_keys(self):
@@ -672,7 +672,7 @@ def check_following(self, value):
 
 
 class RephraseChecker(Instruction):
-    """Checks the repharse."""
+    """Checks the rephrase."""
 
     def build_description(self, *, original_message):
         """Build the instruction description.
@@ -701,7 +701,7 @@ def build_description(self, *, original_message):
         return self._description
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {"original_message": self._reference_without_change}
 
     def get_instruction_args_keys(self):
@@ -766,7 +766,7 @@ def build_description(self, *, keywords=None):
         return self._description_pattern.format(keywords=self._keywords)
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {"keywords": self._keywords}
 
     def get_instruction_args_keys(self):
@@ -831,7 +831,7 @@ def build_description(self, *, keyword=None, frequency=None, relation=None):
         )
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {
             "keyword": self._keyword,
             "frequency": self._frequency,
@@ -894,7 +894,7 @@ def build_description(self, *, num_words=None, relation=None):
         )
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {"num_words": self._num_words, "relation": self._comparison_relation}
 
     def get_instruction_args_keys(self):
@@ -922,7 +922,7 @@ def build_description(self):
         return self._description_pattern
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return None
 
     def get_instruction_args_keys(self):
@@ -996,7 +996,7 @@ def build_description(
         )
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {
             "num_paragraphs": self._num_paragraphs,
             "nth_paragraph": self._nth_paragraph,
@@ -1089,7 +1089,7 @@ def build_description(self, key_sentences=None, num_sentences=None):
         )
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {
             "num_sentences": self._num_sentences,
             "key_sentences": list(self._key_sentences),
@@ -1117,7 +1117,7 @@ def build_description(self, forbidden_words=None):
         """Build the instruction description.
 
         Args:
-          forbidden_words: A sequences of strings respresenting words that are not
+          forbidden_words: A sequence of strings representing words that are not
             allowed in the response.
 
         Returns:
@@ -1138,7 +1138,7 @@ def build_description(self, forbidden_words=None):
         return self._description_pattern.format(forbidden_words=self._forbidden_words)
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {"forbidden_words": self._forbidden_words}
 
     def get_instruction_args_keys(self):
@@ -1188,7 +1188,7 @@ def build_description(self, *, original_paragraph, low, high):
         )
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return {
             "original_paragraph": self._original_paragraph,
             "low": self._low,
@@ -1225,7 +1225,7 @@ def build_description(self):
         return self._description_pattern
 
     def get_instruction_args(self):
-        """Returns the keyward args of `build_description`."""
+        """Returns the keyword args of `build_description`."""
         return None
 
     def get_instruction_args_keys(self):