diff --git a/autoarena/judge/anthropic.py b/autoarena/judge/anthropic.py index fdce1354..e207bdc2 100644 --- a/autoarena/judge/anthropic.py +++ b/autoarena/judge/anthropic.py @@ -1,8 +1,7 @@ from autoarena.api import api from autoarena.api.api import JudgeType from autoarena.judge.base import AutomatedJudge -from autoarena.judge.utils import get_user_prompt, rate_limit -from tests.unit.judge.test_utils import DEFAULT_BATCH_SIZE +from autoarena.judge.utils import get_user_prompt, rate_limit, DEFAULT_BATCH_SIZE class AnthropicJudge(AutomatedJudge): diff --git a/autoarena/judge/cohere.py b/autoarena/judge/cohere.py index 5ca4e4fc..c251e4a4 100644 --- a/autoarena/judge/cohere.py +++ b/autoarena/judge/cohere.py @@ -1,8 +1,7 @@ from autoarena.api import api from autoarena.api.api import JudgeType from autoarena.judge.base import AutomatedJudge -from autoarena.judge.utils import get_user_prompt, rate_limit -from tests.unit.judge.test_utils import DEFAULT_BATCH_SIZE +from autoarena.judge.utils import get_user_prompt, rate_limit, DEFAULT_BATCH_SIZE class CohereJudge(AutomatedJudge): diff --git a/autoarena/judge/executor.py b/autoarena/judge/executor.py index a513ff5c..18606438 100644 --- a/autoarena/judge/executor.py +++ b/autoarena/judge/executor.py @@ -8,7 +8,7 @@ from autoarena.api import api from autoarena.judge.base import Judge -from tests.unit.judge.test_utils import DEFAULT_BATCH_SIZE +from autoarena.judge.utils import DEFAULT_BATCH_SIZE # TODO: this interface is a little gnarly as callers need to deal with responses coming back in any order diff --git a/autoarena/judge/gemini.py b/autoarena/judge/gemini.py index d56c2c2c..9fd03e1f 100644 --- a/autoarena/judge/gemini.py +++ b/autoarena/judge/gemini.py @@ -1,8 +1,7 @@ from autoarena.api import api from autoarena.api.api import JudgeType from autoarena.judge.base import AutomatedJudge -from autoarena.judge.utils import get_user_prompt, JOINED_PROMPT_TEMPLATE, rate_limit -from tests.unit.judge.test_utils import DEFAULT_BATCH_SIZE +from autoarena.judge.utils import get_user_prompt, JOINED_PROMPT_TEMPLATE, rate_limit, DEFAULT_BATCH_SIZE class GeminiJudge(AutomatedJudge): diff --git a/autoarena/judge/openai.py b/autoarena/judge/openai.py index edc88846..45ae93a8 100644 --- a/autoarena/judge/openai.py +++ b/autoarena/judge/openai.py @@ -1,8 +1,7 @@ from autoarena.api import api from autoarena.api.api import JudgeType from autoarena.judge.base import AutomatedJudge -from autoarena.judge.utils import get_user_prompt, rate_limit -from tests.unit.judge.test_utils import DEFAULT_BATCH_SIZE +from autoarena.judge.utils import get_user_prompt, rate_limit, DEFAULT_BATCH_SIZE class OpenAIJudge(AutomatedJudge): diff --git a/autoarena/judge/utils.py b/autoarena/judge/utils.py index e1998034..1b5f863c 100644 --- a/autoarena/judge/utils.py +++ b/autoarena/judge/utils.py @@ -39,6 +39,9 @@ ACCEPTABLE_RESPONSES = {"A", "B", "-"} +# this is more or less a constant as it is baked into the @rate_limit decorators applied to proprietary judges +DEFAULT_BATCH_SIZE = 8 + def get_user_prompt(h2h: api.HeadToHead) -> str: return USER_PROMPT_TEMPLATE.format(prompt=h2h.prompt, response_a=h2h.response_a, response_b=h2h.response_b) diff --git a/tests/unit/judge/test_utils.py b/tests/unit/judge/test_utils.py index d76f2ff1..c231db52 100644 --- a/tests/unit/judge/test_utils.py +++ b/tests/unit/judge/test_utils.py @@ -8,9 +8,6 @@ from autoarena.judge.base import Judge from autoarena.judge.utils import CleaningJudge, RetryingJudge, FixingJudge, ABShufflingJudge, rate_limit -# this is more or less a constant as it is baked into the @rate_limit decorators applied to proprietary judges -DEFAULT_BATCH_SIZE = 8 - class DummyJudge(Judge): def __init__(self, winners: list[str]):