Skip to content

Commit

Permalink
Add a new folder of updated e2e tests with assertions (#46)
Browse files Browse the repository at this point in the history
* update some e2e_tests to the new assertions format

* adapt 2 correction test cases

* adapt all cancellations, chitchat, disambiguation

* adapt test cases in invalid_path, invalid_user_inputs, negations, potential_bugs

* adapt digressions, flow_guards, skip_question

* adapt tests for corrections

* adapt all happy_path test cases

* fix error in running datetime validation in the custom actions

* add generative assertion test cases

* update gitignore

* Add assertions for failing and flaky tests

* update rasa-pro to 3.10.0rc1

* add mlflow optional dependency, add new workflow and new make commands

* fix CI deprecation warning, fix failing test case

* update threshold to prevent flakiness

* update button payload and test case

* fix flaky passing e2e test

---------

Co-authored-by: Maksim Moiseikin <[email protected]>
  • Loading branch information
ancalita and maksim-m authored Aug 22, 2024
1 parent d089acb commit 9d37bd8
Show file tree
Hide file tree
Showing 94 changed files with 4,061 additions and 205 deletions.
111 changes: 110 additions & 1 deletion .github/workflows/continous-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ jobs:
path: models/${{steps.upload_model.outputs.model}}.tar.gz

run_e2e_tests:
name: Run e2e Tests
name: Run e2e tests
runs-on: ubuntu-22.04
needs: [train-model]

Expand Down Expand Up @@ -206,3 +206,112 @@ jobs:
- name: Stop Duckling server
run: |
make stop-duckling
run_e2e_tests_with_assertions:
  # Runs the assertion-based e2e suites (passing, flaky, failing) against the
  # model artifact produced by the train-model job.
  name: Run e2e tests with assertions
  runs-on: ubuntu-22.04
  needs: [train-model]

  steps:
    - name: Checkout git repository 🕝
      uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c

    - name: Setup Python
      uses: actions/setup-python@57ded4d7d5e986d7296eab16560982c6dd7c923b
      with:
        python-version: ${{ env.DEFAULT_PYTHON_VERSION }}

    - name: Install poetry 🦄
      uses: Gr1N/setup-poetry@15821dc8a61bc630db542ae4baf6a7c19a994844
      with:
        poetry-version: ${{ env.POETRY_VERSION }}

    - name: Load Poetry Cached Libraries ⬇
      id: cache-poetry
      uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8
      with:
        path: .venv
        key: ${{ runner.os }}-poetry-${{ env.POETRY_VERSION }}-${{ env.DEFAULT_PYTHON_VERSION }}-${{ hashFiles('**/poetry.lock') }}
        # restore-keys are matched as prefixes of existing cache keys, so the
        # fallback has to repeat the leading segments of `key` (including the
        # poetry version) to ever produce a hit.
        restore-keys: ${{ runner.os }}-poetry-${{ env.POETRY_VERSION }}-${{ env.DEFAULT_PYTHON_VERSION }}-

    - name: Create virtual environment
      if: steps.cache-poetry.outputs.cache-hit != 'true'
      # `venv` treats every positional argument as an environment directory;
      # only `.venv` is wanted here.
      run: python -m venv .venv

    - name: Set up virtual environment
      run: poetry config virtualenvs.in-project true

    # Authenticate with gcloud for release registry (where Rasa is published)
    - id: "auth-release"
      name: Authenticate with gcloud for release registry 🎫
      uses: "google-github-actions/auth@ef5d53e30bbcd8d0836f4288f5e50ff3e086997d"
      with:
        token_format: 'access_token'
        credentials_json: "${{ secrets.RASA_RELEASES_READ }}"

    - name: Configure OAuth token for poetry
      run: |
        poetry config http-basic.rasa-plus oauth2accesstoken $(gcloud auth print-access-token)

    - name: Install Dependencies 📦
      run: |
        make install

    # Fetch the `trained-model` artifact into models/.
    - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a
      with:
        name: trained-model
        path: models/

    - name: Init LLM Cache
      id: cache-llm
      uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8
      with:
        path: .rasa
        key: rasa-llm-cache

    - name: Run action server
      env:
        OPENAI_API_KEY: ${{secrets.OPENAI_API_KEY}}
        RASA_PRO_LICENSE: ${{secrets.RASA_PRO_LICENSE}}
        RASA_DUCKLING_HTTP_URL: ${{secrets.DUCKLING_URL}}
        RASA_PRO_BETA_INTENTLESS: "true"
      # Started in the background so the following steps can talk to it.
      run: |
        make actions &

    - name: Run duckling server
      run: |
        make run-duckling

    - name: Run e2e passing tests with assertions
      env:
        OPENAI_API_KEY: ${{secrets.OPENAI_API_KEY}}
        RASA_PRO_LICENSE: ${{secrets.RASA_PRO_LICENSE}}
        RASA_DUCKLING_HTTP_URL: ${{secrets.DUCKLING_URL}}
        RASA_PRO_BETA_E2E_ASSERTIONS: "true"
      run: |
        make test-passing-assertions

    - name: Run e2e flaky tests with assertions
      if: always()
      env:
        OPENAI_API_KEY: ${{secrets.OPENAI_API_KEY}}
        RASA_PRO_LICENSE: ${{secrets.RASA_PRO_LICENSE}}
        RASA_DUCKLING_HTTP_URL: ${{secrets.DUCKLING_URL}}
        RASA_PRO_BETA_E2E_ASSERTIONS: "true"
      # `|| true` keeps flaky results from failing the job.
      run: |
        make test-flaky-assertions || true

    - name: Run e2e failing tests with assertions
      if: always()
      env:
        OPENAI_API_KEY: ${{secrets.OPENAI_API_KEY}}
        RASA_PRO_LICENSE: ${{secrets.RASA_PRO_LICENSE}}
        RASA_DUCKLING_HTTP_URL: ${{secrets.DUCKLING_URL}}
        RASA_PRO_BETA_E2E_ASSERTIONS: "true"
      # The step succeeds only if the summary reports exactly zero passed
      # tests. The leading non-digit guard stops "10 passed" / "20 passed"
      # from satisfying the check.
      run: |
        make test-failing-assertions | grep -E '(^|[^0-9])0 passed'

    - name: Stop Duckling server
      run: |
        make stop-duckling
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,6 @@ models/
prompts/
tests/
qdrant_storage/

# mlflow
mlruns/
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,12 @@ test-one: .EXPORT_ALL_VARIABLES

stop-duckling:
docker stop duckling_container

# Run the assertion-based e2e tests stored under
# e2e_tests_with_assertions/passing.
test-passing-assertions: .EXPORT_ALL_VARIABLES
poetry run rasa test e2e e2e_tests_with_assertions/passing

# Run the assertion-based e2e tests stored under
# e2e_tests_with_assertions/flaky.
test-flaky-assertions: .EXPORT_ALL_VARIABLES
poetry run rasa test e2e e2e_tests_with_assertions/flaky

# Run the assertion-based e2e tests stored under
# e2e_tests_with_assertions/failing.
test-failing-assertions: .EXPORT_ALL_VARIABLES
poetry run rasa test e2e e2e_tests_with_assertions/failing
2 changes: 1 addition & 1 deletion actions/ask_for_slot_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def run(

if len(restaurant_names) > 0:
dispatcher.utter_message(
text="Do you know which restaurant you would like me to reverse a table at?",
text="Do you know which restaurant you would like me to reserve a table at?",
buttons=[
{"title": r, "payload": f'/inform{{"restaurant_name":"{r}"}}'}
for r in restaurant_names
Expand Down
9 changes: 5 additions & 4 deletions actions/setup_recurrent_payment.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def parse_datetime(text: str) -> Optional[datetime]:
if isinstance(parsed_value, dict):
parsed_value = parsed_value["from"]

return datetime.fromisoformat(parsed_value)
result = datetime.fromisoformat(parsed_value)
return result.replace(tzinfo=None)


class ValidatePaymentStartDate(Action):
Expand All @@ -43,7 +44,7 @@ def run(
dispatcher.utter_message(response="utter_invalid_date")
return [SlotSet("recurrent_payment_start_date", None)]

return [SlotSet("recurrent_payment_start_date", start_date.isoformat())]
return [SlotSet("recurrent_payment_start_date", start_date.strftime("%Y-%m-%d"))]


class ValidatePaymentEndDate(Action):
Expand All @@ -66,11 +67,11 @@ def run(
return [SlotSet("recurrent_payment_end_date", None)]

start_date = tracker.get_slot("recurrent_payment_start_date")
if start_date is not None and end_date < datetime.fromisoformat(start_date):
if start_date is not None and end_date < datetime.strptime(start_date, "%Y-%m-%d"):
dispatcher.utter_message(response="utter_invalid_date")
return [SlotSet("recurrent_payment_end_date", None)]

return [SlotSet("recurrent_payment_end_date", end_date.isoformat())]
return [SlotSet("recurrent_payment_end_date", end_date.strftime("%Y-%m-%d"))]


class ExecutePayment(Action):
Expand Down
2 changes: 1 addition & 1 deletion config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pipeline:
- name: NLUCommandAdapter
- name: SingleStepLLMCommandGenerator
llm:
model_name: gpt-4
model: gpt-4
request_timeout: 7
temperature: 0.0
top_p: 0.0
Expand Down
2 changes: 1 addition & 1 deletion domain/flows/check_portfolio.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ responses:
title: stocks
- payload: bonds
title: bonds
- payload: mutual_funds
- payload: /SetSlots(portfolio_type=mutual_funds)
title: mutual funds
utter_portfolio_options_found:
- text: "Your {portfolio_type} portfolio: {portfolio_options}"
Expand Down
2 changes: 1 addition & 1 deletion e2e_tests/passing/happy_path/user_checks_portfolio.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ test_cases:
- user: "1234"
- utter: utter_authentication_successful
- utter: utter_ask_portfolio_type
- user: mutual funds
- user: /SetSlots(portfolio_type=mutual_funds)
- slot_was_set:
- portfolio_type: mutual_funds
- portfolio_exists: True
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Money transfer where the user first answers the recipient question with an
# abstract value and only afterwards with a concrete name.
test_cases:
  - test_case: user tries to use abstract values
    steps:
      - user: send money
        assertions:
          - bot_uttered:
              utter_name: utter_ask_transfer_money_recipient

      # Known issue: the LLM extracts "good friend" as the recipient, which
      # contradicts the slot_was_not_set assertion below.
      - user: to a good friend
        assertions:
          - slot_was_not_set:
              - name: transfer_money_recipient
          - bot_uttered:
              utter_name: utter_ask_transfer_money_recipient

      - user: okay, to Mary
        assertions:
          - slot_was_set:
              - name: transfer_money_recipient
                value: Mary
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money

      - user: '50'
        assertions:
          - slot_was_set:
              - name: transfer_money_amount_of_money
                value: '50'
          - bot_uttered:
              utter_name: utter_ask_transfer_money_final_confirmation

      - user: 'yes'
        assertions:
          - bot_uttered:
              utter_name: utter_transfer_complete
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Money transfer where the user declines to answer the amount question and
# then cancels the flow altogether.
test_cases:
  - test_case: user tries to skip a question and then cancels the flow (hard)
    steps:
      - user: send money to John
        assertions:
          - slot_was_set:
              - name: transfer_money_recipient
                value: John
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money

      - user: i don't want to answer this
        assertions:
          # Known issue: a cancel-flow command is predicted here instead of
          # skip-question.
          - bot_uttered:
              utter_name: utter_skip_question_answer
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money

      - user: i don't want to continue
        assertions:
          - bot_uttered:
              utter_name: utter_flow_cancelled_rasa
          - bot_uttered:
              utter_name: utter_can_do_something_else
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Money transfer where the user declines to answer the amount question twice
# before finally providing the amount.
test_cases:
  - test_case: user tries to skip a question multiple times (hard)
    steps:
      - user: send money to John
        assertions:
          - slot_was_set:
              - name: transfer_money_recipient
                value: John
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money

      - user: i don't want to answer this for now
        assertions:
          - bot_uttered:
              utter_name: utter_skip_question_answer
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money

      # Known issue: a cancel-flow command is predicted here instead of
      # skip-question.
      - user: i don't want to answer this
        assertions:
          - bot_uttered:
              utter_name: utter_skip_question_answer
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money

      - user: okay 50
        assertions:
          # The slot is transfer_money_amount_of_money; the utter_ask_* name
          # is the response that collects it, not a slot.
          - slot_was_set:
              - name: transfer_money_amount_of_money
                value: "50"
          - bot_uttered:
              utter_name: utter_ask_transfer_money_final_confirmation

      - user: "yes"
        assertions:
          - bot_uttered:
              utter_name: utter_transfer_complete
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Money transfer where the user declines to answer the amount question once
# and then provides the amount.
test_cases:
  - test_case: user tries to skip a question once (hard)
    steps:
      - user: send money to John
        assertions:
          - slot_was_set:
              - name: transfer_money_recipient
                value: John
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money

      - user: i don't want to answer this
        assertions:
          # Known issue: a cancel-flow command is predicted here instead of
          # skip-question.
          - bot_uttered:
              utter_name: utter_skip_question_answer
          - bot_uttered:
              utter_name: utter_ask_transfer_money_amount_of_money

      - user: okay 50
        assertions:
          # The slot is transfer_money_amount_of_money; the utter_ask_* name
          # is the response that collects it, not a slot.
          - slot_was_set:
              - name: transfer_money_amount_of_money
                value: "50"
          - bot_uttered:
              utter_name: utter_ask_transfer_money_final_confirmation

      - user: "yes"
        assertions:
          - bot_uttered:
              utter_name: utter_transfer_complete
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Known issue: instead of a clarify command, a start-flow command for
# "list_contact" is predicted.

fixtures:
  - route_to_calm:
      - route_session_to_calm: true

test_cases:
  - test_case: user sends short noun only message
    fixtures:
      - route_to_calm
    steps:
      - user: contact
        assertions:
          - bot_uttered:
              utter_name: utter_clarification_options_rasa

      - user: add
        assertions:
          - bot_uttered:
              utter_name: utter_ask_add_contact_handle
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Known issue: recurrent_payment_type is not mapped to "standing order" for
# the incomplete input used below.

test_cases:
  - test_case: user wants to set up a new recurrent payment, but specifies the type incompletely, example 3
    steps:
      - user: I want to set up a new recurrent payment
        assertions:
          - bot_uttered:
              utter_name: utter_ask_recurrent_payment_type

      - user: stand order
        assertions:
          - slot_was_set:
              - name: recurrent_payment_type
                value: standing order
          - bot_uttered:
              utter_name: utter_ask_recipient
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Voter registration request from a session whose fixture marks the user as
# not based in California.
fixtures:
  - non_california_resident:
      - based_in_california: false
      - route_session_to_calm: true

test_cases:
  - test_case: Register to vote for non-California resident (should not trigger)
    fixtures:
      - non_california_resident
    steps:
      # Known issue: ChitChat is predicted instead of no command being
      # predicted.
      - user: I want to register to vote
        assertions:
          - bot_uttered:
              utter_name: utter_cannot_answer
Loading

0 comments on commit 9d37bd8

Please sign in to comment.