Add a new folder of updated e2e tests with assertions (#46)

* update some e2e_tests to the new assertions format * adapt 2 correction test cases * adapt all cancellations, chitchat, disambiguation * adapt test cases in invalid_path, invalid_user_inputs, negations, potential_bugs * adapt digressions, flow_guards, skip_question * adapt tests for corrections * adapt all happy_path test cases * fix error in running datetime validation in the custom actions * add generative assertion test cases * update gitignore * Add assertions for failing and flaky tests * update rasa-pro to 3.10.0rc1 * add mlflow optional dependency, add new workflow and new make commands * fix CI deprecation warning, fix failing test case * update threshold to prevent flakiness * update button payload and test case * fix flaky passing e2e test --------- Co-authored-by: Maksim Moiseikin <[email protected]>
RasaHQ · Aug 22, 2024 · 9d37bd8 · 9d37bd8
1 parent d089acb
commit 9d37bd8
Show file tree

Hide file tree

Showing 94 changed files with 4,061 additions and 205 deletions.
diff --git a/.github/workflows/continous-integration.yml b/.github/workflows/continous-integration.yml
@@ -100,7 +100,7 @@ jobs:
           path: models/${{steps.upload_model.outputs.model}}.tar.gz
 
   run_e2e_tests:
-    name: Run e2e Tests
+    name: Run e2e tests
     runs-on: ubuntu-22.04
     needs: [train-model]
 
@@ -206,3 +206,112 @@ jobs:
     - name: Stop Duckling server
       run: |
           make stop-duckling
+
+  run_e2e_tests_with_assertions:
+    name: Run e2e tests with assertions
+    runs-on: ubuntu-22.04
+    needs: [train-model]
+
+    steps:
+    - name: Checkout git repository 🕝
+      uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
+
+    - name: Setup Python
+      uses: actions/setup-python@57ded4d7d5e986d7296eab16560982c6dd7c923b
+      with:
+        python-version: ${{ env.DEFAULT_PYTHON_VERSION }}
+
+    - name: Install poetry 🦄
+      uses: Gr1N/setup-poetry@15821dc8a61bc630db542ae4baf6a7c19a994844
+      with:
+        poetry-version: ${{ env.POETRY_VERSION }}
+
+    - name: Load Poetry Cached Libraries ⬇
+      id: cache-poetry  # cache-hit output gates the venv creation step
+      uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8
+      with:
+        path: .venv
+        key: ${{ runner.os }}-poetry-${{ env.POETRY_VERSION }}-${{ env.DEFAULT_PYTHON_VERSION }}-${{ hashFiles('**/poetry.lock') }}
+        restore-keys: ${{ runner.os }}-poetry-${{ env.POETRY_VERSION }}-${{ env.DEFAULT_PYTHON_VERSION }}  # must be a prefix of key to ever match
+
+    - name: Create virtual environment
+      if: steps.cache-poetry.outputs.cache-hit != 'true'  # only when the cached .venv was not restored exactly
+      run: python -m venv .venv  # venv takes only ENV_DIR arguments; an extra "create" word would be built as a second environment
+
+    - name: Set up virtual environment
+      run: poetry config virtualenvs.in-project true
+
+      # Authenticate with gcloud for release registry (where Rasa is published)
+    - id: "auth-release"
+      name: Authenticate with gcloud for release registry 🎫
+      uses: "google-github-actions/auth@ef5d53e30bbcd8d0836f4288f5e50ff3e086997d"
+      with:
+        token_format: 'access_token'
+        credentials_json: "${{ secrets.RASA_RELEASES_READ }}"
+
+    - name: Configure OAuth token for poetry
+      run: |
+        poetry config http-basic.rasa-plus oauth2accesstoken $(gcloud auth print-access-token)
+
+    - name: Install Dependencies 📦
+      run: |
+        make install
+
+    - uses: actions/download-artifact@9bc31d5ccc31df68ecc42ccf4149144866c47d8a
+      with:
+        name: trained-model
+        path: models/
+
+    - name: Init LLM Cache
+      id: cache-llm
+      uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8
+      with:
+        path: .rasa
+        key: rasa-llm-cache
+
+    - name: Run action server
+      env:
+        OPENAI_API_KEY: ${{secrets.OPENAI_API_KEY}}
+        RASA_PRO_LICENSE: ${{secrets.RASA_PRO_LICENSE}}
+        RASA_DUCKLING_HTTP_URL: ${{secrets.DUCKLING_URL}}
+        RASA_PRO_BETA_INTENTLESS: true
+      run: |
+        make actions &
+
+    - name: Run duckling server
+      run: |
+        make run-duckling
+
+    - name: Run e2e passing tests with assertions
+      env:
+        OPENAI_API_KEY: ${{secrets.OPENAI_API_KEY}}
+        RASA_PRO_LICENSE: ${{secrets.RASA_PRO_LICENSE}}
+        RASA_DUCKLING_HTTP_URL: ${{secrets.DUCKLING_URL}}
+        RASA_PRO_BETA_E2E_ASSERTIONS: true
+      run: |
+        make test-passing-assertions
+
+    - name: Run e2e flaky tests with assertions
+      if: always()
+      env:
+        OPENAI_API_KEY: ${{secrets.OPENAI_API_KEY}}
+        RASA_PRO_LICENSE: ${{secrets.RASA_PRO_LICENSE}}
+        RASA_DUCKLING_HTTP_URL: ${{secrets.DUCKLING_URL}}
+        RASA_PRO_BETA_E2E_ASSERTIONS: true
+      run: |
+        make test-flaky-assertions || true
+
+    - name: Run e2e failing tests with assertions  # succeeds only if no test passes; -w keeps "10 passed" from matching
+      if: always()
+      env:
+        OPENAI_API_KEY: ${{secrets.OPENAI_API_KEY}}
+        RASA_PRO_LICENSE: ${{secrets.RASA_PRO_LICENSE}}
+        RASA_DUCKLING_HTTP_URL: ${{secrets.DUCKLING_URL}}
+        RASA_PRO_BETA_E2E_ASSERTIONS: true
+      run: |
+        make test-failing-assertions | grep -w '0 passed'
+
+    - name: Stop Duckling server
+      run: |
+          make stop-duckling
+
diff --git a/.gitignore b/.gitignore
@@ -142,3 +142,6 @@ models/
 prompts/
 tests/
 qdrant_storage/
+
+# mlflow
+mlruns/
diff --git a/Makefile b/Makefile
@@ -91,3 +91,12 @@ test-one: .EXPORT_ALL_VARIABLES
 
 stop-duckling:
 	docker stop duckling_container
+
+test-passing-assertions: .EXPORT_ALL_VARIABLES
+	poetry run rasa test e2e e2e_tests_with_assertions/passing
+
+test-flaky-assertions: .EXPORT_ALL_VARIABLES
+	poetry run rasa test e2e e2e_tests_with_assertions/flaky
+
+test-failing-assertions: .EXPORT_ALL_VARIABLES
+	poetry run rasa test e2e e2e_tests_with_assertions/failing
diff --git a/actions/ask_for_slot_action.py b/actions/ask_for_slot_action.py
@@ -47,7 +47,7 @@ def run(
 
         if len(restaurant_names) > 0:
             dispatcher.utter_message(
-                text="Do you know which restaurant you would like me to reverse a table at?",
+                text="Do you know which restaurant you would like me to reserve a table at?",
                 buttons=[
                     {"title": r, "payload": f'/inform{{"restaurant_name":"{r}"}}'}
                     for r in restaurant_names

diff --git a/actions/setup_recurrent_payment.py b/actions/setup_recurrent_payment.py
@@ -20,7 +20,8 @@ def parse_datetime(text: str) -> Optional[datetime]:
     if isinstance(parsed_value, dict):
         parsed_value = parsed_value["from"]
 
-    return datetime.fromisoformat(parsed_value)
+    result = datetime.fromisoformat(parsed_value)
+    return result.replace(tzinfo=None)
 
 
 class ValidatePaymentStartDate(Action):
@@ -43,7 +44,7 @@ def run(
             dispatcher.utter_message(response="utter_invalid_date")
             return [SlotSet("recurrent_payment_start_date", None)]
 
-        return [SlotSet("recurrent_payment_start_date", start_date.isoformat())]
+        return [SlotSet("recurrent_payment_start_date", start_date.strftime("%Y-%m-%d"))]
 
 
 class ValidatePaymentEndDate(Action):
@@ -66,11 +67,11 @@ def run(
             return [SlotSet("recurrent_payment_end_date", None)]
 
         start_date = tracker.get_slot("recurrent_payment_start_date")
-        if start_date is not None and end_date < datetime.fromisoformat(start_date):
+        if start_date is not None and end_date < datetime.strptime(start_date, "%Y-%m-%d"):
             dispatcher.utter_message(response="utter_invalid_date")
             return [SlotSet("recurrent_payment_end_date", None)]
 
-        return [SlotSet("recurrent_payment_end_date", end_date.isoformat())]
+        return [SlotSet("recurrent_payment_end_date", end_date.strftime("%Y-%m-%d"))]
 
 
 class ExecutePayment(Action):

diff --git a/config/config.yml b/config/config.yml
@@ -28,7 +28,7 @@ pipeline:
 - name: NLUCommandAdapter
 - name: SingleStepLLMCommandGenerator
   llm:
-    model_name: gpt-4
+    model: gpt-4
     request_timeout: 7
     temperature: 0.0
     top_p: 0.0

diff --git a/domain/flows/check_portfolio.yml b/domain/flows/check_portfolio.yml
@@ -31,7 +31,7 @@ responses:
           title: stocks
         - payload: bonds
           title: bonds
-        - payload: mutual_funds
+        - payload: /SetSlots(portfolio_type=mutual_funds)
           title: mutual funds
   utter_portfolio_options_found:
     - text: "Your {portfolio_type} portfolio: {portfolio_options}"

diff --git a/e2e_tests/passing/happy_path/user_checks_portfolio.yml b/e2e_tests/passing/happy_path/user_checks_portfolio.yml
@@ -8,7 +8,7 @@ test_cases:
       - user: "1234"
       - utter: utter_authentication_successful
       - utter: utter_ask_portfolio_type
-      - user: mutual funds
+      - user: /SetSlots(portfolio_type=mutual_funds)
       - slot_was_set:
         - portfolio_type: mutual_funds
         - portfolio_exists: True

diff --git a/e2e_tests_with_assertions/failing/potential_bugs/user_tries_to_use_abstract_values.yml b/e2e_tests_with_assertions/failing/potential_bugs/user_tries_to_use_abstract_values.yml
@@ -0,0 +1,32 @@
+test_cases:
+  - test_case: user tries to use abstract values
+    steps:
+      - user: send money
+        assertions:
+          - bot_uttered:
+              utter_name: utter_ask_transfer_money_recipient
+      # the llm is extracting "good friend" as the recipient
+      - user: to a good friend
+        assertions:
+          - slot_was_not_set:
+              - name: transfer_money_recipient
+          - bot_uttered:
+              utter_name: utter_ask_transfer_money_recipient
+      - user: okay, to Mary
+        assertions:
+          - slot_was_set:
+              - name: transfer_money_recipient
+                value: Mary
+          - bot_uttered:
+              utter_name: utter_ask_transfer_money_amount_of_money
+      - user: "50"
+        assertions:
+          - slot_was_set:
+              - name: transfer_money_amount_of_money
+                value: "50"
+          - bot_uttered:
+              utter_name: utter_ask_transfer_money_final_confirmation
+      - user: "yes"
+        assertions:
+          - bot_uttered:
+              utter_name: utter_transfer_complete
diff --git a/...ns/failing/skip_question/user_tries_to_skip_a_question_and_then_cancels_the_flow_hard.yml b/...ns/failing/skip_question/user_tries_to_skip_a_question_and_then_cancels_the_flow_hard.yml
@@ -0,0 +1,23 @@
+test_cases:
+  - test_case: user tries to skip a question and then cancels the flow (hard)
+    steps:
+      - user: send money to John
+        assertions:
+          - slot_was_set:
+              - name: transfer_money_recipient
+                value: John
+          - bot_uttered:
+              utter_name: utter_ask_transfer_money_amount_of_money
+      - user: i don't want to answer this
+        assertions:
+          # cancel flow instead of skip question is predicted
+          - bot_uttered:
+              utter_name: utter_skip_question_answer
+          - bot_uttered:
+              utter_name: utter_ask_transfer_money_amount_of_money
+      - user: i don't want to continue
+        assertions:
+          - bot_uttered:
+              utter_name: utter_flow_cancelled_rasa
+          - bot_uttered:
+              utter_name: utter_can_do_something_else
diff --git a/...th_assertions/failing/skip_question/user_tries_to_skip_a_question_multiple_times_hard.yml b/...th_assertions/failing/skip_question/user_tries_to_skip_a_question_multiple_times_hard.yml
@@ -0,0 +1,34 @@
+test_cases:
+  - test_case: user tries to skip a question multiple times (hard)
+    steps:
+      - user: send money to John
+        assertions:
+          - slot_was_set:
+              - name: transfer_money_recipient
+                value: John
+          - bot_uttered:
+              utter_name: utter_ask_transfer_money_amount_of_money
+      - user: i don't want to answer this for now
+        assertions:
+          - bot_uttered:
+              utter_name: utter_skip_question_answer
+          - bot_uttered:
+              utter_name: utter_ask_transfer_money_amount_of_money
+      - user: i don't want to answer this
+        assertions:
+          # known failure: cancel flow is predicted instead of skip question
+          - bot_uttered:
+              utter_name: utter_skip_question_answer
+          - bot_uttered:
+              utter_name: utter_ask_transfer_money_amount_of_money
+      - user: okay 50
+        assertions:
+          - slot_was_set:
+              - name: transfer_money_amount_of_money  # the slot itself, not its utter_ask_* response
+                value: "50"
+          - bot_uttered:
+              utter_name: utter_ask_transfer_money_final_confirmation
+      - user: "yes"
+        assertions:
+          - bot_uttered:
+              utter_name: utter_transfer_complete
diff --git a/e2e_tests_with_assertions/failing/skip_question/user_tries_to_skip_a_question_once_hard.yml b/e2e_tests_with_assertions/failing/skip_question/user_tries_to_skip_a_question_once_hard.yml
@@ -0,0 +1,28 @@
+test_cases:
+  - test_case: user tries to skip a question once (hard)
+    steps:
+      - user: send money to John
+        assertions:
+          - slot_was_set:
+              - name: transfer_money_recipient
+                value: John
+          - bot_uttered:
+              utter_name: utter_ask_transfer_money_amount_of_money
+      - user: i don't want to answer this
+        assertions:
+          # cancel flow instead of skip question is predicted
+          - bot_uttered:
+              utter_name: utter_skip_question_answer
+          - bot_uttered:
+              utter_name: utter_ask_transfer_money_amount_of_money
+      - user: okay 50
+        assertions:
+          - slot_was_set:
+              - name: transfer_money_amount_of_money  # the slot itself, not its utter_ask_* response
+                value: "50"
+          - bot_uttered:
+              utter_name: utter_ask_transfer_money_final_confirmation
+      - user: "yes"
+        assertions:
+          - bot_uttered:
+              utter_name: utter_transfer_complete
diff --git a/e2e_tests_with_assertions/flaky/disambiguation/user_sends_short_noun_only_message.yml b/e2e_tests_with_assertions/flaky/disambiguation/user_sends_short_noun_only_message.yml
@@ -0,0 +1,19 @@
+# flaky: a start flow command for "list_contact" is predicted instead of a clarify command
+
+fixtures:
+  - route_to_calm:
+      - route_session_to_calm: true  # slot preset for test cases that list this fixture
+
+test_cases:
+  - test_case: user sends short noun only message
+    fixtures:
+      - route_to_calm
+    steps:
+      - user: contact
+        assertions:
+          - bot_uttered:
+              utter_name: utter_clarification_options_rasa
+      - user: add
+        assertions:
+          - bot_uttered:
+              utter_name: utter_ask_add_contact_handle
diff --git a/e2e_tests_with_assertions/flaky/happy_path/user_sets_up_recurrent_payment.yml b/e2e_tests_with_assertions/flaky/happy_path/user_sets_up_recurrent_payment.yml
@@ -0,0 +1,16 @@
+# recurrent_payment_type is not mapped to standing order
+
+test_cases:
+  - test_case: user wants to set up a new recurrent payment, but specifies the type incompletely, example 3
+    steps:
+      - user: I want to set up a new recurrent payment
+        assertions:
+          - bot_uttered:
+              utter_name: utter_ask_recurrent_payment_type
+      - user: stand order
+        assertions:
+          - slot_was_set:
+              - name: recurrent_payment_type
+                value: standing order
+          - bot_uttered:
+              utter_name: utter_ask_recipient
diff --git a/e2e_tests_with_assertions/flaky/happy_path/user_wants_to_register_to_vote.yml b/e2e_tests_with_assertions/flaky/happy_path/user_wants_to_register_to_vote.yml
@@ -0,0 +1,15 @@
+fixtures:
+  - non_california_resident:  # slot presets for test cases that list this fixture
+      - based_in_california: false
+      - route_session_to_calm: true
+
+test_cases:
+  - test_case: Register to vote for non-California resident (should not trigger)
+    fixtures:
+      - non_california_resident
+    steps:
+      - user: I want to register to vote
+        # ChitChat is predicted instead of no command being predicted
+        assertions:
+          - bot_uttered:
+              utter_name: utter_cannot_answer