Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updating mnemonic language determination #142

Merged
merged 7 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/src/existing_mnemonic.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ Uses an existing BIP-39 mnemonic phrase for key generation.

- **`--mnemonic`**: The mnemonic you used to create withdrawal credentials. <span class="warning"></span>

- **`--mnemonic_language`**: The language of your mnemonic. If this is not provided we will attempt to determine it based on the mnemonic.

- **`--mnemonic_password`**: The mnemonic password you used in your key generation. Note: It's not the keystore password. <span class="warning"></span>

- **`--validator_start_index`**: The index of the first validator's keys you wish to generate. If this is your first time generating keys with this mnemonic, use 0. If you have generated keys using this mnemonic before, use the next index from which you want to start generating keys. For example, if you've generated 4 keys before (keys #0, #1, #2, #3), then enter 4 here.
Expand Down
2 changes: 2 additions & 0 deletions docs/src/exit_transaction_mnemonic.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ Creates an exit transaction using a mnemonic phrase.

- **`--mnemonic`**: The mnemonic you used during key generation. <span class="warning"></span>

- **`--mnemonic_language`**: The language of your mnemonic. If this is not provided we will attempt to determine it based on the mnemonic.

- **`--mnemonic_password`**: The mnemonic password you used in your key generation. Note: It's not the keystore password. <span class="warning"></span>

- **`--validator_start_index`**: The index position for the keys to start generating keystores in [ERC-2334 format](https://eips.ethereum.org/EIPS/eip-2334#eth2-specific-parameters).
Expand Down
2 changes: 2 additions & 0 deletions docs/src/generate_bls_to_execution_change.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Generates a BLS to execution address change message. This is used to add a withd

- **`--mnemonic`**: The mnemonic you used to create withdrawal credentials. <span class="warning"></span>

- **`--mnemonic_language`**: The language of your mnemonic. If this is not provided we will attempt to determine it based on the mnemonic.

- **`--mnemonic_password`**: The mnemonic password you used in your key generation. Note: It's not the keystore password. <span class="warning"></span>

- **`--validator_start_index`**: The index position for the keys to start generating withdrawal credentials for.
Expand Down
46 changes: 38 additions & 8 deletions ethstaker_deposit/cli/existing_mnemonic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,23 @@
from typing import (
Any,
Callable,
Optional,
)

from ethstaker_deposit.exceptions import ValidationError
from ethstaker_deposit.exceptions import MultiLanguageError, ValidationError
from ethstaker_deposit.key_handling.key_derivation.mnemonic import (
reconstruct_mnemonic,
)
from ethstaker_deposit.utils.constants import (
MNEMONIC_LANG_OPTIONS,
WORD_LISTS_PATH,
)
from ethstaker_deposit.utils.click import (
captive_prompt_callback,
choice_prompt_func,
jit_option,
)
from ethstaker_deposit.utils.intl import load_text
from ethstaker_deposit.utils.intl import fuzzy_reverse_dict_lookup, get_first_options, load_text
from ethstaker_deposit.utils.validation import validate_int_range
from .generate_keys import (
generate_keys,
Expand All @@ -31,10 +34,15 @@ def load_mnemonic_arguments_decorator(function: Callable[..., Any]) -> Callable[
'''
decorators = [
jit_option(
callback=validate_mnemonic,
callback=lambda c, _, mnemonic:
captive_prompt_callback(
lambda mnemonic: validate_mnemonic(mnemonic=mnemonic, language=c.params.get('mnemonic_language')),
prompt=lambda: load_text(['arg_mnemonic', 'prompt'], func='existing_mnemonic'),
prompt_if_none=True,
)(c, _, mnemonic),
help=lambda: load_text(['arg_mnemonic', 'help'], func='existing_mnemonic'),
param_decls='--mnemonic',
prompt=lambda: load_text(['arg_mnemonic', 'prompt'], func='existing_mnemonic'),
prompt=False,
type=str,
),
jit_option(
Expand All @@ -51,20 +59,42 @@ def load_mnemonic_arguments_decorator(function: Callable[..., Any]) -> Callable[
param_decls='--mnemonic_password',
prompt=False,
),
jit_option(
callback=validate_mnemonic_language,
default=None,
help=lambda: load_text(['arg_mnemonic_language', 'help'], func='existing_mnemonic'),
param_decls='--mnemonic_language',
prompt=None,
),
]
for decorator in reversed(decorators):
function = decorator(function)
return function


def validate_mnemonic(ctx: click.Context, param: Any, mnemonic: str) -> str:
mnemonic = reconstruct_mnemonic(mnemonic, WORD_LISTS_PATH)
if mnemonic is not None:
return mnemonic
def validate_mnemonic(mnemonic: str, language: Optional[str] = None) -> str:
    '''
    Reconstructs and validates `mnemonic`, optionally restricted to `language`.

    If `reconstruct_mnemonic` reports that the mnemonic is valid in several
    languages (MultiLanguageError), the user is prompted to pick one of the
    reported languages and validation is retried with that choice.

    Returns the reconstructed mnemonic.
    Raises ValidationError when `reconstruct_mnemonic` returns None.
    '''
    try:
        rebuilt = reconstruct_mnemonic(mnemonic, WORD_LISTS_PATH, language)
    except MultiLanguageError as err:
        # Restrict the offered choices to the languages reported by the error
        candidates = {lang: MNEMONIC_LANG_OPTIONS[lang] for lang in err.languages}
        choices = sorted(get_first_options(candidates))
        build_prompt = choice_prompt_func(
            lambda: load_text(['arg_mnemonic_language'], func='validate_mnemonic'),
            choices,
            False,  # add_colon=False
        )
        answer = click.prompt(build_prompt())
        chosen_language = fuzzy_reverse_dict_lookup(answer, MNEMONIC_LANG_OPTIONS)
        # Retry with the selected language
        return validate_mnemonic(mnemonic=mnemonic, language=chosen_language)

    if rebuilt is None:
        # NOTE(review): load_text is called without `func` here; presumably it resolves
        # the text from the calling function's name, so keep this call inside
        # validate_mnemonic -- confirm against load_text.
        raise ValidationError(load_text(['err_invalid_mnemonic']))
    return rebuilt


def validate_mnemonic_language(ctx: click.Context, param: Any, language: str) -> Optional[str]:
    '''
    Option callback for `--mnemonic_language`.

    Returns None when no language was supplied (falsy value); otherwise returns
    the result of looking `language` up in MNEMONIC_LANG_OPTIONS through
    `fuzzy_reverse_dict_lookup`.
    '''
    if not language:
        return None
    return fuzzy_reverse_dict_lookup(language, MNEMONIC_LANG_OPTIONS)


@click.command(
help=load_text(['arg_existing_mnemonic', 'help'], func='existing_mnemonic'),
)
Expand Down
2 changes: 1 addition & 1 deletion ethstaker_deposit/cli/new_mnemonic.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
def new_mnemonic(ctx: click.Context, mnemonic_language: str, **kwargs: Any) -> None:
mnemonic = get_mnemonic(language=mnemonic_language, words_path=WORD_LISTS_PATH)
test_mnemonic = ''
while mnemonic != reconstruct_mnemonic(test_mnemonic, WORD_LISTS_PATH):
while mnemonic != reconstruct_mnemonic(test_mnemonic, WORD_LISTS_PATH, mnemonic_language):
clear_terminal()
click.echo(load_text(['msg_mnemonic_presentation']))
click.echo('\n\n%s\n\n' % mnemonic)
Expand Down
7 changes: 7 additions & 0 deletions ethstaker_deposit/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,9 @@
class ValidationError(Exception):
    """Raised when a validation check fails (for example, an invalid mnemonic)."""


class MultiLanguageError(Exception):
    """
    Raised when more than one valid language is found.

    The candidate languages are kept on the `languages` attribute so the
    caller can inspect them; the exception message lists them comma-separated.
    """

    def __init__(self, languages: list[str]):
        # Languages that were found valid, as supplied by the raiser
        self.languages = languages
        message = "Multiple valid languages found: %s" % ", ".join(languages)
        super().__init__(message)
6 changes: 5 additions & 1 deletion ethstaker_deposit/intl/en/cli/existing_mnemonic.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"validate_mnemonic": {
"err_invalid_mnemonic": "That is not a valid mnemonic, please check for typos."
"err_invalid_mnemonic": "That is not a valid mnemonic, please check for typos.",
"arg_mnemonic_language": "Please select from the following languages"
remyroy marked this conversation as resolved.
Show resolved Hide resolved
},
"existing_mnemonic": {
"arg_existing_mnemonic": {
Expand All @@ -10,6 +11,9 @@
"help": "The mnemonic that you used to generate your keys. (It is recommended not to use this argument, and wait for the CLI to ask you for your mnemonic as otherwise it will appear in your shell history.)",
"prompt": "Please enter your mnemonic separated by spaces (\" \"). Note: you only need to enter the first 4 letters of each word if you'd prefer."
},
"arg_mnemonic_language": {
"help": "The language of your mnemonic. If this is not provided we will attempt to determine it based on the mnemonic provided."
},
"arg_mnemonic_password": {
"help": "This is almost certainly not the argument you are looking for: it is for mnemonic passwords, not keystore passwords. Providing a password here when you didn't use one initially, can result in lost keys (and therefore funds)! Also note that if you used this tool to generate your mnemonic initially, then you did not use a mnemonic password. However, if you are certain you used a password to \"increase\" the security of your mnemonic, this is where you enter it.",
"prompt": "Enter your mnemonic password (if you used one). Make sure you won't forget it, it can not be recovered.",
Expand Down
39 changes: 26 additions & 13 deletions ethstaker_deposit/key_handling/key_derivation/mnemonic.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections import defaultdict
import os
from unicodedata import normalize
from secrets import randbits
Expand All @@ -6,7 +7,7 @@
Sequence,
)

from ethstaker_deposit.exceptions import ValidationError
from ethstaker_deposit.exceptions import MultiLanguageError
from ethstaker_deposit.utils.constants import (
MNEMONIC_LANG_OPTIONS,
)
Expand Down Expand Up @@ -64,14 +65,25 @@ def get_seed(*, mnemonic: str, password: str) -> bytes:
def determine_mnemonic_language(mnemonic: str, words_path: str) -> Sequence[str]:
"""
Given a `mnemonic` determine what language[s] it is written in.
There are collisions between word-lists, so multiple candidate languages are returned.
First create a map of every word in all supported languages and what languages those words belong to.
Then go through each word in the provided mnemonic and determine its potential languages.
Return a list of all potential languages.
"""
languages = MNEMONIC_LANG_OPTIONS.keys()
word_language_map = {word: lang for lang in languages for word in _get_word_list(lang, words_path)}
word_language_map = defaultdict(list)
for lang, word in ((lang, word) for lang in languages for word in _get_word_list(lang, words_path)):
word_language_map[word].append(lang)

try:
mnemonic_list = [normalize('NFKC', word)[:4] for word in mnemonic.lower().split(' ')]
word_languages = [[lang for word, lang in word_language_map.items() if normalize('NFKC', word)[:4] == abbrev]
for abbrev in mnemonic_list]
word_languages = [
[
lang for word, langs in word_language_map.items()
for lang in langs
if normalize('NFKC', word)[:4] == abbrev
]
for abbrev in mnemonic_list
]
return list(set(sum(word_languages, [])))
except KeyError:
raise ValueError('Word not found in mnemonic word lists for any language.')
Expand Down Expand Up @@ -99,15 +111,16 @@ def abbreviate_words(words: Sequence[str]) -> list[str]:
return [normalize('NFKC', word)[:4] for word in words]


def reconstruct_mnemonic(mnemonic: str, words_path: str) -> Optional[str]:
def reconstruct_mnemonic(mnemonic: str, words_path: str, language: Optional[str] = None) -> Optional[str]:
"""
Given a mnemonic, reconstruct the full version (in case abbreviated words were used),
then verify it against its own checksum.
"""
try:
languages = determine_mnemonic_language(mnemonic, words_path)
languages = [language] if language else determine_mnemonic_language(mnemonic, words_path)
except ValueError:
return None
valid_languages = []
reconstructed_mnemonic = None
for language in languages:
try:
Expand All @@ -123,17 +136,17 @@ def reconstruct_mnemonic(mnemonic: str, words_path: str) -> Optional[str]:
entropy_bits = entropy.to_bytes(checksum_length * 4, 'big')
full_word_list = _get_word_list(language, words_path)
if _get_checksum(entropy_bits) == checksum:
"""
This check guarantees that only one language has a valid mnemonic.
It is needed to ensure abbrivated words aren't valid in multiple languages
"""
if reconstructed_mnemonic is not None:
raise ValidationError("This mnemonic abbreviated form is available in multiple languages.")
valid_languages.append(language)
reconstructed_mnemonic = ' '.join([_index_to_word(full_word_list, index) for index in word_indices])
else:
pass
except ValueError:
pass

if len(valid_languages) > 1:
valid_languages.sort()
remyroy marked this conversation as resolved.
Show resolved Hide resolved
raise MultiLanguageError(valid_languages)

return reconstructed_mnemonic


Expand Down
6 changes: 4 additions & 2 deletions ethstaker_deposit/utils/click.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,9 @@ def callback(ctx: click.Context, param: Any, user_input: str) -> Any:
return callback


def choice_prompt_func(prompt_func: Callable[[], str], choices: Sequence[str]) -> Callable[[], str]:
def choice_prompt_func(prompt_func: Callable[[], str],
choices: Sequence[str],
add_colon: bool = True) -> Callable[[], str]:
'''
Formats the prompt and choices in a printable manner.
'''
Expand All @@ -152,7 +154,7 @@ def choice_prompt_func(prompt_func: Callable[[], str], choices: Sequence[str]) -
else:
output = output + ', '
output = output + ']'
return lambda: '%s %s: ' % (prompt_func(), output)
return lambda: '%s %s%s' % (prompt_func(), output, ': ' if add_colon else '')


def deactivate_prompts_callback(param_names: list[str]) -> Callable[[click.Context, str, str], Any]:
Expand Down
4 changes: 4 additions & 0 deletions ethstaker_deposit/utils/terminal.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@


def clear_terminal() -> None:
# Do not clear if running unit tests as stdout can be used to determine state
if "PYTEST_CURRENT_TEST" in os.environ:
return

# We bundle libtinfo via pyinstaller, which messes with the system tput.
# Remove LD_LIBRARY_PATH just for subprocess.run()
if sys.platform == 'linux':
Expand Down
91 changes: 91 additions & 0 deletions tests/test_cli/test_existing_mnemonic.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,3 +396,94 @@ def test_existing_mnemonic_custom_testnet() -> None:
assert get_permissions(validator_keys_folder_path, file_name) == '0o440'
# Clean up
clean_key_folder(my_folder_path)


def test_existing_mnemonic_multiple_languages() -> None:
    """
    Runs `existing-mnemonic` without `--mnemonic_language` and checks that
    5 keystores with unique UUIDs are generated and the command exits with 0.
    """
    # Prepare folder
    my_folder_path = os.path.join(os.getcwd(), 'TESTING_TEMP_FOLDER')
    clean_key_folder(my_folder_path)
    if not os.path.exists(my_folder_path):
        os.mkdir(my_folder_path)

    runner = CliRunner()
    # Answers fed to the CLI prompts, one per line.
    # NOTE(review): the '1' after the mnemonic presumably answers the extra
    # language-selection prompt shown when the mnemonic is valid in several
    # languages -- confirm against the prompt handling.
    inputs = [
        'TREZOR',
        '的 的 的 的 的 的 的 的 的 的 的 在', '1',
        '2', '2', '5', 'MyPasswordIs', 'MyPasswordIs']
    data = '\n'.join(inputs)
    arguments = [
        '--language', 'english',
        '--ignore_connectivity',
        'existing-mnemonic',
        '--chain', 'holesky',
        '--withdrawal_address', '',
        '--folder', my_folder_path,
        '--mnemonic_password', 'TREZOR',
    ]
    result = runner.invoke(cli, arguments, input=data)

    assert result.exit_code == 0

    # Check files
    validator_keys_folder_path = os.path.join(my_folder_path, DEFAULT_VALIDATOR_KEYS_FOLDER_NAME)
    _, _, key_files = next(os.walk(validator_keys_folder_path))

    # Only keystore files carry a UUID; each generated key must have a distinct one
    all_uuid = [
        get_uuid(validator_keys_folder_path + '/' + key_file)
        for key_file in key_files
        if key_file.startswith('keystore')
    ]
    assert len(set(all_uuid)) == 5

    # Verify file permissions
    if os.name == 'posix':
        for file_name in key_files:
            assert get_permissions(validator_keys_folder_path, file_name) == '0o440'
    # Clean up
    clean_key_folder(my_folder_path)


def test_existing_mnemonic_multiple_languages_argument() -> None:
    """
    Runs `existing-mnemonic` with an explicit `--mnemonic_language` argument and
    checks that 5 keystores with unique UUIDs are generated and the command
    exits with 0.
    """
    # Prepare folder
    my_folder_path = os.path.join(os.getcwd(), 'TESTING_TEMP_FOLDER')
    clean_key_folder(my_folder_path)
    if not os.path.exists(my_folder_path):
        os.mkdir(my_folder_path)

    runner = CliRunner()
    # Answers fed to the CLI prompts, one per line. No language answer follows the
    # mnemonic here because the language is passed on the command line below.
    inputs = [
        'TREZOR',
        '的 的 的 的 的 的 的 的 的 的 的 在',
        '2', '2', '5', 'MyPasswordIs', 'MyPasswordIs']
    data = '\n'.join(inputs)
    arguments = [
        '--language', 'english',
        '--ignore_connectivity',
        'existing-mnemonic',
        '--chain', 'holesky',
        '--withdrawal_address', '',
        '--folder', my_folder_path,
        '--mnemonic_language', '简体中文',
        '--mnemonic_password', 'TREZOR',
    ]
    result = runner.invoke(cli, arguments, input=data)

    assert result.exit_code == 0

    # Check files
    validator_keys_folder_path = os.path.join(my_folder_path, DEFAULT_VALIDATOR_KEYS_FOLDER_NAME)
    _, _, key_files = next(os.walk(validator_keys_folder_path))

    # Only keystore files carry a UUID; each generated key must have a distinct one
    all_uuid = [
        get_uuid(validator_keys_folder_path + '/' + key_file)
        for key_file in key_files
        if key_file.startswith('keystore')
    ]
    assert len(set(all_uuid)) == 5

    # Verify file permissions
    if os.name == 'posix':
        for file_name in key_files:
            assert get_permissions(validator_keys_folder_path, file_name) == '0o440'
    # Clean up
    clean_key_folder(my_folder_path)
Loading