From 987d2c8976e604ff1d5fc635437aae412163449b Mon Sep 17 00:00:00 2001 From: Zhenyu Guo Date: Wed, 12 Oct 2016 16:23:14 +0800 Subject: [PATCH] - add scenario configurations - fix a bug about extensible_object introduced by module separation on Windows --- README.md | 21 ++- bin/config.common.ini | 2 +- bin/config.onecluster.ini.template | 2 +- include/dsn/utility/extensible_object.h | 16 +- src/core/README.md | 15 ++ src/plugins/apps.skv/simple_kv.app.example.h | 3 +- src/tools/webstudio/README.md | 12 -- tutorial/simple_kv/config.logic.failure.ini | 61 +++++++ tutorial/simple_kv/config.logic.ini | 35 ++++ tutorial/simple_kv/config.logic.perf.ini | 45 +++++ tutorial/simple_kv/config.logic.perf.prog.ini | 73 ++++++++ tutorial/simple_kv/config.stateful.ini | 168 ++++++++++++++++++ 12 files changed, 429 insertions(+), 24 deletions(-) create mode 100644 tutorial/simple_kv/config.logic.failure.ini create mode 100644 tutorial/simple_kv/config.logic.ini create mode 100644 tutorial/simple_kv/config.logic.perf.ini create mode 100644 tutorial/simple_kv/config.logic.perf.prog.ini create mode 100644 tutorial/simple_kv/config.stateful.ini diff --git a/README.md b/README.md index c382f432..98879e1a 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,10 @@ **Robust Distributed System Nucleus (rDSN)** is a framework for quickly building robust distributed systems. It has a microkernel for pluggable components, including applications, distributed frameworks, devops tools, and local runtime/resource providers, enabling their independent development and seamless integration. The project was originally developed for Microsoft Bing, and now has been adopted in production both inside and outside Microsoft. 
* [What are the existing modules I can immediately use?](#existing) +* [What scenarios are enabled by combining these modules differently?](#scenarios) * [How does rDSN build robustness?](#novel) * [Related papers](#papers) - ### Top Links * [[Case](https://github.com/imzhenyu/rocksdb)] RocksDB made replicated using rDSN! * [[Tutorial](https://github.com/Microsoft/rDSN/wiki/Tutorial:-one-box-cluster)] A one-box cluster demo to understand how rDSN helps service registration, deployment, monitoring etc.. @@ -20,8 +20,9 @@ The core of rDSN is a service kernel with which we can develop (via [Service API](http://imzhenyu.github.io/rDSN/documents/v1/html/group__service-api.html) and [Tool API](http://imzhenyu.github.io/rDSN/documents/v1/html/group__tool-api.html)) and plugin lots of different application, framework, tool, and local runtime modules, so that they can seamlessly benefit each other. Here is an incomplete list of the pluggable modules. -| Pluggable modules | Description | Demo | +| Pluggable modules | Description | Release | |--------|-------------|------| +| [dsn.core](https://github.com/Microsoft/rDSN/tree/master/src/core) | rDSN service kernel | todo | | [dsn.dist.service.stateless](https://github.com/imzhenyu/rDSN.dist.service/tree/master/src/app_daemon) | scale-out and fail-over for stateless services (e.g., micro services) | todo | | [dsn.dist.service.stateful.type1](https://github.com/imzhenyu/rDSN.dist.service/tree/master/src/replica_server) | scale-out, replicate, and fail-over for stateful services (e.g., storage) | todo | | [dsn.dist.service.meta_server](https://github.com/imzhenyu/rDSN.dist.service/tree/master/src/meta_server) | membership, load balance, and machine pool management for the above service frameworks | todo | @@ -33,9 +34,21 @@ The core of rDSN is a service kernel with which we can develop (via [Service API | [dsn.tools.hpc](https://github.com/imzhenyu/rDSN.tools.hpc) | high performance counterparts for the modules as implemented in 
tools.common | todo | | [dsn.tools.explorer](https://github.com/imzhenyu/rDSN.tools.explorer) | extracts task-level dependencies automatically | todo | | [dsn.tools.log.monitor](https://github.com/imzhenyu/rDSN.tools.log.monitor) | collect critical logs (e.g., log-level >= WARNING) in cluster | todo | -| [dsn.apps.skv](https://github.com/Microsoft/rDSN/tree/master/src/plugins/apps.skv) | an example application module | todo | +| [dsn.app.simple_kv](https://github.com/Microsoft/rDSN/tree/master/src/plugins/apps.skv) | an example application module | todo | + +### Scenarios by different module combination and configuration + +rDSN provides flexible configuration so that developers can combine and configure the modules differently to enable different scenarios. All modules are loaded by [dsn.svchost](https://github.com/Microsoft/rDSN/tree/master/src/tools/svchost), a common process runner in rDSN, with the given configuration file. The following table lists some examples (note **dsn.core** is always required therefore omitted in ```Modules``` column). 
+ +| Scenarios | Modules | Config | Demo | +|----------|---------------|--------|------| +| logic correctness development | dsn.app.simple_kv + dsn.tools.emulator + dsn.tools.common | [config](https://github.com/Microsoft/rDSN/blob/master/tutorial/simple_kv/config.logic.ini) | todo | +| logic correctness with failure | dsn.app.simple_kv + dsn.tools.emulator + dsn.tools.common | [config](https://github.com/Microsoft/rDSN/blob/master/tutorial/simple_kv/config.logic.failure.ini) | todo | +| performance tuning | dsn.app.simple_kv + dsn.tools.common | [config](https://github.com/Microsoft/rDSN/blob/master/tutorial/simple_kv/config.logic.perf.ini) | todo | +| progressive performance tuning | dsn.app.simple_kv + dsn.tools.common + dsn.tools.emulator | [config](https://github.com/Microsoft/rDSN/blob/master/tutorial/simple_kv/config.logic.perf.prog.ini) | todo | +| Paxos enabled stateful service | dsn.app.simple_kv + dsn.tools.common + dsn.tools.emulator + dsn.dist.uri.resolver + dsn.dist.service.meta_server + dsn.dist.service.stateful.type1 | [config](https://github.com/Microsoft/rDSN/blob/master/tutorial/simple_kv/config.stateful.ini) | todo | -rDSN also provides a [web portal](https://github.com/Microsoft/rDSN/tree/master/src/tools/webstudio) that enables quick deployment of the above modules in a cluster, and allows easy operations through simple clicks as well as rich visualization. +There are a lot more possibilities. rDSN provides a [web portal](https://github.com/Microsoft/rDSN/tree/master/src/tools/webstudio) to enable quick deployment of these scenarios in a cluster, and allow easy operations through simple clicks as well as rich visualization. Deployment scenarios are defined [here](https://github.com/Microsoft/rDSN/blob/master/src/tools/webstudio/app_package/static/js/rdsn.envs.js), and developers can add more on demand. ### How does rDSN build robustness? 
diff --git a/bin/config.common.ini b/bin/config.common.ini index d3549347..ed84b2cf 100644 --- a/bin/config.common.ini +++ b/bin/config.common.ini @@ -77,7 +77,7 @@ arguments = start_nfs = true tool = nativerun -;tool = simulator +;tool = emulator toollets = tracer ;toollets = tracer,profiler,fault_injector pause_on_start = false diff --git a/bin/config.onecluster.ini.template b/bin/config.onecluster.ini.template index 562d6302..6795d67d 100644 --- a/bin/config.onecluster.ini.template +++ b/bin/config.onecluster.ini.template @@ -51,7 +51,7 @@ arguments = localhost:%meta_port% start_nfs = true tool = nativerun -;tool = simulator +;tool = emulator toollets = tracer ;toollets = tracer,profiler,fault_injector pause_on_start = false diff --git a/include/dsn/utility/extensible_object.h b/include/dsn/utility/extensible_object.h index 591ac173..e319f163 100644 --- a/include/dsn/utility/extensible_object.h +++ b/include/dsn/utility/extensible_object.h @@ -87,7 +87,7 @@ class extensible_object : public extensible memset((void*)_extensions, 0, sizeof(_extensions)); } - ~extensible_object() + DSN_API ~extensible_object() { int maxId = static_cast(get_extension_count()); @@ -98,9 +98,17 @@ class extensible_object : public extensible s_extensionDeletors[i]((void*)_extensions[i]); } } + + // impossible branch to ensure the used apis are exported:-) + if (maxId > 0x0eadbeef) + { + extensible_object r; + copy_to(r); + register_extension(nullptr); + } } - void copy_to(extensible_object& r) + DSN_API void copy_to(extensible_object& r) { int maxId = static_cast(get_extension_count()); @@ -113,7 +121,7 @@ class extensible_object : public extensible } } - static uint32_t register_extension(extension_deletor deletor = nullptr) + DSN_API static uint32_t register_extension(extension_deletor deletor = nullptr) { int idx = s_nextExtensionIndex++; if (idx < MAX_EXTENSION_COUNT) @@ -128,7 +136,7 @@ class extensible_object : public extensible return idx; } - static uint32_t 
get_extension_count() + DSN_API static uint32_t get_extension_count() { return s_nextExtensionIndex.load(); } diff --git a/src/core/README.md b/src/core/README.md index f9bcaa34..87eae8ad 100644 --- a/src/core/README.md +++ b/src/core/README.md @@ -4,3 +4,18 @@ This directory contains the source code for rDSN service microkernel. * src - source code, including the unit tests * dev.cpp.core.use - reference to the dev/cpp module +***dsn.core*** is the service kernel in rDSN. It defines [Service API](http://imzhenyu.github.io/rDSN/documents/v1/html/group__service-api.html) and [Tool API](http://imzhenyu.github.io/rDSN/documents/v1/html/group__tool-api.html), with which users can develop and plugin various modules, including distributed frameworks, development & operation tools, local runtime, and applications (see [examples](https://github.com/Microsoft/rDSN#existing-pluggable-modules-and-growing-) here). dsn.core takes charge of interconnecting these components, and makes sure they can benefit each other transparently (while developed independently). + +### Build tools and local runtime libraries + +[Tool API](http://imzhenyu.github.io/rDSN/documents/v1/html/group__tool-api.html) is mainly for this purpose, with which developers can plugin, for example, new network providers (e.g., a RDMA network provider with better performance, a [virtual network](https://github.com/Microsoft/rDSN/blob/master/src/plugins/tools.emulator/network.sim.h) for emulation), toollets for capturing how the requests are processed in the system (e.g., [tracer](https://github.com/Microsoft/rDSN/blob/master/src/plugins/tools.common/tracer.h)), tools for driving the execution of the whole distributed system (e.g., [emulator](https://github.com/Microsoft/rDSN/tree/master/src/plugins/tools.emulator)). 
+ +For all cases, developers use ```dsn::tools::register_component_provider```, ```dsn::tools::register_component_aspect```, ```dsn::tools::register_toollet```, ```dsn::tools::register_tool``` to plugin the modules into **dsn.core**. + +### Build frameworks and applications + +[Service API](http://imzhenyu.github.io/rDSN/documents/v1/html/group__service-api.html) provides the basic C APIs for building these components, and **dsn.dev.xxx** provides language wrappers atop (e.g., **dsn.dev.cpp**) to ease the development. The frameworks are considered advanced applications, and both are registered into **dsn.core** through ```dsn_register_app```. See [here](http://imzhenyu.github.io/rDSN/documents/v1/html/group__service-api-model.html) for more details. + + + + diff --git a/src/plugins/apps.skv/simple_kv.app.example.h b/src/plugins/apps.skv/simple_kv.app.example.h index fbfdccb4..70889bf0 100644 --- a/src/plugins/apps.skv/simple_kv.app.example.h +++ b/src/plugins/apps.skv/simple_kv.app.example.h @@ -145,8 +145,7 @@ class simple_kv_perf_test_client_app : return ::dsn::ERR_INVALID_PARAMETERS; // argv[1]: e.g., dsn://mycluster/simple-kv.instance0 - rpc_address service_addr; - service_addr.assign_uri(dsn_uri_build(argv[1])); + url_host_address service_addr(argv[1]); _simple_kv_client.reset(new simple_kv_perf_test_client(service_addr)); _simple_kv_client->start_test("simple_kv.simple_kv.perf-test.case", 3); diff --git a/src/tools/webstudio/README.md b/src/tools/webstudio/README.md index b191b08b..bac73338 100644 --- a/src/tools/webstudio/README.md +++ b/src/tools/webstudio/README.md @@ -1,17 +1,5 @@ **rDSN.WebStudio** is a web portal for service registration, deployment, testing, and monitoring, as well as cluster management, atop of the service frameworks and tool modules built with rDSN. 
-#### Serivce registration and cluster machine view -![registration](app_package/static/img/register.jpg) - -#### Service deployment as a stateless service -![deploy](app_package/static/img/deploy.jpg) - -#### Service monitoring with built-in profiler toollet -![monitor](app_package/static/img/monitor.jpg) - -#### Service deployed as a replicated stateful service -![stateful](app_package/static/img/stateful.jpg) - ## Installation To start rDSN.WebStudio, you should install python 2.7.11+, and run the following commands. diff --git a/tutorial/simple_kv/config.logic.failure.ini b/tutorial/simple_kv/config.logic.failure.ini new file mode 100644 index 00000000..68721ad2 --- /dev/null +++ b/tutorial/simple_kv/config.logic.failure.ini @@ -0,0 +1,61 @@ +[config.args] +;; defined in app config file +;; service_type = + +service_type = simple_kv + +[modules] +dsn.app.simple_kv +dsn.tools.common +dsn.tools.emulator + +[apps.server] +type = %service_type% +ports = 54333 +pools = THREAD_POOL_DEFAULT + +[apps.client] +type = %service_type%.client +arguments = localhost:54333 +pools = THREAD_POOL_DEFAULT + +[core] +;tool = nativerun +tool = emulator +toollets = tracer,fault_injector +;toollets = tracer,profiler,fault_injector + +[tools.emulator] +random_seed = 594750959 + +[task..default] +rpc_request_data_corrupted_ratio = 0.00 +rpc_response_data_corrupted_ratio = 0.00 + +; data corrupted type: random/header/body +rpc_message_data_corrupted_type = random + +rpc_request_drop_ratio = 0.001 +rpc_response_drop_ratio = 0.001 +rpc_request_delay_ratio = 0.001 +rpc_response_delay_ratio = 0.001 +disk_read_fail_ratio = 0.001 +disk_write_fail_ratio = 0.001 + +rpc_message_delay_ms_min = 10 +rpc_message_delay_ms_max = 10000 +disk_io_delay_ms_min = 0 +disk_io_delay_ms_max = 0 +execution_extra_delay_us_max = 0 + +[task.RPC_SIMPLE_KV_SIMPLE_KV_READ] +rpc_response_data_corrupted_ratio = 0.01 + +[task.LPC_AIO_IMMEDIATE_CALLBACK] +is_trace = false +is_profile = false +allow_inline = false 
+disk_write_fail_ratio = 0.0 + +[task.LPC_RPC_TIMEOUT] +is_trace = false diff --git a/tutorial/simple_kv/config.logic.ini b/tutorial/simple_kv/config.logic.ini new file mode 100644 index 00000000..8444c8c1 --- /dev/null +++ b/tutorial/simple_kv/config.logic.ini @@ -0,0 +1,35 @@ +[config.args] +;; defined in app config file +;; service_type = + +service_type = simple_kv + +[modules] +dsn.app.simple_kv +dsn.tools.common +dsn.tools.emulator + +[apps.server] +type = %service_type% +ports = 54333 +pools = THREAD_POOL_DEFAULT + +[apps.client] +type = %service_type%.client +arguments = localhost:54333 +pools = THREAD_POOL_DEFAULT + +[core] +;tool = nativerun +tool = emulator +toollets = tracer +;toollets = tracer,profiler,fault_injector + +[task.LPC_AIO_IMMEDIATE_CALLBACK] +is_trace = false +is_profile = false +allow_inline = false +disk_write_fail_ratio = 0.0 + +[task.LPC_RPC_TIMEOUT] +is_trace = false diff --git a/tutorial/simple_kv/config.logic.perf.ini b/tutorial/simple_kv/config.logic.perf.ini new file mode 100644 index 00000000..ac6fe514 --- /dev/null +++ b/tutorial/simple_kv/config.logic.perf.ini @@ -0,0 +1,45 @@ +[config.args] +;; defined in app config file +;; service_type = + +service_type = simple_kv + +[modules] +dsn.app.simple_kv +dsn.tools.common + +[apps.server] +type = %service_type% +ports = 54333 +pools = THREAD_POOL_DEFAULT + +[apps.client.perf] +type = %service_type%.client.perf +arguments = localhost:54333 +pools = THREAD_POOL_DEFAULT + +[simple_kv.simple_kv.perf-test.case.1] +perf_test_seconds = 20 +perf_test_key_space_size = 100000 +perf_test_concurrency = 10 +perf_test_payload_bytes = 10 +perf_test_timeouts_ms = 10000 +perf_test_hybrid_request_ratio = 1,1,1, + +[core] +tool = nativerun +;tool = emulator +;toollets = profiler +;toollets = tracer,profiler,fault_injector + +[task.LPC_AIO_IMMEDIATE_CALLBACK] +is_trace = false +is_profile = false +allow_inline = false +disk_write_fail_ratio = 0.0 + +[task.LPC_RPC_TIMEOUT] +is_trace = false + 
+[threadpool.THREAD_POOL_DEFAULT] +worker_count = 10 \ No newline at end of file diff --git a/tutorial/simple_kv/config.logic.perf.prog.ini b/tutorial/simple_kv/config.logic.perf.prog.ini new file mode 100644 index 00000000..b3f40ef0 --- /dev/null +++ b/tutorial/simple_kv/config.logic.perf.prog.ini @@ -0,0 +1,73 @@ + +; +; progressive system complexity for performance tuning: +; +; . single thread for both client and server, workload concurrency set to 1, use emulated network to avoid network cost (delay set to 0) +; . + concurrency +; . + native network +; . + multi-threaded server/client +; +; the similar approach can be applied for correctness debugging +; + +[config.args] +;; defined in app config file +;; service_type = + +service_type = simple_kv + +[modules] +dsn.app.simple_kv +dsn.tools.common +dsn.tools.emulator + +[apps..default] + +;network.client.RPC_CHANNEL_TCP = dsn::tools::asio_network_provider, 65536 +network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536 +network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536 + +;network.server.0.RPC_CHANNEL_TCP = dsn::tools::asio_network_provider, 65536 +; 0 for any server port +network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536 +network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536 + +[apps.server] +type = %service_type% +ports = 54333 +pools = THREAD_POOL_DEFAULT + +[apps.client.perf] +type = %service_type%.client.perf +arguments = localhost:54333 +pools = THREAD_POOL_DEFAULT + +[simple_kv.simple_kv.perf-test.case.1] +perf_test_seconds = 20 +perf_test_key_space_size = 100000 +perf_test_concurrency = 1 +perf_test_payload_bytes = 10 +perf_test_timeouts_ms = 10000 +perf_test_hybrid_request_ratio = 1,1,1, + +[core] +tool = nativerun +;tool = emulator +;toollets = profiler +;toollets = tracer,profiler,fault_injector + +[tools.emulator] +min_message_delay_microseconds = 0 +max_message_delay_microseconds = 0 + 
+[task.LPC_AIO_IMMEDIATE_CALLBACK] +is_trace = false +is_profile = false +allow_inline = false +disk_write_fail_ratio = 0.0 + +[task.LPC_RPC_TIMEOUT] +is_trace = false + +[threadpool.THREAD_POOL_DEFAULT] +worker_count = 1 diff --git a/tutorial/simple_kv/config.stateful.ini b/tutorial/simple_kv/config.stateful.ini new file mode 100644 index 00000000..d2a795bd --- /dev/null +++ b/tutorial/simple_kv/config.stateful.ini @@ -0,0 +1,168 @@ +[config.args] +;; defined in app config file +;; service_type = + +service_type = simple_kv + +[modules] +dsn.app.simple_kv +dsn.tools.common +dsn.tools.emulator +dsn.tools.nfs +dsn.dist.uri.resolver +dsn.dist.service.meta_server +dsn.dist.service.stateful.type1 + +[apps.client] +type = %service_type%.client +arguments = dsn://mycluster/myskv +pools = THREAD_POOL_DEFAULT + +[apps.client.perf] +type = %service_type%.client.perf +arguments = dsn://mycluster/myskv +pools = THREAD_POOL_DEFAULT +run = false + +[simple_kv.simple_kv.perf-test.case.1] +perf_test_seconds = 20 +perf_test_key_space_size = 100000 +perf_test_concurrency = 1 +perf_test_payload_bytes = 128 +perf_test_timeouts_ms = 10000 +perf_test_hybrid_request_ratio = 1,1,1, + +;; meta server for stateful service +[apps.meta] +type = meta +ports = 24701 +pools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE + +;; replica servers for stateful service +[apps.replica] +type = replica +ports = 34701 +pools = THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_REPLICATION_LONG +count = 3 + +[meta_server] +server_list = localhost:24701 +min_live_node_count_for_unfreeze = 1 + +[replication.app] +app_name = myskv +app_type = %service_type% +partition_count = 4 +max_replica_count = 3 +stateful = true + +;; required by clients, and replica servers +[uri-resolver.dsn://mycluster] +factory = partition_resolver_simple +arguments = localhost:24701 + +[core] +start_nfs = true +tool = emulator +;tool = nativerun 
+toollets = tracer +;toollets = tracer,profiler,fault_injector +pause_on_start = false + +;logging_start_level = LOG_LEVEL_WARNING +;logging_factory_name = dsn::tools::hpc_logger + +[network] +; how many network threads for network library(used by asio) +io_service_worker_count = 2 + +; specification for each thread pool +[threadpool..default] +worker_count = 15 + +[threadpool.THREAD_POOL_DEFAULT] +name = default +partitioned = false +max_input_queue_length = 1024 + +[threadpool.THREAD_POOL_REPLICATION] +name = replication +partitioned = true +max_input_queue_length = 2560 + +[threadpool.THREAD_POOL_META_STATE] +worker_count = 1 + +[task..default] +is_trace = true +is_profile = true +allow_inline = false +rpc_call_channel = RPC_CHANNEL_TCP +rpc_message_header_format = dsn +rpc_timeout_milliseconds = 5000 +disk_write_fail_ratio = 0.0 +disk_read_fail_ratio = 0.00001 + +[task.LPC_AIO_IMMEDIATE_CALLBACK] +is_trace = false +allow_inline = false +disk_write_fail_ratio = 0.0 + +[task.LPC_RPC_TIMEOUT] +is_trace = false + +[task.LPC_CHECKPOINT_REPLICA] +;execution_extra_delay_us_max = 10000000 + +[task.LPC_LEARN_REMOTE_DELTA_FILES] +;execution_extra_delay_us_max = 10000000 + +[task.RPC_FD_FAILURE_DETECTOR_PING] +is_trace = false +rpc_call_channel = RPC_CHANNEL_UDP + +[task.RPC_FD_FAILURE_DETECTOR_PING_ACK] +is_trace = false +rpc_call_channel = RPC_CHANNEL_UDP + +[task.LPC_BEACON_CHECK] +is_trace = false + +[task.LPC_DAEMON_APPS_CHECK_TIMER] +is_trace = false + +[task.RPC_PREPARE] +rpc_request_resend_timeout_milliseconds = 8000 + +[replication] + +prepare_timeout_ms_for_secondaries = 10000 +prepare_timeout_ms_for_potential_secondaries = 20000 + +learn_timeout_ms = 30000 +staleness_for_commit = 20 +staleness_for_start_prepare_for_potential_secondary = 110 +mutation_max_size_mb = 15 +mutation_max_pending_time_ms = 20 +mutation_2pc_min_replica_count = 2 + +prepare_list_max_size_mb = 250 +request_batch_disabled = false +group_check_internal_ms = 100000 +group_check_disabled = 
false +fd_disabled = false +fd_check_interval_seconds = 5 +fd_beacon_interval_seconds = 3 +fd_lease_seconds = 14 +fd_grace_seconds = 15 +working_dir = . + +log_buffer_size_mb = 1 +log_pending_max_ms = 100 +log_file_size_mb = 32 +log_batch_write = true + +log_enable_shared_prepare = true +log_enable_private_commit = false + +config_sync_interval_ms = 60000