Skip to content

Commit

Permalink
refactor: use ordered publisher list
Browse files Browse the repository at this point in the history
This change brings down the average cost of a price update without
aggregation from 5k to 2k because binary search requires much less
lookups. There are mechanims implemented in the code to make sure
that upon the upgrade we sort the array without adding overhead to
the happy path (working binary search). Also add_publisher now does
an insertion sort to keep the list sorted and del_publisher won't
change the order of the list.
  • Loading branch information
ali-bahjati committed May 13, 2024
1 parent ceb4a32 commit 6a58b0a
Show file tree
Hide file tree
Showing 3 changed files with 311 additions and 15 deletions.
112 changes: 108 additions & 4 deletions program/rust/src/processor/add_publisher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@ use {
account_info::AccountInfo,
entrypoint::ProgramResult,
program_error::ProgramError,
program_memory::sol_memset,
program_memory::{
sol_memcmp,
sol_memset,
},
pubkey::Pubkey,
},
std::mem::size_of,
Expand All @@ -41,8 +44,7 @@ pub fn add_publisher(
let cmd_args = load::<AddPublisherArgs>(instruction_data)?;

pyth_assert(
instruction_data.len() == size_of::<AddPublisherArgs>()
&& cmd_args.publisher != Pubkey::default(),
instruction_data.len() == size_of::<AddPublisherArgs>(),
ProgramError::InvalidArgument,
)?;

Expand All @@ -67,20 +69,122 @@ pub fn add_publisher(
return Err(ProgramError::InvalidArgument);
}

// Use the call with the default pubkey (000..) as a trigger to sort the publishers as a
// migration step from unsorted list to sorted list.
if cmd_args.publisher == Pubkey::default() {
let num_publishers = try_convert::<u32, usize>(price_data.num_)?;
sort_publishers(&mut price_data.comp_, num_publishers)?;
return Ok(());
}

for i in 0..(try_convert::<u32, usize>(price_data.num_)?) {
if cmd_args.publisher == price_data.comp_[i].pub_ {
return Err(ProgramError::InvalidArgument);
}
}

let current_index: usize = try_convert(price_data.num_)?;
let mut current_index: usize = try_convert(price_data.num_)?;
sol_memset(
bytes_of_mut(&mut price_data.comp_[current_index]),
0,
size_of::<PriceComponent>(),
);
price_data.comp_[current_index].pub_ = cmd_args.publisher;

// Shift the element back to keep the publishers components sorted.
while current_index > 0
&& price_data.comp_[current_index].pub_ < price_data.comp_[current_index - 1].pub_
{
price_data.comp_.swap(current_index, current_index - 1);
current_index -= 1;
}

price_data.num_ += 1;
price_data.header.size = try_convert::<_, u32>(PriceAccount::INITIAL_SIZE)?;
Ok(())
}

/// A copy of rust slice/sort.rs heapsort implementation which is small and fast. We couldn't use
/// the sort directly because it was only accessible behind a unstable feature flag at the time of
/// writing this code.
#[inline(always)]
fn heapsort(v: &mut [(Pubkey, usize)]) {
// This binary heap respects the invariant `parent >= child`.
let sift_down = |v: &mut [(Pubkey, usize)], mut node: usize| {
loop {
// Children of `node`.
let mut child = 2 * node + 1;
if child >= v.len() {
break;
}

// Choose the greater child.
if child + 1 < v.len()
&& sol_memcmp(v[child].0.as_ref(), v[child + 1].0.as_ref(), 32) < 0
{
child += 1;
}

// Stop if the invariant holds at `node`.
if sol_memcmp(v[node].0.as_ref(), v[child].0.as_ref(), 32) >= 0 {
break;
}

// Swap `node` with the greater child, move one step down, and continue sifting.
v.swap(node, child);
node = child;
}
};

// Build the heap in linear time.
for i in (0..v.len() / 2).rev() {
sift_down(v, i);
}

// Pop maximal elements from the heap.
for i in (1..v.len()).rev() {
v.swap(0, i);
sift_down(&mut v[..i], 0);
}
}

/// Sort the publishers price component list in place by performing minimal swaps.
#[inline(always)]
fn sort_publishers(
comps: &mut [PriceComponent],
num_publishers: usize,
) -> Result<(), ProgramError> {
let comps = comps
.get_mut(..num_publishers)
.ok_or(ProgramError::InvalidArgument)?;

let mut keys = comps
.iter()
.enumerate()
.map(|(i, x)| (x.pub_, i))
.collect::<Vec<_>>();

heapsort(&mut keys);

for i in 0..num_publishers {
// We know that the publisher with key[i].0 should be at index i in the sorted array but we
// do not know where it is as we are doing an in-place sort. Normally, the publisher at
// key[i].0 should be at key[i].1 but if it is swapped, we need to find the correct index.
//
// The way it is done is via satisfying the invariant that for the publisher at index j <
// i, the publisher at index key[j].1 is the index that their publisher was at before the
// swap and the elements j >= i are not swapped if they are not equal to any of the keys
// 0..(i-1). This way, for the publisher at index i we can find the correct index by
// following the chain of swaps and finally set the key[i].1 to the correct index to
// satisfy the invariant.
let mut index = keys[i].1;

while index < i {
index = keys[index].1;
}
keys[i].1 = index;
comps.swap(i, index);
}

Ok(())
}
133 changes: 122 additions & 11 deletions program/rust/src/processor/upd_price.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use {
crate::{
accounts::{
PriceAccount,
PriceComponent,
PriceInfo,
PythOracleSerialize,
UPD_PRICE_WRITE_SEED,
Expand Down Expand Up @@ -31,6 +32,7 @@ use {
},
program::invoke_signed,
program_error::ProgramError,
program_memory::sol_memcmp,
pubkey::Pubkey,
sysvar::Sysvar,
},
Expand Down Expand Up @@ -127,7 +129,7 @@ pub fn upd_price(
// Check clock
let clock = Clock::from_account_info(clock_account)?;

let mut publisher_index: usize = 0;
let publisher_index: usize;
let latest_aggregate_price: PriceInfo;

// The price_data borrow happens in a scope because it must be
Expand All @@ -137,17 +139,15 @@ pub fn upd_price(
// Verify that symbol account is initialized
let price_data = load_checked::<PriceAccount>(price_account, cmd_args.header.version)?;

// Verify that publisher is authorized
while publisher_index < try_convert::<u32, usize>(price_data.num_)? {
if price_data.comp_[publisher_index].pub_ == *funding_account.key {
break;
publisher_index = match find_publisher_index(
&price_data.comp_[..try_convert::<u32, usize>(price_data.num_)?],
funding_account.key,
) {
Some(index) => index,
None => {
return Err(OracleError::PermissionViolation.into());
}
publisher_index += 1;
}
pyth_assert(
publisher_index < try_convert::<u32, usize>(price_data.num_)?,
OracleError::PermissionViolation.into(),
)?;
};

latest_aggregate_price = price_data.agg_;
let latest_publisher_price = price_data.comp_[publisher_index].latest_;
Expand Down Expand Up @@ -281,6 +281,62 @@ pub fn upd_price(
Ok(())
}

/// Find the index of the publisher in the list of components.
///
/// This method first tries to binary search for the publisher's key in the list of components
/// to get the result faster if the list is sorted. If the list is not sorted, it falls back to
/// a linear search.
#[inline(always)]
fn find_publisher_index(comps: &[PriceComponent], key: &Pubkey) -> Option<usize> {
// Verify that publisher is authorized by initially binary searching
// for the publisher's component in the price account. The binary
// search might not work if the publisher list is not sorted; therefore
// we fall back to a linear search.
let mut binary_search_result = None;

{
// Binary search to find the publisher key. Rust std binary search is not used because
// they guarantee valid outcome only if the array is sorted whereas we want to rely on
// a Equal match if it is a result on an unsorted array. Currently the rust
// implementation behaves the same but we do not want to rely on api internals.
let mut left = 0;
let mut right = comps.len();
while left < right {
let mid = left + (right - left) / 2;
match sol_memcmp(comps[mid].pub_.as_ref(), key.as_ref(), 32) {
i if i < 0 => {
left = mid + 1;
}
i if i > 0 => {
right = mid;
}
_ => {
binary_search_result = Some(mid);
break;
}
}
}
}

match binary_search_result {
Some(index) => Some(index),
None => {
let mut index = 0;
while index < comps.len() {
if sol_memcmp(comps[index].pub_.as_ref(), key.as_ref(), 32) == 0 {
break;
}
index += 1;
}
if index == comps.len() {
None
} else {
Some(index)
}
}
}
}

#[allow(dead_code)]
// Wrapper struct for the accounts required to add data to the accumulator program.
struct MessageBufferAccounts<'a, 'b: 'a> {
Expand All @@ -289,3 +345,58 @@ struct MessageBufferAccounts<'a, 'b: 'a> {
oracle_auth_pda: &'a AccountInfo<'b>,
message_buffer_data: &'a AccountInfo<'b>,
}

#[cfg(test)]
mod test {
use {
super::*,
crate::accounts::PriceComponent,
solana_program::pubkey::Pubkey,
};

fn dummy_price_component(pub_: Pubkey) -> PriceComponent {
let dummy_price_info = PriceInfo {
price_: 0,
conf_: 0,
status_: 0,
pub_slot_: 0,
corp_act_status_: 0,
};

PriceComponent {
latest_: dummy_price_info,
agg_: dummy_price_info,
pub_,
}
}

/// Test the find_publisher_index method works with an unordered list of components.
#[test]
pub fn test_find_publisher_index_unordered_comp() {
let comps = (0..64)
.map(|_| dummy_price_component(Pubkey::new_unique()))
.collect::<Vec<_>>();

comps.iter().enumerate().for_each(|(idx, comp)| {
assert_eq!(find_publisher_index(&comps, &comp.pub_), Some(idx));
});

assert_eq!(find_publisher_index(&comps, &Pubkey::new_unique()), None);
}

/// Test the find_publisher_index method works with a sorted list of components.
#[test]
pub fn test_find_publisher_index_ordered_comp() {
let mut comps = (0..64)
.map(|_| dummy_price_component(Pubkey::new_unique()))
.collect::<Vec<_>>();

comps.sort_by_key(|comp| comp.pub_);

comps.iter().enumerate().for_each(|(idx, comp)| {
assert_eq!(find_publisher_index(&comps, &comp.pub_), Some(idx));
});

assert_eq!(find_publisher_index(&comps, &Pubkey::new_unique()), None);
}
}
Loading

0 comments on commit 6a58b0a

Please sign in to comment.