From 8f4498fc10afb21c69c2884dd6bbb211cdfedde1 Mon Sep 17 00:00:00 2001 From: abstractqqq Date: Sun, 17 Mar 2024 14:45:58 -0400 Subject: [PATCH] fixed some tests, added examples --- README.md | 3 +- examples/basics.ipynb | 99 ++++++++++++++++++++++++++++++++++++++- src/cusip_parsing/mod.rs | 2 +- tests/test_correctness.py | 8 ++-- 4 files changed, 104 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index d67e060..ce076b4 100644 --- a/README.md +++ b/README.md @@ -19,4 +19,5 @@ Processing IBAN, ISINs, URLs, etc., and other standard format data in Polars. 1. Iban is powered by [iban_validate](https://crates.io/crates/iban_validate) 2. Isin is powered by [isin_rs](https://docs.rs/isin/latest/isin/) -3. URL is powered by [url](https://crates.io/crates/url) \ No newline at end of file +3. URL is powered by [url](https://crates.io/crates/url) +4. CUSIP is powered by [cusip](https://crates.io/crates/cusip) \ No newline at end of file diff --git a/examples/basics.ipynb b/examples/basics.ipynb index 8577fc2..45ca28c 100644 --- a/examples/basics.ipynb +++ b/examples/basics.ipynb @@ -8,7 +8,7 @@ "outputs": [], "source": [ "import polars as pl\n", - "import polars_istr" + "import polars_istr # noqa: F401" ] }, { @@ -363,6 +363,101 @@ " pl.col(\"url\").url.is_special().alias(\"is_special\"),\n", ")" ] + }, + { + "cell_type": "markdown", + "id": "f716217c", + "metadata": {}, + "source": [ + "# CUSIP" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f0aa4db1", + "metadata": {}, + "outputs": [], + "source": [ + "df = pl.DataFrame({\n", + " \"cusip\": [\n", + " \"303075105\", # regular cusip (FactSet - Common Stock)\n", + " \"30307510\", # regular cusip ex. check digit\n", + " \"G0052B105\", # regular CINS (Abingdon Capital PLC - Shares)\n", + " \"HELLOWORLD\", # Invalid\n", + " ]\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "711de472", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (4, 11)
issue_numissuer_numcheck_digitcountry_codepayloadis_private_issuehas_private_issueris_private_useis_cinsis_cins_baseis_cins_extended
strstrstrstrstrboolboolboolboolboolbool
"10""303075""5"null"30307510"falsefalsefalsefalsenullnull
nullnullnullnullnullnullnullnullnullnullnull
"10""0052B""5""G""G0052B10"falsefalsefalsetruetruefalse
nullnullnullnullnullnullnullnullnullnullnull
" + ], + "text/plain": [ + "shape: (4, 11)\n", + "┌───────────┬────────────┬───────────┬───────────┬───┬───────────┬─────────┬───────────┬───────────┐\n", + "│ issue_num ┆ issuer_num ┆ check_dig ┆ country_c ┆ … ┆ is_privat ┆ is_cins ┆ is_cins_b ┆ is_cins_e │\n", + "│ --- ┆ --- ┆ it ┆ ode ┆ ┆ e_use ┆ --- ┆ ase ┆ xtended │\n", + "│ str ┆ str ┆ --- ┆ --- ┆ ┆ --- ┆ bool ┆ --- ┆ --- │\n", + "│ ┆ ┆ str ┆ str ┆ ┆ bool ┆ ┆ bool ┆ bool │\n", + "╞═══════════╪════════════╪═══════════╪═══════════╪═══╪═══════════╪═════════╪═══════════╪═══════════╡\n", + "│ 10 ┆ 303075 ┆ 5 ┆ null ┆ … ┆ false ┆ false ┆ null ┆ null │\n", + "│ null ┆ null ┆ null ┆ null ┆ … ┆ null ┆ null ┆ null ┆ null │\n", + "│ 10 ┆ 0052B ┆ 5 ┆ G ┆ … ┆ false ┆ true ┆ true ┆ false │\n", + "│ null ┆ null ┆ null ┆ null ┆ … ┆ null ┆ null ┆ null ┆ null │\n", + "└───────────┴────────────┴───────────┴───────────┴───┴───────────┴─────────┴───────────┴───────────┘" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.select(\n", + " pl.col(\"cusip\").cusip.issue_num().alias(\"issue_num\"),\n", + " pl.col(\"cusip\").cusip.issuer_num().alias(\"issuer_num\"),\n", + " pl.col(\"cusip\").cusip.check_digit().alias(\"check_digit\"),\n", + " pl.col(\"cusip\").cusip.country_code().alias(\"country_code\"),\n", + " pl.col(\"cusip\").cusip.payload().alias(\"payload\"),\n", + " pl.col(\"cusip\").cusip.is_private_issue().alias(\"is_private_issue\"),\n", + " pl.col(\"cusip\").cusip.has_private_issuer().alias(\"has_private_issuer\"),\n", + " pl.col(\"cusip\").cusip.is_private_use().alias(\"is_private_use\"),\n", + " pl.col(\"cusip\").cusip.is_cins().alias(\"is_cins\"),\n", + " pl.col(\"cusip\").cusip.is_cins_base().alias(\"is_cins_base\"),\n", + " pl.col(\"cusip\").cusip.is_cins_extended().alias(\"is_cins_extended\"),\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd30b6da", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a7fa410", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -381,7 +476,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.11.8" } }, "nbformat": 4, diff --git a/src/cusip_parsing/mod.rs b/src/cusip_parsing/mod.rs index 1355bfa..2f42543 100644 --- a/src/cusip_parsing/mod.rs +++ b/src/cusip_parsing/mod.rs @@ -114,7 +114,7 @@ fn pl_cusip_country_code(inputs: &[Series]) -> PolarsResult { ca.into_iter().for_each(|op_s| { if let Some(s) = op_s { if let Ok(cusip) = CUSIP::parse(s) { - if let Some(cins) = cusip.as_cins(){ + if let Some(cins) = cusip.as_cins() { s_builder.append_value(cins.country_code().to_string()); } else { s_builder.append_null(); diff --git a/tests/test_correctness.py b/tests/test_correctness.py index 68c812b..26ccb21 100644 --- a/tests/test_correctness.py +++ b/tests/test_correctness.py @@ -7,13 +7,10 @@ import pytest -import pytest -import polars as pl import polars_istr # noqa: F401 -from polars.testing import assert_frame_equal -from typing import List, Optional +from typing import Optional # There are no valid test cases for Extended CINS or Private Issue(r) since I could not @@ -116,6 +113,9 @@ def test_cusip( } ) + assert_frame_equal(test1, ans) + assert_frame_equal(test2, ans) + @pytest.mark.parametrize( "df, cc, cd, reason, is_valid, bban, bank_id, branch_id",