From 858ef8c67ed92fa76fceb948b7b77eb13983b901 Mon Sep 17 00:00:00 2001 From: Rashikraj Shrestha Date: Sun, 18 Aug 2024 15:20:37 +0545 Subject: [PATCH] add pytest for addition of drop_empty_rows flag --- .../tests/unit/io/files/test_empty_rows.xlsx | Bin 0 -> 8732 bytes py-polars/tests/unit/io/test_spreadsheet.py | 26 ++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 py-polars/tests/unit/io/files/test_empty_rows.xlsx diff --git a/py-polars/tests/unit/io/files/test_empty_rows.xlsx b/py-polars/tests/unit/io/files/test_empty_rows.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..1fc27ee4c3e4801060acc0891352d36f22f6946c GIT binary patch literal 8732 zcmeHM1y@|j)@|Gg?hcJhW5FQ-0>RxnxCf^jcS(?7!Gi{Zgy8P(gg|h23GNas@O5V1 zdoz=n?-#sRz53qOtM57MR@L6MtM*Yr&^FUVSQGOlD0;Bp$GyG1sS! zyWHD=SqLr=>YwjeQPy6<75w~Z9A|wLf5PS^i?`v~I)@x$1fBN+k(*(jDy^;ti>Z18 z$ z?AcfH`ZA?egalZF{66*%)C;z&9GFq~IOLC_Y#=_ndLX#O-Dx~)m&6%pr)BUBJy_}A z5%Jz1*?bXvc#^c^&vTCq06aV(095}%%Ni{Z{W%P4iZIq;z-Va%v9N=3asIsikB7@J}EQv57#r?4q!hWc!)oiIxza8zxHJO%%wb09Q(@cmkou`qxUHh4IS{wck9}ea8 zeswxNyeposvVFj;3x9Am-^pNxXSV=aS5g1*Ku`pA!4gcN{!WrW@r2|v*oZX3NJ0ic zh4ZlE`kg24ju2Z@M@QSA>h*8Vz`-O8mgT>@6{{;Lbc1kOFz!P^?rCm>xU()?bo*L+ zk5C7yS>_pOdHv3p$ypl>bf*-!5FA51kB0i(zVldzhuob+)L-fM8I}$;196B39 zKs7xmp$d{k!pGa+-77l4z)pqYH;Bjevu6?re!F4@l7{Bw(@CE#DRN0Z88x%to_HGhfxKFE`L^o$fuP87=%FuFZK%XZcA>=N~~ z`OZ?wbJ(^c=LU0=qDX*ha>R=#kw_hdE+H^ZCF22ZL%YeifJ-0H|ZjU}Z8)6?3NZkJt zHg%lb&adcrGrU!}16Tu3jV(RO>bLc9a~o$JDz$a;mZf+6xEj8yg2vZz1q6xRYSK4o z%ak18K3NyrvAWj?V)+3?Y}(@XAXN{msgLIjMbQ?%m(@OjWAlU0*_gB@$mO>N2Cw{} z2fM5gWyP=86!#>zxQI=;QtPhsXwjEh_Hrt`e3p{Kf;p*u$CG{Qurx?koRI4hbUg5M zn0St@C-tJH1(7!9xEvHcP6J_kN>Vk4zR1IdJ_#OK%`qyQTnI4GL?<}(C0O?QghW8J zjp6e;=_Jx14a1VHY}z&Y*a~6TZ0N}q`H+JW(w=?Ku`U&|J9rV@0n2UE!$PyHp*^FJ zbpKYaZQrxv?sX8UyGaL5ve@?@w@2LY#4e8u&pT}%Eqx{0dizObANv$}mzjF-<_pjg zG6~5TEea(~DGLhfy*I63y>lYOlSm1TH6n1qlPMYy%@}SC<`o{UH*G1Q0vq?4>oT~83i!ogc0i$l^ zzKMAy5CR4U(Jw9dr}^-mE@X{z1-P>8vX6=aa;s)dcT`@+U7azcUk{kpp6l=_z2wGX zj4vQTU42i2^^qM!?o~#~x*jj$$wIMJN^Ke`n$h66T@{BdL{aCg4J|;N#=^jt51J^! z5%G4|9|=YUj^xm$D9MrCm^UPTy_DkVp3ib8gr>!dFwhCgQ$Ri4~9 zv$N$NK&f7*+Q?54PH>!{~$cJAI%oo^}?{ zpDS&ymc8vf2dbt zU%2wtIP~ixRDCQ>8NJ%97gEAQVep!STWj}Cky1#TIAI4S{a!&~!N?m6kFV^l1Iuj+ zCG!}U^%h@f15_V(z1T`cqOD!vkvo8~CWx!%ah&i>Dpp05NprM8BZ$u3*w%GRDT@Eg;#&(P4;Bj!ZTFpd99=~sfhJDt!MK~w`s6@(TCV{PkDW0QbL^R8)4TOZfV7;1iIx7COVJcC73vHPT z6&%R^F8;=YDzvAQma{utYacCuUgfEHUnHw>F!tqmYv4ILqkX{6`aNz7w+nU8z&2n9 z|Jx2Y4W03JAtr}S1*wP+WJNaWO|H#M2ZKOo57eH~EXltXl;(4-$N0|PX(^~Bt z6f`4^o1M-VD4hX%u3JQ}4B@uQ442b2h_n<4ks)r#uY?rP@8BkzMmf zIVdYw({27`rv#i+o#-A6U$SSU!egwALS)kd{Umapcz^MwQt673BS7wPB|(a3$v~>w zam6E$BPh#{-!xEXpg$a7dmsK*dh~e3<8+|Yc>qyIwj>%os)Yv6?m-<n@4J$(SzUAY`@0osHXD%mWE$_T!-(hsdB^kAtZ(kl-`hBcKd4KOu%_^(hv65Jk%*#536~OF@^{z_Eu-Dt_ zhm^E)rzV-)B2;VMW8aza@cgW=fj_p0>$r$UwHc*1k@ch;vL1LE6pej!N1>-X9X4Y$ zsKH?GJR^hx{l1I6AK&|UerR5;?A)K#kY}^~> z2+cdUqw$w8gP*pO+r<3Ox39a5DjRB86&z528XAX_F-8wReh@DiwcQ`@o(SwrFr9REx7}S2CbvDD z2w#-1Ef8|G`ktQd#1k+3UTmqvP|nR$P`*2+4X-#ZT{Symj4%nrzCD4H$mj-1cE>%z z-F7QD9PEcTd_l9@=~{l!IZvV6i_|RIW;@$RebT@Xol+)KfwW*2=|4Bt zVtM%aAx>Fv*f>xMVhmL0td=qj8|<{8c{O)Bv}9k_RbV?Q=3`E80(%dTU)ZP3-a%_l zwS9FVX>NYX)(G}myZsokX`jWq2g?!r#LS9Fi%C(aR&p=Yq1AT&u%xR;+OSDY zU<$jj7^bXfj!bExCUyxbkFb~%t(~bE3g;COKKqn7*F>V>64^`QT_+CJ9Wmh@+-exr zp)*dT+?OwA>mSy-NH>PFh%2<6Vu2*1vRQmTNw6=sa&!*zAYu;3UCzBGlU|fJ5Zp}H z^va0FR$AeQ-=yA%7-yMA+6KinwaM*8q3T&Bdl4P_7&x){Iac)<8!d?)v?7xF*pSj( z)?E(asM!XInuMUDV-hb0g%RHCOJ@{;?G6*tX3`!%LLiXQZo!Y=i0b(oUdeRl{+Y@h z4vLHv^Wx+-K{$=sVoN%(3hg^JyRDaX=0dwuTrqUYQwwT1dG&BxXgO{uUS=_Dn{o#3 zy|}!1YLn9J6Zo0&=yW?%rMY60mp0*W5UHn0(%l!ac{CCQR4}jB$kbp1H%xbx%Kwgi zA%UJ_-I-B>iDxcm1QRS}vDlveM)5+^gO@SOEH~7Zjqv?Q$*uC4M@-bXvQ;COXKTkkDiEa^+deX6Vzory-UU30r1_AFETEBTLbTQ)Im2c|WDVshV=} z(!{uE%r-iE+PJh(6Jgii3NG?+G!LwI+i9^u+P zqhqQMWMtGB=@xOguCYO_W2NV_y~f`r>6-RHZlIrdLHt3Z-i=f))8We9B#D%`GE_Q2 zSU+C*GctGVJMV;U4kHf#L@d5hiVyZ|IsrK1;PuoeY}9GaI4&an9y}fNFGChd)H0Q+ z16MUlo$d>JmMM!FW+^&bsH0+95QMh`aC3Eax!6#m!vi9MA-hA|fobK0v76WKYsR0g zr<>o52_;0+3rDEBeRyY{*~uSX`ejswjhaMdHnk&rk5lwrA;M*g%Ax@EMEF7Z2|A{$smr&1G@ta%PUlsO=!www*vc$1#;&8ri>5eOY1j`v@9l=PaFyWUh) zsm)jOvZG?EOgz4_ca;$oJ1J$7=+F29c74LWC!cY3^Le3p(h|izP}IxGU+|;)wyLCK zGClLn7N%Jp|$ zSs$$!$ExXLkMG^U>6?r^b9-6VH_dstzR5B48aERzDQ04@A- z^a@ZCK?%nruStnebwc`tFuB+ws+K26H$sbqGp z2y9dm_Y1?;7yTThZUL8_#Lb#ulT^zg_mqwOV{enMOt<`i=XWCJ9uCyA+IW}((+I5|? zbOne*c>eOSH3Q?DgB?naL6Vonzz33K9gL>4=O4x|Mvs;z&UH-e%X8hE?}m7^0$9$K zWuVP$lLCHMinFzZEZWXFsbl@G@rz6Q%l zpHC6)BPG0Qz6{5o^+ru>E-m>wy>LIbc%3?Jr0{jxaxoQa)Rk7!Bocd+w*w1a*GOwT zdye-tJej>PZ+_R85SvGj%DniN1g^#hxkD>j3-hIFtc0l@8#Bo@{GGs3HKC!6@ zo#kA6xIuWS{Z&mPx`}8 z(K|snuUA`EC_DG<3&X+~b|jAxy#i*U?XyxQzG$9bYD>T3d~oyt<$LuSih-cd%}opv zvfl1aAlh)~4}YAbB8d3d`#pT56N8ksPM^e5Z9Hj4l8J|+`deeycEfXoF_(rsiWbF` z7ei7q%TA%p71L>z3p?8gfraDGO-(Rta-FD8u57IY1?h*$Ifl1Yd{%`+y|9O<&(HI` zVUEe)yG6hG9!(j>Yi}5XabfnGxucmX#L)@LW#$O6__=k4Io1Dj-C*+_8E2%>1tREK zMrxB9@J_p*{f26f!s-oSuFt+Aiks({VE~4^-CpJ|YO`q^a|>qKQ zLgsK&f&kN0j2ey+%NEQn=JIG(W!sXhCn)9CYKBIe#k7j#9z2b3>xeV$@d?0UmXYCx z5M#SpY=)XxcV;f_x#zogqjt_xvm%S|%Ih8>Kv?l4rye0>=!rBFgB3#MHOTeR3Z@hm zUP6^%INsfXt8wlR*DM_wWtN5PlMH3&KD*)QwUwn8+$8h0QFFyHt7lACt{2A(I#{oF z-Fg6tLRPOxvy{^yz;~nCI-DNGtQ|Bnx*#Dcp+@_uiN-`DA7H>T)je4w-%aw3Y2Q`= zia4tfkq8^t?BWlv@Xvh8OeUg_;;)4>V+SG?9hVv@B)&Cm_hER-pQ)^24XDQ{$#(N* zeL3lIXK90t2XSq^FONv2shN2i{wJ~ zYM6n=rq6ZG%T3|p+^m%I&9W;Q!ir<}r&Vf4CuZH|qNuqH#Xl4rVrQ^NNuUJ!F0P60 zGO`_AiI?GpwP0@0epz!no7x>>CtNX}7z0Ko?+Q&?9JgDn;0`ryi+V(Vt~rrn4|lrl zRz=I`pxWDD=&CU&aHCgfZMahEX337`L?SF1x)y7)mf=*3u*?S?77!Owx#{wzY(%|( zNB(g;?V{|Q!hMyBWO>@tP;;ILZ|WLFc~6{h!feSL5To-7iM_--3TJ7XJw5#QZJz51aAV&|f{AKSJ-Z z|IyL;HNmgD&_5FR68-1J|F$RnHOsFe|3?;c(%*aVtK|Qh;jeo7M?3%^LIwc*O Path: def path_ods_mixed(io_files_path: Path) -> Path: return io_files_path / "mixed.ods" +@pytest.fixture() +def path_empty_rows_excel(io_files_path: Path) -> Path: + return io_files_path / "test_empty_rows.xlsx" + @pytest.mark.parametrize( ("read_spreadsheet", "source", "engine_params"), @@ -1058,3 +1062,25 @@ def test_identify_workbook( bytesio_data = BytesIO(f.read()) assert _identify_workbook(bytesio_data) == file_type assert isinstance(pl.read_excel(bytesio_data, engine="calamine"), pl.DataFrame) + +def test_drop_empty_rows(path_empty_rows_excel: Path): + df1 = pl.read_excel(source=path_empty_rows_excel, engine="xlsx2csv") + assert df1.shape == (8, 4) + df2 = pl.read_excel(source=path_empty_rows_excel, engine="xlsx2csv", drop_empty_rows=True) + assert df2.shape == (8, 4) + df3 = pl.read_excel(source=path_empty_rows_excel, engine="xlsx2csv", drop_empty_rows=False) + assert df3.shape == (10, 4) + + df4 = pl.read_excel(source=path_empty_rows_excel, engine="openpyxl") + assert df4.shape == (8, 4) + df5 = pl.read_excel(source=path_empty_rows_excel, engine="openpyxl", drop_empty_rows=True) + assert df5.shape == (8, 4) + df6 = pl.read_excel(source=path_empty_rows_excel, engine="openpyxl", drop_empty_rows=False) + assert df6.shape == (10, 4) + + df7 = pl.read_excel(source=path_empty_rows_excel, engine="calamine") + assert df7.shape == (8, 4) + df8 = pl.read_excel(source=path_empty_rows_excel, engine="calamine", drop_empty_rows=True) + assert df8.shape == (8, 4) + df9 = pl.read_excel(source=path_empty_rows_excel, engine="calamine", drop_empty_rows=False) + assert df9.shape == (10, 4)