From fff2b6ef5582b2f3daf534daff1fa34446dc008b Mon Sep 17 00:00:00 2001 From: nwoodward Date: Thu, 16 Feb 2023 13:43:14 -0600 Subject: [PATCH 01/22] updated version for development --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e4a3065..9bb6309 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ setup( name = 'dspace-reports', - version='1.1.0', + version='1.3.0-SNAPSHOT', url = 'https://github.com/TexasDigitalLibrary/dspace-reports', author = 'Nicholas Woodward', author_email = 'njw@austin.utexas.edu', From 1c4ac146956931de9ee1994b9a9c45e02d572abd Mon Sep 17 00:00:00 2001 From: nwoodward Date: Wed, 6 Sep 2023 16:17:53 -0500 Subject: [PATCH 02/22] pylint fixes --- lib/database.py | 3 ++- lib/oai.py | 2 +- lib/solr.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/database.py b/lib/database.py index 96a693a..5fef9b5 100644 --- a/lib/database.py +++ b/lib/database.py @@ -1,6 +1,7 @@ +import logging import psycopg2 import psycopg2.extras -import logging + class Database(object): diff --git a/lib/oai.py b/lib/oai.py index fd5e6f7..7a5092c 100644 --- a/lib/oai.py +++ b/lib/oai.py @@ -1,6 +1,6 @@ import re -import requests import logging +import requests from time import sleep from xml.dom import pulldom diff --git a/lib/solr.py b/lib/solr.py index ddafb63..bcfac67 100644 --- a/lib/solr.py +++ b/lib/solr.py @@ -1,5 +1,5 @@ -import requests import logging +import requests import re From 5e2a54f5cbdf505e5e134a966ecec529f1841459 Mon Sep 17 00:00:00 2001 From: nwoodward Date: Wed, 6 Sep 2023 16:18:16 -0500 Subject: [PATCH 03/22] dependency updates --- Pipfile | 20 +- Pipfile.lock | 514 +++++++++++++++++++++++++++------------------------ 2 files changed, 278 insertions(+), 256 deletions(-) diff --git a/Pipfile b/Pipfile index c7405aa..9010965 100644 --- a/Pipfile +++ b/Pipfile @@ -6,23 +6,23 @@ verify_ssl = true [dev-packages] [packages] -astroid = "==2.14.2" -certifi = "==2022.12.7" -chardet = "==5.1.0" +astroid = "==2.15.6" +certifi = "==2023.7.22" +chardet = "==5.2.0" idna = "==3.4" isort = "==5.12.0" lazy-object-proxy = "==1.9.0" mccabe = "==0.7.0" -psycopg2 = "==2.9.5" -pylint = "==2.16.2" +psycopg2 = "==2.9.7" +pylint = "==2.17.5" python-dateutil = "==2.8.2" -PyYAML = "==6.0" -requests = "==2.28.2" +requests = "==2.31.0" six = "==1.16.0" toml = "==0.10.2" -urllib3 = "==1.26.14" -wrapt = "==1.14.1" -XlsxWriter = "==3.0.8" +urllib3 = "==2.0.4" +wrapt = "==1.15.0" +XlsxWriter = "==3.1.2" +pyyaml = "==6.0.1" [requires] python_version = "3.9" diff --git a/Pipfile.lock b/Pipfile.lock index 4d1baf8..90e7e78 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "42ef5f6d44f6109a3d9fb8444d8af99918e307135ecf107616152a6be1cfeef3" + "sha256": "ee3418598f2a50b7645ba8c92e4e8b5eb54ae74092ecda69ed6ca6220a02d50c" }, "pipfile-spec": 6, "requires": { @@ -18,129 +18,119 @@ "default": { "astroid": { "hashes": [ - "sha256:0e0e3709d64fbffd3037e4ff403580550f14471fd3eaae9fa11cc9a5c7901153", - "sha256:a3cf9f02c53dd259144a7e8f3ccd75d67c9a8c716ef183e0c1f291bc5d7bb3cf" + "sha256:389656ca57b6108f939cf5d2f9a2a825a3be50ba9d589670f393236e0a03b91c", + "sha256:903f024859b7c7687d7a7f3a3f73b17301f8e42dfd9cc9df9d4418172d3e2dbd" ], "index": "pypi", - "version": "==2.14.2" + "markers": "python_full_version >= '3.7.2'", + "version": "==2.15.6" }, "certifi": { "hashes": [ - "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3", - 
"sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18" + "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082", + "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9" ], "index": "pypi", - "version": "==2022.12.7" + "markers": "python_version >= '3.6'", + "version": "==2023.7.22" }, "chardet": { "hashes": [ - "sha256:0d62712b956bc154f85fb0a266e2a3c5913c2967e00348701b32411d6def31e5", - "sha256:362777fb014af596ad31334fde1e8c327dfdb076e1960d1694662d46a6917ab9" + "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", + "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970" ], "index": "pypi", - "version": "==5.1.0" + "markers": "python_version >= '3.7'", + "version": "==5.2.0" }, "charset-normalizer": { "hashes": [ - "sha256:00d3ffdaafe92a5dc603cb9bd5111aaa36dfa187c8285c543be562e61b755f6b", - "sha256:024e606be3ed92216e2b6952ed859d86b4cfa52cd5bc5f050e7dc28f9b43ec42", - "sha256:0298eafff88c99982a4cf66ba2efa1128e4ddaca0b05eec4c456bbc7db691d8d", - "sha256:02a51034802cbf38db3f89c66fb5d2ec57e6fe7ef2f4a44d070a593c3688667b", - "sha256:083c8d17153ecb403e5e1eb76a7ef4babfc2c48d58899c98fcaa04833e7a2f9a", - "sha256:0a11e971ed097d24c534c037d298ad32c6ce81a45736d31e0ff0ad37ab437d59", - "sha256:0bf2dae5291758b6f84cf923bfaa285632816007db0330002fa1de38bfcb7154", - "sha256:0c0a590235ccd933d9892c627dec5bc7511ce6ad6c1011fdf5b11363022746c1", - "sha256:0f438ae3532723fb6ead77e7c604be7c8374094ef4ee2c5e03a3a17f1fca256c", - "sha256:109487860ef6a328f3eec66f2bf78b0b72400280d8f8ea05f69c51644ba6521a", - "sha256:11b53acf2411c3b09e6af37e4b9005cba376c872503c8f28218c7243582df45d", - "sha256:12db3b2c533c23ab812c2b25934f60383361f8a376ae272665f8e48b88e8e1c6", - "sha256:14e76c0f23218b8f46c4d87018ca2e441535aed3632ca134b10239dfb6dadd6b", - "sha256:16a8663d6e281208d78806dbe14ee9903715361cf81f6d4309944e4d1e59ac5b", - "sha256:292d5e8ba896bbfd6334b096e34bffb56161c81408d6d036a7dfa6929cff8783", - "sha256:2c03cc56021a4bd59be889c2b9257dae13bf55041a3372d3295416f86b295fb5", - "sha256:2e396d70bc4ef5325b72b593a72c8979999aa52fb8bcf03f701c1b03e1166918", - "sha256:2edb64ee7bf1ed524a1da60cdcd2e1f6e2b4f66ef7c077680739f1641f62f555", - "sha256:31a9ddf4718d10ae04d9b18801bd776693487cbb57d74cc3458a7673f6f34639", - "sha256:356541bf4381fa35856dafa6a965916e54bed415ad8a24ee6de6e37deccf2786", - "sha256:358a7c4cb8ba9b46c453b1dd8d9e431452d5249072e4f56cfda3149f6ab1405e", - "sha256:37f8febc8ec50c14f3ec9637505f28e58d4f66752207ea177c1d67df25da5aed", - "sha256:39049da0ffb96c8cbb65cbf5c5f3ca3168990adf3551bd1dee10c48fce8ae820", - "sha256:39cf9ed17fe3b1bc81f33c9ceb6ce67683ee7526e65fde1447c772afc54a1bb8", - "sha256:3ae1de54a77dc0d6d5fcf623290af4266412a7c4be0b1ff7444394f03f5c54e3", - "sha256:3b590df687e3c5ee0deef9fc8c547d81986d9a1b56073d82de008744452d6541", - "sha256:3e45867f1f2ab0711d60c6c71746ac53537f1684baa699f4f668d4c6f6ce8e14", - "sha256:3fc1c4a2ffd64890aebdb3f97e1278b0cc72579a08ca4de8cd2c04799a3a22be", - "sha256:4457ea6774b5611f4bed5eaa5df55f70abde42364d498c5134b7ef4c6958e20e", - "sha256:44ba614de5361b3e5278e1241fda3dc1838deed864b50a10d7ce92983797fa76", - "sha256:4a8fcf28c05c1f6d7e177a9a46a1c52798bfe2ad80681d275b10dcf317deaf0b", - "sha256:4b0d02d7102dd0f997580b51edc4cebcf2ab6397a7edf89f1c73b586c614272c", - "sha256:502218f52498a36d6bf5ea77081844017bf7982cdbe521ad85e64cabee1b608b", - "sha256:503e65837c71b875ecdd733877d852adbc465bd82c768a067badd953bf1bc5a3", - "sha256:5995f0164fa7df59db4746112fec3f49c461dd6b31b841873443bdb077c13cfc", - 
"sha256:59e5686dd847347e55dffcc191a96622f016bc0ad89105e24c14e0d6305acbc6", - "sha256:601f36512f9e28f029d9481bdaf8e89e5148ac5d89cffd3b05cd533eeb423b59", - "sha256:608862a7bf6957f2333fc54ab4399e405baad0163dc9f8d99cb236816db169d4", - "sha256:62595ab75873d50d57323a91dd03e6966eb79c41fa834b7a1661ed043b2d404d", - "sha256:70990b9c51340e4044cfc394a81f614f3f90d41397104d226f21e66de668730d", - "sha256:71140351489970dfe5e60fc621ada3e0f41104a5eddaca47a7acb3c1b851d6d3", - "sha256:72966d1b297c741541ca8cf1223ff262a6febe52481af742036a0b296e35fa5a", - "sha256:74292fc76c905c0ef095fe11e188a32ebd03bc38f3f3e9bcb85e4e6db177b7ea", - "sha256:761e8904c07ad053d285670f36dd94e1b6ab7f16ce62b9805c475b7aa1cffde6", - "sha256:772b87914ff1152b92a197ef4ea40efe27a378606c39446ded52c8f80f79702e", - "sha256:79909e27e8e4fcc9db4addea88aa63f6423ebb171db091fb4373e3312cb6d603", - "sha256:7e189e2e1d3ed2f4aebabd2d5b0f931e883676e51c7624826e0a4e5fe8a0bf24", - "sha256:7eb33a30d75562222b64f569c642ff3dc6689e09adda43a082208397f016c39a", - "sha256:81d6741ab457d14fdedc215516665050f3822d3e56508921cc7239f8c8e66a58", - "sha256:8499ca8f4502af841f68135133d8258f7b32a53a1d594aa98cc52013fff55678", - "sha256:84c3990934bae40ea69a82034912ffe5a62c60bbf6ec5bc9691419641d7d5c9a", - "sha256:87701167f2a5c930b403e9756fab1d31d4d4da52856143b609e30a1ce7160f3c", - "sha256:88600c72ef7587fe1708fd242b385b6ed4b8904976d5da0893e31df8b3480cb6", - "sha256:8ac7b6a045b814cf0c47f3623d21ebd88b3e8cf216a14790b455ea7ff0135d18", - "sha256:8b8af03d2e37866d023ad0ddea594edefc31e827fee64f8de5611a1dbc373174", - "sha256:8c7fe7afa480e3e82eed58e0ca89f751cd14d767638e2550c77a92a9e749c317", - "sha256:8eade758719add78ec36dc13201483f8e9b5d940329285edcd5f70c0a9edbd7f", - "sha256:911d8a40b2bef5b8bbae2e36a0b103f142ac53557ab421dc16ac4aafee6f53dc", - "sha256:93ad6d87ac18e2a90b0fe89df7c65263b9a99a0eb98f0a3d2e079f12a0735837", - "sha256:95dea361dd73757c6f1c0a1480ac499952c16ac83f7f5f4f84f0658a01b8ef41", - "sha256:9ab77acb98eba3fd2a85cd160851816bfce6871d944d885febf012713f06659c", - "sha256:9cb3032517f1627cc012dbc80a8ec976ae76d93ea2b5feaa9d2a5b8882597579", - "sha256:9cf4e8ad252f7c38dd1f676b46514f92dc0ebeb0db5552f5f403509705e24753", - "sha256:9d9153257a3f70d5f69edf2325357251ed20f772b12e593f3b3377b5f78e7ef8", - "sha256:a152f5f33d64a6be73f1d30c9cc82dfc73cec6477ec268e7c6e4c7d23c2d2291", - "sha256:a16418ecf1329f71df119e8a65f3aa68004a3f9383821edcb20f0702934d8087", - "sha256:a60332922359f920193b1d4826953c507a877b523b2395ad7bc716ddd386d866", - "sha256:a8d0fc946c784ff7f7c3742310cc8a57c5c6dc31631269876a88b809dbeff3d3", - "sha256:ab5de034a886f616a5668aa5d098af2b5385ed70142090e2a31bcbd0af0fdb3d", - "sha256:c22d3fe05ce11d3671297dc8973267daa0f938b93ec716e12e0f6dee81591dc1", - "sha256:c2ac1b08635a8cd4e0cbeaf6f5e922085908d48eb05d44c5ae9eabab148512ca", - "sha256:c512accbd6ff0270939b9ac214b84fb5ada5f0409c44298361b2f5e13f9aed9e", - "sha256:c75ffc45f25324e68ab238cb4b5c0a38cd1c3d7f1fb1f72b5541de469e2247db", - "sha256:c95a03c79bbe30eec3ec2b7f076074f4281526724c8685a42872974ef4d36b72", - "sha256:cadaeaba78750d58d3cc6ac4d1fd867da6fc73c88156b7a3212a3cd4819d679d", - "sha256:cd6056167405314a4dc3c173943f11249fa0f1b204f8b51ed4bde1a9cd1834dc", - "sha256:db72b07027db150f468fbada4d85b3b2729a3db39178abf5c543b784c1254539", - "sha256:df2c707231459e8a4028eabcd3cfc827befd635b3ef72eada84ab13b52e1574d", - "sha256:e62164b50f84e20601c1ff8eb55620d2ad25fb81b59e3cd776a1902527a788af", - "sha256:e696f0dd336161fca9adbb846875d40752e6eba585843c768935ba5c9960722b", - "sha256:eaa379fcd227ca235d04152ca6704c7cb55564116f8bc52545ff357628e10602", - 
"sha256:ebea339af930f8ca5d7a699b921106c6e29c617fe9606fa7baa043c1cdae326f", - "sha256:f4c39b0e3eac288fedc2b43055cfc2ca7a60362d0e5e87a637beac5d801ef478", - "sha256:f5057856d21e7586765171eac8b9fc3f7d44ef39425f85dbcccb13b3ebea806c", - "sha256:f6f45710b4459401609ebebdbcfb34515da4fc2aa886f95107f556ac69a9147e", - "sha256:f97e83fa6c25693c7a35de154681fcc257c1c41b38beb0304b9c4d2d9e164479", - "sha256:f9d0c5c045a3ca9bedfc35dca8526798eb91a07aa7a2c0fee134c6c6f321cbd7", - "sha256:ff6f3db31555657f3163b15a6b7c6938d08df7adbfc9dd13d9d19edad678f1e8" + "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96", + "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c", + "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710", + "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706", + "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020", + "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252", + "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad", + "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329", + "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a", + "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f", + "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6", + "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4", + "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a", + "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46", + "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2", + "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23", + "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace", + "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd", + "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982", + "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10", + "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2", + "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea", + "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09", + "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5", + "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149", + "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489", + "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9", + "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80", + "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592", + "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3", + "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6", + "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed", + "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c", + "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200", + "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a", + "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e", + "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d", + "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6", + "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623", + 
"sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669", + "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3", + "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa", + "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9", + "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2", + "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f", + "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1", + "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4", + "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a", + "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8", + "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3", + "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029", + "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f", + "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959", + "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22", + "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7", + "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952", + "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346", + "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e", + "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d", + "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299", + "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd", + "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a", + "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3", + "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037", + "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94", + "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c", + "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858", + "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a", + "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449", + "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c", + "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918", + "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1", + "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c", + "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac", + "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa" ], - "markers": "python_version >= '3'", - "version": "==3.0.1" + "markers": "python_full_version >= '3.7.0'", + "version": "==3.2.0" }, "dill": { "hashes": [ - "sha256:a07ffd2351b8c678dfc4a856a3005f8067aea51d6ba6c700796a4d9e280f39f0", - "sha256:e5db55f3687856d8fbdab002ed78544e1c4559a130302693d839dfe8f93f2373" + "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e", + "sha256:cc1c8b182eb3013e24bd475ff2e9295af86c1a38eb1aff128dac8962a9ce3c03" ], "markers": "python_version < '3.11'", - "version": "==0.3.6" + "version": "==0.3.7" }, "idna": { "hashes": [ @@ -148,6 +138,7 @@ "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" ], "index": "pypi", + "markers": "python_version >= '3.5'", "version": "==3.4" }, "isort": { @@ -156,6 +147,7 @@ 
"sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6" ], "index": "pypi", + "markers": "python_full_version >= '3.8.0'", "version": "==5.12.0" }, "lazy-object-proxy": { @@ -198,6 +190,7 @@ "sha256:f699ac1c768270c9e384e4cbd268d6e67aebcfae6cd623b4d7c3bfde5a35db59" ], "index": "pypi", + "markers": "python_version >= '3.7'", "version": "==1.9.0" }, "mccabe": { @@ -206,42 +199,43 @@ "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e" ], "index": "pypi", + "markers": "python_version >= '3.6'", "version": "==0.7.0" }, "platformdirs": { "hashes": [ - "sha256:8a1228abb1ef82d788f74139988b137e78692984ec7b08eaa6c65f1723af28f9", - "sha256:b1d5eb14f221506f50d6604a561f4c5786d9e80355219694a1b244bcd96f4567" + "sha256:b45696dab2d7cc691a3226759c0d3b00c47c8b6e293d96f6436f733303f77f6d", + "sha256:d7c24979f292f916dc9cbf8648319032f551ea8c49a4c9bf2fb556a02070ec1d" ], "markers": "python_version >= '3.7'", - "version": "==3.0.0" + "version": "==3.10.0" }, "psycopg2": { "hashes": [ - "sha256:093e3894d2d3c592ab0945d9eba9d139c139664dcf83a1c440b8a7aa9bb21955", - "sha256:190d51e8c1b25a47484e52a79638a8182451d6f6dff99f26ad9bd81e5359a0fa", - "sha256:1a5c7d7d577e0eabfcf15eb87d1e19314c8c4f0e722a301f98e0e3a65e238b4e", - "sha256:1e5a38aa85bd660c53947bd28aeaafb6a97d70423606f1ccb044a03a1203fe4a", - "sha256:322fd5fca0b1113677089d4ebd5222c964b1760e361f151cbb2706c4912112c5", - "sha256:4cb9936316d88bfab614666eb9e32995e794ed0f8f6b3b718666c22819c1d7ee", - "sha256:920bf418000dd17669d2904472efeab2b20546efd0548139618f8fa305d1d7ad", - "sha256:922cc5f0b98a5f2b1ff481f5551b95cd04580fd6f0c72d9b22e6c0145a4840e0", - "sha256:a5246d2e683a972e2187a8714b5c2cf8156c064629f9a9b1a873c1730d9e245a", - "sha256:b9ac1b0d8ecc49e05e4e182694f418d27f3aedcfca854ebd6c05bb1cffa10d6d", - "sha256:d3ef67e630b0de0779c42912fe2cbae3805ebaba30cda27fea2a3de650a9414f", - "sha256:f5b6320dbc3cf6cfb9f25308286f9f7ab464e65cfb105b64cc9c52831748ced2", - "sha256:fc04dd5189b90d825509caa510f20d1d504761e78b8dfb95a0ede180f71d50e5" + "sha256:1a6a2d609bce44f78af4556bea0c62a5e7f05c23e5ea9c599e07678995609084", + "sha256:44d93a0109dfdf22fe399b419bcd7fa589d86895d3931b01fb321d74dadc68f1", + "sha256:8275abf628c6dc7ec834ea63f6f3846bf33518907a2b9b693d41fd063767a866", + "sha256:91e81a8333a0037babfc9fe6d11e997a9d4dac0f38c43074886b0d9dead94fe9", + "sha256:b22ed9c66da2589a664e0f1ca2465c29b75aaab36fa209d4fb916025fb9119e5", + "sha256:b6bd7d9d3a7a63faae6edf365f0ed0e9b0a1aaf1da3ca146e6b043fb3eb5d723", + "sha256:c7949770cafbd2f12cecc97dea410c514368908a103acf519f2a346134caa4d5", + "sha256:d1210fcf99aae6f728812d1d2240afc1dc44b9e6cba526a06fb8134f969957c2", + "sha256:d5c5297e2fbc8068d4255f1e606bfc9291f06f91ec31b2a0d4c536210ac5c0a2", + "sha256:e9b04cbef584310a1ac0f0d55bb623ca3244c87c51187645432e342de9ae81a8", + "sha256:f00cc35bd7119f1fed17b85bd1007855194dde2cbd8de01ab8ebb17487440ad8" ], "index": "pypi", - "version": "==2.9.5" + "markers": "python_version >= '3.6'", + "version": "==2.9.7" }, "pylint": { "hashes": [ - "sha256:13b2c805a404a9bf57d002cd5f054ca4d40b0b87542bdaba5e05321ae8262c84", - "sha256:ff22dde9c2128cd257c145cfd51adeff0be7df4d80d669055f24a962b351bbe4" + "sha256:73995fb8216d3bed149c8d51bba25b2c52a8251a2c8ac846ec668ce38fab5413", + "sha256:f7b601cbc06fef7e62a754e2b41294c2aa31f1cb659624b9a85bcba29eaf8252" ], "index": "pypi", - "version": "==2.16.2" + "markers": "python_full_version >= '3.7.2'", + "version": "==2.17.5" }, "python-dateutil": { "hashes": [ @@ -249,61 +243,74 @@ "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" ], 
"index": "pypi", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.8.2" }, "pyyaml": { "hashes": [ - "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf", - "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", - "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b", - "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57", - "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b", - "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4", - "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07", - "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba", - "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9", - "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287", - "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513", - "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0", - "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782", - "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0", - "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92", - "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f", - "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2", - "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc", - "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1", - "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c", - "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86", - "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4", - "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c", - "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34", - "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b", - "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d", - "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c", - "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb", - "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7", - "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737", - "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3", - "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d", - "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358", - "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53", - "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78", - "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803", - "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a", - "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f", - "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", - "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" + "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5", + "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc", + "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df", + "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741", + 
"sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206", + "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27", + "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595", + "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62", + "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98", + "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696", + "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290", + "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9", + "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d", + "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6", + "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867", + "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47", + "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486", + "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6", + "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3", + "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007", + "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938", + "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0", + "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c", + "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735", + "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d", + "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28", + "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4", + "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba", + "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8", + "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5", + "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd", + "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3", + "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0", + "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515", + "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c", + "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c", + "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924", + "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34", + "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43", + "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859", + "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673", + "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54", + "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a", + "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b", + "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab", + "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa", + "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c", + "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585", + "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d", + "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f" ], "index": 
"pypi", - "version": "==6.0" + "markers": "python_version >= '3.6'", + "version": "==6.0.1" }, "requests": { "hashes": [ - "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa", - "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf" + "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", + "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1" ], "index": "pypi", - "version": "==2.28.2" + "markers": "python_version >= '3.7'", + "version": "==2.31.0" }, "six": { "hashes": [ @@ -311,6 +318,7 @@ "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], "index": "pypi", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, "toml": { @@ -319,6 +327,7 @@ "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" ], "index": "pypi", + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==0.10.2" }, "tomli": { @@ -331,105 +340,118 @@ }, "tomlkit": { "hashes": [ - "sha256:07de26b0d8cfc18f871aec595fda24d95b08fef89d147caa861939f37230bf4b", - "sha256:71b952e5721688937fb02cf9d354dbcf0785066149d2855e44531ebdd2b65d73" + "sha256:38e1ff8edb991273ec9f6181244a6a391ac30e9f5098e7535640ea6be97a7c86", + "sha256:712cbd236609acc6a3e2e97253dfc52d4c2082982a88f61b640ecf0817eab899" ], - "markers": "python_version >= '3.6'", - "version": "==0.11.6" + "markers": "python_version >= '3.7'", + "version": "==0.12.1" }, "typing-extensions": { "hashes": [ - "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb", - "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4" + "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36", + "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2" ], "markers": "python_version < '3.11'", - "version": "==4.5.0" + "version": "==4.7.1" }, "urllib3": { "hashes": [ - "sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72", - "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1" + "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11", + "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4" ], "index": "pypi", - "version": "==1.26.14" + "markers": "python_version >= '3.7'", + "version": "==2.0.4" }, "wrapt": { "hashes": [ - "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3", - "sha256:01c205616a89d09827986bc4e859bcabd64f5a0662a7fe95e0d359424e0e071b", - "sha256:02b41b633c6261feff8ddd8d11c711df6842aba629fdd3da10249a53211a72c4", - "sha256:07f7a7d0f388028b2df1d916e94bbb40624c59b48ecc6cbc232546706fac74c2", - "sha256:11871514607b15cfeb87c547a49bca19fde402f32e2b1c24a632506c0a756656", - "sha256:1b376b3f4896e7930f1f772ac4b064ac12598d1c38d04907e696cc4d794b43d3", - "sha256:21ac0156c4b089b330b7666db40feee30a5d52634cc4560e1905d6529a3897ff", - "sha256:257fd78c513e0fb5cdbe058c27a0624c9884e735bbd131935fd49e9fe719d310", - "sha256:2b39d38039a1fdad98c87279b48bc5dce2c0ca0d73483b12cb72aa9609278e8a", - "sha256:2cf71233a0ed05ccdabe209c606fe0bac7379fdcf687f39b944420d2a09fdb57", - "sha256:2fe803deacd09a233e4762a1adcea5db5d31e6be577a43352936179d14d90069", - "sha256:3232822c7d98d23895ccc443bbdf57c7412c5a65996c30442ebe6ed3df335383", - "sha256:34aa51c45f28ba7f12accd624225e2b1e5a3a45206aa191f6f9aac931d9d56fe", - "sha256:36f582d0c6bc99d5f39cd3ac2a9062e57f3cf606ade29a0a0d6b323462f4dd87", - 
"sha256:380a85cf89e0e69b7cfbe2ea9f765f004ff419f34194018a6827ac0e3edfed4d", - "sha256:40e7bc81c9e2b2734ea4bc1aceb8a8f0ceaac7c5299bc5d69e37c44d9081d43b", - "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907", - "sha256:4fcc4649dc762cddacd193e6b55bc02edca674067f5f98166d7713b193932b7f", - "sha256:5a0f54ce2c092aaf439813735584b9537cad479575a09892b8352fea5e988dc0", - "sha256:5a9a0d155deafd9448baff28c08e150d9b24ff010e899311ddd63c45c2445e28", - "sha256:5b02d65b9ccf0ef6c34cba6cf5bf2aab1bb2f49c6090bafeecc9cd81ad4ea1c1", - "sha256:60db23fa423575eeb65ea430cee741acb7c26a1365d103f7b0f6ec412b893853", - "sha256:642c2e7a804fcf18c222e1060df25fc210b9c58db7c91416fb055897fc27e8cc", - "sha256:6a9a25751acb379b466ff6be78a315e2b439d4c94c1e99cb7266d40a537995d3", - "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3", - "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164", - "sha256:6e743de5e9c3d1b7185870f480587b75b1cb604832e380d64f9504a0535912d1", - "sha256:709fe01086a55cf79d20f741f39325018f4df051ef39fe921b1ebe780a66184c", - "sha256:7b7c050ae976e286906dd3f26009e117eb000fb2cf3533398c5ad9ccc86867b1", - "sha256:7d2872609603cb35ca513d7404a94d6d608fc13211563571117046c9d2bcc3d7", - "sha256:7ef58fb89674095bfc57c4069e95d7a31cfdc0939e2a579882ac7d55aadfd2a1", - "sha256:80bb5c256f1415f747011dc3604b59bc1f91c6e7150bd7db03b19170ee06b320", - "sha256:81b19725065dcb43df02b37e03278c011a09e49757287dca60c5aecdd5a0b8ed", - "sha256:833b58d5d0b7e5b9832869f039203389ac7cbf01765639c7309fd50ef619e0b1", - "sha256:88bd7b6bd70a5b6803c1abf6bca012f7ed963e58c68d76ee20b9d751c74a3248", - "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c", - "sha256:8c0ce1e99116d5ab21355d8ebe53d9460366704ea38ae4d9f6933188f327b456", - "sha256:8d649d616e5c6a678b26d15ece345354f7c2286acd6db868e65fcc5ff7c24a77", - "sha256:903500616422a40a98a5a3c4ff4ed9d0066f3b4c951fa286018ecdf0750194ef", - "sha256:9736af4641846491aedb3c3f56b9bc5568d92b0692303b5a305301a95dfd38b1", - "sha256:988635d122aaf2bdcef9e795435662bcd65b02f4f4c1ae37fbee7401c440b3a7", - "sha256:9cca3c2cdadb362116235fdbd411735de4328c61425b0aa9f872fd76d02c4e86", - "sha256:9e0fd32e0148dd5dea6af5fee42beb949098564cc23211a88d799e434255a1f4", - "sha256:9f3e6f9e05148ff90002b884fbc2a86bd303ae847e472f44ecc06c2cd2fcdb2d", - "sha256:a85d2b46be66a71bedde836d9e41859879cc54a2a04fad1191eb50c2066f6e9d", - "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8", - "sha256:aa31fdcc33fef9eb2552cbcbfee7773d5a6792c137b359e82879c101e98584c5", - "sha256:b014c23646a467558be7da3d6b9fa409b2c567d2110599b7cf9a0c5992b3b471", - "sha256:b21bb4c09ffabfa0e85e3a6b623e19b80e7acd709b9f91452b8297ace2a8ab00", - "sha256:b5901a312f4d14c59918c221323068fad0540e34324925c8475263841dbdfe68", - "sha256:b9b7a708dd92306328117d8c4b62e2194d00c365f18eff11a9b53c6f923b01e3", - "sha256:d1967f46ea8f2db647c786e78d8cc7e4313dbd1b0aca360592d8027b8508e24d", - "sha256:d52a25136894c63de15a35bc0bdc5adb4b0e173b9c0d07a2be9d3ca64a332735", - "sha256:d77c85fedff92cf788face9bfa3ebaa364448ebb1d765302e9af11bf449ca36d", - "sha256:d79d7d5dc8a32b7093e81e97dad755127ff77bcc899e845f41bf71747af0c569", - "sha256:dbcda74c67263139358f4d188ae5faae95c30929281bc6866d00573783c422b7", - "sha256:ddaea91abf8b0d13443f6dac52e89051a5063c7d014710dcb4d4abb2ff811a59", - "sha256:dee0ce50c6a2dd9056c20db781e9c1cfd33e77d2d569f5d1d9321c641bb903d5", - "sha256:dee60e1de1898bde3b238f18340eec6148986da0455d8ba7848d50470a7a32fb", - "sha256:e2f83e18fe2f4c9e7db597e988f72712c0c3676d337d8b101f6758107c42425b", - 
"sha256:e3fb1677c720409d5f671e39bac6c9e0e422584e5f518bfd50aa4cbbea02433f", - "sha256:ee2b1b1769f6707a8a445162ea16dddf74285c3964f605877a20e38545c3c462", - "sha256:ee6acae74a2b91865910eef5e7de37dc6895ad96fa23603d1d27ea69df545015", - "sha256:ef3f72c9666bba2bab70d2a8b79f2c6d2c1a42a7f7e2b0ec83bb2f9e383950af" + "sha256:02fce1852f755f44f95af51f69d22e45080102e9d00258053b79367d07af39c0", + "sha256:077ff0d1f9d9e4ce6476c1a924a3332452c1406e59d90a2cf24aeb29eeac9420", + "sha256:078e2a1a86544e644a68422f881c48b84fef6d18f8c7a957ffd3f2e0a74a0d4a", + "sha256:0970ddb69bba00670e58955f8019bec4a42d1785db3faa043c33d81de2bf843c", + "sha256:1286eb30261894e4c70d124d44b7fd07825340869945c79d05bda53a40caa079", + "sha256:21f6d9a0d5b3a207cdf7acf8e58d7d13d463e639f0c7e01d82cdb671e6cb7923", + "sha256:230ae493696a371f1dbffaad3dafbb742a4d27a0afd2b1aecebe52b740167e7f", + "sha256:26458da5653aa5b3d8dc8b24192f574a58984c749401f98fff994d41d3f08da1", + "sha256:2cf56d0e237280baed46f0b5316661da892565ff58309d4d2ed7dba763d984b8", + "sha256:2e51de54d4fb8fb50d6ee8327f9828306a959ae394d3e01a1ba8b2f937747d86", + "sha256:2fbfbca668dd15b744418265a9607baa970c347eefd0db6a518aaf0cfbd153c0", + "sha256:38adf7198f8f154502883242f9fe7333ab05a5b02de7d83aa2d88ea621f13364", + "sha256:3a8564f283394634a7a7054b7983e47dbf39c07712d7b177b37e03f2467a024e", + "sha256:3abbe948c3cbde2689370a262a8d04e32ec2dd4f27103669a45c6929bcdbfe7c", + "sha256:3bbe623731d03b186b3d6b0d6f51865bf598587c38d6f7b0be2e27414f7f214e", + "sha256:40737a081d7497efea35ab9304b829b857f21558acfc7b3272f908d33b0d9d4c", + "sha256:41d07d029dd4157ae27beab04d22b8e261eddfc6ecd64ff7000b10dc8b3a5727", + "sha256:46ed616d5fb42f98630ed70c3529541408166c22cdfd4540b88d5f21006b0eff", + "sha256:493d389a2b63c88ad56cdc35d0fa5752daac56ca755805b1b0c530f785767d5e", + "sha256:4ff0d20f2e670800d3ed2b220d40984162089a6e2c9646fdb09b85e6f9a8fc29", + "sha256:54accd4b8bc202966bafafd16e69da9d5640ff92389d33d28555c5fd4f25ccb7", + "sha256:56374914b132c702aa9aa9959c550004b8847148f95e1b824772d453ac204a72", + "sha256:578383d740457fa790fdf85e6d346fda1416a40549fe8db08e5e9bd281c6a475", + "sha256:58d7a75d731e8c63614222bcb21dd992b4ab01a399f1f09dd82af17bbfc2368a", + "sha256:5c5aa28df055697d7c37d2099a7bc09f559d5053c3349b1ad0c39000e611d317", + "sha256:5fc8e02f5984a55d2c653f5fea93531e9836abbd84342c1d1e17abc4a15084c2", + "sha256:63424c681923b9f3bfbc5e3205aafe790904053d42ddcc08542181a30a7a51bd", + "sha256:64b1df0f83706b4ef4cfb4fb0e4c2669100fd7ecacfb59e091fad300d4e04640", + "sha256:74934ebd71950e3db69960a7da29204f89624dde411afbfb3b4858c1409b1e98", + "sha256:75669d77bb2c071333417617a235324a1618dba66f82a750362eccbe5b61d248", + "sha256:75760a47c06b5974aa5e01949bf7e66d2af4d08cb8c1d6516af5e39595397f5e", + "sha256:76407ab327158c510f44ded207e2f76b657303e17cb7a572ffe2f5a8a48aa04d", + "sha256:76e9c727a874b4856d11a32fb0b389afc61ce8aaf281ada613713ddeadd1cfec", + "sha256:77d4c1b881076c3ba173484dfa53d3582c1c8ff1f914c6461ab70c8428b796c1", + "sha256:780c82a41dc493b62fc5884fb1d3a3b81106642c5c5c78d6a0d4cbe96d62ba7e", + "sha256:7dc0713bf81287a00516ef43137273b23ee414fe41a3c14be10dd95ed98a2df9", + "sha256:7eebcdbe3677e58dd4c0e03b4f2cfa346ed4049687d839adad68cc38bb559c92", + "sha256:896689fddba4f23ef7c718279e42f8834041a21342d95e56922e1c10c0cc7afb", + "sha256:96177eb5645b1c6985f5c11d03fc2dbda9ad24ec0f3a46dcce91445747e15094", + "sha256:96e25c8603a155559231c19c0349245eeb4ac0096fe3c1d0be5c47e075bd4f46", + "sha256:9d37ac69edc5614b90516807de32d08cb8e7b12260a285ee330955604ed9dd29", + "sha256:9ed6aa0726b9b60911f4aed8ec5b8dd7bf3491476015819f56473ffaef8959bd", + 
"sha256:a487f72a25904e2b4bbc0817ce7a8de94363bd7e79890510174da9d901c38705", + "sha256:a4cbb9ff5795cd66f0066bdf5947f170f5d63a9274f99bdbca02fd973adcf2a8", + "sha256:a74d56552ddbde46c246b5b89199cb3fd182f9c346c784e1a93e4dc3f5ec9975", + "sha256:a89ce3fd220ff144bd9d54da333ec0de0399b52c9ac3d2ce34b569cf1a5748fb", + "sha256:abd52a09d03adf9c763d706df707c343293d5d106aea53483e0ec8d9e310ad5e", + "sha256:abd8f36c99512755b8456047b7be10372fca271bf1467a1caa88db991e7c421b", + "sha256:af5bd9ccb188f6a5fdda9f1f09d9f4c86cc8a539bd48a0bfdc97723970348418", + "sha256:b02f21c1e2074943312d03d243ac4388319f2456576b2c6023041c4d57cd7019", + "sha256:b06fa97478a5f478fb05e1980980a7cdf2712015493b44d0c87606c1513ed5b1", + "sha256:b0724f05c396b0a4c36a3226c31648385deb6a65d8992644c12a4963c70326ba", + "sha256:b130fe77361d6771ecf5a219d8e0817d61b236b7d8b37cc045172e574ed219e6", + "sha256:b56d5519e470d3f2fe4aa7585f0632b060d532d0696c5bdfb5e8319e1d0f69a2", + "sha256:b67b819628e3b748fd3c2192c15fb951f549d0f47c0449af0764d7647302fda3", + "sha256:ba1711cda2d30634a7e452fc79eabcadaffedf241ff206db2ee93dd2c89a60e7", + "sha256:bbeccb1aa40ab88cd29e6c7d8585582c99548f55f9b2581dfc5ba68c59a85752", + "sha256:bd84395aab8e4d36263cd1b9308cd504f6cf713b7d6d3ce25ea55670baec5416", + "sha256:c99f4309f5145b93eca6e35ac1a988f0dc0a7ccf9ccdcd78d3c0adf57224e62f", + "sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1", + "sha256:cd525e0e52a5ff16653a3fc9e3dd827981917d34996600bbc34c05d048ca35cc", + "sha256:cdb4f085756c96a3af04e6eca7f08b1345e94b53af8921b25c72f096e704e145", + "sha256:ce42618f67741d4697684e501ef02f29e758a123aa2d669e2d964ff734ee00ee", + "sha256:d06730c6aed78cee4126234cf2d071e01b44b915e725a6cb439a879ec9754a3a", + "sha256:d5fe3e099cf07d0fb5a1e23d399e5d4d1ca3e6dfcbe5c8570ccff3e9208274f7", + "sha256:d6bcbfc99f55655c3d93feb7ef3800bd5bbe963a755687cbf1f490a71fb7794b", + "sha256:d787272ed958a05b2c86311d3a4135d3c2aeea4fc655705f074130aa57d71653", + "sha256:e169e957c33576f47e21864cf3fc9ff47c223a4ebca8960079b8bd36cb014fd0", + "sha256:e20076a211cd6f9b44a6be58f7eeafa7ab5720eb796975d0c03f05b47d89eb90", + "sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29", + "sha256:eef4d64c650f33347c1f9266fa5ae001440b232ad9b98f1f43dfe7a79435c0a6", + "sha256:f2e69b3ed24544b0d3dbe2c5c0ba5153ce50dcebb576fdc4696d52aa22db6034", + "sha256:f87ec75864c37c4c6cb908d282e1969e79763e0d9becdfe9fe5473b7bb1e5f09", + "sha256:fbec11614dba0424ca72f4e8ba3c420dba07b4a7c206c8c8e4e73f2e98f4c559", + "sha256:fd69666217b62fa5d7c6aa88e507493a34dec4fa20c5bd925e4bc12fce586639" ], "index": "pypi", - "version": "==1.14.1" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==1.15.0" }, "xlsxwriter": { "hashes": [ - "sha256:ec77335fb118c36bc5ed1c89e33904d649e4989df2d7980f7d6a9dd95ee5874e", - "sha256:f5c7491b8450cf49968428f062355de16c9140aa24eafc466c9dfe107610bd44" + "sha256:331508ff39d610ecdaf979e458840bc1eab6e6a02cfd5d08f044f0f73636236f", + "sha256:78751099a770273f1c98b8d6643351f68f98ae8e6acf9d09d37dc6798f8cd3de" ], - "index": "pypi", - "version": "==3.0.8" + "markers": "python_version >= '3.6'", + "version": "==3.1.2" } }, "develop": {} From 440384618398502417ae1e035fa8ddbe89a3a429 Mon Sep 17 00:00:00 2001 From: nwoodward Date: Wed, 6 Sep 2023 16:19:43 -0500 Subject: [PATCH 04/22] changed resourcetype from 2 to Item --- dspace_reports/collection_indexer.py | 2 +- dspace_reports/community_indexer.py | 2 +- dspace_reports/repository_indexer.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git 
a/dspace_reports/collection_indexer.py b/dspace_reports/collection_indexer.py index 9814dee..9276ae7 100644 --- a/dspace_reports/collection_indexer.py +++ b/dspace_reports/collection_indexer.py @@ -86,7 +86,7 @@ def index_collection_items(self, collection_id=None, time_period=None): # Default Solr params solr_query_params = { - "q": "search.resourcetype:2", + "q": "search.resourcetype:Item", "start": "0", "rows": "0", "wt": "json" diff --git a/dspace_reports/community_indexer.py b/dspace_reports/community_indexer.py index b582612..dcc89a6 100644 --- a/dspace_reports/community_indexer.py +++ b/dspace_reports/community_indexer.py @@ -78,7 +78,7 @@ def index_community_items(self, community_id=None, time_period=None): # Default Solr params solr_query_params = { - "q": "search.resourcetype:2", + "q": "search.resourcetype:Item", "start": "0", "rows": "0", "wt": "json" diff --git a/dspace_reports/repository_indexer.py b/dspace_reports/repository_indexer.py index 174c5d4..71d9d97 100644 --- a/dspace_reports/repository_indexer.py +++ b/dspace_reports/repository_indexer.py @@ -45,7 +45,7 @@ def index_repository_items(self, repository_id=None, time_period=None): # Default Solr params solr_query_params = { - "q": "search.resourcetype:2", + "q": "search.resourcetype:Item", "start": "0", "rows": "0", "wt": "json" @@ -104,7 +104,7 @@ def index_repository_views(self, repository_id=None, time_period=None): # Default Solr params solr_query_params = { "q": "type:2", - "fq": "isBot:false AND statistics_type:view", + "fq": "-isBot:true AND statistics_type:view", "shards": shards, "rows": 0, "wt": "json" @@ -165,7 +165,7 @@ def index_repository_downloads(self, repository_id=None, time_period=None): # Default Solr params solr_query_params = { "q": "type:0", - "fq": "isBot:false AND statistics_type:view AND bundleName:ORIGINAL", + "fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL", "shards": shards, "rows": 0, "wt": "json" From f0b11974e54b287236d983b6d8b6934c0d14bd0f Mon Sep 17 00:00:00 2001 From: nwoodward Date: Mon, 11 Sep 2023 10:14:09 -0500 Subject: [PATCH 05/22] rolled back urllib3 to work with older OS libraries --- Pipfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Pipfile b/Pipfile index 9010965..75c2da9 100644 --- a/Pipfile +++ b/Pipfile @@ -19,7 +19,7 @@ python-dateutil = "==2.8.2" requests = "==2.31.0" six = "==1.16.0" toml = "==0.10.2" -urllib3 = "==2.0.4" +urllib3 = "==1.26.16" wrapt = "==1.15.0" XlsxWriter = "==3.1.2" pyyaml = "==6.0.1" From 5117b83a180dc16080a5450832c3efa1f1cc756b Mon Sep 17 00:00:00 2001 From: nwoodward Date: Mon, 15 Jul 2024 15:54:11 -0500 Subject: [PATCH 06/22] WIP: large-scale refactor for DSpace 7 using new REST API with updated dependencies and poetry instead of pipenv --- Pipfile | 28 -- Pipfile.lock | 458 --------------------- README.md | 10 +- database_manager.py | 79 ++-- dspace_reports/collection_indexer.py | 140 ++++--- dspace_reports/community_indexer.py | 148 +++---- dspace_reports/indexer.py | 40 +- dspace_reports/item_indexer.py | 99 +++-- dspace_reports/repository_indexer.py | 117 +++--- lib/api.py | 255 ++++++++---- lib/database.py | 34 +- lib/emailer.py | 69 ++-- lib/oai.py | 75 ++-- lib/output.py | 43 +- lib/solr.py | 72 +++- lib/util.py | 38 +- poetry.lock | 586 +++++++++++++++++++++++++++ pyproject.toml | 33 ++ requirements.txt | 25 ++ run_collection_indexer.py | 36 +- run_community_indexer.py | 38 +- run_cron.py | 46 ++- run_indexer.py | 47 ++- run_item_indexer.py | 36 +- run_reports.py | 118 ++++-- 
run_repository_indexer.py | 38 +- setup.py | 23 -- tests/__init__.py | 0 28 files changed, 1606 insertions(+), 1125 deletions(-) delete mode 100644 Pipfile delete mode 100644 Pipfile.lock create mode 100644 poetry.lock create mode 100644 pyproject.toml create mode 100644 requirements.txt delete mode 100644 setup.py create mode 100644 tests/__init__.py diff --git a/Pipfile b/Pipfile deleted file mode 100644 index 75c2da9..0000000 --- a/Pipfile +++ /dev/null @@ -1,28 +0,0 @@ -[[source]] -name = "pypi" -url = "https://pypi.org/simple" -verify_ssl = true - -[dev-packages] - -[packages] -astroid = "==2.15.6" -certifi = "==2023.7.22" -chardet = "==5.2.0" -idna = "==3.4" -isort = "==5.12.0" -lazy-object-proxy = "==1.9.0" -mccabe = "==0.7.0" -psycopg2 = "==2.9.7" -pylint = "==2.17.5" -python-dateutil = "==2.8.2" -requests = "==2.31.0" -six = "==1.16.0" -toml = "==0.10.2" -urllib3 = "==1.26.16" -wrapt = "==1.15.0" -XlsxWriter = "==3.1.2" -pyyaml = "==6.0.1" - -[requires] -python_version = "3.9" diff --git a/Pipfile.lock b/Pipfile.lock deleted file mode 100644 index 90e7e78..0000000 --- a/Pipfile.lock +++ /dev/null @@ -1,458 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "ee3418598f2a50b7645ba8c92e4e8b5eb54ae74092ecda69ed6ca6220a02d50c" - }, - "pipfile-spec": 6, - "requires": { - "python_version": "3.9" - }, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.org/simple", - "verify_ssl": true - } - ] - }, - "default": { - "astroid": { - "hashes": [ - "sha256:389656ca57b6108f939cf5d2f9a2a825a3be50ba9d589670f393236e0a03b91c", - "sha256:903f024859b7c7687d7a7f3a3f73b17301f8e42dfd9cc9df9d4418172d3e2dbd" - ], - "index": "pypi", - "markers": "python_full_version >= '3.7.2'", - "version": "==2.15.6" - }, - "certifi": { - "hashes": [ - "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082", - "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9" - ], - "index": "pypi", - "markers": "python_version >= '3.6'", - "version": "==2023.7.22" - }, - "chardet": { - "hashes": [ - "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", - "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970" - ], - "index": "pypi", - "markers": "python_version >= '3.7'", - "version": "==5.2.0" - }, - "charset-normalizer": { - "hashes": [ - "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96", - "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c", - "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710", - "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706", - "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020", - "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252", - "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad", - "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329", - "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a", - "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f", - "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6", - "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4", - "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a", - "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46", - "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2", - "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23", - 
"sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace", - "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd", - "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982", - "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10", - "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2", - "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea", - "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09", - "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5", - "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149", - "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489", - "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9", - "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80", - "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592", - "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3", - "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6", - "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed", - "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c", - "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200", - "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a", - "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e", - "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d", - "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6", - "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623", - "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669", - "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3", - "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa", - "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9", - "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2", - "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f", - "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1", - "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4", - "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a", - "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8", - "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3", - "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029", - "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f", - "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959", - "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22", - "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7", - "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952", - "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346", - "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e", - "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d", - "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299", - "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd", - "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a", - 
"sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3", - "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037", - "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94", - "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c", - "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858", - "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a", - "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449", - "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c", - "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918", - "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1", - "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c", - "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac", - "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa" - ], - "markers": "python_full_version >= '3.7.0'", - "version": "==3.2.0" - }, - "dill": { - "hashes": [ - "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e", - "sha256:cc1c8b182eb3013e24bd475ff2e9295af86c1a38eb1aff128dac8962a9ce3c03" - ], - "markers": "python_version < '3.11'", - "version": "==0.3.7" - }, - "idna": { - "hashes": [ - "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4", - "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" - ], - "index": "pypi", - "markers": "python_version >= '3.5'", - "version": "==3.4" - }, - "isort": { - "hashes": [ - "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504", - "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6" - ], - "index": "pypi", - "markers": "python_full_version >= '3.8.0'", - "version": "==5.12.0" - }, - "lazy-object-proxy": { - "hashes": [ - "sha256:09763491ce220c0299688940f8dc2c5d05fd1f45af1e42e636b2e8b2303e4382", - "sha256:0a891e4e41b54fd5b8313b96399f8b0e173bbbfc03c7631f01efbe29bb0bcf82", - "sha256:189bbd5d41ae7a498397287c408617fe5c48633e7755287b21d741f7db2706a9", - "sha256:18b78ec83edbbeb69efdc0e9c1cb41a3b1b1ed11ddd8ded602464c3fc6020494", - "sha256:1aa3de4088c89a1b69f8ec0dcc169aa725b0ff017899ac568fe44ddc1396df46", - "sha256:212774e4dfa851e74d393a2370871e174d7ff0ebc980907723bb67d25c8a7c30", - "sha256:2d0daa332786cf3bb49e10dc6a17a52f6a8f9601b4cf5c295a4f85854d61de63", - "sha256:5f83ac4d83ef0ab017683d715ed356e30dd48a93746309c8f3517e1287523ef4", - "sha256:659fb5809fa4629b8a1ac5106f669cfc7bef26fbb389dda53b3e010d1ac4ebae", - "sha256:660c94ea760b3ce47d1855a30984c78327500493d396eac4dfd8bd82041b22be", - "sha256:66a3de4a3ec06cd8af3f61b8e1ec67614fbb7c995d02fa224813cb7afefee701", - "sha256:721532711daa7db0d8b779b0bb0318fa87af1c10d7fe5e52ef30f8eff254d0cd", - "sha256:7322c3d6f1766d4ef1e51a465f47955f1e8123caee67dd641e67d539a534d006", - "sha256:79a31b086e7e68b24b99b23d57723ef7e2c6d81ed21007b6281ebcd1688acb0a", - "sha256:81fc4d08b062b535d95c9ea70dbe8a335c45c04029878e62d744bdced5141586", - "sha256:8fa02eaab317b1e9e03f69aab1f91e120e7899b392c4fc19807a8278a07a97e8", - "sha256:9090d8e53235aa280fc9239a86ae3ea8ac58eff66a705fa6aa2ec4968b95c821", - "sha256:946d27deaff6cf8452ed0dba83ba38839a87f4f7a9732e8f9fd4107b21e6ff07", - "sha256:9990d8e71b9f6488e91ad25f322898c136b008d87bf852ff65391b004da5e17b", - "sha256:9cd077f3d04a58e83d04b20e334f678c2b0ff9879b9375ed107d5d07ff160171", - "sha256:9e7551208b2aded9c1447453ee366f1c4070602b3d932ace044715d89666899b", - 
"sha256:9f5fa4a61ce2438267163891961cfd5e32ec97a2c444e5b842d574251ade27d2", - "sha256:b40387277b0ed2d0602b8293b94d7257e17d1479e257b4de114ea11a8cb7f2d7", - "sha256:bfb38f9ffb53b942f2b5954e0f610f1e721ccebe9cce9025a38c8ccf4a5183a4", - "sha256:cbf9b082426036e19c6924a9ce90c740a9861e2bdc27a4834fd0a910742ac1e8", - "sha256:d9e25ef10a39e8afe59a5c348a4dbf29b4868ab76269f81ce1674494e2565a6e", - "sha256:db1c1722726f47e10e0b5fdbf15ac3b8adb58c091d12b3ab713965795036985f", - "sha256:e7c21c95cae3c05c14aafffe2865bbd5e377cfc1348c4f7751d9dc9a48ca4bda", - "sha256:e8c6cfb338b133fbdbc5cfaa10fe3c6aeea827db80c978dbd13bc9dd8526b7d4", - "sha256:ea806fd4c37bf7e7ad82537b0757999264d5f70c45468447bb2b91afdbe73a6e", - "sha256:edd20c5a55acb67c7ed471fa2b5fb66cb17f61430b7a6b9c3b4a1e40293b1671", - "sha256:f0117049dd1d5635bbff65444496c90e0baa48ea405125c088e93d9cf4525b11", - "sha256:f0705c376533ed2a9e5e97aacdbfe04cecd71e0aa84c7c0595d02ef93b6e4455", - "sha256:f12ad7126ae0c98d601a7ee504c1122bcef553d1d5e0c3bfa77b16b3968d2734", - "sha256:f2457189d8257dd41ae9b434ba33298aec198e30adf2dcdaaa3a28b9994f6adb", - "sha256:f699ac1c768270c9e384e4cbd268d6e67aebcfae6cd623b4d7c3bfde5a35db59" - ], - "index": "pypi", - "markers": "python_version >= '3.7'", - "version": "==1.9.0" - }, - "mccabe": { - "hashes": [ - "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325", - "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e" - ], - "index": "pypi", - "markers": "python_version >= '3.6'", - "version": "==0.7.0" - }, - "platformdirs": { - "hashes": [ - "sha256:b45696dab2d7cc691a3226759c0d3b00c47c8b6e293d96f6436f733303f77f6d", - "sha256:d7c24979f292f916dc9cbf8648319032f551ea8c49a4c9bf2fb556a02070ec1d" - ], - "markers": "python_version >= '3.7'", - "version": "==3.10.0" - }, - "psycopg2": { - "hashes": [ - "sha256:1a6a2d609bce44f78af4556bea0c62a5e7f05c23e5ea9c599e07678995609084", - "sha256:44d93a0109dfdf22fe399b419bcd7fa589d86895d3931b01fb321d74dadc68f1", - "sha256:8275abf628c6dc7ec834ea63f6f3846bf33518907a2b9b693d41fd063767a866", - "sha256:91e81a8333a0037babfc9fe6d11e997a9d4dac0f38c43074886b0d9dead94fe9", - "sha256:b22ed9c66da2589a664e0f1ca2465c29b75aaab36fa209d4fb916025fb9119e5", - "sha256:b6bd7d9d3a7a63faae6edf365f0ed0e9b0a1aaf1da3ca146e6b043fb3eb5d723", - "sha256:c7949770cafbd2f12cecc97dea410c514368908a103acf519f2a346134caa4d5", - "sha256:d1210fcf99aae6f728812d1d2240afc1dc44b9e6cba526a06fb8134f969957c2", - "sha256:d5c5297e2fbc8068d4255f1e606bfc9291f06f91ec31b2a0d4c536210ac5c0a2", - "sha256:e9b04cbef584310a1ac0f0d55bb623ca3244c87c51187645432e342de9ae81a8", - "sha256:f00cc35bd7119f1fed17b85bd1007855194dde2cbd8de01ab8ebb17487440ad8" - ], - "index": "pypi", - "markers": "python_version >= '3.6'", - "version": "==2.9.7" - }, - "pylint": { - "hashes": [ - "sha256:73995fb8216d3bed149c8d51bba25b2c52a8251a2c8ac846ec668ce38fab5413", - "sha256:f7b601cbc06fef7e62a754e2b41294c2aa31f1cb659624b9a85bcba29eaf8252" - ], - "index": "pypi", - "markers": "python_full_version >= '3.7.2'", - "version": "==2.17.5" - }, - "python-dateutil": { - "hashes": [ - "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", - "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" - ], - "index": "pypi", - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==2.8.2" - }, - "pyyaml": { - "hashes": [ - "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5", - "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc", - 
"sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df", - "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741", - "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206", - "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27", - "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595", - "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62", - "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98", - "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696", - "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290", - "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9", - "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d", - "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6", - "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867", - "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47", - "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486", - "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6", - "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3", - "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007", - "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938", - "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0", - "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c", - "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735", - "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d", - "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28", - "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4", - "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba", - "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8", - "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5", - "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd", - "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3", - "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0", - "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515", - "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c", - "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c", - "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924", - "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34", - "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43", - "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859", - "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673", - "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54", - "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a", - "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b", - "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab", - "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa", - "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c", - "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585", - 
"sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d", - "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f" - ], - "index": "pypi", - "markers": "python_version >= '3.6'", - "version": "==6.0.1" - }, - "requests": { - "hashes": [ - "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", - "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1" - ], - "index": "pypi", - "markers": "python_version >= '3.7'", - "version": "==2.31.0" - }, - "six": { - "hashes": [ - "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", - "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" - ], - "index": "pypi", - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==1.16.0" - }, - "toml": { - "hashes": [ - "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", - "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" - ], - "index": "pypi", - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==0.10.2" - }, - "tomli": { - "hashes": [ - "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", - "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" - ], - "markers": "python_version < '3.11'", - "version": "==2.0.1" - }, - "tomlkit": { - "hashes": [ - "sha256:38e1ff8edb991273ec9f6181244a6a391ac30e9f5098e7535640ea6be97a7c86", - "sha256:712cbd236609acc6a3e2e97253dfc52d4c2082982a88f61b640ecf0817eab899" - ], - "markers": "python_version >= '3.7'", - "version": "==0.12.1" - }, - "typing-extensions": { - "hashes": [ - "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36", - "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2" - ], - "markers": "python_version < '3.11'", - "version": "==4.7.1" - }, - "urllib3": { - "hashes": [ - "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11", - "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4" - ], - "index": "pypi", - "markers": "python_version >= '3.7'", - "version": "==2.0.4" - }, - "wrapt": { - "hashes": [ - "sha256:02fce1852f755f44f95af51f69d22e45080102e9d00258053b79367d07af39c0", - "sha256:077ff0d1f9d9e4ce6476c1a924a3332452c1406e59d90a2cf24aeb29eeac9420", - "sha256:078e2a1a86544e644a68422f881c48b84fef6d18f8c7a957ffd3f2e0a74a0d4a", - "sha256:0970ddb69bba00670e58955f8019bec4a42d1785db3faa043c33d81de2bf843c", - "sha256:1286eb30261894e4c70d124d44b7fd07825340869945c79d05bda53a40caa079", - "sha256:21f6d9a0d5b3a207cdf7acf8e58d7d13d463e639f0c7e01d82cdb671e6cb7923", - "sha256:230ae493696a371f1dbffaad3dafbb742a4d27a0afd2b1aecebe52b740167e7f", - "sha256:26458da5653aa5b3d8dc8b24192f574a58984c749401f98fff994d41d3f08da1", - "sha256:2cf56d0e237280baed46f0b5316661da892565ff58309d4d2ed7dba763d984b8", - "sha256:2e51de54d4fb8fb50d6ee8327f9828306a959ae394d3e01a1ba8b2f937747d86", - "sha256:2fbfbca668dd15b744418265a9607baa970c347eefd0db6a518aaf0cfbd153c0", - "sha256:38adf7198f8f154502883242f9fe7333ab05a5b02de7d83aa2d88ea621f13364", - "sha256:3a8564f283394634a7a7054b7983e47dbf39c07712d7b177b37e03f2467a024e", - "sha256:3abbe948c3cbde2689370a262a8d04e32ec2dd4f27103669a45c6929bcdbfe7c", - "sha256:3bbe623731d03b186b3d6b0d6f51865bf598587c38d6f7b0be2e27414f7f214e", - "sha256:40737a081d7497efea35ab9304b829b857f21558acfc7b3272f908d33b0d9d4c", - "sha256:41d07d029dd4157ae27beab04d22b8e261eddfc6ecd64ff7000b10dc8b3a5727", - 
"sha256:46ed616d5fb42f98630ed70c3529541408166c22cdfd4540b88d5f21006b0eff", - "sha256:493d389a2b63c88ad56cdc35d0fa5752daac56ca755805b1b0c530f785767d5e", - "sha256:4ff0d20f2e670800d3ed2b220d40984162089a6e2c9646fdb09b85e6f9a8fc29", - "sha256:54accd4b8bc202966bafafd16e69da9d5640ff92389d33d28555c5fd4f25ccb7", - "sha256:56374914b132c702aa9aa9959c550004b8847148f95e1b824772d453ac204a72", - "sha256:578383d740457fa790fdf85e6d346fda1416a40549fe8db08e5e9bd281c6a475", - "sha256:58d7a75d731e8c63614222bcb21dd992b4ab01a399f1f09dd82af17bbfc2368a", - "sha256:5c5aa28df055697d7c37d2099a7bc09f559d5053c3349b1ad0c39000e611d317", - "sha256:5fc8e02f5984a55d2c653f5fea93531e9836abbd84342c1d1e17abc4a15084c2", - "sha256:63424c681923b9f3bfbc5e3205aafe790904053d42ddcc08542181a30a7a51bd", - "sha256:64b1df0f83706b4ef4cfb4fb0e4c2669100fd7ecacfb59e091fad300d4e04640", - "sha256:74934ebd71950e3db69960a7da29204f89624dde411afbfb3b4858c1409b1e98", - "sha256:75669d77bb2c071333417617a235324a1618dba66f82a750362eccbe5b61d248", - "sha256:75760a47c06b5974aa5e01949bf7e66d2af4d08cb8c1d6516af5e39595397f5e", - "sha256:76407ab327158c510f44ded207e2f76b657303e17cb7a572ffe2f5a8a48aa04d", - "sha256:76e9c727a874b4856d11a32fb0b389afc61ce8aaf281ada613713ddeadd1cfec", - "sha256:77d4c1b881076c3ba173484dfa53d3582c1c8ff1f914c6461ab70c8428b796c1", - "sha256:780c82a41dc493b62fc5884fb1d3a3b81106642c5c5c78d6a0d4cbe96d62ba7e", - "sha256:7dc0713bf81287a00516ef43137273b23ee414fe41a3c14be10dd95ed98a2df9", - "sha256:7eebcdbe3677e58dd4c0e03b4f2cfa346ed4049687d839adad68cc38bb559c92", - "sha256:896689fddba4f23ef7c718279e42f8834041a21342d95e56922e1c10c0cc7afb", - "sha256:96177eb5645b1c6985f5c11d03fc2dbda9ad24ec0f3a46dcce91445747e15094", - "sha256:96e25c8603a155559231c19c0349245eeb4ac0096fe3c1d0be5c47e075bd4f46", - "sha256:9d37ac69edc5614b90516807de32d08cb8e7b12260a285ee330955604ed9dd29", - "sha256:9ed6aa0726b9b60911f4aed8ec5b8dd7bf3491476015819f56473ffaef8959bd", - "sha256:a487f72a25904e2b4bbc0817ce7a8de94363bd7e79890510174da9d901c38705", - "sha256:a4cbb9ff5795cd66f0066bdf5947f170f5d63a9274f99bdbca02fd973adcf2a8", - "sha256:a74d56552ddbde46c246b5b89199cb3fd182f9c346c784e1a93e4dc3f5ec9975", - "sha256:a89ce3fd220ff144bd9d54da333ec0de0399b52c9ac3d2ce34b569cf1a5748fb", - "sha256:abd52a09d03adf9c763d706df707c343293d5d106aea53483e0ec8d9e310ad5e", - "sha256:abd8f36c99512755b8456047b7be10372fca271bf1467a1caa88db991e7c421b", - "sha256:af5bd9ccb188f6a5fdda9f1f09d9f4c86cc8a539bd48a0bfdc97723970348418", - "sha256:b02f21c1e2074943312d03d243ac4388319f2456576b2c6023041c4d57cd7019", - "sha256:b06fa97478a5f478fb05e1980980a7cdf2712015493b44d0c87606c1513ed5b1", - "sha256:b0724f05c396b0a4c36a3226c31648385deb6a65d8992644c12a4963c70326ba", - "sha256:b130fe77361d6771ecf5a219d8e0817d61b236b7d8b37cc045172e574ed219e6", - "sha256:b56d5519e470d3f2fe4aa7585f0632b060d532d0696c5bdfb5e8319e1d0f69a2", - "sha256:b67b819628e3b748fd3c2192c15fb951f549d0f47c0449af0764d7647302fda3", - "sha256:ba1711cda2d30634a7e452fc79eabcadaffedf241ff206db2ee93dd2c89a60e7", - "sha256:bbeccb1aa40ab88cd29e6c7d8585582c99548f55f9b2581dfc5ba68c59a85752", - "sha256:bd84395aab8e4d36263cd1b9308cd504f6cf713b7d6d3ce25ea55670baec5416", - "sha256:c99f4309f5145b93eca6e35ac1a988f0dc0a7ccf9ccdcd78d3c0adf57224e62f", - "sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1", - "sha256:cd525e0e52a5ff16653a3fc9e3dd827981917d34996600bbc34c05d048ca35cc", - "sha256:cdb4f085756c96a3af04e6eca7f08b1345e94b53af8921b25c72f096e704e145", - "sha256:ce42618f67741d4697684e501ef02f29e758a123aa2d669e2d964ff734ee00ee", - 
"sha256:d06730c6aed78cee4126234cf2d071e01b44b915e725a6cb439a879ec9754a3a", - "sha256:d5fe3e099cf07d0fb5a1e23d399e5d4d1ca3e6dfcbe5c8570ccff3e9208274f7", - "sha256:d6bcbfc99f55655c3d93feb7ef3800bd5bbe963a755687cbf1f490a71fb7794b", - "sha256:d787272ed958a05b2c86311d3a4135d3c2aeea4fc655705f074130aa57d71653", - "sha256:e169e957c33576f47e21864cf3fc9ff47c223a4ebca8960079b8bd36cb014fd0", - "sha256:e20076a211cd6f9b44a6be58f7eeafa7ab5720eb796975d0c03f05b47d89eb90", - "sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29", - "sha256:eef4d64c650f33347c1f9266fa5ae001440b232ad9b98f1f43dfe7a79435c0a6", - "sha256:f2e69b3ed24544b0d3dbe2c5c0ba5153ce50dcebb576fdc4696d52aa22db6034", - "sha256:f87ec75864c37c4c6cb908d282e1969e79763e0d9becdfe9fe5473b7bb1e5f09", - "sha256:fbec11614dba0424ca72f4e8ba3c420dba07b4a7c206c8c8e4e73f2e98f4c559", - "sha256:fd69666217b62fa5d7c6aa88e507493a34dec4fa20c5bd925e4bc12fce586639" - ], - "index": "pypi", - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", - "version": "==1.15.0" - }, - "xlsxwriter": { - "hashes": [ - "sha256:331508ff39d610ecdaf979e458840bc1eab6e6a02cfd5d08f044f0f73636236f", - "sha256:78751099a770273f1c98b8d6643351f68f98ae8e6acf9d09d37dc6798f8cd3de" - ], - "markers": "python_version >= '3.6'", - "version": "==3.1.2" - } - }, - "develop": {} -} diff --git a/README.md b/README.md index 0664844..d8a9efa 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # dspace-reports -A python3-based tool to generate and email statistical reports for [DSpace](https://github.com/DSpace/DSpace) repository administrators. +A python3-based tool to generate and email statistical reports for [DSpace 7.x](https://github.com/DSpace/DSpace) repository administrators. ## Requirements - Python 3.9+ -- PostgreSQL 9.6+ -- DSpace 6.x repository ** +- PostgreSQL 13+ +- DSpace 7.x repository ** ** If your Solr index contains statistics from legacy DSpace 5.x or earlier instances, then the quality of the reports will go up significantly if you have migrated the old statistics to the new UUID identifiers in DSpace 6. 
See the [DSpace Documentation](https://wiki.lyrasis.org/display/DSDOC6x/SOLR+Statistics+Maintenance#SOLRStatisticsMaintenance-UpgradeLegacyDSpaceObjectIdentifiers(pre-6xstatistics)toDSpace6xUUIDIdentifiers) for more information @@ -15,8 +15,8 @@ A python3-based tool to generate and email statistical reports for [DSpace](http ```bash python3 -m venv venv source venv/bin/activate -pip install pipenv -pipenv install +pip install -r requirements.txt + ``` ## Configuration diff --git a/database_manager.py b/database_manager.py index c9a43f9..1264f0b 100644 --- a/database_manager.py +++ b/database_manager.py @@ -1,12 +1,14 @@ -import sys +"""Class for managing database functions""" -from optparse import OptionParser +import argparse +import sys from lib.database import Database from lib.util import Utilities class DatabaseManager(): + """Class for managing database functions""" repository_column_map = { 'repository_id': 'Repository ID', 'repository_name': 'Repository', @@ -14,7 +16,7 @@ class DatabaseManager(): 'items_academic_year': 'Items added in academic year', 'items_total': 'Total Items', 'views_last_month': 'Item views last month', - 'views_academic_year': 'Item views in academic year', + 'views_academic_year': 'Item views in academic year', 'views_total': 'Total item views', 'downloads_last_month': 'Item downloads last month', 'downloads_academic_year': 'Item downloads in academic year', @@ -53,7 +55,7 @@ class DatabaseManager(): 'downloads_total': 'Total item downloads' } - items_column_map = { + items_column_map = { 'item_id': 'Item ID', 'collection_name': 'Collection Name', 'item_name': 'Item Title', @@ -74,6 +76,7 @@ def __init__(self, config=None): self.config = config def create_tables(self, config, logger): + """Function to create statistics tables""" logger.info('Creating tables...') # Create statistics tables @@ -152,14 +155,17 @@ def create_tables(self, config, logger): # Commit changes db.commit() + logger.info('Finished creating tables.') + def drop_tables(self, config, logger): + """Function to drop statistics tables""" # First check that tables exist tables_exist = self.check_tables(config, logger) - if tables_exist == False: + if tables_exist is False: logger.info('Tables do not exist.') return - else: - logger.info('Removing tables...') + + logger.info('Dropping tables...') # Drop statistics tables with Database(config=config['statistics_db']) as db: @@ -186,32 +192,39 @@ def drop_tables(self, config, logger): # Commit changes db.commit() + logger.info('Finished dropping tables.') + def check_tables(self, config, logger): + """Function to check if statistics tables exist""" logger.debug('Checking for statistics tables.') tables_exist = False # Check if statistics tables exist with Database(config=config['statistics_db']) as db: with db.cursor() as cursor: - cursor.execute("SELECT * FROM information_schema.tables WHERE table_name=%s", ('repository_stats',)) + cursor.execute("SELECT * FROM information_schema.tables WHERE " + + "table_name='repository_stats'") if bool(cursor.rowcount): logger.debug('The repository_stats table exists.') tables_exist = True else: logger.debug('The repository_stats table DOES NOT exist.') - cursor.execute("SELECT * FROM information_schema.tables WHERE table_name=%s", ('community_stats',)) + cursor.execute("SELECT * FROM information_schema.tables WHERE " + + "table_name='community_stats'") if bool(cursor.rowcount): logger.debug('The community_stats table exists.') tables_exist = True else: logger.debug('The community_stats table DOES NOT 
exist.')

-                cursor.execute("SELECT * FROM information_schema.tables WHERE table_name=%s", ('collection_stats',))
+                cursor.execute("SELECT * FROM information_schema.tables WHERE " +
+                               "table_name='collection_stats'")
                 if bool(cursor.rowcount):
                     logger.debug('The collection_stats table exists.')
                     tables_exist = True
                 else:
                     logger.debug('The collection_stats table DOES NOT exist.')

-                cursor.execute("SELECT * FROM information_schema.tables WHERE table_name=%s", ('item_stats',))
+                cursor.execute("SELECT * FROM information_schema.tables WHERE " +
+                               "table_name='item_stats'")
                 if bool(cursor.rowcount):
                     logger.debug('The item_stats table exists.')
                     tables_exist = True
                 else:
@@ -224,60 +237,68 @@ def check_tables(self, config, logger):

 def main():
-    parser = OptionParser()
+    """Main function"""
+
+    parser = argparse.ArgumentParser(
+        prog='Database Manager',
+        description='Commands to manage statistics database tables')

-    parser.add_option("-c", "--config", dest="config_file", default="config/application.yml", help="Configuration file")
-    parser.add_option("-f", "--function", dest="function", help="Database function to perform. Options: create, drop, check, recreate")
+    parser.add_argument("-c", "--config", dest="config_file", action='store', type=str,
+                        default="config/application.yml", help="Configuration file")
+    parser.add_argument("-f", "--function", dest="function", action='store', type=str,
+                        help="Database function to perform. Options: create, drop, check," +
+                             " recreate.")

-    (options, args) = parser.parse_args()
+    args = parser.parse_args()

     # Create utilities object
     utilities = Utilities()

     # Check required options fields
-    if options.function is None:
+    if args.function is None:
         parser.print_help()
         parser.error("Must specify a function to perform.")
-    if options.function not in ['create', 'drop', 'check', 'recreate']:
+    if args.function not in ['create', 'drop', 'check', 'recreate']:
         parser.print_help()
         parser.error("Must specify a valid function.")
-
+
     # Load config
-    print("Loading configuration from file: %s" %(options.config_file))
-    config = utilities.load_config(options.config_file)
+    print(f"Loading configuration from file: {args.config_file}")
+    config = utilities.load_config(args.config_file)
     if not config:
         print("Unable to load configuration.")
         sys.exit(0)

     # Set up logging
     logger = utilities.load_logger(config=config)
-
+
     # Create object to manage database
     manage_database = DatabaseManager(config=config)

     # Perform function from command line
-    if options.function == 'create':
+    if args.function == 'create':
         tables_exist = manage_database.check_tables(config, logger)
-        if tables_exist == True:
-            logger.error('Unable to create statistics tables because one or more (check logs) already exists.')
+        if tables_exist is True:
+            logger.error("Unable to create statistics tables because one or more (check logs) " +
+                         "already exists.")
             sys.exit(0)

         logger.info('Creating statistics tables in the database.')
         manage_database.create_tables(config, logger)
-    elif options.function == 'drop':
+    elif args.function == 'drop':
         logger.info('Dropping statistics tables')
         manage_database.drop_tables(config, logger)
-    elif options.function == 'check':
+    elif args.function == 'check':
         logger.info('Checking for statistics tables.')
         tables_exist = manage_database.check_tables(config, logger)
-        if tables_exist == True:
+        if tables_exist is True:
             logger.info('One or more statistics tables exists (check logs).')
             sys.exit(0)
-    elif options.function == 'recreate':
+    elif args.function == 'recreate':
         logger.info('Dropping and recreating statistics tables in the database.')
         manage_database.drop_tables(config, logger)
         manage_database.create_tables(config, logger)

 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
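With the switch from optparse to argparse above, the command-line surface is unchanged; a typical invocation against the default configuration path (shown here for illustration) is:

```bash
# Check for existing statistics tables, then create them if missing
python3 database_manager.py --config config/application.yml --function check
python3 database_manager.py --config config/application.yml --function create
```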
diff --git a/dspace_reports/collection_indexer.py b/dspace_reports/collection_indexer.py
index 9276ae7..330f684 100644
--- a/dspace_reports/collection_indexer.py
+++ b/dspace_reports/collection_indexer.py
@@ -1,3 +1,5 @@
+"""Class for indexing collections"""
+
 import math

 from lib.database import Database
@@ -5,25 +7,25 @@


 class CollectionIndexer(Indexer):
-    def index(self):
-        # Get site hierarchy
-        hierarchy = self.rest.get_hierarchy()
+    """Class for indexing collections"""

-        # Traverse hierarchy
-        self.logger.info(hierarchy)
+    def index(self):
+        """Index function"""

         self.logger.info("Loading DSpace collections...")
         self.index_collections()

     def index_collections(self):
+        """Index the collections in the repository"""
+
         # List of collections
         collections = []

-        # Get site hierarchy
-        hierarchy = self.rest.get_hierarchy()
+        # Get top level communities
+        top_communities = self.rest.get_top_level_communities()

-        if 'community' in hierarchy:
-            communities = hierarchy['community']
+        if 'community' in top_communities:
+            communities = top_communities['community']
             self.logger.info("Repository has %s top-level communities.", str(len(communities)))

             for community in communities:
@@ -32,16 +34,18 @@ def index_collections(self):
             self.logger.info("Repository has no communities.")

         for time_period in self.time_periods:
-            self.logger.info("Updating views statistics for collections during time period: %s" %(time_period))
+            self.logger.info("Updating views statistics for collections during time period: %s", time_period)
             self.index_collection_views(time_period=time_period)

-            self.logger.info("Updating downloads statistics for collection during time period: %s" %(time_period))
+            self.logger.info("Updating downloads statistics for collection during time period: %s", time_period)
             self.index_collection_downloads(time_period=time_period)

     def load_collections_recursive(self, collections, community):
+        """Load all collections recursively"""
+
         community_id = community['id']
         community_name = community['name']
-        self.logger.info("Loading collections of community %s (%s)" %(community_name, community_id))
+        self.logger.info("Loading collections of community %s (%s)", community_name, community_id)

         if 'collection' in community:
             collections = community['collection']
@@ -51,7 +55,7 @@ def load_collections_recursive(self, collections, community):
                 collection_name = collection['name']
                 collection_handle = collection['handle']
                 collection_url = self.base_url + collection_handle
-                self.logger.info("Loading collection: %s (%s)..." %(collection_name, collection_id))
+                self.logger.info("Loading collection: %s (%s)...", collection_name, collection_id)

                 if len(collection_name) > 255:
                     self.logger.debug("Collection name is longer than 255 characters. It will be shortened to that length.")
@@ -60,11 +64,11 @@ def load_collections_recursive(self, collections, community):
             # Insert the collection into the database
             with Database(self.config['statistics_db']) as db:
                 with db.cursor() as cursor:
-                    cursor.execute("INSERT INTO collection_stats (parent_community_name, collection_id, collection_name, collection_url) VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING", (community_name, collection_id, collection_name, collection_url))
+                    cursor.execute(f"INSERT INTO collection_stats (parent_community_name, collection_id, collection_name, collection_url) VALUES ('{community_name}', '{collection_id}', '{collection_name}', '{collection_url}') ON CONFLICT DO NOTHING")
                     db.commit()

             for time_period in self.time_periods:
-                self.logger.info("Indexing items for collection: %s (%s)" %(collection_id, collection_name))
+                self.logger.info("Indexing items for collection: %s (%s)", collection_id, collection_name)
                 self.index_collection_items(collection_id=collection_id, time_period=time_period)
         else:
             self.logger.info("There are no collections in this community.")
@@ -75,15 +79,17 @@ def load_collections_recursive(self, collections, community):
                 self.load_collections_recursive(collections, sub_community)
         else:
             self.logger.info("There are no subcommunities in this community.")
-
+
     def index_collection_items(self, collection_id=None, time_period=None):
+        """Index the collection items"""
+
         if collection_id is None or time_period is None:
             return

         # Create base Solr URL
         solr_url = self.solr_server + "/search/select"
-        self.logger.debug("tdl solr_url: %s" %(solr_url))
-
+        self.logger.debug("TDL solr_url: %s", solr_url)
+
         # Default Solr params
         solr_query_params = {
             "q": "search.resourcetype:Item",
@@ -94,10 +100,10 @@ def index_collection_items(self, collection_id=None, time_period=None):
         # Get date range for Solr query if time period is specified
         date_range = []
-        self.logger.debug("Creating date range for time period: %s" %(time_period))
+        self.logger.debug("Creating date range for time period: %s", time_period)
         date_range = self.get_date_range(time_period)
         if len(date_range) == 2:
-            self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1]))
+            self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1])
             if date_range[0] is not None and date_range[1] is not None:
                 date_start = date_range[0]
                 date_end = date_range[1]
@@ -111,12 +117,12 @@ def index_collection_items(self, collection_id=None, time_period=None):
         # Make call to Solr for items statistics
         response = self.solr.call(url=solr_url, params=solr_query_params)
         self.logger.info("Calling Solr total items in community: %s", response.url)
-
-        results_totalItems = 0
+
+        results_total_items = 0
         try:
             # Get total number of items
-            results_totalItems = response.json()["response"]["numFound"]
-            self.logger.info("Solr - total items: %s", str(results_totalItems))
+            results_total_items = response.json()["response"]["numFound"]
+            self.logger.info("Solr - total items: %s", str(results_total_items))
         except TypeError:
             self.logger.info("No collection items to index, returning.")
             return
@@ -124,24 +130,26 @@ def index_collection_items(self, collection_id=None, time_period=None):
         with Database(self.config['statistics_db']) as db:
             with db.cursor() as cursor:
                 if time_period == 'month':
-                    self.logger.debug(cursor.mogrify("UPDATE collection_stats SET items_last_month = %i WHERE collection_id = '%s'" %(results_totalItems, collection_id)))
-                    cursor.execute("UPDATE collection_stats SET items_last_month = %i WHERE
collection_id = '%s'" %(results_totalItems, collection_id)) + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET items_last_month = {results_total_items} WHERE collection_id = '{collection_id}'")) + cursor.execute(f"UPDATE collection_stats SET items_last_month = {results_total_items} WHERE collection_id = '{collection_id}'") elif time_period == 'year': - self.logger.debug(cursor.mogrify("UPDATE collection_stats SET items_academic_year = %i WHERE collection_id = '%s'" %(results_totalItems, collection_id))) - cursor.execute("UPDATE collection_stats SET items_academic_year = %i WHERE collection_id = '%s'" %(results_totalItems, collection_id)) + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET items_academic_year = {results_total_items} WHERE collection_id = '{collection_id}'")) + cursor.execute(f"UPDATE collection_stats SET items_academic_year = {results_total_items} WHERE collection_id = '{collection_id}'") else: - self.logger.debug(cursor.mogrify("UPDATE collection_stats SET items_total = %i WHERE collection_id = '%s'" %(results_totalItems, collection_id))) - cursor.execute("UPDATE collection_stats SET items_total = %i WHERE collection_id = '%s'" %(results_totalItems, collection_id)) + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET items_total = {results_total_items} WHERE collection_id = '{collection_id}'")) + cursor.execute(f"UPDATE collection_stats SET items_total = {results_total_items} WHERE collection_id = '{collection_id}'") # Commit changes db.commit() def index_collection_views(self, time_period=None): + """Index the collection views""" + # Create base Solr url solr_url = self.solr_server + "/statistics/select" # Get Solr shards - shards = self.solr.get_statistics_shards(time_period) + shards = self.solr.get_statistics_shards() # Default Solr params solr_query_params = { @@ -165,21 +173,21 @@ def index_collection_views(self, time_period=None): date_range = [] date_range = self.get_date_range(time_period) if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1])) + self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" else: self.logger.error("Error creating date range.") - + # Make call to Solr for views statistics response = self.solr.call(url=solr_url, params=solr_query_params) self.logger.info("Calling Solr total collection views in collections: %s", response.url) - + try: # get total number of distinct facets (countDistinct) - results_totalNumFacets = response.json()["stats"]["stats_fields"]["owningColl"][ + results_total_num_facets = response.json()["stats"]["stats_fields"]["owningColl"][ "countDistinct" ] except TypeError: @@ -188,9 +196,9 @@ def index_collection_views(self, time_period=None): # divide results into "pages" and round up to next integer results_per_page = 100 - results_num_pages = math.ceil(results_totalNumFacets / results_per_page) + results_num_pages = math.ceil(results_total_num_facets / results_per_page) results_current_page = 0 - + # Update database with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: @@ -216,7 +224,7 @@ def index_collection_views(self, time_period=None): } if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1])) + self.logger.info("Searching 
date range: %s - %s", date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] @@ -224,21 +232,21 @@ def index_collection_views(self, time_period=None): response = self.solr.call(url=solr_url, params=solr_query_params) self.logger.info("Solr collection views query: %s", response.url) - + # Solr returns facets as a dict of dicts (see json.nl parameter) views = response.json()["facet_counts"]["facet_fields"] - # iterate over the facetField dict and get the ids and views - for id, collection_views in views["owningColl"].items(): + # Iterate over the facetField dict and get the ids and views + for collection_id, collection_views in views["owningColl"].items(): if time_period == 'month': - self.logger.debug(cursor.mogrify("UPDATE collection_stats SET views_last_month = %s WHERE collection_id = %s", (collection_views, id))) - cursor.execute("UPDATE collection_stats SET views_last_month = %s WHERE collection_id = %s", (collection_views, id)) + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_last_month = {collection_views} WHERE collection_id = '{collection_id}'")) + cursor.execute(f"UPDATE collection_stats SET views_last_month = {collection_views} WHERE collection_id = '{collection_id}'") elif time_period == 'year': - self.logger.debug(cursor.mogrify("UPDATE collection_stats SET views_academic_year = %s WHERE collection_id = %s", (collection_views, id))) - cursor.execute("UPDATE collection_stats SET views_academic_year = %s WHERE collection_id = %s", (collection_views, id)) + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_academic_year = {collection_views} WHERE collection_id = '{collection_id}'")) + cursor.execute(f"UPDATE collection_stats SET views_academic_year = {collection_views} WHERE collection_id = '{collection_id}'") else: - self.logger.debug(cursor.mogrify("UPDATE collection_stats SET views_total = %s WHERE collection_id = %s", (collection_views, id))) - cursor.execute("UPDATE collection_stats SET views_total = %s WHERE collection_id = %s", (collection_views, id)) - + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_total = {collection_views} WHERE collection_id = '{collection_id}'")) + cursor.execute(f"UPDATE collection_stats SET views_total = {collection_views} WHERE collection_id = '{collection_id}'") + # Commit changes to database db.commit() @@ -246,8 +254,10 @@ def index_collection_views(self, time_period=None): def index_collection_downloads(self, time_period=None): + """Index the collection downloads""" + # Get Solr shards - shards = self.solr.get_statistics_shards(time_period) + shards = self.solr.get_statistics_shards() # Create base Solr url solr_url = self.solr_server + "/statistics/select" @@ -274,7 +284,7 @@ def index_collection_downloads(self, time_period=None): date_range = [] date_range = self.get_date_range(time_period) if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1])) + self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] @@ -288,7 +298,7 @@ def index_collection_downloads(self, time_period=None): try: # get total number of distinct facets (countDistinct) - results_totalNumFacets = response.json()["stats"]["stats_fields"]["owningColl"][ + results_total_num_facets = response.json()["stats"]["stats_fields"]["owningColl"][ 
"countDistinct" ] except TypeError: @@ -296,7 +306,7 @@ def index_collection_downloads(self, time_period=None): return results_per_page = 100 - results_num_pages = math.ceil(results_totalNumFacets / results_per_page) + results_num_pages = math.ceil(results_total_num_facets / results_per_page) results_current_page = 0 # Update database @@ -325,30 +335,30 @@ def index_collection_downloads(self, time_period=None): } if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1])) + self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" - + response = self.solr.call(url=solr_url, params=solr_query_params) self.logger.info("Solr collection downloads query: %s", response.url) - + # Solr returns facets as a dict of dicts (see json.nl parameter) downloads = response.json()["facet_counts"]["facet_fields"] - # iterate over the facetField dict and get the ids and views - for id, collection_downloads in downloads["owningColl"].items(): + # Iterate over the facetField dict and get the ids and views + for collection_id, collection_downloads in downloads["owningColl"].items(): if time_period == 'month': - self.logger.debug(cursor.mogrify("UPDATE collection_stats SET downloads_last_month = %s WHERE collection_id = %s", (collection_downloads, id))) - cursor.execute("UPDATE collection_stats SET downloads_last_month = %s WHERE collection_id = %s", (collection_downloads, id)) + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_last_month = {collection_downloads} WHERE collection_id = '{collection_id}'")) + cursor.execute(f"UPDATE collection_stats SET downloads_last_month = {collection_downloads} WHERE collection_id = '{collection_id}'") elif time_period == 'year': - self.logger.debug(cursor.mogrify("UPDATE collection_stats SET downloads_academic_year = %s WHERE collection_id = %s", (collection_downloads, id))) - cursor.execute("UPDATE collection_stats SET downloads_academic_year = %s WHERE collection_id = %s", (collection_downloads, id)) + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_academic_year = {collection_downloads} WHERE collection_id = '{collection_id}'")) + cursor.execute(f"UPDATE collection_stats SET downloads_academic_year = {collection_downloads} WHERE collection_id = '{collection_id}") else: - self.logger.debug(cursor.mogrify("UPDATE collection_stats SET downloads_total = %s WHERE collection_id = %s", (collection_downloads, id))) - cursor.execute("UPDATE collection_stats SET downloads_total = %s WHERE collection_id = %s", (collection_downloads, id)) - + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_total = {collection_downloads} WHERE collection_id = '{collection_id}'")) + cursor.execute(f"UPDATE collection_stats SET downloads_total = {collection_downloads} WHERE collection_id = '{collection_id}'") + # Commit changes to database db.commit() - results_current_page += 1 \ No newline at end of file + results_current_page += 1 diff --git a/dspace_reports/community_indexer.py b/dspace_reports/community_indexer.py index dcc89a6..38a3b59 100644 --- a/dspace_reports/community_indexer.py +++ b/dspace_reports/community_indexer.py @@ -1,3 +1,5 @@ +"""Class for indexing communities""" + import math from lib.database import Database @@ -5,77 +7,81 @@ class 
diff --git a/dspace_reports/community_indexer.py b/dspace_reports/community_indexer.py
index dcc89a6..38a3b59 100644
--- a/dspace_reports/community_indexer.py
+++ b/dspace_reports/community_indexer.py
@@ -1,3 +1,5 @@
+"""Class for indexing communities"""
+
 import math

 from lib.database import Database
@@ -5,77 +7,81 @@


 class CommunityIndexer(Indexer):
-    def index(self):
-        # Get site hierarchy
-        hierarchy = self.rest.get_hierarchy()
+    """Class for indexing communities"""

-        # Traverse hierarchy
-        self.logger.info(hierarchy)
+    def index(self):
+        """Index function"""

         self.logger.info("Loading DSpace communities...")
         self.index_communities()

     def index_communities(self):
+        """Index the communities in the repository"""
+
         # List of communities
         communities = []

-        # Get site hierarchy
-        hierarchy = self.rest.get_hierarchy()
+        # Get top level communities
+        top_communities = self.rest.get_top_level_communities()

-        if 'community' in hierarchy:
-            communities = hierarchy['community']
+        if 'community' in top_communities:
+            communities = top_communities['community']
             self.logger.info("Repository has %s top-level communities.", str(len(communities)))

             for community in communities:
-                self.logger.debug("Loading top-level community: %s (%s)" %(community['name'], community['id']))
+                self.logger.debug("Loading top-level community: %s (%s)", community['name'], community['id'])
                 self.load_communities_recursive(communities, community)
         else:
             self.logger.info("Repository has no communities.")

         for time_period in self.time_periods:
-            self.logger.info("Updating views statistics for communities during time period: %s" %(time_period))
+            self.logger.info("Updating views statistics for communities during time period: %s", time_period)
             self.index_community_views(time_period=time_period)

-            self.logger.info("Updating downloads statistics for communities during time period: %s" %(time_period))
+            self.logger.info("Updating downloads statistics for communities during time period: %s", time_period)
             self.index_community_downloads(time_period=time_period)
-
+
     def load_communities_recursive(self, communities, community, parent_community_name=""):
+        """Load all communities recursively"""
+
         # Extract metadata
         community_id = community['id']
         community_name = community['name']
         community_handle = community['handle']
         community_url = self.base_url + community_handle
-        self.logger.info("Loading community: %s (%s)..."
%(community_name, community_id))
+        self.logger.info("Loading community: %s (%s)...", community_name, community_id)

         # Insert the community into the database
         with Database(self.config['statistics_db']) as db:
             with db.cursor() as cursor:
-                self.logger.debug(cursor.mogrify("INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES (%s, %s, %s, %s)", (community_id, community_name, community_url, parent_community_name)))
-                cursor.execute("INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES (%s, %s, %s, %s)", (community_id, community_name, community_url, parent_community_name))
+                self.logger.debug(cursor.mogrify(f"INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES ('{community_id}', '{community_name}', '{community_url}', '{parent_community_name}')"))
+                cursor.execute(f"INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES ('{community_id}', '{community_name}', '{community_url}', '{parent_community_name}')")
                 db.commit()

         # Index views and downloads for the current community
         for time_period in self.time_periods:
-            self.logger.info("Indexing items for community: %s (%s)" %(community_id, community_name))
+            self.logger.info("Indexing items for community: %s (%s)", community_id, community_name)
             self.index_community_items(community_id=community_id, time_period=time_period)

         # Load sub communities
         if 'community' in community:
             sub_communities = community['community']
             for sub_community in sub_communities:
-                self.logger.info("Loading subcommunity: %s (%s)" %(sub_community['name'], sub_community['id']))
+                self.logger.info("Loading subcommunity: %s (%s)", sub_community['name'], sub_community['id'])
                 self.load_communities_recursive(communities=communities, community=sub_community, parent_community_name=community_name)
         else:
             self.logger.info("There are no subcommunities in this community.")
-
+
     def index_community_items(self, community_id=None, time_period=None):
+        """Index the community items"""
+
         if community_id is None or time_period is None:
-            return
+            return None

         # Create base Solr URL
         solr_url = self.solr_server + "/search/select"
-        self.logger.debug("tdl solr_url: %s" %(solr_url))
-
+        self.logger.debug("Solr URL: %s", solr_url)
+
         # Default Solr params
         solr_query_params = {
             "q": "search.resourcetype:Item",
@@ -86,10 +92,10 @@ def index_community_items(self, community_id=None, time_period=None):
         # Get date range for Solr query if time period is specified
         date_range = []
-        self.logger.debug("Creating date range for time period: %s" %(time_period))
+        self.logger.debug("Creating date range for time period: %s", time_period)
         date_range = self.get_date_range(time_period)
         if len(date_range) == 2:
-            self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1]))
+            self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1])
             if date_range[0] is not None and date_range[1] is not None:
                 date_start = date_range[0]
                 date_end = date_range[1]
@@ -105,12 +111,12 @@ def index_community_items(self, community_id=None, time_period=None):
         # Make call to Solr for items statistics
         response = self.solr.call(url=solr_url, params=solr_query_params)
         self.logger.info("Calling Solr items in community: %s", response.url)
-
-        results_totalItems = 0
+
+        results_total_items = 0
         try:
             # Get total number of items
-            results_totalItems = response.json()["response"]["numFound"]
-            self.logger.info("Solr - total items: %s",
str(results_totalItems)) + results_total_items = response.json()["response"]["numFound"] + self.logger.info("Solr - total items: %s", str(results_total_items)) except TypeError: self.logger.info("No community items to index.") return @@ -118,24 +124,26 @@ def index_community_items(self, community_id=None, time_period=None): with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: if time_period == 'month': - self.logger.debug(cursor.mogrify("UPDATE community_stats SET items_last_month = %i WHERE community_id = '%s'" %(results_totalItems, community_id))) - cursor.execute("UPDATE community_stats SET items_last_month = %i WHERE community_id = '%s'" %(results_totalItems, community_id)) + self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET items_last_month = {results_total_items} WHERE community_id = '{community_id}'")) + cursor.execute(f"UPDATE community_stats SET items_last_month = {results_total_items} WHERE community_id = '{community_id}'") elif time_period == 'year': - self.logger.debug(cursor.mogrify("UPDATE community_stats SET items_academic_year = %i WHERE community_id = '%s'" %(results_totalItems, community_id))) - cursor.execute("UPDATE community_stats SET items_academic_year = %i WHERE community_id = '%s'" %(results_totalItems, community_id)) + self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET items_academic_year = {results_total_items} WHERE community_id = '{community_id}'")) + cursor.execute(f"UPDATE community_stats SET items_academic_year = {results_total_items} WHERE community_id = '{community_id}'") else: - self.logger.debug(cursor.mogrify("UPDATE community_stats SET items_total = %i WHERE community_id = '%s'" %(results_totalItems, community_id))) - cursor.execute("UPDATE community_stats SET items_total = %i WHERE community_id = '%s'" %(results_totalItems, community_id)) + self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET items_total = {results_total_items} WHERE community_id = '{community_id}'")) + cursor.execute(f"UPDATE community_stats SET items_total = {results_total_items} WHERE community_id = '{community_id}'") # Commit changes db.commit() def index_community_views(self, time_period=None): + """Index the community views""" + # Create base Solr url solr_url = self.solr_server + "/statistics/select" # Get Solr shards - shards = self.solr.get_statistics_shards(time_period) + shards = self.solr.get_statistics_shards() # Default Solr params solr_query_params = { @@ -156,11 +164,11 @@ def index_community_views(self, time_period=None): } # Get date range for Solr query if time period is specified - date_range = [] - self.logger.debug("Creating date range for time period: %s" %(time_period)) + date_range = [] + self.logger.debug("Creating date range for time period: %s", time_period) date_range = self.get_date_range(time_period) if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1])) + self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] @@ -173,10 +181,10 @@ def index_community_views(self, time_period=None): # Make call to Solr for views statistics response = self.solr.call(url=solr_url, params=solr_query_params) self.logger.info("Calling Solr total community views in communities: %s", response.url) - + try: # get total number of distinct facets (countDistinct) - results_totalNumFacets = 
response.json()["stats"]["stats_fields"]["owningComm"][ + results_total_num_facets = response.json()["stats"]["stats_fields"]["owningComm"][ "countDistinct" ] except TypeError: @@ -185,9 +193,9 @@ def index_community_views(self, time_period=None): # divide results into "pages" and round up to next integer results_per_page = 100 - results_num_pages = math.ceil(results_totalNumFacets / results_per_page) + results_num_pages = math.ceil(results_total_num_facets / results_per_page) results_current_page = 0 - + # Update database with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: @@ -213,7 +221,7 @@ def index_community_views(self, time_period=None): } if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1])) + self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] @@ -225,28 +233,30 @@ def index_community_views(self, time_period=None): # Solr returns facets as a dict of dicts (see json.nl parameter) views = response.json()["facet_counts"]["facet_fields"] # iterate over the facetField dict and get the ids and views - for id, community_views in views["owningComm"].items(): + for community_id, community_views in views["owningComm"].items(): if len(id) == 36: if time_period == 'month': - self.logger.debug(cursor.mogrify("UPDATE community_stats SET views_last_month = %s WHERE community_id = %s", (community_views, id))) - cursor.execute("UPDATE community_stats SET views_last_month = %s WHERE community_id = %s", (community_views, id)) + self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET views_last_month = {community_views} WHERE community_id = '{community_id}'")) + cursor.execute(f"UPDATE community_stats SET views_last_month = {community_views} WHERE community_id = '{community_id}'") elif time_period == 'year': - self.logger.debug(cursor.mogrify("UPDATE community_stats SET views_academic_year = %s WHERE community_id = %s", (community_views, id))) - cursor.execute("UPDATE community_stats SET views_academic_year = %s WHERE community_id = %s", (community_views, id)) + self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET views_academic_year = {community_views} WHERE community_id = '{community_id}'")) + cursor.execute(f"UPDATE community_stats SET views_academic_year = {community_views} WHERE community_id = '{community_id}'") else: - self.logger.debug(cursor.mogrify("UPDATE community_stats SET views_total = %s WHERE community_id = %s", (community_views, id))) - cursor.execute("UPDATE community_stats SET views_total = %s WHERE community_id = %s", (community_views, id)) + self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET views_total = {community_views} WHERE community_id = '{community_id}'")) + cursor.execute(f"UPDATE community_stats SET views_total = {community_views} WHERE community_id = '{community_id}'") else: - self.logger.warn("owningComm value is not a UUID: %s", id) + self.logger.warning("owningComm value is not a UUID: %s", id) # Commit changes to database db.commit() results_current_page += 1 - + def index_community_downloads(self, time_period=None): + """Index the community downloads""" + # Get Solr shards - shards = self.solr.get_statistics_shards(time_period) + shards = self.solr.get_statistics_shards() # Create base Solr url solr_url = self.solr_server + "/statistics/select" @@ -273,29 +283,29 @@ def index_community_downloads(self, time_period=None): 
@@ -273,29 +283,29 @@ def index_community_downloads(self, time_period=None):
         date_range = []
         date_range = self.get_date_range(time_period)
         if len(date_range) == 2:
-            self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1]))
+            self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1])
             if date_range[0] is not None and date_range[1] is not None:
                 date_start = date_range[0]
                 date_end = date_range[1]
                 solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]"
         else:
             self.logger.error("Error creating date range.")
-
+
         # Make call to Solr for downloads statistics
         response = self.solr.call(url=solr_url, params=solr_query_params)
         self.logger.info("Calling Solr total community downloads in community: %s", response.url)

         try:
             # get total number of distinct facets (countDistinct)
-            results_totalNumFacets = response.json()["stats"]["stats_fields"]["owningComm"][
+            results_total_num_facets = response.json()["stats"]["stats_fields"]["owningComm"][
                 "countDistinct"
             ]
         except TypeError:
             self.logger.info("No community downloads to index.")
             return
-
+
         results_per_page = 100
-        results_num_pages = math.ceil(results_totalNumFacets / results_per_page)
+        results_num_pages = math.ceil(results_total_num_facets / results_per_page)
         results_current_page = 0

         # Update database
@@ -324,7 +334,7 @@ def index_community_downloads(self, time_period=None):
                     }

                     if len(date_range) == 2:
-                        self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1]))
+                        self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1])
                         if date_range[0] is not None and date_range[1] is not None:
                             date_start = date_range[0]
                             date_end = date_range[1]
@@ -332,25 +342,25 @@ def index_community_downloads(self, time_period=None):
                     response = self.solr.call(url=solr_url, params=solr_query_params)
                     self.logger.info("Solr community downloads query: %s", response.url)
-
+
                     # Solr returns facets as a dict of dicts (see json.nl parameter)
                     downloads = response.json()["facet_counts"]["facet_fields"]
                     # iterate over the facetField dict and get the ids and views
-                    for id, community_downloads in downloads["owningComm"].items():
+                    for community_id, community_downloads in downloads["owningComm"].items():
-                        if len(id) == 36:
+                        if len(community_id) == 36:
                             if time_period == 'month':
-                                self.logger.debug(cursor.mogrify("UPDATE community_stats SET downloads_last_month = %s WHERE community_id = %s", (community_downloads, id)))
-                                cursor.execute("UPDATE community_stats SET downloads_last_month = %s WHERE community_id = %s", (community_downloads, id))
+                                self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET downloads_last_month = {community_downloads} WHERE community_id = '{community_id}'"))
+                                cursor.execute(f"UPDATE community_stats SET downloads_last_month = {community_downloads} WHERE community_id = '{community_id}'")
                             elif time_period == 'year':
-                                self.logger.debug(cursor.mogrify("UPDATE community_stats SET downloads_academic_year = %s WHERE community_id = %s", (community_downloads, id)))
-                                cursor.execute("UPDATE community_stats SET downloads_academic_year = %s WHERE community_id = %s", (community_downloads, id))
+                                self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET downloads_academic_year = {community_downloads} WHERE community_id = '{community_id}'"))
+                                cursor.execute(f"UPDATE community_stats SET downloads_academic_year = {community_downloads} WHERE community_id = '{community_id}'")
                             else:
-                                self.logger.debug(cursor.mogrify("UPDATE community_stats SET downloads_total = %s WHERE community_id = %s", (community_downloads, id)))
-                                cursor.execute("UPDATE community_stats
SET downloads_total = %s WHERE community_id = %s", (community_downloads, id))
+                                self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET downloads_total = {community_downloads} WHERE community_id = '{community_id}'"))
+                                cursor.execute(f"UPDATE community_stats SET downloads_total = {community_downloads} WHERE community_id = '{community_id}'")
                         else:
-                            self.logger.warn("owningComm value is not a UUID: %s", id)
+                            self.logger.warning("owningComm value is not a UUID: %s", community_id)

                     # Commit changes to database
                     db.commit()

-                    results_current_page += 1
\ No newline at end of file
+                    results_current_page += 1
diff --git a/dspace_reports/indexer.py b/dspace_reports/indexer.py
index 49fcedf..84c0a9e 100644
--- a/dspace_reports/indexer.py
+++ b/dspace_reports/indexer.py
@@ -1,5 +1,6 @@
+"""Base indexer class"""
+
 import logging
-import re
 import sys

 from datetime import date, datetime
@@ -7,10 +8,10 @@
 from lib.api import DSpaceRestApi
 from lib.solr import DSpaceSolr
-from lib.util import Utilities

+class Indexer():
+    """Base indexer class"""

-class Indexer(object):
     def __init__(self, config=None, logger=None):
         if config is None:
             print("ERROR: A configuration file required to create the stats indexer.")
@@ -29,19 +30,28 @@ def __init__(self, config=None, logger=None):
         # Create REST API object
         self.rest = DSpaceRestApi(rest_server=config['rest_server'])
         if self.rest is None:
-            self.logger.error("Unable to create Indexer due to earlier failures creating a connection to the REST API.")
+            self.logger.error("Unable to create Indexer due to earlier failures creating a " +
+                              "connection to the REST API.")
             sys.exit(1)

         # Create Solr server object
         self.solr = DSpaceSolr(solr_server=config['solr_server'])
         if self.solr is None:
-            self.logger.error("Unable to create Indexer due to earlier failures creating a connection to Solr.")
+            self.logger.error("Unable to create Indexer due to earlier failures creating a " +
+                              "connection to Solr.")
             sys.exit(1)

         # The time periods used to generate statistical reports
         self.time_periods = ['month', 'year', 'all']

+    def index(self):
+        """Index function"""
+
+        self.logger.info("Base indexing class.")
+
     def get_date_range(self, time_period=None):
+        """Calculate date range"""
+
         date_range = []
         if time_period is None:
             self.logger.debug("time_period of none given to get_date_range() method.")
@@ -52,7 +62,8 @@ def get_date_range(self, time_period=None):
             dt = date.today()
             today = datetime.combine(dt, datetime.min.time()).isoformat() + 'Z'
             self.logger.debug("Current date: %s ", today)
-            one_month_ago = datetime.combine((date.today() + relativedelta(months=-1)), datetime.min.time()).isoformat() + 'Z'
+            one_month_ago = datetime.combine(
+                (date.today() + relativedelta(months=-1)), datetime.min.time()).isoformat() + 'Z'
             self.logger.debug("One month ago: %s ", one_month_ago)

             date_range = [one_month_ago, today]
@@ -61,14 +72,15 @@ def get_date_range(self, time_period=None):
             dt = date.today()
             today = datetime.combine(dt, datetime.max.time()).isoformat() + 'Z'
             self.logger.debug("Current date: %s ", today)
-
+
             current_month = datetime.today().month
             if current_month <= 9:
                 fiscal_year = datetime.today().year - 1
             else:
                 fiscal_year = datetime.today().year

-            first_day_of_academic_year = datetime.combine((date(fiscal_year, 9, 1)), datetime.min.time()).isoformat() + 'Z'
+            first_day_of_academic_year = datetime.combine(
+                (date(fiscal_year, 9, 1)), datetime.min.time()).isoformat() + 'Z'
             self.logger.debug("First day of academic year: %s ", first_day_of_academic_year)

             date_range =
[first_day_of_academic_year, today] @@ -80,15 +92,5 @@ def get_date_range(self, time_period=None): date_range = ['*', today] - self.logger.debug("Date range has %s dates." %(str(len(date_range)))) + self.logger.debug("Date range has %s dates.", len(date_range)) return date_range - - def validate_uuid4(self, uuid_string=None): - if uuid_string is None or not isinstance(uuid_string, str): - self.logger.debug("Item ID is either none or not a string: %s." %(uuid_string)) - return False - - uuid4hex = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}\Z', re.I) - match = uuid4hex.match(uuid_string) - - return bool(match) diff --git a/dspace_reports/item_indexer.py b/dspace_reports/item_indexer.py index 385fbda..dbb049f 100644 --- a/dspace_reports/item_indexer.py +++ b/dspace_reports/item_indexer.py @@ -1,3 +1,5 @@ +"""Class for indexing items""" + import math import sys @@ -9,13 +11,16 @@ class ItemIndexer(Indexer): + """Class for indexing items""" + def __init__(self, config, logger): super().__init__(config, logger) # Create OAI-PMH server object self.oai = DSpaceOai(oai_server=config['oai_server']) if self.oai is None: - self.logger.error("Unable to create Indexer due to earlier failures creating a connection to OAI-PMH feed.") + self.logger.error("Unable to create Indexer due to earlier failures creating a " + + "connection to OAI-PMH feed.") sys.exit(1) # Set time periods to only month and year as all can cause Solr to crash @@ -28,7 +33,7 @@ def index(self): # Get list of identifiers from OAI-PMH feed records = self.oai.get_records() total_records = len(records) - self.logger.info("Found %s records in OAI-PMH feed." %(str(total_records))) + self.logger.info("Found %s records in OAI-PMH feed.", str(total_records)) # Keep a count of records that cannot be found by their metadata count_records = 0 @@ -39,10 +44,12 @@ def index(self): with db.cursor() as cursor: for record in records: count_records = count_records + 1 - self.logger.info("(%s/%s) - Calling REST API for record: %s" %(str(count_records), str(total_records), record)) + self.logger.info("(%s/%s) - Calling REST API for record: %s", + str(count_records), str(total_records), record) metadata_entry = '{"key":"dc.identifier.uri", "value":"%s"}' %(record) - items = self.rest.find_items_by_metadata_field(metadata_entry=metadata_entry, expand=['parentCollection']) + items = self.rest.find_items_by_metadata_field(metadata_entry=metadata_entry, + expand=['parentCollection']) if len(items) == 1: item = items[0] item_id = item['uuid'] @@ -55,14 +62,16 @@ def index(self): item_collection_name = item_collection['name'] if len(item_collection_name) > 255: - self.logger.debug("Collection name is longer than 255 characters. It will be shortened to that length.") + self.logger.debug("Collection name is longer than 255 characters. " + + "It will be shortened to that length.") item_collection_name = item_collection_name[0:251] + "..." - - self.logger.info("item collection: %s " %(item_collection_name)) + + self.logger.info("Item collection: %s ", item_collection_name) # If name is null then use "Untitled" if item_name is not None: - # If item name is longer than 255 characters then shorten it to fit in database field + # If item name is longer than 255 characters then shorten it + # to fit in database field if len(item_name) > 255: item_name = item_name[0:251] + "..." 
else: @@ -71,16 +80,19 @@ def index(self): # Create handle URL for item item_url = self.base_url + item['handle'] - self.logger.debug(cursor.mogrify("INSERT INTO item_stats (collection_name, item_id, item_name, item_url) VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING", (item_collection_name, item_id, item_name, item_url))) - cursor.execute("INSERT INTO item_stats (collection_name, item_id, item_name, item_url) VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING", (item_collection_name, item_id, item_name, item_url)) + self.logger.debug(cursor.mogrify(f"INSERT INTO item_stats (collection_name, item_id, item_name, item_url) VALUES ({item_collection_name}, {item_id}, {item_name}, {item_url}) ON CONFLICT DO NOTHING")) + cursor.execute(f"INSERT INTO item_stats (collection_name, item_id, item_name, item_url) VALUES ({item_collection_name}, {item_id}, {item_name}, {item_url}) ON CONFLICT DO NOTHING") db.commit() else: count_missing_records += 1 - self.logger.error("Unable to find item in REST API: %s" %(record)) - - self.logger.info("Total records in OAI-PMH feed: %s" %(str(len(records)))) + self.logger.error("Unable to find item in REST API: %s", record) + + self.logger.info("Total records in OAI-PMH feed: %s", str(len(records))) + if count_missing_records > 0 and total_records > 0: - self.logger.info("Total records missing in OAI-PMH feed: %s (%.0f%%)" %(str(count_missing_records), (100 * count_missing_records/total_records))) + self.logger.info("Total records missing in OAI-PMH feed: %s (%.0f%%)", + str(count_missing_records), + (100 * count_missing_records/total_records)) for time_period in self.time_periods: self.logger.info("Indexing Solr views for time period: %s ", time_period) @@ -90,11 +102,13 @@ def index(self): self.index_item_downloads(time_period=time_period) def index_item_views(self, time_period='all'): + """Index the item views""" + # Create base Solr url solr_url = self.solr_server + "/statistics/select" # Get Solr shards - shards = self.solr.get_statistics_shards(time_period) + shards = self.solr.get_statistics_shards() # Solr params solr_query_params = { @@ -118,7 +132,7 @@ def index_item_views(self, time_period='all'): date_range = [] date_range = self.get_date_range(time_period) if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1])) + self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] @@ -129,10 +143,10 @@ def index_item_views(self, time_period='all'): # Make call to Solr for total views statistics response = self.solr.call(url=solr_url, params=solr_query_params) self.logger.info("Solr total item views query: %s", response.url) - + try: # get total number of distinct facets (countDistinct) - results_totalNumFacets = response.json()["stats"]["stats_fields"]["id"][ + results_total_num_facets = response.json()["stats"]["stats_fields"]["id"][ "countDistinct" ] except TypeError: @@ -141,7 +155,7 @@ def index_item_views(self, time_period='all'): # divide results into "pages" and round up to next integer results_per_page = 100 - results_num_pages = math.ceil(results_totalNumFacets / results_per_page) + results_num_pages = math.ceil(results_total_num_facets / results_per_page) results_current_page = 0 with Database(self.config['statistics_db']) as db: @@ -169,7 +183,8 @@ def index_item_views(self, time_period='all'): } if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s" 
%(date_range[0], date_range[1])) + self.logger.info("Searching date range: %s - %s", + date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] @@ -181,17 +196,17 @@ def index_item_views(self, time_period='all'): # Solr returns facets as a dict of dicts (see json.nl parameter) views = response.json()["facet_counts"]["facet_fields"] # iterate over the facetField dict and get the ids and views - for id, item_views in views["id"].items(): + for item_id, item_views in views["id"].items(): if time_period == 'month': - self.logger.debug(cursor.mogrify("UPDATE item_stats SET views_last_month = %s WHERE item_id = %s", (item_views, id))) - cursor.execute("UPDATE item_stats SET views_last_month = %s WHERE item_id = %s", (item_views, id)) + self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET views_last_month = {item_views} WHERE item_id = '{item_id}'")) + cursor.execute(f"UPDATE item_stats SET views_last_month = {item_views} WHERE item_id = '{item_id}'") elif time_period == 'year': - self.logger.debug(cursor.mogrify("UPDATE item_stats SET views_academic_year = %s WHERE item_id = %s", (item_views, id))) - cursor.execute("UPDATE item_stats SET views_academic_year = %s WHERE item_id = %s", (item_views, id)) + self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET views_academic_year = {item_views} WHERE item_id = '{item_id}'")) + cursor.execute(f"UPDATE item_stats SET views_academic_year = {item_views} WHERE item_id = '{item_id}'") else: - self.logger.debug(cursor.mogrify("UPDATE item_stats SET views_total = %s WHERE item_id = %s", (item_views, id))) - cursor.execute("UPDATE item_stats SET views_total = %s WHERE item_id = %s", (item_views, id)) - + self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET views_total = {item_views} WHERE item_id = '{item_id}'")) + cursor.execute(f"UPDATE item_stats SET views_total = {item_views} WHERE item_id = '{item_id}'") + # Commit changes to database db.commit() @@ -201,11 +216,13 @@ def index_item_views(self, time_period='all'): results_current_page += 1 def index_item_downloads(self, time_period='all'): + """Index the item downloads""" + # Create base Solr url solr_url = self.solr_server + "/statistics/select" # Get Solr shards - shards = self.solr.get_statistics_shards(time_period) + shards = self.solr.get_statistics_shards() # Solr params solr_query_params = { @@ -229,7 +246,7 @@ def index_item_downloads(self, time_period='all'): date_range = [] date_range = self.get_date_range(time_period) if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1])) + self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] @@ -243,7 +260,7 @@ def index_item_downloads(self, time_period='all'): try: # get total number of distinct facets (countDistinct) - results_totalNumFacets = response.json()["stats"]["stats_fields"]["owningItem"][ + results_total_num_facets = response.json()["stats"]["stats_fields"]["owningItem"][ "countDistinct" ] except TypeError: @@ -251,7 +268,7 @@ def index_item_downloads(self, time_period='all'): return results_per_page = 100 - results_num_pages = math.ceil(results_totalNumFacets / results_per_page) + results_num_pages = math.ceil(results_total_num_facets / results_per_page) results_current_page = 0 with Database(self.config['statistics_db']) as db: @@ -280,7 +297,7 @@ def 
index_item_downloads(self, time_period='all'): } if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1])) + self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] @@ -288,20 +305,20 @@ def index_item_downloads(self, time_period='all'): response = self.solr.call(url=solr_url, params=solr_query_params) self.logger.info("Solr item downloads query: %s", response.url) - + # Solr returns facets as a dict of dicts (see json.nl parameter) downloads = response.json()["facet_counts"]["facet_fields"] # iterate over the facetField dict and get the ids and views - for id, item_downloads in downloads["owningItem"].items(): + for item_id, item_downloads in downloads["owningItem"].items(): if time_period == 'month': - self.logger.debug(cursor.mogrify("UPDATE item_stats SET downloads_last_month = %s WHERE item_id = %s", (item_downloads, id))) - cursor.execute("UPDATE item_stats SET downloads_last_month = %s WHERE item_id = %s", (item_downloads, id)) + self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET downloads_last_month = {item_downloads} WHERE item_id = '{item_id}'")) + cursor.execute(f"UPDATE item_stats SET downloads_last_month = {item_downloads} WHERE item_id = '{item_id}'") elif time_period == 'year': - self.logger.debug(cursor.mogrify("UPDATE item_stats SET downloads_academic_year = %s WHERE item_id = %s", (item_downloads, id))) - cursor.execute("UPDATE item_stats SET downloads_academic_year = %s WHERE item_id = %s", (item_downloads, id)) + self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET downloads_academic_year = {item_downloads} WHERE item_id = '{item_id}'")) + cursor.execute(f"UPDATE item_stats SET downloads_academic_year = {item_downloads} WHERE item_id = '{item_id}'") else: - self.logger.debug(cursor.mogrify("UPDATE item_stats SET downloads_total = %s WHERE item_id = %s", (item_downloads, id))) - cursor.execute("UPDATE item_stats SET downloads_total = %s WHERE item_id = %s", (item_downloads, id)) + self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET downloads_total = {item_downloads} WHERE item_id = '{item_id}'")) + cursor.execute(f"UPDATE item_stats SET downloads_total = {item_downloads} WHERE item_id = '{item_id}'") # Commit changes to database db.commit() diff --git a/dspace_reports/repository_indexer.py b/dspace_reports/repository_indexer.py index 71d9d97..8d1c8f6 100644 --- a/dspace_reports/repository_indexer.py +++ b/dspace_reports/repository_indexer.py @@ -1,27 +1,39 @@ +"""Class for indexing a repository""" + from lib.database import Database from dspace_reports.indexer import Indexer class RepositoryIndexer(Indexer): + """Class for indexing a repository""" + def index(self): + """Index function""" + self.logger.info("Loading DSpace repository...") self.index_repository() def index_repository(self): - # Get site hierarchy - hierarchy = self.rest.get_hierarchy() + """Index the entire repository""" + + # Get repository information + repository_id = 0 + repository_name = "Unknown" + + site = self.rest.get_site() + if 'uuid' in site: + repository_id = site['uuid'] - # Repository information - repository_id = hierarchy['id'] - repository_name = hierarchy['name'] + if 'name' in site: + repository_name = site['name'] - self.logger.info("Indexing Repository: %s (%s)" %(repository_name, repository_id)) + self.logger.info("Indexing Repository: %s (UUID: %s)", repository_name, repository_id) 
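The statements below interpolate values straight into the SQL text with f-strings, so string values such as UUIDs have to carry their own quotes. For reference, a minimal sketch of the placeholder style that psycopg's execute() and mogrify() also support, where the driver does the quoting and escaping itself; the DSN and sample values here are hypothetical, while the table and column names come from this patch:

    import psycopg

    repository_id = "10754a2e-0000-4000-8000-7c72b5e785d5"  # placeholder UUID
    repository_name = "Example Repository"                  # placeholder name

    # Hypothetical DSN; the real values come from the statistics_db configuration
    with psycopg.connect("dbname=dspace_statistics user=dspace host=localhost") as db:
        with db.cursor() as cursor:
            # The driver binds %s placeholders and quotes string values itself
            cursor.execute(
                "INSERT INTO repository_stats (repository_id, repository_name) "
                "VALUES (%s, %s)",
                (repository_id, repository_name))
        db.commit()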
with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: - self.logger.debug(cursor.mogrify("INSERT INTO repository_stats (repository_id, repository_name) VALUES (%s, %s)", (repository_id, repository_name))) - cursor.execute("INSERT INTO repository_stats (repository_id, repository_name) VALUES (%s, %s)", (repository_id, repository_name)) - + self.logger.debug(cursor.mogrify(f"INSERT INTO repository_stats (repository_id, repository_name) VALUES ({repository_id}, {repository_name})")) + cursor.execute(f"INSERT INTO repository_stats (repository_id, repository_name) VALUES ({repository_id}, {repository_name})") + db.commit() # Index views and downloads for the current community @@ -34,15 +46,17 @@ def index_repository(self): self.logger.info("Indexing repository downloads.") self.index_repository_downloads(repository_id=repository_id, time_period=time_period) - + def index_repository_items(self, repository_id=None, time_period=None): + """Index repository items""" + if repository_id is None or time_period is None: return # Create base Solr URL solr_url = self.solr_server + "/search/select" - self.logger.debug("solr_url: %s" %(solr_url)) - + self.logger.debug("Solr_URL: %s", solr_url) + # Default Solr params solr_query_params = { "q": "search.resourcetype:Item", @@ -55,7 +69,7 @@ def index_repository_items(self, repository_id=None, time_period=None): date_range = [] date_range = self.get_date_range(time_period) if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1])) + self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] @@ -66,12 +80,12 @@ def index_repository_items(self, repository_id=None, time_period=None): # Make call to Solr for items statistics response = self.solr.call(url=solr_url, params=solr_query_params) self.logger.info("Calling Solr total items in repository: %s", response.url) - - results_totalItems = 0 + + results_total_items = 0 try: # Get total number of items - results_totalItems = response.json()["response"]["numFound"] - self.logger.info("Solr - total items: %s", str(results_totalItems)) + results_total_items = response.json()["response"]["numFound"] + self.logger.info("Solr - total items: %s", str(results_total_items)) except TypeError: self.logger.info("No items to index, returning.") return @@ -79,27 +93,29 @@ def index_repository_items(self, repository_id=None, time_period=None): with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: if time_period == 'month': - self.logger.debug(cursor.mogrify("UPDATE repository_stats SET items_last_month = %i WHERE repository_id = '%s'" %(results_totalItems, repository_id))) - cursor.execute("UPDATE repository_stats SET items_last_month = %i WHERE repository_id = '%s'" %(results_totalItems, repository_id)) + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET items_last_month = {results_total_items} WHERE repository_id = '{repository_id}'")) + cursor.execute(f"UPDATE repository_stats SET items_last_month = {results_total_items} WHERE repository_id = '{repository_id}'") elif time_period == 'year': - self.logger.debug(cursor.mogrify("UPDATE repository_stats SET items_academic_year = %i WHERE repository_id = '%s'" %(results_totalItems, repository_id))) - cursor.execute("UPDATE repository_stats SET items_academic_year = %i WHERE repository_id = '%s'" %(results_totalItems, repository_id)) + 
self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET items_academic_year = {results_total_items} WHERE repository_id = '{repository_id}'")) + cursor.execute(f"UPDATE repository_stats SET items_academic_year = {results_total_items} WHERE repository_id = '{repository_id}'") else: - self.logger.debug(cursor.mogrify("UPDATE repository_stats SET items_total = %i WHERE repository_id = '%s'" %(results_totalItems, repository_id))) - cursor.execute("UPDATE repository_stats SET items_total = %i WHERE repository_id = '%s'" %(results_totalItems, repository_id)) + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET items_total = {results_total_items} WHERE repository_id = '{repository_id}'")) + cursor.execute(f"UPDATE repository_stats SET items_total = {results_total_items} WHERE repository_id = '{repository_id}'") # Commit changes db.commit() def index_repository_views(self, repository_id=None, time_period=None): + """Index repository views""" + if repository_id is None or time_period is None: return - + # Create base Solr url solr_url = self.solr_server + "/statistics/select" # Get Solr shards - shards = self.solr.get_statistics_shards(time_period) + shards = self.solr.get_statistics_shards() # Default Solr params solr_query_params = { @@ -114,18 +130,19 @@ def index_repository_views(self, repository_id=None, time_period=None): date_range = [] date_range = self.get_date_range(time_period) if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1])) + self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" else: self.logger.error("Error creating date range.") - + # Make call to Solr for views statistics response = self.solr.call(url=solr_url, params=solr_query_params) self.logger.info("Calling Solr total item views in repository: %s", response.url) - + + results_num_found = 0 try: # Get total number of items results_num_found = response.json()["response"]["numFound"] @@ -134,30 +151,32 @@ def index_repository_views(self, repository_id=None, time_period=None): self.logger.info("No item views to index.") return - self.logger.info("Total repository item views: %s" %(str(results_num_found))) + self.logger.info("Total repository item views: %s", str(results_num_found)) with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: - self.logger.info("Setting repository views stats with %s views for time period: %s" %(str(results_num_found), time_period)) + self.logger.info("Setting repository views stats with %s views for time period: %s", str(results_num_found), time_period) if time_period == 'month': - self.logger.debug(cursor.mogrify("UPDATE repository_stats SET views_last_month = %i WHERE repository_id = '%s'" %(results_num_found, repository_id))) - cursor.execute("UPDATE repository_stats SET views_last_month = %i WHERE repository_id = '%s'" %(results_num_found, repository_id)) + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET views_last_month = {results_num_found} WHERE repository_id = '{repository_id}'")) + cursor.execute(f"UPDATE repository_stats SET views_last_month = {results_num_found} WHERE repository_id = '{repository_id}'") elif time_period == 'year': - self.logger.debug(cursor.mogrify("UPDATE repository_stats SET views_academic_year = %i WHERE repository_id = '%s'" 
%(results_num_found, repository_id))) - cursor.execute("UPDATE repository_stats SET views_academic_year = %i WHERE repository_id = '%s'" %(results_num_found, repository_id)) + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET views_academic_year = {results_num_found} WHERE repository_id = '{repository_id}'")) + cursor.execute(f"UPDATE repository_stats SET views_academic_year = {results_num_found} WHERE repository_id = '{repository_id}'") else: - self.logger.debug(cursor.mogrify("UPDATE repository_stats SET views_total = %i WHERE repository_id = '%s'" %(results_num_found, repository_id))) - cursor.execute("UPDATE repository_stats SET views_total = %i WHERE repository_id = '%s'" %(results_num_found, repository_id)) + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET views_total = {results_num_found} WHERE repository_id = '{repository_id}'")) + cursor.execute(f"UPDATE repository_stats SET views_total = {results_num_found} WHERE repository_id = '{repository_id}'") # Commit changes db.commit() def index_repository_downloads(self, repository_id=None, time_period=None): + """Index repository downloads""" + if repository_id is None or time_period is None: return - + # Get Solr shards - shards = self.solr.get_statistics_shards(time_period) + shards = self.solr.get_statistics_shards() # Create base Solr url solr_url = self.solr_server + "/statistics/select" @@ -175,18 +194,20 @@ def index_repository_downloads(self, repository_id=None, time_period=None): date_range = [] date_range = self.get_date_range(time_period) if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s" %(date_range[0], date_range[1])) + self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1]) + if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" else: self.logger.error("Error creating date range.") - + # Make call to Solr for views statistics response = self.solr.call(url=solr_url, params=solr_query_params) self.logger.info("Calling Solr total item downloads in repository: %s", response.url) + results_num_found = 0 try: # Get total number of items results_num_found = response.json()["response"]["numFound"] @@ -195,19 +216,19 @@ def index_repository_downloads(self, repository_id=None, time_period=None): self.logger.info("No item downloads to index.") return - self.logger.info("Total repository item downloads: %s" %(str(results_num_found))) + self.logger.info("Total repository item downloads: %s", str(results_num_found)) with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: if time_period == 'month': - self.logger.debug(cursor.mogrify("UPDATE repository_stats SET downloads_last_month = downloads_last_month + %i WHERE repository_id = '%s'" %(results_num_found, repository_id))) - cursor.execute("UPDATE repository_stats SET downloads_last_month = downloads_last_month + %i WHERE repository_id = '%s'" %(results_num_found, repository_id)) + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET downloads_last_month = downloads_last_month + {results_num_found} WHERE repository_id = '{repository_id}'")) + cursor.execute(f"UPDATE repository_stats SET downloads_last_month = downloads_last_month + {results_num_found} WHERE repository_id = '{repository_id}'") elif time_period == 'year': - self.logger.debug(cursor.mogrify("UPDATE repository_stats SET downloads_academic_year = 
downloads_academic_year + %i WHERE repository_id = '%s'" %(results_num_found, repository_id))) - cursor.execute("UPDATE repository_stats SET downloads_academic_year = downloads_academic_year + %i WHERE repository_id = '%s'" %(results_num_found, repository_id)) + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET downloads_academic_year = downloads_academic_year + {results_num_found} WHERE repository_id = '{repository_id}'")) + cursor.execute(f"UPDATE repository_stats SET downloads_academic_year = downloads_academic_year + {results_num_found} WHERE repository_id = '{repository_id}'") else: - self.logger.debug(cursor.mogrify("UPDATE repository_stats SET downloads_total = downloads_total + %i WHERE repository_id = '%s'" %(results_num_found, repository_id))) - cursor.execute("UPDATE repository_stats SET downloads_total = downloads_total + %i WHERE repository_id = '%s'" %(results_num_found, repository_id)) + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET downloads_total = downloads_total + {results_num_found} WHERE repository_id = '{repository_id}'")) + cursor.execute(f"UPDATE repository_stats SET downloads_total = downloads_total + {results_num_found} WHERE repository_id = '{repository_id}'") # Commit changes - db.commit() \ No newline at end of file + db.commit() diff --git a/lib/api.py b/lib/api.py index 1c3819c..7001efc 100644 --- a/lib/api.py +++ b/lib/api.py @@ -1,83 +1,113 @@ -import requests +"""Class for interacting with a DSpace 7+ REST API""" + import logging +import requests -import xml.etree.ElementTree as ET +class DSpaceRestApi(): + """Class for interacting with a DSpace 7+ REST API""" -class DSpaceRestApi(object): def __init__(self, rest_server=None): # Ensure URL of rest_server has trailing slash url = rest_server['url'] if url[len(url)-1] != '/': - self.url = url + '/' + self.api_url = url + '/' else: - self.url = url + self.api_url = url + + self.logger = logging.getLogger('dspace-reports') + self.logger.debug("Connecting to DSpace REST API: %s.", self.api_url) self.username = rest_server['username'] self.password = rest_server['password'] - self.session_id = None + + # Construct token URL + self.token_url = self.api_url + "security/csrf" + + # Construct login URL + self.login_url = self.api_url + "authn/login" + + # Create session + self.session = requests.Session() + + # Get CSRF token + self.token = None + self.get_token() self.limit = 100 - self.headers = {'Accept': 'application/json'} + + self.auth_headers = {} + self.request_headers = {'Content-type': 'application/json'} self.cookies = {} - self.logger = logging.getLogger('dspace-reports') - self.logger.debug("Connecting to DSpace REST API: %s.", self.url) - - # Authenticate using parameters in configuration - authenticated = self.authenticate() - if authenticated is False: + # Authenticate using parameters in set here + self.authenticated = self.authenticate() + if self.authenticated is False: return None # Test connection to REST API self.test_connection() + def get_token(self): + """Get CSRF token""" + + token_response = self.session.get(self.token_url) + if 'DSPACE-XSRF-TOKEN' in token_response.headers: + self.token = token_response.headers['DSPACE-XSRF-TOKEN'] + self.session.headers.update({'X-XSRF-Token': self.token}) + self.session.cookies.update({'X-XSRF-Token': self.token}) + self.logger.debug("Updating CSRF token to: %s", self.token) + else: + self.logger.info('No DSPACE-XSRF-TOKEN in the API response.') + + def authenticate(self): + """Authenticate a REST API user""" + 
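The login flow follows the DSpace 7 CSRF contract set up above: fetch a token from security/csrf, echo it back in the X-XSRF-Token header, then keep the bearer token that authn/login returns in the Authorization header. A standalone sketch of that handshake, assuming only the endpoints shown in this patch; the base URL and credentials are placeholders:

    import requests

    api_url = "https://repository.example.edu/server/api/"  # hypothetical server
    session = requests.Session()

    # 1. DSpace returns the CSRF token in the DSPACE-XSRF-TOKEN response header
    token = session.get(api_url + "security/csrf").headers.get("DSPACE-XSRF-TOKEN")
    session.headers.update({"X-XSRF-Token": token})

    # 2. A successful login returns a JWT in the Authorization response header
    login = session.post(api_url + "authn/login",
                         data={"user": "admin@example.edu", "password": "secret"})
    if login.status_code == 200:
        session.headers.update({"Authorization": login.headers.get("Authorization")})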
self.logger.info("Authenticating connection to REST API") - - # Create data dictionary - data = {'email':self.username, 'password':self.password} - + + # Create data dictionary with credentials + data = {'user':self.username, 'password':self.password} + # Attempt to log in to REST API - login_url = self.construct_url(command = 'login') - login_response = requests.post(login_url, headers=self.headers, data=data) - self.logger.info("Calling REST API: %s" %(login_response.url)) + login_response = self.session.post(self.login_url, headers=self.auth_headers, data=data) + self.logger.info("Calling REST API: %s", login_response.url) if login_response.status_code == 200: - self.logger.info("Successfully authenticated: %s" %(str(login_response.status_code))) + self.logger.info("Successfully authenticated: %s", login_response.status_code) self.logger.info(login_response.cookies) - if 'JSESSIONID' in login_response.cookies: - self.logger.info("Received session ID: %s" %(login_response.cookies['JSESSIONID'])) - self.session_id = login_response.cookies['JSESSIONID'] - self.cookies = {'JSESSIONID':self.session_id} - return True - else: - self.logger.info("No session ID in response.") - return False - else: - self.logger.info("REST API authentication failed: %s" %(str(login_response.status_code))) - self.logger.info(login_response.text) - return False + + if 'Authorization' in login_response.headers: + self.session.headers.update({'Authorization': login_response.headers.get('Authorization')}) + + return True + + self.logger.info("REST API authentication failed: %s", login_response.status_code) + self.logger.info(login_response.text) + return False def test_connection(self): - if self.session_id is None: + """"Test REST API connection""" + + if self.authenticated is False: self.logger.error("Must authenticate before connecting to the REST API.") return False - connection_url = self.url + 'status' - self.logger.info("Testing REST API connection: %s.", connection_url) - response = requests.get(connection_url) - - status = ET.fromstring(response.content) - okay = status.find('okay') - if okay is not None and okay.text == 'true': + self.logger.info("Testing REST API connection: %s.", self.api_url.strip("/")) + response = self.session.get(self.api_url.strip("/"), headers=self.request_headers) + if response.status_code == 200: self.logger.info("REST API connection successful.") return True - else: - self.logger.error("REST API connection NOT successful.") - return False - def construct_url(self, command, params={}): + self.logger.error("REST API connection NOT successful.") + return False + + def construct_url(self, command, params=None): + """Construct API URL""" + + if params is None: + params = {} + parameters = '' first = True for key, value in params.items(): @@ -87,70 +117,99 @@ def construct_url(self, command, params={}): else: parameters += '&' + key + '=' + str(value) - final_url = self.url + command + parameters + final_url = self.api_url + command + parameters return final_url - def rest_call(self, type='GET', url='', headers=None, data={}): + def rest_call(self, call_type='GET', url='', headers=None, data=None): + """Make call to REST API""" + if headers is None: - headers = self.headers + headers = self.session.headers - if type == 'POST': - response = requests.post(url, headers=headers, cookies=self.cookies, data=data) + if data is None: + data = {} + + self.logger.debug("Calling REST API with URL: %s", url) + + if call_type == 'POST': + response = self.session.post(url, headers=headers, 
cookies=self.cookies, data=data) else: - response = requests.get(url, headers=headers, cookies=self.cookies) + response = self.session.get(url, headers=headers, cookies=self.cookies) self.logger.debug(response.status_code) self.logger.debug(response.text) response_json = response.json() return response_json - - def get_hierarchy(self): - hierarchy_url = self.construct_url(command = 'hierarchy') - hierarchy = self.rest_call(url = hierarchy_url) - return hierarchy + + def get_site(self): + """Get site information""" + + sites_url = self.api_url + 'core/sites' + sites_response = self.rest_call(url=sites_url) + if '_embedded' in sites_response: + if 'sites' in sites_response['_embedded']: + site = sites_response['_embedded']['sites'][0] + return site + + return None + def get_communities(self): - communities_url = self.construct_url(command = 'communities') + """Get all communities""" + + communities_url = self.construct_url(command = 'core/communities') communities = self.rest_call(url = communities_url) return communities - def get_top_communities(self): - top_communities_url = self.construct_url(command = 'communities/top-communities') + def get_top_level_communities(self): + """Get top level communities""" + + top_communities_url = self.construct_url(command = 'core/communities/search/top') top_communities = self.rest_call(url = top_communities_url) return top_communities - def get_community(self, community_id=None): - if community_id is None: - return - community_url = self.construct_url(command = f"communities/{community_id}") + def get_community(self, community_uuid=None): + """Get an individual community""" + + if community_uuid is None: + return None + community_url = self.construct_url(command = f"core/communities/{community_uuid}") community = self.rest_call(url = community_url) return community - def get_collection_items(self, collection_id=None): - if collection_id is None: - return - items_url = self.construct_url(command = f"collections/{collection_id}/items") + def get_collection_items(self, collection_uuid=None): + """Get items of a collection""" + + if collection_uuid is None: + return None + items_url = self.construct_url(command = f"core/collections/{collection_uuid}/items") items = self.rest_call(url = items_url) return items - - def get_items(self, expand=[]): + + def get_items(self, expand=None): + """Get all items in the repository""" + + if expand is None: + expand = [] + offset = 0 params = {} - expandValue = '' + expand_value = '' all_items = [] if len(expand) > 0: - expandValue = ','.join(expand) - params['expand'] = expandValue - self.logger.debug("Added expand list to parameters: %s " %(expandValue)) + expand_value = ','.join(expand) + params['expand'] = expand_value + self.logger.debug("Added expand list to parameters: %s ", expand_value) while True: - self.logger.debug("Retrieving items %s through %s from the REST API" %(str(offset), str(offset + self.limit))) + self.logger.debug("Retrieving items %s through %s from the REST API", offset, + offset + self.limit) params['offset'] = offset params['limit'] = self.limit items_url = self.construct_url(command = 'items', params = params) - self.logger.debug("Items Solr call: %s" %(items_url)) + self.logger.debug("Items Solr call: %s", items_url) items = self.rest_call(url = items_url) if len(items) == 0: @@ -161,31 +220,51 @@ def get_items(self, expand=[]): return all_items - def find_items_by_metadata_field(self, metadata_entry=None, expand=[]): + def find_items_by_metadata_field(self, metadata_entry=None, 
expand=None): + """Find an item by any metadata field(s)""" + if metadata_entry is None: - return + return None + + if expand is None: + expand = [] params = {} - expandValue = '' + expand_value = '' if len(expand) > 0: - expandValue = ','.join(expand) - params['expand'] = expandValue - self.logger.debug("Added expand list to parameters: %s " %(expandValue)) - - headers = self.headers - headers['Content-Type'] = 'application/json' + expand_value = ','.join(expand) + params['expand'] = expand_value + self.logger.debug("Added expand list to parameters: %s ", expand_value) - items_metadata_url = self.construct_url(command = f"items/find-by-metadata-field", params = params) + items_metadata_url = self.construct_url(command = "items/find-by-metadata-field", + params = params) self.logger.info(items_metadata_url) self.logger.info(metadata_entry) - items = self.rest_call(type = 'POST', url = items_metadata_url, headers = headers, data = metadata_entry) + items = self.rest_call(call_type = 'POST', url = items_metadata_url, + headers = self.request_headers, data = metadata_entry) return items def get_item(self, item_id=None): + """Get an individual item""" + if item_id is None: - return + return None item_url = self.construct_url(command = f"items/{item_id}") item = self.rest_call(url = item_url) - return item \ No newline at end of file + return item + + def update_token(self, req): + """Update CSRF token""" + + if not self.session: + self.logger.debug('Session state not found, setting...') + self.session = requests.Session() + if 'DSPACE-XSRF-TOKEN' in req.headers: + t = req.headers['DSPACE-XSRF-TOKEN'] + self.logger.debug('Updating XSRF token to %s', t) + + # Update headers and cookies + self.session.headers.update({'X-XSRF-Token': t}) + self.session.cookies.update({'X-XSRF-Token': t}) diff --git a/lib/database.py b/lib/database.py index 5fef9b5..15a0194 100644 --- a/lib/database.py +++ b/lib/database.py @@ -1,10 +1,12 @@ +"""Class for interacting with a DSpace 7+ database""" + import logging -import psycopg2 -import psycopg2.extras +import psycopg +class Database(): + """Class for interacting with a DSpace 7+ database""" -class Database(object): def __init__(self, config): self.config = config self._connection_uri = f"dbname={config['name']} user={config['username']} password={config['password']} host={config['host']} port={config['port']}" @@ -12,29 +14,35 @@ def __init__(self, config): def __enter__(self): try: - self._connection = psycopg2.connect( - self._connection_uri, cursor_factory=psycopg2.extras.DictCursor + self._connection = psycopg.connect( + self._connection_uri, cursor_factory=psycopg.ClientCursor ) - except psycopg2.OperationalError as err: - self.logger.error("Cannot connect to database. Please check connection information and try again.") - self.logger.error(f"Error: {err=}, {type(err)=}") + except psycopg.OperationalError as err: + self.logger.error("Cannot connect to database. 
Please check connection information.") + self.logger.error("Error: %s, %s", err, type(err)) return self._connection def create_connection(self): + """Create database connection""" + # Debug information - self.logger.info("Attempting to connect to Dataverse database: %s (host), %s (database), %s (username) ******** (password).", self.config['host'], self.config['name'], self.config['username']) + self.logger.info("Attempting to connect to Dataverse database: %s (host), %s (database)," + + " %s (username) ******** (password).", self.config['host'], + self.config['name'], self.config['username']) # Create connection to database try: - self.connection = psycopg2.connect(self._connection_uri) + self.connection = psycopg.connect(self._connection_uri, + cursor_factory=psycopg.ClientCursor) return True - except psycopg2.OperationalError as err: - self.logger.error("Cannot connect to database. Please check connection information and try again.") - self.logger.error(f"Error: {err=}, {type(err)=}") + except psycopg.OperationalError as err: + self.logger.error("Cannot connect to database. Please check connection information.") + self.logger.error("Error: %s, %s", err, type(err)) return False def close_connection(self): + """Close database connection""" self._connection.close() def __exit__(self, exc_type, exc_value, exc_traceback): diff --git a/lib/emailer.py b/lib/emailer.py index 9bfa82a..26db4c4 100644 --- a/lib/emailer.py +++ b/lib/emailer.py @@ -1,5 +1,7 @@ -import os +"""Class for sending emails""" + import mimetypes +import os import smtplib import logging @@ -10,27 +12,36 @@ class Emailer(object): + """Class for sending emails""" + def __init__(self, config=None): self.config = config self.logger = logging.getLogger('dspace-reports') def email_report_admins(self, report_file_path=None): + """Function to send email DSpace stats reports attached""" + if report_file_path is None: self.logger.error("A report file path and admin email address must be specified.") return False # Construct email information - subject = 'DSpace statistical reports for {name}'.format(name=self.config['dspace_name']) + subject = f"DSpace statistical reports for {self.config['dspace_name']}" from_email = self.config['from_email'] # Send email(s) to contact(s) for admin_email in self.config['admin_emails']: - self.logger.info('Sending report to {admin_email}.'.format(admin_email=admin_email)) - self.__email_report_internal(report_file_path=report_file_path, to_email=admin_email, from_email=from_email, subject=subject) + self.logger.info('Sending report to %s.', admin_email=admin_email) + self.__email_report_internal(report_file_path=report_file_path, to_email=admin_email, + from_email=from_email, subject=subject) - def __email_report_internal(self, report_file_path=None, to_email=None, from_email=None, subject=None): + return None + + def __email_report_internal(self, report_file_path=None, to_email=None, from_email=None, + subject=None): if report_file_path is None: - self.logger.error("A report file path of either a ZIP archive or Excel file is required.") + self.logger.error("A report file path of either a ZIP archive or Excel file " + + "is required.") return False if to_email is None or from_email is None or subject is None: self.logger.error("One or more required email addresses is missing.") @@ -52,40 +63,36 @@ def __email_report_internal(self, report_file_path=None, to_email=None, from_ema # Attach report file(s) path, report_file_name = os.path.split(report_file_path) - attachment = open(report_file_path, "rb") - 
-        mime_type, _ = mimetypes.guess_type(report_file_path)
-        if mime_type == 'application/zip':
-            part = MIMEBase('application', 'zip')
-            body = "A ZIP archive with the report in Excel format is attached."
-        elif mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
-            part = MIMEBase('application', 'octet-stream')
-            body = "The report in Excel format is attached."
-        else:
-            self.logger.warning("Unrecognized mimetype for report file. Check that it is either a ZIP archive or an Excel XLSX file.")
-            part = MIMEBase("application", "octet-stream")
-
+        with open(report_file_path, "rb") as attachment:
+            mime_type, _ = mimetypes.guess_type(report_file_path)
+            if mime_type == 'application/zip':
+                part = MIMEBase('application', 'zip')
+                body = "A ZIP archive with the report in Excel format is attached."
+            elif mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet':
+                part = MIMEBase('application', 'octet-stream')
+                body = "The report in Excel format is attached."
+            else:
+                self.logger.warning("Unrecognized mimetype for report file. Check that it is " +
+                                    "either a ZIP archive or an Excel XLSX file.")
+                part = MIMEBase("application", "octet-stream")
-        part.set_payload((attachment).read())
-        encoders.encode_base64(part)
-        part.add_header('Content-Disposition', "attachment; filename= %s" % report_file_name)
-        message.attach(part)
+
+            part.set_payload(attachment.read())
+            encoders.encode_base64(part)
+            part.add_header('Content-Disposition', f"attachment; filename= {report_file_name}")
+            message.attach(part)
 
         # Set message body
         message.attach(MIMEText(body, 'plain'))
 
-        # Get SMTP configuration
-        smtp_host = self.config['smtp_host']
-        smtp_auth = self.config['smtp_auth']
-        smtp_port = self.config['smtp_port']
-        smtp_username = self.config['smtp_username']
-        smtp_password = self.config['smtp_password']
-
         # Send email
-        self.logger.info('Sending DSpace report to {email}.'.format(email=to_email))
-        server = smtplib.SMTP(smtp_host, smtp_port)
-        if smtp_auth == 'tls':
+        self.logger.info('Sending DSpace report to %s.', to_email)
+        server = smtplib.SMTP(self.config['smtp_host'], self.config['smtp_port'])
+        if self.config['smtp_auth'] == 'tls':
             server.starttls()
-        if smtp_username and smtp_password:
-            server.login(smtp_username, smtp_password)
+        if self.config['smtp_username'] and self.config['smtp_password']:
+            server.login(self.config['smtp_username'], self.config['smtp_password'])
         server.send_message(message)
         server.quit()
+
+        return None
diff --git a/lib/oai.py b/lib/oai.py
index 7a5092c..a4c8c9b 100644
--- a/lib/oai.py
+++ b/lib/oai.py
@@ -1,13 +1,15 @@
-import re
+"""Class for interacting with a DSpace 7+ OAI-PMH endpoint"""
+
 import logging
-import requests
+import re
 from time import sleep
-from xml.dom import pulldom
-
 import xml.etree.ElementTree as ET
 
+import requests
+
+class DSpaceOai():
+    """Class for interacting with a DSpace 7+ OAI-PMH endpoint"""
 
-class DSpaceOai(object):
     ns = {
         'oai': 'http://www.openarchives.org/OAI/2.0/',
         'dc': 'http://purl.org/dc/elements/1.1/'
     }
@@ -23,17 +25,20 @@
         # Add 'request' to path
         self.oai_server = self.oai_server + 'request'
 
+        self.timeout = 5
         self.limit = 100
-        self.sleepTime = 1
+        self.sleep_time = 1
         self.headers = {'User-Agent': 'OAIHarvester/2.0',
                         'Accept': 'text/html',
                         'Accept-Encoding': 'compress, deflate'}
 
         self.logger = logging.getLogger('dspace-reports')
-
+
         # Test connection to OAI-PMH feed
         self.test_connection()
 
     def test_connection(self):
+        """Test OAI-PMH connection"""
+
         identify_url = self.construct_url(verb='Identify')
         self.logger.info("Testing
OAI-PMH feed connection: %s.", identify_url) response = self.call(url = identify_url) @@ -41,11 +46,16 @@ def test_connection(self): if response.status_code == 200: self.logger.info("OAI_PMH feed connection successful.") return True - else: - self.logger.error("OAI-PMH feed connection NOT successful.") - return False - def construct_url(self, verb, params={}): + self.logger.error("OAI-PMH feed connection NOT successful.") + return False + + def construct_url(self, verb, params=None): + """Create URL""" + + if params is None: + params = {} + parameters = '' for key, value in params.items(): parameters += '&' + key + '=' + str(value) @@ -53,18 +63,27 @@ def construct_url(self, verb, params={}): new_url = self.oai_server + '?verb=' + verb + parameters return new_url - def call(self, url=None, params={}): + def call(self, url=None, params=None): + """Make call to endpoint""" + if url is None: - return + return None - response = requests.get(url, params=params) + if params is None: + params = {} + + response = requests.get(url, params=params, timeout=self.timeout) return response def pause(self, wait_time): - self.logger.info("Pausing harvest process for %s second(s)." %(str(wait_time))) + """Pause before next call""" + + self.logger.info("Pausing harvest process for %s second(s).", str(wait_time)) sleep(wait_time) def get_records(self): + """Get all records""" + offset = 0 all_records = [] params = { @@ -72,13 +91,14 @@ def get_records(self): } while True: - self.logger.debug("Retrieving records %s through %s from the OAI-PMH feed." %(str(offset), str(offset + self.limit))) + self.logger.debug("Retrieving records %s through %s from the OAI-PMH feed.", + str(offset), str(offset + self.limit)) records_url = self.construct_url(verb = 'ListRecords', params = params) - self.logger.debug("Records OAI-PMH call: %s" %(records_url)) + self.logger.debug("Records OAI-PMH call: %s", records_url) records_response = self.call(url = records_url) records_root = ET.fromstring(records_response.text) - + list_records = records_root.find('.//oai:ListRecords', self.ns) if list_records: records = list_records.findall('.//oai:record', self.ns) @@ -88,20 +108,23 @@ def get_records(self): identifier_nodes = metadata.findall('.//dc:identifier', self.ns) for identifier_node in identifier_nodes: if identifier_node is not None and identifier_node.text is not None: - self.logger.info("Looking at record identifier: %s : %s" %(identifier_node.tag, identifier_node.text)) + self.logger.info("Looking at record identifier: %s : %s", + identifier_node.tag, identifier_node.text) handle = re.search('^https?://hdl.handle.net', identifier_node.text) if handle: all_records.append(identifier_node.text) else: - self.logger.debug("Identifier is not a handle URL: %s" %(identifier_node.text)) + self.logger.debug("Identifier is not a handle URL: %s", + identifier_node.text) # Check for resumptionToken - token_match = re.search(']*>(.*)', records_response.text) + token_match = re.search(']*>(.*)', + records_response.text) if not token_match: break token = token_match.group(1) - self.logger.debug("resumptionToken: %s" %(token)) + self.logger.debug("resumptionToken: %s", token) params['resumptionToken'] = token # Remove metadataPrefix from params @@ -110,8 +133,8 @@ def get_records(self): offset = offset + self.limit - if self.sleepTime: - self.pause(self.sleepTime) + if self.sleep_time: + self.pause(self.sleep_time) - self.logger.debug("Harvested %s records from OAI feed." 
%(str(len(all_records)))) - return all_records \ No newline at end of file + self.logger.debug("Harvested %s records from OAI feed.", str(len(all_records))) + return all_records diff --git a/lib/output.py b/lib/output.py index ecd9191..d577be3 100644 --- a/lib/output.py +++ b/lib/output.py @@ -1,15 +1,17 @@ -import os +"""Class for saving stats reports to CSV and Excel files""" + import csv +import os import logging import shutil -import xlsxwriter - from zipfile import ZIP_DEFLATED, ZipFile - +import xlsxwriter from lib.util import Utilities -class Output(object): +class Output(): + """Class for saving stats reports to CSV and Excel files""" + def __init__(self, config=None): self.config = config self.logger = logging.getLogger('dataverse-reports') @@ -20,7 +22,15 @@ def __init__(self, config=None): if self.work_dir[len(self.work_dir)-1] != '/': self.work_dir = self.work_dir + '/' - def save_report_csv_file(self, output_file_path=None, headers=[], data=[]): + def save_report_csv_file(self, output_file_path=None, headers=None, data=None): + """Save stats report to CSV file""" + + if headers is None: + headers = [] + + if data is None: + data = [] + # Sanity checks if output_file_path is None: self.logger.error("Output file path is required.") @@ -37,7 +47,8 @@ def save_report_csv_file(self, output_file_path=None, headers=[], data=[]): headers.remove("repository_id") with open(output_file_path, 'w', newline='', encoding='utf-8') as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=headers, extrasaction='ignore', dialect='excel', quoting=csv.QUOTE_NONNUMERIC) + writer = csv.DictWriter(csvfile, fieldnames=headers, extrasaction='ignore', + dialect='excel', quoting=csv.QUOTE_NONNUMERIC) writer.writeheader() for result in data: writer.writerow(result) @@ -45,7 +56,10 @@ def save_report_csv_file(self, output_file_path=None, headers=[], data=[]): self.logger.info("Saved report to CSV file %s.", output_file_path) return output_file_path - def update_header_report_csv_file(self, input_file_path=None, headers_old=None, headers_new=None): + def update_header_report_csv_file(self, input_file_path=None, headers_old=None, + headers_new=None): + """Update headers of CSV file""" + # Sanity checks if input_file_path is None: self.logger.error("Input file path is required.") @@ -62,10 +76,10 @@ def update_header_report_csv_file(self, input_file_path=None, headers_old=None, temp_csv_file_path = self.work_dir + 'temp.csv' - with open(input_file_path, 'r') as fp: + with open(input_file_path, 'r', encoding="utf-8") as fp: reader = csv.DictReader(fp, fieldnames=headers_new) - with open(temp_csv_file_path, 'w', newline='') as fh: + with open(temp_csv_file_path, 'w', newline='', encoding="utf-8") as fh: writer = csv.DictWriter(fh, fieldnames=reader.fieldnames) writer.writeheader() header_mapping = next(reader) @@ -75,7 +89,12 @@ def update_header_report_csv_file(self, input_file_path=None, headers_old=None, destination_file_path = shutil.copyfile(temp_csv_file_path, input_file_path) return destination_file_path - def save_report_excel_file(self, output_file_path=None, worksheet_files=[]): + def save_report_excel_file(self, output_file_path=None, worksheet_files=None): + """"Save stats report to Excel file""" + + if worksheet_files is None: + worksheet_files = [] + # Sanity checks if output_file_path is None: self.logger.error("Output file path is required.") @@ -116,6 +135,8 @@ def save_report_excel_file(self, output_file_path=None, worksheet_files=[]): return output_file_path def save_report_zip_archive(self, 
output_file_path=None, excel_report_file=None): + """"Save stats report to zip file""" + # Sanity checks if output_file_path is None: self.logger.error("Output file path is required.") diff --git a/lib/solr.py b/lib/solr.py index bcfac67..4d025e4 100644 --- a/lib/solr.py +++ b/lib/solr.py @@ -1,9 +1,13 @@ +"""Class for interacting with a DSpace 7+ Solr instance""" + import logging -import requests import re +import requests -class DSpaceSolr(object): +class DSpaceSolr(): + """Class for interacting with a DSpace 7+ Solr instance""" + def __init__(self, solr_server=None): # Ensure solr_server has trailing slash if solr_server[len(solr_server)-1] != '/': @@ -11,23 +15,37 @@ def __init__(self, solr_server=None): else: self.solr_server = solr_server + # Timeout for requests to Solr + self.timeout = 5 + + # Create session + self.session = requests.Session() + self.request_headers = {'Content-type': 'application/json'} + self.logger = logging.getLogger('dspace-reports') self.logger.debug("Connecting to DSpace REST API: %s.", self.solr_server) self.test_connection() def test_connection(self): - url = self.solr_server - self.logger.debug("Testing Solr server connection: %s.", url) - response = requests.get(url) + """Test Solr connection""" + + self.logger.debug("Testing Solr server connection: %s.", self.solr_server) + response = self.session.get(self.solr_server, headers=self.request_headers, + timeout=self.timeout) - if response.status_code == requests.codes.ok: - self.logger.debug("Solr server connection successful") + if response.status_code == 200: + self.logger.debug("Solr server connection successful.") return True - else: - self.logger.warning("Solr server connection NOT successful") - return None - def construct_url(self, command, params={}): + self.logger.warning("Solr server connection failed.") + return None + + def construct_url(self, command, params=None): + """Create Solr URL""" + + if params is None: + params = {} + parameters = '' first = True for key, value in params.items(): @@ -40,18 +58,27 @@ def construct_url(self, command, params={}): new_url = self.solr_server + command + parameters return new_url - def call(self, type='GET', url=None, params={}): + def call(self, call_type='GET', url=None, params=None): + """Make call to Solr server""" + if url is None: - return + return None + + if params is None: + params = {} - if type == 'POST': - response = requests.put(url, params=params) + if call_type == 'POST': + response = self.session.post(url, params=params, headers=self.request_headers, + timeout=self.timeout) else: - response = requests.get(url, params=params) + response = self.session.get(url, params=params,headers=self.request_headers, + timeout=self.timeout) return response - def get_statistics_shards(self, time_period): + def get_statistics_shards(self): + """Get Solr shards with statistics""" + # Vars shards = str() shards = f"{self.solr_server}statistics" @@ -61,10 +88,11 @@ def get_statistics_shards(self, time_period): solr_query_params = {"action": "STATUS", "wt": "json"} solr_url = self.solr_server + "admin/cores" self.logger.debug("Solr cores URL: %s", solr_url) - response = requests.get(solr_url, params=solr_query_params) + shards_response = self.session.get(solr_url, params=solr_query_params, + headers=self.request_headers, timeout=self.timeout) - if response.status_code == requests.codes.ok: - data = response.json() + if shards_response.status_code == 200: + data = shards_response.json() # Iterate over active cores from Solr's STATUS response for core in 
data["status"]: @@ -86,4 +114,6 @@ def get_statistics_shards(self, time_period): return shards def get_solr_server(self): - return self.solr_server \ No newline at end of file + """Return reference to Solr server""" + + return self.solr_server diff --git a/lib/util.py b/lib/util.py index 63264bb..c9ed59f 100644 --- a/lib/util.py +++ b/lib/util.py @@ -1,14 +1,20 @@ +"""Utilities class""" + import logging import os import yaml -class Utilities(object): +class Utilities(): + """Utilities class""" + def load_config(self, config_file=None): + """Load logging configuration""" + if config_file is None: print("Must specify a configuration file.") return False - + config = {} path = config_file @@ -16,12 +22,14 @@ def load_config(self, config_file=None): print("Configuration file is missing.") return False - with open(config_file, 'r') as f: + with open(config_file, 'r', encoding="utf-8") as f: config = yaml.safe_load(f) return config def load_logger(self, config=None): + """Load application logger""" + if config is None: print("No configuration given, cannot create logger.") return False @@ -32,10 +40,10 @@ def load_logger(self, config=None): log_path = log_path + '/' log_file = config['log_file'] or 'dspace-reports.log' - + log_level_string = config['log_level'] - print("Creating logger with log level: %s" % log_level_string) - + print("Creating logger with log level: %s", log_level_string) + if log_level_string == 'INFO': log_level = logging.INFO elif log_level_string == 'DEBUG': @@ -52,7 +60,7 @@ def load_logger(self, config=None): logger.setLevel(log_level) log_formatter = logging.Formatter("%(asctime)s - %(levelname)s: %(message)s") - file_handler = logging.FileHandler("{0}/{1}".format(log_path, log_file)) + file_handler = logging.FileHandler(f"{log_path}/{log_file}") file_handler.setFormatter(log_formatter) file_handler.setLevel(log_level) logger.addHandler(file_handler) @@ -65,6 +73,8 @@ def load_logger(self, config=None): return logger def ensure_directory_exists(self, output_file_path=None): + """Ensure directory exists""" + if output_file_path is None: print("Must specify an output file.") return False @@ -73,16 +83,16 @@ def ensure_directory_exists(self, output_file_path=None): if os.path.isdir(directory) and os.path.exists(directory): return True - else: - os.mkdir(directory) - return True + + os.mkdir(directory) + return True def check_file_exists(self, file_path=None): + """Check if file exists""" + if file_path is None: print("Must specify a file path.") return False - if os.path.isfile(file_path): - return True - else: - return False \ No newline at end of file + is_file = os.path.isfile(file_path) + return is_file diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..dcc14fb --- /dev/null +++ b/poetry.lock @@ -0,0 +1,586 @@ +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. + +[[package]] +name = "astroid" +version = "3.2.3" +description = "An abstract syntax tree for Python with inference support." 
+optional = false +python-versions = ">=3.8.0" +files = [ + {file = "astroid-3.2.3-py3-none-any.whl", hash = "sha256:3eae9ea67c11c858cdd2c91337d2e816bd019ac897ca07d7b346ac10105fceb3"}, + {file = "astroid-3.2.3.tar.gz", hash = "sha256:7099b5a60985529d8d46858befa103b82d0d05a5a5e8b816b5303ed96075e1d9"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} + +[[package]] +name = "certifi" +version = "2024.7.4" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"}, + {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"}, +] + +[[package]] +name = "chardet" +version = "5.2.0" +description = "Universal encoding detector for Python 3" +optional = false +python-versions = ">=3.7" +files = [ + {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, + {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = 
"charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + 
{file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = 
"sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "dill" +version = "0.3.8" +description = "serialize all of Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"}, + {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"}, +] + +[package.extras] +graph = ["objgraph (>=1.7.2)"] +profile = ["gprof2dot (>=2022.7.29)"] + +[[package]] +name = "idna" +version = "3.7" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, +] + +[[package]] +name = "isort" +version = "5.13.2" +description = "A Python utility / library to sort Python imports." +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6"}, + {file = "isort-5.13.2.tar.gz", hash = "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109"}, +] + +[package.extras] +colors = ["colorama (>=0.4.6)"] + +[[package]] +name = "lazy-object-proxy" +version = "1.10.0" +description = "A fast and thorough lazy object proxy." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "lazy-object-proxy-1.10.0.tar.gz", hash = "sha256:78247b6d45f43a52ef35c25b5581459e85117225408a4128a3daf8bf9648ac69"}, + {file = "lazy_object_proxy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:855e068b0358ab916454464a884779c7ffa312b8925c6f7401e952dcf3b89977"}, + {file = "lazy_object_proxy-1.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab7004cf2e59f7c2e4345604a3e6ea0d92ac44e1c2375527d56492014e690c3"}, + {file = "lazy_object_proxy-1.10.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc0d2fc424e54c70c4bc06787e4072c4f3b1aa2f897dfdc34ce1013cf3ceef05"}, + {file = "lazy_object_proxy-1.10.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e2adb09778797da09d2b5ebdbceebf7dd32e2c96f79da9052b2e87b6ea495895"}, + {file = "lazy_object_proxy-1.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b1f711e2c6dcd4edd372cf5dec5c5a30d23bba06ee012093267b3376c079ec83"}, + {file = "lazy_object_proxy-1.10.0-cp310-cp310-win32.whl", hash = "sha256:76a095cfe6045c7d0ca77db9934e8f7b71b14645f0094ffcd842349ada5c5fb9"}, + {file = "lazy_object_proxy-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:b4f87d4ed9064b2628da63830986c3d2dca7501e6018347798313fcf028e2fd4"}, + {file = "lazy_object_proxy-1.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fec03caabbc6b59ea4a638bee5fce7117be8e99a4103d9d5ad77f15d6f81020c"}, + {file = "lazy_object_proxy-1.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02c83f957782cbbe8136bee26416686a6ae998c7b6191711a04da776dc9e47d4"}, + {file = "lazy_object_proxy-1.10.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:009e6bb1f1935a62889ddc8541514b6a9e1fcf302667dcb049a0be5c8f613e56"}, + {file = "lazy_object_proxy-1.10.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75fc59fc450050b1b3c203c35020bc41bd2695ed692a392924c6ce180c6f1dc9"}, + {file = "lazy_object_proxy-1.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:782e2c9b2aab1708ffb07d4bf377d12901d7a1d99e5e410d648d892f8967ab1f"}, + {file = "lazy_object_proxy-1.10.0-cp311-cp311-win32.whl", hash = "sha256:edb45bb8278574710e68a6b021599a10ce730d156e5b254941754a9cc0b17d03"}, + {file = "lazy_object_proxy-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:e271058822765ad5e3bca7f05f2ace0de58a3f4e62045a8c90a0dfd2f8ad8cc6"}, + {file = "lazy_object_proxy-1.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e98c8af98d5707dcdecc9ab0863c0ea6e88545d42ca7c3feffb6b4d1e370c7ba"}, + {file = "lazy_object_proxy-1.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:952c81d415b9b80ea261d2372d2a4a2332a3890c2b83e0535f263ddfe43f0d43"}, + {file = "lazy_object_proxy-1.10.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80b39d3a151309efc8cc48675918891b865bdf742a8616a337cb0090791a0de9"}, + {file = "lazy_object_proxy-1.10.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e221060b701e2aa2ea991542900dd13907a5c90fa80e199dbf5a03359019e7a3"}, + {file = "lazy_object_proxy-1.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:92f09ff65ecff3108e56526f9e2481b8116c0b9e1425325e13245abfd79bdb1b"}, + {file = "lazy_object_proxy-1.10.0-cp312-cp312-win32.whl", hash = "sha256:3ad54b9ddbe20ae9f7c1b29e52f123120772b06dbb18ec6be9101369d63a4074"}, + {file = 
"lazy_object_proxy-1.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:127a789c75151db6af398b8972178afe6bda7d6f68730c057fbbc2e96b08d282"}, + {file = "lazy_object_proxy-1.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4ed0518a14dd26092614412936920ad081a424bdcb54cc13349a8e2c6d106a"}, + {file = "lazy_object_proxy-1.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ad9e6ed739285919aa9661a5bbed0aaf410aa60231373c5579c6b4801bd883c"}, + {file = "lazy_object_proxy-1.10.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fc0a92c02fa1ca1e84fc60fa258458e5bf89d90a1ddaeb8ed9cc3147f417255"}, + {file = "lazy_object_proxy-1.10.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0aefc7591920bbd360d57ea03c995cebc204b424524a5bd78406f6e1b8b2a5d8"}, + {file = "lazy_object_proxy-1.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5faf03a7d8942bb4476e3b62fd0f4cf94eaf4618e304a19865abf89a35c0bbee"}, + {file = "lazy_object_proxy-1.10.0-cp38-cp38-win32.whl", hash = "sha256:e333e2324307a7b5d86adfa835bb500ee70bfcd1447384a822e96495796b0ca4"}, + {file = "lazy_object_proxy-1.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:cb73507defd385b7705c599a94474b1d5222a508e502553ef94114a143ec6696"}, + {file = "lazy_object_proxy-1.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:366c32fe5355ef5fc8a232c5436f4cc66e9d3e8967c01fb2e6302fd6627e3d94"}, + {file = "lazy_object_proxy-1.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2297f08f08a2bb0d32a4265e98a006643cd7233fb7983032bd61ac7a02956b3b"}, + {file = "lazy_object_proxy-1.10.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18dd842b49456aaa9a7cf535b04ca4571a302ff72ed8740d06b5adcd41fe0757"}, + {file = "lazy_object_proxy-1.10.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:217138197c170a2a74ca0e05bddcd5f1796c735c37d0eee33e43259b192aa424"}, + {file = "lazy_object_proxy-1.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9a3a87cf1e133e5b1994144c12ca4aa3d9698517fe1e2ca82977781b16955658"}, + {file = "lazy_object_proxy-1.10.0-cp39-cp39-win32.whl", hash = "sha256:30b339b2a743c5288405aa79a69e706a06e02958eab31859f7f3c04980853b70"}, + {file = "lazy_object_proxy-1.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:a899b10e17743683b293a729d3a11f2f399e8a90c73b089e29f5d0fe3509f0dd"}, + {file = "lazy_object_proxy-1.10.0-pp310.pp311.pp312.pp38.pp39-none-any.whl", hash = "sha256:80fa48bd89c8f2f456fc0765c11c23bf5af827febacd2f523ca5bc1893fcc09d"}, +] + +[[package]] +name = "mccabe" +version = "0.7.0" +description = "McCabe checker, plugin for flake8" +optional = false +python-versions = ">=3.6" +files = [ + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, +] + +[[package]] +name = "platformdirs" +version = "4.2.2" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"}, + {file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] +type = ["mypy (>=1.8)"] + +[[package]] +name = "psycopg" +version = "3.2.1" +description = "PostgreSQL database adapter for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "psycopg-3.2.1-py3-none-any.whl", hash = "sha256:ece385fb413a37db332f97c49208b36cf030ff02b199d7635ed2fbd378724175"}, + {file = "psycopg-3.2.1.tar.gz", hash = "sha256:dc8da6dc8729dacacda3cc2f17d2c9397a70a66cf0d2b69c91065d60d5f00cb7"}, +] + +[package.dependencies] +typing-extensions = ">=4.4" +tzdata = {version = "*", markers = "sys_platform == \"win32\""} + +[package.extras] +binary = ["psycopg-binary (==3.2.1)"] +c = ["psycopg-c (==3.2.1)"] +dev = ["ast-comments (>=1.1.2)", "black (>=24.1.0)", "codespell (>=2.2)", "dnspython (>=2.1)", "flake8 (>=4.0)", "mypy (>=1.6)", "types-setuptools (>=57.4)", "wheel (>=0.37)"] +docs = ["Sphinx (>=5.0)", "furo (==2022.6.21)", "sphinx-autobuild (>=2021.3.14)", "sphinx-autodoc-typehints (>=1.12)"] +pool = ["psycopg-pool"] +test = ["anyio (>=4.0)", "mypy (>=1.6)", "pproxy (>=2.7)", "pytest (>=6.2.5)", "pytest-cov (>=3.0)", "pytest-randomly (>=3.5)"] + +[[package]] +name = "pylint" +version = "3.2.5" +description = "python code static checker" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "pylint-3.2.5-py3-none-any.whl", hash = "sha256:32cd6c042b5004b8e857d727708720c54a676d1e22917cf1a2df9b4d4868abd6"}, + {file = "pylint-3.2.5.tar.gz", hash = "sha256:e9b7171e242dcc6ebd0aaa7540481d1a72860748a0a7816b8fe6cf6c80a6fe7e"}, +] + +[package.dependencies] +astroid = ">=3.2.2,<=3.3.0-dev0" +colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} +dill = [ + {version = ">=0.2", markers = "python_version < \"3.11\""}, + {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, + {version = ">=0.3.6", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, +] +isort = ">=4.2.5,<5.13.0 || >5.13.0,<6" +mccabe = ">=0.6,<0.8" +platformdirs = ">=2.2.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +tomlkit = ">=0.10.1" +typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""} + +[package.extras] +spelling = ["pyenchant (>=3.2,<4.0)"] +testutils = ["gitpython (>3)"] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = 
"PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", 
hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "requests" +version = "2.32.3" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.8" +files = [ + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "tomlkit" +version = "0.13.0" +description = "Style preserving TOML library" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tomlkit-0.13.0-py3-none-any.whl", hash = "sha256:7075d3042d03b80f603482d69bf0c8f345c2b30e41699fd8883227f89972b264"}, + {file = "tomlkit-0.13.0.tar.gz", hash = "sha256:08ad192699734149f5b97b45f1f18dad7eb1b6d16bc72ad0c2335772650d7b72"}, +] + +[[package]] +name = "typing-extensions" +version = "4.12.2" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, +] + +[[package]] +name = "tzdata" +version = "2024.1" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, + {file = "tzdata-2024.1.tar.gz", hash 
= "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, +] + +[[package]] +name = "urllib3" +version = "2.2.2" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, + {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "wrapt" +version = "1.16.0" +description = "Module for decorators, wrappers and monkey patching." +optional = false +python-versions = ">=3.6" +files = [ + {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, + {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"}, + {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"}, + {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"}, + {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"}, + {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"}, + {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"}, + {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"}, + {file = "wrapt-1.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465"}, + {file = "wrapt-1.16.0-cp36-cp36m-win32.whl", hash = "sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e"}, + {file = "wrapt-1.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966"}, + {file = "wrapt-1.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = 
"sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win32.whl", hash = "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"}, + {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"}, + {file = "wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"}, + {file = 
"wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"}, + {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"}, + {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"}, + {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"}, + {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, +] + +[[package]] +name = "xlsxwriter" +version = "3.2.0" +description = "A Python module for creating Excel XLSX files." +optional = false +python-versions = ">=3.6" +files = [ + {file = "XlsxWriter-3.2.0-py3-none-any.whl", hash = "sha256:ecfd5405b3e0e228219bcaf24c2ca0915e012ca9464a14048021d21a995d490e"}, + {file = "XlsxWriter-3.2.0.tar.gz", hash = "sha256:9977d0c661a72866a61f9f7a809e25ebbb0fb7036baa3b9fe74afcfca6b3cb8c"}, +] + +[metadata] +lock-version = "2.0" +python-versions = "^3.9" +content-hash = "26dfb4c08fbe0e0806f9d73966cc1f72278a62fb97c333348bc35fee85125326" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..3c7899c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,33 @@ +[tool.poetry] +name = "dspace-reports" +version = "1.3.0" +description = "A python3-based tool to generate and email views and downloads statistical reports for a DSpace repository." 
+authors = ["Nicholas Woodward "] +license = "GPLv3" +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.9" +certifi = "2024.7.4" +chardet = "5.2.0" +idna = "3.7" +isort = "5.13.2" +lazy-object-proxy = "1.10.0" +mccabe = "==0.7.0" +psycopg = "3.2.1" +pylint = "3.2.5" +python-dateutil = "2.9.0.post0" +PyYAML = "6.0.1" +requests = "2.32.3" +six = "==1.16.0" +toml = "==0.10.2" +urllib3 = "2.2.2" +wrapt = "1.16.0" +XlsxWriter = "3.2.0" +astroid = "3.2.3" +tomlkit = "0.13.0" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..0d812fb --- /dev/null +++ b/requirements.txt @@ -0,0 +1,25 @@ +astroid==3.2.3 ; python_version >= "3.9" and python_version < "4.0" +certifi==2024.7.4 ; python_version >= "3.9" and python_version < "4.0" +chardet==5.2.0 ; python_version >= "3.9" and python_version < "4.0" +charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "4.0" +colorama==0.4.6 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "win32" +dill==0.3.8 ; python_version >= "3.9" and python_version < "4.0" +idna==3.7 ; python_version >= "3.9" and python_version < "4.0" +isort==5.13.2 ; python_version >= "3.9" and python_version < "4.0" +lazy-object-proxy==1.10.0 ; python_version >= "3.9" and python_version < "4.0" +mccabe==0.7.0 ; python_version >= "3.9" and python_version < "4.0" +platformdirs==4.2.2 ; python_version >= "3.9" and python_version < "4.0" +psycopg==3.2.1 ; python_version >= "3.9" and python_version < "4.0" +pylint==3.2.5 ; python_version >= "3.9" and python_version < "4.0" +python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_version < "4.0" +pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "4.0" +requests==2.32.3 ; python_version >= "3.9" and python_version < "4.0" +six==1.16.0 ; python_version >= "3.9" and python_version < "4.0" +toml==0.10.2 ; python_version >= "3.9" and python_version < "4.0" +tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11" +tomlkit==0.13.0 ; python_version >= "3.9" and python_version < "4.0" +typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "4.0" +tzdata==2024.1 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "win32" +urllib3==2.2.2 ; python_version >= "3.9" and python_version < "4.0" +wrapt==1.16.0 ; python_version >= "3.9" and python_version < "4.0" +xlsxwriter==3.2.0 ; python_version >= "3.9" and python_version < "4.0" \ No newline at end of file diff --git a/run_collection_indexer.py b/run_collection_indexer.py index 74082ca..c3d93c1 100644 --- a/run_collection_indexer.py +++ b/run_collection_indexer.py @@ -1,13 +1,16 @@ +"""Class for indexing collection statistics""" + +import argparse import logging import sys -from optparse import OptionParser - from lib.util import Utilities from dspace_reports.collection_indexer import CollectionIndexer class RunCollectionIndexer(): + """Class for indexing collection statistics""" + def __init__(self, config=None, logger=None): if config is None: print("ERROR: A configuration file required to create the stats indexer.") @@ -23,32 +26,40 @@ def __init__(self, config=None, logger=None): self.logger = logging.getLogger('dspace-reports') def run(self): + """Function to run collection indexer""" + # Create collections stats indexer collection_indexer = CollectionIndexer(config=self.config) - + # Index collections stats from Solr collection_indexer.index() def main(): - 
+    """Main function"""
+
+    parser = argparse.ArgumentParser(
+        prog='Collection Indexer',
+        description='Commands to index collection statistics')
 
-    parser.add_option("-c", "--config", dest="config_file", default="config/application.yml", help="Configuration file")
-    parser.add_option("-o", "--output_dir", dest="output_dir", help="Directory for results files.")
+    parser.add_argument("-c", "--config", dest="config_file", action='store', type=str,
+                        default="config/application.yml", help="Configuration file")
+    parser.add_argument("-o", "--output_dir", dest="output_dir", action='store', type=str,
+                        help="Directory for results files.")
 
-    (options, args) = parser.parse_args()
+    args = parser.parse_args()
 
     # Create utilities object
     utilities = Utilities()
 
     # Check required options fields
-    if options.output_dir is None:
+    if args.output_dir is None:
         parser.print_help()
         parser.error("Must specify an output directory.")
 
     # Load config
-    print("Loading configuration from file: %s", options.config_file)
-    config = utilities.load_config(options.config_file)
+    print(f"Loading configuration from file: {args.config_file}")
+    config = utilities.load_config(args.config_file)
     if not config:
         print("ERROR: Unable to load configuration.")
         sys.exit(1)
@@ -62,7 +73,7 @@ def main():
         work_dir = work_dir + '/'
 
     # Ensure output_dir has trailing slash
-    output_dir = options.output_dir
+    output_dir = args.output_dir
     if output_dir[len(output_dir)-1] != '/':
         output_dir = output_dir + '/'
 
@@ -74,10 +85,9 @@ def main():
 
     # Create stats indexer
     indexer = RunCollectionIndexer(config=config, logger=logger)
-    
+
     # Get item statistics from Solr
     indexer.run()
-
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
diff --git a/run_community_indexer.py b/run_community_indexer.py
index 19586d0..2215319 100644
--- a/run_community_indexer.py
+++ b/run_community_indexer.py
@@ -1,13 +1,16 @@
+"""Class for indexing community statistics"""
+
+import argparse
 import logging
 import sys
 
-from optparse import OptionParser
-
 from lib.util import Utilities
 
 from dspace_reports.community_indexer import CommunityIndexer
 
 
 class RunCommunityIndexer():
+    """Class for indexing community statistics"""
+
     def __init__(self, config=None, logger=None):
         if config is None:
             print("ERROR: A configuration file required to create the stats indexer.")
@@ -23,32 +26,40 @@ def __init__(self, config=None, logger=None):
         self.logger = logging.getLogger('dspace-reports')
 
     def run(self):
+        """Function to run community indexer"""
+
         # Create communities stats indexer
         community_indexer = CommunityIndexer(config=self.config, logger=self.logger)
-        
+
         # Index communities stats from Solr
         community_indexer.index()
 
 
 def main():
-    parser = OptionParser()
+    """Main function"""
+
+    parser = argparse.ArgumentParser(
+        prog='Community Indexer',
+        description='Commands to index community statistics')
 
-    parser.add_option("-c", "--config", dest="config_file", default="config/application.yml", help="Configuration file")
-    parser.add_option("-o", "--output_dir", dest="output_dir", help="Directory for results files.")
+    parser.add_argument("-c", "--config", dest="config_file", action='store', type=str,
+                        default="config/application.yml", help="Configuration file")
+    parser.add_argument("-o", "--output_dir", dest="output_dir", action='store', type=str,
+                        help="Directory for results files.")
 
-    (options, args) = parser.parse_args()
+    args = parser.parse_args()
 
     # Create utilities object
     utilities = Utilities()
 
     # Check required options fields
-    if options.output_dir is None:
+    if args.output_dir is None:
         parser.print_help()
         parser.error("Must specify an output directory.")
 
     # Load config
-    print("Loading configuration from file: %s", options.config_file)
-    config = utilities.load_config(options.config_file)
+    print(f"Loading configuration from file: {args.config_file}")
+    config = utilities.load_config(args.config_file)
     if not config:
         print("ERROR: Unable to load configuration.")
         sys.exit(1)
@@ -62,7 +73,7 @@ def main():
         work_dir = work_dir + '/'
 
     # Ensure output_dir has trailing slash
-    output_dir = options.output_dir
+    output_dir = args.output_dir
     if output_dir[len(output_dir)-1] != '/':
         output_dir = output_dir + '/'
 
@@ -74,10 +85,9 @@ def main():
 
     # Create stats indexer
     indexer = RunCommunityIndexer(config=config, logger=logger)
-    
+
     # Get item statistics from Solr
     indexer.run()
-
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/run_cron.py b/run_cron.py
index ce84bd0..94bb640 100644
--- a/run_cron.py
+++ b/run_cron.py
@@ -1,7 +1,7 @@
-import logging
-import sys
+"""Script for running all statistics indexers"""
 
-from optparse import OptionParser
+import argparse
+import sys
 
 from lib.util import Utilities
 from run_indexer import RunIndexer
@@ -9,39 +9,47 @@
 
 def main():
-    parser = OptionParser()
+    """Main function"""
 
-    parser.add_option("-c", "--config", dest="config_file", default="config/application.yml", help="Configuration file")
-    parser.add_option("-o", "--output_dir", dest="output_dir", help="Directory for results files.")
-    parser.add_option("-e", "--email", action="store_true", dest="send_email", default=False, help="Send email with stats reports?")
+    parser = argparse.ArgumentParser(
+        prog='Run Cron',
+        description='Commands to run all statistics indexers and reports')
 
-    (options, args) = parser.parse_args()
+    parser.add_argument("-c", "--config", dest="config_file", action='store', type=str,
+                        default="config/application.yml", help="Configuration file")
+    parser.add_argument("-o", "--output_dir", dest="output_dir", action='store', type=str,
+                        help="Directory for results files.")
+    parser.add_argument("-e", "--send_email", dest="send_email",
+                        action=argparse.BooleanOptionalAction,
+                        help="Send email with stats reports?")
+
+    args = parser.parse_args()
 
     # Create utilities object
     utilities = Utilities()
 
     # Check required options fields
-    if options.output_dir is None:
+    if args.output_dir is None:
         parser.print_help()
         parser.error("Must specify an output directory.")
 
     # Load config
-    print("Loading configuration from file: %s", options.config_file)
-    config = utilities.load_config(options.config_file)
+    print(f"Loading configuration from file: {args.config_file}")
+    config = utilities.load_config(args.config_file)
     if not config:
         print("Unable to load configuration.")
         sys.exit(0)
 
     # Set up logging
     logger = utilities.load_logger(config=config)
-    
+
     # Ensure work_dir has trailing slash
     work_dir = config['work_dir']
     if work_dir[len(work_dir)-1] != '/':
         work_dir = work_dir + '/'
 
     # Ensure output_dir has trailing slash
-    output_dir = options.output_dir
+    output_dir = args.output_dir
     if output_dir[len(output_dir)-1] != '/':
         output_dir = output_dir + '/'
 
@@ -49,22 +57,22 @@ def main():
     output_dir_exists = utilities.ensure_directory_exists(output_dir)
     if output_dir_exists is False:
         sys.exit(0)
-    
+
     # Store send email parameter
-    send_email = options.send_email
+    send_email = args.send_email
 
     # Create stats indexer
     indexer = RunIndexer(config=config, logger=logger)
-    
+
     # Get item statistics from Solr
indexer.run()
 
     # Create reports generator
     reports = RunReports(config=config, output_dir=output_dir, send_email=send_email, logger=logger)
-    
+
     # Generate stats reports from database
     reports.run()
-    
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/run_indexer.py b/run_indexer.py
index 480ac6a..0fda2ed 100644
--- a/run_indexer.py
+++ b/run_indexer.py
@@ -1,9 +1,11 @@
+"""Script for indexing all statistics"""
+
+import argparse
 import logging
 import sys
 
-from optparse import OptionParser
-
 from lib.util import Utilities
+
 from dspace_reports.repository_indexer import RepositoryIndexer
 from dspace_reports.community_indexer import CommunityIndexer
 from dspace_reports.collection_indexer import CollectionIndexer
@@ -11,6 +13,8 @@
 
 
 class RunIndexer():
+    """Class for indexing all statistics"""
+
     def __init__(self, config=None, logger=None):
         if config is None:
             print('A configuration file is required to create the stats indexer.')
@@ -26,67 +30,75 @@ def __init__(self, config=None, logger=None):
             self.logger = logging.getLogger('dspace-reports')
 
     def run(self):
+        """Function to run all statistics indexers"""
+
         self.logger.info("Begin running all indexing.")
 
         # Create repository stats indexer
         repository_indexer = RepositoryIndexer(config=self.config, logger=self.logger)
-        
+
         # Index repository stats from Solr
         repository_indexer.index()
 
         # Create communities stats indexer
         community_indexer = CommunityIndexer(config=self.config, logger=self.logger)
-        
+
         # Index communities stats from Solr
         community_indexer.index()
 
         # Create collections stats indexer
         collection_indexer = CollectionIndexer(config=self.config, logger=self.logger)
-        
+
         # Index collections stats from Solr
         collection_indexer.index()
 
         # Create items stats indexer
         item_indexer = ItemIndexer(config=self.config, logger=self.logger)
-        
+
         # Index items stats from Solr
         item_indexer.index()
 
         self.logger.info("Finished running all indexing.")
 
 
 def main():
-    parser = OptionParser()
+    """Main function"""
 
+    parser = argparse.ArgumentParser(
+        prog='Statistics Indexer',
+        description='Commands to index all statistics')
 
-    parser.add_option("-c", "--config", dest="config_file", default="config/application.yml", help="Configuration file")
-    parser.add_option("-o", "--output_dir", dest="output_dir", help="Directory for results files.")
+    parser.add_argument("-c", "--config", dest="config_file", action='store', type=str,
+                        default="config/application.yml", help="Configuration file")
+    parser.add_argument("-o", "--output_dir", dest="output_dir", action='store', type=str,
+                        help="Directory for results files.")
+
+    args = parser.parse_args()
 
     # Create utilities object
     utilities = Utilities()
 
     # Check required options fields
-    if options.output_dir is None:
+    if args.output_dir is None:
         parser.print_help()
         parser.error("Must specify an output directory.")
 
     # Load config
-    print("Loading configuration from file: %s", options.config_file)
-    config = utilities.load_config(options.config_file)
+    print(f"Loading configuration from file: {args.config_file}")
+    config = utilities.load_config(args.config_file)
     if not config:
         print("Unable to load configuration.")
         sys.exit(0)
 
     # Set up logging
     logger = utilities.load_logger(config=config)
-    
+
     # Ensure work_dir has trailing slash
     work_dir = config['work_dir']
     if work_dir[len(work_dir)-1] != '/':
         work_dir = work_dir + '/'
 
     # Ensure output_dir has trailing slash
-    output_dir = options.output_dir
+    output_dir = args.output_dir
     if output_dir[len(output_dir)-1] != '/':
         output_dir = output_dir + '/'
 
@@ -97,10 +109,9 @@ def main():
 
     # Create stats indexer
     indexer = RunIndexer(config=config, logger=logger)
-    
+
     # Get all statistics from Solr
     indexer.run()
-
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/run_item_indexer.py b/run_item_indexer.py
index 7cf1dd7..8ca8377 100644
--- a/run_item_indexer.py
+++ b/run_item_indexer.py
@@ -1,13 +1,16 @@
+"""Script for indexing item statistics"""
+
+import argparse
 import logging
 import sys
 
-from optparse import OptionParser
-
 from lib.util import Utilities
 
 from dspace_reports.item_indexer import ItemIndexer
 
 
 class RunItemIndexer():
+    """Class for indexing item statistics"""
+
     def __init__(self, config=None, logger=None):
         if config is None:
             print("ERROR: A configuration file is required to create the stats indexer.")
@@ -23,32 +26,40 @@ def __init__(self, config=None, logger=None):
             self.logger = logging.getLogger('dspace-reports')
 
     def run(self):
+        """Function to run item indexer"""
+
         # Create items stats indexer
         item_indexer = ItemIndexer(config=self.config, logger=self.logger)
-        
+
         # Index items stats from Solr
         item_indexer.index()
 
 
 def main():
-    parser = OptionParser()
+    """Main function"""
+
+    parser = argparse.ArgumentParser(
+        prog='Item Indexer',
+        description='Commands to index item statistics')
 
-    parser.add_option("-c", "--config", dest="config_file", default="config/application.yml", help="Configuration file")
-    parser.add_option("-o", "--output_dir", dest="output_dir", help="Directory for results files.")
+    parser.add_argument("-c", "--config", dest="config_file", action='store', type=str,
+                        default="config/application.yml", help="Configuration file")
+    parser.add_argument("-o", "--output_dir", dest="output_dir", action='store', type=str,
+                        help="Directory for results files.")
 
-    (options, args) = parser.parse_args()
+    args = parser.parse_args()
 
     # Create utilities object
     utilities = Utilities()
 
     # Check required options fields
-    if options.output_dir is None:
+    if args.output_dir is None:
         parser.print_help()
         parser.error("Must specify an output directory.")
 
     # Load config
-    print("Loading configuration from file: %s", options.config_file)
-    config = utilities.load_config(options.config_file)
+    print(f"Loading configuration from file: {args.config_file}")
+    config = utilities.load_config(args.config_file)
     if not config:
         print("ERROR: Unable to load configuration.")
         sys.exit(1)
@@ -62,7 +73,7 @@ def main():
         work_dir = work_dir + '/'
 
     # Ensure output_dir has trailing slash
-    output_dir = options.output_dir
+    output_dir = args.output_dir
     if output_dir[len(output_dir)-1] != '/':
         output_dir = output_dir + '/'
 
@@ -74,10 +85,9 @@ def main():
 
     # Create stats indexer
     indexer = RunItemIndexer(config=config, logger=logger)
-    
+
     # Get item statistics from Solr
     indexer.run()
-
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
diff --git a/run_reports.py b/run_reports.py
index d1ff756..bd40e57 100644
--- a/run_reports.py
+++ b/run_reports.py
@@ -1,9 +1,9 @@
-from csv import excel
-import logging
-import sys
+"""Script for running all statistics reports and optionally emailing the results"""
 
-from optparse import OptionParser
+import argparse
 from datetime import datetime
+import logging
+import sys
 
 from database_manager import DatabaseManager
 from lib.database import Database
@@ -13,6 +13,8 @@
 
 
 class RunReports():
+    """Class for running all statistics reports and optionally emailing the results"""
+
     def __init__(self, config=None, output_dir=None, send_email=False, logger=None):
         if config is None:
             print('A configuration file is required to generate stats reports.')
@@ -29,7 +31,7 @@ def __init__(self, config=None, output_dir=None, send_email=False, logger=None):
 
         # Create output object
         self.output = Output(config=config)
-        
+
         # Create email object
         self.emailer = Emailer(config=config)
 
@@ -40,6 +42,8 @@ def __init__(self, config=None, output_dir=None, send_email=False, logger=None):
             self.logger = logging.getLogger('dspace-reports')
 
     def run(self):
+        """Run reports"""
+
         self.logger.info("Begin running all reports.")
 
         # Stats reports to create
         reports = [
@@ -69,10 +73,9 @@ def run(self):
         csv_report_files = []
         for report in reports:
             csv_report_file = self.create_csv_report(report=report)
-            self.logger.info("Created CSV report file: {csv_report_file}.".format(csv_report_file=csv_report_file))
+            self.logger.info("Created CSV report file: %s.", csv_report_file)
 
             # Convert column names to human readable text
-
             csv_report_files.append(csv_report_file)
 
         # Create Excel report file from CSV files
@@ -99,6 +102,8 @@ def run(self):
         self.logger.info("Finished running all reports.")
 
     def create_csv_report(self, report=None):
+        """Create CSV report"""
+
         if report is None:
             self.logger.error("Must specify a report.")
             return
 
@@ -109,55 +114,70 @@ def create_csv_report(self, report=None):
 
         with Database(self.config['statistics_db']) as db:
             with db.cursor() as cursor:
-                print(cursor.mogrify("SELECT * FROM %s ORDER BY %s ASC" %(report['table'], report['orderBy'])))
-                cursor.execute("SELECT * FROM %s ORDER BY %s ASC" %(report['table'], report['orderBy']))
-                
+                print(cursor.mogrify(f"SELECT * FROM {report['table']} ORDER BY {report['orderBy']} ASC"))
+                cursor.execute(f"SELECT * FROM {report['table']} ORDER BY {report['orderBy']} ASC")
+
                 desc = cursor.description
                 column_names = [col[0] for col in desc]
                 data = [dict(zip(column_names, row)) for row in cursor.fetchall()]
-                
-                
+
                 # Save raw database table in a CSV file
-                report_csv_file = self.output.save_report_csv_file(output_file_path=self.output_dir + report['name'] + '.csv', headers=column_names, data=data)
-                
+                report_csv_file = self.output.save_report_csv_file(
+                    output_file_path=self.output_dir + report['name'] + '.csv',
+                    headers=column_names, data=data)
+
                 # Convert column names to human readable text based on mappings in DatabaseManager
-                column_names_new = self.map_column_names(report_name=report['name'], column_names=column_names)
-                report_csv_file = self.output.update_header_report_csv_file(input_file_path=report_csv_file, headers_old=column_names, headers_new=column_names_new)
+                column_names_new = self.map_column_names(report_name=report['name'],
+                                                         column_names=column_names)
+                report_csv_file = self.output.update_header_report_csv_file(
+                    input_file_path=report_csv_file, headers_old=column_names,
+                    headers_new=column_names_new)
 
         return report_csv_file
 
     def create_excel_report(self, csv_report_files=None):
+        """Create Excel report"""
+
         if csv_report_files is None or len(csv_report_files) == 0:
-            self.logger.warn("No CSV files to create Excel file.")
+            self.logger.warning("No CSV files to create Excel file.")
             return False
 
         # Combine CSV files into single Excel file
         output_file_path = self.output_dir + datetime.now().strftime('dspace-reports_%Y-%m-%d_%H-%M-%S.xlsx')
-        excel_report_file = self.output.save_report_excel_file(output_file_path=output_file_path, worksheet_files=csv_report_files)
+        excel_report_file = self.output.save_report_excel_file(
+            output_file_path=output_file_path, worksheet_files=csv_report_files)
        if excel_report_file:
-            self.logger.info('Finished saving Excel file to {excel_report_file}.'.format(excel_report_file=excel_report_file))
+            self.logger.info("Finished saving Excel file to %s.", excel_report_file)
             return excel_report_file
-        else:
-            self.logger.error("There was an error saving the Excel file.")
-            return False
+
+        self.logger.error("There was an error saving the Excel file.")
+        return False
 
     def create_zip_archive(self, excel_report_file=None):
+        """Create ZIP file"""
+
         if excel_report_file is None:
-            self.logger.warn("No Excel file to create ZIP archive.")
+            self.logger.warning("No Excel file to create ZIP archive.")
             return False
 
         # Create ZIP archive with the Excel file
         output_file_path = self.output_dir + datetime.now().strftime('dspace-reports_%Y-%m-%d_%H-%M-%S.zip')
-        zip_report_archive = self.output.save_report_zip_archive(output_file_path=output_file_path, excel_report_file=excel_report_file)
+        zip_report_archive = self.output.save_report_zip_archive(output_file_path=output_file_path,
+                                                                 excel_report_file=excel_report_file
+                                                                 )
         if zip_report_archive:
-            self.logger.info('Finished saving ZIP archive to {zip_report_archive}.'.format(zip_report_archive=zip_report_archive))
+            self.logger.info("Finished saving ZIP archive to %s.", zip_report_archive)
             return zip_report_archive
-        else:
-            self.logger.error("There was an error saving the ZIP archive.")
-            return False
+
+        self.logger.error("There was an error saving the ZIP archive.")
+        return False
 
     def map_column_names(self, report_name=None, column_names=None):
+        """Map column names"""
+
         if report_name is None or column_names is None:
             self.logger.error("One or more parameters missing to map table columns.")
             return False
@@ -175,35 +195,43 @@ def map_column_names(self, report_name=None, column_names=None):
             self.logger.error('Unrecognized report name.')
 
         if column_map is not None:
-            for i in range(len(column_names)):
-                self.logger.debug('Looking at column name: ' + column_names[i])
-                if column_names[i] in column_map:
-                    self.logger.debug('Changing column name to ' + column_map[column_names[i]])
-                    column_names[i] = column_map[column_names[i]]
+            for i, column_name in enumerate(column_names):
+                self.logger.debug("Looking at column name: %s.", column_name)
+                if column_name in column_map:
+                    self.logger.debug("Changing column name to %s.", column_map[column_name])
+                    column_names[i] = column_map[column_name]
 
         return column_names
 
 
 def main():
-    parser = OptionParser()
+    """Main function"""
 
-    parser.add_option("-c", "--config", dest="config_file", default="config/application.yml", help="Configuration file")
-    parser.add_option("-o", "--output_dir", dest="output_dir", help="Directory for results files.")
-    parser.add_option("-e", "--email", action="store_true", dest="send_email", default=False, help="Send email with stats reports?")
+    parser = argparse.ArgumentParser(
+        prog='Reports Generator',
+        description='Commands to generate statistics reports')
 
-    (options, args) = parser.parse_args()
+    parser.add_argument("-c", "--config", dest="config_file", action='store', type=str,
+                        default="config/application.yml", help="Configuration file")
+    parser.add_argument("-o", "--output_dir", dest="output_dir", action='store', type=str,
+                        help="Directory for results files.")
+    parser.add_argument("-e", "--send_email", dest="send_email",
+                        action=argparse.BooleanOptionalAction,
+                        help="Send email with stats reports?")
+
+    args = parser.parse_args()
 
     # Create utilities object
     utilities = Utilities()
 
     # Check required options fields
-    if options.output_dir is None:
+    if args.output_dir is None:
         parser.print_help()
         parser.error("Must specify an output directory.")
 
     # Load config
-    print("Loading configuration from file: %s" %(options.config_file))
-    config = utilities.load_config(options.config_file)
+    print(f"Loading configuration from file: {args.config_file}")
+    config = utilities.load_config(args.config_file)
     if not config:
         print("Unable to load configuration.")
         sys.exit(0)
@@ -217,7 +245,7 @@ def main():
         work_dir = work_dir + '/'
 
     # Ensure output_dir has trailing slash
-    output_dir = options.output_dir
+    output_dir = args.output_dir
     if output_dir[len(output_dir)-1] != '/':
         output_dir = output_dir + '/'
 
@@ -227,14 +255,14 @@ def main():
         sys.exit(0)
 
     # Store email parameter
-    send_email = options.send_email
+    send_email = args.send_email
 
     # Create reports generator
     reports = RunReports(config=config, output_dir=output_dir, send_email=send_email, logger=logger)
-    
+
     # Generate stats reports from database
     reports.run()
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/run_repository_indexer.py b/run_repository_indexer.py
index e813d55..01e9879 100644
--- a/run_repository_indexer.py
+++ b/run_repository_indexer.py
@@ -1,13 +1,16 @@
+"""Script for indexing repository statistics"""
+
+import argparse
 import logging
 import sys
 
-from optparse import OptionParser
-
 from lib.util import Utilities
 
 from dspace_reports.repository_indexer import RepositoryIndexer
 
 
 class RunRepositoryIndexer():
+    """Class for indexing repository statistics"""
+
     def __init__(self, config=None, logger=None):
         if config is None:
             print("ERROR: A configuration file is required to create the stats indexer.")
@@ -23,32 +26,40 @@ def __init__(self, config=None, logger=None):
             self.logger = logging.getLogger('dspace-reports')
 
     def run(self):
+        """Function to run repository indexer"""
+
         # Create repository stats indexer
         repository_indexer = RepositoryIndexer(config=self.config, logger=self.logger)
-        
+
         # Index repository stats from Solr
         repository_indexer.index()
 
 
 def main():
-    parser = OptionParser()
+    """Main function"""
 
+    parser = argparse.ArgumentParser(
+        prog='Repository Indexer',
+        description='Commands to index repository statistics')
 
-    parser.add_option("-c", "--config", dest="config_file", default="config/application.yml", help="Configuration file")
-    parser.add_option("-o", "--output_dir", dest="output_dir", help="Directory for results files.")
+    parser.add_argument("-c", "--config", dest="config_file", action='store', type=str,
+                        default="config/application.yml", help="Configuration file")
+    parser.add_argument("-o", "--output_dir", dest="output_dir", action='store', type=str,
+                        help="Directory for results files.")
 
-    (options, args) = parser.parse_args()
+    args = parser.parse_args()
 
     # Create utilities object
     utilities = Utilities()
 
     # Check required options fields
-    if options.output_dir is None:
+    if args.output_dir is None:
         parser.print_help()
         parser.error("Must specify an output directory.")
 
     # Load config
-    print("Loading configuration from file: %s", options.config_file)
-    config = utilities.load_config(options.config_file)
+    print(f"Loading configuration from file: {args.config_file}")
+    config = utilities.load_config(args.config_file)
     if not config:
         print("ERROR: Unable to load configuration.")
         sys.exit(1)
@@ -62,7 +73,7 @@ def main():
         work_dir = work_dir + '/'
 
     # Ensure output_dir has trailing slash
-    output_dir = options.output_dir
+    output_dir = args.output_dir
     if output_dir[len(output_dir)-1] != '/':
         output_dir = output_dir + '/'
 
@@ -74,10 +85,9 @@ def main():
 
     # Create stats indexer
     indexer = RunRepositoryIndexer(config=config, logger=logger)
-    
+
     # Get repository statistics from Solr
     indexer.run()
-
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 9bb6309..0000000
--- a/setup.py
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env python
-
-from setuptools import setup
-
-setup(
-    name = 'dspace-reports',
-    version='1.3.0-SNAPSHOT',
-    url = 'https://github.com/TexasDigitalLibrary/dspace-reports',
-    author = 'Nicholas Woodward',
-    author_email = 'njw@austin.utexas.edu',
-    license = 'MIT',
-    packages = ['dspace-reports'],
-    install_requires = [''],
-    description = 'Generate and email statistical reports for content stored in a DSpace repository - https://github.com/DSpace/DSpace',
-    classifiers = [
-        "Development Status :: 5 - Production/Stable",
-        "Intended Audience :: Developers",
-        "License :: OSI Approved :: MIT License",
-        "Environment :: Console",
-        "Programming Language :: Python :: 3",
-    ],
-    test_suite = 'test',
-)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29

From 24755853346bc228dc78b902a7021d99f1524ea2 Mon Sep 17 00:00:00 2001
From: nwoodward
Date: Wed, 17 Jul 2024 11:05:44 -0500
Subject: [PATCH 07/22] continued WIP refactor: lots of updates to indexing,
 switched from OAI-PMH to REST API

---
 dspace_reports/collection_indexer.py | 149 +++++++++-----------
 dspace_reports/community_indexer.py  |  59 ++++----
 dspace_reports/item_indexer.py       | 102 ++++++--------
 dspace_reports/repository_indexer.py |  64 ++++-----
 lib/api.py                           | 200 +++++++++++++++++++++------
 pyproject.toml                       |   4 +-
 run_cron.py                          |   2 +-
 run_reports.py                       |   2 +-
 8 files changed, 330 insertions(+), 252 deletions(-)

diff --git a/dspace_reports/collection_indexer.py b/dspace_reports/collection_indexer.py
index 330f684..3cf565f 100644
--- a/dspace_reports/collection_indexer.py
+++ b/dspace_reports/collection_indexer.py
@@ -18,72 +18,52 @@ def index(self):
 
     def index_collections(self):
         """Index the collections in the repository"""
 
-        # List of collections
-        collections = []
-
-        # Get top level communities
-        top_communities = self.rest.get_top_level_communities()
-
-        if 'community' in top_communities:
-            communities = top_communities['community']
-            self.logger.info("Repository has %s top-level communities.", str(len(communities)))
+        # Get a list of all collections from the REST API
+        collections = self.rest.get_collections()
+        for collection in collections:
+            collection_uuid = collection['uuid']
+            collection_name = collection['name']
+            self.logger.info("Loading collection: %s (%s)...", collection_name, collection_uuid)
+
+            # Get collection metadata, including parent community name
+            collection_handle = collection['handle']
+            collection_url = self.base_url + collection_handle
+
+            parent_community_name = "Unknown"
+            parent_community = self.rest.get_collection_parent_community(
+                collection_uuid=collection_uuid)
+            if parent_community is not None and 'name' in parent_community:
+                parent_community_name = parent_community['name']
+
+            if len(collection_name) > 255:
+                self.logger.debug("Collection name is longer than 255 characters. It will be shortened to that length.")
+                collection_name = collection_name[0:251] + "..."
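+            # The insert below builds SQL with an f-string, which requires manual
+            # quoting of each value; an equivalent parameterized form (a sketch
+            # using psycopg2 placeholders, same table and columns) would avoid
+            # the quoting and the injection risk:
+            #
+            #   cursor.execute(
+            #       "INSERT INTO collection_stats "
+            #       "(parent_community_name, collection_id, collection_name, collection_url) "
+            #       "VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING",
+            #       (parent_community_name, collection_uuid, collection_name, collection_url))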
+ + # Insert the collection into the database + with Database(self.config['statistics_db']) as db: + with db.cursor() as cursor: + cursor.execute(f"INSERT INTO collection_stats (parent_community_name, collection_id, collection_name, collection_url) VALUES ('{parent_community_name}', '{collection_uuid}', '{collection_name}', '{collection_url}') ON CONFLICT DO NOTHING") + db.commit() - for community in communities: - self.load_collections_recursive(collections, community) - else: - self.logger.info("Repository has no communities.") + for time_period in self.time_periods: + self.logger.info("Indexing items for collection: %s (%s)", collection_name, + collection_uuid) + self.index_collection_items(collection_uuid=collection_uuid, time_period=time_period) + # Index all views and downloads of collections for time_period in self.time_periods: - self.logger.info("Updating views statistics for collections during time period: %s", time_period) + self.logger.info("Updating views statistics for collections during time period: %s", + time_period) self.index_collection_views(time_period=time_period) - self.logger.info("Updating downloads statistics for collection during time period: %s", time_period) + self.logger.info("Updating downloads statistics for collection during time period: %s", + time_period) self.index_collection_downloads(time_period=time_period) - def load_collections_recursive(self, collections, community): - """Load all collections recursively""" - - community_id = community['id'] - community_name = community['name'] - self.logger.info("Loading collections of community %s (%s)", community_name, community_id) - - if 'collection' in community: - collections = community['collection'] - self.logger.info("Community has %s collections.", str(len(collections))) - for collection in collections: - collection_id = collection['id'] - collection_name = collection['name'] - collection_handle = collection['handle'] - collection_url = self.base_url + collection_handle - self.logger.info("Loading collection: %s (%s)...", collection_name, collection_id) - - if len(collection_name) > 255: - self.logger.debug("Collection name is longer than 255 characters. It will be shortened to that length.") - collection_name = collection_name[0:251] + "..." 
- - # Insert the collection into the database - with Database(self.config['statistics_db']) as db: - with db.cursor() as cursor: - cursor.execute(f"INSERT INTO collection_stats (parent_community_name, collection_id, collection_name, collection_url) VALUES ({community_name}, {collection_id}, {collection_name}, {collection_url}) ON CONFLICT DO NOTHING") - db.commit() - - for time_period in self.time_periods: - self.logger.info("Indexing items for collection: %s (%s)", collection_id, collection_name) - self.index_collection_items(collection_id=collection_id, time_period=time_period) - else: - self.logger.info("There are no collections in this community.") - - if 'community' in community: - sub_communities = community['community'] - for sub_community in sub_communities: - self.load_collections_recursive(collections, sub_community) - else: - self.logger.info("There are no subcommunities in this community.") - - def index_collection_items(self, collection_id=None, time_period=None): + def index_collection_items(self, collection_uuid=None, time_period=None): """Index the collection items""" - if collection_id is None or time_period is None: + if collection_uuid is None or time_period is None: return # Create base Solr URL @@ -111,8 +91,8 @@ def index_collection_items(self, collection_id=None, time_period=None): else: self.logger.error("Error creating date range.") - # Add community UUID to query parameter - solr_query_params['q'] = solr_query_params['q'] + " AND location.coll:" + collection_id + # Add collection UUID to query parameter + solr_query_params['q'] = solr_query_params['q'] + " AND location.coll:" + collection_uuid # Make call to Solr for items statistics response = self.solr.call(url=solr_url, params=solr_query_params) @@ -130,14 +110,14 @@ def index_collection_items(self, collection_id=None, time_period=None): with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET items_last_month = {results_total_items} WHERE collection_id = '{collection_id}'")) - cursor.execute(f"UPDATE collection_stats SET items_last_month = {results_total_items} WHERE collection_id = '{collection_id}'") + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET items_last_month = {results_total_items} WHERE collection_id = '{collection_uuid}'")) + cursor.execute(f"UPDATE collection_stats SET items_last_month = {results_total_items} WHERE collection_id = '{collection_uuid}'") elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET items_academic_year = {results_total_items} WHERE collection_id = '{collection_id}'")) - cursor.execute(f"UPDATE collection_stats SET items_academic_year = {results_total_items} WHERE collection_id = '{collection_id}'") + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET items_academic_year = {results_total_items} WHERE collection_id = '{collection_uuid}'")) + cursor.execute(f"UPDATE collection_stats SET items_academic_year = {results_total_items} WHERE collection_id = '{collection_uuid}'") else: - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET items_total = {results_total_items} WHERE collection_id = '{collection_id}'")) - cursor.execute(f"UPDATE collection_stats SET items_total = {results_total_items} WHERE collection_id = '{collection_id}'") + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET items_total = {results_total_items} WHERE collection_id = '{collection_uuid}'")) + 
cursor.execute(f"UPDATE collection_stats SET items_total = {results_total_items} WHERE collection_id = '{collection_uuid}'") # Commit changes db.commit() @@ -154,7 +134,7 @@ def index_collection_views(self, time_period=None): # Default Solr params solr_query_params = { "q": f"type:2 AND owningColl:/.{{36}}/", - "fq": "-isBot:true AND statistics_type:view", + "fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL", "fl": "owningColl", "facet": "true", "facet.field": "owningColl", @@ -186,7 +166,7 @@ def index_collection_views(self, time_period=None): self.logger.info("Calling Solr total collection views in collections: %s", response.url) try: - # get total number of distinct facets (countDistinct) + # Get total number of distinct facets (countDistinct) results_total_num_facets = response.json()["stats"]["stats_fields"]["owningColl"][ "countDistinct" ] @@ -194,7 +174,7 @@ def index_collection_views(self, time_period=None): self.logger.info("No collection views to index.") return - # divide results into "pages" and round up to next integer + # Divide results into "pages" and round up to next integer results_per_page = 100 results_num_pages = math.ceil(results_total_num_facets / results_per_page) results_current_page = 0 @@ -210,7 +190,7 @@ def index_collection_views(self, time_period=None): # Solr params for current page solr_query_params = { "q": f"type:2 AND owningColl:/.{{36}}/", - "fq": "-isBot:true AND statistics_type:view", + "fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL", "fl": "owningColl", "facet": "true", "facet.field": "owningColl", @@ -236,16 +216,16 @@ def index_collection_views(self, time_period=None): # Solr returns facets as a dict of dicts (see json.nl parameter) views = response.json()["facet_counts"]["facet_fields"] # Iterate over the facetField dict and get the ids and views - for collection_id, collection_views in views["owningColl"].items(): + for collection_uuid, collection_views in views["owningColl"].items(): if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_last_month = {collection_views} WHERE collection_id = '{collection_id}'")) - cursor.execute(f"UPDATE collection_stats SET views_last_month = {collection_views} WHERE collection_id = '{collection_id}'") + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_last_month = {collection_views} WHERE collection_id = '{collection_uuid}'")) + cursor.execute(f"UPDATE collection_stats SET views_last_month = {collection_views} WHERE collection_id = '{collection_uuid}'") elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_academic_year = {collection_views} WHERE collection_id = '{collection_id}'")) - cursor.execute(f"UPDATE collection_stats SET views_academic_year = {collection_views} WHERE collection_id = '{collection_id}'") + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_academic_year = {collection_views} WHERE collection_id = '{collection_uuid}'")) + cursor.execute(f"UPDATE collection_stats SET views_academic_year = {collection_views} WHERE collection_id = '{collection_uuid}'") else: - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_total = {collection_views} WHERE collection_id = '{collection_id}'")) - cursor.execute(f"UPDATE collection_stats SET views_total = {collection_views} WHERE collection_id = '{collection_id}'") + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_total = {collection_views} WHERE collection_id 
= '{collection_uuid}'"))
+                        cursor.execute(f"UPDATE collection_stats SET views_total = {collection_views} WHERE collection_id = '{collection_uuid}'")
 
                 # Commit changes to database
                 db.commit()
@@ -335,7 +315,8 @@ def index_collection_downloads(self, time_period=None):
             }
 
             if len(date_range) == 2:
-                self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1])
+                self.logger.info("Searching date range: %s - %s", date_range[0],
+                                 date_range[1])
                 if date_range[0] is not None and date_range[1] is not None:
                     date_start = date_range[0]
                     date_end = date_range[1]
@@ -347,16 +328,16 @@ def index_collection_downloads(self, time_period=None):
                 # Solr returns facets as a dict of dicts (see json.nl parameter)
                 downloads = response.json()["facet_counts"]["facet_fields"]
                 # Iterate over the facetField dict and get the ids and views
-                for collection_id, collection_downloads in downloads["owningColl"].items():
+                for collection_uuid, collection_downloads in downloads["owningColl"].items():
                     if time_period == 'month':
-                        self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_last_month = {collection_downloads} WHERE collection_id = '{collection_id}'"))
-                        cursor.execute(f"UPDATE collection_stats SET downloads_last_month = {collection_downloads} WHERE collection_id = '{collection_id}'")
+                        self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_last_month = {collection_downloads} WHERE collection_id = '{collection_uuid}'"))
+                        cursor.execute(f"UPDATE collection_stats SET downloads_last_month = {collection_downloads} WHERE collection_id = '{collection_uuid}'")
                     elif time_period == 'year':
-                        self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_academic_year = {collection_downloads} WHERE collection_id = '{collection_id}'"))
-                        cursor.execute(f"UPDATE collection_stats SET downloads_academic_year = {collection_downloads} WHERE collection_id = '{collection_id}'")
+                        self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_academic_year = {collection_downloads} WHERE collection_id = '{collection_uuid}'"))
+                        cursor.execute(f"UPDATE collection_stats SET downloads_academic_year = {collection_downloads} WHERE collection_id = '{collection_uuid}'")
                     else:
-                        self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_total = {collection_downloads} WHERE collection_id = '{collection_id}'"))
-                        cursor.execute(f"UPDATE collection_stats SET downloads_total = {collection_downloads} WHERE collection_id = '{collection_id}'")
+                        self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_total = {collection_downloads} WHERE collection_id = '{collection_uuid}'"))
+                        cursor.execute(f"UPDATE collection_stats SET downloads_total = {collection_downloads} WHERE collection_id = '{collection_uuid}'")
 
                 # Commit changes to database
                 db.commit()
diff --git a/dspace_reports/community_indexer.py b/dspace_reports/community_indexer.py
index 38a3b59..e4bf87d 100644
--- a/dspace_reports/community_indexer.py
+++ b/dspace_reports/community_indexer.py
@@ -45,23 +45,24 @@ def load_communities_recursive(self, communities, community, parent_community_na
         """Load all communities recursively"""
 
         # Extract metadata
-        community_id = community['id']
+        community_uuid = community['uuid']
         community_name = community['name']
         community_handle = community['handle']
         community_url = self.base_url + community_handle
-        self.logger.info("Loading community: %s (%s)...", community_name, community_id)
+        self.logger.info("Loading community: %s (%s)...", community_name, community_uuid)
 
         # Insert the community into the database
         with Database(self.config['statistics_db']) as db:
             with db.cursor() as cursor:
-                self.logger.debug(cursor.mogrify(f"INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES ({community_id}, {community_name}, {community_url}, {parent_community_name})"))
-                cursor.execute(f"INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES ({community_id}, {community_name}, {community_url}, {parent_community_name})")
+                self.logger.debug(cursor.mogrify(f"INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES ('{community_uuid}', '{community_name}', '{community_url}', '{parent_community_name}')"))
+                cursor.execute(f"INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES ('{community_uuid}', '{community_name}', '{community_url}', '{parent_community_name}')")
                 db.commit()
 
         # Index views and downloads for the current community
         for time_period in self.time_periods:
-            self.logger.info("Indexing items for community: %s (%s)", community_id, community_name)
-            self.index_community_items(community_id=community_id, time_period=time_period)
+            self.logger.info("Indexing items for community: %s (%s)", community_name,
+                             community_uuid)
+            self.index_community_items(community_uuid=community_uuid, time_period=time_period)
 
         # Load sub communities
         if 'community' in community:
@@ -72,10 +73,10 @@ def load_communities_recursive(self, communities, community, parent_community_na
         else:
             self.logger.info("There are no subcommunities in this community.")
 
-    def index_community_items(self, community_id=None, time_period=None):
+    def index_community_items(self, community_uuid=None, time_period=None):
         """Index the community items"""
 
-        if community_id is None or time_period is None:
+        if community_uuid is None or time_period is None:
             return None
 
         # Create base Solr URL
@@ -106,7 +107,7 @@ def index_community_items(self, community_uuid=None, time_period=None):
             self.logger.error("Error creating date range.")
 
         # Add community UUID to query parameter
-        solr_query_params['q'] = solr_query_params['q'] + " AND location.comm:" + community_id
+        solr_query_params['q'] = solr_query_params['q'] + " AND location.comm:" + community_uuid
 
         # Make call to Solr for items statistics
         response = self.solr.call(url=solr_url, params=solr_query_params)
@@ -124,14 +125,14 @@ def index_community_items(self, community_uuid=None, time_period=None):
         with Database(self.config['statistics_db']) as db:
             with db.cursor() as cursor:
                 if time_period == 'month':
-                    self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET items_last_month = {results_total_items} WHERE community_id = '{community_id}'"))
-                    cursor.execute(f"UPDATE community_stats SET items_last_month = {results_total_items} WHERE community_id = '{community_id}'")
+                    self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET items_last_month = {results_total_items} WHERE community_id = '{community_uuid}'"))
+                    cursor.execute(f"UPDATE community_stats SET items_last_month = {results_total_items} WHERE community_id = '{community_uuid}'")
                 elif time_period == 'year':
-                    self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET items_academic_year = {results_total_items} WHERE community_id = '{community_id}'"))
-                    cursor.execute(f"UPDATE community_stats SET items_academic_year = {results_total_items} WHERE community_id = '{community_id}'")
+                    self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET items_academic_year = {results_total_items} WHERE community_id = '{community_uuid}'"))
+                    cursor.execute(f"UPDATE community_stats SET items_academic_year = {results_total_items} WHERE community_id = '{community_uuid}'")
                 else:
-                    self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET items_total = {results_total_items} WHERE community_id = '{community_id}'"))
-                    cursor.execute(f"UPDATE community_stats SET items_total = {results_total_items} WHERE community_id = '{community_id}'")
+                    self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET items_total = {results_total_items} WHERE community_id = '{community_uuid}'"))
+                    cursor.execute(f"UPDATE community_stats SET items_total = {results_total_items} WHERE community_id = '{community_uuid}'")
 
                 # Commit changes
                 db.commit()
@@ -233,17 +234,17 @@ def index_community_views(self, time_period=None):
                 # Solr returns facets as a dict of dicts (see json.nl parameter)
                 views = response.json()["facet_counts"]["facet_fields"]
                 # iterate over the facetField dict and get the ids and views
-                for community_id, community_views in views["owningComm"].items():
-                    if len(id) == 36:
+                for community_uuid, community_views in views["owningComm"].items():
+                    if len(community_uuid) == 36:
                         if time_period == 'month':
-                            self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET views_last_month = {community_views} WHERE community_id = '{community_id}'"))
-                            cursor.execute(f"UPDATE community_stats SET views_last_month = {community_views} WHERE community_id = '{community_id}'")
+                            self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET views_last_month = {community_views} WHERE community_id = '{community_uuid}'"))
+                            cursor.execute(f"UPDATE community_stats SET views_last_month = {community_views} WHERE community_id = '{community_uuid}'")
                         elif time_period == 'year':
-                            self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET views_academic_year = {community_views} WHERE community_id = '{community_id}'"))
-                            cursor.execute(f"UPDATE community_stats SET views_academic_year = {community_views} WHERE community_id = '{community_id}'")
+                            self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET views_academic_year = {community_views} WHERE community_id = '{community_uuid}'"))
+                            cursor.execute(f"UPDATE community_stats SET views_academic_year = {community_views} WHERE community_id = '{community_uuid}'")
                         else:
-                            self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET views_total = {community_views} WHERE community_id = '{community_id}'"))
-                            cursor.execute(f"UPDATE community_stats SET views_total = {community_views} WHERE community_id = '{community_id}'")
+                            self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET views_total = {community_views} WHERE community_id = '{community_uuid}'"))
+                            cursor.execute(f"UPDATE community_stats SET views_total = {community_views} WHERE community_id = '{community_uuid}'")
                     else:
-                        self.logger.warning("owningComm value is not a UUID: %s", id)
+                        self.logger.warning("owningComm value is not a UUID: %s", community_uuid)
 
@@ -346,17 +347,17 @@ def index_community_downloads(self, time_period=None):
                 # Solr returns facets as a dict of dicts (see json.nl parameter)
                 downloads = response.json()["facet_counts"]["facet_fields"]
                 # iterate over the facetField dict and get the ids and views
-                for community_id, community_downloads in downloads["owningComm"].items():
-                    if len(id) == 36:
+                for community_uuid, community_downloads in downloads["owningComm"].items():
+                    if len(community_uuid) == 36:
                         if time_period == 'month':
-                            self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET downloads_last_month = {community_downloads} WHERE community_id = '{community_id}'"))
-                            cursor.execute(f"UPDATE community_stats SET downloads_last_month = {community_downloads} WHERE community_id = '{community_id}'")
+                            self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET downloads_last_month = {community_downloads} WHERE community_id = '{community_uuid}'"))
+                            cursor.execute(f"UPDATE community_stats SET downloads_last_month = {community_downloads} WHERE community_id = '{community_uuid}'")
                         elif time_period == 'year':
-                            self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET downloads_academic_year = {community_downloads} WHERE community_id ='{community_id}'"))
-                            cursor.execute(f"UPDATE community_stats SET downloads_academic_year = {community_downloads} WHERE community_id = '{community_id}'")
+                            self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET downloads_academic_year = {community_downloads} WHERE community_id = '{community_uuid}'"))
+                            cursor.execute(f"UPDATE community_stats SET downloads_academic_year = {community_downloads} WHERE community_id = '{community_uuid}'")
                         else:
-                            self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET downloads_total = {community_downloads} WHERE community_id = '{community_id}'"))
-                            cursor.execute(f"UPDATE community_stats SET downloads_total = {community_downloads} WHERE community_id = '{community_id}'")
+                            self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET downloads_total = {community_downloads} WHERE community_id = '{community_uuid}'"))
+                            cursor.execute(f"UPDATE community_stats SET downloads_total = {community_downloads} WHERE community_id = '{community_uuid}'")
                     else:
-                        self.logger.warning("owningComm value is not a UUID: %s", id)
+                        self.logger.warning("owningComm value is not a UUID: %s", community_uuid)
 
diff --git a/dspace_reports/item_indexer.py b/dspace_reports/item_indexer.py
index dbb049f..88953de 100644
--- a/dspace_reports/item_indexer.py
+++ b/dspace_reports/item_indexer.py
@@ -16,13 +16,6 @@ class ItemIndexer(Indexer):
     def __init__(self, config, logger):
         super().__init__(config, logger)
 
-        # Create OAI-PMH server object
-        self.oai = DSpaceOai(oai_server=config['oai_server'])
-        if self.oai is None:
-            self.logger.error("Unable to create Indexer due to earlier failures creating a " +
-                              "connection to OAI-PMH feed.")
-            sys.exit(1)
-
         # Set time periods to only month and year as all can cause Solr to crash
         self.time_periods = ['month', 'year', 'all']
@@ -30,69 +23,54 @@ def __init__(self, config, logger):
         self.delay = config['delay']
 
     def index(self):
-        # Get list of identifiers from OAI-PMH feed
-        records = self.oai.get_records()
-        total_records = len(records)
-        self.logger.info("Found %s records in OAI-PMH feed.", str(total_records))
+        # Get list of identifiers from REST API
+        items = self.rest.get_items()
+        total_items = len(items)
+        self.logger.info("Found %s records in REST API.", str(total_items))
 
         # Keep a count of items processed
-        count_records = 0
-        count_missing_records = 0
+        count_items = 0
 
-        # Iterate over OAI-PMH records and call REST API for addiional metadata
+        # Iterate over records and call REST API for additional metadata
         with Database(self.config['statistics_db']) as db:
             with db.cursor() as cursor:
-                for record in records:
-                    count_records = count_records + 1
-                    self.logger.info("(%s/%s) - Calling REST API for record: %s",
-                                     str(count_records), str(total_records), record)
-
-                    metadata_entry = '{"key":"dc.identifier.uri", "value":"%s"}' %(record)
-                    items = self.rest.find_items_by_metadata_field(metadata_entry=metadata_entry,
-                                                                   expand=['parentCollection'])
-                    if len(items) == 1:
-                        item = items[0]
-                        item_id = item['uuid']
-                        item_name = item['name']
-
-                        # Attempt to get collection name
-                        item_collection_name = "Unknown"
-                        if 'parentCollection' in item:
-                            item_collection = item['parentCollection']
-                            item_collection_name = item_collection['name']
-
-                            if len(item_collection_name) > 255:
-                                self.logger.debug("Collection name is longer than 255 characters. " +
-                                                  "It will be shortened to that length.")
-                                item_collection_name = item_collection_name[0:251] + "..."
-
-                        self.logger.info("Item collection: %s ", item_collection_name)
-
-                        # If name is null then use "Untitled"
-                        if item_name is not None:
-                            # If item name is longer than 255 characters then shorten it
-                            # to fit in database field
-                            if len(item_name) > 255:
-                                item_name = item_name[0:251] + "..."
-                        else:
-                            item_name = "Untitled"
-
-                        # Create handle URL for item
-                        item_url = self.base_url + item['handle']
-
-                        self.logger.debug(cursor.mogrify(f"INSERT INTO item_stats (collection_name, item_id, item_name, item_url) VALUES ({item_collection_name}, {item_id}, {item_name}, {item_url}) ON CONFLICT DO NOTHING"))
-                        cursor.execute(f"INSERT INTO item_stats (collection_name, item_id, item_name, item_url) VALUES ({item_collection_name}, {item_id}, {item_name}, {item_url}) ON CONFLICT DO NOTHING")
-                        db.commit()
-                    else:
-                        count_missing_records += 1
-                        self.logger.error("Unable to find item in REST API: %s", record)
-
-        self.logger.info("Total records in OAI-PMH feed: %s", str(len(records)))
-
-        if count_missing_records > 0 and total_records > 0:
-            self.logger.info("Total records missing in OAI-PMH feed: %s (%.0f%%)",
-                             str(count_missing_records),
-                             (100 * count_missing_records/total_records))
+                for item in items:
+                    count_items += 1
+
+                    # Get item metadata
+                    item_uuid = item['uuid']
+                    item_name = item['name']
+
+                    # Attempt to get collection name
+                    item_owning_collection_name = "Unknown"
+                    item_owning_collection = self.rest.get_item_owning_collection(
+                        item_uuid=item_uuid)
+                    if item_owning_collection is not None:
+                        self.logger.info(item_owning_collection)
+                        item_owning_collection_name = item_owning_collection['name']
+
+                        if len(item_owning_collection_name) > 255:
+                            self.logger.debug("Collection name is longer than 255 characters. " +
+                                              "It will be shortened to that length.")
+                            item_owning_collection_name = item_owning_collection_name[0:251] + "..."
+
+                    self.logger.info("Item owning collection: %s ", item_owning_collection_name)
+
+                    # If name is null then use "Untitled"
+                    if item_name is not None:
+                        # If item name is longer than 255 characters then shorten it
+                        # to fit in database field
+                        if len(item_name) > 255:
+                            item_name = item_name[0:251] + "..."
+                    else:
+                        item_name = "Untitled"
+
+                    # Create handle URL for item
+                    item_url = self.base_url + item['handle']
+
+                    self.logger.debug(cursor.mogrify(f"INSERT INTO item_stats (collection_name, item_id, item_name, item_url) VALUES ('{item_owning_collection_name}', '{item_uuid}', '{item_name}', '{item_url}') ON CONFLICT DO NOTHING"))
+                    cursor.execute(f"INSERT INTO item_stats (collection_name, item_id, item_name, item_url) VALUES ('{item_owning_collection_name}', '{item_uuid}', '{item_name}', '{item_url}') ON CONFLICT DO NOTHING")
+                    db.commit()
 
         for time_period in self.time_periods:
             self.logger.info("Indexing Solr views for time period: %s ", time_period)
diff --git a/dspace_reports/repository_indexer.py b/dspace_reports/repository_indexer.py
index 8d1c8f6..aa13af7 100644
--- a/dspace_reports/repository_indexer.py
+++ b/dspace_reports/repository_indexer.py
@@ -17,40 +17,40 @@ def index_repository(self):
         """Index the entire repository"""
 
         # Get repository information
-        repository_id = 0
+        repository_uuid = 0
         repository_name = "Unknown"
 
         site = self.rest.get_site()
         if 'uuid' in site:
-            repository_id = site['uuid']
+            repository_uuid = site['uuid']
         if 'name' in site:
             repository_name = site['name']
 
-        self.logger.info("Indexing Repository: %s (UUID: %s)", repository_name, repository_id)
+        self.logger.info("Indexing Repository: %s (UUID: %s)", repository_name, repository_uuid)
 
         with Database(self.config['statistics_db']) as db:
             with db.cursor() as cursor:
-                self.logger.debug(cursor.mogrify(f"INSERT INTO repository_stats (repository_id, repository_name) VALUES ({repository_id}, {repository_name})"))
-                cursor.execute(f"INSERT INTO repository_stats (repository_id, repository_name) VALUES ({repository_id}, {repository_name})")
+                self.logger.debug(cursor.mogrify(f"INSERT INTO repository_stats (repository_id, repository_name) VALUES ('{repository_uuid}', '{repository_name}')"))
+                cursor.execute(f"INSERT INTO repository_stats (repository_id, repository_name) VALUES ('{repository_uuid}', '{repository_name}')")
                 db.commit()
 
         # Index views and downloads for the repository
         for time_period in self.time_periods:
             self.logger.info("Indexing repository items.")
-            self.index_repository_items(repository_id=repository_id, time_period=time_period)
+            self.index_repository_items(repository_uuid=repository_uuid, time_period=time_period)
 
             self.logger.info("Indexing repository views.")
-            self.index_repository_views(repository_id=repository_id, time_period=time_period)
+            self.index_repository_views(repository_uuid=repository_uuid, time_period=time_period)
 
             self.logger.info("Indexing repository downloads.")
-            self.index_repository_downloads(repository_id=repository_id, time_period=time_period)
+            self.index_repository_downloads(repository_uuid=repository_uuid, time_period=time_period)
 
-    def index_repository_items(self, repository_id=None, time_period=None):
+    def index_repository_items(self, repository_uuid=None, 
time_period=None): with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET items_last_month = {results_total_items} WHERE repository_id = '{repository_id}'")) - cursor.execute(f"UPDATE repository_stats SET items_last_month = {results_total_items} WHERE repository_id = '{repository_id}'") + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET items_last_month = {results_total_items} WHERE repository_id = '{repository_uuid}'")) + cursor.execute(f"UPDATE repository_stats SET items_last_month = {results_total_items} WHERE repository_id = '{repository_uuid}'") elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET items_academic_year = {results_total_items} WHERE repository_id = '{repository_id}'")) - cursor.execute(f"UPDATE repository_stats SET items_academic_year = {results_total_items} WHERE repository_id = '{repository_id}'") + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET items_academic_year = {results_total_items} WHERE repository_id = '{repository_uuid}'")) + cursor.execute(f"UPDATE repository_stats SET items_academic_year = {results_total_items} WHERE repository_id = '{repository_uuid}'") else: - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET items_total = {results_total_items} WHERE repository_id = '{repository_id}'")) - cursor.execute(f"UPDATE repository_stats SET items_total = {results_total_items} WHERE repository_id = '{repository_id}'") + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET items_total = {results_total_items} WHERE repository_id = '{repository_uuid}'")) + cursor.execute(f"UPDATE repository_stats SET items_total = {results_total_items} WHERE repository_id = '{repository_uuid}'") # Commit changes db.commit() - def index_repository_views(self, repository_id=None, time_period=None): + def index_repository_views(self, repository_uuid=None, time_period=None): """Index repository views""" - if repository_id is None or time_period is None: + if repository_uuid is None or time_period is None: return # Create base Solr url @@ -157,22 +157,22 @@ def index_repository_views(self, repository_id=None, time_period=None): with db.cursor() as cursor: self.logger.info("Setting repository views stats with %s views for time period: %s", str(results_num_found), time_period) if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET views_last_month = {results_num_found} WHERE repository_id = '{repository_id}'")) - cursor.execute(f"UPDATE repository_stats SET views_last_month = {results_num_found} WHERE repository_id = '{repository_id}'") + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET views_last_month = {results_num_found} WHERE repository_id = '{repository_uuid}'")) + cursor.execute(f"UPDATE repository_stats SET views_last_month = {results_num_found} WHERE repository_id = '{repository_uuid}'") elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET views_academic_year = {results_num_found} WHERE repository_id = '{repository_id}'")) - cursor.execute(f"UPDATE repository_stats SET views_academic_year = {results_num_found} WHERE repository_id = '{repository_id}'") + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET views_academic_year = {results_num_found} WHERE repository_id = '{repository_uuid}'")) + cursor.execute(f"UPDATE repository_stats SET views_academic_year = {results_num_found} 
WHERE repository_id = '{repository_uuid}'") else: - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET views_total = {results_num_found} WHERE repository_id = '{repository_id}'")) - cursor.execute(f"UPDATE repository_stats SET views_total = {results_num_found} WHERE repository_id = '{repository_id}'") + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET views_total = {results_num_found} WHERE repository_id = '{repository_uuid}'")) + cursor.execute(f"UPDATE repository_stats SET views_total = {results_num_found} WHERE repository_id = '{repository_uuid}'") # Commit changes db.commit() - def index_repository_downloads(self, repository_id=None, time_period=None): + def index_repository_downloads(self, repository_uuid=None, time_period=None): """Index repository downloads""" - if repository_id is None or time_period is None: + if repository_uuid is None or time_period is None: return # Get Solr shards @@ -221,14 +221,14 @@ def index_repository_downloads(self, repository_id=None, time_period=None): with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET downloads_last_month = downloads_last_month + {results_num_found} WHERE repository_id = '{repository_id}'")) - cursor.execute(f"UPDATE repository_stats SET downloads_last_month = downloads_last_month + {results_num_found} WHERE repository_id = '{repository_id}'") + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET downloads_last_month = downloads_last_month + {results_num_found} WHERE repository_id = '{repository_uuid}'")) + cursor.execute(f"UPDATE repository_stats SET downloads_last_month = downloads_last_month + {results_num_found} WHERE repository_id = '{repository_uuid}'") elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET downloads_academic_year = downloads_academic_year + {results_num_found} WHERE repository_id = '{repository_id}'")) - cursor.execute(f"UPDATE repository_stats SET downloads_academic_year = downloads_academic_year + {results_num_found} WHERE repository_id = '{repository_id}'") + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET downloads_academic_year = downloads_academic_year + {results_num_found} WHERE repository_id = '{repository_uuid}'")) + cursor.execute(f"UPDATE repository_stats SET downloads_academic_year = downloads_academic_year + {results_num_found} WHERE repository_id = '{repository_uuid}'") else: - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET downloads_total = downloads_total + {results_num_found} WHERE repository_id = '{repository_id}'")) - cursor.execute(f"UPDATE repository_stats SET downloads_total = downloads_total + {results_num_found} WHERE repository_id = '{repository_id}'") + self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET downloads_total = downloads_total + {results_num_found} WHERE repository_id = '{repository_uuid}'")) + cursor.execute(f"UPDATE repository_stats SET downloads_total = downloads_total + {results_num_found} WHERE repository_id = '{repository_uuid}'") # Commit changes db.commit() diff --git a/lib/api.py b/lib/api.py index 7001efc..f3bf411 100644 --- a/lib/api.py +++ b/lib/api.py @@ -120,26 +120,32 @@ def construct_url(self, command, params=None): final_url = self.api_url + command + parameters return final_url - def rest_call(self, call_type='GET', url='', headers=None, data=None): + def rest_call(self, call_type='GET', url='', params=None, data=None, 
headers=None): """Make call to REST API""" - if headers is None: - headers = self.session.headers + if params is None: + params = {} if data is None: data = {} + if headers is None: + headers = self.session.headers + self.logger.debug("Calling REST API with URL: %s", url) - if call_type == 'POST': - response = self.session.post(url, headers=headers, cookies=self.cookies, data=data) + if call_type == 'GET': + response = self.session.get(url, params=params, headers=headers, cookies=self.cookies) else: - response = self.session.get(url, headers=headers, cookies=self.cookies) + response = self.session.post(url, data=data, params=params, + cookies=self.cookies, headers=headers) - self.logger.debug(response.status_code) - self.logger.debug(response.text) - response_json = response.json() - return response_json + if response.status_code == 200: + return response.json() + + self.logger.error("Error while making rest call, (HTTP code: %s) %s", + response.status_code, response.text) + return None def get_site(self): """Get site information""" @@ -157,15 +163,25 @@ def get_site(self): def get_communities(self): """Get all communities""" + communities = [] communities_url = self.construct_url(command = 'core/communities') - communities = self.rest_call(url = communities_url) + communities_response = self.rest_call(url = communities_url) + if communities_response is not None and '_embedded' in communities_response: + if 'communities' in communities_response['_embedded']: + communities = communities_response['_embedded']['communities'] + return communities def get_top_level_communities(self): """Get top level communities""" + top_communities = [] top_communities_url = self.construct_url(command = 'core/communities/search/top') - top_communities = self.rest_call(url = top_communities_url) + top_communities_response = self.rest_call(url = top_communities_url) + if top_communities_response is not None and '_embedded' in top_communities_response: + if 'communities' in top_communities_response['_embedded']: + top_communities = top_communities_response['_embedded']['communities'] + return top_communities def get_community(self, community_uuid=None): @@ -173,10 +189,79 @@ def get_community(self, community_uuid=None): if community_uuid is None: return None + + community = None community_url = self.construct_url(command = f"core/communities/{community_uuid}") - community = self.rest_call(url = community_url) + community_response = self.rest_call(url = community_url) + if community_response is not None: + community = community_response[0] + return community + def get_collections(self, sort=None): + """Get all collections""" + + params = {} + if sort is not None: + params['sort'] = sort + + collections = [] + page = 0 + params['page'] = page + size = 20 + params['size'] = size + + collections_url = self.construct_url(command = 'core/collections') + total_collections = 0 + total_pages = 0 + + while True: + self.logger.info("Loading page %s of collections...", str(page)) + + collections_response = self.rest_call(url = collections_url, params = params) + if collections_response is not None and '_embedded' in collections_response: + # Get collections from this page of results + if 'collections' in collections_response['_embedded']: + self.logger.info(collections_response['_embedded']['collections']) + for collection_json in collections_response['_embedded']['collections']: + collections.append(collection_json) + + # Check API response for amount of total collections and pages + if 'page' in collections_response: + 
page_info = collections_response['page'] + if 'totalElements' in page_info: + total_collections = page_info['totalElements'] + if 'totalPages' in page_info: + total_pages = page_info['totalPages'] + + page += 1 + if total_pages > 0 and page == total_pages: + break + + params['page'] = page + else: + break + + # Sanity check to make sure all pages were retrieved + if len(collections) != total_collections: + self.logger.error("There was a problem retrieving collections from the API.") + self.logger.error("Collections retrieved: %s. Total collections reported by API: %s", + str(len(collections)), str(total_collections)) + else: + self.logger.info("Retrieved %s collection(s) from the REST API.", str(len(collections))) + + return collections + + def get_collection_parent_community(self, collection_uuid=None): + """Get Parent community of a given collection""" + + if collection_uuid is None: + return None + + parent_community_url = self.construct_url( + command = f"core/collections/{collection_uuid}/parentCommunity") + return self.rest_call(url = parent_community_url) + def get_collection_items(self, collection_uuid=None): """Get items of a collection""" @@ -186,39 +271,59 @@ def get_collection_items(self, collection_uuid=None): items = self.rest_call(url = items_url) return items - def get_items(self, expand=None): - """Get all items in the repository""" + def get_items(self, sort=None): + """Get all items""" - if expand is None: - expand = [] - - offset = 0 params = {} - expand_value = '' - all_items = [] + if sort is not None: + params['sort'] = sort - if len(expand) > 0: - expand_value = ','.join(expand) - params['expand'] = expand_value - self.logger.debug("Added expand list to parameters: %s ", expand_value) - - while True: - self.logger.debug("Retrieving items %s through %s from the REST API", offset, - offset + self.limit) - params['offset'] = offset - params['limit'] = self.limit + items = [] + page = 0 + params['page'] = page + size = 100 + params['size'] = size - items_url = self.construct_url(command = 'items', params = params) - self.logger.debug("Items Solr call: %s", items_url) - items = self.rest_call(url = items_url) + items_url = self.construct_url(command = 'core/items') + total_items = 0 + total_pages = 0 - if len(items) == 0: + while True: + self.logger.info("Loading page %s of items...", str(page)) + + items_response = self.rest_call(url = items_url, params = params) + if items_response is not None and '_embedded' in items_response: + # Get items from this page of results + if 'items' in items_response['_embedded']: + self.logger.info(items_response['_embedded']['items']) + for item_json in items_response['_embedded']['items']: + items.append(item_json) + + # Check API response for amount of total items and pages + if 'page' in items_response: + page_info = items_response['page'] + if 'totalElements' in page_info: + total_items = page_info['totalElements'] + if 'totalPages' in page_info: + total_pages = page_info['totalPages'] + + page += 1 + if total_pages > 0 and page == total_pages: + break + + params['page'] = page + else: break - all_items = all_items + items - offset = offset + self.limit + # Sanity check to make sure all pages were retrieved + if len(items) != total_items: + self.logger.error("There was a problem retrieving items from the API.") + self.logger.error("Items retrieved: %s. 
Total items reported by API: %s", + str(len(items)), str(total_items)) + else: + self.logger.info("Retrieved %s item(s) from the REST API.", str(len(items))) - return all_items + return items def find_items_by_metadata_field(self, metadata_entry=None, expand=None): """Find an item by any metadata field(s)""" @@ -232,6 +337,8 @@ def find_items_by_metadata_field(self, metadata_entry=None, expand=None): params = {} expand_value = '' + items = [] + if len(expand) > 0: expand_value = ','.join(expand) params['expand'] = expand_value @@ -245,16 +352,27 @@ def find_items_by_metadata_field(self, metadata_entry=None, expand=None): headers = self.request_headers, data = metadata_entry) return items - def get_item(self, item_id=None): + def get_item(self, item_uuid=None): """Get an individual item""" - if item_id is None: + if item_uuid is None: return None - item_url = self.construct_url(command = f"items/{item_id}") + item_url = self.construct_url(command = f"core/items/{item_uuid}") item = self.rest_call(url = item_url) return item + def get_item_owning_collection(self, item_uuid=None): + """Get owning collection of a given item""" + + if item_uuid is None: + return None + + item_owning_collection_url = self.construct_url( + command = f"core/items/{item_uuid}/owningCollection") + item_owning_collection = self.rest_call(url = item_owning_collection_url) + return item_owning_collection + def update_token(self, req): """Update CSRF token""" diff --git a/pyproject.toml b/pyproject.toml index 3c7899c..67e34f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "dspace-reports" -version = "1.3.0" -description = "A python3-based tool to generate and email views and downloads statistical reports for a DSpace repository." +version = "2.0-SNAPSHOT" +description = "A python3-based tool to generate and email views and downloads statistical reports for a DSpace 7+ repository."
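The rewritten get_collections() and get_items() above both walk the DSpace 7 REST API's paginated HAL responses, collecting each page's _embedded objects until the reported totalPages is reached. A minimal standalone sketch of that loop, assuming only the requests library; the base URL in the usage line is a placeholder and error handling is reduced to a bare status check:

    import requests

    def fetch_all(base_url, endpoint, size=100):
        """Collect every object from a paginated DSpace 7 REST endpoint.

        The _embedded key is assumed to match the endpoint's last path
        segment (e.g. 'core/items' -> 'items'), as in the patches above.
        """
        results = []
        page = 0
        key = endpoint.split('/')[-1]
        while True:
            response = requests.get(f"{base_url}/{endpoint}",
                                    params={'page': page, 'size': size},
                                    timeout=30)
            if response.status_code != 200:
                break
            body = response.json()
            results.extend(body.get('_embedded', {}).get(key, []))
            total_pages = body.get('page', {}).get('totalPages', 0)
            page += 1
            if total_pages == 0 or page >= total_pages:
                break
        return results

    # Usage (URL is illustrative):
    # items = fetch_all("https://repository.example.edu/server/api", "core/items")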
authors = ["Nicholas Woodward "] license = "GPLv3" readme = "README.md" diff --git a/run_cron.py b/run_cron.py index 94bb640..68d7156 100644 --- a/run_cron.py +++ b/run_cron.py @@ -20,7 +20,7 @@ def main(): parser.add_argument("-o", "--output_dir", dest="output_dir", action='store', type=str, help="Directory for results files.") parser.add_argument("-e", "--send_email", dest="send_email", - action=argparse.BooleanOptionalAction, + action='store_true', help="Send email with stats reports?") args = parser.parse_args() diff --git a/run_reports.py b/run_reports.py index bd40e57..a396b65 100644 --- a/run_reports.py +++ b/run_reports.py @@ -216,7 +216,7 @@ def main(): parser.add_argument("-o", "--output_dir", dest="output_dir", action='store', type=str, help="Directory for results files.") parser.add_argument("-e", "--send_email", dest="send_email", - action=argparse.BooleanOptionalAction, + action='store_true', help="Send email with stats reports?") args = parser.parse_args() From ad19acb920dbe7ec7df402a2bcc0738b43cbc7d4 Mon Sep 17 00:00:00 2001 From: nwoodward Date: Wed, 17 Jul 2024 11:45:18 -0500 Subject: [PATCH 08/22] added UUID check --- dspace_reports/collection_indexer.py | 42 +++++++++++++---------- dspace_reports/community_indexer.py | 18 +++++----- dspace_reports/item_indexer.py | 50 ++++++++++++++++------------ 3 files changed, 64 insertions(+), 46 deletions(-) diff --git a/dspace_reports/collection_indexer.py b/dspace_reports/collection_indexer.py index 3cf565f..0a946b9 100644 --- a/dspace_reports/collection_indexer.py +++ b/dspace_reports/collection_indexer.py @@ -215,17 +215,21 @@ def index_collection_views(self, time_period=None): # Solr returns facets as a dict of dicts (see json.nl parameter) views = response.json()["facet_counts"]["facet_fields"] - # Iterate over the facetField dict and get the ids and views + # Iterate over the facetField dict and get the UUIDs and views for collection_uuid, collection_views in views["owningColl"].items(): - if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_last_month = {collection_views} WHERE collection_id = '{collection_uuid}'")) - cursor.execute(f"UPDATE collection_stats SET views_last_month = {collection_views} WHERE collection_id = '{collection_uuid}'") - elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_academic_year = {collection_views} WHERE collection_id = '{collection_uuid}'")) - cursor.execute(f"UPDATE collection_stats SET views_academic_year = {collection_views} WHERE collection_id = '{collection_uuid}'") + if len(collection_uuid) == 36: + if time_period == 'month': + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_last_month = {collection_views} WHERE collection_id = '{collection_uuid}'")) + cursor.execute(f"UPDATE collection_stats SET views_last_month = {collection_views} WHERE collection_id = '{collection_uuid}'") + elif time_period == 'year': + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_academic_year = {collection_views} WHERE collection_id = '{collection_uuid}'")) + cursor.execute(f"UPDATE collection_stats SET views_academic_year = {collection_views} WHERE collection_id = '{collection_uuid}'") + else: + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_total = {collection_views} WHERE collection_id = '{collection_uuid}'")) + cursor.execute(f"UPDATE collection_stats SET views_total = {collection_views} WHERE collection_id = '{collection_uuid}'") else: - 
self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_total = {collection_views} WHERE collection_id = '{collection_uuid}'")) - cursor.execute(f"UPDATE collection_stats SET views_total = {collection_views} WHERE collection_id = '{collection_uuid}'") + self.logger.warning("owningColl value is not a UUID: %s", + collection_uuid) # Commit changes to database db.commit() @@ -329,15 +333,19 @@ def index_collection_downloads(self, time_period=None): downloads = response.json()["facet_counts"]["facet_fields"] # Iterate over the facetField dict and get the ids and views for collection_uuid, collection_downloads in downloads["owningColl"].items(): - if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_last_month = {collection_downloads} WHERE collection_id = '{collection_uuid}'")) - cursor.execute(f"UPDATE collection_stats SET downloads_last_month = {collection_downloads} WHERE collection_id = '{collection_uuid}'") - elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_academic_year = {collection_downloads} WHERE collection_id = '{collection_uuid}'")) - cursor.execute(f"UPDATE collection_stats SET downloads_academic_year = {collection_downloads} WHERE collection_id = '{collection_uuid}") + if len(collection_uuid) == 36: + if time_period == 'month': + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_last_month = {collection_downloads} WHERE collection_id = '{collection_uuid}'")) + cursor.execute(f"UPDATE collection_stats SET downloads_last_month = {collection_downloads} WHERE collection_id = '{collection_uuid}'") + elif time_period == 'year': + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_academic_year = {collection_downloads} WHERE collection_id = '{collection_uuid}'")) + cursor.execute(f"UPDATE collection_stats SET downloads_academic_year = {collection_downloads} WHERE collection_id = '{collection_uuid}") + else: + self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_total = {collection_downloads} WHERE collection_id = '{collection_uuid}'")) + cursor.execute(f"UPDATE collection_stats SET downloads_total = {collection_downloads} WHERE collection_id = '{collection_uuid}'") else: - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_total = {collection_downloads} WHERE collection_id = '{collection_uuid}'")) - cursor.execute(f"UPDATE collection_stats SET downloads_total = {collection_downloads} WHERE collection_id = '{collection_uuid}'") + self.logger.warning("owningColl value is not a UUID: %s", + collection_uuid) # Commit changes to database db.commit() diff --git a/dspace_reports/community_indexer.py b/dspace_reports/community_indexer.py index e4bf87d..d9f750a 100644 --- a/dspace_reports/community_indexer.py +++ b/dspace_reports/community_indexer.py @@ -54,8 +54,8 @@ def load_communities_recursive(self, communities, community, parent_community_na # Insert the community into the database with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: - self.logger.debug(cursor.mogrify(f"INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES ({community_uuid}, {community_name}, {community_url}, {parent_community_name})")) - cursor.execute(f"INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES ({community_uuid}, {community_name}, {community_url}, {parent_community_name})") + 
self.logger.debug(cursor.mogrify(f"INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES ('{community_uuid}', '{community_name}', '{community_url}', '{parent_community_name}')")) + cursor.execute(f"INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES ('{community_uuid}', '{community_name}', '{community_url}', '{parent_community_name}')") db.commit() # Index views and downloads for the current community @@ -233,9 +233,9 @@ def index_community_views(self, time_period=None): # Solr returns facets as a dict of dicts (see json.nl parameter) views = response.json()["facet_counts"]["facet_fields"] - # iterate over the facetField dict and get the ids and views + # Iterate over the facetField dict and get the UUIDs and views for community_uuid, community_views in views["owningComm"].items(): - if len(id) == 36: + if len(community_uuid) == 36: if time_period == 'month': self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET views_last_month = {community_views} WHERE community_id = '{community_uuid}'")) cursor.execute(f"UPDATE community_stats SET views_last_month = {community_views} WHERE community_id = '{community_uuid}'") @@ -246,7 +246,8 @@ def index_community_views(self, time_period=None): self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET views_total = {community_views} WHERE community_id = '{community_uuid}'")) cursor.execute(f"UPDATE community_stats SET views_total = {community_views} WHERE community_id = '{community_uuid}'") else: - self.logger.warning("owningComm value is not a UUID: %s", id) + self.logger.warning("owningComm value is not a UUID: %s", + community_uuid) # Commit changes to database db.commit() @@ -346,9 +347,9 @@ def index_community_downloads(self, time_period=None): # Solr returns facets as a dict of dicts (see json.nl parameter) downloads = response.json()["facet_counts"]["facet_fields"] - # iterate over the facetField dict and get the ids and views + # Iterate over the facetField dict and get the UUIDs and downloads for community_uuid, community_downloads in downloads["owningComm"].items(): - if len(id) == 36: + if len(community_uuid) == 36: if time_period == 'month': self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET downloads_last_month = {community_downloads} WHERE community_id = '{community_uuid}'")) cursor.execute(f"UPDATE community_stats SET downloads_last_month = {community_downloads} WHERE community_id = '{community_uuid}'") @@ -359,7 +360,8 @@ def index_community_downloads(self, time_period=None): self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET downloads_total = {community_downloads} WHERE community_id = '{community_uuid}'")) cursor.execute(f"UPDATE community_stats SET downloads_total = {community_downloads} WHERE community_id = '{community_uuid}'") else: - self.logger.warning("owningComm value is not a UUID: %s", id) + self.logger.warning("owningComm value is not a UUID: %s", + community_uuid) # Commit changes to database db.commit() diff --git a/dspace_reports/item_indexer.py b/dspace_reports/item_indexer.py index 88953de..430fd89 100644 --- a/dspace_reports/item_indexer.py +++ b/dspace_reports/item_indexer.py @@ -173,17 +173,21 @@ def index_item_views(self, time_period='all'): # Solr returns facets as a dict of dicts (see json.nl parameter) views = response.json()["facet_counts"]["facet_fields"] - # iterate over the facetField dict and get the ids and views - for item_id, item_views in views["id"].items(): - if 
time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET views_last_month = {item_views} WHERE item_id = '{item_id}'")) - cursor.execute(f"UPDATE item_stats SET views_last_month = {item_views} WHERE item_id = '{item_id}'") - elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET views_academic_year = {item_views} WHERE item_id = '{item_id}'")) - cursor.execute(f"UPDATE item_stats SET views_academic_year = {item_views} WHERE item_id = '{item_id}'") + # Iterate over the facetField dict and get the UUIDs and views + for item_uuid, item_views in views["id"].items(): + if len(item_uuid) == 36: + if time_period == 'month': + self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET views_last_month = {item_views} WHERE item_id = '{item_uuid}'")) + cursor.execute(f"UPDATE item_stats SET views_last_month = {item_views} WHERE item_id = '{item_uuid}'") + elif time_period == 'year': + self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET views_academic_year = {item_views} WHERE item_id = '{item_uuid}'")) + cursor.execute(f"UPDATE item_stats SET views_academic_year = {item_views} WHERE item_id = '{item_uuid}'") + else: + self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET views_total = {item_views} WHERE item_id = '{item_uuid}'")) + cursor.execute(f"UPDATE item_stats SET views_total = {item_views} WHERE item_id = '{item_uuid}'") else: - self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET views_total = {item_views} WHERE item_id = '{item_id}'")) - cursor.execute(f"UPDATE item_stats SET views_total = {item_views} WHERE item_id = '{item_id}'") + self.logger.warning("Item ID value is not a UUID: %s", + item_uuid) # Commit changes to database db.commit() @@ -286,18 +290,22 @@ def index_item_downloads(self, time_period='all'): # Solr returns facets as a dict of dicts (see json.nl parameter) downloads = response.json()["facet_counts"]["facet_fields"] - # iterate over the facetField dict and get the ids and views - for item_id, item_downloads in downloads["owningItem"].items(): - if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET downloads_last_month = {item_downloads} WHERE item_id = '{item_id}'")) - cursor.execute(f"UPDATE item_stats SET downloads_last_month = {item_downloads} WHERE item_id = '{item_id}'") - elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET downloads_academic_year = {item_downloads} WHERE item_id = '{item_id}'")) - cursor.execute(f"UPDATE item_stats SET downloads_academic_year = {item_downloads} WHERE item_id = '{item_id}'") + # Iterate over the facetField dict and get the UUIDs and downloads + for item_uuid, item_downloads in downloads["owningItem"].items(): + if len(item_uuid) == 36: + if time_period == 'month': + self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET downloads_last_month = {item_downloads} WHERE item_id = '{item_uuid}'")) + cursor.execute(f"UPDATE item_stats SET downloads_last_month = {item_downloads} WHERE item_id = '{item_uuid}'") + elif time_period == 'year': + self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET downloads_academic_year = {item_downloads} WHERE item_id = '{item_uuid}'")) + cursor.execute(f"UPDATE item_stats SET downloads_academic_year = {item_downloads} WHERE item_id = '{item_uuid}'") + else: + self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET downloads_total = {item_downloads} WHERE item_id = '{item_uuid}'")) + cursor.execute(f"UPDATE item_stats SET downloads_total = {item_downloads} WHERE item_id 
= '{item_uuid}'") else: - self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET downloads_total = {item_downloads} WHERE item_id = '{item_id}'")) - cursor.execute(f"UPDATE item_stats SET downloads_total = {item_downloads} WHERE item_id = '{item_id}'") - + self.logger.warning("Item ID value is not a UUID: %s", + item_uuid) + # Commit changes to database db.commit() From 04cfc2f7140919f481e8bedae5e5f99ffe001869 Mon Sep 17 00:00:00 2001 From: nwoodward Date: Wed, 17 Jul 2024 12:56:01 -0500 Subject: [PATCH 09/22] removed string interpolation for SQL statements --- dspace_reports/collection_indexer.py | 39 +++++++++++++------------ dspace_reports/community_indexer.py | 40 +++++++++++++------------- dspace_reports/item_indexer.py | 33 ++++++++++----------- dspace_reports/repository_indexer.py | 43 ++++++++++++++-------------- 4 files changed, 77 insertions(+), 78 deletions(-) diff --git a/dspace_reports/collection_indexer.py b/dspace_reports/collection_indexer.py index 0a946b9..25602dd 100644 --- a/dspace_reports/collection_indexer.py +++ b/dspace_reports/collection_indexer.py @@ -42,7 +42,8 @@ def index_collections(self): # Insert the collection into the database with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: - cursor.execute(f"INSERT INTO collection_stats (parent_community_name, collection_id, collection_name, collection_url) VALUES ('{parent_community_name}', '{collection_uuid}', '{collection_name}', '{collection_url}') ON CONFLICT DO NOTHING") + self.logger.debug(cursor.mogrify("INSERT INTO collection_stats (parent_community_name, collection_id, collection_name, collection_url) VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING", (parent_community_name, collection_uuid, collection_name, collection_url))) + cursor.execute("INSERT INTO collection_stats (parent_community_name, collection_id, collection_name, collection_url) VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING", (parent_community_name, collection_uuid, collection_name, collection_url)) db.commit() for time_period in self.time_periods: @@ -110,14 +111,14 @@ def index_collection_items(self, collection_uuid=None, time_period=None): with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET items_last_month = {results_total_items} WHERE collection_id = '{collection_uuid}'")) - cursor.execute(f"UPDATE collection_stats SET items_last_month = {results_total_items} WHERE collection_id = '{collection_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE collection_stats SET items_last_month = %s WHERE collection_id = %s", (results_total_items, collection_uuid))) + cursor.execute("UPDATE collection_stats SET items_last_month = %s WHERE collection_id = %s", (results_total_items, collection_uuid)) elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET items_academic_year = {results_total_items} WHERE collection_id = '{collection_uuid}'")) - cursor.execute(f"UPDATE collection_stats SET items_academic_year = {results_total_items} WHERE collection_id = '{collection_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE collection_stats SET items_academic_year = %s WHERE collection_id = %s", (results_total_items, collection_uuid))) + cursor.execute("UPDATE collection_stats SET items_academic_year = %s WHERE collection_id = %s", (results_total_items, collection_uuid)) else: - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET items_total = {results_total_items} WHERE 
collection_id = '{collection_uuid}'")) - cursor.execute(f"UPDATE collection_stats SET items_total = {results_total_items} WHERE collection_id = '{collection_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE collection_stats SET items_total = %s WHERE collection_id = %s", (results_total_items, collection_uuid))) + cursor.execute("UPDATE collection_stats SET items_total = %s WHERE collection_id = %s", (results_total_items, collection_uuid)) # Commit changes db.commit() @@ -219,14 +220,14 @@ def index_collection_views(self, time_period=None): for collection_uuid, collection_views in views["owningColl"].items(): if len(collection_uuid) == 36: if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_last_month = {collection_views} WHERE collection_id = '{collection_uuid}'")) - cursor.execute(f"UPDATE collection_stats SET views_last_month = {collection_views} WHERE collection_id = '{collection_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE collection_stats SET views_last_month = %s WHERE collection_id = %s", (collection_views, collection_uuid))) + cursor.execute("UPDATE collection_stats SET views_last_month = %s WHERE collection_id = %s", (collection_views, collection_uuid)) elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_academic_year = {collection_views} WHERE collection_id = '{collection_uuid}'")) - cursor.execute(f"UPDATE collection_stats SET views_academic_year = {collection_views} WHERE collection_id = '{collection_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE collection_stats SET views_academic_year = %s WHERE collection_id = %s", (collection_views, collection_uuid))) + cursor.execute("UPDATE collection_stats SET views_academic_year = %s WHERE collection_id = %s", (collection_views, collection_uuid)) else: - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET views_total = {collection_views} WHERE collection_id = '{collection_uuid}'")) - cursor.execute(f"UPDATE collection_stats SET views_total = {collection_views} WHERE collection_id = '{collection_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE collection_stats SET views_total = %s WHERE collection_id = %s", (collection_views, collection_uuid))) + cursor.execute("UPDATE collection_stats SET views_total = %s WHERE collection_id = %s", (collection_views, collection_uuid)) else: self.logger.warning("owningColl value is not a UUID: %s", collection_uuid) @@ -335,14 +336,14 @@ def index_collection_downloads(self, time_period=None): for collection_uuid, collection_downloads in downloads["owningColl"].items(): if len(collection_uuid) == 36: if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_last_month = {collection_downloads} WHERE collection_id = '{collection_uuid}'")) - cursor.execute(f"UPDATE collection_stats SET downloads_last_month = {collection_downloads} WHERE collection_id = '{collection_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE collection_stats SET downloads_last_month = %s WHERE collection_id = %s", (collection_downloads, collection_uuid))) + cursor.execute("UPDATE collection_stats SET downloads_last_month = %s WHERE collection_id = %s", (collection_downloads, collection_uuid)) elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_academic_year = {collection_downloads} WHERE collection_id = '{collection_uuid}'")) - cursor.execute(f"UPDATE collection_stats SET downloads_academic_year = {collection_downloads}
WHERE collection_id = '{collection_uuid}") + self.logger.debug(cursor.mogrify("UPDATE collection_stats SET downloads_academic_year = %s WHERE collection_id = %s", (collection_downloads, collection_uuid))) + cursor.execute("UPDATE collection_stats SET downloads_academic_year = %s WHERE collection_id = %s", (collection_downloads, collection_uuid)) else: - self.logger.debug(cursor.mogrify(f"UPDATE collection_stats SET downloads_total = {collection_downloads} WHERE collection_id = '{collection_uuid}'")) - cursor.execute(f"UPDATE collection_stats SET downloads_total = {collection_downloads} WHERE collection_id = '{collection_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE collection_stats SET downloads_total = %s WHERE collection_id = %s", (collection_downloads, collection_uuid))) + cursor.execute("UPDATE collection_stats SET downloads_total = %s WHERE collection_id = %s", (collection_downloads, collection_uuid)) else: self.logger.warning("owningColl value is not a UUID: %s", collection_uuid) diff --git a/dspace_reports/community_indexer.py b/dspace_reports/community_indexer.py index d9f750a..6c1818a 100644 --- a/dspace_reports/community_indexer.py +++ b/dspace_reports/community_indexer.py @@ -54,8 +54,8 @@ def load_communities_recursive(self, communities, community, parent_community_na # Insert the community into the database with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: - self.logger.debug(cursor.mogrify(f"INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES ('{community_uuid}', '{community_name}', '{community_url}', '{parent_community_name}')")) - cursor.execute(f"INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES ('{community_uuid}', '{community_name}', '{community_url}', '{parent_community_name}')") + self.logger.debug(cursor.mogrify("INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES (%s, %s, %s, %s)", (community_uuid, community_name, community_url, parent_community_name))) + cursor.execute("INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES (%s, %s, %s, %s)", (community_uuid, community_name, community_url, parent_community_name)) db.commit() # Index views and downloads for the current community @@ -125,14 +125,14 @@ def index_community_items(self, community_uuid=None, time_period=None): with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET items_last_month = {results_total_items} WHERE community_id = '{community_uuid}'")) - cursor.execute(f"UPDATE community_stats SET items_last_month = {results_total_items} WHERE community_id = '{community_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE community_stats SET items_last_month = %s WHERE community_id = %s", (results_total_items, community_uuid))) + cursor.execute("UPDATE community_stats SET items_last_month = %s WHERE community_id = %s", (results_total_items, community_uuid)) elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET items_academic_year = {results_total_items} WHERE community_id = '{community_uuid}'")) - cursor.execute(f"UPDATE community_stats SET items_academic_year = {results_total_items} WHERE community_id = '{community_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE community_stats SET items_academic_year = %s WHERE 
community_id = %s", (results_total_items, community_uuid))) + cursor.execute("UPDATE community_stats SET items_academic_year = %s WHERE community_id = %s", (results_total_items, community_uuid)) else: - self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET items_total = {results_total_items} WHERE community_id = '{community_uuid}'")) - cursor.execute(f"UPDATE community_stats SET items_total = {results_total_items} WHERE community_id = '{community_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE community_stats SET items_total = %s WHERE community_id = %s", (results_total_items, community_uuid))) + cursor.execute("UPDATE community_stats SET items_total = %s WHERE community_id = %s", (results_total_items, community_uuid)) # Commit changes db.commit() @@ -237,14 +237,14 @@ def index_community_views(self, time_period=None): for community_uuid, community_views in views["owningComm"].items(): if len(community_uuid) == 36: if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET views_last_month = {community_views} WHERE community_id = '{community_uuid}'")) - cursor.execute(f"UPDATE community_stats SET views_last_month = {community_views} WHERE community_id = '{community_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE community_stats SET views_last_month = %s WHERE community_id = %s", (community_views, community_uuid))) + cursor.execute("UPDATE community_stats SET views_last_month = %s WHERE community_id = %s", (community_views, community_uuid)) elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET views_academic_year = {community_views} WHERE community_id = '{community_uuid}'")) - cursor.execute(f"UPDATE community_stats SET views_academic_year = {community_views} WHERE community_id = '{community_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE community_stats SET views_academic_year = %s WHERE community_id = %s", (community_views, community_uuid))) + cursor.execute("UPDATE community_stats SET views_academic_year = %s WHERE community_id = %s", (community_views, community_uuid)) else: - self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET views_total = {community_views} WHERE community_id = '{community_uuid}'")) - cursor.execute(f"UPDATE community_stats SET views_total = {community_views} WHERE community_id = '{community_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE community_stats SET views_total = %s WHERE community_id = %s", (community_views, community_uuid))) + cursor.execute("UPDATE community_stats SET views_total = %s WHERE community_id = %s", (community_views, community_uuid)) else: self.logger.warning("owningComm value is not a UUID: %s", community_uuid) @@ -351,14 +351,14 @@ def index_community_downloads(self, time_period=None): for community_uuid, community_downloads in downloads["owningComm"].items(): if len(community_uuid) == 36: if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET downloads_last_month = {community_downloads} WHERE community_id = '{community_uuid}'")) - cursor.execute(f"UPDATE community_stats SET downloads_last_month = {community_downloads} WHERE community_id = '{community_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE community_stats SET downloads_last_month = %s WHERE community_id = %s", (community_downloads, community_downloads))) + cursor.execute("UPDATE community_stats SET downloads_last_month = %s WHERE community_id = %s", (community_downloads, community_downloads)) elif time_period == 'year': - 
self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET downloads_academic_year = {community_downloads} WHERE community_id ='{community_uuid}'")) - cursor.execute(f"UPDATE community_stats SET downloads_academic_year = {community_downloads} WHERE community_id = '{community_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE community_stats SET downloads_academic_year = %s WHERE community_id = %s", (community_downloads, community_downloads))) + cursor.execute("UPDATE community_stats SET downloads_academic_year = %s WHERE community_id = %s", (community_downloads, community_downloads)) else: - self.logger.debug(cursor.mogrify(f"UPDATE community_stats SET downloads_total = {community_downloads} WHERE community_id = '{community_uuid}'")) - cursor.execute(f"UPDATE community_stats SET downloads_total = {community_downloads} WHERE community_id = '{community_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE community_stats SET downloads_total = %s WHERE community_id = %s", (community_downloads, community_downloads))) + cursor.execute("UPDATE community_stats SET downloads_total = %s WHERE community_id = %s", (community_downloads, community_downloads)) else: self.logger.warning("owningComm value is not a UUID: %s", community_uuid) diff --git a/dspace_reports/item_indexer.py b/dspace_reports/item_indexer.py index 430fd89..13c200c 100644 --- a/dspace_reports/item_indexer.py +++ b/dspace_reports/item_indexer.py @@ -1,11 +1,8 @@ """Class for indexing items""" import math -import sys - from time import sleep -from lib.oai import DSpaceOai from lib.database import Database from dspace_reports.indexer import Indexer @@ -56,7 +53,7 @@ def index(self): self.logger.info("Item owning collection: %s ", item_owning_collection_name) - # If name is null then use "Untitled" + # If name is None then use "Untitled" if item_name is not None: # If item name is longer than 255 characters then shorten it # to fit in database field @@ -68,8 +65,8 @@ def index(self): # Create handle URL for item item_url = self.base_url + item['handle'] - self.logger.debug(cursor.mogrify(f"INSERT INTO item_stats (collection_name, item_id, item_name, item_url) VALUES ('{item_owning_collection_name}', '{item_uuid}, '{item_name}', '{item_url}') ON CONFLICT DO NOTHING")) - cursor.execute(f"INSERT INTO item_stats (collection_name, item_id, item_name, item_url) VALUES ('{item_owning_collection_name}', '{item_uuid}', '{item_name}', '{item_url}') ON CONFLICT DO NOTHING") + self.logger.debug(cursor.mogrify("INSERT INTO item_stats (collection_name, item_id, item_name, item_url) VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING", (item_owning_collection_name, item_uuid, item_name, item_url))) + cursor.execute("INSERT INTO item_stats (collection_name, item_id, item_name, item_url) VALUES (%s, %s, %s, %s) ON CONFLICT DO NOTHING", (item_owning_collection_name, item_uuid, item_name, item_url)) db.commit() for time_period in self.time_periods: @@ -177,14 +174,14 @@ def index_item_views(self, time_period='all'): for item_uuid, item_views in views["id"].items(): if len(item_uuid) == 36: if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET views_last_month = {item_views} WHERE item_id = '{item_uuid}'")) - cursor.execute(f"UPDATE item_stats SET views_last_month = {item_views} WHERE item_id = '{item_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE item_stats SET views_last_month = %s WHERE item_id = %s", (item_views, item_uuid))) + cursor.execute("UPDATE item_stats SET views_last_month = %s WHERE item_id = %s", 
(item_views, item_uuid)) elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET views_academic_year = {item_views} WHERE item_id = '{item_uuid}'")) - cursor.execute(f"UPDATE item_stats SET views_academic_year = {item_views} WHERE item_id = '{item_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE item_stats SET views_academic_year = %s WHERE item_id = %s", (item_views, item_uuid))) + cursor.execute("UPDATE item_stats SET views_academic_year = %s WHERE item_id = %s", (item_views, item_uuid)) else: - self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET views_total = {item_views} WHERE item_id = '{item_uuid}'")) - cursor.execute(f"UPDATE item_stats SET views_total = {item_views} WHERE item_id = '{item_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE item_stats SET views_total = %s WHERE item_id = %s", (item_views, item_uuid))) + cursor.execute("UPDATE item_stats SET views_total = %s WHERE item_id = %s", (item_views, item_uuid)) else: self.logger.warning("Item ID value is not a UUID: %s", item_uuid) @@ -294,14 +291,14 @@ def index_item_downloads(self, time_period='all'): for item_uuid, item_downloads in downloads["owningItem"].items(): if len(item_uuid) == 36: if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET downloads_last_month = {item_downloads} WHERE item_id = '{item_uuid}'")) - cursor.execute(f"UPDATE item_stats SET downloads_last_month = {item_downloads} WHERE item_id = '{item_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE item_stats SET downloads_last_month = %s WHERE item_id = %s", (item_downloads, item_uuid))) + cursor.execute("UPDATE item_stats SET downloads_last_month = %s WHERE item_id = %s", (item_downloads, item_uuid)) elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET downloads_academic_year = {item_downloads} WHERE item_id = '{item_uuid}'")) - cursor.execute(f"UPDATE item_stats SET downloads_academic_year = {item_downloads} WHERE item_id = '{item_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE item_stats SET downloads_academic_year = %s WHERE item_id = %s", (item_downloads, item_uuid))) + cursor.execute("UPDATE item_stats SET downloads_academic_year = %s WHERE item_id = %s", (item_downloads, item_uuid)) else: - self.logger.debug(cursor.mogrify(f"UPDATE item_stats SET downloads_total = {item_downloads} WHERE item_id = '{item_uuid}'")) - cursor.execute(f"UPDATE item_stats SET downloads_total = {item_downloads} WHERE item_id = '{item_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE item_stats SET downloads_total = %s WHERE item_id = %s", (item_downloads, item_uuid))) + cursor.execute("UPDATE item_stats SET downloads_total = %s WHERE item_id = %s", (item_downloads, item_uuid)) else: self.logger.warning("Item ID value is not a UUID: %s", item_uuid) diff --git a/dspace_reports/repository_indexer.py b/dspace_reports/repository_indexer.py index aa13af7..e4e049b 100644 --- a/dspace_reports/repository_indexer.py +++ b/dspace_reports/repository_indexer.py @@ -31,8 +31,8 @@ def index_repository(self): with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: - self.logger.debug(cursor.mogrify(f"INSERT INTO repository_stats (repository_id, repository_name) VALUES ('{repository_uuid}', '{repository_name}')")) - cursor.execute(f"INSERT INTO repository_stats (repository_id, repository_name) VALUES ('{repository_uuid}', '{repository_name}')") + self.logger.debug(cursor.mogrify("INSERT INTO repository_stats (repository_id, repository_name) VALUES (%s, 
%s)", (repository_uuid, repository_name))) + cursor.execute("INSERT INTO repository_stats (repository_id, repository_name) VALUES (%s, %s)", (repository_uuid, repository_name)) db.commit() @@ -93,14 +93,14 @@ def index_repository_items(self, repository_uuid=None, time_period=None): with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET items_last_month = {results_total_items} WHERE repository_id = '{repository_uuid}'")) - cursor.execute(f"UPDATE repository_stats SET items_last_month = {results_total_items} WHERE repository_id = '{repository_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE repository_stats SET items_last_month = %s WHERE repository_id = %s", (results_total_items, repository_uuid))) + cursor.execute("UPDATE repository_stats SET items_last_month = %s WHERE repository_id = %s", (results_total_items, repository_uuid)) elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET items_academic_year = {results_total_items} WHERE repository_id = '{repository_uuid}'")) - cursor.execute(f"UPDATE repository_stats SET items_academic_year = {results_total_items} WHERE repository_id = '{repository_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE repository_stats SET items_academic_year = %s WHERE repository_id = %s", (results_total_items, repository_uuid))) + cursor.execute("UPDATE repository_stats SET items_academic_year = %s WHERE repository_id = %s", (results_total_items, repository_uuid)) else: - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET items_total = {results_total_items} WHERE repository_id = '{repository_uuid}'")) - cursor.execute(f"UPDATE repository_stats SET items_total = {results_total_items} WHERE repository_id = '{repository_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE repository_stats SET items_total = %s WHERE repository_id = %s", (results_total_items, repository_uuid))) + cursor.execute("UPDATE repository_stats SET items_total = %s WHERE repository_id = %s", (results_total_items, repository_uuid)) # Commit changes db.commit() @@ -155,16 +155,17 @@ def index_repository_views(self, repository_uuid=None, time_period=None): with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: - self.logger.info("Setting repository views stats with %s views for time period: %s", str(results_num_found), time_period) + self.logger.info("Setting repository views stats with %s views for time period: %s", + str(results_num_found), time_period) if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET views_last_month = {results_num_found} WHERE repository_id = '{repository_uuid}'")) - cursor.execute(f"UPDATE repository_stats SET views_last_month = {results_num_found} WHERE repository_id = '{repository_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE repository_stats SET views_last_month = %s WHERE repository_id = %s", (results_num_found, repository_uuid))) + cursor.execute("UPDATE repository_stats SET views_last_month = %s WHERE repository_id = %s", (results_num_found, repository_uuid)) elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET views_academic_year = {results_num_found} WHERE repository_id = '{repository_uuid}'")) - cursor.execute(f"UPDATE repository_stats SET views_academic_year = {results_num_found} WHERE repository_id = '{repository_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE repository_stats SET 
views_academic_year = %s WHERE repository_id = %s", (results_num_found, repository_uuid))) + cursor.execute("UPDATE repository_stats SET views_academic_year = %s WHERE repository_id = %s", (results_num_found, repository_uuid)) else: - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET views_total = {results_num_found} WHERE repository_id = '{repository_uuid}'")) - cursor.execute(f"UPDATE repository_stats SET views_total = {results_num_found} WHERE repository_id = '{repository_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE repository_stats SET views_total = %s WHERE repository_id = %s", (results_num_found, repository_uuid))) + cursor.execute("UPDATE repository_stats SET views_total = %s WHERE repository_id = %s", (results_num_found, repository_uuid)) # Commit changes db.commit() @@ -221,14 +222,14 @@ def index_repository_downloads(self, repository_uuid=None, time_period=None): with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: if time_period == 'month': - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET downloads_last_month = downloads_last_month + {results_num_found} WHERE repository_id = '{repository_uuid}'")) - cursor.execute(f"UPDATE repository_stats SET downloads_last_month = downloads_last_month + {results_num_found} WHERE repository_id = '{repository_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE repository_stats SET downloads_last_month = downloads_last_month + %s WHERE repository_id = %s", (results_num_found, repository_uuid))) + cursor.execute("UPDATE repository_stats SET downloads_last_month = downloads_last_month + %s WHERE repository_id = %s", (results_num_found, repository_uuid)) elif time_period == 'year': - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET downloads_academic_year = downloads_academic_year + {results_num_found} WHERE repository_id = '{repository_uuid}'")) - cursor.execute(f"UPDATE repository_stats SET downloads_academic_year = downloads_academic_year + {results_num_found} WHERE repository_id = '{repository_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE repository_stats SET downloads_academic_year = downloads_academic_year + %s WHERE repository_id = %s", (results_num_found, repository_uuid))) + cursor.execute("UPDATE repository_stats SET downloads_academic_year = downloads_academic_year + %s WHERE repository_id = %s", (results_num_found, repository_uuid)) else: - self.logger.debug(cursor.mogrify(f"UPDATE repository_stats SET downloads_total = downloads_total + {results_num_found} WHERE repository_id = '{repository_uuid}'")) - cursor.execute(f"UPDATE repository_stats SET downloads_total = downloads_total + {results_num_found} WHERE repository_id = '{repository_uuid}'") + self.logger.debug(cursor.mogrify("UPDATE repository_stats SET downloads_total = downloads_total + %s WHERE repository_id = %s", (results_num_found, repository_uuid))) + cursor.execute("UPDATE repository_stats SET downloads_total = downloads_total + %s WHERE repository_id = %s", (results_num_found, repository_uuid)) # Commit changes db.commit() From 85481eb811afedcc7ddb011973d49ed9d0c778f1 Mon Sep 17 00:00:00 2001 From: nwoodward Date: Wed, 17 Jul 2024 12:58:20 -0500 Subject: [PATCH 10/22] fixed sql bug --- dspace_reports/community_indexer.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dspace_reports/community_indexer.py b/dspace_reports/community_indexer.py index 6c1818a..29aabf6 100644 --- a/dspace_reports/community_indexer.py +++ b/dspace_reports/community_indexer.py @@
-351,14 +351,14 @@ def index_community_downloads(self, time_period=None): for community_uuid, community_downloads in downloads["owningComm"].items(): if len(community_uuid) == 36: if time_period == 'month': - self.logger.debug(cursor.mogrify("UPDATE community_stats SET downloads_last_month = %s WHERE community_id = %s", (community_downloads, community_downloads))) - cursor.execute("UPDATE community_stats SET downloads_last_month = %s WHERE community_id = %s", (community_downloads, community_downloads)) + self.logger.debug(cursor.mogrify("UPDATE community_stats SET downloads_last_month = %s WHERE community_id = %s", (community_downloads, community_uuid))) + cursor.execute("UPDATE community_stats SET downloads_last_month = %s WHERE community_id = %s", (community_downloads, community_uuid)) elif time_period == 'year': - self.logger.debug(cursor.mogrify("UPDATE community_stats SET downloads_academic_year = %s WHERE community_id = %s", (community_downloads, community_downloads))) - cursor.execute("UPDATE community_stats SET downloads_academic_year = %s WHERE community_id = %s", (community_downloads, community_downloads)) + self.logger.debug(cursor.mogrify("UPDATE community_stats SET downloads_academic_year = %s WHERE community_id = %s", (community_downloads, community_uuid))) + cursor.execute("UPDATE community_stats SET downloads_academic_year = %s WHERE community_id = %s", (community_downloads, community_uuid)) else: - self.logger.debug(cursor.mogrify("UPDATE community_stats SET downloads_total = %s WHERE community_id = %s", (community_downloads, community_downloads))) - cursor.execute("UPDATE community_stats SET downloads_total = %s WHERE community_id = %s", (community_downloads, community_downloads)) + self.logger.debug(cursor.mogrify("UPDATE community_stats SET downloads_total = %s WHERE community_id = %s", (community_downloads, community_uuid))) + cursor.execute("UPDATE community_stats SET downloads_total = %s WHERE community_id = %s", (community_downloads, community_uuid)) else: self.logger.warning("owningComm value is not a UUID: %s", community_uuid) From 32512f16c44e089c63f283a04fb87995d556c2a3 Mon Sep 17 00:00:00 2001 From: nwoodward Date: Wed, 17 Jul 2024 15:55:11 -0500 Subject: [PATCH 11/22] loads of bug fixes --- dspace_reports/collection_indexer.py | 2 +- dspace_reports/community_indexer.py | 86 ++++++++++++---------------- dspace_reports/item_indexer.py | 3 +- lib/api.py | 73 ++++++++++++++++++++--- lib/emailer.py | 19 +++--- run_reports.py | 21 ++++--- 6 files changed, 126 insertions(+), 78 deletions(-) diff --git a/dspace_reports/collection_indexer.py b/dspace_reports/collection_indexer.py index 25602dd..1ff076f 100644 --- a/dspace_reports/collection_indexer.py +++ b/dspace_reports/collection_indexer.py @@ -57,7 +57,7 @@ def index_collections(self): time_period) self.index_collection_views(time_period=time_period) - self.logger.info("Updating downloads statistics for collection during time period: %s", + self.logger.info("Updating downloads statistics for collections during time period: %s", time_period) self.index_collection_downloads(time_period=time_period) diff --git a/dspace_reports/community_indexer.py b/dspace_reports/community_indexer.py index 29aabf6..8f2900a 100644 --- a/dspace_reports/community_indexer.py +++ b/dspace_reports/community_indexer.py @@ -18,61 +18,49 @@ def index(self): def index_communities(self): """Index the communities in the repository""" - # List of communities - communities = [] - - # Get top level communities - top_communities = 
self.rest.get_top_level_communities() - - if 'community' in top_communities: - communities = top_communities['community'] - self.logger.info("Repository has %s top-level communities.", str(len(communities))) + # Get a list of all communities from the REST API + communities = self.rest.get_communities() + for community in communities: + community_uuid = community['uuid'] + community_name = community['name'] + self.logger.info("Loading community: %s (%s)...", community_name, community_uuid) + + # Get community metadata, including parent community name + community_handle = community['handle'] + community_url = self.base_url + community_handle + + parent_community_name = "" + parent_community = self.rest.get_community_parent_community( + community_uuid=community_uuid) + if parent_community is not None and 'name' in parent_community: + parent_community_name = parent_community['name'] + + if len(community_name) > 255: + self.logger.debug("Community name is longer than 255 characters. It will be shortened to that length.") + community_name = community_name[0:251] + "..." + + # Insert the community into the database + with Database(self.config['statistics_db']) as db: + with db.cursor() as cursor: + self.logger.debug(cursor.mogrify("INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES (%s, %s, %s, %s)", (community_uuid, community_name, community_url, parent_community_name))) + cursor.execute("INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES (%s, %s, %s, %s)", (community_uuid, community_name, community_url, parent_community_name)) + db.commit() - for community in communities: - self.logger.debug("Loading top-level community: %s (%s)", community['name'], community['id']) - self.load_communities_recursive(communities, community) - else: - self.logger.info("Repository has no communities.") + for time_period in self.time_periods: + self.logger.info("Indexing items for community: %s (%s)", community_name, + community_uuid) + self.index_community_items(community_uuid=community_uuid, time_period=time_period) + # Index all views and downloads of communities for time_period in self.time_periods: - self.logger.info("Updating views statistics for communities during time period: %s", time_period) + self.logger.info("Updating views statistics for communities during time period: %s", + time_period) self.index_community_views(time_period=time_period) - self.logger.info("Updating downloads statistics for communities during time period: %s", time_period) + self.logger.info("Updating downloads statistics for communities during time period: %s", + time_period) self.index_community_downloads(time_period=time_period) - def load_communities_recursive(self, communities, community, parent_community_name=""): - """Load all communities recursively""" - - # Extract metadata - community_uuid = community['uuid'] - community_name = community['name'] - community_handle = community['handle'] - community_url = self.base_url + community_handle - self.logger.info("Loading community: %s (%s)...", community_name, community_uuid) - - # Insert the community into the database - with Database(self.config['statistics_db']) as db: - with db.cursor() as cursor: - self.logger.debug(cursor.mogrify("INSERT INTO community_stats (community_id, community_name, community_url, parent_community_name) VALUES (%s, %s, %s, %s)", (community_uuid, community_name, community_url, parent_community_name))) - cursor.execute("INSERT INTO community_stats 
(community_id, community_name, community_url, parent_community_name) VALUES (%s, %s, %s, %s)", (community_uuid, community_name, community_url, parent_community_name)) - db.commit() - - # Index views and downloads for the current community - for time_period in self.time_periods: - self.logger.info("Indexing items for community: %s (%s)", community_name, - community_uuid) - self.index_community_items(community_uuid=community_uuid, time_period=time_period) - - # Load sub communities - if 'community' in community: - sub_communities = community['community'] - for sub_community in sub_communities: - self.logger.info("Loading subcommunity: %s (%s)", sub_community['name'], sub_community['id']) - self.load_communities_recursive(communities=communities, community=sub_community, parent_community_name=community_name) - else: - self.logger.info("There are no subcommunities in this community.") - def index_community_items(self, community_uuid=None, time_period=None): """Index the community items""" @@ -120,7 +108,7 @@ def index_community_items(self, community_uuid=None, time_period=None): self.logger.info("Solr - total items: %s", str(results_total_items)) except TypeError: self.logger.info("No community items to index.") - return + return None with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: diff --git a/dspace_reports/item_indexer.py b/dspace_reports/item_indexer.py index 13c200c..d6cf30b 100644 --- a/dspace_reports/item_indexer.py +++ b/dspace_reports/item_indexer.py @@ -43,7 +43,6 @@ def index(self): item_owning_collection = self.rest.get_item_owning_collection( item_uuid=item_uuid) if item_owning_collection is not None: - self.logger.info(item_owning_collection) item_owning_collection_name = item_owning_collection['name'] if len(item_owning_collection_name) > 255: @@ -51,7 +50,7 @@ def index(self): "It will be shortened to that length.") item_owning_collection_name = item_owning_collection_name[0:251] + "..." 
- self.logger.info("Item owning collection: %s ", item_owning_collection_name) + self.logger.info("Item owning collection: %s ", item_owning_collection_name) # If name is None then use "Untitled" if item_name is not None: diff --git a/lib/api.py b/lib/api.py index f3bf411..eb3da7d 100644 --- a/lib/api.py +++ b/lib/api.py @@ -143,8 +143,11 @@ def rest_call(self, call_type='GET', url='', params=None, data=None, headers=Non if response.status_code == 200: return response.json() - self.logger.error("Error while making rest call, (HTTP code: %s) %s", - response.status_code, response.text) + # Log errors + if response.status_code >= 400 and response.status_code <= 500: + self.logger.error("Error while making rest call, (HTTP code: %s) %s", + response.status_code, response.text) + return None def get_site(self): @@ -159,16 +162,57 @@ def get_site(self): return None - - def get_communities(self): + def get_communities(self, sort=None): """Get all communities""" + params = {} + if sort is not None: + params['sort'] = sort + communities = [] + page = 0 + params['page'] = page + size = 20 + params['size'] = size + communities_url = self.construct_url(command = 'core/communities') - communities_response = self.rest_call(url = communities_url) - if communities_response is not None and '_embedded' in communities_response: - if 'communities' in communities_response['_embedded']: - communities = communities_response['_embedded']['communities'] + total_communities = 0 + total_pages = 0 + + while True: + self.logger.info("Loading page %s of communities...", str(page)) + + communities_response = self.rest_call(url = communities_url, params = params) + if communities_response is not None and '_embedded' in communities_response: + # Get ccommunities from this page of results + if 'communities' in communities_response['_embedded']: + self.logger.info(communities_response['_embedded']['communities']) + for community_json in communities_response['_embedded']['communities']: + communities.append(community_json) + + # Check API response for amount of total communities and pages + if 'page' in communities_response: + page_info = communities_response['page'] + if 'totalElements' in page_info: + total_communities = page_info['totalElements'] + if 'totalPages' in page_info: + total_pages = page_info['totalPages'] + + page += 1 + if total_pages > 0 and page == total_pages: + break + + params['page'] = page + else: + break + + # Sanity check to make sure all pages were retrieved + if len(communities) != total_communities: + self.logger.error("There was a problem retrieving communities from the API.") + self.logger.error("Communities retrieved: %s. 
diff --git a/lib/emailer.py b/lib/emailer.py
index 26db4c4..426e6b1 100644
--- a/lib/emailer.py
+++ b/lib/emailer.py
@@ -11,7 +11,7 @@ from email import encoders
 
 
-class Emailer(object):
+class Emailer():
     """Class for sending emails"""
 
     def __init__(self, config=None):
@@ -31,13 +31,13 @@ def email_report_admins(self, report_file_path=None):
 
         # Send email(s) to contact(s)
         for admin_email in self.config['admin_emails']:
-            self.logger.info('Sending report to %s.', admin_email=admin_email)
-            self.__email_report_internal(report_file_path=report_file_path, to_email=admin_email, 
+            self.logger.info('Sending report to %s.', admin_email)
+            self.__email_report_internal(report_file_path=report_file_path, to_email=admin_email,
                                          from_email=from_email, subject=subject)
 
         return None
 
-    def __email_report_internal(self, report_file_path=None, to_email=None, from_email=None, 
+    def __email_report_internal(self, report_file_path=None, to_email=None, from_email=None,
                                 subject=None):
         if report_file_path is None:
             self.logger.error("A report file path of either a ZIP archive or Excel file " +
@@ -63,7 +63,7 @@ def __email_report_internal(self, report_file_path=None, to_email=None, from_ema
 
         # Attach report file(s)
         path, report_file_name = os.path.split(report_file_path)
-        with open(report_file_path, "rb", encoding="utf8") as attachment:
+        with open(report_file_path, "rb") as attachment:
             mime_type, _ = mimetypes.guess_type(report_file_path)
             if mime_type == 'application/zip':
                 part = MIMEBase('application', 'zip')
@@ -76,11 +76,10 @@ def __email_report_internal(self, report_file_path=None, to_email=None, from_ema
                                   "either a ZIP archive or an Excel XLSX file.")
                 part = MIMEBase("application", "octet-stream")
 
-
-            part.set_payload((attachment).read())
-            encoders.encode_base64(part)
-            part.add_header('Content-Disposition', f"attachment; filename= {report_file_name}")
-            message.attach(part)
+            part.set_payload((attachment).read())
+            encoders.encode_base64(part)
+            part.add_header('Content-Disposition', f"attachment; filename= {report_file_name}")
+            message.attach(part)
 
         # Set message body
         message.attach(MIMEText(body, 'plain'))
diff --git a/run_reports.py b/run_reports.py
index a396b65..9a4eaa5 100644
--- a/run_reports.py
+++ b/run_reports.py
@@ -5,6 +5,8 @@
 import logging
 import sys
 
+from psycopg import sql
+
 from database_manager import DatabaseManager
 from lib.database import Database
 from lib.emailer import Emailer
@@ -73,7 +75,7 @@ def run(self):
             csv_report_files = []
             for report in reports:
                 csv_report_file = self.create_csv_report(report=report)
-                self.logger.info("Created CSV 
report file: %s.", csv_report_file=csv_report_file) + self.logger.info("Created CSV report file: %s.", csv_report_file) # Convert column names to human readable text csv_report_files.append(csv_report_file) @@ -106,22 +108,27 @@ def create_csv_report(self, report=None): if report is None: self.logger.error("Must specify a report.") - return + return None # Vars column_names = [] data = [] + self.logger.debug("Creating CSV file for report %s...", report['table']) + with Database(self.config['statistics_db']) as db: with db.cursor() as cursor: - print(cursor.mogrify(f"SELECT * FROM {report['table']} ORDER BY {report['orderBy']} ASC")) - cursor.execute(f"SELECT * FROM {report['table']} ORDER BY {report['orderBy']} ASC") + self.logger.debug(cursor.mogrify(sql.SQL("SELECT * FROM {} ORDER BY {} ASC").format(sql.Identifier(report['table']), sql.Identifier(report['orderBy'],)))) + cursor.execute(sql.SQL("SELECT * FROM {} ORDER BY {} ASC").format(sql.Identifier(report['table']), sql.Identifier(report['orderBy'],))) desc = cursor.description column_names = [col[0] for col in desc] + self.logger.debug("Report has %s columns.", str(len(column_names))) data = [dict(zip(column_names, row)) for row in cursor.fetchall()] + self.logger.debug("Report has %s rows.", str(len(data))) + # Save raw database table in a CSV file report_csv_file = self.output.save_report_csv_file( output_file_path=self.output_dir + report['name'] + '.csv', @@ -149,7 +156,7 @@ def create_excel_report(self, csv_report_files=None): output_file_path=output_file_path, worksheet_files=csv_report_files) if excel_report_file: self.logger.info("Finished saving Excel file to %s.", - excel_report_file=excel_report_file) + excel_report_file) return excel_report_file self.logger.error("There was an error saving the Excel file.") @@ -169,7 +176,7 @@ def create_zip_archive(self, excel_report_file=None): ) if zip_report_archive: self.logger.info("Finished saving ZIP archive to %s.", - zip_report_archive=zip_report_archive) + zip_report_archive) return zip_report_archive self.logger.error("There was an error saving the ZIP archive.") @@ -198,7 +205,7 @@ def map_column_names(self, report_name=None, column_names=None): for i, column_name in enumerate(column_names): self.logger.debug("Looking at column name: %s.", column_names[i]) if column_name in column_map: - self.logger.debug("Changing column name to %s.", column_name) + self.logger.debug("Changing column name to %s.", column_map[column_name]) column_names[i] = column_map[column_name] return column_names From 6ee541f5a3edc7603aa773f69a9b04f49c08cb3e Mon Sep 17 00:00:00 2001 From: nwoodward Date: Wed, 17 Jul 2024 16:07:24 -0500 Subject: [PATCH 12/22] get all 5XX status codes --- lib/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/api.py b/lib/api.py index eb3da7d..f47370a 100644 --- a/lib/api.py +++ b/lib/api.py @@ -144,7 +144,7 @@ def rest_call(self, call_type='GET', url='', params=None, data=None, headers=Non return response.json() # Log errors - if response.status_code >= 400 and response.status_code <= 500: + if response.status_code >= 400 and response.status_code < 600: self.logger.error("Error while making rest call, (HTTP code: %s) %s", response.status_code, response.text) From 26461df7dfc9b7cd88f75bceaa1c7767d265d3b1 Mon Sep 17 00:00:00 2001 From: nwoodward Date: Wed, 17 Jul 2024 17:38:41 -0500 Subject: [PATCH 13/22] more linting fixes --- dspace_reports/collection_indexer.py | 18 ++++++++++++------ dspace_reports/community_indexer.py | 18 ++++++++++++------ 
dspace_reports/item_indexer.py | 17 +++++++++++------ dspace_reports/repository_indexer.py | 9 ++++++--- lib/api.py | 3 ++- run_collection_indexer.py | 2 +- run_item_indexer.py | 2 +- run_reports.py | 6 ++++-- 8 files changed, 49 insertions(+), 26 deletions(-) diff --git a/dspace_reports/collection_indexer.py b/dspace_reports/collection_indexer.py index 1ff076f..77fb45d 100644 --- a/dspace_reports/collection_indexer.py +++ b/dspace_reports/collection_indexer.py @@ -49,7 +49,8 @@ def index_collections(self): for time_period in self.time_periods: self.logger.info("Indexing items for collection: %s (%s)", collection_name, collection_uuid) - self.index_collection_items(collection_uuid=collection_uuid, time_period=time_period) + self.index_collection_items(collection_uuid=collection_uuid, + time_period=time_period) # Index all views and downloads of collections for time_period in self.time_periods: @@ -158,7 +159,8 @@ def index_collection_views(self, time_period=None): if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] - solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" + solr_query_params['q'] = (solr_query_params['q'] + " AND " + + f"time:[{date_start} TO {date_end}]") else: self.logger.error("Error creating date range.") @@ -205,11 +207,13 @@ def index_collection_views(self, time_period=None): } if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1]) + self.logger.info("Searching date range: %s - %s", + date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] - solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" + solr_query_params['q'] = (solr_query_params['q'] + " AND " + + f"time:[{date_start} TO {date_end}]") response = self.solr.call(url=solr_url, params=solr_query_params) self.logger.info("Solr collection views query: %s", response.url) @@ -273,7 +277,8 @@ def index_collection_downloads(self, time_period=None): if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] - solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" + solr_query_params['q'] = (solr_query_params['q'] + " AND " + + f"time:[{date_start} TO {date_end}]") else: self.logger.error("Error creating date range.") @@ -325,7 +330,8 @@ def index_collection_downloads(self, time_period=None): if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] - solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" + solr_query_params['q'] = (solr_query_params['q'] + " AND " + + f"time:[{date_start} TO {date_end}]") response = self.solr.call(url=solr_url, params=solr_query_params) self.logger.info("Solr collection downloads query: %s", response.url) diff --git a/dspace_reports/community_indexer.py b/dspace_reports/community_indexer.py index 8f2900a..f766acf 100644 --- a/dspace_reports/community_indexer.py +++ b/dspace_reports/community_indexer.py @@ -161,7 +161,8 @@ def index_community_views(self, time_period=None): if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] - solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" + solr_query_params['q'] = 
(solr_query_params['q'] + " AND " + + f"time:[{date_start} TO {date_end}]") else: self.logger.error("Error creating date range.") else: @@ -210,11 +211,13 @@ def index_community_views(self, time_period=None): } if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1]) + self.logger.info("Searching date range: %s - %s", + date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] - solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" + solr_query_params['q'] = (solr_query_params['q'] + " AND " + + f"time:[{date_start} TO {date_end}]") response = self.solr.call(url=solr_url, params=solr_query_params) self.logger.info("Solr community views query: %s", response.url) @@ -277,7 +280,8 @@ def index_community_downloads(self, time_period=None): if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] - solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" + solr_query_params['q'] = (solr_query_params['q'] + " AND " + + f"time:[{date_start} TO {date_end}]") else: self.logger.error("Error creating date range.") @@ -324,11 +328,13 @@ def index_community_downloads(self, time_period=None): } if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1]) + self.logger.info("Searching date range: %s - %s", + date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] - solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" + solr_query_params['q'] = (solr_query_params['q'] + " AND " + + f"time:[{date_start} TO {date_end}]") response = self.solr.call(url=solr_url, params=solr_query_params) self.logger.info("Solr community downloads query: %s", response.url) diff --git a/dspace_reports/item_indexer.py b/dspace_reports/item_indexer.py index d6cf30b..a2652aa 100644 --- a/dspace_reports/item_indexer.py +++ b/dspace_reports/item_indexer.py @@ -110,7 +110,8 @@ def index_item_views(self, time_period='all'): if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] - solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" + solr_query_params['q'] = (solr_query_params['q'] + " AND " + + f"time:[{date_start} TO {date_end}]") else: self.logger.error("Error creating date range.") @@ -162,11 +163,12 @@ def index_item_views(self, time_period='all'): if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] - solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" + solr_query_params['q'] = (solr_query_params['q'] + " AND " + + f"time:[{date_start} TO {date_end}]") response = self.solr.call(url=solr_url, params=solr_query_params) self.logger.info("Solr item views query: %s", response.url) - + # Solr returns facets as a dict of dicts (see json.nl parameter) views = response.json()["facet_counts"]["facet_fields"] # Iterate over the facetField dict and get the UUIDs and views @@ -228,7 +230,8 @@ def index_item_downloads(self, time_period='all'): if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] - solr_query_params['q'] = solr_query_params['q'] 
+ " AND " + f"time:[{date_start} TO {date_end}]" + solr_query_params['q'] = (solr_query_params['q'] + " AND " + + f"time:[{date_start} TO {date_end}]") else: self.logger.error("Error creating date range.") @@ -275,11 +278,13 @@ def index_item_downloads(self, time_period='all'): } if len(date_range) == 2: - self.logger.info("Searching date range: %s - %s", date_range[0], date_range[1]) + self.logger.info("Searching date range: %s - %s", + date_range[0], date_range[1]) if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] - solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" + solr_query_params['q'] = (solr_query_params['q'] + " AND " + + f"time:[{date_start} TO {date_end}]") response = self.solr.call(url=solr_url, params=solr_query_params) self.logger.info("Solr item downloads query: %s", response.url) diff --git a/dspace_reports/repository_indexer.py b/dspace_reports/repository_indexer.py index e4e049b..d71efae 100644 --- a/dspace_reports/repository_indexer.py +++ b/dspace_reports/repository_indexer.py @@ -45,7 +45,8 @@ def index_repository(self): self.index_repository_views(repository_uuid=repository_uuid, time_period=time_period) self.logger.info("Indexing repository downloads.") - self.index_repository_downloads(repository_uuid=repository_uuid, time_period=time_period) + self.index_repository_downloads(repository_uuid=repository_uuid, + time_period=time_period) def index_repository_items(self, repository_uuid=None, time_period=None): """Index repository items""" @@ -134,7 +135,8 @@ def index_repository_views(self, repository_uuid=None, time_period=None): if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] - solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" + solr_query_params['q'] = (solr_query_params['q'] + " AND " + + f"time:[{date_start} TO {date_end}]") else: self.logger.error("Error creating date range.") @@ -200,7 +202,8 @@ def index_repository_downloads(self, repository_uuid=None, time_period=None): if date_range[0] is not None and date_range[1] is not None: date_start = date_range[0] date_end = date_range[1] - solr_query_params['q'] = solr_query_params['q'] + " AND " + f"time:[{date_start} TO {date_end}]" + solr_query_params['q'] = (solr_query_params['q'] + " AND " + + f"time:[{date_start} TO {date_end}]") else: self.logger.error("Error creating date range.") diff --git a/lib/api.py b/lib/api.py index f47370a..b80afea 100644 --- a/lib/api.py +++ b/lib/api.py @@ -78,7 +78,8 @@ def authenticate(self): self.logger.info(login_response.cookies) if 'Authorization' in login_response.headers: - self.session.headers.update({'Authorization': login_response.headers.get('Authorization')}) + self.session.headers.update( + {'Authorization': login_response.headers.get('Authorization')}) return True diff --git a/run_collection_indexer.py b/run_collection_indexer.py index c3d93c1..4429351 100644 --- a/run_collection_indexer.py +++ b/run_collection_indexer.py @@ -90,4 +90,4 @@ def main(): indexer.run() if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/run_item_indexer.py b/run_item_indexer.py index 8ca8377..c3e7098 100644 --- a/run_item_indexer.py +++ b/run_item_indexer.py @@ -90,4 +90,4 @@ def main(): indexer.run() if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/run_reports.py b/run_reports.py index 9a4eaa5..927a276 
100644
--- a/run_reports.py
+++ b/run_reports.py
@@ -151,7 +151,8 @@ def create_excel_report(self, csv_report_files=None):
             return False
 
         # Combine CSV files into single Excel file
-        output_file_path = self.output_dir + datetime.now().strftime('dspace-reports_%Y-%m-%d_%H-%M-%S.xlsx')
+        output_file_path = (self.output_dir +
+                            datetime.now().strftime('dspace-reports_%Y-%m-%d_%H-%M-%S.xlsx'))
         excel_report_file = self.output.save_report_excel_file(
             output_file_path=output_file_path, worksheet_files=csv_report_files)
         if excel_report_file:
@@ -170,7 +171,8 @@ def create_zip_archive(self, excel_report_file=None):
             return False
 
         # Create ZIP archive with the Excel file
-        output_file_path = self.output_dir + datetime.now().strftime('dspace-reports_%Y-%m-%d_%H-%M-%S.zip')
+        output_file_path = (self.output_dir +
+                            datetime.now().strftime('dspace-reports_%Y-%m-%d_%H-%M-%S.zip'))
         zip_report_archive = self.output.save_report_zip_archive(output_file_path=output_file_path,
                                                                  excel_report_file=excel_report_file
         )

From b6db12b4af82b2a45e85ff48a29bca6da477a4fa Mon Sep 17 00:00:00 2001
From: nwoodward
Date: Thu, 18 Jul 2024 11:45:09 -0500
Subject: [PATCH 14/22] more lint fixes

---
 README.md                            | 2 +-
 dspace_reports/collection_indexer.py | 3 ++-
 dspace_reports/community_indexer.py  | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index d8a9efa..fc51914 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # dspace-reports
 
-A python3-based tool to generate and email statistical reports for [DSpace 7.x](https://github.com/DSpace/DSpace) repository administrators.
+A tool written in Python to generate and email statistical reports for [DSpace 7.x](https://github.com/DSpace/DSpace) repository administrators.
 
 ## Requirements
 
diff --git a/dspace_reports/collection_indexer.py b/dspace_reports/collection_indexer.py
index 77fb45d..5ea7879 100644
--- a/dspace_reports/collection_indexer.py
+++ b/dspace_reports/collection_indexer.py
@@ -36,7 +36,8 @@ def index_collections(self):
                 parent_community_name = parent_community['name']
 
             if len(collection_name) > 255:
-                self.logger.debug("Collection name is longer than 255 characters. It will be shortened to that length.")
+                self.logger.debug("Collection name is longer than 255 characters. " +
+                                  "It will be shortened to that length.")
                 collection_name = collection_name[0:251] + "..."
 
             # Insert the collection into the database
diff --git a/dspace_reports/community_indexer.py b/dspace_reports/community_indexer.py
index f766acf..dfebaf9 100644
--- a/dspace_reports/community_indexer.py
+++ b/dspace_reports/community_indexer.py
@@ -36,7 +36,8 @@ def index_communities(self):
                 parent_community_name = parent_community['name']
 
             if len(community_name) > 255:
-                self.logger.debug("Community name is longer than 255 characters. It will be shortened to that length.")
+                self.logger.debug("Community name is longer than 255 characters. " +
+                                  "It will be shortened to that length.")
                 community_name = community_name[0:251] + "..."
 
             # Insert the community into the database
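Both hunks above guard the same pattern: the *_stats name columns are evidently varchar(255), and names are cut to 251 characters plus an ellipsis (254 total) before insertion. The check could live in one helper; a sketch, with the column width and logger name assumed from the surrounding code:

import logging

logger = logging.getLogger('dspace-reports')

def shorten_name(name, limit=255, suffix="..."):
    """Trim a string so it fits a varchar(limit) column, appending an ellipsis."""
    if name is None or len(name) <= limit:
        return name
    logger.debug("Name is longer than %s characters. It will be shortened to that length.", limit)
    return name[0:limit - len(suffix) - 1] + suffix  # 251 characters + "..." = 254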
From 8adb3948e34e6b79cf9b554a638a1eacc7a5b274 Mon Sep 17 00:00:00 2001
From: nwoodward
Date: Fri, 19 Jul 2024 08:59:16 -0500
Subject: [PATCH 15/22] more linting fixes

---
 dspace_reports/collection_indexer.py | 6 ++++--
 dspace_reports/community_indexer.py  | 8 +++++---
 dspace_reports/item_indexer.py       | 6 ++++--
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/dspace_reports/collection_indexer.py b/dspace_reports/collection_indexer.py
index 5ea7879..6fede51 100644
--- a/dspace_reports/collection_indexer.py
+++ b/dspace_reports/collection_indexer.py
@@ -188,7 +188,8 @@ def index_collection_views(self, time_period=None):
             with db.cursor() as cursor:
                 while results_current_page <= results_num_pages:
                     print(
-                        f"Indexing collection views (page {results_current_page + 1} of {results_num_pages + 1})"
+                        f"Indexing collection views (page {results_current_page + 1} " +
+                        f"of {results_num_pages + 1})"
                     )
 
                     # Solr params for current page
@@ -306,7 +307,8 @@ def index_collection_downloads(self, time_period=None):
             while results_current_page <= results_num_pages:
                 # "pages" are zero based, but one based is more human readable
                 print(
-                    f"Indexing collection downloads (page {results_current_page + 1} of {results_num_pages + 1})"
+                    f"Indexing collection downloads (page {results_current_page + 1} " +
+                    f"of {results_num_pages + 1})"
                 )
 
                 # Solr params for current page
diff --git a/dspace_reports/community_indexer.py b/dspace_reports/community_indexer.py
index dfebaf9..e075d78 100644
--- a/dspace_reports/community_indexer.py
+++ b/dspace_reports/community_indexer.py
@@ -192,7 +192,8 @@ def index_community_views(self, time_period=None):
             with db.cursor() as cursor:
                 while results_current_page <= results_num_pages:
                     print(
-                        f"Indexing community views (page {results_current_page + 1} of {results_num_pages + 1})"
+                        f"Indexing community views (page {results_current_page + 1} " +
+                        f"of {results_num_pages + 1})"
                     )
 
                     # Solr params for current page
@@ -222,7 +223,7 @@ def index_community_views(self, time_period=None):
 
                     response = self.solr.call(url=solr_url, params=solr_query_params)
                     self.logger.info("Solr community views query: %s", response.url)
-                    
+
                     # Solr returns facets as a dict of dicts (see json.nl parameter)
                     views = response.json()["facet_counts"]["facet_fields"]
                     # Iterate over the facetField dict and get the UUIDs and views
@@ -309,7 +310,8 @@ def index_community_downloads(self, time_period=None):
             while results_current_page <= results_num_pages:
                 # "pages" are zero based, but one based is more human readable
                 print(
-                    f"Indexing community downloads (page {results_current_page + 1} of {results_num_pages + 1})"
+                    f"Indexing community downloads (page {results_current_page + 1} " +
+                    f"of {results_num_pages + 1})"
                 )
 
                 # Solr params for current page
diff --git a/dspace_reports/item_indexer.py b/dspace_reports/item_indexer.py
index a2652aa..0a472df 100644
--- a/dspace_reports/item_indexer.py
+++ b/dspace_reports/item_indexer.py
@@ -138,7 +138,8 @@ def index_item_views(self, time_period='all'):
 
                 while results_current_page <= results_num_pages:
                     print(
-                        f"Indexing item views (page {results_current_page + 1} of {results_num_pages + 1})"
+                        f"Indexing item views (page {results_current_page + 1} " +
+                        f"of {results_num_pages + 1})"
                     )
 
                     # Solr params for current page
@@ -258,7 +259,8 @@ def index_item_downloads(self, time_period='all'):
 
                 while results_current_page <= results_num_pages:
                     # "pages" are zero based, but one based is more human readable
                     print(
-                        f"Indexing item downloads (page 
{results_current_page + 1} of {results_num_pages + 1})" + f"Indexing item downloads (page {results_current_page + 1} " + + f"of {results_num_pages + 1})" ) # Solr params for current page From a7512ac860f7d7de29420c44de6ffe29e56a9875 Mon Sep 17 00:00:00 2001 From: nwoodward Date: Mon, 22 Jul 2024 12:42:05 -0500 Subject: [PATCH 16/22] added try-catch for requests timeout exception; increased timeout --- lib/solr.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/lib/solr.py b/lib/solr.py index 4d025e4..92e1501 100644 --- a/lib/solr.py +++ b/lib/solr.py @@ -15,8 +15,8 @@ def __init__(self, solr_server=None): else: self.solr_server = solr_server - # Timeout for requests to Solr - self.timeout = 5 + # Timeout in seconds for requests to Solr + self.timeout = 120 # Create session self.session = requests.Session() @@ -68,11 +68,17 @@ def call(self, call_type='GET', url=None, params=None): params = {} if call_type == 'POST': - response = self.session.post(url, params=params, headers=self.request_headers, - timeout=self.timeout) + try: + response = self.session.post(url, params=params, headers=self.request_headers, + timeout=self.timeout) + except requests.exceptions.Timeout: + self.logger.error("Call to Solr timed out after %s seconds.", str(self.timeout)) else: - response = self.session.get(url, params=params,headers=self.request_headers, - timeout=self.timeout) + try: + response = self.session.get(url, params=params,headers=self.request_headers, + timeout=self.timeout) + except requests.exceptions.Timeout: + self.logger.error("Call to Solr timed out after %s seconds.", str(self.timeout)) return response From 17d816e3098c0787f65f53db87bb1be3e3aaef0e Mon Sep 17 00:00:00 2001 From: nwoodward Date: Mon, 12 Aug 2024 10:41:15 -0500 Subject: [PATCH 17/22] fixed bug with solr views query --- dspace_reports/collection_indexer.py | 4 ++-- dspace_reports/community_indexer.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dspace_reports/collection_indexer.py b/dspace_reports/collection_indexer.py index 6fede51..f9220e7 100644 --- a/dspace_reports/collection_indexer.py +++ b/dspace_reports/collection_indexer.py @@ -137,7 +137,7 @@ def index_collection_views(self, time_period=None): # Default Solr params solr_query_params = { "q": f"type:2 AND owningColl:/.{{36}}/", - "fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL", + "fq": "-isBot:true AND statistics_type:view", "fl": "owningColl", "facet": "true", "facet.field": "owningColl", @@ -195,7 +195,7 @@ def index_collection_views(self, time_period=None): # Solr params for current page solr_query_params = { "q": f"type:2 AND owningColl:/.{{36}}/", - "fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL", + "fq": "-isBot:true AND statistics_type:view", "fl": "owningColl", "facet": "true", "facet.field": "owningColl", diff --git a/dspace_reports/community_indexer.py b/dspace_reports/community_indexer.py index e075d78..ac63327 100644 --- a/dspace_reports/community_indexer.py +++ b/dspace_reports/community_indexer.py @@ -138,7 +138,7 @@ def index_community_views(self, time_period=None): # Default Solr params solr_query_params = { "q": f"type:0 AND owningComm:/.{{36}}/", - "fq": "-isBot:true AND statistics_type:view AND bundleName:ORIGINAL", + "fq": "-isBot:true AND statistics_type:view", "fl": "owningComm", "facet": "true", "facet.field": "owningComm", From 88e3d546ad63b1111fdeaddfea568d09d1107ad7 Mon Sep 17 00:00:00 2001 From: nwoodward Date: Mon, 12 Aug 2024 14:15:46 -0500 
Subject: [PATCH 18/22] specify support for DSpace 7 and 8 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fc51914..bcb75ff 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # dspace-reports -A tool written in Python to generate and email statistical reports for [DSpace 7.x](https://github.com/DSpace/DSpace) repository administrators. +A tool written in Python to generate and email statistical reports for [DSpace 7+](https://github.com/DSpace/DSpace) repository administrators. ## Requirements From 981a2ec46ca57294caffd9ec85d622784122192d Mon Sep 17 00:00:00 2001 From: nwoodward Date: Mon, 12 Aug 2024 14:16:51 -0500 Subject: [PATCH 19/22] updated pyyaml --- poetry.lock | 110 ++++++++++++++++++++++++----------------------- pyproject.toml | 2 +- requirements.txt | 4 +- 3 files changed, 59 insertions(+), 57 deletions(-) diff --git a/poetry.lock b/poetry.lock index dcc14fb..fdd0e7f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -328,62 +328,64 @@ six = ">=1.5" [[package]] name = "pyyaml" -version = "6.0.1" +version = "6.0.2" description = "YAML parser and emitter for Python" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, - {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, - {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, - {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, - {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, - {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, - {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, - {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", 
hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, - {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, - {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, - {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, - {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, - {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, - {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, - {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, - {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, - {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, - {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, - {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, - {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, - {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, - {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, + {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, + {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"}, + {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"}, + {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"}, + {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"}, + {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"}, + {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"}, + {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"}, + {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"}, + {file = 
"PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"}, + {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"}, + {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"}, + {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"}, + {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"}, + {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"}, + {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, + {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] [[package]] @@ -583,4 +585,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "26dfb4c08fbe0e0806f9d73966cc1f72278a62fb97c333348bc35fee85125326" +content-hash = "b30cde9da52f65c225d8d66d5b05af0445818de6b929c6b2755b444c4f6f1aeb" diff --git a/pyproject.toml b/pyproject.toml index 67e34f7..d5e5fb4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ mccabe = "==0.7.0" psycopg = "3.2.1" pylint = "3.2.5" python-dateutil = "2.9.0.post0" -PyYAML = "6.0.1" +PyYAML = "6.0.2" requests = "2.32.3" six = "==1.16.0" toml = "==0.10.2" diff --git a/requirements.txt b/requirements.txt index 0d812fb..72c57de 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,7 @@ platformdirs==4.2.2 ; python_version >= "3.9" and python_version < "4.0" psycopg==3.2.1 ; python_version >= "3.9" and python_version < "4.0" pylint==3.2.5 ; python_version >= "3.9" and python_version < "4.0" 
python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_version < "4.0" -pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "4.0" +pyyaml==6.0.2 ; python_version >= "3.9" and python_version < "4.0" requests==2.32.3 ; python_version >= "3.9" and python_version < "4.0" six==1.16.0 ; python_version >= "3.9" and python_version < "4.0" toml==0.10.2 ; python_version >= "3.9" and python_version < "4.0" @@ -22,4 +22,4 @@ typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "4.0" tzdata==2024.1 ; python_version >= "3.9" and python_version < "4.0" and sys_platform == "win32" urllib3==2.2.2 ; python_version >= "3.9" and python_version < "4.0" wrapt==1.16.0 ; python_version >= "3.9" and python_version < "4.0" -xlsxwriter==3.2.0 ; python_version >= "3.9" and python_version < "4.0" \ No newline at end of file +xlsxwriter==3.2.0 ; python_version >= "3.9" and python_version < "4.0" From 93560543d208377e6251ae67971c9025b1c190a7 Mon Sep 17 00:00:00 2001 From: nwoodward Date: Mon, 12 Aug 2024 14:18:26 -0500 Subject: [PATCH 20/22] removed OAI connector that is no longer needed --- lib/oai.py | 140 ----------------------------------------------------- 1 file changed, 140 deletions(-) delete mode 100644 lib/oai.py diff --git a/lib/oai.py b/lib/oai.py deleted file mode 100644 index a4c8c9b..0000000 --- a/lib/oai.py +++ /dev/null @@ -1,140 +0,0 @@ -"""Class for interacting with a DSpace 7+ OAI-PMH endpoint""" - -import logging -import re -from time import sleep -import xml.etree.ElementTree as ET -import requests - - -class DSpaceOai(): - """Class for interacting with a DSpace 7+ OAI-PMH endpoint""" - - ns = { - 'oai': 'http://www.openarchives.org/OAI/2.0/', - 'dc': 'http://purl.org/dc/elements/1.1/' - } - - def __init__(self, oai_server=None): - # Ensure solr_server has trailing slash - if oai_server[len(oai_server)-1] != '/': - self.oai_server = oai_server + '/' - else: - self.oai_server = oai_server - - # Add 'request' to path - self.oai_server = self.oai_server + 'request' - - self.timeout = 5 - self.limit = 100 - self.sleep_time = 1 - self.headers = {'User-Agent': 'OAIHarvester/2.0', 'Accept': 'text/html', - 'Accept-Encoding': 'compress, deflate'} - - self.logger = logging.getLogger('dspace-reports') - - # Test connection to OAI-PMH feed - self.test_connection() - - def test_connection(self): - """Test OAI-PMH connection""" - - identify_url = self.construct_url(verb='Identify') - self.logger.info("Testing OAI-PMH feed connection: %s.", identify_url) - response = self.call(url = identify_url) - - if response.status_code == 200: - self.logger.info("OAI_PMH feed connection successful.") - return True - - self.logger.error("OAI-PMH feed connection NOT successful.") - return False - - def construct_url(self, verb, params=None): - """Create URL""" - - if params is None: - params = {} - - parameters = '' - for key, value in params.items(): - parameters += '&' + key + '=' + str(value) - - new_url = self.oai_server + '?verb=' + verb + parameters - return new_url - - def call(self, url=None, params=None): - """Make call to endpoint""" - - if url is None: - return None - - if params is None: - params = {} - - response = requests.get(url, params=params, timeout=self.timeout) - return response - - def pause(self, wait_time): - """Pause before next call""" - - self.logger.info("Pausing harvest process for %s second(s).", str(wait_time)) - sleep(wait_time) - - def get_records(self): - """Get all records""" - - offset = 0 - all_records = [] - params = { - 'metadataPrefix': 'oai_dc' - 
}
-
-        while True:
-            self.logger.debug("Retrieving records %s through %s from the OAI-PMH feed.",
-                              str(offset), str(offset + self.limit))
-            records_url = self.construct_url(verb = 'ListRecords', params = params)
-            self.logger.debug("Records OAI-PMH call: %s", records_url)
-
-            records_response = self.call(url = records_url)
-            records_root = ET.fromstring(records_response.text)
-
-            list_records = records_root.find('.//oai:ListRecords', self.ns)
-            if list_records:
-                records = list_records.findall('.//oai:record', self.ns)
-                for record in records:
-                    metadata = record.find('.//oai:metadata', self.ns)
-                    if metadata:
-                        identifier_nodes = metadata.findall('.//dc:identifier', self.ns)
-                        for identifier_node in identifier_nodes:
-                            if identifier_node is not None and identifier_node.text is not None:
-                                self.logger.info("Looking at record identifier: %s : %s",
-                                                 identifier_node.tag, identifier_node.text)
-                                handle = re.search('^https?://hdl.handle.net', identifier_node.text)
-                                if handle:
-                                    all_records.append(identifier_node.text)
-                                else:
-                                    self.logger.debug("Identifier is not a handle URL: %s",
-                                                      identifier_node.text)
-
-            # Check for resumptionToken
-            token_match = re.search('<resumptionToken[^>]*>(.*)</resumptionToken>',
-                                    records_response.text)
-            if not token_match:
-                break
-
-            token = token_match.group(1)
-            self.logger.debug("resumptionToken: %s", token)
-            params['resumptionToken'] = token
-
-            # Remove metadataPrefix from params
-            if 'metadataPrefix' in params:
-                params.pop('metadataPrefix')
-
-            offset = offset + self.limit
-
-            if self.sleep_time:
-                self.pause(self.sleep_time)
-
-        self.logger.debug("Harvested %s records from OAI feed.", str(len(all_records)))
-        return all_records

From 00b518903caac0dddd92d7f2d84f1c52347fcae2 Mon Sep 17 00:00:00 2001
From: nwoodward
Date: Mon, 12 Aug 2024 14:19:33 -0500
Subject: [PATCH 21/22] updated version for release

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index d5e5fb4..df8a7d5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dspace-reports"
-version = "2.0-SNAPSHOT"
+version = "2.0.0"
 description = "A python3-based tool to generate and email views and downloads statistical reports for a DSpace 7+ repository."
 authors = ["Nicholas Woodward <njw@austin.utexas.edu>"]
 license = "GPLv3"

From 4e067edd6c8bee8eca7058a4a21e3292839165e8 Mon Sep 17 00:00:00 2001
From: nwoodward
Date: Mon, 12 Aug 2024 14:22:12 -0500
Subject: [PATCH 22/22] specify support for DSpace 7 and 8

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index bcb75ff..0c435e1 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ A tool written in Python to generate and email statistical reports for [DSpace 7
 
 - Python 3.9+
 - PostgreSQL 13+
-- DSpace 7.x repository **
+- DSpace 7.x or 8.x repository **
 
 ** If your Solr index contains statistics from legacy DSpace 5.x or earlier instances, then the quality of the reports will go up significantly if you have migrated the old statistics to the new UUID identifiers in DSpace 6. See the [DSpace Documentation](https://wiki.lyrasis.org/display/DSDOC6x/SOLR+Statistics+Maintenance#SOLRStatisticsMaintenance-UpgradeLegacyDSpaceObjectIdentifiers(pre-6xstatistics)toDSpace6xUUIDIdentifiers) for more information.
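As background for the note above: every indexer in this series ultimately issues Solr facet queries against the statistics core, keyed on UUID-valued fields such as owningComm and owningColl, which is why reports degrade when legacy pre-6.x statistics were never migrated to UUIDs. A rough standalone sketch of one such query, mirroring the collection-views hunks earlier; the core URL is an assumption, and the 120-second timeout follows the value set in lib/solr.py:

import requests

solr_url = "http://localhost:8983/solr/statistics/select"  # assumed core location
solr_query_params = {
    "q": "type:2 AND owningColl:/.{36}/",          # item events owned by some collection
    "fq": "-isBot:true AND statistics_type:view",
    "facet": "true",
    "facet.field": "owningColl",
    "rows": 0,
    "wt": "json",
    "json.nl": "map",  # facets come back as a dict of {uuid: count}
}

response = None  # pre-initialize so a timeout still leaves the name defined
try:
    response = requests.get(solr_url, params=solr_query_params, timeout=120)
except requests.exceptions.Timeout:
    print("Call to Solr timed out after 120 seconds.")

if response is not None and response.status_code == 200:
    views = response.json()["facet_counts"]["facet_fields"]["owningColl"]
    for uuid, count in views.items():
        print(uuid, count)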