Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new URL of MIND small and MIND large #2145

Merged
merged 5 commits on Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions recommenders/datasets/mind.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@


URL_MIND_LARGE_TRAIN = (
- "https://mind201910small.blob.core.windows.net/release/MINDlarge_train.zip"
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip"
)
URL_MIND_LARGE_VALID = (
- "https://mind201910small.blob.core.windows.net/release/MINDlarge_dev.zip"
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip"
)
URL_MIND_SMALL_TRAIN = (
- "https://mind201910small.blob.core.windows.net/release/MINDsmall_train.zip"
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip"
)
URL_MIND_SMALL_VALID = (
- "https://mind201910small.blob.core.windows.net/release/MINDsmall_dev.zip"
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip"
)
URL_MIND_DEMO_TRAIN = (
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip"
Expand Down
4 changes: 2 additions & 2 deletions recommenders/models/newsrec/newsrec_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,15 +310,15 @@ def get_mind_data_set(type):

if type == "large":
return (
- "https://mind201910small.blob.core.windows.net/release/",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/",
"MINDlarge_train.zip",
"MINDlarge_dev.zip",
"MINDlarge_utils.zip",
)

elif type == "small":
return (
- "https://mind201910small.blob.core.windows.net/release/",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/",
"MINDsmall_train.zip",
"MINDsmall_dev.zip",
"MINDsmall_utils.zip",
Expand Down
30 changes: 15 additions & 15 deletions tests/data_validation/recommenders/datasets/test_mind.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,34 +27,34 @@
'"0x8D8B8AD5B126C3B"',
),
(
- "https://mind201910small.blob.core.windows.net/release/MINDsmall_train.zip",
- "52952752",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip",
+ "52953372",
"0x8D834F2EB31BDEC",
),
(
- "https://mind201910small.blob.core.windows.net/release/MINDsmall_dev.zip",
- "30945572",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip",
+ "30946172",
"0x8D834F2EBA8D865",
),
(
- "https://mind201910small.blob.core.windows.net/release/MINDsmall_utils.zip",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip",
"155178106",
"0x8D87F67F4AEB960",
),
(
- "https://mind201910small.blob.core.windows.net/release/MINDlarge_train.zip",
- "530196631",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip",
+ "531361237",
"0x8D8244E90C15C07",
),
(
- "https://mind201910small.blob.core.windows.net/release/MINDlarge_dev.zip",
- "103456245",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip",
+ "103593383",
"0x8D8244E92005849",
),
(
- "https://mind201910small.blob.core.windows.net/release/MINDlarge_utils.zip",
+ "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip",
"150359301",
- "0x8D87F67E6CA4364",
+ "0x8D8B8AD5B2ED4C9",
),
],
)
Expand All @@ -75,9 +75,9 @@ def test_download_mind_demo(tmp):
def test_download_mind_small(tmp):
train_path, valid_path = download_mind(size="small", dest_path=tmp)
statinfo = os.stat(train_path)
- assert statinfo.st_size == 52952752
+ assert statinfo.st_size == 52953372
statinfo = os.stat(valid_path)
- assert statinfo.st_size == 30945572
+ assert statinfo.st_size == 30946172


def test_extract_mind_demo(tmp):
Expand Down Expand Up @@ -127,9 +127,9 @@ def test_extract_mind_small(tmp):
def test_download_mind_large(tmp_path):
train_path, valid_path = download_mind(size="large", dest_path=tmp_path)
statinfo = os.stat(train_path)
- assert statinfo.st_size == 530196631
+ assert statinfo.st_size == 531361237
statinfo = os.stat(valid_path)
- assert statinfo.st_size == 103456245
+ assert statinfo.st_size == 103593383


def test_extract_mind_large(tmp):
Expand Down
Loading