--- a/tests/test_arrow_dataset.py 2024-02-20 21:53:24.248470991 +0100
+++ b/tests/test_arrow_dataset.py 2024-02-20 21:53:29.441804737 +0100
@@ -3978,7 +3978,6 @@
     [
         "relative/path",
         "/absolute/path",
-        "s3://bucket/relative/path",
         "hdfs://relative/path",
         "hdfs:///absolute/path",
     ],
--- a/tests/test_hf_gcp.py 2024-02-20 21:55:18.821852434 +0100
+++ b/tests/test_hf_gcp.py 2024-02-20 21:55:46.525186394 +0100
@@ -22,7 +22,6 @@
     {"dataset": "wikipedia", "config_name": "20220301.it"},
     {"dataset": "wikipedia", "config_name": "20220301.simple"},
     {"dataset": "snli", "config_name": "plain_text"},
-    {"dataset": "eli5", "config_name": "LFQA_reddit"},
     {"dataset": "wiki40b", "config_name": "en"},
     {"dataset": "wiki_dpr", "config_name": "psgs_w100.nq.compressed"},
     {"dataset": "wiki_dpr", "config_name": "psgs_w100.nq.no_index"},
--- a/tests/test_inspect.py 2024-02-20 22:01:35.148488467 +0100
+++ b/tests/test_inspect.py 2024-02-20 22:02:14.458561571 +0100
@@ -15,7 +15,7 @@
 pytestmark = pytest.mark.integration
 
 
-@pytest.mark.parametrize("path", ["paws", "csv"])
+@pytest.mark.parametrize("path", ["csv"])
 def test_inspect_dataset(path, tmp_path):
     inspect_dataset(path, tmp_path)
     script_name = path + ".py"
--- a/tests/test_load.py 2024-02-20 22:12:13.699209107 +0100
+++ b/tests/test_load.py 2024-02-20 22:13:10.862626708 +0100
@@ -1235,12 +1235,6 @@
 
 
 @pytest.mark.integration
-def test_load_streaming_private_dataset_with_zipped_data(hf_token, hf_private_dataset_repo_zipped_txt_data):
-    ds = load_dataset(hf_private_dataset_repo_zipped_txt_data, streaming=True, token=hf_token)
-    assert next(iter(ds)) is not None
-
-
-@pytest.mark.integration
 def test_load_dataset_config_kwargs_passed_as_arguments():
     ds_default = load_dataset(SAMPLE_DATASET_IDENTIFIER4)
     ds_custom = load_dataset(SAMPLE_DATASET_IDENTIFIER4, drop_metadata=True)