--- a/tests/test_arrow_dataset.py 2024-02-20 21:53:24.248470991 +0100
+++ b/tests/test_arrow_dataset.py 2024-02-20 21:53:29.441804737 +0100
@@ -3978,7 +3978,6 @@
     [
         "relative/path",
         "/absolute/path",
-        "s3://bucket/relative/path",
         "hdfs://relative/path",
         "hdfs:///absolute/path",
     ],
--- a/tests/test_hf_gcp.py 2024-02-20 21:55:18.821852434 +0100
+++ b/tests/test_hf_gcp.py 2024-02-20 21:55:46.525186394 +0100
@@ -22,7 +22,6 @@
     {"dataset": "wikipedia", "config_name": "20220301.it"},
     {"dataset": "wikipedia", "config_name": "20220301.simple"},
     {"dataset": "snli", "config_name": "plain_text"},
-    {"dataset": "eli5", "config_name": "LFQA_reddit"},
     {"dataset": "wiki40b", "config_name": "en"},
     {"dataset": "wiki_dpr", "config_name": "psgs_w100.nq.compressed"},
     {"dataset": "wiki_dpr", "config_name": "psgs_w100.nq.no_index"},
--- a/tests/test_inspect.py 2024-02-20 22:01:35.148488467 +0100
+++ b/tests/test_inspect.py 2024-02-20 22:02:14.458561571 +0100
@@ -15,7 +15,7 @@
 pytestmark = pytest.mark.integration
 
 
-@pytest.mark.parametrize("path", ["paws", "csv"])
+@pytest.mark.parametrize("path", ["csv"])
 def test_inspect_dataset(path, tmp_path):
     inspect_dataset(path, tmp_path)
     script_name = path + ".py"
--- a/tests/test_load.py 2024-02-20 22:12:13.699209107 +0100
+++ b/tests/test_load.py 2024-02-20 22:13:10.862626708 +0100
@@ -1235,12 +1235,6 @@
 
 
 @pytest.mark.integration
-def test_load_streaming_private_dataset_with_zipped_data(hf_token, hf_private_dataset_repo_zipped_txt_data):
-    ds = load_dataset(hf_private_dataset_repo_zipped_txt_data, streaming=True, token=hf_token)
-    assert next(iter(ds)) is not None
-
-
-@pytest.mark.integration
 def test_load_dataset_config_kwargs_passed_as_arguments():
     ds_default = load_dataset(SAMPLE_DATASET_IDENTIFIER4)
     ds_custom = load_dataset(SAMPLE_DATASET_IDENTIFIER4, drop_metadata=True)