diff options
author | Alfredo Tupone <tupone@gentoo.org> | 2024-02-21 22:07:04 +0100 |
---|---|---|
committer | Alfredo Tupone <tupone@gentoo.org> | 2024-02-21 22:07:36 +0100 |
commit | 7938c21e268aff71e1d091dfbce1bfba8bde8308 (patch) | |
tree | 60b6537e2fa81f6685d511725457fb5af9ec8de4 /sci-libs/datasets | |
parent | dev-build/ninja: simplify LFS handling (diff) | |
download | gentoo-7938c21e268aff71e1d091dfbce1bfba8bde8308.tar.gz gentoo-7938c21e268aff71e1d091dfbce1bfba8bde8308.tar.bz2 gentoo-7938c21e268aff71e1d091dfbce1bfba8bde8308.zip |
sci-libs/datasets: drop tests that require network
Closes: https://bugs.gentoo.org/925171
Signed-off-by: Alfredo Tupone <tupone@gentoo.org>
Diffstat (limited to 'sci-libs/datasets')
-rw-r--r-- | sci-libs/datasets/datasets-2.16.0.ebuild | 14 | ||||
-rw-r--r-- | sci-libs/datasets/files/datasets-2.16.0-tests.patch | 160 |
2 files changed, 116 insertions, 58 deletions
diff --git a/sci-libs/datasets/datasets-2.16.0.ebuild b/sci-libs/datasets/datasets-2.16.0.ebuild index 0325b5ae63d6..a34fcaa2f89c 100644 --- a/sci-libs/datasets/datasets-2.16.0.ebuild +++ b/sci-libs/datasets/datasets-2.16.0.ebuild @@ -66,4 +66,18 @@ src_prepare() { sed -i -e \ "/pyarrow_hotfix/d" \ src/datasets/features/features.py || die + sed -i \ + -e "s:pytest.mark.integration:pytest.mark.skip():g" \ + tests/test_arrow_dataset.py \ + tests/test_fingerprint.py \ + tests/test_hf_gcp.py \ + tests/test_inspect.py \ + tests/test_iterable_dataset.py \ + tests/test_iterable_dataset.py \ + tests/test_load.py \ + tests/test_offline_util.py \ + tests/test_streaming_download_manager.py \ + tests/commands/test_test.py \ + tests/packaged_modules/test_cache.py \ + die } diff --git a/sci-libs/datasets/files/datasets-2.16.0-tests.patch b/sci-libs/datasets/files/datasets-2.16.0-tests.patch index 6b2845bce168..8cb89e824b3b 100644 --- a/sci-libs/datasets/files/datasets-2.16.0-tests.patch +++ b/sci-libs/datasets/files/datasets-2.16.0-tests.patch @@ -10,51 +10,72 @@ ], --- a/tests/test_load.py 2024-02-20 22:12:13.699209107 +0100 +++ b/tests/test_load.py 2024-02-20 22:13:10.862626708 +0100 -@@ -386,21 +386,6 @@ +@@ -386,6 +386,7 @@ hf_modules_cache=self.hf_modules_cache, ) -- def test_HubDatasetModuleFactoryWithScript_dont_trust_remote_code(self): -- # "squad" has a dataset script -- factory = HubDatasetModuleFactoryWithScript( -- "squad", download_config=self.download_config, dynamic_modules_path=self.dynamic_modules_path -- ) -- with patch.object(config, "HF_DATASETS_TRUST_REMOTE_CODE", None): # this will be the default soon -- self.assertRaises(ValueError, factory.get_module) -- factory = HubDatasetModuleFactoryWithScript( -- "squad", -- download_config=self.download_config, -- dynamic_modules_path=self.dynamic_modules_path, -- trust_remote_code=False, -- ) -- self.assertRaises(ValueError, factory.get_module) -- ++ @pytest.mark.skip(reason="") + def 
test_HubDatasetModuleFactoryWithScript_dont_trust_remote_code(self): + # "squad" has a dataset script + factory = HubDatasetModuleFactoryWithScript( +@@ -402,6 +402,7 @@ + ) + self.assertRaises(ValueError, factory.get_module) + ++ @pytest.mark.skip() def test_HubDatasetModuleFactoryWithScript_with_github_dataset(self): # "wmt_t2t" has additional imports (internal) factory = HubDatasetModuleFactoryWithScript( -@@ -1235,12 +1235,6 @@ - - - @pytest.mark.integration --def test_load_streaming_private_dataset_with_zipped_data(hf_token, hf_private_dataset_repo_zipped_txt_data): -- ds = load_dataset(hf_private_dataset_repo_zipped_txt_data, streaming=True, token=hf_token) -- assert next(iter(ds)) is not None -- -- --@pytest.mark.integration - def test_load_dataset_config_kwargs_passed_as_arguments(): - ds_default = load_dataset(SAMPLE_DATASET_IDENTIFIER4) - ds_custom = load_dataset(SAMPLE_DATASET_IDENTIFIER4, drop_metadata=True) +@@ -411,6 +412,7 @@ + assert importlib.import_module(module_factory_result.module_path) is not None + assert module_factory_result.builder_kwargs["base_path"].startswith(config.HF_ENDPOINT) + ++ @pytest.mark.skip() + def test_GithubMetricModuleFactory_with_internal_import(self): + # "squad_v2" requires additional imports (internal) + factory = GithubMetricModuleFactory( +@@ -419,6 +421,7 @@ + module_factory_result = factory.get_module() + assert importlib.import_module(module_factory_result.module_path) is not None + ++ @pytest.mark.skip() + @pytest.mark.filterwarnings("ignore:GithubMetricModuleFactory is deprecated:FutureWarning") + def test_GithubMetricModuleFactory_with_external_import(self): + # "bleu" requires additional imports (external from github) +@@ -1032,6 +1035,7 @@ + datasets.load_dataset_builder(SAMPLE_DATASET_TWO_CONFIG_IN_METADATA, "non-existing-config") + + ++@pytest.mark.skip() + @pytest.mark.parametrize("serializer", [pickle, dill]) + def test_load_dataset_builder_with_metadata_configs_pickable(serializer): + builder = 
datasets.load_dataset_builder(SAMPLE_DATASET_SINGLE_CONFIG_IN_METADATA) +@@ -1153,6 +1157,7 @@ + assert len(builder.config.data_files["test"]) > 0 + + ++@pytest.mark.skip() + def test_load_dataset_builder_fail(): + with pytest.raises(DatasetNotFoundError): + datasets.load_dataset_builder("blabla") +@@ -1168,6 +1173,7 @@ + assert isinstance(next(iter(dataset["train"])), dict) + + ++@pytest.mark.skip() + def test_load_dataset_cached_local_script(dataset_loading_script_dir, data_dir, caplog): + dataset = load_dataset(dataset_loading_script_dir, data_dir=data_dir) + assert isinstance(dataset, DatasetDict) --- a/tests/test_hf_gcp.py 2024-02-21 09:59:26.918397895 +0100 +++ b/tests/test_hf_gcp.py 2024-02-21 09:59:46.335100597 +0100 -@@ -21,7 +21,6 @@ - {"dataset": "wikipedia", "config_name": "20220301.frr"}, - {"dataset": "wikipedia", "config_name": "20220301.it"}, - {"dataset": "wikipedia", "config_name": "20220301.simple"}, -- {"dataset": "eli5", "config_name": "LFQA_reddit"}, - {"dataset": "wiki40b", "config_name": "en"}, - {"dataset": "wiki_dpr", "config_name": "psgs_w100.nq.compressed"}, - {"dataset": "wiki_dpr", "config_name": "psgs_w100.nq.no_index"}, +@@ -47,6 +47,7 @@ + ] + + ++@pytest.mark.skip("network") + @parameterized.named_parameters(list_datasets_on_hf_gcp_parameters(with_config=True)) + class TestDatasetOnHfGcp(TestCase): + dataset = None --- a/tests/test_inspect.py 2024-02-21 10:03:32.315520016 +0100 +++ b/tests/test_inspect.py 2024-02-21 10:03:50.345553490 +0100 @@ -18,7 +18,7 @@ @@ -66,24 +87,47 @@ def test_inspect_dataset(path, tmp_path): inspect_dataset(path, tmp_path) script_name = Path(path).stem + ".py" ---- a/tests/packaged_modules/test_cache.py 2024-02-21 12:04:18.036866572 +0100 -+++ b/tests/packaged_modules/test_cache.py 2024-02-21 12:04:54.333558520 +0100 -@@ -44,18 +44,3 @@ - Cache(dataset_name=text_dir.name, hash="missing").download_and_prepare() - with pytest.raises(ValueError): - Cache(dataset_name=text_dir.name, config_name="missing", 
version="auto", hash="auto").download_and_prepare() -- -- --@pytest.mark.integration --def test_cache_multi_configs(): -- repo_id = SAMPLE_DATASET_TWO_CONFIG_IN_METADATA -- dataset_name = repo_id.split("/")[-1] -- config_name = "v1" -- ds = load_dataset(repo_id, config_name) -- cache = Cache(dataset_name=dataset_name, repo_id=repo_id, config_name=config_name, version="auto", hash="auto") -- reloaded = cache.as_dataset() -- assert list(ds) == list(reloaded) -- assert len(ds["train"]) == len(reloaded["train"]) -- with pytest.raises(ValueError) as excinfo: -- Cache(dataset_name=dataset_name, repo_id=repo_id, config_name="missing", version="auto", hash="auto") -- assert config_name in str(excinfo.value) +@@ -49,6 +49,7 @@ + assert list(info.splits.keys()) == expected_splits + + ++@pytest.mark.skip(reason="require network") + def test_get_dataset_config_info_private(hf_token, hf_private_dataset_repo_txt_data): + info = get_dataset_config_info(hf_private_dataset_repo_txt_data, config_name="default", token=hf_token) + assert list(info.splits.keys()) == ["train"] +--- a/tests/test_data_files.py 2024-02-21 20:22:57.536160356 +0100 ++++ b/tests/test_data_files.py 2024-02-21 20:25:00.153052174 +0100 +@@ -378,6 +378,7 @@ + assert len(hub_dataset_repo_patterns_results[pattern]) == 0 + + ++@pytest.mark.skip(reason="network") + def test_DataFilesList_from_patterns_locally_with_extra_files(complex_data_dir, text_file): + data_files_list = DataFilesList.from_patterns([_TEST_URL, text_file.as_posix()], complex_data_dir) + assert list(data_files_list) == [_TEST_URL, text_file.as_posix()] +@@ -467,6 +468,7 @@ + assert Hasher.hash(data_files1) != Hasher.hash(data_files2) + + ++@pytest.mark.skip(reason="network") + def test_DataFilesDict_from_patterns_locally_or_remote_hashing(text_file): + patterns = {"train": [_TEST_URL], "test": [str(text_file)]} + data_files1 = DataFilesDict.from_patterns(patterns) +--- a/tests/packaged_modules/test_folder_based_builder.py 2024-02-21 
21:30:20.718922523 +0100 ++++ b/tests/packaged_modules/test_folder_based_builder.py 2024-02-21 21:31:46.309061287 +0100 +@@ -382,6 +382,7 @@ + assert example[column] is not None + + ++@pytest.mark.skip(reason="network") + @pytest.mark.parametrize("remote", [True, False]) + @pytest.mark.parametrize("drop_labels", [None, True, False]) + def test_data_files_with_different_levels_no_metadata( +@@ -405,6 +406,7 @@ + assert all(example.keys() == {"base", "label"} for _, example in generator) + + ++@pytest.mark.skip(reason="network") + @pytest.mark.parametrize("remote", [False, True]) + @pytest.mark.parametrize("drop_labels", [None, True, False]) + def test_data_files_with_one_label_no_metadata(data_files_with_one_label_no_metadata, drop_labels, remote, cache_dir): |