diff options
Diffstat (limited to 'pkgs/development/python-modules/tensorflow-datasets/default.nix')
| -rw-r--r-- | pkgs/development/python-modules/tensorflow-datasets/default.nix | 89 |
1 files changed, 57 insertions, 32 deletions
diff --git a/pkgs/development/python-modules/tensorflow-datasets/default.nix b/pkgs/development/python-modules/tensorflow-datasets/default.nix index efe19803d25a..625619e9bd0a 100644 --- a/pkgs/development/python-modules/tensorflow-datasets/default.nix +++ b/pkgs/development/python-modules/tensorflow-datasets/default.nix @@ -1,81 +1,94 @@ { - apache-beam, + lib, + buildPythonPackage, + fetchFromGitHub, + fetchpatch, + + # dependencies array-record, - attrs, + dill, + dm-tree, + future, + immutabledict, + importlib-resources, + numpy, + promise, + protobuf, + psutil, + requests, + simple-parsing, + six, + tensorflow-metadata, + termcolor, + tqdm, + + # tests + apache-beam, beautifulsoup4, - buildPythonPackage, click, datasets, - dill, - dm-tree, - fetchFromGitHub, ffmpeg, - future, imagemagick, - importlib-resources, jax, jaxlib, jinja2, langdetect, - lib, lxml, matplotlib, + mlcroissant, mwparserfromhell, mwxml, networkx, nltk, - numpy, opencv4, pandas, pillow, - promise, - protobuf, - psutil, pycocotools, pydub, pytest-xdist, pytestCheckHook, - requests, scikit-image, scipy, - six, + sortedcontainers, tensorflow, - tensorflow-metadata, - termcolor, tifffile, - tqdm, zarr, }: buildPythonPackage rec { pname = "tensorflow-datasets"; - version = "4.9.7"; - format = "setuptools"; + version = "4.9.8"; + pyproject = true; src = fetchFromGitHub { owner = "tensorflow"; repo = "datasets"; tag = "v${version}"; - hash = "sha256-pnut5z3rEYIOOhsQT6uVjPdy+SqftKptSQMwxbMXoKA="; + hash = "sha256-nqveZ+8b0f5sGIn6WufKeA37yEsZjzhCIbCfwMZ9JOM="; }; patches = [ - # addresses https://github.com/tensorflow/datasets/issues/3673 - ./corruptions.patch + # mlcroissant uses encoding_formats, not encoding_format. + # Backport https://github.com/tensorflow/datasets/pull/11037 until released. 
+ (fetchpatch { + url = "https://github.com/tensorflow/datasets/commit/92cbcff725a1036569a515cc3356aa8480740451.patch"; + hash = "sha256-2hnMvQP83+eAJllce19aHujcoWQzUz3+LsasWCo4BtM="; + }) ]; - propagatedBuildInputs = [ + dependencies = [ array-record - attrs dill dm-tree future + immutabledict importlib-resources numpy promise protobuf psutil requests + simple-parsing six tensorflow-metadata termcolor @@ -97,6 +110,7 @@ buildPythonPackage rec { langdetect lxml matplotlib + mlcroissant mwparserfromhell mwxml networkx @@ -110,11 +124,18 @@ buildPythonPackage rec { pytestCheckHook scikit-image scipy + sortedcontainers tensorflow tifffile zarr ]; + pytestFlagsArray = [ + # AttributeError: 'NoneType' object has no attribute 'Table' + "--deselect=tensorflow_datasets/core/file_adapters_test.py::test_read_write" + "--deselect=tensorflow_datasets/text/c4_wsrs/c4_wsrs_test.py::C4WSRSTest" + ]; + disabledTestPaths = [ # Sandbox violations: network access, filesystem write attempts outside of build dir, ... "tensorflow_datasets/core/dataset_builder_test.py" @@ -126,6 +147,7 @@ buildPythonPackage rec { "tensorflow_datasets/import_without_tf_test.py" "tensorflow_datasets/proto/build_tf_proto_test.py" "tensorflow_datasets/scripts/cli/build_test.py" + "tensorflow_datasets/datasets/imagenet2012_corrupted/imagenet2012_corrupted_dataset_builder_test.py" # Requires `pretty_midi` which is not packaged in `nixpkgs`. "tensorflow_datasets/audio/groove.py" @@ -143,14 +165,16 @@ buildPythonPackage rec { # Requires `gcld3` and `pretty_midi` which are not packaged in `nixpkgs`. "tensorflow_datasets/core/lazy_imports_lib_test.py" + # AttributeError: 'NoneType' object has no attribute 'Table' + "tensorflow_datasets/core/dataset_builder_beam_test.py" + "tensorflow_datasets/core/dataset_builders/adhoc_builder_test.py" + "tensorflow_datasets/core/split_builder_test.py" + "tensorflow_datasets/core/writer_test.py" + # Requires `tensorflow_io` which is not packaged in `nixpkgs`. 
"tensorflow_datasets/core/features/audio_feature_test.py" "tensorflow_datasets/image/lsun_test.py" - # Requires `envlogger` which is not packaged in `nixpkgs`. - "tensorflow_datasets/rlds/locomotion/locomotion_test.py" - "tensorflow_datasets/rlds/robosuite_panda_pick_place_can/robosuite_panda_pick_place_can_test.py" - # Fails with `TypeError: Constant constructor takes either 0 or 2 positional arguments` # deep in TF AutoGraph. Doesn't reproduce in Docker with Ubuntu 22.04 => might be related # to the differences in some of the dependencies? @@ -171,10 +195,11 @@ buildPythonPackage rec { "tensorflow_datasets/text/c4_utils_test.py" ]; - meta = with lib; { + meta = { description = "Library of datasets ready to use with TensorFlow"; homepage = "https://www.tensorflow.org/datasets/overview"; - license = licenses.asl20; - maintainers = with maintainers; [ ndl ]; + changelog = "https://github.com/tensorflow/datasets/releases/tag/v${version}"; + license = lib.licenses.asl20; + maintainers = with lib.maintainers; [ ndl ]; }; } |
