datumaro.components.dataset_item_storage#

Classes

DatasetItemStorage()

DatasetItemStorageDatasetView(parent, infos, ...)

ItemStatus(value[, names, module, qualname, ...])

class datumaro.components.dataset_item_storage.ItemStatus(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Bases: Enum

added = 1#
modified = 2#
removed = 3#
class datumaro.components.dataset_item_storage.DatasetItemStorage[source]#

Bases: object

is_empty() bool[source]#
put(item: DatasetItem) bool[source]#
get(id: str | DatasetItem, subset: str | None = None, dummy: Any = None) DatasetItem | None[source]#
remove(id: str | DatasetItem, subset: str | None = None) bool[source]#
get_subset(name)[source]#
subsets()[source]#
get_annotated_items()[source]#
get_datasetitem_by_path(path)[source]#
get_annotations()[source]#
class datumaro.components.dataset_item_storage.DatasetItemStorageDatasetView(parent: DatasetItemStorage, infos: Dict[str, Any], categories: Dict[AnnotationType, Categories], media_type: Type[MediaElement] | None, ann_types: Set[AnnotationType] | None)[source]#

Bases: IDataset

class Subset(parent: DatasetItemStorageDatasetView, name: str)[source]#

Bases: IDataset

put(item)[source]#
get(id, subset=None)[source]#

Provides random access to dataset items.

remove(id, subset=None)[source]#
get_subset(name)[source]#
subsets()[source]#

Enumerates subsets in the dataset. Each subset can be a dataset itself.

infos()[source]#

Returns meta-info of dataset.

categories()[source]#

Returns meta-info about dataset labels.

media_type()[source]#

Returns media type of the dataset items.

All items are expected to have the same media type. The media type is expected to be constant and known immediately after object construction (i.e. it does not require dataset iteration).

ann_types()[source]#

Returns the task types available in the dataset, derived from its annotation types.

infos()[source]#

Returns meta-info of dataset.

categories()[source]#

Returns meta-info about dataset labels.

get_subset(name)[source]#
subsets()[source]#

Enumerates subsets in the dataset. Each subset can be a dataset itself.

get(id, subset=None)[source]#

Provides random access to dataset items.

media_type()[source]#

Returns media type of the dataset items.

All items are expected to have the same media type. The media type is expected to be constant and known immediately after object construction (i.e. it does not require dataset iteration).

ann_types()[source]#

Returns the task types available in the dataset, derived from its annotation types.

class datumaro.components.dataset_item_storage.AnnotationType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Bases: IntEnum

unknown = 0#
label = 1#
mask = 2#
points = 3#
polygon = 4#
polyline = 5#
bbox = 6#
caption = 7#
cuboid_3d = 8#
super_resolution_annotation = 9#
depth_annotation = 10#
ellipse = 11#
hash_key = 12#
feature_vector = 13#
tabular = 14#
rotated_bbox = 15#
cuboid_2d = 16#
class datumaro.components.dataset_item_storage.Any(*args, **kwargs)[source]#

Bases: object

Special type indicating an unconstrained type.

  • Any is compatible with every type.

  • Any is assumed to have all methods.

  • All values are assumed to be instances of Any.

Note that all the above statements are true from the point of view of static type checkers. At runtime, Any should not be used with instance checks.

class datumaro.components.dataset_item_storage.DatasetItem(id: str, *, subset: str | None = None, media: str | MediaElement | None = None, annotations: List[Annotation] | None = None, attributes: Dict[str, Any] = None)[source]#

Bases: object

id: str#
subset: str#
media: MediaElement | None#
annotations: Annotations#
attributes: Dict[str, Any]#
wrap(**kwargs)[source]#
media_as(t: Type[T]) T[source]#
class datumaro.components.dataset_item_storage.Enum(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Bases: object

Create a collection of name/value pairs.

Example enumeration:

>>> class Color(Enum):
...     RED = 1
...     BLUE = 2
...     GREEN = 3

Access them by:

  • attribute access:

>>> Color.RED
<Color.RED: 1>
  • value lookup:

>>> Color(1)
<Color.RED: 1>
  • name lookup:

>>> Color['RED']
<Color.RED: 1>

Enumerations can be iterated over, and know how many members they have:

>>> len(Color)
3
>>> list(Color)
[<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]

Methods can be added to enumerations, and members can have their own attributes – see the documentation for details.

name#

The name of the Enum member.

value#

The value of the Enum member.

class datumaro.components.dataset_item_storage.IDataset[source]#

Bases: object

subsets() Dict[str, IDataset][source]#

Enumerates subsets in the dataset. Each subset can be a dataset itself.

get_subset(name) IDataset[source]#
infos() Dict[str, Any][source]#

Returns meta-info of dataset.

categories() Dict[AnnotationType, Categories][source]#

Returns meta-info about dataset labels.

get(id: str, subset: str | None = None) DatasetItem | None[source]#

Provides random access to dataset items.

media_type() Type[MediaElement][source]#

Returns media type of the dataset items.

All items are expected to have the same media type. The media type is expected to be constant and known immediately after object construction (i.e. it does not require dataset iteration).

ann_types() List[AnnotationType][source]#

Returns the task types available in the dataset, derived from its annotation types.

property is_stream: bool#

Boolean indicating whether the dataset is a stream.

If the dataset is a stream, its items are generated on demand from its iterator.

class datumaro.components.dataset_item_storage.MediaElement(crypter: ~datumaro.components.crypter.Crypter = <datumaro.components.crypter.NullCrypter object>, *args, **kwargs)[source]#

Bases: Generic[AnyData]

as_dict() Dict[str, Any][source]#
from_self(**kwargs)[source]#
property is_encrypted: bool#
set_crypter(crypter: Crypter)[source]#
property type: MediaType#
property data: AnyData | None#
property has_data: bool#
property bytes: bytes | None#
save(fp: str | ~io.IOBase, crypter: ~datumaro.components.crypter.Crypter = <datumaro.components.crypter.NullCrypter object>)[source]#
class datumaro.components.dataset_item_storage.auto(value=_auto_null)[source]#

Bases: object

Instances are replaced with an appropriate value in Enum class suites.

datumaro.components.dataset_item_storage.copy(x)[source]#

Shallow copy operation on arbitrary Python objects.

See the module’s __doc__ string for more info.