forked from PAIR-code/lit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dtypes.py
85 lines (67 loc) · 2.76 KB
/
dtypes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Lint as: python3
"""Dataclasses for representing structured output.
Classes in this file should be used for actual input/output data,
rather than in spec() metadata.

These classes can replace simple dicts or namedtuples, with two major
advantages:
- Type-checking (via pytype) doesn't work for dict fields, but does work for
  these dataclasses.
- Performance and memory use may be better, due to the use of __slots__.

See the documentation for attr.s (https://www.attrs.org/) for more details.

Classes inheriting from DataTuple will be handled by serialize.py, and available
on the frontend as corresponding JavaScript objects.
"""
import abc
from typing import Any, Dict, List, Text, Tuple, Union
import attr
# Alias for a JSON-style mapping: string keys, arbitrary values.
JsonDict = Dict[str, Any]
@attr.s(auto_attribs=True, frozen=True, slots=True)
class DataTuple(metaclass=abc.ABCMeta):
  """Simple dataclasses.

  These are intended to be used for actual data, such as returned by
  dataset.examples and model.predict().

  Contrast with LitType and descendants, which are used in model and dataset
  /specs/ to represent types and metadata.
  """

  def to_json(self) -> JsonDict:
    """Used by serialize.py.

    Returns:
      The attrs fields of this instance as a dict (via attr.asdict), plus two
      marker keys: '__class__' (always 'DataTuple') and '__name__' (the name
      of the concrete class), which from_json() uses to pick the class.
    """
    d = attr.asdict(self)
    d['__class__'] = 'DataTuple'
    d['__name__'] = self.__class__.__name__
    return d

  @staticmethod
  def from_json(d: JsonDict):
    """Used by serialize.py.

    Args:
      d: a dict of field values plus a '__name__' key naming a DataTuple
        class defined in this module. A '__class__' marker, as written by
        to_json(), is accepted and ignored. The dict is not modified.

    Returns:
      An instance of the named class, constructed from the remaining keys.

    Raises:
      KeyError: if '__name__' is missing or names nothing in this module.
      TypeError: if '__name__' resolves to something other than a DataTuple
        class, or the remaining keys do not match that class's fields.
    """
    # Work on a shallow copy so the caller's dict is left untouched.
    fields = dict(d)
    # Drop the marker written by to_json(); it is not a constructor argument.
    fields.pop('__class__', None)
    name = fields.pop('__name__')
    cls = globals()[name]  # class by name from this module
    # globals() also holds imports, aliases and dunder entries; refuse to call
    # anything that is not actually one of the DataTuple classes.
    if not (isinstance(cls, type) and issubclass(cls, DataTuple)):
      raise TypeError('%r is not a DataTuple class' % (name,))
    return cls(**fields)
@attr.s(slots=True, frozen=True, auto_attribs=True)
class SpanLabel(DataTuple):
  """A single labeled span; may be returned directly in model predictions.

  The span is the half-open range [start, end).
  """

  start: int  # span start, inclusive
  end: int  # span end, exclusive
  label: str
@attr.s(slots=True, frozen=True, auto_attribs=True)
class EdgeLabel(DataTuple):
  """A single labeled edge between two spans; may be returned in model preds.

  Each span is a (start, end) pair with start inclusive and end exclusive.
  """

  span1: Tuple[int, int]  # (inclusive, exclusive)
  span2: Tuple[int, int]  # (inclusive, exclusive)
  label: Union[str, int, float]
@attr.s(slots=True, frozen=True, auto_attribs=True)
class SalienceMap(DataTuple):
  """Token-level salience: a token list with one salience value per token."""

  tokens: List[str]
  salience: List[float]  # aligned with tokens, entry for entry