"""Shared type aliases and record types (source of ``dstk.lib_types.dstk_types``)."""
from typing import (
TypeAlias,
Any,
NamedTuple,
Generator,
Sequence,
TYPE_CHECKING,
Union
)
from stanza import Document
from .stanza_types import Word, Sentence, Token
from .fasttext_types import FastText
from .gensim_types import Word2Vec
if TYPE_CHECKING:
    # Imported for static type checkers only: at runtime this branch is
    # skipped, and ``Workflow`` refers to the class through the string
    # forward reference "Hook".
    # NOTE(review): presumably guarded to avoid a circular import with
    # ``..hooks.tools`` -- confirm.
    from ..hooks.tools import Hook
#: A dictionary mapping string identifiers to Stanza ``Document`` objects.
DocumentIndex: TypeAlias = dict[str, Document]
#: A single unit of analysis: either a ``Token`` or a ``Word``.
LexicalItem: TypeAlias = Token | Word
#: A sequence of lexical items for processing. The sequence is homogeneous:
#: either all ``Word`` objects or all ``Token`` objects, never a mix.
LexicalItemSequence: TypeAlias = Sequence[Word] | Sequence[Token]
#: Collections used in linguistic analysis: a sequence of ``Sentence`` objects,
#: a sequence of ``Word`` sequences, or a sequence of ``Token`` sequences.
LinguisticSequences: TypeAlias = (
Sequence[Sentence] | Sequence[Sequence[Word]] | Sequence[Sequence[Token]]
)
#: A pair ``(left, right)`` of ``Word`` lists representing the left and right
#: contexts of a target word.
Contexts: TypeAlias = tuple[list[Word], list[Word]]
#: A generator that yields ``Contexts`` pairs; it takes no sent values and
#: has no return value.
ContextGenerator: TypeAlias = Generator[Contexts, None, None]
#: A variable-length tuple of ``Word`` objects representing a group of collocates.
Collocates: TypeAlias = tuple[Word, ...]
class Concordance(NamedTuple):
    """
    One occurrence of a target text together with the text around it.

    Being a named tuple, an instance unpacks positionally as
    ``(left_context, text, right_context)``.

    :param left_context: The text appearing before the target word.
    :type left_context: str
    :param text: The specific word or phrase being analyzed.
    :type text: str
    :param right_context: The text appearing after the target word.
    :type right_context: str
    """

    left_context: str
    text: str
    right_context: str
class Bigram(NamedTuple):
    """
    A pair of words occurring together.

    Being a named tuple, an instance unpacks positionally as
    ``(collocate, target_word)``.

    :param collocate: The collocate word.
    :type collocate: Word
    :param target_word: The target word in the bigram.
    :type target_word: str
    """

    collocate: Word
    target_word: str
#: Directed collocates: a 2-tuple of a ``Word`` and a pair of strings holding
#: the directional tags.
DirectedCollocates: TypeAlias = tuple[Word, tuple[str, str]]
class Neighbor(NamedTuple):
    """
    A neighboring word paired with its associated statistical score.

    Being a named tuple, an instance unpacks positionally as
    ``(word, score)``.

    :param word: The neighboring word found in the text.
    :type word: str
    :param score: The statistical weight or confidence of the neighbor.
    :type score: float
    """

    word: str
    score: float
#: A list of ``Neighbor`` records (each a word with its score).
Neighbors: TypeAlias = list[Neighbor]
#: Supported neural language models: either ``Word2Vec`` or ``FastText``.
NeuralModels: TypeAlias = Word2Vec | FastText
#: A dictionary mapping a method name to the keyword arguments passed to it.
MethodDict: TypeAlias = dict[str, dict[str, Any]]
#: A mapping of workflow stage names to either an ordered list of
#: ``MethodDict`` entries or a custom ``Hook``. ``"Hook"`` is written as a
#: string forward reference because the class is only imported under
#: ``TYPE_CHECKING``.
Workflow: TypeAlias = dict[str, Union[list[MethodDict], "Hook"]]
class ParameterResult(NamedTuple):
    """
    The result of an individual step within a workflow.

    Being a named tuple, an instance unpacks positionally as
    ``(name, result)``.

    :param name: The name of the step.
    :type name: str
    :param result: The output produced by the step.
    :type result: Any
    """

    name: str
    result: Any
#: Generator yielding ``ParameterResult`` objects, i.e. each step's name
#: together with its result; it takes no sent values and has no return value.
ReturnAllGenerator: TypeAlias = Generator[ParameterResult, None, None]
#: Generator yielding only the results of the steps (typed ``Any``), without
#: the accompanying names; it takes no sent values and has no return value.
ReturnParameterGenerator: TypeAlias = Generator[Any, None, None]