base_tokenizer
Bases: `ABC`, [`SerializableMixin`](../mixins/serializable_mixin.md#griptape.mixins.serializable_mixin.SerializableMixin "SerializableMixin (griptape.mixins.serializable_mixin.SerializableMixin)")

Source Code in griptape/tokenizers/base_tokenizer.py
```python
@define()
class BaseTokenizer(ABC, SerializableMixin):
    DEFAULT_MAX_INPUT_TOKENS = 4096
    DEFAULT_MAX_OUTPUT_TOKENS = 1000
    MODEL_PREFIXES_TO_MAX_INPUT_TOKENS = {}
    MODEL_PREFIXES_TO_MAX_OUTPUT_TOKENS = {}

    model: str = field(kw_only=True, metadata={"serializable": True})
    stop_sequences: list[str] = field(default=Factory(list), kw_only=True, metadata={"serializable": True})
    _max_input_tokens: Optional[int] = field(
        kw_only=True, default=None, alias="max_input_tokens", metadata={"serializable": True}
    )
    _max_output_tokens: Optional[int] = field(
        kw_only=True, default=None, alias="max_output_tokens", metadata={"serializable": True}
    )

    @lazy_property()
    def max_input_tokens(self) -> int:
        return self._default_max_input_tokens()

    @lazy_property()
    def max_output_tokens(self) -> int:
        return self._default_max_output_tokens()

    def __attrs_post_init__(self) -> None:
        if self.model is not None:
            if self.max_input_tokens is None:
                self.max_input_tokens = self._default_max_input_tokens()

            if self.max_output_tokens is None:
                # TODO(collin): https://github.com/griptape-ai/griptape/issues/1844
                self.max_output_tokens = self._default_max_output_tokens()

    def count_input_tokens_left(self, text: str) -> int:
        diff = self.max_input_tokens - self.count_tokens(text)

        if diff > 0:
            return diff
        return 0

    def count_output_tokens_left(self, text: str) -> int:
        diff = self.max_output_tokens - self.count_tokens(text)

        if diff > 0:
            return diff
        return 0

    @abstractmethod
    def count_tokens(self, text: str) -> int: ...

    def _default_max_input_tokens(self) -> int:
        tokens = next(
            (
                max_tokens
                for model_prefix, max_tokens in self.MODEL_PREFIXES_TO_MAX_INPUT_TOKENS.items()
                if model_prefix in self.model
            ),
            None,
        )

        if tokens is None:
            logging.warning(
                "Model %s not found in MODEL_PREFIXES_TO_MAX_INPUT_TOKENS, using default value of %s.",
                self.model,
                self.DEFAULT_MAX_INPUT_TOKENS,
            )
            return self.DEFAULT_MAX_INPUT_TOKENS
        return tokens

    def _default_max_output_tokens(self) -> int:
        tokens = next(
            (
                max_tokens
                for model_prefix, max_tokens in self.MODEL_PREFIXES_TO_MAX_OUTPUT_TOKENS.items()
                if model_prefix in self.model
            ),
            None,
        )

        if tokens is None:
            logging.debug(
                "Model %s not found in MODEL_PREFIXES_TO_MAX_OUTPUT_TOKENS, using default value of %s.",
                self.model,
                self.DEFAULT_MAX_OUTPUT_TOKENS,
            )
            return self.DEFAULT_MAX_OUTPUT_TOKENS
        return tokens
```
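`BaseTokenizer` is abstract, so it is used through concrete subclasses that implement `count_tokens` and, optionally, populate the `MODEL_PREFIXES_TO_MAX_*` tables. A minimal sketch of such a subclass (the import path, model name, limits, and whitespace-based counting are illustrative assumptions, not part of Griptape):

```python
from griptape.tokenizers import BaseTokenizer  # assumed import path


class WhitespaceTokenizer(BaseTokenizer):
    # Hypothetical limits, keyed by substrings of model names.
    MODEL_PREFIXES_TO_MAX_INPUT_TOKENS = {"toy-model": 8192}
    MODEL_PREFIXES_TO_MAX_OUTPUT_TOKENS = {"toy-model": 2048}

    def count_tokens(self, text: str) -> int:
        # Crude stand-in for a real encoder: one token per whitespace-separated word.
        return len(text.split())


tokenizer = WhitespaceTokenizer(model="toy-model-v1")
print(tokenizer.max_input_tokens)               # 8192, resolved from the prefix table
print(tokenizer.count_tokens("one two three"))  # 3
```

The prefix tables are consulted lazily the first time `max_input_tokens` or `max_output_tokens` is read, so no explicit limit needs to be passed for models listed in those tables.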
`DEFAULT_MAX_INPUT_TOKENS = 4096`
class-attribute instance-attribute

`DEFAULT_MAX_OUTPUT_TOKENS = 1000`
class-attribute instance-attribute

`MODEL_PREFIXES_TO_MAX_INPUT_TOKENS = {}`
class-attribute instance-attribute

`MODEL_PREFIXES_TO_MAX_OUTPUT_TOKENS = {}`
class-attribute instance-attribute

`_max_input_tokens = field(kw_only=True, default=None, alias='max_input_tokens', metadata={'serializable': True})`
class-attribute instance-attribute

`_max_output_tokens = field(kw_only=True, default=None, alias='max_output_tokens', metadata={'serializable': True})`
class-attribute instance-attribute

`model = field(kw_only=True, metadata={'serializable': True})`
class-attribute instance-attribute

`stop_sequences = field(default=Factory(list), kw_only=True, metadata={'serializable': True})`
class-attribute instance-attribute
__attrs_post_init__()
Source Code in griptape/tokenizers/base_tokenizer.py
```python
def __attrs_post_init__(self) -> None:
    if self.model is not None:
        if self.max_input_tokens is None:
            self.max_input_tokens = self._default_max_input_tokens()

        if self.max_output_tokens is None:
            # TODO(collin): https://github.com/griptape-ai/griptape/issues/1844
            self.max_output_tokens = self._default_max_output_tokens()
```
_default_max_input_tokens()
Source Code in griptape/tokenizers/base_tokenizer.py
```python
def _default_max_input_tokens(self) -> int:
    tokens = next(
        (
            max_tokens
            for model_prefix, max_tokens in self.MODEL_PREFIXES_TO_MAX_INPUT_TOKENS.items()
            if model_prefix in self.model
        ),
        None,
    )

    if tokens is None:
        logging.warning(
            "Model %s not found in MODEL_PREFIXES_TO_MAX_INPUT_TOKENS, using default value of %s.",
            self.model,
            self.DEFAULT_MAX_INPUT_TOKENS,
        )
        return self.DEFAULT_MAX_INPUT_TOKENS
    return tokens
```
_default_max_output_tokens()
Source Code in griptape/tokenizers/base_tokenizer.py
```python
def _default_max_output_tokens(self) -> int:
    tokens = next(
        (
            max_tokens
            for model_prefix, max_tokens in self.MODEL_PREFIXES_TO_MAX_OUTPUT_TOKENS.items()
            if model_prefix in self.model
        ),
        None,
    )

    if tokens is None:
        logging.debug(
            "Model %s not found in MODEL_PREFIXES_TO_MAX_OUTPUT_TOKENS, using default value of %s.",
            self.model,
            self.DEFAULT_MAX_OUTPUT_TOKENS,
        )
        return self.DEFAULT_MAX_OUTPUT_TOKENS
    return tokens
```
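Both default helpers use `model_prefix in self.model`, i.e. substring containment rather than a strict prefix check, and fall back to `DEFAULT_MAX_INPUT_TOKENS` / `DEFAULT_MAX_OUTPUT_TOKENS` (logging a warning or debug message) when nothing matches. A standalone illustration of the same lookup, with made-up table values:

```python
# The same lookup logic, mirrored outside the class for illustration.
MODEL_PREFIXES_TO_MAX_INPUT_TOKENS = {"toy-model-large": 8192, "toy-model-small": 2048}
DEFAULT_MAX_INPUT_TOKENS = 4096


def resolve_max_input_tokens(model: str) -> int:
    return next(
        (limit for prefix, limit in MODEL_PREFIXES_TO_MAX_INPUT_TOKENS.items() if prefix in model),
        DEFAULT_MAX_INPUT_TOKENS,  # fallback when no table entry matches
    )


print(resolve_max_input_tokens("toy-model-large-v2"))  # 8192 ("toy-model-large" is a substring)
print(resolve_max_input_tokens("unknown-model"))       # 4096 (default fallback)
```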
count_input_tokens_left(text)
Source Code in griptape/tokenizers/base_tokenizer.py
```python
def count_input_tokens_left(self, text: str) -> int:
    diff = self.max_input_tokens - self.count_tokens(text)

    if diff > 0:
        return diff
    return 0
```
count_output_tokens_left(text)
Source Code in griptape/tokenizers/base_tokenizer.py
```python
def count_output_tokens_left(self, text: str) -> int:
    diff = self.max_output_tokens - self.count_tokens(text)

    if diff > 0:
        return diff
    return 0
```
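Both helpers clamp negative differences to zero, so they can be used directly as a remaining-budget check before sending a prompt. Continuing the hypothetical `WhitespaceTokenizer` sketch from above:

```python
prompt = "Summarize the following text. " * 50

remaining_input = tokenizer.count_input_tokens_left(prompt)
remaining_output = tokenizer.count_output_tokens_left(prompt)

if remaining_input == 0:
    # The prompt alone already fills (or exceeds) the input window.
    raise ValueError("Prompt is too long for this model.")

print(remaining_input, remaining_output)
```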
count_tokens(text)
abstractmethod
Source Code in griptape/tokenizers/base_tokenizer.py
```python
@abstractmethod
def count_tokens(self, text: str) -> int: ...
```
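Concrete tokenizers typically delegate this to a real encoder. As a hedged sketch, an implementation backed by the tiktoken package might look as follows (the import path and encoding name are assumptions; Griptape's built-in tokenizers may resolve encodings differently):

```python
import tiktoken

from griptape.tokenizers import BaseTokenizer  # assumed import path


class TiktokenTokenizer(BaseTokenizer):
    def count_tokens(self, text: str) -> int:
        # "cl100k_base" is used purely as an example encoding name.
        encoding = tiktoken.get_encoding("cl100k_base")
        return len(encoding.encode(text))
```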
max_input_tokens()
Source Code in griptape/tokenizers/base_tokenizer.py
```python
@lazy_property()
def max_input_tokens(self) -> int:
    return self._default_max_input_tokens()
```
max_output_tokens()
Source Code in griptape/tokenizers/base_tokenizer.py
```python
@lazy_property()
def max_output_tokens(self) -> int:
    return self._default_max_output_tokens()
```
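Because `_max_input_tokens` and `_max_output_tokens` are aliased fields, both limits can also be supplied explicitly at construction time via the `max_input_tokens` and `max_output_tokens` keywords, which takes precedence over the prefix-table defaults. Reusing the hypothetical `WhitespaceTokenizer` from above:

```python
tokenizer = WhitespaceTokenizer(
    model="toy-model-v1",
    max_input_tokens=2048,   # overrides the prefix-table value
    max_output_tokens=256,
)

print(tokenizer.max_input_tokens)   # 2048
print(tokenizer.max_output_tokens)  # 256
```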