Module spin_sdk.spin_llm

Module for working with the Spin large language model API

"""Module for working with the Spin large language model API"""

from dataclasses import dataclass
from collections.abc import Sequence
from typing import Optional

@dataclass
class LLMInferencingUsage:
    """Usage information related to an inferencing result.

    Attributes:
        prompt_token_count (int): Number of tokens in the prompt.
        generated_token_count (int): Number of tokens generated by the inferencing operation.

    """
    prompt_token_count: int
    generated_token_count: int

@dataclass
class LLMInferencingResult:
    """An inferencing result.

    Attributes:
        text (str): The text generated by the model.
        usage (LLMInferencingUsage): Usage information related to the inferencing result.

    """
    text: str
    usage: LLMInferencingUsage

@dataclass
class LLMInferencingParams:
    """Inference request parameters

    Attributes:
        max_tokens (int): The maximum tokens that should be inferred.
        repeat_penalty (float): The amount the model should avoid repeating tokens.
        repeat_penalty_last_n_token_count (int): The number of tokens the model should apply the repeat penalty to.
        temperature (float): The randomness with which the next token is selected.
        top-k (int): The number of possible next tokens the model will choose from.
        top-p: (float): The probability total of next tokens the model will choose from.

    """
    max_tokens: int
    repeat_penalty: float
    repeat_penalty_last_n_token_count: int
    temperature: float
    top_k: int
    top_p: float
        
def llm_infer(model: str, prompt: str, options: Optional[LLMInferencingParams]) -> LLMInferencingResult:
    """Perform inferencing using the provided model and prompt with the given optional params"""
    raise NotImplementedError

@dataclass
class LLMEmbeddingsUsage:
    """Usage information related to an embedding result.

    Attributes:
        prompt_token_count (int): Number of tokens in the prompt.

    """
    prompt_token_count: int

@dataclass
class LLMEmbeddingsResult:
    """An embedding result.

    Attributes:
        embeddings (Sequence[Sequence[float]]): The embeddings generated by the model, one vector per input string.
        usage (LLMEmbeddingsUsage): Usage information related to the embeddings result.

    """
    embeddings: Sequence[Sequence[float]]
    usage: LLMEmbeddingsUsage

def generate_embeddings(model: str, text: Sequence[str]) -> LLMEmbeddingsResult:
    """Generate embeddings for the supplied list of text"""
    raise NotImplementedError

Functions

def generate_embeddings(model: str, text: collections.abc.Sequence[str]) -> LLMEmbeddingsResult

Generate embeddings for the supplied list of text

def generate_embeddings(model: str, text: Sequence[str]) -> LLMEmbeddingsResult:
    """Generate embeddings for the supplied list of text"""
    raise NotImplementedError
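
A minimal usage sketch, assuming it runs inside a Spin component at request time (outside the host this stub raises NotImplementedError). The model name "all-minilm-l6-v2" is illustrative and must match a model your Spin application is configured to provide.

from spin_sdk.spin_llm import generate_embeddings

# Embed two sentences in one call; each input string yields one vector.
result = generate_embeddings("all-minilm-l6-v2", [
    "The quick brown fox",
    "jumps over the lazy dog",
])

print(len(result.embeddings))           # 2: one vector per input string
print(len(result.embeddings[0]))        # the model's embedding dimensionality
print(result.usage.prompt_token_count)  # tokens consumed across all inputs
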
def llm_infer(model: str, prompt: str, options: Optional[LLMInferencingParams]) -> LLMInferencingResult

Perform inferencing using the provided model and prompt with the given optional params

def llm_infer(model: str, prompt: str, options: Optional[LLMInferencingParams]) -> LLMInferencingResult:
    """Perform inferencing using the provided model and prompt with the given optional params"""
    raise NotImplementedError
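
A minimal usage sketch, again assuming a Spin component at runtime. The model name "llama2-chat" and all parameter values are illustrative; pass None for options to accept the host's defaults.

from spin_sdk.spin_llm import llm_infer, LLMInferencingParams

# Parameter values here are examples, not recommended defaults.
params = LLMInferencingParams(
    max_tokens=128,
    repeat_penalty=1.1,
    repeat_penalty_last_n_token_count=64,
    temperature=0.8,
    top_k=40,
    top_p=0.9,
)

result = llm_infer("llama2-chat", "Write a haiku about WebAssembly.", params)
print(result.text)
print(result.usage.generated_token_count)
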

Classes

class LLMEmbeddingsResult (embeddings: collections.abc.Sequence[collections.abc.Sequence[float]], usage: LLMEmbeddingsUsage)

An embedding result.

Attributes

embeddings : Sequence[Sequence[float]]
The embeddings generated by the model, one vector per input string.
usage : LLMEmbeddingsUsage
Usage information related to the embeddings result.
@dataclass
class LLMEmbeddingsResult:
    """An embedding result.

    Attributes:
        embeddings (Sequence[Sequence[float]]): The embeddings generated by the model, one vector per input string.
        usage (LLMEmbeddingsUsage): Usage information related to the embeddings result.

    """
    embeddings: Sequence[Sequence[float]]
    usage: LLMEmbeddingsUsage

Class variables

var embeddings : collections.abc.Sequence[collections.abc.Sequence[float]]
var usage : LLMEmbeddingsUsage
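
Since each embedding is a plain sequence of floats, results can be post-processed directly. A sketch that compares the first two vectors by cosine similarity, assuming result is an LLMEmbeddingsResult from a generate_embeddings call with at least two input strings:

import math
from collections.abc import Sequence

def cosine_similarity(a: Sequence[float], b: Sequence[float]) -> float:
    """Cosine similarity between two equal-length embedding vectors."""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

# result.embeddings is indexed in the same order as the input text,
# so this compares the first two input strings.
score = cosine_similarity(result.embeddings[0], result.embeddings[1])
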
class LLMEmbeddingsUsage (prompt_token_count: int)

Usage information related to an embedding result.

Attributes

prompt_token_count : int
Number of tokens in the prompt.
@dataclass
class LLMEmbeddingsUsage:
    """Usage information related to an embedding result.

    Attributes:
        prompt_token_count (int): Number of tokens in the prompt.

    """
    prompt_token_count: int

Class variables

var prompt_token_count : int
class LLMInferencingParams (max_tokens: int, repeat_penalty: float, repeat_penalty_last_n_token_count: int, temperature: float, top_k: int, top_p: float)

Inference request parameters.

Attributes

max_tokens : int
The maximum number of tokens the model should generate.
repeat_penalty : float
How strongly the model should avoid repeating tokens.
repeat_penalty_last_n_token_count : int
The number of recent tokens the repeat penalty applies to.
temperature : float
The randomness with which the next token is selected.
top_k : int
The number of possible next tokens the model will choose from.
top_p : float
The cumulative probability of the next tokens the model will choose from.

@dataclass
class LLMInferencingParams:
    """Inference request parameters

    Attributes:
        max_tokens (int): The maximum tokens that should be inferred.
        repeat_penalty (float): The amount the model should avoid repeating tokens.
        repeat_penalty_last_n_token_count (int): The number of tokens the model should apply the repeat penalty to.
        temperature (float): The randomness with which the next token is selected.
        top-k (int): The number of possible next tokens the model will choose from.
        top-p: (float): The probability total of next tokens the model will choose from.

    """
    max_tokens: int
    repeat_penalty: float
    repeat_penalty_last_n_token_count: int
    temperature: float
    top_k: int
    top_p: float

Class variables

var max_tokens : int
var repeat_penalty : float
var repeat_penalty_last_n_token_count : int
var temperature : float
var top_k : int
var top_p : float
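
A construction sketch with a comment per field. The numeric values are illustrative only, and the comments assume common llama-style sampler semantics rather than anything specified by this module:

from spin_sdk.spin_llm import LLMInferencingParams

params = LLMInferencingParams(
    max_tokens=200,      # stop after at most 200 generated tokens
    repeat_penalty=1.1,  # values above 1.0 discourage repetition
    repeat_penalty_last_n_token_count=64,  # how far back the penalty looks
    temperature=0.7,     # lower values make sampling more deterministic
    top_k=40,            # consider only the 40 most likely next tokens
    top_p=0.9,           # ...within a cumulative probability of 0.9
)
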
class LLMInferencingResult (text: str, usage: LLMInferencingUsage)

An inferencing result.

Attributes

text : str
The text generated by the model.
usage : LLMInferencingUsage
Usage information related to the inferencing result.
@dataclass
class LLMInferencingResult:
    """An inferencing result.

    Attributes:
        text (str): The text generated by the model.
        usage (LLMInferencingUsage): Usage information related to the inferencing result.

    """
    text: str
    usage: LLMInferencingUsage

Class variables

var text : str
var usage : LLMInferencingUsage
class LLMInferencingUsage (prompt_token_count: int, generated_token_count: int)

Usage information related to an inferencing result.

Attributes

prompt_token_count : int
Number of tokens in the prompt.
generated_token_count : int
Number of tokens generated by the inferencing operation.
@dataclass
class LLMInferencingUsage:
    """Usage information related to an inferencing result.

    Attributes:
        prompt_token_count (int): Number of tokens in the prompt.
        generated_token_count (int): Number of tokens generated by the inferencing operation.

    """
    prompt_token_count: int
    generated_token_count: int

Class variables

var generated_token_count : int
var prompt_token_count : int
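
The usage dataclass is plain data, so it composes easily. A sketch with a hypothetical add_usage helper that folds per-call token counts into a running total, e.g. for budget tracking across requests:

from spin_sdk.spin_llm import LLMInferencingUsage

def add_usage(total: LLMInferencingUsage, call: LLMInferencingUsage) -> LLMInferencingUsage:
    # Accumulate prompt and generated token counts across calls.
    return LLMInferencingUsage(
        prompt_token_count=total.prompt_token_count + call.prompt_token_count,
        generated_token_count=total.generated_token_count + call.generated_token_count,
    )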