
Activation functions

Customized activation functions for supporting various models in 🤗 Diffusers.

mindone.diffusers.models.activations.sigmoid(x)

A numerically stable version of the logistic sigmoid function.

Source code in mindone/diffusers/models/activations.py
def sigmoid(x):
    """A numerically stable version of the logistic sigmoid function."""
    return ops.where(
        x >= 0.0,
        1.0 / (1.0 + ops.exp(-x)),  # For positive values
        ops.exp(x) / (1.0 + ops.exp(x)),  # For negative values
    )
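
A minimal usage sketch (not part of the library source; the input values are illustrative), assuming a working MindSpore install:

import mindspore as ms
from mindone.diffusers.models.activations import sigmoid

# ops.where selects, per element, the branch whose exponential cannot overflow,
# so large-magnitude inputs map cleanly to values near 0 or 1.
x = ms.Tensor([-100.0, -1.0, 0.0, 1.0, 100.0], ms.float32)
print(sigmoid(x))  # approximately [0.0, 0.2689, 0.5, 0.7311, 1.0]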

mindone.diffusers.models.activations.SiLU

Bases: Cell

Source code in mindone/diffusers/models/activations.py
class SiLU(nn.Cell):
    def construct(self, x: ms.Tensor) -> ms.Tensor:
        return x * sigmoid(x)
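
A brief usage sketch (not from the library docs; shapes are illustrative):

from mindspore import ops
from mindone.diffusers.models.activations import SiLU

act = SiLU()
x = ops.randn(2, 8)   # illustrative (batch, features) input
y = act(x)            # x * sigmoid(x), applied element-wise; same shape as x
print(y.shape)        # (2, 8)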

mindone.diffusers.models.activations.FP32SiLU

Bases: Cell

SiLU activation function with input upcasted to mindspore.float32.

Source code in mindone/diffusers/models/activations.py
class FP32SiLU(nn.Cell):
    r"""
    SiLU activation function with input upcasted to mindspore.float32.
    """

    def construct(self, x: ms.Tensor) -> ms.Tensor:
        x_dtype = x.dtype
        x = ops.silu(x.float())
        x = x.to(x_dtype)
        return x
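
A short sketch (shapes are illustrative assumptions) showing that the input dtype is preserved while the activation itself runs in float32:

import mindspore as ms
from mindspore import ops
from mindone.diffusers.models.activations import FP32SiLU

act = FP32SiLU()
x = ops.randn(2, 8).to(ms.float16)  # e.g. a half-precision activation under mixed precision
y = act(x)                          # SiLU evaluated in float32, then cast back to the input dtype
print(y.dtype)                      # Float16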

mindone.diffusers.models.activations.GELU

Bases: Cell

GELU activation function, with optional tanh approximation enabled by passing approximate="tanh".

PARAMETERS

    dim_in (`int`): The number of channels in the input.
    dim_out (`int`): The number of channels in the output.
    approximate (`str`, *optional*, defaults to `"none"`): If `"tanh"`, use the tanh approximation.
    bias (`bool`, defaults to `True`): Whether to use a bias in the linear layer.

Source code in mindone/diffusers/models/activations.py
class GELU(nn.Cell):
    r"""
    GELU activation function with tanh approximation support with `approximate="tanh"`.

    Parameters:
        dim_in (`int`): The number of channels in the input.
        dim_out (`int`): The number of channels in the output.
        approximate (`str`, *optional*, defaults to `"none"`): If `"tanh"`, use tanh approximation.
        bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
    """

    def __init__(self, dim_in: int, dim_out: int, approximate: str = "none", bias: bool = True):
        super().__init__()
        self.proj = nn.Dense(dim_in, dim_out, has_bias=bias)
        self.approximate = approximate

    def gelu(self, gate: ms.Tensor) -> ms.Tensor:
        return ops.gelu(gate, approximate=self.approximate)

    def construct(self, hidden_states):
        hidden_states = self.proj(hidden_states)
        hidden_states = self.gelu(hidden_states)
        return hidden_states
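
A usage sketch (the dimensions and the transformer feed-forward framing are illustrative assumptions, not from the source):

from mindspore import ops
from mindone.diffusers.models.activations import GELU

# Linear projection followed by tanh-approximated GELU, applied over the last axis.
ff = GELU(dim_in=64, dim_out=256, approximate="tanh")
x = ops.randn(2, 16, 64)  # (batch, tokens, dim_in)
y = ff(x)
print(y.shape)            # (2, 16, 256)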

mindone.diffusers.models.activations.GEGLU

Bases: Cell

A variant of the gated linear unit activation function (https://arxiv.org/abs/2002.05202).

PARAMETERS

    dim_in (`int`): The number of channels in the input.
    dim_out (`int`): The number of channels in the output.
    bias (`bool`, defaults to `True`): Whether to use a bias in the linear layer.

Source code in mindone/diffusers/models/activations.py
class GEGLU(nn.Cell):
    r"""
    A [variant](https://arxiv.org/abs/2002.05202) of the gated linear unit activation function.

    Parameters:
        dim_in (`int`): The number of channels in the input.
        dim_out (`int`): The number of channels in the output.
        bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
    """

    def __init__(self, dim_in: int, dim_out: int, bias: bool = True):
        super().__init__()
        self.proj = nn.Dense(dim_in, dim_out * 2, has_bias=bias)

    def gelu(self, gate: ms.Tensor) -> ms.Tensor:
        return ops.gelu(gate)

    def construct(self, hidden_states):
        hidden_states, gate = self.proj(hidden_states).chunk(2, axis=-1)
        return hidden_states * self.gelu(gate)
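
A usage sketch (dimensions are illustrative): the internal projection doubles the feature size, and the two halves are recombined as value * GELU(gate), so the output has dim_out channels.

from mindspore import ops
from mindone.diffusers.models.activations import GEGLU

geglu = GEGLU(dim_in=64, dim_out=256)
x = ops.randn(2, 16, 64)  # (batch, tokens, dim_in)
y = geglu(x)              # projection to 2 * dim_out, split, then gated
print(y.shape)            # (2, 16, 256)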

mindone.diffusers.models.activations.ApproximateGELU

Bases: Cell

The approximate form of the Gaussian Error Linear Unit (GELU). For more details, see section 2 of this paper: https://arxiv.org/abs/1606.08415.

PARAMETERS

    dim_in (`int`): The number of channels in the input.
    dim_out (`int`): The number of channels in the output.
    bias (`bool`, defaults to `True`): Whether to use a bias in the linear layer.

Source code in mindone/diffusers/models/activations.py
class ApproximateGELU(nn.Cell):
    r"""
    The approximate form of the Gaussian Error Linear Unit (GELU). For more details, see section 2 of this
    [paper](https://arxiv.org/abs/1606.08415).

    Parameters:
        dim_in (`int`): The number of channels in the input.
        dim_out (`int`): The number of channels in the output.
        bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
    """

    def __init__(self, dim_in: int, dim_out: int, bias: bool = True):
        super().__init__()
        self.proj = nn.Dense(dim_in, dim_out, has_bias=bias)

    def construct(self, x: ms.Tensor) -> ms.Tensor:
        x = self.proj(x)
        return x * ops.sigmoid(1.702 * x)
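
A usage sketch (dimensions are illustrative); the forward pass is proj(x) * sigmoid(1.702 * proj(x)), a sigmoid-based approximation of GELU:

from mindspore import ops
from mindone.diffusers.models.activations import ApproximateGELU

act = ApproximateGELU(dim_in=64, dim_out=64)
x = ops.randn(2, 16, 64)
y = act(x)
print(y.shape)  # (2, 16, 64)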