Activation functions

Customized activation functions to support various models in 🤗 Diffusers.

mindone.diffusers.models.activations.GELU

Bases: Cell

GELU activation function with optional tanh approximation, enabled by passing approximate="tanh".

PARAMETER DESCRIPTION
dim_in

The number of channels in the input.

TYPE: `int`

dim_out

The number of channels in the output.

TYPE: `int`

approximate

If "tanh", use tanh approximation.

TYPE: `str`, *optional*, defaults to `"none"` DEFAULT: 'none'

bias

Whether to use a bias in the linear layer.

TYPE: `bool`, defaults to True DEFAULT: True

Source code in mindone/diffusers/models/activations.py
class GELU(nn.Cell):
    r"""
    GELU activation function with tanh approximation support with `approximate="tanh"`.

    Parameters:
        dim_in (`int`): The number of channels in the input.
        dim_out (`int`): The number of channels in the output.
        approximate (`str`, *optional*, defaults to `"none"`): If `"tanh"`, use tanh approximation.
        bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
    """

    def __init__(self, dim_in: int, dim_out: int, approximate: str = "none", bias: bool = True):
        super().__init__()
        self.proj = mint.nn.Linear(dim_in, dim_out, bias=bias)
        self.approximate = approximate

    def gelu(self, gate: ms.Tensor) -> ms.Tensor:
        return mint.nn.functional.gelu(gate, approximate=self.approximate).to(gate.dtype)

    def construct(self, hidden_states):
        hidden_states = self.proj(hidden_states)
        hidden_states = self.gelu(hidden_states)
        return hidden_states
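
A minimal usage sketch (not part of the library docs): it assumes MindSpore and mindone are installed, and the input shape is illustrative. The tanh approximation is selected via approximate="tanh".

import numpy as np
import mindspore as ms
from mindone.diffusers.models.activations import GELU

# Project 4 input channels to 8 output channels, using the tanh approximation.
act = GELU(dim_in=4, dim_out=8, approximate="tanh")
x = ms.Tensor(np.random.randn(2, 4), ms.float32)
out = act(x)  # calling the Cell runs construct(); output shape is (2, 8)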

mindone.diffusers.models.activations.GEGLU

Bases: Cell

A variant (https://arxiv.org/abs/2002.05202) of the gated linear unit activation function.

PARAMETER DESCRIPTION
dim_in

The number of channels in the input.

TYPE: `int`

dim_out

The number of channels in the output.

TYPE: `int`

bias

Whether to use a bias in the linear layer.

TYPE: `bool`, defaults to True DEFAULT: True

Source code in mindone/diffusers/models/activations.py
class GEGLU(nn.Cell):
    r"""
    A [variant](https://arxiv.org/abs/2002.05202) of the gated linear unit activation function.

    Parameters:
        dim_in (`int`): The number of channels in the input.
        dim_out (`int`): The number of channels in the output.
        bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
    """

    def __init__(self, dim_in: int, dim_out: int, bias: bool = True):
        super().__init__()
        self.proj = mint.nn.Linear(dim_in, dim_out * 2, bias=bias)

    def gelu(self, gate: ms.Tensor) -> ms.Tensor:
        return mint.nn.functional.gelu(gate).to(gate.dtype)

    def construct(self, hidden_states):
        hidden_states, gate = self.proj(hidden_states).chunk(2, dim=-1)
        return hidden_states * self.gelu(gate)
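
A short usage sketch under the same assumptions as above: the projection internally produces 2 * dim_out channels, which are split into a hidden part and a gate, so the returned tensor has dim_out channels.

import numpy as np
import mindspore as ms
from mindone.diffusers.models.activations import GEGLU

act = GEGLU(dim_in=4, dim_out=8)
x = ms.Tensor(np.random.randn(2, 4), ms.float32)
out = act(x)  # projection is (2, 16), chunked into value and gate; output shape is (2, 8)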

mindone.diffusers.models.activations.ApproximateGELU

Bases: Cell

The approximate form of the Gaussian Error Linear Unit (GELU). For more details, see section 2 of this paper: https://arxiv.org/abs/1606.08415.

PARAMETER DESCRIPTION
dim_in

The number of channels in the input.

TYPE: `int`

dim_out

The number of channels in the output.

TYPE: `int`

bias

Whether to use a bias in the linear layer.

TYPE: `bool`, defaults to True DEFAULT: True

Source code in mindone/diffusers/models/activations.py
class ApproximateGELU(nn.Cell):
    r"""
    The approximate form of the Gaussian Error Linear Unit (GELU). For more details, see section 2 of this
    [paper](https://arxiv.org/abs/1606.08415).

    Parameters:
        dim_in (`int`): The number of channels in the input.
        dim_out (`int`): The number of channels in the output.
        bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
    """

    def __init__(self, dim_in: int, dim_out: int, bias: bool = True):
        super().__init__()
        self.proj = mint.nn.Linear(dim_in, dim_out, bias=bias)

    def construct(self, x: ms.Tensor) -> ms.Tensor:
        x = self.proj(x)
        return x * mint.sigmoid(1.702 * x)
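
A brief sketch (illustrative shapes, same assumptions as above) showing the sigmoid-based approximation x * sigmoid(1.702 * x) applied after the linear projection.

import numpy as np
import mindspore as ms
from mindone.diffusers.models.activations import ApproximateGELU

act = ApproximateGELU(dim_in=4, dim_out=8)
x = ms.Tensor(np.random.randn(2, 4), ms.float32)
out = act(x)  # computes proj(x) * sigmoid(1.702 * proj(x)); output shape is (2, 8)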

mindone.diffusers.models.activations.SwiGLU

Bases: Cell

A variant (https://arxiv.org/abs/2002.05202) of the gated linear unit activation function. It's similar to GEGLU but uses SiLU / Swish instead of GeLU.

PARAMETER DESCRIPTION
dim_in

The number of channels in the input.

TYPE: `int`

dim_out

The number of channels in the output.

TYPE: `int`

bias

Whether to use a bias in the linear layer.

TYPE: `bool`, defaults to True DEFAULT: True

Source code in mindone/diffusers/models/activations.py
class SwiGLU(nn.Cell):
    r"""
    A [variant](https://arxiv.org/abs/2002.05202) of the gated linear unit activation function. It's similar to `GEGLU`
    but uses SiLU / Swish instead of GeLU.

    Parameters:
        dim_in (`int`): The number of channels in the input.
        dim_out (`int`): The number of channels in the output.
        bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
    """

    def __init__(self, dim_in: int, dim_out: int, bias: bool = True):
        super().__init__()

        self.proj = mint.nn.Linear(dim_in, dim_out * 2, bias=bias)
        self.activation = mint.nn.SiLU()

    def construct(self, hidden_states):
        hidden_states = self.proj(hidden_states)
        hidden_states, gate = hidden_states.chunk(2, dim=-1)
        return hidden_states * self.activation(gate)
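
Usage mirrors GEGLU, with SiLU applied to the gate instead of GELU (sketch under the same assumptions as above).

import numpy as np
import mindspore as ms
from mindone.diffusers.models.activations import SwiGLU

act = SwiGLU(dim_in=4, dim_out=8)
x = ms.Tensor(np.random.randn(2, 4), ms.float32)
out = act(x)  # hidden_states * SiLU(gate); output shape is (2, 8)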

mindone.diffusers.models.activations.FP32SiLU

Bases: Cell

SiLU activation function with input upcasted to mindspore.float32.

Source code in mindone/diffusers/models/activations.py
class FP32SiLU(nn.Cell):
    r"""
    SiLU activation function with input upcasted to mindspore.float32.
    """

    def construct(self, x: ms.Tensor) -> ms.Tensor:
        x_dtype = x.dtype
        x = mint.nn.functional.silu(x.float())
        x = x.to(x_dtype)
        return x
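
A sketch showing that the computation runs in float32 while the output keeps the input dtype (here float16; shapes are illustrative).

import numpy as np
import mindspore as ms
from mindone.diffusers.models.activations import FP32SiLU

act = FP32SiLU()
x = ms.Tensor(np.random.randn(2, 4), ms.float16)
out = act(x)  # SiLU computed in float32, then cast back; out.dtype is float16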

mindone.diffusers.models.activations.LinearActivation

Bases: Cell

A linear projection followed by an activation function selected by name via get_activation (defaults to "silu").

Source code in mindone/diffusers/models/activations.py
class LinearActivation(nn.Cell):
    def __init__(self, dim_in: int, dim_out: int, bias: bool = True, activation: str = "silu"):
        super().__init__()

        self.proj = mint.nn.Linear(dim_in, dim_out, bias=bias)
        self.activation = get_activation(activation)

    def construct(self, hidden_states):
        hidden_states = self.proj(hidden_states)
        return self.activation(hidden_states)
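
A sketch of the projection-plus-activation helper, under the same assumptions as the earlier examples; the activation string is resolved by get_activation, so any name it supports (e.g. "silu") can be passed.

import numpy as np
import mindspore as ms
from mindone.diffusers.models.activations import LinearActivation

act = LinearActivation(dim_in=4, dim_out=8, activation="silu")
x = ms.Tensor(np.random.randn(2, 4), ms.float32)
out = act(x)  # linear projection followed by SiLU; output shape is (2, 8)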