using Zygote


]st Zygote

Status `~/work/zygote_flux_tutorial/zygote_flux_tutorial/Project.toml`
  [e88e6eb3] Zygote v0.6.62


# Running example for differentiation: returns the square of `x`.
function f(x)
    return x^2
end

f (generic function with 1 method)


# Check that `f'(x)` agrees with the hand-computed derivative 2x of x^2 at x = 3.
x = 3
@assert f'(x) == 2x
@assert 2x == 6

# `var"'"` is the identifier spelling of the postfix `'` operator; this binds that name
# to `adjoint`, so `f'` and `adjoint(f)` refer to the same operation.
# NOTE(review): this looks like a quotation of the definition in Julia Base — confirm it
# is shown for illustration only and is not meant to be re-run in `Main`.
const var"'" = adjoint


# Calling `adjoint(f)` explicitly gives the same value as the `f'` shorthand.
x = 3
f'(x) == adjoint(f)(x)

true

# Sketch of `gradient`: `pullback(f, x)` returns a pair whose second element is the
# backward closure; the primal value is not needed, so only that closure is taken and
# seeded with 1.
gradient(f, x) = pullback(f, x)[2](1)


# Squares its argument by plain multiplication.
function square(x)
    return x * x
end

# Hand-written pullbacks. Each returns the pair (f(x), back), where `back` maps an
# upstream sensitivity ȳ to ȳ times the derivative of f at x.
function pullback_for_sin(x)
    back(ȳ) = ȳ * cos(x)
    return sin(x), back
end

function pullback_for_cos(x)
    back(ȳ) = ȳ * -sin(x)
    return cos(x), back
end

function pullback_for_square(x)
    back(ȳ) = ȳ * 2x
    return x * x, back
end

# Home-made `pullback`: the rule is selected by dispatching on the function's type.
mypullback(::typeof(sin), x) = pullback_for_sin(x) # rule used when f = sin
mypullback(::typeof(cos), x) = pullback_for_cos(x) # rule used when f = cos
mypullback(::typeof(square), x) = pullback_for_square(x) # rule used when f = square

# Home-made `gradient`: look up the pullback rule registered for `f` (which rule runs
# depends on `f`), ignore the primal value, and seed the backward closure with 1.
function mygradient(f, x)
    rule = mypullback(f, x)
    back = rule[2]
    return back(1)
end

# For each primitive, compare the home-made gradient with `fn'` and with the
# closed-form derivative at x = π/6.
x = π/6
for (fn, expected) in ((sin, cos(x)), (cos, -sin(x)), (square, 2x))
    @assert mygradient(fn, x) ≈ fn'(x) ≈ expected
end

# Scalar differentiation rule for `tan`: the derivative is written as 1 + Ω^2.
# NOTE(review): Ω presumably denotes the primal output tan(x) (ChainRulesCore convention),
# i.e. tan'(x) = 1 + tan(x)^2 — confirm against the ChainRulesCore documentation.
# NOTE(review): `@scalar_rule` is not brought into scope by the visible `using Zygote`;
# presumably this line is quoted from ChainRules for illustration — confirm.
@scalar_rule tan(x) 1 + Ω ^ 2


# Show three expressions for the derivative of tan at x = π/6 side by side:
# `tan'(x)`, 1/cos(x)^2, and 1 + tan(x)^2.
x = π/6
tan'(x), 1/(cos(x))^2, 1 + (tan(x))^2

(1.3333333333333333, 1.333333333333333, 1.3333333333333333)


# Building blocks of the composition: f squares its input, g takes the sine.
function square(x)
    return x * x
end

function f(x)
    return square(x) # i.e. x * x
end

function g(y)
    return sin(y)
end

# The composite function z = g ∘ f (type ∘ as \circ); z(x) = g(f(x)) would work as well.
function z(x)
    return (g ∘ f)(x)
end

# Hand-written pullbacks returning (value, back), i.e. (f(x), B_x(ȳ)); the bodies are the
# same as pullback_for_square and pullback_for_sin respectively.
function pullback_for_f(x)
    back(ȳ) = ȳ * 2x
    return x * x, back
end

function pullback_for_g(x)
    back(ȳ) = ȳ * cos(x)
    return sin(x), back
end

# Register both rules with the home-made `mypullback` dispatcher.
mypullback(::typeof(f), x) = pullback_for_f(x)
mypullback(::typeof(g), x) = pullback_for_g(x)

# Pullback for the composite z = g ∘ f: run f and then g forward, and chain the two
# backward closures in reverse order (g's first, then f's).
function pullback_for_z(x)
    inner, f_back = mypullback(f, x)
    outer, g_back = mypullback(g, inner)
    # z̄ -> x̄: pull the sensitivity back through g, then through f.
    chained_back = z̄ -> f_back(g_back(z̄))
    return outer, chained_back
end

# Rule used by `mypullback` when the function is z.
mypullback(::typeof(z), x) = pullback_for_z(x)

# Home-made `gradient` (same definition as the earlier `mygradient`): fetch the pullback
# rule for `f` — which rule is used depends on `f` — and seed its backward closure with 1.
function mygradient(f, x)
    back = mypullback(f, x)[2]
    return back(1)
end

# The chain rule gives 2x * cos(x^2) for the derivative of z; check the home-made
# gradient and `z'` against it at x = π/6.
x = π/6
@assert mygradient(z, x) ≈ (z)'(x)
@assert (z)'(x) ≈ 2x * cos(x^2)

julia> poly(x) = 3x*x + 2x + 1
julia> Zygote.@code_ir poly(1.)
1: (%1, %2) # %1 は関数 poly, %2 は引数 x
  %3 = 3 * %2 # まずは入力を 3倍する i.e. 3x を計算
  %4 = %3 * %2 # この時点で 3 x^2 を実現
  %5 = 2 * %2 # 入力を2倍にしたものを計算 i.e. 2x を計算
  %6 = %4 + %5 + 1 # 諸々を足し合わせる
  return %6


# Polynomial used as the example; `Zygote.@code_ir` displays the IR of the call
# `poly(1.)`, so the exact form of the expression below determines the listing shown.
poly(x) = 3x*x + 2x + 1
Zygote.@code_ir poly(1.)

1: (%1, %2)
  %3 = 3 * %2
  %4 = %3 * %2
  %5 = 2 * %2
  %6 = %4 + %5 + 1
  return %6


# IR of the composite `z`: the listing that follows shows `g ∘ f` being built and then
# called on the argument.
Zygote.@code_ir z(1.)

1: (%1, %2)
  %3 = Main.g ∘ Main.f
  %4 = (%3)(%2)
  return %4

julia> Zygote.@code_adjoint poly(1)
Zygote.Adjoint(1: (%3, %4 :: Zygote.Context, %1, %2)
  %5 = Zygote._pullback(%4, Main.:*, 3, %2) 
  %6 = Base.getindex(%5, 1)
  %7 = Base.getindex(%5, 2)
  %8 = Zygote._pullback(%4, Main.:*, %6, %2)
  %9 = Base.getindex(%8, 1)
  %10 = Base.getindex(%8, 2)
  %11 = Zygote._pullback(%4, Main.:*, 2, %2)
  %12 = Base.getindex(%11, 1)
  %13 = Base.getindex(%11, 2)
  %14 = Zygote._pullback(%4, Main.:+, %9, %12, 1)
  %15 = Base.getindex(%14, 1)
  %16 = Base.getindex(%14, 2)
  return %15, 1: (%1)
  %2 = (@16)(%1)
  %3 = Zygote.gradindex(%2, 2)
  %4 = Zygote.gradindex(%2, 3)
  %5 = (@13)(%4)
  %6 = Zygote.gradindex(%5, 3)
  %7 = (@10)(%3)
  %8 = Zygote.gradindex(%7, 2)
  %9 = Zygote.gradindex(%7, 3)
  %10 = (@7)(%8)
  %11 = Zygote.gradindex(%10, 3)
  %12 = Zygote.accum(%6, %9, %11)
  %13 = Zygote.tuple(nothing, %12)
  return %13)


# Polynomial whose adjoint code is re-implemented by hand below.
poly(x) = 3x*x + 2x + 1

# Primitive operations that appear in the IR of `poly`.
add(x, y, z) = x + y + z
mul(x, y) = x * y 

# Hand-written pullbacks: each returns (value, back), where `back` maps the upstream
# sensitivity Δ to a tuple holding one sensitivity per argument.
pullback_for_add(x, y, z) = (add(x, y, z), Δ -> (Δ, Δ, Δ))
pullback_for_mul(x, y) = (mul(x, y), Δ -> (Δ * y, Δ * x)) # think of it as Δ times (∂_x mul, ∂_y mul)

mypullback(::typeof(add), x, y, z) = pullback_for_add(x, y, z)
mypullback(::typeof(mul), x, y) = pullback_for_mul(x, y)

# Hand translation of the `Zygote.@code_adjoint poly(1)` listing.
# Returns (poly(x), back), where `back(ā)` is ā times the derivative of poly at x.
# The commented `%n` lines quote the listing; the statement under each group is its
# plain-Julia counterpart.
function pullback_for_poly(x)
    # %2 is x

    # %5 = Zygote._pullback(%4, Main.:*, 3, %2)
    # %6 = Base.getindex(%5, 1)
    # %7 = Base.getindex(%5, 2)
    out6, back7 = mypullback(mul, 3, x)
    # %8 = Zygote._pullback(%4, Main.:*, %6, %2)
    # %9 = Base.getindex(%8, 1)
    # %10 = Base.getindex(%8, 2)
    out9, back10 = mypullback(mul, out6, x)
    # %11 = Zygote._pullback(%4, Main.:*, 2, %2)
    # %12 = Base.getindex(%11, 1)
    # %13 = Base.getindex(%11, 2)
    out12, back13 = mypullback(mul, 2, x)
    # %14 = Zygote._pullback(%4, Main.:+, %9, %12, 1)
    # %15 = Base.getindex(%14, 1)
    # %16 = Base.getindex(%14, 2)
    out15, back16 = mypullback(add, out9, out12, 1)
    function back_for_poly(ā)
        # %1 is ā
        # gradindex(∘, 1) only holds a formal placeholder, so the meaningful outputs are
        # gradindex(∘, 2), gradindex(∘, 3), and so on.
        # (@13) etc. correspond to back13 etc.

        # %2 = (@16)(%1)
        # %3 = Zygote.gradindex(%2, 2)
        # %4 = Zygote.gradindex(%2, 3)
        # The third entry is the sensitivity of the constant 1 and is not needed.
        x̄3, ȳ4, _ = back16(ā)
        # %5 = (@13)(%4)
        # %6 = Zygote.gradindex(%5, 3)
        _, ȳ6 = back13(ȳ4)
        # %7 = (@10)(%3)
        # %8 = Zygote.gradindex(%7, 2)
        # %9 = Zygote.gradindex(%7, 3)
        x̄8, ȳ9 = back10(x̄3)
        # %10 = (@7)(%8)
        # %11 = Zygote.gradindex(%10, 3)
        _, ȳ11 = back7(x̄8)
        # %12 = Zygote.accum(%6, %9, %11)
        # %13 = Zygote.tuple(nothing, %12)
        # Roughly speaking `accum` adds its arguments; plain `+` does that here without
        # building a temporary array.
        ō12 = ȳ6 + ȳ9 + ȳ11
        return ō12 # returned as a scalar on purpose, so no conversion to a tuple here
    end
    return out15, back_for_poly
end

# Rule used by `mypullback` when the function is poly.
function mypullback(::typeof(poly), x)
    return pullback_for_poly(x)
end

# At a random point, compare the hand-written gradient with `poly'` and with 6x + 2.
x = rand()
@assert mygradient(poly, x) ≈ (poly)'(x)
@assert (poly)'(x) ≈ 6x + 2

Zygote Internals¶

Load packages¶

復習¶

`f'` の `'`って何？¶

`gradient` and `pullback`¶

合成関数の微分¶

Example¶

Zygote のお仕事¶

`Zygote.@code_ir` ¶

Zygote.@code_adjoint¶

まとめ¶

Zygote Internals¶

Load packages¶

復習¶

f' の 'って何？¶

gradient and pullback¶

合成関数の微分¶

Example¶

Zygote のお仕事¶

Zygote.@code_ir ¶

Zygote.@code_adjoint¶

まとめ¶

`f'` の `'`って何？¶

`gradient` and `pullback`¶

`Zygote.@code_ir` ¶