AE、VAE与DAE详解

前言

自编码器(Autoencoder)是一种无监督学习方法,通过学习数据的压缩表示来捕捉数据的本质特征。本文介绍标准自编码器、变分自编码器(VAE)和去噪自编码器(DAE)。


自编码器基础

基本结构

自编码器由编码器和解码器组成:

  • 编码器:将输入压缩到低维潜在空间
  • 解码器:从潜在空间重建输入
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(42)

class SimpleAutoencoder:
    """A minimal fully-connected autoencoder implemented with NumPy.

    Architecture: input -> hidden (ReLU) -> latent -> hidden (ReLU)
    -> reconstruction (sigmoid).
    """

    def __init__(self, input_dim, hidden_dim, latent_dim):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim

        def layer(n_in, n_out):
            # Small random weights, zero biases.
            return np.random.randn(n_in, n_out) * 0.01, np.zeros(n_out)

        # Encoder parameters (creation order matters for RNG reproducibility).
        self.W1, self.b1 = layer(input_dim, hidden_dim)
        self.W2, self.b2 = layer(hidden_dim, latent_dim)

        # Decoder parameters.
        self.W3, self.b3 = layer(latent_dim, hidden_dim)
        self.W4, self.b4 = layer(hidden_dim, input_dim)

    def relu(self, x):
        """Element-wise rectified linear unit."""
        return np.maximum(0, x)

    def sigmoid(self, x):
        """Logistic function; inputs clipped to +/-500 to avoid overflow."""
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def encode(self, x):
        """Map inputs to latent vectors."""
        hidden = self.relu(x @ self.W1 + self.b1)
        return hidden @ self.W2 + self.b2

    def decode(self, z):
        """Map latent vectors back to input space (values in [0, 1])."""
        hidden = self.relu(z @ self.W3 + self.b3)
        return self.sigmoid(hidden @ self.W4 + self.b4)

    def forward(self, x):
        """Run a full encode/decode pass; returns (reconstruction, latent)."""
        latent = self.encode(x)
        return self.decode(latent), latent

# Smoke test: push a batch of random vectors through the NumPy autoencoder.
demo_model = SimpleAutoencoder(input_dim=784, hidden_dim=256, latent_dim=32)
demo_batch = np.random.rand(10, 784)
demo_recon, demo_latent = demo_model.forward(demo_batch)

print(f"输入形状: {demo_batch.shape}")
print(f"潜在向量形状: {demo_latent.shape}")
print(f"重建形状: {demo_recon.shape}")

可视化自编码器结构

def visualize_autoencoder_structure():
    """Draw a schematic bottleneck diagram of a 7-layer autoencoder."""
    fig, ax = plt.subplots(figsize=(14, 6))

    # Width (neuron count) of each layer and its display label.
    layers = [784, 256, 64, 32, 64, 256, 784]
    layer_names = ['输入', '隐藏1', '隐藏2', '潜在空间', '隐藏3', '隐藏4', '重建']

    max_neurons = 20  # cap on circles drawn per layer

    for idx, (width, label) in enumerate(zip(layers, layer_names)):
        cx = idx * 1.5
        shown = min(width, max_neurons)

        for row in range(shown):
            cy = (row - shown / 2) * 0.3
            if idx < 3:
                fill = 'lightblue'    # encoder side
            elif idx == 3:
                fill = 'lightgreen'   # latent bottleneck
            else:
                fill = 'lightyellow'  # decoder side
            ax.add_patch(plt.Circle((cx, cy), 0.1, color=fill, ec='black'))

        # Ellipsis marks layers too wide to draw in full.
        if width > max_neurons:
            ax.text(cx, -shown / 2 * 0.3 - 0.5, '...', ha='center', fontsize=12)

        ax.text(cx, shown / 2 * 0.3 + 0.5, f'{label}\n({width})', ha='center', fontsize=9)

    # Connect consecutive layers with arrows.
    for idx in range(len(layers) - 1):
        ax.annotate('', xy=(idx * 1.5 + 1.3, 0), xytext=(idx * 1.5 + 0.2, 0),
                    arrowprops=dict(arrowstyle='->', color='gray'))

    ax.set_xlim(-1, 10)
    ax.set_ylim(-5, 5)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title('自编码器结构', fontsize=14)

    # Region annotations below the diagram.
    ax.text(1.5, -4, '编码器', fontsize=12, ha='center', color='blue')
    ax.text(4.5, -4, '潜在空间', fontsize=12, ha='center', color='green')
    ax.text(7.5, -4, '解码器', fontsize=12, ha='center', color='orange')

    plt.tight_layout()
    plt.show()

visualize_autoencoder_structure()

PyTorch实现

标准自编码器

try:
    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torch.utils.data import DataLoader, TensorDataset

    class Autoencoder(nn.Module):
        """Standard fully-connected autoencoder with a Sigmoid output head."""

        def __init__(self, input_dim=784, hidden_dim=256, latent_dim=32):
            super().__init__()

            bottleneck = hidden_dim // 2

            # Encoder: input -> hidden -> bottleneck -> latent.
            self.encoder = nn.Sequential(
                nn.Linear(input_dim, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, bottleneck),
                nn.ReLU(),
                nn.Linear(bottleneck, latent_dim),
            )

            # Decoder mirrors the encoder; Sigmoid keeps outputs in [0, 1].
            self.decoder = nn.Sequential(
                nn.Linear(latent_dim, bottleneck),
                nn.ReLU(),
                nn.Linear(bottleneck, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, input_dim),
                nn.Sigmoid(),
            )

        def encode(self, x):
            """Project x into the latent space."""
            return self.encoder(x)

        def decode(self, z):
            """Reconstruct an input from latent code z."""
            return self.decoder(z)

        def forward(self, x):
            """Return (reconstruction, latent code)."""
            z = self.encode(x)
            return self.decode(z), z

    # Quick shape check on a random batch.
    model = Autoencoder()
    x = torch.randn(32, 784)
    x_recon, z = model(x)

    print(f"输入: {x.shape}")
    print(f"潜在向量: {z.shape}")
    print(f"重建: {x_recon.shape}")

except ImportError:
    print("PyTorch未安装")

训练函数

try:
    def train_autoencoder(model, train_loader, epochs=10, lr=1e-3):
        """Fit an autoencoder with Adam and MSE reconstruction loss.

        Returns the list of per-epoch average losses.
        """
        optimizer = optim.Adam(model.parameters(), lr=lr)
        criterion = nn.MSELoss()

        losses = []
        for epoch in range(epochs):
            running = 0.0
            for (batch_x,) in train_loader:
                # Forward pass: reconstruct the batch and score it.
                recon, _ = model(batch_x)
                loss = criterion(recon, batch_x)

                # Standard backprop step.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                running += loss.item()

            epoch_loss = running / len(train_loader)
            losses.append(epoch_loss)

            # Report progress every five epochs.
            if (epoch + 1) % 5 == 0:
                print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.6f}")

        return losses

    # Synthetic training data in [0, 1].
    X_train = torch.rand(1000, 784)
    train_dataset = TensorDataset(X_train)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

    # Train the model defined above.
    ae_model = Autoencoder()
    losses = train_autoencoder(ae_model, train_loader, epochs=20)

except NameError:
    print("需要先定义模型")

变分自编码器(VAE)

VAE原理

VAE将潜在空间建模为概率分布,而不是确定性向量:

\[z \sim q_\phi(z|x) = \mathcal{N}(\mu, \sigma^2)\]

损失函数(负ELBO)包含重建损失和KL散度两项: \(\mathcal{L} = -\mathbb{E}_{q_\phi(z|x)}[\log p_\theta(x|z)] + KL(q_\phi(z|x) \,\|\, p(z))\)。训练时最小化该损失,等价于最大化证据下界ELBO。

try:
    class VAE(nn.Module):
        """Variational autoencoder: encodes inputs into a Gaussian posterior."""

        def __init__(self, input_dim=784, hidden_dim=256, latent_dim=32):
            super().__init__()

            half = hidden_dim // 2

            # Shared encoder trunk.
            self.encoder = nn.Sequential(
                nn.Linear(input_dim, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, half),
                nn.ReLU(),
            )

            # Posterior parameter heads: mean and log-variance.
            self.fc_mu = nn.Linear(half, latent_dim)
            self.fc_var = nn.Linear(half, latent_dim)

            # Decoder: latent sample -> reconstruction in [0, 1].
            self.decoder = nn.Sequential(
                nn.Linear(latent_dim, half),
                nn.ReLU(),
                nn.Linear(half, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, input_dim),
                nn.Sigmoid(),
            )

        def encode(self, x):
            """Return (mu, log_var) of the posterior q(z|x)."""
            h = self.encoder(x)
            return self.fc_mu(h), self.fc_var(h)

        def reparameterize(self, mu, log_var):
            """Sample z = mu + sigma * eps, eps ~ N(0, I) (reparameterization trick)."""
            std = torch.exp(0.5 * log_var)
            return mu + torch.randn_like(std) * std

        def decode(self, z):
            """Decode a latent sample back to input space."""
            return self.decoder(z)

        def forward(self, x):
            """Return (reconstruction, mu, log_var)."""
            mu, log_var = self.encode(x)
            sample = self.reparameterize(mu, log_var)
            return self.decode(sample), mu, log_var

    def vae_loss(x_recon, x, mu, log_var):
        """Negative ELBO: summed BCE reconstruction term plus KL(q(z|x) || N(0, I))."""
        recon_loss = nn.functional.binary_cross_entropy(x_recon, x, reduction='sum')
        # Closed-form KL divergence between diagonal Gaussian and standard normal.
        kl_loss = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
        return recon_loss + kl_loss

    # Quick smoke test.
    vae = VAE()
    x = torch.rand(32, 784)
    x_recon, mu, log_var = vae(x)

    loss = vae_loss(x_recon, x, mu, log_var)
    print(f"VAE损失: {loss.item():.4f}")

except NameError:
    print("需要先导入PyTorch")

训练VAE

try:
    def train_vae(model, train_loader, epochs=10, lr=1e-3):
        """Fit a VAE by minimizing the summed negative ELBO with Adam.

        Returns the list of per-epoch average losses.
        """
        optimizer = optim.Adam(model.parameters(), lr=lr)

        history = []
        for epoch in range(epochs):
            running = 0.0
            for (batch_x,) in train_loader:
                # Forward pass through the VAE, then score with recon + KL.
                recon, mu, log_var = model(batch_x)
                loss = vae_loss(recon, batch_x, mu, log_var)

                # Backprop and parameter update.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                running += loss.item()

            epoch_loss = running / len(train_loader)
            history.append(epoch_loss)

            # Report progress every five epochs.
            if (epoch + 1) % 5 == 0:
                print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

        return history

    vae_model = VAE()
    vae_losses = train_vae(vae_model, train_loader, epochs=20)

except NameError:
    print("需要先定义VAE")

去噪自编码器(DAE)

DAE原理

DAE通过在输入中添加噪声来学习更鲁棒的表示。

try:
    class DenoisingAutoencoder(nn.Module):
        """Autoencoder trained to reconstruct clean inputs from noisy ones."""

        def __init__(self, input_dim=784, hidden_dim=256, latent_dim=32, noise_factor=0.3):
            super().__init__()
            # Scale of the Gaussian corruption applied during training.
            self.noise_factor = noise_factor

            # Encoder with dropout for extra regularization.
            self.encoder = nn.Sequential(
                nn.Linear(input_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.2),
                nn.Linear(hidden_dim, latent_dim),
                nn.ReLU(),
            )

            # Decoder mirrors the encoder; Sigmoid keeps outputs in [0, 1].
            self.decoder = nn.Sequential(
                nn.Linear(latent_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.2),
                nn.Linear(hidden_dim, input_dim),
                nn.Sigmoid(),
            )

        def add_noise(self, x):
            """Corrupt x with Gaussian noise, clamped back into [0, 1]."""
            corrupted = x + torch.randn_like(x) * self.noise_factor
            return torch.clamp(corrupted, 0, 1)

        def forward(self, x, add_noise=True):
            """Return (reconstruction, latent, corrupted input).

            Noise is injected only in training mode and when add_noise=True.
            """
            x_noisy = self.add_noise(x) if (add_noise and self.training) else x
            latent = self.encoder(x_noisy)
            return self.decoder(latent), latent, x_noisy

    # Smoke test: measure how far the corrupted input drifts from the original.
    dae = DenoisingAutoencoder()
    x = torch.rand(32, 784)
    x_recon, z, x_noisy = dae(x)

    print(f"原始输入与噪声输入的差异: {torch.mean(torch.abs(x - x_noisy)).item():.4f}")

except NameError:
    print("需要先导入PyTorch")

卷积自编码器

try:
    class ConvAutoencoder(nn.Module):
        """Convolutional autoencoder for 1x28x28 images (e.g. MNIST)."""

        def __init__(self, latent_dim=64):
            super().__init__()

            # Encoder: two stride-2 convs (28 -> 14 -> 7), then flatten to latent.
            self.encoder = nn.Sequential(
                nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1),
                nn.ReLU(),
                nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
                nn.ReLU(),
                nn.Flatten(),
                nn.Linear(64 * 7 * 7, latent_dim),
            )

            # Decoder: project back to 64x7x7, then two stride-2 transposed convs.
            self.decoder = nn.Sequential(
                nn.Linear(latent_dim, 64 * 7 * 7),
                nn.ReLU(),
                nn.Unflatten(1, (64, 7, 7)),
                nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
                nn.ReLU(),
                nn.ConvTranspose2d(32, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
                nn.Sigmoid(),
            )

        def forward(self, x):
            """Return (reconstruction, latent code)."""
            code = self.encoder(x)
            return self.decoder(code), code

    # Shape check with an MNIST-sized batch.
    conv_ae = ConvAutoencoder()
    x = torch.rand(8, 1, 28, 28)  # MNIST-format input
    x_recon, z = conv_ae(x)

    print(f"输入: {x.shape}")
    print(f"潜在向量: {z.shape}")
    print(f"重建: {x_recon.shape}")

except NameError:
    print("需要先导入PyTorch")

应用场景

应用对比

应用 AE VAE DAE
降维 ✓ ✓ ✓
特征学习 ✓ ✓ ✓
生成 ✗ ✓ ✗
去噪 ✗ ✗ ✓
异常检测 ✓ ✓ ✓

常见问题

Q1: AE和PCA的区别?

特性 AE PCA
线性/非线性 非线性 线性
表示能力 强(可学习复杂非线性结构) 弱(仅限线性子空间)
计算复杂度 高(需迭代训练) 低(一次特征分解即可)

Q2: VAE的KL散度有什么作用?

正则化潜在空间,使其接近标准正态分布,便于生成。

Q3: 如何选择潜在空间维度?

  • 太小:信息损失
  • 太大:可能过拟合
  • 通常通过交叉验证选择

Q4: VAE生成的图像为什么模糊?

MSE损失倾向于生成平均图像,可以使用感知损失或GAN改进。


总结

模型 特点 适用场景
AE 简单,确定性 特征学习,降维
VAE 概率模型,可生成 生成任务
DAE 鲁棒性强 去噪,特征学习

参考资料

  • Hinton, G. & Salakhutdinov, R. (2006). “Reducing the Dimensionality of Data with Neural Networks”
  • Kingma, D. & Welling, M. (2013). “Auto-Encoding Variational Bayes”
  • Vincent, P. et al. (2008). “Extracting and Composing Robust Features with Denoising Autoencoders”
  • PyTorch官方教程

版权声明: 如无特别声明,本文版权归 sshipanoo 所有,转载请注明本文链接。

(采用 CC BY-NC-SA 4.0 许可协议进行授权)

本文标题:《 机器学习基础系列——自编码器 》

本文链接:http://localhost:3015/ai/%E8%87%AA%E7%BC%96%E7%A0%81%E5%99%A8.html

本文最后一次更新时间较早,文章中的某些内容可能已过时!