已经是最新一篇文章了!
已经是最后一篇文章了!
AE、VAE与DAE详解
前言
自编码器(Autoencoder)是一种无监督学习方法,通过学习数据的压缩表示来捕捉数据的本质特征。本文介绍标准自编码器、变分自编码器(VAE)和去噪自编码器(DAE)。
自编码器基础
基本结构
自编码器由编码器和解码器组成:
- 编码器:将输入压缩到低维潜在空间
- 解码器:从潜在空间重建输入
import numpy as np
import matplotlib.pyplot as plt
# Fix the global NumPy RNG so weight initialization and the sample data below are reproducible.
np.random.seed(42)
class SimpleAutoencoder:
    """Minimal fully-connected autoencoder implemented with plain NumPy.

    Layout: input -> hidden (ReLU) -> latent -> hidden (ReLU) -> sigmoid output.
    Weights use a small random init; biases start at zero.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.latent_dim = latent_dim
        # Encoder parameters (input -> hidden -> latent).
        self.W1 = np.random.randn(input_dim, hidden_dim) * 0.01
        self.b1 = np.zeros(hidden_dim)
        self.W2 = np.random.randn(hidden_dim, latent_dim) * 0.01
        self.b2 = np.zeros(latent_dim)
        # Decoder parameters mirror the encoder (latent -> hidden -> input).
        self.W3 = np.random.randn(latent_dim, hidden_dim) * 0.01
        self.b3 = np.zeros(hidden_dim)
        self.W4 = np.random.randn(hidden_dim, input_dim) * 0.01
        self.b4 = np.zeros(input_dim)

    def relu(self, x):
        """Elementwise ReLU."""
        return np.maximum(x, 0)

    def sigmoid(self, x):
        """Numerically safe sigmoid; clip avoids overflow in exp for large |x|."""
        clipped = np.clip(x, -500, 500)
        return 1 / (1 + np.exp(-clipped))

    def encode(self, x):
        """Map a batch of inputs to latent codes (no activation on the code)."""
        hidden = self.relu(x @ self.W1 + self.b1)
        return hidden @ self.W2 + self.b2

    def decode(self, z):
        """Reconstruct inputs from latent codes; sigmoid keeps outputs in [0, 1]."""
        hidden = self.relu(z @ self.W3 + self.b3)
        return self.sigmoid(hidden @ self.W4 + self.b4)

    def forward(self, x):
        """Full pass: returns (reconstruction, latent code)."""
        code = self.encode(x)
        return self.decode(code), code
# Smoke test: push one random batch through an untrained autoencoder
# and confirm the shapes line up (784 -> 32 -> 784).
ae = SimpleAutoencoder(input_dim=784, hidden_dim=256, latent_dim=32)
batch = np.random.rand(10, 784)
reconstruction, latent = ae.forward(batch)
print(f"输入形状: {batch.shape}")
print(f"潜在向量形状: {latent.shape}")
print(f"重建形状: {reconstruction.shape}")
可视化自编码器结构
def visualize_autoencoder_structure():
    """Draw a schematic of the autoencoder layer stack with matplotlib."""
    fig, ax = plt.subplots(figsize=(14, 6))
    # Neuron count per layer and the label shown above each column.
    layers = [784, 256, 64, 32, 64, 256, 784]
    layer_names = ['输入', '隐藏1', '隐藏2', '潜在空间', '隐藏3', '隐藏4', '重建']
    max_neurons = 20  # cap circles per column so wide layers stay readable
    for idx, (size, name) in enumerate(zip(layers, layer_names)):
        cx = idx * 1.5
        shown = min(size, max_neurons)
        # Encoder columns blue, latent column green, decoder columns yellow.
        if idx < 3:
            color = 'lightblue'
        elif idx == 3:
            color = 'lightgreen'
        else:
            color = 'lightyellow'
        for j in range(shown):
            cy = (j - shown / 2) * 0.3
            ax.add_patch(plt.Circle((cx, cy), 0.1, color=color, ec='black'))
        if size > max_neurons:
            # Ellipsis marks truncated columns.
            ax.text(cx, -shown / 2 * 0.3 - 0.5, '...', ha='center', fontsize=12)
        ax.text(cx, shown / 2 * 0.3 + 0.5, f'{name}\n({size})', ha='center', fontsize=9)
    # Arrows between consecutive layers.
    for idx in range(len(layers) - 1):
        ax.annotate('', xy=(idx * 1.5 + 1.3, 0), xytext=(idx * 1.5 + 0.2, 0),
                    arrowprops=dict(arrowstyle='->', color='gray'))
    ax.set_xlim(-1, 10)
    ax.set_ylim(-5, 5)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title('自编码器结构', fontsize=14)
    # Region labels under the three sections of the diagram.
    ax.text(1.5, -4, '编码器', fontsize=12, ha='center', color='blue')
    ax.text(4.5, -4, '潜在空间', fontsize=12, ha='center', color='green')
    ax.text(7.5, -4, '解码器', fontsize=12, ha='center', color='orange')
    plt.tight_layout()
    plt.show()


visualize_autoencoder_structure()
PyTorch实现
标准自编码器
try:
    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torch.utils.data import DataLoader, TensorDataset

    class Autoencoder(nn.Module):
        """Standard fully-connected autoencoder.

        The encoder compresses input -> hidden -> hidden/2 -> latent; the
        decoder mirrors it, ending in a sigmoid so outputs lie in [0, 1].
        """

        def __init__(self, input_dim=784, hidden_dim=256, latent_dim=32):
            super().__init__()
            bottleneck = hidden_dim // 2
            # Encoder stack.
            self.encoder = nn.Sequential(
                nn.Linear(input_dim, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, bottleneck),
                nn.ReLU(),
                nn.Linear(bottleneck, latent_dim),
            )
            # Decoder stack (mirror of the encoder).
            self.decoder = nn.Sequential(
                nn.Linear(latent_dim, bottleneck),
                nn.ReLU(),
                nn.Linear(bottleneck, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, input_dim),
                nn.Sigmoid(),
            )

        def encode(self, x):
            return self.encoder(x)

        def decode(self, z):
            return self.decoder(z)

        def forward(self, x):
            """Return (reconstruction, latent code) for a batch."""
            code = self.encode(x)
            return self.decode(code), code

    # Quick shape check on a random batch.
    model = Autoencoder()
    x = torch.randn(32, 784)
    x_recon, z = model(x)
    print(f"输入: {x.shape}")
    print(f"潜在向量: {z.shape}")
    print(f"重建: {x_recon.shape}")
except ImportError:
    print("PyTorch未安装")
训练函数
try:
    def train_autoencoder(model, train_loader, epochs=10, lr=1e-3):
        """Train an autoencoder with MSE reconstruction loss.

        Args:
            model: module whose forward returns (reconstruction, latent).
            train_loader: DataLoader yielding 1-tuples of input batches.
            epochs: number of passes over the data.
            lr: Adam learning rate.

        Returns:
            List of per-epoch mean losses.
        """
        optimizer = optim.Adam(model.parameters(), lr=lr)
        criterion = nn.MSELoss()
        history = []
        for epoch in range(epochs):
            running = 0.0
            for (batch,) in train_loader:
                recon, _ = model(batch)
                loss = criterion(recon, batch)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                running += loss.item()
            epoch_loss = running / len(train_loader)
            history.append(epoch_loss)
            # Log progress every 5 epochs.
            if (epoch + 1) % 5 == 0:
                print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.6f}")
        return history

    # Synthetic data standing in for flattened 28x28 images.
    X_train = torch.rand(1000, 784)
    train_dataset = TensorDataset(X_train)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    ae_model = Autoencoder()
    losses = train_autoencoder(ae_model, train_loader, epochs=20)
except NameError:
    print("需要先定义模型")
变分自编码器(VAE)
VAE原理
VAE将潜在空间建模为概率分布,而不是确定性向量:
\[z \sim q_\phi(z|x) = \mathcal{N}(\mu, \sigma^2)\]训练目标是最大化ELBO,等价于最小化重建损失与KL散度之和(即负ELBO,与下面代码一致):\(\mathcal{L} = -\mathbb{E}_{q_\phi(z|x)}[\log p_\theta(x|z)] + KL(q_\phi(z|x) \| p(z))\)
try:
    class VAE(nn.Module):
        """Variational autoencoder.

        The encoder maps an input to the mean and log-variance of a diagonal
        Gaussian posterior; the reparameterization trick draws a differentiable
        sample z, which the decoder maps back to input space.
        """

        def __init__(self, input_dim=784, hidden_dim=256, latent_dim=32):
            super().__init__()
            half = hidden_dim // 2
            # Shared encoder trunk.
            self.encoder = nn.Sequential(
                nn.Linear(input_dim, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, half),
                nn.ReLU(),
            )
            # Heads producing the posterior mean and log-variance.
            self.fc_mu = nn.Linear(half, latent_dim)
            self.fc_var = nn.Linear(half, latent_dim)
            # Decoder; sigmoid output matches the BCE reconstruction loss.
            self.decoder = nn.Sequential(
                nn.Linear(latent_dim, half),
                nn.ReLU(),
                nn.Linear(half, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, input_dim),
                nn.Sigmoid(),
            )

        def encode(self, x):
            """Return (mu, log_var) of the approximate posterior q(z|x)."""
            trunk = self.encoder(x)
            return self.fc_mu(trunk), self.fc_var(trunk)

        def reparameterize(self, mu, log_var):
            """Sample z = mu + sigma * eps so gradients flow through mu/log_var."""
            sigma = torch.exp(0.5 * log_var)
            eps = torch.randn_like(sigma)
            return mu + eps * sigma

        def decode(self, z):
            return self.decoder(z)

        def forward(self, x):
            mu, log_var = self.encode(x)
            z = self.reparameterize(mu, log_var)
            return self.decode(z), mu, log_var

    def vae_loss(x_recon, x, mu, log_var):
        """Negative ELBO: summed BCE reconstruction term plus KL to N(0, I)."""
        recon_term = nn.functional.binary_cross_entropy(x_recon, x, reduction='sum')
        # Closed-form KL divergence between N(mu, sigma^2) and N(0, 1).
        kl_term = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
        return recon_term + kl_term

    # Sanity check on a random batch.
    vae = VAE()
    x = torch.rand(32, 784)
    x_recon, mu, log_var = vae(x)
    loss = vae_loss(x_recon, x, mu, log_var)
    print(f"VAE损失: {loss.item():.4f}")
except NameError:
    print("需要先导入PyTorch")
训练VAE
try:
    def train_vae(model, train_loader, epochs=10, lr=1e-3):
        """Train a VAE by minimizing the negative ELBO (see vae_loss).

        Returns the list of per-epoch mean losses.
        """
        optimizer = optim.Adam(model.parameters(), lr=lr)
        history = []
        for epoch in range(epochs):
            running = 0.0
            for (batch,) in train_loader:
                recon, mu, log_var = model(batch)
                loss = vae_loss(recon, batch, mu, log_var)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                running += loss.item()
            epoch_loss = running / len(train_loader)
            history.append(epoch_loss)
            # Log progress every 5 epochs.
            if (epoch + 1) % 5 == 0:
                print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")
        return history

    vae_model = VAE()
    vae_losses = train_vae(vae_model, train_loader, epochs=20)
except NameError:
    print("需要先定义VAE")
去噪自编码器(DAE)
DAE原理
DAE通过在输入中添加噪声来学习更鲁棒的表示。
try:
    class DenoisingAutoencoder(nn.Module):
        """Autoencoder trained to reconstruct clean inputs from corrupted ones.

        Gaussian noise (scaled by noise_factor) is injected only while the
        module is in training mode; evaluation sees the clean input.
        """

        def __init__(self, input_dim=784, hidden_dim=256, latent_dim=32, noise_factor=0.3):
            super().__init__()
            self.noise_factor = noise_factor
            # Encoder with dropout as an extra source of corruption-robustness.
            self.encoder = nn.Sequential(
                nn.Linear(input_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.2),
                nn.Linear(hidden_dim, latent_dim),
                nn.ReLU(),
            )
            self.decoder = nn.Sequential(
                nn.Linear(latent_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.2),
                nn.Linear(hidden_dim, input_dim),
                nn.Sigmoid(),
            )

        def add_noise(self, x):
            """Corrupt x with Gaussian noise, clamped back into [0, 1]."""
            corrupted = x + torch.randn_like(x) * self.noise_factor
            return torch.clamp(corrupted, 0, 1)

        def forward(self, x, add_noise=True):
            """Return (reconstruction, latent, possibly-noisy input)."""
            # Corrupt only when requested AND in training mode.
            x_noisy = self.add_noise(x) if (add_noise and self.training) else x
            z = self.encoder(x_noisy)
            return self.decoder(z), z, x_noisy

    # Smoke test: report how much the noise perturbs a random batch.
    dae = DenoisingAutoencoder()
    x = torch.rand(32, 784)
    x_recon, z, x_noisy = dae(x)
    print(f"原始输入与噪声输入的差异: {torch.mean(torch.abs(x - x_noisy)).item():.4f}")
except NameError:
    print("需要先导入PyTorch")
卷积自编码器
try:
    class ConvAutoencoder(nn.Module):
        """Convolutional autoencoder for 28x28 single-channel (MNIST-style) images."""

        def __init__(self, latent_dim=64):
            super().__init__()
            # Two stride-2 convs downsample 28x28 -> 14x14 -> 7x7 before the
            # linear bottleneck.
            self.encoder = nn.Sequential(
                nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1),
                nn.ReLU(),
                nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
                nn.ReLU(),
                nn.Flatten(),
                nn.Linear(64 * 7 * 7, latent_dim),
            )
            # Transposed convs invert the downsampling; output_padding=1
            # restores the even spatial sizes (7 -> 14 -> 28).
            self.decoder = nn.Sequential(
                nn.Linear(latent_dim, 64 * 7 * 7),
                nn.ReLU(),
                nn.Unflatten(1, (64, 7, 7)),
                nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
                nn.ReLU(),
                nn.ConvTranspose2d(32, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
                nn.Sigmoid(),
            )

        def forward(self, x):
            """Return (reconstruction, latent code) for an image batch."""
            code = self.encoder(x)
            return self.decoder(code), code

    # Shape check with an MNIST-sized batch.
    conv_ae = ConvAutoencoder()
    x = torch.rand(8, 1, 28, 28)  # MNIST格式
    x_recon, z = conv_ae(x)
    print(f"输入: {x.shape}")
    print(f"潜在向量: {z.shape}")
    print(f"重建: {x_recon.shape}")
except NameError:
    print("需要先导入PyTorch")
应用场景
应用对比
| 应用 | AE | VAE | DAE |
|---|---|---|---|
| 降维 | ✓ | ✓ | ✓ |
| 特征学习 | ✓ | ✓ | ✓ |
| 生成 | △ | ✓ | △ |
| 去噪 | △ | △ | ✓ |
| 异常检测 | ✓ | ✓ | ✓ |
常见问题
Q1: AE和PCA的区别?
| 特性 | AE | PCA |
|---|---|---|
| 线性/非线性 | 非线性 | 线性 |
| 表示能力 | 强 | 弱 |
| 计算复杂度 | 高 | 低 |
Q2: VAE的KL散度有什么作用?
正则化潜在空间,使其接近标准正态分布,便于生成。
Q3: 如何选择潜在空间维度?
- 太小:信息损失
- 太大:可能过拟合
- 通常通过交叉验证选择
Q4: VAE生成的图像为什么模糊?
MSE损失倾向于生成平均图像,可以使用感知损失或GAN改进。
总结
| 模型 | 特点 | 适用场景 |
|---|---|---|
| AE | 简单,确定性 | 特征学习,降维 |
| VAE | 概率模型,可生成 | 生成任务 |
| DAE | 鲁棒性强 | 去噪,特征学习 |
参考资料
- Hinton, G. & Salakhutdinov, R. (2006). “Reducing the Dimensionality of Data with Neural Networks”
- Kingma, D. & Welling, M. (2013). “Auto-Encoding Variational Bayes”
- Vincent, P. et al. (2008). “Extracting and Composing Robust Features with Denoising Autoencoders”
- PyTorch官方教程
版权声明: 如无特别声明,本文版权归 sshipanoo 所有,转载请注明本文链接。
(采用 CC BY-NC-SA 4.0 许可协议进行授权)
本文标题:《 机器学习基础系列——自编码器 》
本文链接:http://localhost:3015/ai/%E8%87%AA%E7%BC%96%E7%A0%81%E5%99%A8.html
本文最后一次更新时间较早,文章中的某些内容可能已过时!