-
Notifications
You must be signed in to change notification settings - Fork 0
/
RC_preprocess.R
52 lines (39 loc) · 1.82 KB
/
RC_preprocess.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
library(Cardinal)
# Configuration for batch preprocessing of the imzML files of one dataset.

# Strongly prefer fixed notation over scientific notation when printing.
options(scipen = 999)

# m/z window to keep.
min_mz <- 800
max_mz <- 3000
# Desired m/z resolution.
desired_resolution <- 0.01083
folder_path <- "E:\\mass_spectrum_data\\CRC-PXD019662-20240607"
set.seed(1)

# Full paths of all imzML files directly inside folder_path.
# `pattern` is a regular expression, not a shell glob: the dot is escaped
# and the match is anchored at the end so only names ending in ".imzML"
# are selected (not e.g. "x.imzML.bak").
imzml_pattern <- "\\.imzML$"
file_paths <- list.files(
  folder_path,
  pattern = imzml_pattern,
  full.names = TRUE
)
if (length(file_paths) == 0) {
  # Make a wrong or empty input folder visible instead of silently doing
  # nothing.
  warning("No .imzML files found in ", folder_path, call. = FALSE)
}
# Preprocess every imzML file listed in file_paths and export each result
# as a new imzML file. Steps per file: crop low m/z, recalibrate,
# TIC-normalize, smooth twice, correct the baseline, bin, process, write.

# The output directory does not depend on the file being processed, so it
# is set once here. Create it up front so the export step cannot fail on a
# missing folder after the whole pipeline has already run.
root <- "E:\\mass_spectrum_data\\CRC-PXD019662-20240607\\processed0715"
dir.create(root, showWarnings = FALSE, recursive = TRUE)

for (path in file_paths) {
  # Note: for continuous imzML files Cardinal cannot restrict the mass
  # range on import by default, so the full range is read here
  # (hence no mass.range = c(min_mz, max_mz) argument).
  msi <- readImzML(path, memory = FALSE, verbose = TRUE)

  # Manually crop the m/z range of the feature data; use the configured
  # lower bound instead of repeating the literal value.
  mass <- subset(msi, mz >= min_mz)
  # The m/z range was checked visually with plot(mass) and was correct.

  # Mass recalibration against estimated reference peaks.
  peaks <- estimateReferencePeaks(mass)
  mse_recalibrate <- recalibrate(
    mass,
    ref = peaks,
    method = "locmax",
    tolerance = 50,
    units = "ppm"
  )

  # TIC normalization.
  normalized_msi <- normalize(mse_recalibrate, method = "tic")

  # Smoothing, two passes; the width parameters need tuning to the data.
  mse_smoothed1 <- smooth(normalized_msi, method = "sgolay", width = 11)
  mse_smoothed <- smooth(mse_smoothed1, method = "sgolay", width = 5)

  # Baseline correction using local median interpolation.
  mse_baselined <- reduceBaseline(mse_smoothed, method = "median")

  # Binning to the desired m/z resolution within [min_mz, max_mz].
  # NOTE(review): `unit` is kept exactly as written; if the formal argument
  # is `units` this relies on partial matching -- confirm against the
  # installed Cardinal version before renaming.
  mse_bin <- bin(
    mse_baselined,
    spectra = "intensity",
    method = "mean",
    unit = "mz",
    resolution = desired_resolution,
    mass.range = c(min_mz, max_mz)
  )

  # Final step: run process() on the binned data.
  mse_final <- process(mse_bin)

  # Export under the input file's base name. The dot is escaped so that
  # only a literal ".imzML" suffix is stripped.
  filename <- basename(path)
  name <- sub("\\.imzML$", "", filename)
  output_path <- file.path(root, name)
  writeImzML(mse_final, output_path, mass.range = c(min_mz, max_mz))

  # Report progress.
  cat("Processed:", filename, "\n")
}