*** Wartungsfenster jeden ersten Mittwoch vormittag im Monat ***

Skip to content
Snippets Groups Projects
Commit 97bee072 authored by Liu, Meiqi's avatar Liu, Meiqi
Browse files

update basic scripts

parent f58a89e9
Branches
No related tags found
No related merge requests found
......@@ -7,3 +7,4 @@ data/*.laccdb
data/Query_Results.csv
data/Table*.csv
docs/*.csv
graphs/*.png
......@@ -2,7 +2,7 @@ library(ProjectTemplate)
load.project()
# original data structure
str(data_all)
glimpse(data_all)
# amount of data for each type
str_type <- data_all %>%
......
graphs/PFAS_sum.png

23.6 KiB

Here you can store any graphs that you produce.
graphs/heatmap_detections.png

183 KiB

graphs/sum_pfas.png

23.9 KiB

graphs/sum_pfas_v2.png

21 KiB

graphs/sum_pfas_v3.png

23.7 KiB

......@@ -61,12 +61,13 @@ metadata_pfas <- metadata_pfas %>%
# extract level
level_matrix <- c("Atmospheric Deposition", "Riverwater", "Groundwater",
"Leachate", "Surface Runoff", "Wastewater")
level_type <- c("Atmospheric Deposition", "Danube Tributary", "Tributary Hotspot",
level_type <- c("Atmospheric Deposition", "Danube Tributary", "Alz",
"Danube Main", "Danube Bank-filtered Water", "Groundwater",
"Groundwater directly influenced by Landfill", "Landfill leachate",
"Surface Runoff", "Municipal Influent", "Municipal Effluent",
"Industrial Influent", "Industrial Effluent")
metadata_compartment <- metadata_compartment %>%
mutate(type = ifelse(type == "Tributary Hotspot", "Alz", type)) %>%
mutate(matrix = factor(matrix, level = level_matrix),
type = factor(type, level = level_type))
......@@ -7,6 +7,7 @@ data_all <- data_all %>%
# ---------- factorise columns ----------
data_all <- data_all %>%
mutate(type = ifelse(type == "Tributary Hotspot", "Alz", type)) %>%
mutate(matrix = factor(matrix, level = level_matrix),
type = factor(type, level = level_type),
cas_number = factor(cas_number, level = level_casno),
......
## statistical analysis on pfas detection
library(ProjectTemplate)
load.project()
# ------------ detection across whole set ------
## Rule: across the whole dataset, detection rate
## whole dataset
detection_wholeset <- mean(!data_all$value_below_loq) * 100
#detection_wholeset <-
mean(!data_all$value_below_loq) * 100
## promisces data
detection_promisces <- data_all %>%
#detection_promisces <-
data_all %>%
filter(datasource == "EU-PROMISCES") %>%
summarise(detection_rate = mean(!value_below_loq) * 100) %>%
pull(detection_rate)
......@@ -17,14 +18,16 @@ detection_promisces <- data_all %>%
# ---------- detection among samples -----
## Rule: for each unique sample, at least one PFAS is detected
## whole dataset
detection_sample_wholeset <- data_all %>%
#detection_sample_wholeset <-
data_all %>%
group_by(sample) %>%
summarise(at_least_one_detect = any(value_below_loq == FALSE)) %>%
summarise(detection_rate = mean(at_least_one_detect) * 100) %>%
pull(detection_rate)
## promisces data
detection_sample_promisces <- data_all %>%
#detection_sample_promisces <-
data_all %>%
filter(datasource == "EU-PROMISCES") %>%
group_by(sample) %>%
summarise(at_least_one_detect = any(value_below_loq == FALSE)) %>%
......@@ -54,56 +57,62 @@ detection_pfas_promisces <- data_all %>%
# ------------- detection among pfas group -----
## Rule: across the whole dataset, detection rate for substance group
detection_pfasgroup_wholeset <- data_all %>%
#detection_pfasgroup_wholeset <-
data_all %>%
left_join(metadata_pfas, by = "substance") %>%
group_by(pfas_group_abb) %>%
summarise(n_all = n(),
detection_rate_all = mean(!value_below_loq) * 100)
detection_pfasgroup_promisces <- data_all %>%
#detection_pfasgroup_promisces <-
data_all %>%
filter(datasource == "EU-PROMISCES") %>%
left_join(metadata_pfas, by = "substance") %>%
group_by(pfas_group_abb) %>%
summarise(n_promisces = n(),
detection_rate_promisces = mean(!value_below_loq) * 100)
detection_pfasgroup_combine <- detection_pfasgroup_wholeset %>%
left_join(detection_pfasgroup_promisces, by = "pfas_group_abb") %>%
select(1, 2, 4, 3, 5) %>%
arrange(desc(detection_rate_all))
# detection_pfasgroup_combine <- detection_pfasgroup_wholeset %>%
# left_join(detection_pfasgroup_promisces, by = "pfas_group_abb") %>%
# select(1, 2, 4, 3, 5) %>%
# arrange(desc(detection_rate_all))
# write.csv(detection_pfasgroup_combine, "docs/detection_pfas_groups.csv")
# --------- detection among matrix ------
detection_matrix_wholeset <- data_all %>%
#detection_matrix_wholeset <-
data_all %>%
group_by(matrix) %>%
summarise(n_all = n(),
detection_rate_all = mean(!value_below_loq) * 100)
detection_matrix_promisces <- data_all %>%
#detection_matrix_promisces <-
data_all %>%
filter(datasource == "EU-PROMISCES") %>%
group_by(matrix) %>%
summarise(n_promisces = n(),
detection_rate_promisces = mean(!value_below_loq) * 100)
detection_matrix_combine <- detection_matrix_wholeset %>%
left_join(detection_matrix_promisces, by = "matrix")
# detection_matrix_combine <- detection_matrix_wholeset %>%
# left_join(detection_matrix_promisces, by = "matrix")
# write.csv(detection_matrix_combine, "docs/detection_pfas_matrix.csv")
# --------- detection among type ------
detection_type_wholeset <- data_all %>%
#detection_type_wholeset <-
data_all %>%
group_by(type) %>%
summarise(n_all = n(),
detection_rate_all = mean(!value_below_loq) * 100)
detection_type_promisces <- data_all %>%
#detection_type_promisces <-
data_all %>%
filter(datasource == "EU-PROMISCES") %>%
group_by(type) %>%
summarise(n_promisces = n(),
detection_rate_promisces = mean(!value_below_loq) * 100)
detection_type_combine <- detection_type_wholeset %>%
left_join(detection_type_promisces, by = "type")
# detection_type_combine <- detection_type_wholeset %>%
# left_join(detection_type_promisces, by = "type")
# write.csv(detection_type_combine, "docs/detection_pfas_type.csv")
\ No newline at end of file
## Factor Analysis on Mixed Data (FAMD)
library(ProjectTemplate)
load.project()
library(FactoMineR)
library(Factoshiny)
# -------- restructure the data for FAMD --------
data_famd_raw <- data_ros %>%
filter(datasource == "EU-PROMISCES") %>%
select(code, matrix, type, substance, value) %>%
group_by(code, matrix, type, substance) %>%
filter(row_number() == 1) %>%
ungroup() %>%
pivot_wider(names_from = substance, values_from = value)
colSums(is.na(data_famd_raw))
# data_famd_raw %>% filter(if_any(4:35, is.na))
# fill NAs with half_loq
data_loq <- data_ros %>%
filter(datasource == "EU-PROMISCES") %>%
filter(value_below_loq == TRUE) %>%
group_by(substance, type) %>%
summarise(loq = mean(loq), .groups = "drop")
data_famd_long <- data_famd_raw %>%
pivot_longer(cols = 4:35, names_to = "substance", values_to = "value") %>%
left_join(data_loq, by = c("substance", "type")) %>%
mutate(value = ifelse(is.na(value), loq / 2, value)) %>%
select(-loq)
data_famd <- data_famd_long %>%
pivot_wider(names_from = substance, values_from = value) %>%
select(-code)
Factoshiny(data_famd)
# -------- perform MFA ------
res.FAMD<-FAMD(data_famd,graph=FALSE)
plot.FAMD(res.FAMD,invisible=c('ind.sup'),title="Graph of individuals and categories")
plot.FAMD(res.FAMD,axes=c(1,2),choix='var',title="Graph of the variables")
plot.FAMD(res.FAMD, choix='quanti',title="Correlation circle")
summary(res.FAMD)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment