# Data
# Read the tab-separated colocalization results (with a header row);
# `data.table = FALSE` makes fread() return a plain data.frame.
coloc_table <- fread(
  "./data/00_data_coloc.tsv",
  header = TRUE, data.table = FALSE, sep = "\t"
)
# Process
# Convert to a tibble, round every column whose name starts with "pph"
# to 4 decimal places, and remove two dataset IDs.
coloc_table <- coloc_table %>%
  as_tibble() %>%
  mutate(
    across(starts_with("pph"), ~ round(.x, 4))
  ) %>%
  filter(!(id %in% c("GCST006979", "GCST006980"))) # drop results with fracture and bone mineral density
7 Colocalization analyses
Colocalization analyses are used to detect whether genetic signals from GWAS studies in a genomic region are consistent with a shared causal genetic variant(s) (Giambartolomei et al. 2014). This analysis produces five probabilities which add up to 1:
- \(P(H0)\) - probability that there is no genetic signal with either sclerostin levels or the phenotype in the SOST region
- \(P(H1)\) - probability that there is a genetic signal with sclerostin levels in the SOST region but not with the phenotype
- \(P(H2)\) - probability that there is a genetic signal with phenotype in the SOST region but not with sclerostin levels
- \(P(H3)\) - probability that there is a genetic signal with both sclerostin levels and the phenotype in the SOST region but these signals are independent¹ (i.e., they are not the same)
- \(P(H4)\) - probability that there is a shared genetic signal between sclerostin levels and the phenotype in the SOST region
7.1 Data
These results were extracted from Supplementary Table 10 in Zheng et al. (2023).²
README
id
- dataset ID
pmid
- PubMed ID
trait
- phenotype
n
- number of samples
n_cases
- number of cases
n_snps
- number of SNPs
pph0
- probability that there is no genetic signal with either sclerostin levels or the phenotype in the SOST region
pph1
- probability that there is a genetic signal with sclerostin levels in the SOST region but not with the phenotype
pph2
- probability that there is a genetic signal with phenotype in the SOST region but not with sclerostin levels
pph3
- probability that there is a genetic signal with both sclerostin levels and the phenotype in the SOST region but these signals are independent (i.e., they are not the same)
pph4
- probability that there is a shared genetic signal between sclerostin levels and the phenotype in the SOST region
7.2 Results
7.3 Additional analyses
7.3.1 Data
# Data
# Load the saved R objects used below.
# NOTE(review): the objects used later (ld_mat, ld_snps, pqtls, gwas, studies)
# are presumably created by these load() calls — confirm against the .Rda files.
load("./data/01_data_ldmat.Rda")
load("./data/02_data_gwas_sost_region.Rda")

## pQTLs
# Order the pQTLs by ascending p-value.
pqtls <- pqtls %>%
  arrange(pvalue)
## GWAS
# Keep, for datasets whose id does not start with "UKB3", only variants on
# chromosome 17 within 100,000 bp of positions 41831099-41836156
# (presumably the SOST gene coordinates — confirm genome build).
# Rows from "UKB3*" datasets are kept regardless of position.
# Then compute z-scores and drop rows with a missing z or se.
gwas <- gwas %>%
  filter(
    (!grepl("^UKB3", id) &
      chr == "17" &
      pos >= 41831099 - 100000 &
      pos <= 41836156 + 100000
    ) |
      grepl("^UKB3", id) # keep additional CD300LG variants only for lipid measures
  ) %>%
  mutate(z = round(beta / se, 6)) %>%
  filter(!is.na(z) & !is.na(se))
Since we only had access to the top associated sclerostin pQTLs in the SOST region, we imputed the circulating sclerostin associations in the SOST region using each pQTL. These imputed associations were then used in the colocalization analyses (Giambartolomei et al. 2014).³
# Impute pQTL data
# Build a list, keyed by pQTL rsid, of imputed association tables.
pqtls_impute <- list()
for (rsid in pqtls$rsid) {
  # `!!rsid` forces the loop variable, since `rsid` is also a column of `pqtls`.
  pqtls_impute[[rsid]] <- gwas_impute(
    data = pqtls %>%
      filter(rsid == !!rsid) %>%
      rename(oa = ref, ea = alt, eaf = af),
    snpinfo = ld_snps,
    corr = ld_mat,
    n = 29381 # presumably the pQTL GWAS sample size — TODO confirm
  ) %>%
    # Same post-processing as for `gwas`: z-scores, then drop missing z / se.
    mutate(z = round(beta / se, 6)) %>%
    filter(!is.na(z) & !is.na(se))
}
7.3.2 Analyses
# Colocalization analyses
# For every GWAS dataset and every pQTL, join the imputed pQTL associations
# with the dataset's associations, compute Bayes factors for each side, run
# the colocalization, and append one result row to `coloc_results`.
coloc_results <- tibble()
for (id in unique(gwas$id)) {
  ## Colocalization pQTLs
  for (rsid in pqtls$rsid) {
    ### Colocalization data
    # `!!id` forces the loop variable, since `id` is also a column of `gwas`.
    # Passing `y = .` puts the filtered GWAS on the right-hand side of the
    # join, so columns suffixed "_1" come from the imputed pQTL data and
    # columns suffixed "_2" from the GWAS.
    coloc_data <- gwas %>%
      filter(id == !!id) %>%
      inner_join(
        x = pqtls_impute[[rsid]],
        y = .,
        by = c("rsid", "chr", "pos", "ref", "alt"),
        suffix = c("_1", "_2")
      ) %>%
      relocate(id, .before = rsid)

    ### Colocalization Bayes factors
    bf1 <- coloc_bf(
      z = coloc_data$z_1,
      v = coloc_data$se_1^2
    )
    # `binary` is TRUE when the study's `n_cases` is not missing.
    bf2 <- coloc_bf(
      z = coloc_data$z_2,
      v = coloc_data$se_2^2,
      binary = !(
        studies %>%
          filter(id == !!id) %>%
          pull(n_cases) %>%
          is.na(.)
      )
    )

    ### Colocalization analysis
    # Round the output of coloc() to 4 decimals, transpose it into a single
    # row, and label it with the dataset id, pQTL and number of joined SNPs.
    coloc_analysis <- coloc(bf1, bf2) %>%
      round(., 4) %>%
      t(.) %>%
      as_tibble() %>%
      mutate(
        id = !!id,
        pqtl = !!rsid,
        n_snps = !!nrow(coloc_data)
      ) %>%
      relocate(id, n_snps, pqtl, .before = 1)

    ### Colocalization results
    coloc_results <- coloc_results %>%
      bind_rows(coloc_analysis)
  }
}
# Attach study metadata (pmid, trait, n, n_cases) to the results by `id`.
# `y = .` places the metadata columns first and the results after them.
coloc_results <- coloc_results %>%
  inner_join(
    x = select(studies, id, pmid, trait, n, n_cases),
    y = .,
    by = "id"
  )
README
id
- dataset ID
pmid
- PubMed ID
trait
- phenotype
n
- number of samples
n_cases
- number of cases
n_snps
- number of SNPs
pqtl
- pQTL used to impute the sclerostin genetic associations in the SOST region
pph0
- probability that there is no genetic signal with either sclerostin levels or the phenotype in the SOST region
pph1
- probability that there is a genetic signal with sclerostin levels in the SOST region but not with the phenotype
pph2
- probability that there is a genetic signal with phenotype in the SOST region but not with sclerostin levels
pph3
- probability that there is a genetic signal with both sclerostin levels and the phenotype in the SOST region but these genetic signals are independent (i.e., they are not the same)
pph4
- probability that there is a shared genetic signal between sclerostin levels and the phenotype in the SOST region
7.3.3 Results
This model assumes that there is at most one genetic signal for circulating sclerostin and at most one for the phenotype in the SOST region. This assumption is satisfied for the sclerostin pQTLs because we impute the associations in the SOST region based on each pQTL. Similar results were also found when this assumption was relaxed for the phenotypes by combining fine-mapping and colocalization approaches (Wallace 2021), with only the genetic signals of heel bone mineral density and fracture colocalizing with the genetic signals of circulating sclerostin in the SOST region (\(P(H4)\) > 0.8).↩︎
The SOST region was defined as 500 kb either side of the SOST gene.↩︎
The prior probabilities were set to `p1 = 1e-4`, `p2 = 1e-4` and `p12 = 2e-6` (Giambartolomei et al. 2014; Foley et al. 2021).↩︎