Skip to contents

For a simple illustration, we generate a synthetic allele count matrix with minor allele frequency maf greater than 5%. The function simulate_traits() requires the input genotype data to be stored in the PLINK format (.bim, .bed, and .fam files). Here we use the package genio to write the allele count matrix to a PLINK file.

plink_file <- tempfile()
n_samples <- 3000
n_snps <- 5000
maf <- 0.05 + 0.45 * runif(n_snps)
random_minor_allele_counts   <- (runif(n_samples * n_snps) < maf) +
                                (runif(n_samples * n_snps) < maf)
allele_count_matrix <- matrix(random_minor_allele_counts,
  nrow = n_samples,
  ncol = n_snps,
  byrow = TRUE,
)

# Create .fam and .bim data frames
fam <- data.frame(
  fam = sprintf("F%d", 1:n_samples),       # Family ID
  id = sprintf("I%d", 1:n_samples),       # Individual ID
  pat = 0,                        # Paternal ID
  mat = 0,                        # Maternal ID
  sex = sample(1:2, n_samples, replace = TRUE),
  pheno = -9
)

bim <- data.frame(
  chr = 1,               # Chromosome number
  id = sprintf("rs%d", 1:n_snps),   # SNP name
  posg = 0,                         # Genetic distance (cM)
  pos = 1:n_snps,          # Base-pair position
  ref = sample(c("A", "C", "G", "T"), n_snps, replace = T), # Minor allele
  alt = sample(c("A", "C", "G", "T"), n_snps, replace = T)  # Major allele
)

# Write to .bed, .fam, and .bim files
write_plink(
  file = plink_file,
  X = t(allele_count_matrix),
  fam = fam,
  bim = bim
)

To simulate traits, we need to specify the genetic architecture. E.g., the function requires us to specify

  • the narrow sense heritability h2h^2
  • the indices SNPs contributing to the narrow sense heritability
  • the heritability due to epistasis
  • the indices of SNPs contributing to the epistatic trait variance
  • the path to the output file.

The simulated trait is written to file in the PLINK phenotype format.

pheno_file <- tempfile()
additive_heritability <- 0.3
gxg_heritability <- 0.25
n_snps_gxg_group <- 5
n_snps_additive <- 100
additive_snps <- sort(sample(1:n_snps, n_snps_additive, replace = F))
gxg_group_1 <- sort(sample(additive_snps, n_snps_gxg_group, replace = F))
gxg_group_2 <- sort(sample(setdiff(additive_snps, gxg_group_1),
                           n_snps_gxg_group, replace = F))

simulate_traits(
  plink_file,
  pheno_file,
  additive_heritability,
  gxg_heritability,
  additive_snps,
  gxg_group_1,
  gxg_group_2
)

The variance components of the traits are constructed to match the input. See Crawford et al. (2017) and mvMAPIT Documentation: Simulate Traits for a full description of the simulation scheme.

SessionInfo

sessionInfo()
#> R version 4.4.2 (2024-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.1 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
#>  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
#>  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
#> [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
#> 
#> time zone: UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] genio_1.1.2 smer_0.0.1 
#> 
#> loaded via a namespace (and not attached):
#>  [1] jsonlite_1.8.9      dplyr_1.1.4         compiler_4.4.2     
#>  [4] tidyselect_1.2.1    Rcpp_1.0.14         FMStable_0.1-4     
#>  [7] parallel_4.4.2      tidyr_1.3.1         jquerylib_0.1.4    
#> [10] systemfonts_1.2.0   textshaping_0.4.1   harmonicmeanp_3.0.1
#> [13] yaml_2.3.10         fastmap_1.2.0       R6_2.5.1           
#> [16] generics_0.1.3      knitr_1.49          iterators_1.0.14   
#> [19] backports_1.5.0     checkmate_2.3.2     tibble_3.2.1       
#> [22] desc_1.4.3          bslib_0.8.0         pillar_1.10.1      
#> [25] rlang_1.1.4         cachem_1.1.0        xfun_0.50          
#> [28] fs_1.6.5            sass_0.4.9          cli_3.6.3          
#> [31] pkgdown_2.1.1       magrittr_2.0.3      digest_0.6.37      
#> [34] mvMAPIT_2.0.3       foreach_1.5.2       mvtnorm_1.3-3      
#> [37] lifecycle_1.0.4     CompQuadForm_1.4.3  vctrs_0.6.5        
#> [40] evaluate_1.0.3      glue_1.8.0          codetools_0.2-20   
#> [43] ragg_1.3.3          purrr_1.0.2         rmarkdown_2.29     
#> [46] tools_4.4.2         pkgconfig_2.0.3     htmltools_0.5.8.1