## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment  = "#>",
  eval     = FALSE
)

## ----grs-check-local----------------------------------------------------------
# library(ukbflow)
# 
# # Local: weights file on your machine
# w <- grs_check("weights.csv", dest = "weights_clean.txt")
# #> Read 'weights.csv': 312 rows, 5 columns.
# #> ✔ No NA values.
# #> ✔ No duplicate SNPs.
# #> ✔ All SNP IDs match rs[0-9]+ format.
# #> ✔ All effect alleles are A/T/C/G.
# #> Beta summary:
# #>   Range     : -0.412 to 0.589
# #>   Mean |beta|: 0.183
# #>   Positive  : 187 (59.9%)
# #>   Negative  : 125 (40.1%)
# #>   Zero      : 0
# #> ✔ Weights file passed checks: 312 SNPs ready for UKB RAP.
# #> ✔ Saved: 'weights_clean.txt'

## ----grs-check-rap------------------------------------------------------------
# # On RAP (RStudio) -- use /mnt/project/ paths directly
# w <- grs_check(
#   file = "/mnt/project/weights/weights.csv",
#   dest = "/mnt/project/weights/weights_clean.txt"
# )

## ----grs-bgen2pgen-test-------------------------------------------------------
# # Test on the smallest chromosome first
# ids <- grs_bgen2pgen(chr = 22, dest = "/pgen/", priority = "high")
# #> Uploading 'grs_bgen2pgen_std.R' to RAP ...
# #> ✔ Uploaded: '/grs_bgen2pgen_std.R'
# #> Submitting 1 job(s) -- mem2_ssd1_v2_x4 / priority: high
# #> ✔ chr22 -> 'job-Gxxxxxxxxxxxxxxxxxxxxxxxx'
# #> ✔ 1/1 job(s) submitted. Monitor with job_ls().

## ----grs-bgen2pgen-all--------------------------------------------------------
# # Full run: small chromosomes on standard, large on upgraded instance
# ids_small <- grs_bgen2pgen(chr = 17:22, dest = "/pgen/")
# ids_large <- grs_bgen2pgen(chr = 1:16,  dest = "/pgen/", instance = "large")
# 
# # Monitor progress (job_wait() takes one job ID at a time)
# job_ls()
# for (id in c(ids_small, ids_large)) job_wait(id)

## ----grs-score----------------------------------------------------------------
# ids <- grs_score(
#   file     = c(
#     grs_a = "weights/grs_a_weights.txt",
#     grs_b = "weights/grs_b_weights.txt"
#   ),
#   pgen_dir = "/mnt/project/pgen",
#   dest     = "/grs/",
#   priority = "high"
# )
# #> ── Uploading 2 weight file(s) to RAP ────────────────────────────────────────
# #> Uploading 'grs_a_weights.txt' ...
# #> ✔ Uploaded: '/grs_a_weights.txt'
# #> Uploading 'grs_b_weights.txt' ...
# #> ✔ Uploaded: '/grs_b_weights.txt'
# #> Submitting 2 job(s) -- mem2_ssd1_v2_x4 / priority: high
# #> ✔ grs_a -> 'job-Gxxxxxxxxxxxxxxxxxxxxxxxx'
# #> ✔ grs_b -> 'job-Gyyyyyyyyyyyyyyyyyyyyyyyy'
# #> ✔ 2/2 job(s) submitted. Monitor with job_ls().
# 
# job_wait(ids)

## ----grs-score-rap------------------------------------------------------------
# # On RAP: weights already at /mnt/project/grs_a_weights.txt
# ids <- grs_score(
#   file     = c(grs_a = "/mnt/project/grs_a_weights.txt"),
#   pgen_dir = "/mnt/project/pgen",
#   dest     = "/grs/"
# )
# #> ℹ grs_a_weights.txt already at RAP root, skipping upload.

## ----grs-standardize----------------------------------------------------------
# # Auto-detect all columns containing "grs" (case-insensitive)
# cohort <- grs_standardize(cohort)
# #> Auto-detected 2 GRS column(s): 'GRS_a', 'GRS_b'
# #> ✔ GRS_a -> GRS_a_z  [mean=0.0000, sd=1.0000]
# #> ✔ GRS_b -> GRS_b_z  [mean=0.0000, sd=1.0000]

## ----grs-standardize-explicit-------------------------------------------------
# # Or specify columns explicitly
# cohort <- grs_standardize(cohort, grs_cols = c("GRS_a", "GRS_b"))

## ----grs-validate-logistic----------------------------------------------------
# res <- grs_validate(
#   data        = cohort,
#   grs_cols    = c("GRS_a_z", "GRS_b_z"),
#   outcome_col = "outcome"
# )
# #> ── Creating GRS groups ───────────────────────────────────────────────────────
# #> ── Effect per SD (OR) ───────────────────────────────────────────────────────
# #> ── High vs Low ──────────────────────────────────────────────────────────────
# #> ── Trend test ───────────────────────────────────────────────────────────────
# #> ── AUC ──────────────────────────────────────────────────────────────────────
# #> ✔ Validation complete.
# 
# res$per_sd
# res$discrimination

## ----grs-validate-cox---------------------------------------------------------
# res <- grs_validate(
#   data        = cohort,
#   grs_cols    = c("GRS_a_z", "GRS_b_z"),
#   outcome_col = "outcome",
#   time_col    = "followup_years",
#   covariates  = c("age", "sex", paste0("pc", 1:10))
# )
# 
# res$per_sd          # HR per SD x model
# res$high_vs_low     # HR: top 20% vs bottom 20%
# res$trend           # p-trend across Q1–Q4
# res$discrimination  # C-index with 95% CI

## ----grs-full-pipeline--------------------------------------------------------
# library(ukbflow)
# 
# # 1. Validate weights (local)
# grs_check("weights.csv", dest = "weights_clean.txt")
# 
# # 2. Convert BGEN -> PGEN on RAP (submit jobs)
# ids_std  <- grs_bgen2pgen(chr = 17:22, dest = "/pgen/", maf = 0.01)
# ids_lrg  <- grs_bgen2pgen(chr = 1:16,  dest = "/pgen/", maf = 0.01, instance = "large")
# for (id in c(ids_std, ids_lrg)) job_wait(id)
# 
# # 3. Score GRS on RAP (submit jobs)
# score_ids <- grs_score(
#   file     = c(grs_a = "weights_clean.txt"),
#   pgen_dir = "/mnt/project/pgen",
#   maf      = 0.01,   # must match grs_bgen2pgen()
#   dest     = "/grs/"
# )
# job_wait(score_ids)
# 
# # 4. Download score CSV from RAP
# fetch_file("/grs/GRS_a_scores.csv", dest = "GRS_a_scores.csv")
# 
# # 5. Merge into analysis dataset and standardise
# # cohort: your analysis data.table with IID and phenotype columns
# scores <- data.table::fread("GRS_a_scores.csv")   # columns: IID, GRS_a
# cohort <- scores[cohort, on = "IID"]               # right-join: keep all cohort rows
# cohort <- grs_standardize(cohort, grs_cols = "GRS_a")
# 
# # 6. Validate
# res <- grs_validate(
#   data        = cohort,
#   grs_cols    = "GRS_a_z",
#   outcome_col = "outcome",
#   time_col    = "followup_years",
#   covariates  = c("age", "sex", paste0("pc", 1:10))
# )
# 
# res$per_sd
# res$discrimination