library(tidyverse)
library(PKNCA)
library(readr)End-to-End NCA Template in R with PKNCA
Big idea: A good NCA workflow is not “run pk.nca().” It is: define profiles → QC structure → specify intervals → compute → diagnose reliability → report transparently.
Learning Objectives
By the end of this lesson, you will be able to:
- Run an end-to-end NCA workflow in a reusable, script-like way.
- Perform structural QC checks that prevent silent errors.
- Define profiles and intervals explicitly (including partial windows).
- Run
pk.nca()and extract raw results safely. - Compute high-value reliability diagnostics (extrapolated fraction, half-life plausibility).
- Produce subject-level and summary tables with documented assumptions and units.
Key Ideas
- NCA is computationally straightforward but structure-sensitive.
- “One profile” must be defined explicitly (ID + period/analyte/matrix as needed).
- QC happens before calculations: types, missingness, duplicates, dose alignment.
- AUCinf is not “always reportable” — check extrapolated fraction.
- Reporting is a separate step: reshape results, label units, and document calculation rules.
Template Setup
Step 1: Load Data (Example = Theoph)
Replace this section with your import/cleaning steps (e.g., read_csv()).
data(Theoph)
conc_df <- as_tibble(Theoph) %>%
transmute(
ID = Subject,
TIME = Time,
CONC = conc
)
dose_df <- as_tibble(Theoph) %>%
distinct(Subject, Dose) %>%
transmute(
ID = Subject,
TIME = 0,
DOSE = Dose
)Dose normalization (Theoph): Dose is recorded in mg/kg.
Because:
\[ CL/F = \frac{{Dose}}{{AUC_{{\infty}}}} \]
and AUC is typically mg·h/L, clearance here is L/h/kg (weight-normalized).
When you swap in your study data:
- Dose in mg → clearance in L/h
- Dose in mg/kg → clearance in L/h/kg
Step 2: Structural QC (Do This Every Time)
2.1 Types and missingness
qc_types <- conc_df %>%
summarise(
time_numeric = is.numeric(TIME),
conc_numeric = is.numeric(CONC),
any_missing_time = any(is.na(TIME)),
any_missing_conc = any(is.na(CONC)),
any_negative_time = any(TIME < 0, na.rm = TRUE),
any_negative_conc = any(CONC < 0, na.rm = TRUE)
)
qc_types# A tibble: 1 × 6
time_numeric conc_numeric any_missing_time any_missing_conc any_negative_time
<lgl> <lgl> <lgl> <lgl> <lgl>
1 TRUE TRUE FALSE FALSE FALSE
# ℹ 1 more variable: any_negative_conc <lgl>
2.2 Duplicate timepoints within a profile
dup_times <- conc_df %>%
count(ID, TIME, name = "n_at_time") %>%
filter(n_at_time > 1)
dup_times# A tibble: 0 × 3
# ℹ 3 variables: ID <ord>, TIME <dbl>, n_at_time <int>
If this is not empty, resolve duplicates (average replicates / pick rule) before NCA.
2.3 Dose QC (single-dose pattern)
dose_df %>%
count(ID, name = "n_dose_rows") %>%
filter(n_dose_rows > 1)# A tibble: 0 × 2
# ℹ 2 variables: ID <ord>, n_dose_rows <int>
Step 3: Define Profiles (Grouping)
For Theoph, each subject has one profile, so grouping is just ID.
In real datasets, common patterns are:
ID + PERIOD(crossover)ID + OCC(repeat occasions)ID + ANALYTE(parent/metabolite)ID + MATRIX(plasma/whole blood)
Rule: If you facet by it and expect separate curves, include it in grouping for both concentration and dose objects.
Step 4: Build PKNCA Objects
conc_obj <- PKNCAconc(CONC ~ TIME | ID, data = conc_df)
dose_obj <- PKNCAdose(DOSE ~ TIME | ID, data = dose_df)Step 5: Define Intervals (Observed + Infinite)
A practical pattern is to compute both:
- Observed exposure (\(AUC_{{0-tlast}}\) or a partial window)
- AUCinf only when reliable
intervals_df <- data.frame(
start = 0,
end = Inf,
cmax = TRUE,
tmax = TRUE,
auclast = TRUE,
aucinf.obs = TRUE,
half.life = TRUE,
cl.obs = TRUE
)
intervals_df start end cmax tmax auclast aucinf.obs half.life cl.obs
1 0 Inf TRUE TRUE TRUE TRUE TRUE TRUE
Step 6: Run NCA
nca_data <- PKNCAdata(conc_obj, dose_obj, intervals = intervals_df)
results_obj <- pk.nca(nca_data)
raw_df <- as.data.frame(results_obj)
sort(unique(raw_df$PPTESTCD)) [1] "adj.r.squared" "aucinf.obs" "auclast"
[4] "cl.obs" "clast.obs" "clast.pred"
[7] "cmax" "half.life" "lambda.z"
[10] "lambda.z.n.points" "lambda.z.time.first" "lambda.z.time.last"
[13] "r.squared" "span.ratio" "tlast"
[16] "tmax"
Step 7: Reliability Diagnostics (Minimal but High Value)
7.1 Extrapolated fraction for AUCinf
We compute:
\[ \text{{Extrapolated fraction}} = \frac{{AUC_{{\infty}} - AUC_{{0-tlast}}}}{{AUC_{{\infty}}}} \]
auc_tbl <- raw_df %>%
filter(PPTESTCD %in% c("aucinf.obs", "auclast")) %>%
select(ID, PPTESTCD, PPORRES) %>%
pivot_wider(names_from = PPTESTCD, values_from = PPORRES) %>%
mutate(across(-ID, ~ suppressWarnings(parse_number(as.character(.))))) %>%
mutate(extrap_fraction = (aucinf.obs - auclast) / aucinf.obs)
auc_tbl %>%
select(ID, aucinf.obs, auclast, extrap_fraction) %>%
head()# A tibble: 6 × 4
ID aucinf.obs auclast extrap_fraction
<ord> <dbl> <dbl> <dbl>
1 6 82.2 71.7 0.128
2 7 101. 88.0 0.129
3 8 102. 86.8 0.150
4 11 86.9 77.9 0.104
5 3 106. 95.9 0.0966
6 2 97.4 88.7 0.0888
Flag high extrapolation (example threshold: 20%):
auc_tbl %>%
filter(!is.na(extrap_fraction), extrap_fraction > 0.20) %>%
arrange(desc(extrap_fraction))# A tibble: 1 × 4
ID auclast aucinf.obs extrap_fraction
<ord> <dbl> <dbl> <dbl>
1 1 147. 215. 0.315
7.2 Half-life plausibility
raw_df %>%
filter(PPTESTCD == "half.life") %>%
arrange(desc(PPORRES)) %>%
head(12)# A tibble: 12 × 6
ID start end PPTESTCD PPORRES exclude
<ord> <dbl> <dbl> <chr> <dbl> <chr>
1 1 0 Inf half.life 14.3 <NA>
2 10 0 Inf half.life 9.25 <NA>
3 8 0 Inf half.life 8.51 <NA>
4 9 0 Inf half.life 8.41 <NA>
5 5 0 Inf half.life 8.00 <NA>
6 6 0 Inf half.life 7.89 <NA>
7 7 0 Inf half.life 7.85 <NA>
8 11 0 Inf half.life 7.26 <NA>
9 4 0 Inf half.life 6.98 <NA>
10 3 0 Inf half.life 6.77 <NA>
11 2 0 Inf half.life 6.66 <NA>
12 12 0 Inf half.life 6.29 <NA>
Step 8: Reporting Tables (Subject-Level + Summary)
8.1 Subject-level exposure table
subject_tbl <- raw_df %>%
filter(PPTESTCD %in% c("cmax", "tmax", "auclast", "aucinf.obs", "half.life", "cl.obs")) %>%
select(ID, PPTESTCD, PPORRES) %>%
pivot_wider(names_from = PPTESTCD, values_from = PPORRES) %>%
mutate(across(-ID, ~ suppressWarnings(parse_number(as.character(.)))))
subject_tbl %>%
select(ID, cmax, tmax, auclast, aucinf.obs, half.life, cl.obs) %>%
head()# A tibble: 6 × 7
ID cmax tmax auclast aucinf.obs half.life cl.obs
<ord> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 6 6.44 1.15 71.7 82.2 7.89 0.0487
2 7 7.09 3.48 88.0 101. 7.85 0.0490
3 8 7.56 2.02 86.8 102. 8.51 0.0443
4 11 8 0.98 77.9 86.9 7.26 0.0566
5 3 8.2 1.02 95.9 106. 6.77 0.0427
6 2 8.33 1.92 88.7 97.4 6.66 0.0452
Units matter: with Theoph (mg/kg dose), cl.obs is L/h/kg.
In real reporting, include units in labels, for example:
AUCinf (mg·h/L)Cmax (mg/L)CL/F (L/h/kg)
8.2 Summary table (simple but explicit)
summary_tbl <- subject_tbl %>%
summarise(
n = n(),
cmax_mean = mean(cmax, na.rm = TRUE),
cmax_cv = 100 * sd(cmax, na.rm = TRUE) / mean(cmax, na.rm = TRUE),
aucinf_mean = mean(aucinf.obs, na.rm = TRUE),
aucinf_cv = 100 * sd(aucinf.obs, na.rm = TRUE) / mean(aucinf.obs, na.rm = TRUE)
)
summary_tbl# A tibble: 1 × 5
n cmax_mean cmax_cv aucinf_mean aucinf_cv
<int> <dbl> <dbl> <dbl> <dbl>
1 12 8.76 16.8 119. 32.0
Worked Example (Conceptual): Why this is “auditable”
A reviewer (or future you) should be able to answer:
- What defines a profile?
- Were duplicates checked?
- What intervals were used?
- How reliable is AUCinf (extrapolated fraction)?
- What are the units (mg vs mg/kg)?
- What software version produced the results?
This template keeps QC + compute + reporting together so that these answers are visible from one document.
Strategies
- Treat
PKNCAconc()andPKNCAdose()as a contract defining what a profile means. - Fail fast on duplicates and missing critical fields.
- Compute AUC diagnostics every run (don’t wait until reporting).
- Make units explicit in tables and narrative text.
- Keep QC + compute + reporting in one Quarto document for auditability.
Common Mistakes
- Running NCA without resolving duplicate timepoints.
- Forgetting to include PERIOD/OCC in grouping when multiple occasions exist.
- Reporting AUCinf without checking extrapolated fraction.
- Comparing clearance values without confirming dose units (mg vs mg/kg).
- Copying summary values manually instead of exporting tables.
Practice Problems
Executable
- Add a second interval to compute AUC0–12 alongside AUC0–Inf.
- Create a flag column on
subject_tblthat marks subjects with extrapolated fraction > 0.2. - Add geometric mean AUCinf to your summary table.
Conceptual
- Why should QC live in the same script as the NCA calculation?
- Why is extrapolated fraction a reliability metric (not a “nice-to-have” statistic)?
1. Add AUC0–12:
intervals_two <- data.frame(
start = 0,
end = c(12, Inf),
auclast = TRUE,
aucinf.obs = TRUE
)
nca_two <- PKNCAdata(conc_obj, dose_obj, intervals = intervals_two)
as.data.frame(pk.nca(nca_two)) %>% head()# A tibble: 6 × 6
ID start end PPTESTCD PPORRES exclude
<ord> <dbl> <dbl> <chr> <dbl> <chr>
1 6 0 12 auclast 43.0 <NA>
2 6 0 12 tmax 1.15 <NA>
3 6 0 12 tlast 9.22 <NA>
4 6 0 12 clast.obs 3.46 <NA>
5 6 0 12 lambda.z 0.0854 <NA>
6 6 0 12 r.squared 0.996 <NA>
2. Flag high extrapolation:
subject_tbl_flagged <- subject_tbl %>%
left_join(auc_tbl %>% select(ID, extrap_fraction), by = "ID") %>%
mutate(flag_high_extrap = !is.na(extrap_fraction) & extrap_fraction > 0.2)
subject_tbl_flagged %>% head()# A tibble: 6 × 9
ID auclast cmax tmax half.life aucinf.obs cl.obs extrap_fraction
<ord> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 6 71.7 6.44 1.15 7.89 82.2 0.0487 0.128
2 7 88.0 7.09 3.48 7.85 101. 0.0490 0.129
3 8 86.8 7.56 2.02 8.51 102. 0.0443 0.150
4 11 77.9 8 0.98 7.26 86.9 0.0566 0.104
5 3 95.9 8.2 1.02 6.77 106. 0.0427 0.0966
6 2 88.7 8.33 1.92 6.66 97.4 0.0452 0.0888
# ℹ 1 more variable: flag_high_extrap <lgl>
3. Geometric mean AUCinf:
subject_tbl %>%
summarise(geo_mean_aucinf = exp(mean(log(aucinf.obs), na.rm = TRUE)))# A tibble: 1 × 1
geo_mean_aucinf
<dbl>
1 115.
4. Conceptual:
QC in the same script improves reproducibility and auditability: you can see exactly what checks were run and what passed/failed.
5. Conceptual:
Extrapolated fraction reflects how much of AUCinf is supported by observed data. High extrapolation means the estimate is driven more by terminal-slope assumptions than measurements.
Summary
This template provides a complete, reproducible NCA workflow:
- Structural QC (types, duplicates, dose alignment)
- Profile definition (grouping)
- Explicit interval specification
- NCA execution
- Reliability diagnostics (extrapolated fraction, half-life plausibility)
- Clean reporting tables and exports
- Explicit unit handling (mg vs mg/kg)
- Define “one profile” first; everything depends on grouping.
- Duplicate timepoints are a structural error — resolve them deliberately.
- Always compute and review extrapolated fraction before reporting AUCinf.
- Export tables; don’t hand-copy results.
- Document assumptions (intervals + interpolation + version + units) in every deliverable.