Building base cohorts

Introduction

Let’s first create a cdm reference to the Eunomia synthetic data.

library(CDMConnector)
library(CodelistGenerator)
library(PatientProfiles)
library(CohortConstructor)
library(dplyr)

# Open a DuckDB connection to the local copy of the Eunomia example database.
con <- DBI::dbConnect(
  duckdb::duckdb(),
  dbdir = eunomia_dir()
)

# Build the cdm reference on top of that connection. The OMOP tables are read
# from the "main" schema; the write schema is also "main", with the
# "my_study_" prefix supplied alongside it.
cdm <- cdm_from_con(
  con,
  cdm_schema = "main",
  write_schema = c(prefix = "my_study_", schema = "main")
)

Demographic-based cohort creation

One base cohort we can create is based around patient demographics. Here, for example, we create a cohort where people enter on their 18th birthday and leave at age 65 or at the end of their observation period, whichever comes first.

# Create a cohort defined purely by age, using the range 18 to 65, and store
# it in the cdm reference under the name "working_age_cohort".
cdm$working_age_cohort <- cdm |>
  demographicsCohort(
    name = "working_age_cohort",
    ageRange = c(18, 65)
  )

# Cohort settings: one row per cohort definition.
cdm$working_age_cohort |>
  settings()
#> # A tibble: 1 × 3
#>   cohort_definition_id cohort_name  age_range
#>                  <int> <chr>        <chr>    
#> 1                    1 demographics 18_65

# Number of records and subjects in each cohort.
cdm$working_age_cohort |>
  cohortCount()
#> # A tibble: 1 × 3
#>   cohort_definition_id number_records number_subjects
#>                  <int>          <int>           <int>
#> 1                    1           2694            2694

# Attrition: counts remaining after each recorded step.
cdm$working_age_cohort |>
  attrition()
#> # A tibble: 2 × 7
#>   cohort_definition_id number_records number_subjects reason_id reason          
#>                  <int>          <int>           <int>     <int> <chr>           
#> 1                    1           2694            2694         1 Initial qualify…
#> 2                    1           2694            2694         2 Age requirement…
#> # ℹ 2 more variables: excluded_records <int>, excluded_subjects <int>
# Summarise the age of cohort members on the day they enter the cohort.
# NOTE(review): the minimum printed below is 17, one year under the requested
# lower bound of 18 -- confirm how age is derived at cohort entry.
cdm$working_age_cohort |>
  addAge(indexDate = "cohort_start_date") |>
  summarise(
    min_start_age = min(age),
    median_start_age = median(age),
    max_start_age = max(age)
  )
#> # Source:   SQL [1 x 3]
#> # Database: DuckDB v1.0.0 [eburn@Windows 10 x64:R 4.2.1/C:\Users\eburn\AppData\Local\Temp\RtmpmGYlVr\file398c50bc492.duckdb]
#>   min_start_age median_start_age max_start_age
#>           <int>            <dbl>         <int>
#> 1            17               18            18

# Summarise the age of cohort members on the day they leave the cohort.
# Age is computed at cohort_end_date, so the summary columns are named
# *_end_age to distinguish them from the entry-age summary.
cdm$working_age_cohort |>
  addAge(indexDate = "cohort_end_date") |>
  summarise(
    min_end_age = min(age),
    median_end_age = median(age),
    max_end_age = max(age)
  )
#> # Source:   SQL [1 x 3]
#> # Database: DuckDB v1.0.0 [eburn@Windows 10 x64:R 4.2.1/C:\Users\eburn\AppData\Local\Temp\RtmpmGYlVr\file398c50bc492.duckdb]
#>   min_end_age median_end_age max_end_age
#>         <int>          <dbl>       <int>
#> 1          31             57          65

Concept-based cohort creation

# Look up the concept codes for the two ingredients of interest.
drug_codes <- cdm |>
  getDrugIngredientCodes(
    name = c("diclofenac", "acetaminophen")
  )

# Print the resulting code lists.
drug_codes
#> 
#> - 161_acetaminophen (7 codes)
#> - 3355_diclofenac (1 codes)
# Build the cohorts from the drug code lists and store them in the cdm
# reference under the name "medications".
cdm$medications <- cdm |>
  conceptCohort(
    conceptSet = drug_codes,
    name = "medications"
  )

# Cohort settings: one row per cohort definition.
cdm$medications |>
  settings()
#> # A tibble: 2 × 4
#>   cohort_definition_id cohort_name       cdm_version vocabulary_version
#>                  <int> <chr>             <chr>       <chr>             
#> 1                    1 161_acetaminophen 5.3         v5.0 18-JAN-19    
#> 2                    2 3355_diclofenac   5.3         v5.0 18-JAN-19

# Number of records and subjects in each cohort.
cdm$medications |>
  cohortCount()
#> # A tibble: 2 × 3
#>   cohort_definition_id number_records number_subjects
#>                  <int>          <int>           <int>
#> 1                    1          13908            2679
#> 2                    2            830             830

Concept-based cohort creation for measurements

TO DO