Feature Plots

Download a copy of the vignette to follow along here: feature_plots.Rmd

Given a cluster solution formatted as a row of a solutions matrix (or extended solutions matrix) and a data_list and/or target_list containing the features to plot, the auto_plot() function automatically generates ggplot2-based bar and jitter plots showing how each feature is distributed across clusters.

# Load the metasnf package used throughout this walkthrough.
library(metasnf)

# Bundle the input data frames into a single data_list.
# Each list() entry pairs a data frame with three descriptive strings
# (presumably name, domain, and variable type -- confirm against the
# generate_data_list() documentation).
# `uid` presumably names the column that identifies each subject; verify.
data_list <- generate_data_list(
    list(subc_v, "subcortical_volume", "neuroimaging", "continuous"),
    list(income, "household_income", "demographics", "continuous"),
    list(fav_colour, "favourite_colour", "misc", "categorical"),
    list(pubertal, "pubertal_status", "demographics", "continuous"),
    list(anxiety, "anxiety", "behaviour", "ordinal"),
    list(depress, "depressed", "behaviour", "ordinal"),
    uid = "unique_id"
)
## Warning in generate_data_list(list(subc_v, "subcortical_volume",
## "neuroimaging", : 188 subject(s) dropped due to incomplete data.
# The warning shown above is rendered output: 188 subjects with incomplete
# data were dropped while building the data_list.

# Build the space of settings to cluster over.
# The seed is fixed first so the run is reproducible (presumably the settings
# are generated randomly -- confirm). `nrow = 2` requests two rows of settings.
set.seed(42)
settings_matrix <- generate_settings_matrix(
    data_list,
    nrow = 2,
    min_k = 20,
    max_k = 50
)

# Run the clustering for the settings in settings_matrix.
solutions_matrix <- batch_snf(data_list, settings_matrix)

# Keep the first row of the solutions matrix as the solution to plot.
sm_row <- solutions_matrix[1, ]

Note that the row you pick can come directly from a solutions_matrix, but it can also come from an extended_solutions_matrix or from a representative solution picked after running get_representative_solutions().

# Build the plots for the chosen solution row from the features in data_list.
plot_list <- auto_plot(solutions_matrix_row = sm_row, data_list = data_list)

# The result is a named collection; list the available plot names.
names(plot_list)
##  [1] "smri_vol_scs_cbwmatterlh"   "smri_vol_scs_ltventriclelh"
##  [3] "smri_vol_scs_inflatventlh"  "smri_vol_scs_crbwmatterlh" 
##  [5] "smri_vol_scs_crbcortexlh"   "smri_vol_scs_tplh"         
##  [7] "smri_vol_scs_caudatelh"     "smri_vol_scs_putamenlh"    
##  [9] "smri_vol_scs_pallidumlh"    "smri_vol_scs_3rdventricle" 
## [11] "smri_vol_scs_4thventricle"  "smri_vol_scs_bstem"        
## [13] "smri_vol_scs_hpuslh"        "smri_vol_scs_amygdalalh"   
## [15] "smri_vol_scs_csf"           "smri_vol_scs_aal"          
## [17] "smri_vol_scs_vedclh"        "smri_vol_scs_cbwmatterrh"  
## [19] "smri_vol_scs_ltventriclerh" "smri_vol_scs_inflatventrh" 
## [21] "smri_vol_scs_crbwmatterrh"  "smri_vol_scs_crbcortexrh"  
## [23] "smri_vol_scs_tprh"          "smri_vol_scs_caudaterh"    
## [25] "smri_vol_scs_putamenrh"     "smri_vol_scs_pallidumrh"   
## [27] "smri_vol_scs_hpusrh"        "smri_vol_scs_amygdalarh"   
## [29] "smri_vol_scs_aar"           "smri_vol_scs_vedcrh"       
## [31] "household_income"           "colour"                    
## [33] "pubertal_status"            "cbcl_anxiety_r"            
## [35] "cbcl_depress_r"
# Display individual plots by extracting them by name.
plot_list[["household_income"]]

plot_list[["smri_vol_scs_csf"]]

plot_list[["colour"]]

If there’s something you’d like to change about the plot, you can always tack on ggplot2 elements to build from the skeleton provided by auto_plot():

# Start from the auto_plot() skeleton and layer on custom labels plus a
# manual fill palette keyed by colour name.
plot_list[["colour"]] +
    ggplot2::labs(
        title = "Favourite Colour by Cluster",
        x = "Cluster",
        fill = "Favourite Colour"
    ) +
    ggplot2::scale_fill_manual(
        values = c(
            green = "forestgreen",
            red = "firebrick3",
            yellow = "darkgoldenrod1"
        )
    )