## ----setup, include = FALSE---------------------------------------------------
# Global knitr chunk options for this vignette.
knitr::opts_chunk$set(
  collapse = TRUE,
  # Only evaluate chunks when the data this vignette actually reads is
  # present: every chunk below depends on the .ris files in "topic_data".
  eval = dir.exists("topic_data"),
  comment = "#>",
  warning = FALSE,
  fig.width = 6,
  fig.height = 6
)

## ----results = FALSE, message=FALSE, warning=FALSE----------------------------
#install.packages("CiteSource")
library(CiteSource)

## -----------------------------------------------------------------------------
# Collect the RIS exports in "topic_data". The pattern is anchored with `$` so
# that only file names ending in ".ris" match (not, e.g., "x.ris.bak").
citation_files <- list.files(
  path = "topic_data",
  pattern = "\\.ris$",
  full.names = TRUE
)

# list.files() returns paths in alphabetical order. The labels below are
# presumably matched to the files by position, which is why they are listed
# alphabetically too -- confirm against the file names in "topic_data".
citations <- read_citations(
  citation_files,
  cite_sources = c("crimjust", "lens", "psycinfo", "pubmed", "scopus"),
  tag_naming = "best_guess"
)

## ----results = FALSE, message=FALSE, warning=FALSE----------------------------
# Deduplicate the imported records, then derive the two summaries that the
# plots and tables further down are built from.
unique_citations <- citations |> dedup_citations()
n_unique <- unique_citations |> count_unique()
source_comparison <- unique_citations |>
  compare_sources(comp_type = "sources")

## -----------------------------------------------------------------------------
# Overlap heatmap of the source comparison, using the function's default
# plot type.
plot_source_overlap_heatmap(source_comparison)

## -----------------------------------------------------------------------------
# Overlap heatmap of the same comparison, this time requesting the
# "percentages" plot type.
plot_source_overlap_heatmap(source_comparison, plot_type = "percentages")

## -----------------------------------------------------------------------------
# UpSet-style overlap plot of the source comparison.
# NOTE(review): `decreasing = c(TRUE, TRUE)` presumably controls the sort
# order of the plotted sets/intersections -- confirm in the function docs.
plot_source_overlap_upset(source_comparison, decreasing = c(TRUE, TRUE))

## -----------------------------------------------------------------------------
# Plot per-source contributions from the count_unique() result, with the
# `center` option switched on.
plot_contributions(n_unique, center = TRUE)

## -----------------------------------------------------------------------------
# Return the rows of `n_unique` flagged as unique for one source, joined to
# their full records in `unique_citations` via `duplicate_id`.
# `.env$` makes sure the label is taken from the function argument and never
# from a column of the same name.
records_unique_to <- function(source_label) {
  n_unique |>
    dplyr::filter(cite_source == .env$source_label, unique == TRUE) |>
    dplyr::inner_join(unique_citations, by = "duplicate_id")
}

unique_lens     <- records_unique_to("lens")
unique_psycinfo <- records_unique_to("psycinfo")
unique_pubmed   <- records_unique_to("pubmed")
unique_crimjust <- records_unique_to("crimjust")
unique_scopus   <- records_unique_to("scopus")

## -----------------------------------------------------------------------------
# Tally the records unique to Scopus by journal, most frequent first.
scopus_journals <- unique_scopus |>
  dplyr::group_by(journal) |>
  dplyr::summarise(count = dplyr::n()) |>
  dplyr::arrange(dplyr::desc(count))

# Show the ten most frequent journals. head() is used instead of `[1:10, ]`
# because indexing past the last row pads the table with all-NA rows when
# fewer than ten journals are present.
knitr::kable(head(scopus_journals, 10))

## ----results=FALSE------------------------------------------------------------
# Number of Scopus-only records per publication year, drawn as a single
# connected line with a point marker for each year.
unique_scopus |>
  dplyr::group_by(year) |>
  dplyr::summarise(count = dplyr::n()) |>
  ggplot2::ggplot(mapping = ggplot2::aes(x = year, y = count, group = 1)) +
  ggplot2::geom_line() +
  ggplot2::geom_point() +
  ggplot2::labs(x = "Publication year", y = "Unique records")

## ----message=FALSE------------------------------------------------------------
# Stack the records unique to each source. "crimjust" is included alongside
# the other four so that every source read in above is represented in the
# faceted plot.
all_unique <- dplyr::bind_rows(
  unique_scopus,
  unique_lens,
  unique_pubmed,
  unique_psycinfo,
  unique_crimjust
)

# Yearly counts per source, one facet per source. `cite_source.x` is the
# source column contributed by `n_unique` (suffixed by the earlier inner join).
all_unique |>
  dplyr::group_by(cite_source.x, year) |>
  # Drop the grouping explicitly instead of relying on message = FALSE to
  # hide summarise()'s regrouping message.
  dplyr::summarise(count = dplyr::n(), .groups = "drop") |>
  ggplot2::ggplot(ggplot2::aes(year, count, group = 1)) +
  ggplot2::geom_line() +
  ggplot2::geom_point() +
  ggplot2::facet_wrap(~ cite_source.x) +
  ggplot2::xlab("Publication year") +
  ggplot2::ylab("Unique records")

## -----------------------------------------------------------------------------
#export_csv(unique_citations, filename = "unique-by-source.csv", separate = "cite_source")
#export_ris(unique_citations, filename = "unique_citations.ris", source_field = "DB", label_field = "N1")
#export_bib(unique_citations, filename = "unique_citations.bib", include = c("sources", "labels", "strings"))
#reimport_csv("unique-by-source.csv")

