Data pipelines for R/Pharma
data-pipelines
Structure
- Code: rinpharma/data-pipelines
- Rendered run: rinpharma.github.io/data-pipelines/
- Output: Direct links below. Stored in gh-pages branch
TO-DO
Rebuilding
The data (and this page) are rebuilt automatically on the cron schedule specified in the GitHub Action.
Run the pipeline with targets::tar_make()
Check the pipeline with targets::tar_manifest(fields = all_of("command"))
Visualise the pipeline with targets::tar_visnetwork()
Output
The current files are available at the URLs below and can be read directly:
arrow::read_parquet(
  "https://rinpharma.github.io/data-pipelines/output/processed_proceedings.parquet"
) |>
  dplyr::glimpse()
Rows: 296
Columns: 13
$ ID <chr> "rinpharma_1", "rinpharma_2", "rinpharma_3", "rinpha…
$ Event <chr> "2018 Conference", "2018 Conference", "2018 Conferen…
$ Abstract <chr> NA, NA, NA, NA, "Lilliam will be presenting a perspe…
$ Type <chr> "Workshop", "Workshop", "Workshop", "Workshop", "Key…
$ Year <chr> "2018", "2018", "2018", "2018", "2018", "2018", "201…
$ Date <dttm> 2018-08-15, 2018-08-15, 2018-08-15, 2018-08-15, 201…
$ Speaker <chr> "Marianna Foos", "Adrian Waddell", "Daniel Lee", "De…
$ Affiliation <chr> "Biogen", "Roche / Genentech", "Generable", "Metrum …
$ Title <chr> "Keeping things Peachy when Shiny gets Hairy", "Anal…
$ Slides <chr> NA, NA, NA, NA, NA, NA, NA, "https://github.com/rinp…
$ Video <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Abstract_Sanitzed <chr> NA, NA, NA, NA, "Lilliam will be presenting a perspe…
$ Missing_Content <chr> "Unfortunately we do not currently have an abstract,…
arrow::read_parquet(
  "https://rinpharma.github.io/data-pipelines/output/processed_team.parquet"
) |>
  dplyr::glimpse()
Rows: 18
Columns: 9
$ name <chr> "James Black", "Paulo Bargo", "Phil Bowsher", "Ellis H…
$ role <chr> "Roche / Genentech", "Novartis", "posit", "GlaxoSmithK…
$ site_superuser <lgl> TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, F…
$ custom_link <chr> "https://epijim.uk", NA, NA, "https://twitter.com/elli…
$ github <chr> "epijim", NA, "philbowsher", "thebioengineer", "harvey…
$ linkedin <chr> "epijim", "paulo-bargo-phd-10590830", "philip-bowsher-…
$ organising_comm <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, …
$ exec_comm <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE…
$ program_comm <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE…
arrow::read_parquet(
  "https://rinpharma.github.io/data-pipelines/output/processed_workshops.parquet"
) |>
  dplyr::glimpse()
Rows: 51
Columns: 8
$ event <chr> "2021 Conference", "2021 Conference", "2021 Conference",…
$ title <chr> "Clinical Trials Data Analysis at Roche", "Intro Shiny",…
$ date <date> 2021-10-25, 2021-10-25, 2021-10-26, 2021-10-26, 2021-10…
$ time <chr> "09:00-12:00 ET", "14:00-16:00 ET", "09:00-12:00 ET", "1…
$ presenter <chr> "Adrian Waddell (Roche)", "Ted Laderas (DNANexus)", "Dan…
$ max_attendees <dbl> 100, 40, 120, 200, 500, 75, 500, 160, 50, 250, 200, 120,…
$ ticket_url <chr> "https://www.eventbrite.com/e/187203810637", "https://ww…
$ status <chr> "soldout", "soldout", "closed", "closed", "closed", "sol…
Setup
Code to create gsheets token.
# Generate credentials for gsheet access
pw_name <- gargle:::secret_pw_name("googlesheets4")
pw <- gargle:::secret_pw_gen()

usethis::edit_r_environ()
# added pwname_pw to .Renviron
# token (I made via Google dev console)

# encrypt the service account
gargle:::secret_write(
  package = "googlesheets4",
  name = "rinpharma-4ac2ad6eba3b.json",
  input = "~/Downloads/rinpharma-4ac2ad6eba3b.json"
)
# encrypted file is now in /inst/
file_name <- "rinpharma-4ac2ad6eba3b.json"
secret_name <- "googlesheets4"
path <- paste0("inst/secret/", file_name)
raw <- readBin(path, "raw", file.size(path))
json <- sodium::data_decrypt(
  bin = raw, key = gargle:::secret_pw_get(secret_name),
  nonce = gargle:::secret_nonce()
)
pass <- rawToChar(json)

gs4_auth(path = pass)