graph LR
%% Flowchart of the pipeline's dependency graph, laid out left to right.
%% NOTE(review): presumably generated by targets (e.g. tar_mermaid()) - confirm before hand-editing.
%% Both subgraph boxes get a fully transparent fill (alpha 00) and a black border.
style Legend fill:#FFFFFF00,stroke:#000000;
style Graph fill:#FFFFFF00,stroke:#000000;
%% Legend: sample nodes for the "Up to date" status and the "Stem" / "Function" shapes.
%% Its two links (indices 0 and 1) only chain the samples together; they are hidden below.
subgraph Legend
direction LR
xf1522833a4d242c5([""Up to date""]):::uptodate --- xd03d7c7dd2ddda2b([""Stem""]):::none
xd03d7c7dd2ddda2b([""Stem""]):::none --- xeb2d7cac8a1ce544>""Function""]:::none
end
%% Graph: rounded ([..]) nodes use the legend's "Stem" shape, flag >..] nodes its "Function" shape.
%% Each arrow points from a node to the node it feeds into. Links here are indices 2 through 11.
subgraph Graph
direction LR
xe6fae3abc4970767(["data_processed_talks"]):::uptodate --> x846d45454e9823b7(["write_data_to_files"]):::uptodate
x986bd675a516b5b3(["data_processed_team"]):::uptodate --> x846d45454e9823b7(["write_data_to_files"]):::uptodate
x4a9bd41924e0fdb4>"write_data"]:::uptodate --> x846d45454e9823b7(["write_data_to_files"]):::uptodate
xdd04dc2490030259>"get_data"]:::uptodate --> xeb364ad4c7efc3d3(["get_gsheet_data"]):::uptodate
xe1268667a4ed1fd8>"build_team"]:::uptodate --> x986bd675a516b5b3(["data_processed_team"]):::uptodate
xeb364ad4c7efc3d3(["get_gsheet_data"]):::uptodate --> x986bd675a516b5b3(["data_processed_team"]):::uptodate
x24d9c683daf7bfc6>"build_talks"]:::uptodate --> xe6fae3abc4970767(["data_processed_talks"]):::uptodate
xeb364ad4c7efc3d3(["get_gsheet_data"]):::uptodate --> xe6fae3abc4970767(["data_processed_talks"]):::uptodate
%% The next two nodes have no other edges in this graph; each is declared through a
%% link to itself (indices 10 and 11), which is hidden below.
xa0ae033e33afa834>"build_workshops"]:::uptodate --> xa0ae033e33afa834>"build_workshops"]:::uptodate
x97fb05a8a7947628>"build_proceedings"]:::uptodate --> x97fb05a8a7947628>"build_proceedings"]:::uptodate
end
%% Node colour classes referenced via ":::" above.
classDef uptodate stroke:#000000,color:#ffffff,fill:#354823;
classDef none stroke:#000000,color:#000000,fill:#94a4ac;
%% Hide the legend links (0, 1) and the two self-links (10, 11) with a zero stroke width.
%% These indices follow link definition order, so adding or removing a link shifts them.
linkStyle 0 stroke-width:0px;
linkStyle 1 stroke-width:0px;
linkStyle 10 stroke-width:0px;
linkStyle 11 stroke-width:0px;
Data pipelines for R/Pharma
data-pipelines
Structure
- Code: rinpharma/data-pipelines
- Rendered run: rinpharma.github.io/data-pipelines/
- Output: Direct links below. Stored in the gh-pages branch.
Rebuilding
This data (and this page) is rebuilt automatically on the cron schedule specified in the GitHub Action.
Run the pipeline with targets::tar_make()
Check the pipeline with targets::tar_manifest(fields = all_of("command"))
Visualize the pipeline with targets::tar_visnetwork()
Output
The current files are available in both parquet and csv.
arrow::read_parquet(
"https://rinpharma.github.io/data-pipelines/output/processed_talks.parquet"
) |>
dplyr::glimpse()
Rows: 602
Columns: 16
$ ID <chr> "rinpharma_1", "rinpharma_2", "rinpharma_3", "rinpha…
$ Event <chr> "2018 Conference", "2018 Conference", "2018 Conferen…
$ Abstract <chr> NA, NA, NA, NA, NA, "Lilliam will be presenting a pe…
$ Type <chr> "Workshop", "Workshop", "Workshop", "Workshop", "Rem…
$ APAC <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Year <chr> "2018", "2018", "2018", "2018", "2018", "2018", "201…
$ Date <dttm> 2018-08-15, 2018-08-15, 2018-08-15, 2018-08-15, 201…
$ Start <chr> "08:00:00", "08:00:00", "08:00:00", "08:00:00", "09:…
$ End <chr> "09:15:00", "09:15:00", "09:15:00", "09:15:00", "09:…
$ Speaker <chr> "Marianna Foos", "Adrian Waddell", "Daniel Lee", "De…
$ Affiliation <chr> "Biogen", "Roche", "Generable", "Metrum Research Gro…
$ Title <chr> "Keeping things Peachy when Shiny gets Hairy", "Anal…
$ Slides <chr> NA, NA, NA, NA, NA, NA, NA, NA, "https://github.com/…
$ Video <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Abstract_Sanitzed <chr> NA, NA, NA, NA, NA, "Lilliam will be presenting a pe…
$ Missing_Content <chr> "Unfortunately we do not currently have an abstract,…
arrow::read_parquet(
"https://rinpharma.github.io/data-pipelines/output/processed_team.parquet"
) |>
dplyr::glimpse()
Rows: 44
Columns: 8
$ Name <chr> "James Black", "Paulo Bargo", "Phil Bowsher", "Ellis…
$ Company <chr> "Novartis", "Novartis", "Posit", "GlaxoSmithKline", …
$ EUROPE_US_OC <lgl> TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRU…
$ `EX-EUROPE_US_OC` <lgl> FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALS…
$ APAC_OC <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
$ `EX-APAC_OC` <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL…
$ github <chr> "epijim", NA, "philbowsher", "thebioengineer", "harv…
$ linkedin <chr> "epijim", "paulo-bargo-phd-10590830", "philip-bowshe…
Setup
Code to create gsheets token.
# Generate credentials for gsheet access ----
# One-off setup: create a password, encrypt the service account token with it,
# then decrypt that token at run time to authenticate googlesheets4.
# NOTE(review): the gargle:::secret_*() helpers are unexported internals; newer
# gargle releases appear to ship exported replacements (secret_encrypt_json() /
# secret_decrypt_json()) -- confirm before reusing this snippet.
pw_name <- gargle:::secret_pw_name("googlesheets4")
pw <- gargle:::secret_pw_gen()
# Added pw_name / pw (generated above) via usethis::edit_r_environ()

# Encrypt the service account token (made via the Google dev console)
gargle:::secret_write(
  package = "googlesheets4",
  name = "rinpharma-4ac2ad6eba3b.json",
  input = "~/Downloads/rinpharma-4ac2ad6eba3b.json"
)
# The encrypted file is then read back from inst/secret/ below

# Decrypt the token and authenticate ----
file_name <- "rinpharma-4ac2ad6eba3b.json"
secret_name <- "googlesheets4"
path <- file.path("inst", "secret", file_name)
# Read the whole encrypted file as raw bytes
raw <- readBin(path, "raw", file.size(path))
json <- sodium::data_decrypt(
  bin = raw,
  key = gargle:::secret_pw_get(secret_name),
  nonce = gargle:::secret_nonce()
)
# The decrypted bytes are the JSON text of the token, passed on as a string
pass <- rawToChar(json)
googlesheets4::gs4_auth(path = pass)