Title: | Read and Write CDISC Dataset JSON Files |
---|---|
Description: | Read, construct and write CDISC (Clinical Data Interchange Standards Consortium) Dataset JSON (JavaScript Object Notation) files, while validating per the Dataset JSON schema file, as described in CDISC (2023) <https://www.cdisc.org/dataset-json>. |
Authors: | Mike Stackhouse [aut, cre] , Nicholas Masel [aut] |
Maintainer: | Mike Stackhouse <[email protected]> |
License: | Apache License (>= 2) |
Version: | 0.2.0 |
Built: | 2025-01-03 03:43:17 UTC |
Source: | https://github.com/atorus-research/datasetjson |
Create the data metadata container for a Dataset JSON object
data_metadata(study = NULL, metadata_version = NULL, metadata_ref = NULL)
data_metadata(study = NULL, metadata_version = NULL, metadata_ref = NULL)
study |
Study OID value |
metadata_version |
Metadata version OID value |
metadata_ref |
Metadata reference (i.e. path to Define.xml) |
data_metadata object
# Create object directly data_meta <- data_metadata( study = "SOMESTUDY", metadata_version = "MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7", metadata_ref = "some/define.xml" ) # Use setter functions data_meta <- data_metadata() data_meta_updated <- set_metadata_ref(data_meta, "some/define.xml") data_meta_updated <- set_metadata_version(data_meta_updated, "MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7") data_meta_updated <- set_study_oid(data_meta_updated, "SOMESTUDY")
# Create object directly data_meta <- data_metadata( study = "SOMESTUDY", metadata_version = "MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7", metadata_ref = "some/define.xml" ) # Use setter functions data_meta <- data_metadata() data_meta_updated <- set_metadata_ref(data_meta, "some/define.xml") data_meta_updated <- set_metadata_version(data_meta_updated, "MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7") data_meta_updated <- set_study_oid(data_meta_updated, "SOMESTUDY")
Create the base object used to write a Dataset JSON file.
dataset_json( .data, item_id, name, label, items, dataset_meta, version = "1.0.0", data_type = c("clinicalData", "referenceData"), file_meta = file_metadata(), data_meta = data_metadata() )
dataset_json( .data, item_id, name, label, items, dataset_meta, version = "1.0.0", data_type = c("clinicalData", "referenceData"), file_meta = file_metadata(), data_meta = data_metadata() )
.data |
Input data to contain within the Dataset JSON file. Written to the itemData parameter. |
item_id |
ID used to label dataset with the itemGroupData parameter. Defined as "Object of Datasets. Key value is a unique identifier for Dataset, corresponding to ItemGroupDef/@OID in Define-XML." |
name |
Dataset name |
label |
Dataset Label |
items |
Variable metadata |
dataset_meta |
A dataset_metadata object holding pre-specified dataset metadata. |
version |
Version of Dataset JSON schema to follow. |
data_type |
Type of data being written. clinicalData for subject level data, and referenceData for non-subject level data (i.e. TDMs, Associated Persons) |
file_meta |
A file_metadata object holding pre-specified file metadata |
data_meta |
A data_metadata object holding pre-specified data metadata |
dataset_json object pertaining to the specified Dataset JSON version
# Create a basic object ds_json <- dataset_json(iris, "IG.IRIS", "IRIS", "Iris", iris_items) # Attach attributes directly ds_json_updated <- set_data_type(ds_json, "referenceData") ds_json_updated <- set_file_oid(ds_json_updated, "/some/path") ds_json_updated <- set_metadata_ref(ds_json_updated, "some/define.xml") ds_json_updated <- set_metadata_version(ds_json_updated, "MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7") ds_json_updated <- set_originator(ds_json_updated, "Some Org") ds_json_updated <- set_source_system(ds_json_updated, "source system", "1.0") ds_json_updated <- set_study_oid(ds_json_updated, "SOMESTUDY") # Create independent objects for metadata sections first file_meta <- file_metadata( originator = "Some Org", sys = "source system", sys_version = "1.0" ) data_meta <- data_metadata( study = "SOMESTUDY", metadata_version = "MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7", metadata_ref = "some/define.xml" ) dataset_meta <- dataset_metadata( item_id = "IG.IRIS", name = "IRIS", label = "Iris", items = iris_items ) ds_json_from_meta <- dataset_json( iris, dataset_meta = dataset_meta, file_meta = file_meta, data_meta = data_meta )
# Create a basic object ds_json <- dataset_json(iris, "IG.IRIS", "IRIS", "Iris", iris_items) # Attach attributes directly ds_json_updated <- set_data_type(ds_json, "referenceData") ds_json_updated <- set_file_oid(ds_json_updated, "/some/path") ds_json_updated <- set_metadata_ref(ds_json_updated, "some/define.xml") ds_json_updated <- set_metadata_version(ds_json_updated, "MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7") ds_json_updated <- set_originator(ds_json_updated, "Some Org") ds_json_updated <- set_source_system(ds_json_updated, "source system", "1.0") ds_json_updated <- set_study_oid(ds_json_updated, "SOMESTUDY") # Create independent objects for metadata sections first file_meta <- file_metadata( originator = "Some Org", sys = "source system", sys_version = "1.0" ) data_meta <- data_metadata( study = "SOMESTUDY", metadata_version = "MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7", metadata_ref = "some/define.xml" ) dataset_meta <- dataset_metadata( item_id = "IG.IRIS", name = "IRIS", label = "Iris", items = iris_items ) ds_json_from_meta <- dataset_json( iris, dataset_meta = dataset_meta, file_meta = file_meta, data_meta = data_meta )
Generate an individual element that fills the itemGroupData field
dataset_metadata(item_id, name, label, items, .data)
dataset_metadata(item_id, name, label, items, .data)
item_id |
Data Object ID for item in Dataset JSON object, corresponding to ItemGroupDef/@OID in Define-XML. |
name |
Dataset name |
label |
Dataset Label |
items |
Variable metadata |
.data |
Dataframe to be written to Dataset JSON file |
dataset_metadata object
dataset_meta <- dataset_metadata( item_id = "IG.IRIS", name = "IRIS", label = "Iris", items = iris_items )
dataset_meta <- dataset_metadata( item_id = "IG.IRIS", name = "IRIS", label = "Iris", items = iris_items )
Create a file metadata object
file_metadata( originator = NULL, sys = NULL, sys_version = NULL, file_oid = NULL, version = "1.0.0" )
file_metadata( originator = NULL, sys = NULL, sys_version = NULL, file_oid = NULL, version = "1.0.0" )
originator |
originator parameter, defined as "The organization that generated the Dataset-JSON file." |
sys |
sourceSystem parameter, defined as "The computer system or database management system that is the source of the information in this file." |
sys_version |
sourceSystemVersion, defined as "The version of the sourceSystem" |
file_oid |
fileOID parameter, defined as "A unique identifier for this file." |
version |
Dataset JSON schema version being used |
file_metadata object
# Create using parameters file_meta <- file_metadata( originator = "Some Org", sys = "source system", sys_version = "1.0" ) # Set parameters after file_meta <- file_metadata() file_meta_updated <- set_file_oid(file_meta, "/some/path") file_meta_updated <- set_originator(file_meta_updated, "Some Org") file_meta_updated <- set_source_system(file_meta_updated, "source system", "1.0")
# Create using parameters file_meta <- file_metadata( originator = "Some Org", sys = "source system", sys_version = "1.0" ) # Set parameters after file_meta <- file_metadata() file_meta_updated <- set_file_oid(file_meta, "/some/path") file_meta_updated <- set_originator(file_meta_updated, "Some Org") file_meta_updated <- set_source_system(file_meta_updated, "source system", "1.0")
Example of the necessary variable metadata included in a Dataset JSON file based on the Iris data frame.
iris_items
iris_items
iris_items
A data frame with 5 rows and 6 columns: Unique identifier for Variable. Must correspond to ItemDef/@OID in Define-XML.
Name for Variable
Label for Variable
Data type for Variable
Length for Variable
Display format supports data visualization of numeric float and date values.
Indicates that this item is a key variable in the dataset structure. It also provides an ordering for the keys.
This function validates a dataset JSON file against the Dataset JSON schema, and if valid returns a datasetjson object. The Dataset JSON file can be either a file path on disk or a URL which contains the Dataset JSON file.
read_dataset_json(file)
read_dataset_json(file)
file |
File path or URL of a Dataset JSON file |
datasetjson object
# Read from disk ## Not run: dat <- read_dataset_json("path/to/file.json") dat <- read_dataset_json('https://www.somesite.com/file.json') ## End(Not run) # Read from an already imported character vector ds_json <- dataset_json(iris, "IG.IRIS", "IRIS", "Iris", iris_items) js <- write_dataset_json(ds_json) dat <- read_dataset_json(js)
# Read from disk ## Not run: dat <- read_dataset_json("path/to/file.json") dat <- read_dataset_json('https://www.somesite.com/file.json') ## End(Not run) # Read from an already imported character vector ds_json <- dataset_json(iris, "IG.IRIS", "IRIS", "Iris", iris_items) js <- write_dataset_json(ds_json) dat <- read_dataset_json(js)
Valid SAS(c) date formats pulled from https://documentation.sas.com/doc/en/vdmmlcdc/8.1/ds2pg/p0bz5detpfj01qn1kz2in7xymkdl.htm
sas_date_formats
sas_date_formats
sas_date_formats
A character vector with 45 elements
Valid SAS(c) datetime formats pulled from https://documentation.sas.com/doc/en/vdmmlcdc/8.1/ds2pg/p0bz5detpfj01qn1kz2in7xymkdl.htm
sas_datetime_formats
sas_datetime_formats
sas_datetime_formats
A character vector with 7 elements
Valid SAS(c) time formats pulled from https://documentation.sas.com/doc/en/vdmmlcdc/8.1/ds2pg/p0bz5detpfj01qn1kz2in7xymkdl.htm
sas_time_formats
sas_time_formats
sas_time_formats
A character vector with 4 elements
This object is a character vector holding the schema for Dataset JSON Version 1.0.0
schema_1_0_0
schema_1_0_0
schema_1_0_0
A character vector with 1 element
Set information about the file and source system used to generate the Dataset JSON object.
set_source_system(x, sys, sys_version) set_originator(x, originator) set_file_oid(x, file_oid) set_data_type(x, data_type = c("clinicalData", "referenceData"))
set_source_system(x, sys, sys_version) set_originator(x, originator) set_file_oid(x, file_oid) set_data_type(x, data_type = c("clinicalData", "referenceData"))
x |
datasetjson object |
sys |
sourceSystem parameter, defined as "The computer system or database management system that is the source of the information in this file." |
sys_version |
sourceSystemVersion, defined as "The version of the sourceSystem" |
originator |
originator parameter, defined as "The organization that generated the Dataset-JSON file." |
file_oid |
fileOID parameter, defined as "A unique identifier for this file." |
data_type |
Type of data being written. clinicalData for subject level data, and referenceData for non-subject level data (i.e. TDMs, Associated Persons) |
The fileOID parameter should be structured following the description outlined in the ODM V2.0 specification. "FileOIDs should be universally unique if at all possible. One way to ensure this is to prefix every FileOID with an internet domain name owned by the creator of the ODM file or database (followed by a forward slash, "/"). For example, FileOID="BestPharmaceuticals.com/Study5894/1" might be a good way to denote the first file in a series for study 5894 from Best Pharmaceuticals."
datasetjson or file_metadata object
file_meta <- file_metadata() file_meta_updated <- set_file_oid(file_meta, "/some/path") file_meta_updated <- set_originator(file_meta_updated, "Some Org") file_meta_updated <- set_source_system(file_meta_updated, "source system", "1.0")
file_meta <- file_metadata() file_meta_updated <- set_file_oid(file_meta, "/some/path") file_meta_updated <- set_originator(file_meta_updated, "Some Org") file_meta_updated <- set_source_system(file_meta_updated, "source system", "1.0")
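This set of functions sets the study OID, metadata version, and metadata reference on a data metadata or datasetjson object.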
set_study_oid(x, study, ...) set_metadata_version(x, metadata_version, ...) set_metadata_ref(x, metadata_ref)
set_study_oid(x, study, ...) set_metadata_version(x, metadata_version, ...) set_metadata_ref(x, metadata_ref)
x |
data metadata or datasetjson object |
study |
Study OID value |
... |
Additional parameters |
metadata_version |
Metadata version OID value |
metadata_ref |
Metadata reference (i.e. path to Define.xml) |
A datasetjson or data_metadata object
data_meta <- data_metadata() data_meta_updated <- set_metadata_ref(data_meta, "some/define.xml") data_meta_updated <- set_metadata_version(data_meta_updated, "MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7") data_meta_updated <- set_study_oid(data_meta_updated, "SOMESTUDY")
data_meta <- data_metadata() data_meta_updated <- set_metadata_ref(data_meta, "some/define.xml") data_meta_updated <- set_metadata_version(data_meta_updated, "MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7") data_meta_updated <- set_study_oid(data_meta_updated, "SOMESTUDY")
This function calls jsonvalidate::json_validate() directly, with the parameters necessary to retrieve the error information of an invalid JSON file per the Dataset JSON schema.
validate_dataset_json(x)
validate_dataset_json(x)
x |
File path or URL of a Dataset JSON file, or a character vector holding JSON text |
A data frame
## Not run: validate_dataset_json('path/to/file.json') validate_dataset_json('https://www.somesite.com/file.json') ## End(Not run) ds_json <- dataset_json(iris, "IG.IRIS", "IRIS", "Iris", iris_items) js <- write_dataset_json(ds_json) validate_dataset_json(js)
## Not run: validate_dataset_json('path/to/file.json') validate_dataset_json('https://www.somesite.com/file.json') ## End(Not run) ds_json <- dataset_json(iris, "IG.IRIS", "IRIS", "Iris", iris_items) js <- write_dataset_json(ds_json) validate_dataset_json(js)
Write out a Dataset JSON file
write_dataset_json(x, file, pretty = FALSE)
write_dataset_json(x, file, pretty = FALSE)
x |
datasetjson object |
file |
File path to save Dataset JSON file |
pretty |
If TRUE, write with readable formatting |
NULL when file written to disk, otherwise character string
# Write to character object ds_json <- dataset_json(iris, "IG.IRIS", "IRIS", "Iris", iris_items) js <- write_dataset_json(ds_json) # Write to disk ## Not run: write_dataset_json(ds_json, "path/to/file.json") ## End(Not run)
# Write to character object ds_json <- dataset_json(iris, "IG.IRIS", "IRIS", "Iris", iris_items) js <- write_dataset_json(ds_json) # Write to disk ## Not run: write_dataset_json(ds_json, "path/to/file.json") ## End(Not run)