The datacite class (see ?datacite) is a modification of a data frame (tibble) object that provides all the mandatory and recommended fields of the DataCite metadata schema for a dataset. It also covers all the properties of the more general Dublin Core standard, although in some cases the property name is different (it follows the DataCite naming convention).

library(dataobservatory, quietly = TRUE)
data("datacite_properties")
datacite_properties
#> # A tibble: 20 x 4
#>       ID Property       Structure                                     Obligation
#>    <dbl> <chr>          <chr>                                         <chr>     
#>  1     1 Identifier     "With mandatory type sub-property"            M         
#>  2     2 Creator        "With optional name identifier and affiliati~ M         
#>  3     3 Title          "With optional type sub-properties"           M         
#>  4     4 Publisher      ""                                            M         
#>  5     5 PublicationYe~ ""                                            M         
#>  6    10 ResourceType   "With mandatory general type description sub~ M         
#>  7     6 Subject        "With scheme sub-property"                    R         
#>  8     7 Contributor    "With type, name identifier, and affiliation~ R         
#>  9     8 Date           "With type sub-property"                      R         
#> 10     9 Language       ""                                            O         
#> 11    11 AlternateIden~ "With type sub-property"                      O         
#> 12    12 RelatedIdenti~ "With type and relation type sub-properties"  R         
#> 13    13 Size           ""                                            O         
#> 14    14 Format         ""                                            O         
#> 15    15 Version        ""                                            O         
#> 16    16 Rights         ""                                            O         
#> 17    17 Description    "With type sub-property"                      R         
#> 18    18 GeoLocation    "With point, box and polygon sub-properties"  R         
#> 19    19 FundingRefere~ "With name, identifier, and award related su~ O         
#> 20    20 RelatedItem    "With identifier, creator, title, publicatio~ O

The datacite class is equipped with a constructor and helper functions that create the more complex metadata properties as JSON (default) or list objects. Not all the helper functions are fully validated, but a complete datacite object can be easily obtained.

library(jsonlite, quietly = TRUE)
data("small_population")
small_population_dataset <- dataset (
  x= small_population,
  dataset_code = "small_population_total",
  dataset_title = "Population of Small European Countries",
  freq = "A",
  unit = "NR",
  unit_name = "number")

attributes (small_population_dataset)
#> $names
#> [1] "dataset_code" "time"         "geo"          "value"        "unit"        
#> [6] "obs_status"   "freq"        
#> 
#> $row.names
#>  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
#> 
#> $class
#> [1] "dataset"    "tbl_df"     "tbl"        "data.frame"
#> 
#> $dataset_code
#> [1] "small_population_total"
#> 
#> $Title
#> [1] "Population of Small European Countries"
#> 
#> $source
#> [1] "greendeal.dataobservatory.eu"
#> 
#> $unit_name
#> [1] "number"
#> 
#> $updated
#> [1] "2021-07-06"
#> 
#> $earliest_actual_observation
#> [1] "2009-01-01"
#> 
#> $latest_actual_observation
#> [1] "2020-01-01"
#> 
#> $sessionInfo
#> [1] "{\"platform\":[\"x86_64-w64-mingw32/x64 (64-bit)\"],\"locale\":[\"LC_COLLATE=English_United States.1252;LC_CTYPE=English_United States.1252;LC_MONETARY=English_United States.1252;LC_NUMERIC=C;LC_TIME=English_United States.1252\"],\"running\":[\"Windows 10 x64 (build 17763)\"],\"RNGkind\":[\"Mersenne-Twister\",\"Inversion\",\"Rejection\"],\"basePkgs\":[\"stats\",\"graphics\",\"grDevices\",\"utils\",\"datasets\",\"methods\",\"base\"],\"matprod\":[\"default\"],\"BLAS\":[\"\"],\"LAPACK\":[\"\"],\"system.codepage\":[\"1250\"],\"codepage\":[\"1252\"]}"

The datacite_dataset() function fills out the datacite object with default properties. For some properties no default is given; these properties are also recorded in the dataset class as attributes.

small_population_datacite <- datacite_dataset(
  dataset = small_population_dataset,
  keywords = c("greendeal", "Demography", "Testing"),
  description = "Example dataset with three small countries",
  Subject = "Demography",
  Creator = "Joe, Doe")
small_population_dataset
#> Population of Small European Countries
#> dataset code:  small_population_total 
#> Actual observation range: [2009-01-01]-[2020-01-01], updated on 2021-07-06.
#> Geographic coverage:  LI, AD, SM 
#> The first 10 observations of 23 (unit: number):
#> 
#>              dataset_code       time geo value unit obs_status freq
#> 1  small_population_total 2020-01-01  LI 38747   NR          A    A
#> 2  small_population_total 2019-01-01  AD 76177   NR          A    A
#> 3  small_population_total 2019-01-01  LI 38378   NR          A    A
#> 4  small_population_total 2018-01-01  LI 38114   NR          A    A
#> 5  small_population_total 2018-01-01  SM 34453   NR          A    A
#> 6  small_population_total 2017-01-01  LI 37810   NR          A    A
#> 7  small_population_total 2016-01-01  LI 37622   NR          A    A
#> 8  small_population_total 2015-01-01  LI 37366   NR          A    A
#> 9  small_population_total 2014-01-01  LI 37129   NR          A    A
#> 10 small_population_total 2013-01-01  AD 76246   NR          A    A
#> 
#> Source: greendeal.dataobservatory.eu
small_population_datacite
#> DataCite information for Population of Small European Countries 
#> # A tibble: 21 x 2
#>    Property       Value                                                         
#>    <chr>          <chr>                                                         
#>  1 dataset_code   "small_population_total"                                      
#>  2 Identifier     "small_population_total"                                      
#>  3 Creator        "Joe, Doe"                                                    
#>  4 Title          "Population of Small European Countries"                      
#>  5 Publisher      "Reprex"                                                      
#>  6 PublicationYe~ "2021"                                                        
#>  7 ResourceType   "Dataset"                                                     
#>  8 Subject        "Demography"                                                  
#>  9 Contributor     <NA>                                                         
#> 10 Date           "{\"Updated\":[\"2021-07-06\"],\"EarliestObservation\":[\"200~
#> # ... with 11 more rows
is.datacite(small_population_datacite)
#> [1] TRUE

The Description property has three mandatory elements:

  • The Abstract is a short, textual description.
  • In the TechnicalInfo sub-field, we automatically record the output of utils::sessionInfo() for computational reproducibility.
  • In the Other sub-field, we record the keywords for structuring the observatory.
small_population_description <- small_population_datacite$Description
jsonlite::fromJSON(small_population_description)
#> $Abstract
#> [1] "Example dataset with three small countries"
#> 
#> $TechnicalInfo
#> [1] "{\"platform\":[\"x86_64-w64-mingw32/x64 (64-bit)\"],\"locale\":[\"LC_COLLATE=English_United States.1252;LC_CTYPE=English_United States.1252;LC_MONETARY=English_United States.1252;LC_NUMERIC=C;LC_TIME=English_United States.1252\"],\"running\":[\"Windows 10 x64 (build 17763)\"],\"RNGkind\":[\"Mersenne-Twister\",\"Inversion\",\"Rejection\"],\"basePkgs\":[\"stats\",\"graphics\",\"grDevices\",\"utils\",\"datasets\",\"methods\",\"base\"],\"matprod\":[\"default\"],\"BLAS\":[\"\"],\"LAPACK\":[\"\"],\"system.codepage\":[\"1250\"],\"codepage\":[\"1252\"]}"
#> 
#> $Other
#> [1] "{\"id\":[\"keyword1\",\"keyword2\",\"keyword3\"],\"name\":[\"greendeal\",\"Demography\",\"Testing\"]}"

Complex Properties

The Creator property may include affiliation data and an ORCID identifier. By default, we create a JSON representation:

jane <- add_creators("Jane", "Doe", 
                     affiliation = "Green Deal Data Observatory", 
                     orcid = "0000-0000-0000-0000")
jane
#> [1] "[{\"creatorName\":[\"Doe, Jane\"],\"nameType\":[\"Person\"],\"affiliation\":[\"Green Deal Data Observatory\"],\"orcid\":[\"0000-0000-0000-0000\"]}]"

Working with lists

Many properties can hold a list of values. For example, a dataset may have several related items.

relitems <- add_related_items(
  RelatedItem = "Population on 1 January by age and sex", 
  relatedItemIdentifier = add_identifiers(
    id = "demo_pjan", 
    URI = "https://appsso.eurostat.ec.europa.eu/nui/show.do?dataset=demo_pjan/"
  ))

relitems
#> [1] "{\"RelatedItem\":[\"Population on 1 January by age and sex\"],\"relatedItemType\":[\"Dataset\"],\"relationType\":[\"IsDerivedFrom\"],\"relatedItemIdentifier\":[\"{\\\"id\\\":[\\\"demo_pjan\\\"],\\\"dataset_code\\\":{},\\\"URI\\\":[\\\"https://appsso.eurostat.ec.europa.eu/nui/show.do?dataset=demo_pjan/\\\"],\\\"DOI\\\":{},\\\"Version\\\":{},\\\"idAtSource\\\":{},\\\"Other\\\":{}}\"]}"
package_citation <- citation("dataobservatory")
relitems <- add_related_items(
  RelatedItem = package_citation$title, 
  relatedItemIdentifier = add_identifiers(
    id = "dataobservatory", 
    Version = package_citation$note),
    related_items = relitems
  )

relitems
#> [1] "{\"RelatedItem\":[\"dataobservatory: Tidy and Documented Datasets\"],\"relatedItemType\":[\"Dataset\"],\"relationType\":[\"IsDerivedFrom\"],\"relatedItemIdentifier\":[\"{\\\"id\\\":[\\\"dataobservatory\\\"],\\\"dataset_code\\\":{},\\\"URI\\\":{},\\\"DOI\\\":{},\\\"Version\\\":[\\\"R package version 0.1.1\\\"],\\\"idAtSource\\\":{},\\\"Other\\\":{}}\"]}"                               
#> [2] "{\"RelatedItem\":[\"Population on 1 January by age and sex\"],\"relatedItemType\":[\"Dataset\"],\"relationType\":[\"IsDerivedFrom\"],\"relatedItemIdentifier\":[\"{\\\"id\\\":[\\\"demo_pjan\\\"],\\\"dataset_code\\\":{},\\\"URI\\\":[\\\"https://appsso.eurostat.ec.europa.eu/nui/show.do?dataset=demo_pjan/\\\"],\\\"DOI\\\":{},\\\"Version\\\":{},\\\"idAtSource\\\":{},\\\"Other\\\":{}}\"]}"
print(small_population_datacite)
#> DataCite information for Population of Small European Countries 
#> # A tibble: 21 x 2
#>    Property       Value                                                         
#>    <chr>          <chr>                                                         
#>  1 dataset_code   "small_population_total"                                      
#>  2 Identifier     "small_population_total"                                      
#>  3 Creator        "Joe, Doe"                                                    
#>  4 Title          "Population of Small European Countries"                      
#>  5 Publisher      "Reprex"                                                      
#>  6 PublicationYe~ "2021"                                                        
#>  7 ResourceType   "Dataset"                                                     
#>  8 Subject        "Demography"                                                  
#>  9 Contributor     <NA>                                                         
#> 10 Date           "{\"Updated\":[\"2021-07-06\"],\"EarliestObservation\":[\"200~
#> # ... with 11 more rows
small_population_datacite$RelatedItem <- create_json_text(as.list(relitems))
small_population_datacite
#> DataCite information for Population of Small European Countries 
#> # A tibble: 21 x 2
#>    Property       Value                                                         
#>    <chr>          <chr>                                                         
#>  1 dataset_code   "small_population_total"                                      
#>  2 Identifier     "small_population_total"                                      
#>  3 Creator        "Joe, Doe"                                                    
#>  4 Title          "Population of Small European Countries"                      
#>  5 Publisher      "Reprex"                                                      
#>  6 PublicationYe~ "2021"                                                        
#>  7 ResourceType   "Dataset"                                                     
#>  8 Subject        "Demography"                                                  
#>  9 Contributor     <NA>                                                         
#> 10 Date           "{\"Updated\":[\"2021-07-06\"],\"EarliestObservation\":[\"200~
#> # ... with 11 more rows