These functions support flexible schema inspection: sdf_schema_json returns the schema in a machine-readable (JSON) form, while sdf_schema_viewer renders it in a human-friendly widget.

Usage

sdf_schema_json(x, parse_json = TRUE, simplify = FALSE,
  append_complex_type = TRUE)

sdf_schema_viewer(x, simplify = TRUE, append_complex_type = TRUE)

Arguments

x

An R object wrapping, or containing, a Spark DataFrame.

parse_json

Logical. If TRUE, the JSON returned by Spark is parsed into an R list before being returned.

simplify

Logical. If TRUE, the schema is folded into itself, such that

{"name" : "field1", "type" : {"type" : "array", "elementType" : "string", "containsNull" : true}, "nullable" : true, "metadata" : { } }

will be rendered simply as

{"field1 (array)" : "[string]"}

append_complex_type

Logical. Only consulted when parse_json = TRUE and simplify = TRUE. In that case, type indicators such as "(array)" and "(struct)" are appended to the names of array- and struct-typed fields in the return value (see the sketch after this list).
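To make simplify and append_complex_type concrete, here is a minimal sketch (jsonlite only, no Spark connection needed; "field1" is the placeholder field from the description above) contrasting the two renderings:

library(jsonlite)

# the full StructField JSON as Spark reports it
full_form <- '{"name":"field1","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}'

# the folded rendering; the "(array)" suffix is the indicator appended
# when append_complex_type = TRUE
simplified_form <- '{"field1 (array)":"[string]"}'

str(fromJSON(full_form))        # nested list mirroring the Spark schema
str(fromJSON(simplified_form))  # a single name/value pair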

Examples

# NOT RUN {
library(testthat)
library(jsonlite)
library(sparklyr)
library(sparklyr.nested)
# a schema already rendered in simplified form (as produced with
# simplify = TRUE and append_complex_type = TRUE)
sample_json <- paste0(
  '{"aircraft_id":["string"],"phase_sequence":["string"],"phases (array)":{"start_point (struct)":',
  '{"segment_phase":["string"],"agl":["double"],"elevation":["double"],"time":["long"],',
  '"latitude":["double"],"longitude":["double"],"altitude":["double"],"course":["double"],',
  '"speed":["double"],"source_point_keys (array)":["[string]"],"primary_key":["string"]},',
  '"end_point (struct)":{"segment_phase":["string"],"agl":["double"],"elevation":["double"],',
  '"time":["long"],"latitude":["double"],"longitude":["double"],"altitude":["double"],',
  '"course":["double"],"speed":["double"],"source_point_keys (array)":["[string]"],',
  '"primary_key":["string"]},"phase":["string"],"primary_key":["string"]},"primary_key":["string"]}'
)

with_mock(
  # I am mocking functions so that the example works without a real spark connection
  spark_read_parquet = function(x, ...){return("this is a spark dataframe")},
  sdf_schema_json = function(x, ...){return(fromJSON(sample_json))},
  spark_connect = function(...){return("this is a spark connection")},

  # the meat of the example is here; the unnamed expressions below are
  # evaluated in order with the mocks above in effect
  sc <- spark_connect(),
  spark_data <- spark_read_parquet(sc, path="path/to/data/*.parquet", name="some_name"),
  sdf_schema_viewer(spark_data)
)
# }
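
Against a live Spark connection the mocks are unnecessary. A minimal sketch, assuming a local Spark installation and parquet data at a placeholder path:

sc <- spark_connect(master = "local")
spark_data <- spark_read_parquet(sc, name = "some_name", path = "path/to/data/*.parquet")
sdf_schema_json(spark_data, simplify = TRUE)  # schema as a nested R list
sdf_schema_viewer(spark_data)                 # schema rendered in the viewer widget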