java.lang.Object
com.here.platform.data.client.spark.datasources.raw.RawFileFormat
All Implemented Interfaces:
org.apache.spark.sql.execution.datasources.FileFormat, org.apache.spark.sql.sources.DataSourceRegister

public class RawFileFormat extends Object implements org.apache.spark.sql.execution.datasources.FileFormat, org.apache.spark.sql.sources.DataSourceRegister
A data source for reading files as raw byte arrays.

The contents of each file are stored as a raw byte array in the Spark Row under the field name "raw".
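As a minimal usage sketch, the format can be addressed by its fully qualified class name (the registered short name is not documented on this page, and the input path below is hypothetical):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("raw-read").getOrCreate()

// Read files as raw bytes: the resulting DataFrame has a single
// binary column named "raw" holding each file's contents.
val df = spark.read
  .format("com.here.platform.data.client.spark.datasources.raw.RawFileFormat")
  .load("/path/to/input")   // hypothetical input path

df.select("raw").collect().foreach { row =>
  val bytes: Array[Byte] = row.getAs[Array[Byte]]("raw")
  println(s"read ${bytes.length} bytes")
}
```

Because `RawFileFormat` implements `DataSourceRegister`, the alias returned by `shortName()` can be passed to `format(...)` instead of the class name.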

  • Constructor Summary

    Constructors
    Constructor
    Description
    RawFileFormat()
     
  • Method Summary

    Modifier and Type
    Method
    Description
    scala.Function1<org.apache.spark.sql.execution.datasources.PartitionedFile,scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow>>
    buildReader(org.apache.spark.sql.SparkSession spark, org.apache.spark.sql.types.StructType dataSchema, org.apache.spark.sql.types.StructType partitionSchema, org.apache.spark.sql.types.StructType requiredSchema, scala.collection.immutable.Seq<org.apache.spark.sql.sources.Filter> filters, scala.collection.immutable.Map<String,String> options, org.apache.hadoop.conf.Configuration hadoopConf)
     
    scala.Option<org.apache.spark.sql.types.StructType>
    inferSchema(org.apache.spark.sql.SparkSession sparkSession, scala.collection.immutable.Map<String,String> options, scala.collection.immutable.Seq<org.apache.hadoop.fs.FileStatus> files)
     
    org.apache.spark.sql.execution.datasources.OutputWriterFactory
    prepareWrite(org.apache.spark.sql.SparkSession sparkSession, org.apache.hadoop.mapreduce.Job job, scala.collection.immutable.Map<String,String> options, org.apache.spark.sql.types.StructType dataSchema)
     
    String
    shortName()
     
    String
    toString()
     

    Methods inherited from class java.lang.Object

    equals, getClass, hashCode, notify, notifyAll, wait, wait, wait

    Methods inherited from interface org.apache.spark.sql.execution.datasources.FileFormat

    allowDuplicatedColumnNames, buildReaderWithPartitionValues, createFileMetadataCol, fileConstantMetadataExtractors, isSplitable, metadataSchemaFields, supportBatch, supportDataType, supportFieldName, vectorTypes
  • Constructor Details

    • RawFileFormat

      public RawFileFormat()
  • Method Details

    • buildReader

      public scala.Function1<org.apache.spark.sql.execution.datasources.PartitionedFile,scala.collection.Iterator<org.apache.spark.sql.catalyst.InternalRow>> buildReader(org.apache.spark.sql.SparkSession spark, org.apache.spark.sql.types.StructType dataSchema, org.apache.spark.sql.types.StructType partitionSchema, org.apache.spark.sql.types.StructType requiredSchema, scala.collection.immutable.Seq<org.apache.spark.sql.sources.Filter> filters, scala.collection.immutable.Map<String,String> options, org.apache.hadoop.conf.Configuration hadoopConf)
      Specified by:
      buildReader in interface org.apache.spark.sql.execution.datasources.FileFormat
    • inferSchema

      public scala.Option<org.apache.spark.sql.types.StructType> inferSchema(org.apache.spark.sql.SparkSession sparkSession, scala.collection.immutable.Map<String,String> options, scala.collection.immutable.Seq<org.apache.hadoop.fs.FileStatus> files)
      Specified by:
      inferSchema in interface org.apache.spark.sql.execution.datasources.FileFormat
    • prepareWrite

      public org.apache.spark.sql.execution.datasources.OutputWriterFactory prepareWrite(org.apache.spark.sql.SparkSession sparkSession, org.apache.hadoop.mapreduce.Job job, scala.collection.immutable.Map<String,String> options, org.apache.spark.sql.types.StructType dataSchema)
      Specified by:
      prepareWrite in interface org.apache.spark.sql.execution.datasources.FileFormat
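      prepareWrite supplies the OutputWriterFactory used on the write path. A hedged sketch of writing a binary "raw" column through this format (the output path and sample data are hypothetical):

      ```scala
      import org.apache.spark.sql.{Row, SparkSession}
      import org.apache.spark.sql.types.{BinaryType, StructField, StructType}

      val spark = SparkSession.builder().getOrCreate()

      // A DataFrame with a single binary "raw" column, mirroring the
      // schema this format produces on read.
      val schema = StructType(Seq(StructField("raw", BinaryType)))
      val rows = Seq(Row("hello".getBytes("UTF-8")))
      val df = spark.createDataFrame(spark.sparkContext.parallelize(rows), schema)

      df.write
        .format("com.here.platform.data.client.spark.datasources.raw.RawFileFormat")
        .save("/tmp/raw-out")   // hypothetical output path
      ```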
    • shortName

      public String shortName()
      Specified by:
      shortName in interface org.apache.spark.sql.sources.DataSourceRegister
    • toString

      public String toString()
      Overrides:
      toString in class Object