class CatalogProxy extends Catalog
Proxy class to allow mixins for runtime loaded Catalog classes.
- Alphabetic
- By Inheritance
- CatalogProxy
- Catalog
- ContextLogging
- CatalogNullMetrics
- CatalogLight
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Type Members
-
type
CommitPart = Catalog.CommitPart
The type of each part identifier of a multipart commit.
The type of each part identifier of a multipart commit.
- Definition Classes
- CatalogProxy → CatalogLight
-
type
CommitToken = Catalog.CommitToken
The type of a commit token.
The type of a commit token.
- Definition Classes
- CatalogProxy → CatalogLight
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
abortCommit(token: CommitToken): Unit
Aborts a commit being prepared.
Aborts a commit being prepared.
- token
The token identifying the commit to abort
- Definition Classes
- CatalogProxy → CatalogLight
-
def
additionalFields: Set[AdditionalField]
The set of Catalog.AdditionalFields requested from the service when retrieving catalog metadata.
The set of Catalog.AdditionalFields requested from the service when retrieving catalog metadata.
- Definition Classes
- CatalogProxy → CatalogLight
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
classForLogger: Class[_]
Override this to change the class that is displayed in logs.
Override this to change the class that is displayed in logs.
- Attributes
- protected
- Definition Classes
- ContextLogging
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
final
def
commit(metadata: RDD[(Key, Commit)], baseVersion: BaseVersion, maxNumParts: Int = 85, token: Option[CommitToken] = None, dependencies: Seq[Dependency] = Seq()): Unit
Commits an RDD of Partition.Commit.
Commits an RDD of Partition.Commit.
- metadata
The metadata to be committed.
- baseVersion
The current version of the catalog in the Data API. This information is required to prevent race conditions if the version is changed elsewhere, in another process, while we are preparing data for the commit. If the catalog is empty, use utils.EmptyCatalog.
- maxNumParts
The maximum number of parts the metadata should be split into.
- token
A token representing a multipart commit that has already begun, if available, obtained from calling startCommit.
- dependencies
The upstream dependencies for this commit.
- Definition Classes
- Catalog
- Note
Calling this method is equivalent to calling startCommit + partialCommit + finishCommit in sequence.
-
def
defaultPartitioner: Partitioner[Key]
The default com.here.platform.data.processing.spark.partitioner.Partitioner applied when querying the catalog, if you do not explicitly specify a partitioner.
The default com.here.platform.data.processing.spark.partitioner.Partitioner applied when querying the catalog, if you do not explicitly specify a partitioner.
- Definition Classes
- CatalogProxy → Catalog
-
def
empty(partitioner: Partitioner[Key]): RDD[(Key, Meta)]
Creates a partitioned empty RDD of (Key, Meta) in compliance with other methods of this component.
Creates a partitioned empty RDD of (Key, Meta) in compliance with other methods of this component.
- partitioner
The com.here.platform.data.processing.spark.partitioner.Partitioner applied when constructing the RDD returned.
- returns
An empty RDD of (Partition.Key, Partition.Meta), which is partitioned according to the partitioner parameter.
- Definition Classes
- CatalogProxy → Catalog
-
def
emptyChanges(partitioner: Partitioner[Key]): RDD[(Key, Change)]
Creates a partitioned empty RDD of (Partition.Key, Partition.Change) in compliance with other methods of this component.
Creates a partitioned empty RDD of (Partition.Key, Partition.Change) in compliance with other methods of this component.
- partitioner
The com.here.platform.data.processing.spark.partitioner.Partitioner applied when constructing the RDD returned.
- returns
An empty RDD of (Partition.Key, Partition.Change), partitioned according to the partitioner parameter.
- Definition Classes
- CatalogProxy → Catalog
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
finishCommit(token: CommitToken, parts: Seq[CommitPart]): Unit
Completes a multipart commit.
Completes a multipart commit.
- token
The token obtained when startCommit was called to start this commit.
- parts
All the commit parts obtained from calls made to Catalog.partialCommit as part of this commit.
- Definition Classes
- CatalogProxy → CatalogLight
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getCommitCounter(hrn: HRN): Option[LongAccumulator]
Returns a commit accumulator, or None if stats are not collected.
Returns a commit accumulator, or None if stats are not collected.
- Definition Classes
- CatalogNullMetrics
-
def
getQueryAllCounters(layerIds: Iterable[Id], versionStart: Long): Option[Map[Id, LongAccumulator]]
Returns a serializable map of queryAll accumulators for the given layers, or None if stats are not collected.
Returns a serializable map of queryAll accumulators for the given layers, or None if stats are not collected.
- Definition Classes
- CatalogNullMetrics
-
def
getQueryChangesCounters(layerIds: Iterable[Id], versionStart: Long, versionEnd: Long): Option[Map[Id, LongAccumulator]]
Returns a serializable map of queryChanges accumulators for the given layers, or None if stats are not collected.
Returns a serializable map of queryChanges accumulators for the given layers, or None if stats are not collected.
- Definition Classes
- CatalogNullMetrics
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
hrn: HRN
The HRN (HERE Resource Name) of the catalog.
The HRN (HERE Resource Name) of the catalog. It is a resource descriptor that provides a globally unique handle for a catalog in the sense of an instance.
- Definition Classes
- CatalogProxy → CatalogLight
-
def
id: Id
The unique identifier for the catalog, that is added to every Partition.Key returned by the query APIs and is used by the commit API for logging and statistics generation.
The unique identifier for the catalog, that is added to every Partition.Key returned by the query APIs and is used by the commit API for logging and statistics generation.
- Definition Classes
- CatalogProxy → CatalogLight
- See also
Catalog.Id for more details.
-
val
impl: Catalog
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
final
lazy val
logger: ContextAwareLogger
The logger to use for normal and context-aware messages.
The logger to use for normal and context-aware messages.
- Attributes
- protected
- Definition Classes
- ContextLogging
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
partialCommit(token: CommitToken, metadata: RDD[(Key, Commit)], numParts: Int): Seq[CommitPart]
Prepares the commit of an RDD of Partition.Commit, but does not complete the commit.
Prepares the commit of an RDD of Partition.Commit, but does not complete the commit.
This function appends the metadata to the commit that is being constructed, and can be called multiple times. You must specify the maximum number of parts that should be produced by each call. Note that the Data API imposes a limit of 100 parts in total, so if this function is called multiple times, the combined number of parts across all calls must stay within that limit.
- token
The token identifying the commit being prepared.
- metadata
The metadata to be prepared for committing.
- numParts
The maximum number of parts that should be produced by this call.
- returns
The CommitPart objects that should be passed to finishCommit
- Definition Classes
- CatalogProxy → Catalog
-
def
queryAll(version: Version, layers: Set[Id], partitioner: Partitioner[Key]): RDD[(Key, Meta)]
Queries metadata of all the partitions in a given set of layers at a given version.
Queries metadata of all the partitions in a given set of layers at a given version.
Creates an RDD containing the queried metadata.
- version
The version at which the catalog is queried for partitions.
- layers
The set of layers for which the partitions should be queried.
- partitioner
The com.here.platform.data.processing.spark.partitioner.Partitioner applied when constructing the RDD returned.
- returns
The queried metadata in an RDD object partitioned according to the partitioner parameter.
- Definition Classes
- CatalogProxy → Catalog
-
def
queryChanges(versionStart: Version, versionEnd: Version, layers: Set[Id], partitioner: Partitioner[Key]): RDD[(Key, Change)]
Queries metadata of all the partitions in a given set of layers changed between given versions.
Queries metadata of all the partitions in a given set of layers changed between given versions.
Creates an RDD containing the queried metadata.
- versionStart
Query the partitions changed after this version.
- versionEnd
Query the changed partitions up to this version.
- layers
The set of layers for which the partitions should be queried.
- partitioner
The com.here.platform.data.processing.spark.partitioner.Partitioner applied when constructing the RDD returned.
- returns
The queried metadata in an RDD object partitioned according to the partitioner parameter.
- Definition Classes
- CatalogProxy → Catalog
-
def
queryConfiguration(): Configuration
Queries the current configuration of the catalog.
Queries the current configuration of the catalog.
- returns
The current configuration of the catalog.
- Definition Classes
- CatalogProxy → CatalogLight
-
def
queryDependencies(version: Version): Seq[Dependency]
Queries the dependencies for a particular catalog version.
Queries the dependencies for a particular catalog version.
- version
The version of the catalog for which dependencies will be queried.
- returns
The dependencies of the catalog version.
- Definition Classes
- CatalogProxy → CatalogLight
-
def
queryLatestVersion(): BaseVersion
Queries the latest version of the catalog.
Queries the latest version of the catalog.
- returns
The latest catalog version available in the Data API, or com.here.platform.data.processing.catalog.utils.EmptyCatalog otherwise.
- Definition Classes
- CatalogProxy → CatalogLight
-
def
queryLayerVersions(version: Version): Map[Id, Version]
Queries the latest version for each of the catalog layers.
Queries the latest version for each of the catalog layers. This is the version of the most recent event affecting that layer, that is, the most recent version in which a partition of that layer was changed and/or deleted.
For some layers, this version may be older than the catalog's overall version, particularly if those layers did not change, but other layers have changed.
- version
The version for which the catalog's layers are queried.
- returns
The latest version for each of the catalog's layers.
- Definition Classes
- CatalogProxy → CatalogLight
- Note
Layers that have never changed since catalog creation are not returned.
-
def
queryMinimumVersion(): BaseVersion
Queries the minimum version of the catalog.
Queries the minimum version of the catalog.
- returns
The minimum catalog version available in the Data API, or com.here.platform.data.processing.catalog.utils.EmptyCatalog otherwise.
- Definition Classes
- CatalogProxy → CatalogLight
-
def
queryPartitionChangeHistory(version: Version, layer: Id, partition: Name, direction: Catalog.ChangeHistoryDirection.Value): Seq[Change]
Queries metadata history for the selected partition.
Queries metadata history for the selected partition.
This query is done directly in the Driver, and the metadata is returned. You can query up to 100 changes with one call.
- version
The version from which to begin the change history search.
- layer
The layer that contains the partition.
- partition
The partition to query the change history for.
- direction
Specifies the direction in which to look up the history: forward or backward.
- returns
A sequence of Partition.Change objects, each containing the partition size and checksum.
- Definition Classes
- CatalogProxy → CatalogLight
- Note
This method is scheduled to be deprecated in a future release. Only supported by the Datastore1 implementation.
-
def
querySinglePartitions(version: Version, layer: Id, partitions: Set[Name]): Map[Key, Meta]
Queries metadata for the selected partitions.
Queries metadata for the selected partitions.
This query is done directly in the Driver, and the metadata is returned. You can query up to 100 partitions with one call.
- version
The version at which the catalog is queried for partitions.
- layer
The layer that contains the partitions being queried.
- partitions
The partition names that should be queried.
- returns
The queried metadata, for partitions that have content in the catalog at the specified version. Partitions not present in the catalog at the specified version are not returned.
- Definition Classes
- CatalogProxy → CatalogLight
- Note
This method is only intended to be used internally, by the library. When implementing your compiler, you are encouraged to use Catalog.queryAll/Catalog.queryChanges instead, as these methods are implemented in a distributed way.
-
def
retriever: Retriever
Exposes the Retriever for this catalog.
Exposes the Retriever for this catalog. The returned object can be captured in Spark functions and transmitted to worker nodes.
- Definition Classes
- CatalogProxy → CatalogLight
-
def
startCommit(baseVersion: BaseVersion, dependencies: Seq[Dependency]): CommitToken
Starts a new multipart commit on top of an existing version or an empty catalog.
Starts a new multipart commit on top of an existing version or an empty catalog.
- baseVersion
The current version of the catalog in the Data API. This information is required to prevent race conditions if the version is changed elsewhere, in another process, while we are preparing data for the commit. If the catalog is empty, use utils.EmptyCatalog.
- dependencies
The upstream dependencies for this commit.
- returns
A new multipart commit token representing the commit in progress.
- Definition Classes
- CatalogProxy → CatalogLight
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toSerializable: CatalogLight with Serializable
Returns a CatalogLight for this catalog that is guaranteed to be serializable.
Returns a CatalogLight for this catalog that is guaranteed to be serializable.
- Attributes
- protected[processing]
- Definition Classes
- CatalogProxy → CatalogLight
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
def
tokenFromString(s: String): CommitToken
Creates a commit token for the catalog given its string representation.
Creates a commit token for the catalog given its string representation.
- Definition Classes
- CatalogProxy → CatalogLight
-
def
tokenToString(t: CommitToken): String
Returns the string representation of a commit token.
Returns the string representation of a commit token.
- Definition Classes
- CatalogProxy → CatalogLight
-
def
uploader(baseVersion: BaseVersion): Uploader
Exposes the Uploader for this catalog optimized to upload Payloads for a given base version of the catalog.
Exposes the Uploader for this catalog optimized to upload Payloads for a given base version of the catalog. The returned object can be captured in Spark functions and transmitted to worker nodes.
- baseVersion
The current version of the catalog in the Data API.
- Definition Classes
- CatalogProxy → Catalog
-
def
uploader: Uploader
Exposes the Uploader for this catalog.
Exposes the Uploader for this catalog. The returned object can be captured in Spark functions and transmitted to worker nodes.
- Definition Classes
- CatalogProxy → Catalog
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()