public abstract class BaseHybridHashTable extends Object implements MemorySegmentPool
Base table structure shared by LongHybridHashTable and BinaryHashTable.

| Modifier and Type | Field and Description |
|---|---|
protected long |
buildRowCount |
protected int |
buildSpillRetBufferNumbers
The number of buffers in the build spill return buffer queue that are actually not write-behind
buffers, but regular buffers that simply have not yet been returned.
|
protected LinkedBlockingQueue<org.apache.flink.core.memory.MemorySegment> |
buildSpillReturnBuffers
The queue of buffers that can be used for write-behind.
|
protected AtomicBoolean |
closed
Flag indicating that the closing logic has been invoked.
|
protected int |
compressionBlockSize |
protected org.apache.flink.runtime.io.compression.BlockCompressionFactory |
compressionCodecFactory |
protected boolean |
compressionEnabled |
protected org.apache.flink.runtime.io.disk.iomanager.FileIOChannel.Enumerator |
currentEnumerator
The channel enumerator that is used while processing the current partition to create channels
for the spill partitions it requires.
|
protected int |
currentRecursionDepth
The recursion depth of the partition that is currently processed.
|
protected org.apache.flink.runtime.io.disk.iomanager.HeaderlessChannelReaderInputView |
currentSpilledBuildSide
The reader for the spilled-file of the build partition that is currently read.
|
protected org.apache.flink.runtime.io.disk.iomanager.AbstractChannelReaderInputView |
currentSpilledProbeSide
The reader for the spilled-file of the probe partition that is currently read.
|
protected int |
initPartitionFanOut |
protected LazyMemorySegmentPool |
internalPool |
protected org.apache.flink.runtime.io.disk.iomanager.IOManager |
ioManager
The I/O manager used to instantiate writers for the spilled partitions.
|
protected static org.slf4j.Logger |
LOG |
protected static int |
MAX_NUM_PARTITIONS
The maximum number of partitions, which defines the spilling granularity.
|
protected static int |
MAX_RECURSION_DEPTH
The maximum number of recursive partitionings that the join does before giving up.
|
protected long |
numSpillFiles |
protected int |
segmentSize
The size of the segments used by the hash join buckets.
|
int |
segmentSizeBits |
int |
segmentSizeMask |
protected long |
spillInBytes |
protected int |
totalNumBuffers
The total reserved number of memory segments available to the hash join.
|
boolean |
tryDistinctBuildRow
Try to make the buildSide rows distinct.
|
| Constructor and Description |
|---|
BaseHybridHashTable(Object owner,
boolean compressionEnabled,
int compressionBlockSize,
org.apache.flink.runtime.memory.MemoryManager memManager,
long reservedMemorySize,
org.apache.flink.runtime.io.disk.iomanager.IOManager ioManager,
int avgRecordLen,
long buildRowCount,
boolean tryDistinctBuildRow) |
| Modifier and Type | Method and Description |
|---|---|
protected abstract void |
clearPartitions() |
void |
close()
Closes the hash table.
|
protected org.apache.flink.runtime.io.disk.iomanager.HeaderlessChannelReaderInputView |
createInputView(org.apache.flink.runtime.io.disk.iomanager.FileIOChannel.ID id,
int blockCount,
int lastSegmentLimit) |
void |
ensureNumBuffersReturned(int minRequiredAvailable)
This method makes sure that at least a certain number of memory segments is in the list of
free segments.
|
void |
free() |
void |
freeCurrent()
Free the memory not used.
|
int |
freePages() |
org.apache.flink.core.memory.MemorySegment |
getNextBuffer()
Gets the next buffer to be used with the hash-table, either for an in-memory partition, or
for the table buckets.
|
org.apache.flink.core.memory.MemorySegment[] |
getNextBuffers(int bufferSize)
Bulk memory acquisition.
|
protected org.apache.flink.core.memory.MemorySegment |
getNotNullNextBuffer() |
long |
getNumSpillFiles() |
long |
getSpillInBytes() |
long |
getUsedMemoryInBytes() |
static int |
hash(int hashCode,
int level)
The level parameter is needed so that we can have different hash functions when we
recursively apply the partitioning, so that the working set eventually fits into memory.
|
int |
maxInitBufferOfBucketArea(int partitions)
Give up to one-sixth of the memory of the bucket area.
|
protected int |
maxNumPartition()
The bucket area needs at least one segment and the data area needs at least one segment.
|
org.apache.flink.core.memory.MemorySegment |
nextSegment()
This is the method called by the partitions to request memory to serialize records.
|
int |
pageSize()
Get the page size of each page this pool holds.
|
protected List<org.apache.flink.core.memory.MemorySegment> |
readAllBuffers(org.apache.flink.runtime.io.disk.iomanager.FileIOChannel.ID id,
int blockCount) |
void |
releaseMemoryCacheForSMJ()
Because adaptive hash join is introduced, the cached memory segments should be released to the
MemoryManager before switching to sort merge join. |
int |
remainBuffers() |
void |
returnAll(List<org.apache.flink.core.memory.MemorySegment> memory)
Return all pages back into this pool.
|
void |
returnPage(org.apache.flink.core.memory.MemorySegment segment) |
protected abstract int |
spillPartition() |
protected static final org.slf4j.Logger LOG
protected static final int MAX_RECURSION_DEPTH
protected static final int MAX_NUM_PARTITIONS
protected final int initPartitionFanOut
protected final long buildRowCount
protected final int totalNumBuffers
protected final LazyMemorySegmentPool internalPool
protected final org.apache.flink.runtime.io.disk.iomanager.IOManager ioManager
protected final int segmentSize
protected final LinkedBlockingQueue<org.apache.flink.core.memory.MemorySegment> buildSpillReturnBuffers
public final int segmentSizeBits
public final int segmentSizeMask
protected AtomicBoolean closed
public final boolean tryDistinctBuildRow
protected int currentRecursionDepth
protected int buildSpillRetBufferNumbers
protected org.apache.flink.runtime.io.disk.iomanager.HeaderlessChannelReaderInputView currentSpilledBuildSide
protected org.apache.flink.runtime.io.disk.iomanager.AbstractChannelReaderInputView currentSpilledProbeSide
protected org.apache.flink.runtime.io.disk.iomanager.FileIOChannel.Enumerator currentEnumerator
protected final boolean compressionEnabled
protected final org.apache.flink.runtime.io.compression.BlockCompressionFactory compressionCodecFactory
protected final int compressionBlockSize
protected transient long numSpillFiles
protected transient long spillInBytes
public BaseHybridHashTable(Object owner, boolean compressionEnabled, int compressionBlockSize, org.apache.flink.runtime.memory.MemoryManager memManager, long reservedMemorySize, org.apache.flink.runtime.io.disk.iomanager.IOManager ioManager, int avgRecordLen, long buildRowCount, boolean tryDistinctBuildRow)
protected int maxNumPartition()
public org.apache.flink.core.memory.MemorySegment getNextBuffer()
public org.apache.flink.core.memory.MemorySegment[] getNextBuffers(int bufferSize)
protected org.apache.flink.core.memory.MemorySegment getNotNullNextBuffer()
public org.apache.flink.core.memory.MemorySegment nextSegment()
Specified by: nextSegment in interface org.apache.flink.core.memory.MemorySegmentSource

public int freePages()
Specified by: freePages in interface MemorySegmentPool

public int pageSize()
Specified by: pageSize in interface MemorySegmentPool

public void returnAll(List<org.apache.flink.core.memory.MemorySegment> memory)
Specified by: returnAll in interface MemorySegmentPool
Parameters: memory - the pages which want to be returned.

protected abstract int spillPartition()
throws IOException
Throws: IOException

public void ensureNumBuffersReturned(int minRequiredAvailable)
Parameters: minRequiredAvailable - The minimum number of buffers that need to be reclaimed.

public void close()
protected abstract void clearPartitions()
public void free()
public void freeCurrent()
public void releaseMemoryCacheForSMJ()
MemoryManager before switching to sort merge join. Otherwise, opening the sort merge join
operator may fail because of insufficient memory.
Note: this method should only be invoked for sort merge join.
public void returnPage(org.apache.flink.core.memory.MemorySegment segment)
public int remainBuffers()
public long getUsedMemoryInBytes()
public long getNumSpillFiles()
public long getSpillInBytes()
public int maxInitBufferOfBucketArea(int partitions)
protected List<org.apache.flink.core.memory.MemorySegment> readAllBuffers(org.apache.flink.runtime.io.disk.iomanager.FileIOChannel.ID id, int blockCount) throws IOException
Throws: IOException

protected org.apache.flink.runtime.io.disk.iomanager.HeaderlessChannelReaderInputView createInputView(org.apache.flink.runtime.io.disk.iomanager.FileIOChannel.ID id,
int blockCount,
int lastSegmentLimit)
throws IOException
Throws: IOException

public static int hash(int hashCode,
int level)
Copyright © 2014–2025 The Apache Software Foundation. All rights reserved.