public class ParquetFileWriter extends Object
| Modifier and Type | Class and Description |
|---|---|
static class |
ParquetFileWriter.Mode |
| Modifier and Type | Field and Description |
|---|---|
static int |
CURRENT_VERSION |
static byte[] |
MAGIC |
static String |
MAGIC_STR |
static String |
PARQUET_COMMON_METADATA_FILE |
static String |
PARQUET_METADATA_FILE |
| Constructor and Description |
|---|
ParquetFileWriter(org.apache.hadoop.conf.Configuration configuration,
MessageType schema,
org.apache.hadoop.fs.Path file)
Deprecated.
will be removed in 2.0.0
|
ParquetFileWriter(org.apache.hadoop.conf.Configuration configuration,
MessageType schema,
org.apache.hadoop.fs.Path file,
ParquetFileWriter.Mode mode)
Deprecated.
will be removed in 2.0.0
|
ParquetFileWriter(org.apache.hadoop.conf.Configuration configuration,
MessageType schema,
org.apache.hadoop.fs.Path file,
ParquetFileWriter.Mode mode,
long rowGroupSize,
int maxPaddingSize)
Deprecated.
will be removed in 2.0.0
|
ParquetFileWriter(OutputFile file,
MessageType schema,
ParquetFileWriter.Mode mode,
long rowGroupSize,
int maxPaddingSize) |
| Modifier and Type | Method and Description |
|---|---|
void |
appendFile(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file)
Deprecated.
will be removed in 2.0.0; use
appendFile(InputFile) instead |
void |
appendFile(InputFile file) |
void |
appendRowGroup(org.apache.hadoop.fs.FSDataInputStream from,
BlockMetaData rowGroup,
boolean dropColumns)
Deprecated.
will be removed in 2.0.0;
use
appendRowGroup(SeekableInputStream,BlockMetaData,boolean) instead |
void |
appendRowGroup(SeekableInputStream from,
BlockMetaData rowGroup,
boolean dropColumns) |
void |
appendRowGroups(org.apache.hadoop.fs.FSDataInputStream file,
List<BlockMetaData> rowGroups,
boolean dropColumns)
Deprecated.
will be removed in 2.0.0;
use
appendRowGroups(SeekableInputStream,List,boolean) instead |
void |
appendRowGroups(SeekableInputStream file,
List<BlockMetaData> rowGroups,
boolean dropColumns) |
void |
end(Map<String,String> extraMetaData)
ends a file once all blocks have been written.
|
void |
endBlock()
ends a block once all column chunks have been written
|
void |
endColumn()
end a column (once all rep, def and data have been written)
|
ParquetMetadata |
getFooter() |
long |
getNextRowGroupSize() |
long |
getPos() |
static ParquetMetadata |
mergeMetadataFiles(List<org.apache.hadoop.fs.Path> files,
org.apache.hadoop.conf.Configuration conf)
Deprecated.
metadata files are not recommended and will be removed in 2.0.0
|
void |
start()
start the file
|
void |
startBlock(long recordCount)
start a block
|
void |
startColumn(ColumnDescriptor descriptor,
long valueCount,
org.apache.parquet.hadoop.metadata.CompressionCodecName compressionCodecName)
start a column inside a block
|
void |
writeDataPage(int valueCount,
int uncompressedPageSize,
org.apache.parquet.bytes.BytesInput bytes,
Encoding rlEncoding,
Encoding dlEncoding,
Encoding valuesEncoding)
Deprecated.
|
void |
writeDataPage(int valueCount,
int uncompressedPageSize,
org.apache.parquet.bytes.BytesInput bytes,
Statistics statistics,
Encoding rlEncoding,
Encoding dlEncoding,
Encoding valuesEncoding)
writes a single page
|
void |
writeDictionaryPage(DictionaryPage dictionaryPage)
writes a dictionary page
|
static void |
writeMergedMetadataFile(List<org.apache.hadoop.fs.Path> files,
org.apache.hadoop.fs.Path outputPath,
org.apache.hadoop.conf.Configuration conf)
Deprecated.
metadata files are not recommended and will be removed in 2.0.0
|
static void |
writeMetadataFile(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path outputPath,
List<Footer> footers)
Deprecated.
metadata files are not recommended and will be removed in 2.0.0
|
static void |
writeMetadataFile(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path outputPath,
List<Footer> footers,
ParquetOutputFormat.JobSummaryLevel level)
Deprecated.
metadata files are not recommended and will be removed in 2.0.0
|
public static final String PARQUET_METADATA_FILE
public static final String MAGIC_STR
public static final byte[] MAGIC
public static final String PARQUET_COMMON_METADATA_FILE
public static final int CURRENT_VERSION
@Deprecated public ParquetFileWriter(org.apache.hadoop.conf.Configuration configuration, MessageType schema, org.apache.hadoop.fs.Path file) throws IOException
configuration - Hadoop configuration
schema - the schema of the data
file - the file to write to
IOException - if the file can not be created
@Deprecated public ParquetFileWriter(org.apache.hadoop.conf.Configuration configuration, MessageType schema, org.apache.hadoop.fs.Path file, ParquetFileWriter.Mode mode) throws IOException
configuration - Hadoop configuration
schema - the schema of the data
file - the file to write to
mode - file creation mode
IOException - if the file can not be created
@Deprecated public ParquetFileWriter(org.apache.hadoop.conf.Configuration configuration, MessageType schema, org.apache.hadoop.fs.Path file, ParquetFileWriter.Mode mode, long rowGroupSize, int maxPaddingSize) throws IOException
configuration - Hadoop configuration
schema - the schema of the data
file - the file to write to
mode - file creation mode
rowGroupSize - the row group size
maxPaddingSize - the maximum padding
IOException - if the file can not be created
public ParquetFileWriter(OutputFile file, MessageType schema, ParquetFileWriter.Mode mode, long rowGroupSize, int maxPaddingSize) throws IOException
file - OutputFile to create or overwrite
schema - the schema of the data
mode - file creation mode
rowGroupSize - the row group size
maxPaddingSize - the maximum padding
IOException - if the file can not be created
public void start()
throws IOException
IOException - if there is an error while writing
public void startBlock(long recordCount)
throws IOException
recordCount - the record count in this block
IOException - if there is an error while writing
public void startColumn(ColumnDescriptor descriptor, long valueCount, org.apache.parquet.hadoop.metadata.CompressionCodecName compressionCodecName) throws IOException
descriptor - the column descriptor
valueCount - the value count in this column
compressionCodecName - a compression codec name
IOException - if there is an error while writing
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException
dictionaryPage - the dictionary page
IOException - if there is an error while writing
@Deprecated public void writeDataPage(int valueCount, int uncompressedPageSize, org.apache.parquet.bytes.BytesInput bytes, Encoding rlEncoding, Encoding dlEncoding, Encoding valuesEncoding) throws IOException
valueCount - count of values
uncompressedPageSize - the size of the data once uncompressed
bytes - the compressed data for the page without header
rlEncoding - encoding of the repetition level
dlEncoding - encoding of the definition level
valuesEncoding - encoding of values
IOException - if there is an error while writing
public void writeDataPage(int valueCount,
int uncompressedPageSize,
org.apache.parquet.bytes.BytesInput bytes,
Statistics statistics,
Encoding rlEncoding,
Encoding dlEncoding,
Encoding valuesEncoding)
throws IOException
valueCount - count of values
uncompressedPageSize - the size of the data once uncompressed
bytes - the compressed data for the page without header
statistics - statistics for the page
rlEncoding - encoding of the repetition level
dlEncoding - encoding of the definition level
valuesEncoding - encoding of values
IOException - if there is an error while writing
public void endColumn()
throws IOException
IOException - if there is an error while writing
public void endBlock()
throws IOException
IOException - if there is an error while writing
@Deprecated public void appendFile(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file) throws IOException
Deprecated. will be removed in 2.0.0; use appendFile(InputFile) instead
conf - a configuration
file - a file path to append the contents of to this file
IOException - if there is an error while reading or writing
public void appendFile(InputFile file) throws IOException
IOException
@Deprecated public void appendRowGroups(org.apache.hadoop.fs.FSDataInputStream file, List<BlockMetaData> rowGroups, boolean dropColumns) throws IOException
Deprecated. will be removed in 2.0.0; use appendRowGroups(SeekableInputStream,List,boolean) instead
file - a file stream to read from
rowGroups - row groups to copy
dropColumns - whether to drop columns from the file that are not in this file's schema
IOException - if there is an error while reading or writing
public void appendRowGroups(SeekableInputStream file, List<BlockMetaData> rowGroups, boolean dropColumns) throws IOException
IOException
@Deprecated public void appendRowGroup(org.apache.hadoop.fs.FSDataInputStream from, BlockMetaData rowGroup, boolean dropColumns) throws IOException
Deprecated. will be removed in 2.0.0; use appendRowGroup(SeekableInputStream,BlockMetaData,boolean) instead
from - a file stream to read from
rowGroup - row group to copy
dropColumns - whether to drop columns from the file that are not in this file's schema
IOException - if there is an error while reading or writing
public void appendRowGroup(SeekableInputStream from, BlockMetaData rowGroup, boolean dropColumns) throws IOException
IOException
public void end(Map<String,String> extraMetaData) throws IOException
extraMetaData - the extra meta data to write in the footer
IOException - if there is an error while writing
public ParquetMetadata getFooter()
@Deprecated public static ParquetMetadata mergeMetadataFiles(List<org.apache.hadoop.fs.Path> files, org.apache.hadoop.conf.Configuration conf) throws IOException
files - a list of files to merge metadata from
conf - a configuration
IOException - if there is an error while writing
@Deprecated public static void writeMergedMetadataFile(List<org.apache.hadoop.fs.Path> files, org.apache.hadoop.fs.Path outputPath, org.apache.hadoop.conf.Configuration conf) throws IOException
files - a list of files to merge metadata from
outputPath - path to write merged metadata to
conf - a configuration
IOException - if there is an error while reading or writing
@Deprecated public static void writeMetadataFile(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path outputPath, List<Footer> footers) throws IOException
configuration - the configuration to use to get the FileSystem
outputPath - the directory to write the _metadata file to
footers - the list of footers to merge
IOException - if there is an error while writing
@Deprecated public static void writeMetadataFile(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path outputPath, List<Footer> footers, ParquetOutputFormat.JobSummaryLevel level) throws IOException
ParquetOutputFormat.JobSummaryLevel provided
configuration - the configuration to use to get the FileSystem
outputPath - the directory to write the _metadata file to
footers - the list of footers to merge
level - level of summary to write
IOException - if there is an error while writing
public long getPos()
throws IOException
IOException - if there is an error while getting the current stream's position
public long getNextRowGroupSize()
throws IOException
IOException
Copyright © 2019 The Apache Software Foundation. All rights reserved.