public class ParquetFileReader extends Object implements Closeable
| Modifier and Type | Field and Description |
|---|---|
static String |
PARQUET_READ_PARALLELISM |
| Constructor and Description |
|---|
ParquetFileReader(org.apache.hadoop.conf.Configuration configuration,
FileMetaData fileMetaData,
org.apache.hadoop.fs.Path filePath,
List<BlockMetaData> blocks,
List<ColumnDescriptor> columns)
Deprecated.
will be removed in 2.0.0.
|
ParquetFileReader(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path filePath,
List<BlockMetaData> blocks,
List<ColumnDescriptor> columns)
Deprecated.
will be removed in 2.0.0.
|
ParquetFileReader(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file,
ParquetMetadata footer)
Deprecated.
will be removed in 2.0.0.
|
ParquetFileReader(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file,
ParquetMetadataConverter.MetadataFilter filter)
Deprecated.
will be removed in 2.0.0.
|
ParquetFileReader(InputFile file,
ParquetReadOptions options) |
| Modifier and Type | Method and Description |
|---|---|
void |
appendTo(ParquetFileWriter writer) |
void |
close() |
org.apache.parquet.hadoop.DictionaryPageReader |
getDictionaryReader(BlockMetaData block) |
String |
getFile() |
FileMetaData |
getFileMetaData() |
ParquetMetadata |
getFooter() |
DictionaryPageReadStore |
getNextDictionaryReader()
Returns a DictionaryPageReadStore for the row group that would be
returned by calling readNextRowGroup() or skipped by calling
skipNextRowGroup(). |
org.apache.hadoop.fs.Path |
getPath()
Deprecated.
will be removed in 2.0.0; use
getFile() instead |
long |
getRecordCount() |
List<BlockMetaData> |
getRowGroups() |
static ParquetFileReader |
open(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file)
Deprecated.
will be removed in 2.0.0; use
open(InputFile) |
static ParquetFileReader |
open(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file,
ParquetMetadata footer)
Deprecated.
will be removed in 2.0.0
|
static ParquetFileReader |
open(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path file,
ParquetMetadataConverter.MetadataFilter filter)
Deprecated.
will be removed in 2.0.0; use
open(InputFile,ParquetReadOptions) |
static ParquetFileReader |
open(InputFile file)
Open a file. |
static ParquetFileReader |
open(InputFile file,
ParquetReadOptions options)
|
static List<Footer> |
readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus fileStatus)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static List<Footer> |
readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus fileStatus,
boolean skipRowGroups)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static List<Footer> |
readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration,
List<org.apache.hadoop.fs.FileStatus> partFiles)
Deprecated.
metadata files are not recommended and will be removed in 2.0.0
|
static List<Footer> |
readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration,
List<org.apache.hadoop.fs.FileStatus> partFiles,
boolean skipRowGroups)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static List<Footer> |
readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration,
Collection<org.apache.hadoop.fs.FileStatus> partFiles,
boolean skipRowGroups)
Deprecated.
metadata files are not recommended and will be removed in 2.0.0
|
static List<Footer> |
readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration,
List<org.apache.hadoop.fs.FileStatus> partFiles)
Deprecated.
metadata files are not recommended and will be removed in 2.0.0
|
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus file)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus file,
ParquetMetadataConverter.MetadataFilter filter)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path file)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static ParquetMetadata |
readFooter(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path file,
ParquetMetadataConverter.MetadataFilter filter)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static ParquetMetadata |
readFooter(InputFile file,
ParquetMetadataConverter.MetadataFilter filter)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static List<Footer> |
readFooters(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus pathStatus)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static List<Footer> |
readFooters(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus pathStatus,
boolean skipRowGroups)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
static List<Footer> |
readFooters(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path path)
Deprecated.
will be removed in 2.0.0;
use
open(InputFile, ParquetReadOptions) |
PageReadStore |
readNextRowGroup()
Reads all the columns requested from the row group at the current file position.
|
static List<Footer> |
readSummaryFile(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.FileStatus summaryStatus)
Deprecated.
metadata files are not recommended and will be removed in 2.0.0
|
void |
setRequestedSchema(MessageType projection) |
boolean |
skipNextRowGroup() |
public static String PARQUET_READ_PARALLELISM
@Deprecated public ParquetFileReader(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path filePath, List<BlockMetaData> blocks, List<ColumnDescriptor> columns) throws IOException
Parameters:
configuration - the Hadoop conf
filePath - Path for the parquet file
blocks - the blocks to read
columns - the columns to read (their path)
Throws:
IOException - if the file cannot be opened

@Deprecated public ParquetFileReader(org.apache.hadoop.conf.Configuration configuration, FileMetaData fileMetaData, org.apache.hadoop.fs.Path filePath, List<BlockMetaData> blocks, List<ColumnDescriptor> columns) throws IOException
Parameters:
configuration - the Hadoop conf
fileMetaData - fileMetaData for parquet file
filePath - Path for the parquet file
blocks - the blocks to read
columns - the columns to read (their path)
Throws:
IOException - if the file cannot be opened

@Deprecated public ParquetFileReader(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
Parameters:
conf - the Hadoop Configuration
file - Path to a parquet file
filter - a ParquetMetadataConverter.MetadataFilter for selecting row groups
Throws:
IOException - if the file cannot be opened

@Deprecated public ParquetFileReader(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file, ParquetMetadata footer) throws IOException
Parameters:
conf - the Hadoop Configuration
file - Path to a parquet file
footer - a ParquetMetadata footer already read from the file
Throws:
IOException - if the file cannot be opened

public ParquetFileReader(InputFile file, ParquetReadOptions options) throws IOException
Throws:
IOException

@Deprecated public static List<Footer> readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration, List<org.apache.hadoop.fs.FileStatus> partFiles) throws IOException
Parameters:
configuration - the hadoop conf to connect to the file system
partFiles - the part files to read
Throws:
IOException - if there is an exception while reading footers

@Deprecated public static List<Footer> readAllFootersInParallelUsingSummaryFiles(org.apache.hadoop.conf.Configuration configuration, Collection<org.apache.hadoop.fs.FileStatus> partFiles, boolean skipRowGroups) throws IOException
Parameters:
configuration - the hadoop conf to connect to the file system
partFiles - the part files to read
skipRowGroups - to skipRowGroups in the footers
Throws:
IOException - if there is an exception while reading footers

@Deprecated public static List<Footer> readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration, List<org.apache.hadoop.fs.FileStatus> partFiles) throws IOException
Parameters:
configuration - the conf to access the File System
partFiles - the files to read
Throws:
IOException - if an exception was raised while reading footers

@Deprecated public static List<Footer> readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration, List<org.apache.hadoop.fs.FileStatus> partFiles, boolean skipRowGroups) throws IOException
Deprecated. Will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - the conf to access the File System
partFiles - the files to read
skipRowGroups - to skip the rowGroup info
Throws:
IOException - if there is an exception while reading footers

@Deprecated public static List<Footer> readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus fileStatus, boolean skipRowGroups) throws IOException
Deprecated. Will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - a configuration
fileStatus - a file status to recursively list
skipRowGroups - whether to skip reading row group metadata
Throws:
IOException - if an exception is thrown while reading the footers

@Deprecated public static List<Footer> readAllFootersInParallel(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus fileStatus) throws IOException
Deprecated. Will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - the configuration to access the FS
fileStatus - the root dir
Throws:
IOException - if an exception is thrown while reading the footers

@Deprecated public static List<Footer> readFooters(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path path) throws IOException
Deprecated. Will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - a configuration
path - a file path
Throws:
IOException - if an exception is thrown while reading the footers

@Deprecated public static List<Footer> readFooters(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus pathStatus) throws IOException
Deprecated. Will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - a configuration
pathStatus - a file status to read footers from
Throws:
IOException - if an exception is thrown while reading the footers

@Deprecated public static List<Footer> readFooters(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus pathStatus, boolean skipRowGroups) throws IOException
Deprecated. Will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - the configuration to access the FS
pathStatus - the root dir
skipRowGroups - whether to skip reading row group metadata
Throws:
IOException - if an exception is thrown while reading the footers

@Deprecated public static List<Footer> readSummaryFile(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus summaryStatus) throws IOException
Parameters:
configuration - a configuration
summaryStatus - file status for a summary file
Throws:
IOException - if an exception is thrown while reading the summary file

@Deprecated public static final ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path file) throws IOException
Deprecated. Will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - a configuration
file - the parquet File
Throws:
IOException - if an error occurs while reading the file

public static ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.Path file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
Deprecated. Will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - a configuration
file - the Parquet File
filter - the filter to apply to row groups
Throws:
IOException - if an error occurs while reading the file

@Deprecated public static final ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus file) throws IOException
Deprecated. Will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - a configuration
file - the Parquet File
Throws:
IOException - if an error occurs while reading the file

@Deprecated public static final ParquetMetadata readFooter(org.apache.hadoop.conf.Configuration configuration, org.apache.hadoop.fs.FileStatus file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
Deprecated. Will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
configuration - a configuration
file - the parquet File
filter - the filter to apply to row groups
Throws:
IOException - if an error occurs while reading the file

@Deprecated public static final ParquetMetadata readFooter(InputFile file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
Deprecated. Will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
file - an InputFile to read
filter - the filter to apply to row groups
Throws:
IOException - if an error occurs while reading the file

@Deprecated public static ParquetFileReader open(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file) throws IOException
Deprecated. Will be removed in 2.0.0; use open(InputFile)
Parameters:
conf - a configuration
file - a file path to open
Throws:
IOException - if there is an error while opening the file

@Deprecated public static ParquetFileReader open(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file, ParquetMetadataConverter.MetadataFilter filter) throws IOException
Deprecated. Will be removed in 2.0.0; use open(InputFile, ParquetReadOptions)
Parameters:
conf - a configuration
file - a file path to open
filter - a metadata filter
Throws:
IOException - if there is an error while opening the file

@Deprecated public static ParquetFileReader open(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path file, ParquetMetadata footer) throws IOException
Parameters:
conf - a configuration
file - a file path to open
footer - a footer for the file if already loaded
Throws:
IOException - if there is an error while opening the file

public static ParquetFileReader open(InputFile file) throws IOException
Open a file.
Parameters:
file - an input file
Throws:
IOException - if there is an error while opening the file

public static ParquetFileReader open(InputFile file, ParquetReadOptions options) throws IOException
Parameters:
file - an input file
options - parquet read options
Throws:
IOException - if there is an error while opening the file

public ParquetMetadata getFooter()
public FileMetaData getFileMetaData()
public long getRecordCount()
@Deprecated public org.apache.hadoop.fs.Path getPath()
Deprecated. Will be removed in 2.0.0; use getFile() instead

public String getFile()
public List<BlockMetaData> getRowGroups()
public void setRequestedSchema(MessageType projection)
public void appendTo(ParquetFileWriter writer) throws IOException
Throws:
IOException

public PageReadStore readNextRowGroup() throws IOException
Throws:
IOException - if an error occurs while reading

public boolean skipNextRowGroup()
public DictionaryPageReadStore getNextDictionaryReader()
Returns a DictionaryPageReadStore for the row group that would be
returned by calling readNextRowGroup() or skipped by calling
skipNextRowGroup().

public org.apache.parquet.hadoop.DictionaryPageReader getDictionaryReader(BlockMetaData block)
public void close()
throws IOException
Specified by:
close in interface Closeable
close in interface AutoCloseable
Throws:
IOException

Copyright © 2019 The Apache Software Foundation. All rights reserved.