001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.datanode.fsdataset;
019    
020    
021    import java.io.File;
022    import java.io.FileDescriptor;
023    import java.io.IOException;
024    import java.io.InputStream;
025    import java.util.Collection;
026    import java.util.List;
027    import java.util.Map;
028    
029    import org.apache.hadoop.classification.InterfaceAudience;
030    import org.apache.hadoop.conf.Configuration;
031    import org.apache.hadoop.hdfs.DFSConfigKeys;
032    import org.apache.hadoop.hdfs.StorageType;
033    import org.apache.hadoop.hdfs.protocol.Block;
034    import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
035    import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
036    import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
037    import org.apache.hadoop.hdfs.protocol.HdfsBlocksMetadata;
038    import org.apache.hadoop.hdfs.server.datanode.DataNode;
039    import org.apache.hadoop.hdfs.server.datanode.DataStorage;
040    import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
041    import org.apache.hadoop.hdfs.server.datanode.Replica;
042    import org.apache.hadoop.hdfs.server.datanode.ReplicaInPipelineInterface;
043    import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
044    import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetFactory;
045    import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl;
046    import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean;
047    import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock;
048    import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
049    import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
050    import org.apache.hadoop.hdfs.server.protocol.StorageReport;
051    import org.apache.hadoop.util.DiskChecker.DiskErrorException;
052    import org.apache.hadoop.util.ReflectionUtils;
053    
054    /**
055     * This is a service provider interface for the underlying storage that
056     * stores replicas for a data node.
057     * The default implementation stores replicas on local drives. 
058     */
059    @InterfaceAudience.Private
public interface FsDatasetSpi<V extends FsVolumeSpi> extends FSDatasetMBean {
  /**
   * A factory for creating {@link FsDatasetSpi} objects.
   */
  public static abstract class Factory<D extends FsDatasetSpi<?>> {
    /**
     * @return the factory configured via
     *         {@link DFSConfigKeys#DFS_DATANODE_FSDATASET_FACTORY_KEY},
     *         defaulting to {@link FsDatasetFactory}, instantiated
     *         reflectively with the given configuration.
     */
    public static Factory<?> getFactory(Configuration conf) {
      @SuppressWarnings("rawtypes")
      final Class<? extends Factory> clazz = conf.getClass(
          DFSConfigKeys.DFS_DATANODE_FSDATASET_FACTORY_KEY,
          FsDatasetFactory.class,
          Factory.class);
      return ReflectionUtils.newInstance(clazz, conf);
    }

    /**
     * Create a new dataset object.
     * @param datanode the datanode that owns the dataset
     * @param storage the storage backing the dataset
     * @param conf the configuration to use
     * @return a new dataset instance
     * @throws IOException if the dataset cannot be created
     */
    public abstract D newInstance(DataNode datanode, DataStorage storage,
        Configuration conf) throws IOException;

    /** Does the factory create simulated objects? Defaults to false. */
    public boolean isSimulated() {
      return false;
    }
  }

  /**
   * Create rolling logs.
   *
   * @param bpid the block pool id the logs belong to
   * @param prefix the prefix of the log names.
   * @return rolling logs
   * @throws IOException if the logs cannot be created
   */
  public RollingLogs createRollingLogs(String bpid, String prefix
      ) throws IOException;

  /** @return a list of volumes. */
  public List<V> getVolumes();

  /**
   * Add a list of volumes (as StorageLocations) to the FsDataset.
   * @param volumes the storage locations to add
   * @param bpids block pool ids to attach to the new volumes
   * @return a list of storage locations — presumably the ones that failed
   *         to be added; verify against the implementation.
   */
  public List<StorageLocation> addVolumes(List<StorageLocation> volumes,
      final Collection<String> bpids);

  /** Removes a collection of volumes from FsDataset. */
  public void removeVolumes(Collection<StorageLocation> volumes);

  /** @return the storage with the given storage ID, if attached. */
  public DatanodeStorage getStorage(final String storageUuid);

  /**
   * @param bpid block pool id
   * @return one or more storage reports for attached volumes.
   * @throws IOException if the reports cannot be produced
   */
  public StorageReport[] getStorageReports(String bpid)
      throws IOException;

  /** @return the volume that contains a replica of the block. */
  public V getVolume(ExtendedBlock b);

  /** @return a volume information map (name =&gt; info). */
  public Map<String, Object> getVolumeInfoMap();

  /** @return a list of finalized blocks for the given block pool. */
  public List<FinalizedReplica> getFinalizedBlocks(String bpid);

  /**
   * @return a list of finalized blocks on persistent storage (i.e. not
   *         on transient/lazy-persist storage) for the given block pool.
   */
  public List<FinalizedReplica> getFinalizedBlocksOnPersistentStorage(String bpid);

  /**
   * Check whether the in-memory block record matches the block on the disk,
   * and, in case that they are not matched, update the record or mark it
   * as corrupted.
   * @param bpid block pool id
   * @param blockId id of the block being checked
   * @param diskFile the block file found on disk
   * @param diskMetaFile the block's metadata file found on disk
   * @param vol the volume the files were found on
   * @throws IOException if the check or update fails
   */
  public void checkAndUpdate(String bpid, long blockId, File diskFile,
      File diskMetaFile, FsVolumeSpi vol) throws IOException;

  /**
   * @param b - the block
   * @return a stream if the meta-data of the block exists;
   *         otherwise, return null.
   * @throws IOException if the stream cannot be opened
   */
  public LengthInputStream getMetaDataInputStream(ExtendedBlock b
      ) throws IOException;

  /**
   * Returns the specified block's on-disk length (excluding metadata).
   * @param b the block to query
   * @return the specified block's on-disk length (excluding metadata)
   * @throws IOException on error
   */
  public long getLength(ExtendedBlock b) throws IOException;

  /**
   * Get reference to the replica meta info in the replicas map.
   * To be called only from methods that are synchronized on the dataset
   * implementation, since the returned object is shared, not a copy.
   * @param bpid block pool id
   * @param blockId id of the block to look up
   * @return replica from the replicas map
   * @deprecated exposes internal mutable state; prefer
   *             {@link #getReplicaString(String, long)} or other accessors.
   */
  @Deprecated
  public Replica getReplica(String bpid, long blockId);

  /**
   * @return replica meta information, as a string
   */
  public String getReplicaString(String bpid, long blockId);

  /**
   * @return the stored block, carrying the generation stamp stored with it.
   * @throws IOException if the block cannot be found
   */
  public Block getStoredBlock(String bpid, long blkid) throws IOException;
  
  /**
   * Returns an input stream at specified offset of the specified block.
   * @param b block
   * @param seekOffset offset within the block to seek to
   * @return an input stream to read the contents of the specified block,
   *  starting at the offset
   * @throws IOException if the stream cannot be opened
   */
  public InputStream getBlockInputStream(ExtendedBlock b, long seekOffset)
            throws IOException;

  /**
   * Returns input streams at the specified offsets of the specified block.
   * The block is still in the tmp directory and is not finalized.
   * @param b block
   * @param blkoff offset within the block data to seek to
   * @param ckoff offset within the checksum data to seek to
   * @return input streams to read the contents of the specified block,
   *  starting at the offsets
   * @throws IOException if the streams cannot be opened
   */
  public ReplicaInputStreams getTmpInputStreams(ExtendedBlock b, long blkoff,
      long ckoff) throws IOException;

  /**
   * Creates a temporary replica and returns the meta information of the
   * replica.
   * @param storageType the type of storage to place the replica on
   * @param b block
   * @return the meta info of the replica which is being written to
   * @throws IOException if an error occurs
   */
  public ReplicaInPipelineInterface createTemporary(StorageType storageType,
      ExtendedBlock b) throws IOException;

  /**
   * Creates an RBW (replica-being-written) replica and returns the meta info
   * of the replica.
   * @param storageType the type of storage to place the replica on
   * @param b block
   * @param allowLazyPersist whether the replica may be lazily persisted
   * @return the meta info of the replica which is being written to
   * @throws IOException if an error occurs
   */
  public ReplicaInPipelineInterface createRbw(StorageType storageType,
      ExtendedBlock b, boolean allowLazyPersist) throws IOException;

  /**
   * Recovers an RBW replica and returns the meta info of the replica.
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param minBytesRcvd the minimum number of bytes that the replica could have
   * @param maxBytesRcvd the maximum number of bytes that the replica could have
   * @return the meta info of the replica which is being written to
   * @throws IOException if an error occurs
   */
  public ReplicaInPipelineInterface recoverRbw(ExtendedBlock b, 
      long newGS, long minBytesRcvd, long maxBytesRcvd) throws IOException;

  /**
   * Convert a temporary replica to an RBW replica.
   * @param temporary the temporary replica being converted
   * @return the resulting RBW replica
   * @throws IOException if the conversion fails
   */
  public ReplicaInPipelineInterface convertTemporaryToRbw(
      ExtendedBlock temporary) throws IOException;

  /**
   * Append to a finalized replica and returns the meta info of the replica.
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param expectedBlockLen the number of bytes the replica is expected to have
   * @return the meta info of the replica which is being written to
   * @throws IOException if the append cannot be started
   */
  public ReplicaInPipelineInterface append(ExtendedBlock b, long newGS,
      long expectedBlockLen) throws IOException;

  /**
   * Recover a failed append to a finalized replica
   * and returns the meta info of the replica.
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param expectedBlockLen the number of bytes the replica is expected to have
   * @return the meta info of the replica which is being written to
   * @throws IOException if the recovery fails
   */
  public ReplicaInPipelineInterface recoverAppend(ExtendedBlock b, long newGS,
      long expectedBlockLen) throws IOException;
  
  /**
   * Recover a failed pipeline close.
   * It bumps the replica's generation stamp and finalizes it if it is an
   * RBW replica.
   *
   * @param b block
   * @param newGS the new generation stamp for the replica
   * @param expectedBlockLen the number of bytes the replica is expected to have
   * @return the storage uuid of the replica.
   * @throws IOException if the recovery fails
   */
  public String recoverClose(ExtendedBlock b, long newGS, long expectedBlockLen
      ) throws IOException;
  
  /**
   * Finalizes the block previously opened for writing using writeToBlock.
   * The block size is what is in the parameter b and it must match the amount
   * of data written.
   * @throws IOException if finalization fails
   */
  public void finalizeBlock(ExtendedBlock b) throws IOException;

  /**
   * Unfinalizes the block previously opened for writing using writeToBlock.
   * The temporary file associated with this block is deleted.
   * @throws IOException if the block cannot be unfinalized
   */
  public void unfinalizeBlock(ExtendedBlock b) throws IOException;

  /**
   * Returns one block report per volume.
   * @param bpid Block Pool Id
   * @return a map of DatanodeStorage to block report for the volume.
   */
  public Map<DatanodeStorage, BlockListAsLongs> getBlockReports(String bpid);

  /**
   * Returns the cache report - the full list of cached block IDs of a
   * block pool.
   * @param   bpid Block Pool Id
   * @return  the cache report - the full list of cached block IDs.
   */
  public List<Long> getCacheReport(String bpid);

  /** Does the dataset contain the block? */
  public boolean contains(ExtendedBlock block);

  /**
   * Is the block valid?
   * @return true if the specified block is valid
   */
  public boolean isValidBlock(ExtendedBlock b);

  /**
   * Is the block a valid RBW (replica-being-written)?
   * @return true if the specified block is a valid RBW
   */
  public boolean isValidRbw(ExtendedBlock b);

  /**
   * Invalidates the specified blocks.
   * @param bpid Block pool Id
   * @param invalidBlks the blocks to be invalidated
   * @throws IOException if invalidation fails
   */
  public void invalidate(String bpid, Block invalidBlks[]) throws IOException;

  /**
   * Caches the specified blocks.
   * @param bpid Block pool id
   * @param blockIds block ids to cache
   */
  public void cache(String bpid, long[] blockIds);

  /**
   * Uncaches the specified blocks.
   * @param bpid Block pool id
   * @param blockIds block ids to uncache
   */
  public void uncache(String bpid, long[] blockIds);

  /**
   * Determine if the specified block is cached.
   * @param bpid Block pool id
   * @param blockId block id
   * @return true if the block is cached
   */
  public boolean isCached(String bpid, long blockId);

  /**
   * Check if all the data directories are healthy.
   * @throws DiskErrorException if a data directory is unhealthy
   */
  public void checkDataDir() throws DiskErrorException;

  /**
   * Shutdown the FSDataset.
   */
  public void shutdown();

  /**
   * Sets the file pointer of the checksum stream so that the last checksum
   * will be overwritten.
   * @param b block
   * @param outs The streams for the data file and checksum file
   * @param checksumSize number of bytes each checksum has
   * @throws IOException if the position cannot be adjusted
   */
  public void adjustCrcChannelPosition(ExtendedBlock b,
      ReplicaOutputStreams outs, int checksumSize) throws IOException;

  /**
   * Checks how many valid storage volumes there are in the DataNode.
   * @return true if more than the minimum number of valid volumes are left 
   * in the FSDataSet.
   */
  public boolean hasEnoughResource();

  /**
   * Get visible length of the specified replica.
   */
  long getReplicaVisibleLength(final ExtendedBlock block) throws IOException;

  /**
   * Initialize a replica recovery.
   * @return actual state of the replica on this data-node or 
   * null if data-node does not have the replica.
   */
  public ReplicaRecoveryInfo initReplicaRecovery(RecoveringBlock rBlock
      ) throws IOException;

  /**
   * Update replica's generation stamp and length and finalize it.
   * @return the ID of the storage that stores the block
   */
  public String updateReplicaUnderRecovery(ExtendedBlock oldBlock,
      long recoveryId, long newLength) throws IOException;

  /**
   * Add a new block pool.
   * @param bpid Block pool Id
   * @param conf Configuration
   * @throws IOException if the block pool cannot be added
   */
  public void addBlockPool(String bpid, Configuration conf) throws IOException;
  
  /**
   * Shutdown and remove the block pool from underlying storage.
   * @param bpid Block pool Id to be removed
   */
  public void shutdownBlockPool(String bpid) ;
  
  /**
   * Deletes the block pool directories. If force is false, directories are 
   * deleted only if no block files exist for the block pool. If force 
   * is true entire directory for the blockpool is deleted along with its
   * contents.
   * @param bpid BlockPool Id to be deleted.
   * @param force If force is false, directories are deleted only if no
   *        block files exist for the block pool, otherwise entire 
   *        directory for the blockpool is deleted along with its contents.
   * @throws IOException if the deletion fails
   */
  public void deleteBlockPool(String bpid, boolean force) throws IOException;
  
  /**
   * Get {@link BlockLocalPathInfo} for the given block.
   */
  public BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b
      ) throws IOException;

  /**
   * Get a {@link HdfsBlocksMetadata} corresponding to the list of blocks in 
   * <code>blocks</code>.
   * 
   * @param bpid pool to query
   * @param blockIds List of block ids for which to return metadata
   * @return metadata Metadata for the list of blocks
   * @throws IOException if the metadata cannot be retrieved
   */
  public HdfsBlocksMetadata getHdfsBlocksMetadata(String bpid,
      long[] blockIds) throws IOException;

  /**
   * Enable 'trash' for the given dataset. When trash is enabled, files are
   * moved to a separate trash directory instead of being deleted immediately.
   * This can be useful for example during rolling upgrades.
   */
  public void enableTrash(String bpid);

  /**
   * Restore trash for the given block pool.
   */
  public void restoreTrash(String bpid);

  /**
   * @return true when trash is enabled for the given block pool
   */
  public boolean trashEnabled(String bpid);

  /**
   * Create a marker file indicating that a rolling upgrade is in progress.
   */
  public void setRollingUpgradeMarker(String bpid) throws IOException;

  /**
   * Delete the rolling upgrade marker file if it exists.
   * @param bpid block pool id whose marker should be cleared
   */
  public void clearRollingUpgradeMarker(String bpid) throws IOException;

  /**
   * Submit a sync_file_range request to AsyncDiskService.
   * @param block the block whose data should be synced
   * @param fd open descriptor of the block's data file
   * @param offset start offset of the byte range to sync
   * @param nbytes number of bytes to sync
   * @param flags sync_file_range flags controlling the sync behavior
   */
  public void submitBackgroundSyncFileRangeRequest(final ExtendedBlock block,
      final FileDescriptor fd, final long offset, final long nbytes,
      final int flags);

  /**
   * Callback from RamDiskAsyncLazyPersistService upon completion of an async
   * lazy-persist task.
   * @param bpId block pool id of the persisted replica
   * @param blockId id of the persisted block
   * @param creationTime creation time of the replica
   * @param savedFiles the files the replica was saved to
   * @param targetVolume the volume the replica was persisted to
   */
   public void onCompleteLazyPersist(String bpId, long blockId,
      long creationTime, File[] savedFiles, FsVolumeImpl targetVolume);

   /**
    * Callback from RamDiskAsyncLazyPersistService upon failure of an async
    * lazy-persist task.
    * @param bpId block pool id of the replica that failed to persist
    * @param blockId id of the block that failed to persist
    */
   public void onFailLazyPersist(String bpId, long blockId);
}