001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.namenode;
019    
020    import static org.apache.hadoop.hdfs.server.namenode.FSImageFormat.renameReservedPathsOnUpgrade;
021    import static org.apache.hadoop.util.Time.now;
022    
023    import java.io.FilterInputStream;
024    import java.io.IOException;
025    import java.io.InputStream;
026    import java.util.Arrays;
027    import java.util.EnumMap;
028    import java.util.EnumSet;
029    import java.util.List;
030    
031    import org.apache.commons.logging.Log;
032    import org.apache.commons.logging.LogFactory;
033    import org.apache.hadoop.classification.InterfaceAudience;
034    import org.apache.hadoop.classification.InterfaceStability;
035    import org.apache.hadoop.fs.FileSystem;
036    import org.apache.hadoop.fs.XAttrSetFlag;
037    import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
038    import org.apache.hadoop.hdfs.protocol.Block;
039    import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
040    import org.apache.hadoop.hdfs.protocol.HdfsConstants;
041    import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
042    import org.apache.hadoop.hdfs.protocol.LayoutVersion;
043    import org.apache.hadoop.hdfs.protocol.LocatedBlock;
044    import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
045    import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
046    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.RollingUpgradeStartupOption;
047    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
048    import org.apache.hadoop.hdfs.server.common.Storage;
049    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddBlockOp;
050    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCacheDirectiveInfoOp;
051    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCachePoolOp;
052    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp;
053    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllocateBlockIdOp;
054    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AllowSnapshotOp;
055    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.BlockListUpdatingOp;
056    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp;
057    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ClearNSQuotaOp;
058    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ConcatDeleteOp;
059    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CreateSnapshotOp;
060    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteOp;
061    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DeleteSnapshotOp;
062    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.DisallowSnapshotOp;
063    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.GetDelegationTokenOp;
064    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp;
065    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCacheDirectiveInfoOp;
066    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ModifyCachePoolOp;
067    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ReassignLeaseOp;
068    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCacheDirectiveInfoOp;
069    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveCachePoolOp;
070    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOldOp;
071    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameOp;
072    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenameSnapshotOp;
073    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RenewDelegationTokenOp;
074    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetAclOp;
075    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RollingUpgradeOp;
076    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV1Op;
077    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetGenstampV2Op;
078    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetNSQuotaOp;
079    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetOwnerOp;
080    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp;
081    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp;
082    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp;
083    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetStoragePolicyOp;
084    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetXAttrOp;
085    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveXAttrOp;
086    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp;
087    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp;
088    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp;
089    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp;
090    import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
091    import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
092    import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
093    import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
094    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
095    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
096    import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
097    import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
098    import org.apache.hadoop.hdfs.util.ChunkedArrayList;
099    import org.apache.hadoop.hdfs.util.Holder;
100    
101    import com.google.common.base.Joiner;
102    import com.google.common.base.Preconditions;
103    
104    @InterfaceAudience.Private
105    @InterfaceStability.Evolving
106    public class FSEditLogLoader {
107      static final Log LOG = LogFactory.getLog(FSEditLogLoader.class.getName());
108      static final long REPLAY_TRANSACTION_LOG_INTERVAL = 1000; // 1sec
109    
110      private final FSNamesystem fsNamesys;
111      private long lastAppliedTxId;
112      /** Total number of end transactions loaded. */
113      private int totalEdits = 0;
114      
/**
 * Creates a loader bound to one namesystem.
 *
 * @param fsNamesys namesystem that replayed operations are applied to
 * @param lastAppliedTxId starting value for the last-applied transaction id
 */
public FSEditLogLoader(FSNamesystem fsNamesys, long lastAppliedTxId) {
  this.lastAppliedTxId = lastAppliedTxId;
  this.fsNamesys = fsNamesys;
}
119      
120      long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId)
121          throws IOException {
122        return loadFSEdits(edits, expectedStartingTxId, null, null);
123      }
124    
125      /**
126       * Load an edit log, and apply the changes to the in-memory structure
127       * This is where we apply edits that we've been writing to disk all
128       * along.
129       */
130      long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId,
131          StartupOption startOpt, MetaRecoveryContext recovery) throws IOException {
132        StartupProgress prog = NameNode.getStartupProgress();
133        Step step = createStartupProgressStep(edits);
134        prog.beginStep(Phase.LOADING_EDITS, step);
135        fsNamesys.writeLock();
136        try {
137          long startTime = now();
138          FSImage.LOG.info("Start loading edits file " + edits.getName());
139          long numEdits = loadEditRecords(edits, false, expectedStartingTxId,
140              startOpt, recovery);
141          FSImage.LOG.info("Edits file " + edits.getName() 
142              + " of size " + edits.length() + " edits # " + numEdits 
143              + " loaded in " + (now()-startTime)/1000 + " seconds");
144          return numEdits;
145        } finally {
146          edits.close();
147          fsNamesys.writeUnlock();
148          prog.endStep(Phase.LOADING_EDITS, step);
149        }
150      }
151    
152      long loadEditRecords(EditLogInputStream in, boolean closeOnExit,
153          long expectedStartingTxId, StartupOption startOpt,
154          MetaRecoveryContext recovery) throws IOException {
155        FSDirectory fsDir = fsNamesys.dir;
156    
157        EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts =
158          new EnumMap<FSEditLogOpCodes, Holder<Integer>>(FSEditLogOpCodes.class);
159    
160        if (LOG.isTraceEnabled()) {
161          LOG.trace("Acquiring write lock to replay edit log");
162        }
163    
164        fsNamesys.writeLock();
165        fsDir.writeLock();
166    
167        long recentOpcodeOffsets[] = new long[4];
168        Arrays.fill(recentOpcodeOffsets, -1);
169        
170        long expectedTxId = expectedStartingTxId;
171        long numEdits = 0;
172        long lastTxId = in.getLastTxId();
173        long numTxns = (lastTxId - expectedStartingTxId) + 1;
174        StartupProgress prog = NameNode.getStartupProgress();
175        Step step = createStartupProgressStep(in);
176        prog.setTotal(Phase.LOADING_EDITS, step, numTxns);
177        Counter counter = prog.getCounter(Phase.LOADING_EDITS, step);
178        long lastLogTime = now();
179        long lastInodeId = fsNamesys.getLastInodeId();
180        
181        try {
182          while (true) {
183            try {
184              FSEditLogOp op;
185              try {
186                op = in.readOp();
187                if (op == null) {
188                  break;
189                }
190              } catch (Throwable e) {
191                // Handle a problem with our input
192                check203UpgradeFailure(in.getVersion(true), e);
193                String errorMessage =
194                  formatEditLogReplayError(in, recentOpcodeOffsets, expectedTxId);
195                FSImage.LOG.error(errorMessage, e);
196                if (recovery == null) {
197                   // We will only try to skip over problematic opcodes when in
198                   // recovery mode.
199                  throw new EditLogInputException(errorMessage, e, numEdits);
200                }
201                MetaRecoveryContext.editLogLoaderPrompt(
202                    "We failed to read txId " + expectedTxId,
203                    recovery, "skipping the bad section in the log");
204                in.resync();
205                continue;
206              }
207              recentOpcodeOffsets[(int)(numEdits % recentOpcodeOffsets.length)] =
208                in.getPosition();
209              if (op.hasTransactionId()) {
210                if (op.getTransactionId() > expectedTxId) { 
211                  MetaRecoveryContext.editLogLoaderPrompt("There appears " +
212                      "to be a gap in the edit log.  We expected txid " +
213                      expectedTxId + ", but got txid " +
214                      op.getTransactionId() + ".", recovery, "ignoring missing " +
215                      " transaction IDs");
216                } else if (op.getTransactionId() < expectedTxId) { 
217                  MetaRecoveryContext.editLogLoaderPrompt("There appears " +
218                      "to be an out-of-order edit in the edit log.  We " +
219                      "expected txid " + expectedTxId + ", but got txid " +
220                      op.getTransactionId() + ".", recovery,
221                      "skipping the out-of-order edit");
222                  continue;
223                }
224              }
225              try {
226                if (LOG.isTraceEnabled()) {
227                  LOG.trace("op=" + op + ", startOpt=" + startOpt
228                      + ", numEdits=" + numEdits + ", totalEdits=" + totalEdits);
229                }
230                long inodeId = applyEditLogOp(op, fsDir, startOpt,
231                    in.getVersion(true), lastInodeId);
232                if (lastInodeId < inodeId) {
233                  lastInodeId = inodeId;
234                }
235              } catch (RollingUpgradeOp.RollbackException e) {
236                throw e;
237              } catch (Throwable e) {
238                LOG.error("Encountered exception on operation " + op, e);
239                if (recovery == null) {
240                  throw e instanceof IOException? (IOException)e: new IOException(e);
241                }
242    
243                MetaRecoveryContext.editLogLoaderPrompt("Failed to " +
244                 "apply edit log operation " + op + ": error " +
245                 e.getMessage(), recovery, "applying edits");
246              }
247              // Now that the operation has been successfully decoded and
248              // applied, update our bookkeeping.
249              incrOpCount(op.opCode, opCounts, step, counter);
250              if (op.hasTransactionId()) {
251                lastAppliedTxId = op.getTransactionId();
252                expectedTxId = lastAppliedTxId + 1;
253              } else {
254                expectedTxId = lastAppliedTxId = expectedStartingTxId;
255              }
256              // log progress
257              if (op.hasTransactionId()) {
258                long now = now();
259                if (now - lastLogTime > REPLAY_TRANSACTION_LOG_INTERVAL) {
260                  long deltaTxId = lastAppliedTxId - expectedStartingTxId + 1;
261                  int percent = Math.round((float) deltaTxId / numTxns * 100);
262                  LOG.info("replaying edit log: " + deltaTxId + "/" + numTxns
263                      + " transactions completed. (" + percent + "%)");
264                  lastLogTime = now;
265                }
266              }
267              numEdits++;
268              totalEdits++;
269            } catch (RollingUpgradeOp.RollbackException e) {
270              LOG.info("Stopped at OP_START_ROLLING_UPGRADE for rollback.");
271              break;
272            } catch (MetaRecoveryContext.RequestStopException e) {
273              MetaRecoveryContext.LOG.warn("Stopped reading edit log at " +
274                  in.getPosition() + "/"  + in.length());
275              break;
276            }
277          }
278        } finally {
279          fsNamesys.resetLastInodeId(lastInodeId);
280          if(closeOnExit) {
281            in.close();
282          }
283          fsDir.writeUnlock();
284          fsNamesys.writeUnlock();
285    
286          if (LOG.isTraceEnabled()) {
287            LOG.trace("replaying edit log finished");
288          }
289    
290          if (FSImage.LOG.isDebugEnabled()) {
291            dumpOpCounts(opCounts);
292          }
293        }
294        return numEdits;
295      }
296      
297      // allocate and update last allocated inode id
298      private long getAndUpdateLastInodeId(long inodeIdFromOp, int logVersion,
299          long lastInodeId) throws IOException {
300        long inodeId = inodeIdFromOp;
301    
302        if (inodeId == INodeId.GRANDFATHER_INODE_ID) {
303          if (NameNodeLayoutVersion.supports(
304              LayoutVersion.Feature.ADD_INODE_ID, logVersion)) {
305            throw new IOException("The layout version " + logVersion
306                + " supports inodeId but gave bogus inodeId");
307          }
308          inodeId = fsNamesys.allocateNewInodeId();
309        } else {
310          // need to reset lastInodeId. fsnamesys gets lastInodeId firstly from
311          // fsimage but editlog captures more recent inodeId allocations
312          if (inodeId > lastInodeId) {
313            fsNamesys.resetLastInodeId(inodeId);
314          }
315        }
316        return inodeId;
317      }
318    
319      @SuppressWarnings("deprecation")
320      private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir,
321          StartupOption startOpt, int logVersion, long lastInodeId) throws IOException {
322        long inodeId = INodeId.GRANDFATHER_INODE_ID;
323        if (LOG.isTraceEnabled()) {
324          LOG.trace("replaying edit log: " + op);
325        }
326        final boolean toAddRetryCache = fsNamesys.hasRetryCache() && op.hasRpcIds();
327        
328        switch (op.opCode) {
329        case OP_ADD: {
330          AddCloseOp addCloseOp = (AddCloseOp)op;
331          final String path =
332              renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
333          if (FSNamesystem.LOG.isDebugEnabled()) {
334            FSNamesystem.LOG.debug(op.opCode + ": " + path +
335                " numblocks : " + addCloseOp.blocks.length +
336                " clientHolder " + addCloseOp.clientName +
337                " clientMachine " + addCloseOp.clientMachine);
338          }
339          // There three cases here:
340          // 1. OP_ADD to create a new file
341          // 2. OP_ADD to update file blocks
342          // 3. OP_ADD to open file for append
343    
344          // See if the file already exists (persistBlocks call)
345          final INodesInPath iip = fsDir.getINodesInPath(path, true);
346          final INode[] inodes = iip.getINodes();
347          INodeFile oldFile = INodeFile.valueOf(
348              inodes[inodes.length - 1], path, true);
349          if (oldFile != null && addCloseOp.overwrite) {
350            // This is OP_ADD with overwrite
351            fsDir.unprotectedDelete(path, addCloseOp.mtime);
352            oldFile = null;
353          }
354          INodeFile newFile = oldFile;
355          if (oldFile == null) { // this is OP_ADD on a new file (case 1)
356            // versions > 0 support per file replication
357            // get name and replication
358            final short replication = fsNamesys.getBlockManager()
359                .adjustReplication(addCloseOp.replication);
360            assert addCloseOp.blocks.length == 0;
361    
362            // add to the file tree
363            inodeId = getAndUpdateLastInodeId(addCloseOp.inodeId, logVersion,
364                lastInodeId);
365            newFile = fsDir.unprotectedAddFile(inodeId,
366                path, addCloseOp.permissions, addCloseOp.aclEntries,
367                addCloseOp.xAttrs,
368                replication, addCloseOp.mtime, addCloseOp.atime,
369                addCloseOp.blockSize, true, addCloseOp.clientName,
370                addCloseOp.clientMachine, addCloseOp.storagePolicyId);
371            fsNamesys.leaseManager.addLease(addCloseOp.clientName, path);
372    
373            // add the op into retry cache if necessary
374            if (toAddRetryCache) {
375              HdfsFileStatus stat = fsNamesys.dir.createFileStatus(
376                  HdfsFileStatus.EMPTY_NAME, newFile,
377                  BlockStoragePolicySuite.ID_UNSPECIFIED, Snapshot.CURRENT_STATE_ID,
378                  false, iip);
379              fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
380                  addCloseOp.rpcCallId, stat);
381            }
382          } else { // This is OP_ADD on an existing file
383            if (!oldFile.isUnderConstruction()) {
384              // This is case 3: a call to append() on an already-closed file.
385              if (FSNamesystem.LOG.isDebugEnabled()) {
386                FSNamesystem.LOG.debug("Reopening an already-closed file " +
387                    "for append");
388              }
389              LocatedBlock lb = fsNamesys.prepareFileForWrite(path,
390                  oldFile, addCloseOp.clientName, addCloseOp.clientMachine, false, iip.getLatestSnapshotId(), false);
391              newFile = INodeFile.valueOf(fsDir.getINode(path),
392                  path, true);
393              
394              // add the op into retry cache is necessary
395              if (toAddRetryCache) {
396                fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
397                    addCloseOp.rpcCallId, lb);
398              }
399            }
400          }
401          // Fall-through for case 2.
402          // Regardless of whether it's a new file or an updated file,
403          // update the block list.
404          
405          // Update the salient file attributes.
406          newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
407          newFile.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
408          updateBlocks(fsDir, addCloseOp, newFile);
409          break;
410        }
411        case OP_CLOSE: {
412          AddCloseOp addCloseOp = (AddCloseOp)op;
413          final String path =
414              renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
415          if (FSNamesystem.LOG.isDebugEnabled()) {
416            FSNamesystem.LOG.debug(op.opCode + ": " + path +
417                " numblocks : " + addCloseOp.blocks.length +
418                " clientHolder " + addCloseOp.clientName +
419                " clientMachine " + addCloseOp.clientMachine);
420          }
421    
422          final INodesInPath iip = fsDir.getLastINodeInPath(path);
423          final INodeFile file = INodeFile.valueOf(iip.getINode(0), path);
424    
425          // Update the salient file attributes.
426          file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
427          file.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
428          updateBlocks(fsDir, addCloseOp, file);
429    
430          // Now close the file
431          if (!file.isUnderConstruction() &&
432              logVersion <= LayoutVersion.BUGFIX_HDFS_2991_VERSION) {
433            // There was a bug (HDFS-2991) in hadoop < 0.23.1 where OP_CLOSE
434            // could show up twice in a row. But after that version, this
435            // should be fixed, so we should treat it as an error.
436            throw new IOException(
437                "File is not under construction: " + path);
438          }
439          // One might expect that you could use removeLease(holder, path) here,
440          // but OP_CLOSE doesn't serialize the holder. So, remove by path.
441          if (file.isUnderConstruction()) {
442            fsNamesys.leaseManager.removeLeaseWithPrefixPath(path);
443            file.toCompleteFile(file.getModificationTime());
444          }
445          break;
446        }
447        case OP_UPDATE_BLOCKS: {
448          UpdateBlocksOp updateOp = (UpdateBlocksOp)op;
449          final String path =
450              renameReservedPathsOnUpgrade(updateOp.path, logVersion);
451          if (FSNamesystem.LOG.isDebugEnabled()) {
452            FSNamesystem.LOG.debug(op.opCode + ": " + path +
453                " numblocks : " + updateOp.blocks.length);
454          }
455          INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path),
456              path);
457          // Update in-memory data structures
458          updateBlocks(fsDir, updateOp, oldFile);
459          
460          if (toAddRetryCache) {
461            fsNamesys.addCacheEntry(updateOp.rpcClientId, updateOp.rpcCallId);
462          }
463          break;
464        }
465        case OP_ADD_BLOCK: {
466          AddBlockOp addBlockOp = (AddBlockOp) op;
467          String path = renameReservedPathsOnUpgrade(addBlockOp.getPath(), logVersion);
468          if (FSNamesystem.LOG.isDebugEnabled()) {
469            FSNamesystem.LOG.debug(op.opCode + ": " + path +
470                " new block id : " + addBlockOp.getLastBlock().getBlockId());
471          }
472          INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path), path);
473          // add the new block to the INodeFile
474          addNewBlock(fsDir, addBlockOp, oldFile);
475          break;
476        }
477        case OP_SET_REPLICATION: {
478          SetReplicationOp setReplicationOp = (SetReplicationOp)op;
479          short replication = fsNamesys.getBlockManager().adjustReplication(
480              setReplicationOp.replication);
481          fsDir.unprotectedSetReplication(
482              renameReservedPathsOnUpgrade(setReplicationOp.path, logVersion),
483                                          replication, null);
484          break;
485        }
486        case OP_CONCAT_DELETE: {
487          ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp)op;
488          String trg = renameReservedPathsOnUpgrade(concatDeleteOp.trg, logVersion);
489          String[] srcs = new String[concatDeleteOp.srcs.length];
490          for (int i=0; i<srcs.length; i++) {
491            srcs[i] =
492                renameReservedPathsOnUpgrade(concatDeleteOp.srcs[i], logVersion);
493          }
494          fsDir.unprotectedConcat(trg, srcs, concatDeleteOp.timestamp);
495          
496          if (toAddRetryCache) {
497            fsNamesys.addCacheEntry(concatDeleteOp.rpcClientId,
498                concatDeleteOp.rpcCallId);
499          }
500          break;
501        }
502        case OP_RENAME_OLD: {
503          RenameOldOp renameOp = (RenameOldOp)op;
504          final String src = renameReservedPathsOnUpgrade(renameOp.src, logVersion);
505          final String dst = renameReservedPathsOnUpgrade(renameOp.dst, logVersion);
506          fsDir.unprotectedRenameTo(src, dst,
507                                    renameOp.timestamp);
508          
509          if (toAddRetryCache) {
510            fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
511          }
512          break;
513        }
514        case OP_DELETE: {
515          DeleteOp deleteOp = (DeleteOp)op;
516          fsDir.unprotectedDelete(
517              renameReservedPathsOnUpgrade(deleteOp.path, logVersion),
518              deleteOp.timestamp);
519          
520          if (toAddRetryCache) {
521            fsNamesys.addCacheEntry(deleteOp.rpcClientId, deleteOp.rpcCallId);
522          }
523          break;
524        }
525        case OP_MKDIR: {
526          MkdirOp mkdirOp = (MkdirOp)op;
527          inodeId = getAndUpdateLastInodeId(mkdirOp.inodeId, logVersion,
528              lastInodeId);
529          fsDir.unprotectedMkdir(inodeId,
530              renameReservedPathsOnUpgrade(mkdirOp.path, logVersion),
531              mkdirOp.permissions, mkdirOp.aclEntries, mkdirOp.timestamp);
532          break;
533        }
534        case OP_SET_GENSTAMP_V1: {
535          SetGenstampV1Op setGenstampV1Op = (SetGenstampV1Op)op;
536          fsNamesys.setGenerationStampV1(setGenstampV1Op.genStampV1);
537          break;
538        }
539        case OP_SET_PERMISSIONS: {
540          SetPermissionsOp setPermissionsOp = (SetPermissionsOp)op;
541          fsDir.unprotectedSetPermission(
542              renameReservedPathsOnUpgrade(setPermissionsOp.src, logVersion),
543              setPermissionsOp.permissions);
544          break;
545        }
546        case OP_SET_OWNER: {
547          SetOwnerOp setOwnerOp = (SetOwnerOp)op;
548          fsDir.unprotectedSetOwner(
549              renameReservedPathsOnUpgrade(setOwnerOp.src, logVersion),
550              setOwnerOp.username, setOwnerOp.groupname);
551          break;
552        }
553        case OP_SET_NS_QUOTA: {
554          SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp)op;
555          fsDir.unprotectedSetQuota(
556              renameReservedPathsOnUpgrade(setNSQuotaOp.src, logVersion),
557              setNSQuotaOp.nsQuota, HdfsConstants.QUOTA_DONT_SET);
558          break;
559        }
560        case OP_CLEAR_NS_QUOTA: {
561          ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp)op;
562          fsDir.unprotectedSetQuota(
563              renameReservedPathsOnUpgrade(clearNSQuotaOp.src, logVersion),
564              HdfsConstants.QUOTA_RESET, HdfsConstants.QUOTA_DONT_SET);
565          break;
566        }
567    
568        case OP_SET_QUOTA:
569          SetQuotaOp setQuotaOp = (SetQuotaOp)op;
570          fsDir.unprotectedSetQuota(
571              renameReservedPathsOnUpgrade(setQuotaOp.src, logVersion),
572              setQuotaOp.nsQuota, setQuotaOp.dsQuota);
573          break;
574    
575        case OP_TIMES: {
576          TimesOp timesOp = (TimesOp)op;
577    
578          fsDir.unprotectedSetTimes(
579              renameReservedPathsOnUpgrade(timesOp.path, logVersion),
580              timesOp.mtime, timesOp.atime, true);
581          break;
582        }
583        case OP_SYMLINK: {
584          if (!FileSystem.areSymlinksEnabled()) {
585            throw new IOException("Symlinks not supported - please remove symlink before upgrading to this version of HDFS");
586          }
587          SymlinkOp symlinkOp = (SymlinkOp)op;
588          inodeId = getAndUpdateLastInodeId(symlinkOp.inodeId, logVersion,
589              lastInodeId);
590          fsDir.unprotectedAddSymlink(inodeId,
591              renameReservedPathsOnUpgrade(symlinkOp.path, logVersion),
592              symlinkOp.value, symlinkOp.mtime, symlinkOp.atime,
593              symlinkOp.permissionStatus);
594          
595          if (toAddRetryCache) {
596            fsNamesys.addCacheEntry(symlinkOp.rpcClientId, symlinkOp.rpcCallId);
597          }
598          break;
599        }
600        case OP_RENAME: {
601          RenameOp renameOp = (RenameOp)op;
602          fsDir.unprotectedRenameTo(
603              renameReservedPathsOnUpgrade(renameOp.src, logVersion),
604              renameReservedPathsOnUpgrade(renameOp.dst, logVersion),
605              renameOp.timestamp, renameOp.options);
606          
607          if (toAddRetryCache) {
608            fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
609          }
610          break;
611        }
612        case OP_GET_DELEGATION_TOKEN: {
613          GetDelegationTokenOp getDelegationTokenOp
614            = (GetDelegationTokenOp)op;
615    
616          fsNamesys.getDelegationTokenSecretManager()
617            .addPersistedDelegationToken(getDelegationTokenOp.token,
618                                         getDelegationTokenOp.expiryTime);
619          break;
620        }
621        case OP_RENEW_DELEGATION_TOKEN: {
622          RenewDelegationTokenOp renewDelegationTokenOp
623            = (RenewDelegationTokenOp)op;
624          fsNamesys.getDelegationTokenSecretManager()
625            .updatePersistedTokenRenewal(renewDelegationTokenOp.token,
626                                         renewDelegationTokenOp.expiryTime);
627          break;
628        }
629        case OP_CANCEL_DELEGATION_TOKEN: {
630          CancelDelegationTokenOp cancelDelegationTokenOp
631            = (CancelDelegationTokenOp)op;
632          fsNamesys.getDelegationTokenSecretManager()
633              .updatePersistedTokenCancellation(
634                  cancelDelegationTokenOp.token);
635          break;
636        }
637        case OP_UPDATE_MASTER_KEY: {
638          UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp)op;
639          fsNamesys.getDelegationTokenSecretManager()
640            .updatePersistedMasterKey(updateMasterKeyOp.key);
641          break;
642        }
643        case OP_REASSIGN_LEASE: {
644          ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp)op;
645    
646          Lease lease = fsNamesys.leaseManager.getLease(
647              reassignLeaseOp.leaseHolder);
648          final String path =
649              renameReservedPathsOnUpgrade(reassignLeaseOp.path, logVersion);
650          INodeFile pendingFile = fsDir.getINode(path).asFile();
651          Preconditions.checkState(pendingFile.isUnderConstruction());
652          fsNamesys.reassignLeaseInternal(lease,
653              path, reassignLeaseOp.newHolder, pendingFile);
654          break;
655        }
656        case OP_START_LOG_SEGMENT:
657        case OP_END_LOG_SEGMENT: {
658          // no data in here currently.
659          break;
660        }
661        case OP_CREATE_SNAPSHOT: {
662          CreateSnapshotOp createSnapshotOp = (CreateSnapshotOp) op;
663          final String snapshotRoot =
664              renameReservedPathsOnUpgrade(createSnapshotOp.snapshotRoot,
665                  logVersion);
666          String path = fsNamesys.getSnapshotManager().createSnapshot(
667              snapshotRoot, createSnapshotOp.snapshotName);
668          if (toAddRetryCache) {
669            fsNamesys.addCacheEntryWithPayload(createSnapshotOp.rpcClientId,
670                createSnapshotOp.rpcCallId, path);
671          }
672          break;
673        }
674        case OP_DELETE_SNAPSHOT: {
675          DeleteSnapshotOp deleteSnapshotOp = (DeleteSnapshotOp) op;
676          BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
677          List<INode> removedINodes = new ChunkedArrayList<INode>();
678          final String snapshotRoot =
679              renameReservedPathsOnUpgrade(deleteSnapshotOp.snapshotRoot,
680                  logVersion);
681          fsNamesys.getSnapshotManager().deleteSnapshot(
682              snapshotRoot, deleteSnapshotOp.snapshotName,
683              collectedBlocks, removedINodes);
684          fsNamesys.removeBlocksAndUpdateSafemodeTotal(collectedBlocks);
685          collectedBlocks.clear();
686          fsNamesys.dir.removeFromInodeMap(removedINodes);
687          removedINodes.clear();
688          
689          if (toAddRetryCache) {
690            fsNamesys.addCacheEntry(deleteSnapshotOp.rpcClientId,
691                deleteSnapshotOp.rpcCallId);
692          }
693          break;
694        }
695        case OP_RENAME_SNAPSHOT: {
696          RenameSnapshotOp renameSnapshotOp = (RenameSnapshotOp) op;
697          final String snapshotRoot =
698              renameReservedPathsOnUpgrade(renameSnapshotOp.snapshotRoot,
699                  logVersion);
700          fsNamesys.getSnapshotManager().renameSnapshot(
701              snapshotRoot, renameSnapshotOp.snapshotOldName,
702              renameSnapshotOp.snapshotNewName);
703          
704          if (toAddRetryCache) {
705            fsNamesys.addCacheEntry(renameSnapshotOp.rpcClientId,
706                renameSnapshotOp.rpcCallId);
707          }
708          break;
709        }
710        case OP_ALLOW_SNAPSHOT: {
711          AllowSnapshotOp allowSnapshotOp = (AllowSnapshotOp) op;
712          final String snapshotRoot =
713              renameReservedPathsOnUpgrade(allowSnapshotOp.snapshotRoot, logVersion);
714          fsNamesys.getSnapshotManager().setSnapshottable(
715              snapshotRoot, false);
716          break;
717        }
718        case OP_DISALLOW_SNAPSHOT: {
719          DisallowSnapshotOp disallowSnapshotOp = (DisallowSnapshotOp) op;
720          final String snapshotRoot =
721              renameReservedPathsOnUpgrade(disallowSnapshotOp.snapshotRoot,
722                  logVersion);
723          fsNamesys.getSnapshotManager().resetSnapshottable(
724              snapshotRoot);
725          break;
726        }
727        case OP_SET_GENSTAMP_V2: {
728          SetGenstampV2Op setGenstampV2Op = (SetGenstampV2Op) op;
729          fsNamesys.setGenerationStampV2(setGenstampV2Op.genStampV2);
730          break;
731        }
732        case OP_ALLOCATE_BLOCK_ID: {
733          AllocateBlockIdOp allocateBlockIdOp = (AllocateBlockIdOp) op;
734          fsNamesys.setLastAllocatedBlockId(allocateBlockIdOp.blockId);
735          break;
736        }
737        case OP_ROLLING_UPGRADE_START: {
738          if (startOpt == StartupOption.ROLLINGUPGRADE) {
739            final RollingUpgradeStartupOption rollingUpgradeOpt
740                = startOpt.getRollingUpgradeStartupOption(); 
741            if (rollingUpgradeOpt == RollingUpgradeStartupOption.ROLLBACK) {
742              throw new RollingUpgradeOp.RollbackException();
743            } else if (rollingUpgradeOpt == RollingUpgradeStartupOption.DOWNGRADE) {
744              //ignore upgrade marker
745              break;
746            }
747          }
748          // start rolling upgrade
749          final long startTime = ((RollingUpgradeOp) op).getTime();
750          fsNamesys.startRollingUpgradeInternal(startTime);
751          fsNamesys.triggerRollbackCheckpoint();
752          break;
753        }
754        case OP_ROLLING_UPGRADE_FINALIZE: {
755          final long finalizeTime = ((RollingUpgradeOp) op).getTime();
756          if (fsNamesys.isRollingUpgrade()) {
757            // Only do it when NN is actually doing rolling upgrade.
758            // We can get FINALIZE without corresponding START, if NN is restarted
759            // before this op is consumed and a new checkpoint is created.
760            fsNamesys.finalizeRollingUpgradeInternal(finalizeTime);
761          }
762          fsNamesys.getFSImage().updateStorageVersion();
763          fsNamesys.getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK,
764              NameNodeFile.IMAGE);
765          break;
766        }
767        case OP_ADD_CACHE_DIRECTIVE: {
768          AddCacheDirectiveInfoOp addOp = (AddCacheDirectiveInfoOp) op;
769          CacheDirectiveInfo result = fsNamesys.
770              getCacheManager().addDirectiveFromEditLog(addOp.directive);
771          if (toAddRetryCache) {
772            Long id = result.getId();
773            fsNamesys.addCacheEntryWithPayload(op.rpcClientId, op.rpcCallId, id);
774          }
775          break;
776        }
777        case OP_MODIFY_CACHE_DIRECTIVE: {
778          ModifyCacheDirectiveInfoOp modifyOp =
779              (ModifyCacheDirectiveInfoOp) op;
780          fsNamesys.getCacheManager().modifyDirectiveFromEditLog(
781              modifyOp.directive);
782          if (toAddRetryCache) {
783            fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
784          }
785          break;
786        }
787        case OP_REMOVE_CACHE_DIRECTIVE: {
788          RemoveCacheDirectiveInfoOp removeOp =
789              (RemoveCacheDirectiveInfoOp) op;
790          fsNamesys.getCacheManager().removeDirective(removeOp.id, null);
791          if (toAddRetryCache) {
792            fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
793          }
794          break;
795        }
796        case OP_ADD_CACHE_POOL: {
797          AddCachePoolOp addOp = (AddCachePoolOp) op;
798          fsNamesys.getCacheManager().addCachePool(addOp.info);
799          if (toAddRetryCache) {
800            fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
801          }
802          break;
803        }
804        case OP_MODIFY_CACHE_POOL: {
805          ModifyCachePoolOp modifyOp = (ModifyCachePoolOp) op;
806          fsNamesys.getCacheManager().modifyCachePool(modifyOp.info);
807          if (toAddRetryCache) {
808            fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
809          }
810          break;
811        }
812        case OP_REMOVE_CACHE_POOL: {
813          RemoveCachePoolOp removeOp = (RemoveCachePoolOp) op;
814          fsNamesys.getCacheManager().removeCachePool(removeOp.poolName);
815          if (toAddRetryCache) {
816            fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId);
817          }
818          break;
819        }
820        case OP_SET_ACL: {
821          SetAclOp setAclOp = (SetAclOp) op;
822          fsDir.unprotectedSetAcl(setAclOp.src, setAclOp.aclEntries);
823          break;
824        }
825        case OP_SET_XATTR: {
826          SetXAttrOp setXAttrOp = (SetXAttrOp) op;
827          fsDir.unprotectedSetXAttrs(setXAttrOp.src, setXAttrOp.xAttrs,
828              EnumSet.of(XAttrSetFlag.CREATE, XAttrSetFlag.REPLACE));
829          if (toAddRetryCache) {
830            fsNamesys.addCacheEntry(setXAttrOp.rpcClientId, setXAttrOp.rpcCallId);
831          }
832          break;
833        }
834        case OP_REMOVE_XATTR: {
835          RemoveXAttrOp removeXAttrOp = (RemoveXAttrOp) op;
836          fsDir.unprotectedRemoveXAttrs(removeXAttrOp.src,
837              removeXAttrOp.xAttrs);
838          if (toAddRetryCache) {
839            fsNamesys.addCacheEntry(removeXAttrOp.rpcClientId,
840                removeXAttrOp.rpcCallId);
841          }
842          break;
843        }
844        case OP_SET_STORAGE_POLICY: {
845          SetStoragePolicyOp setStoragePolicyOp = (SetStoragePolicyOp) op;
846          fsDir.unprotectedSetStoragePolicy(
847              renameReservedPathsOnUpgrade(setStoragePolicyOp.path, logVersion),
848              setStoragePolicyOp.policyId);
849          break;
850        }
851        default:
852          throw new IOException("Invalid operation read " + op.opCode);
853        }
854        return inodeId;
855      }
856      
857      private static String formatEditLogReplayError(EditLogInputStream in,
858          long recentOpcodeOffsets[], long txid) {
859        StringBuilder sb = new StringBuilder();
860        sb.append("Error replaying edit log at offset " + in.getPosition());
861        sb.append(".  Expected transaction ID was ").append(txid);
862        if (recentOpcodeOffsets[0] != -1) {
863          Arrays.sort(recentOpcodeOffsets);
864          sb.append("\nRecent opcode offsets:");
865          for (long offset : recentOpcodeOffsets) {
866            if (offset != -1) {
867              sb.append(' ').append(offset);
868            }
869          }
870        }
871        return sb.toString();
872      }
873    
874      /**
875       * Add a new block into the given INodeFile
876       */
877      private void addNewBlock(FSDirectory fsDir, AddBlockOp op, INodeFile file)
878          throws IOException {
879        BlockInfo[] oldBlocks = file.getBlocks();
880        Block pBlock = op.getPenultimateBlock();
881        Block newBlock= op.getLastBlock();
882        
883        if (pBlock != null) { // the penultimate block is not null
884          Preconditions.checkState(oldBlocks != null && oldBlocks.length > 0);
885          // compare pBlock with the last block of oldBlocks
886          Block oldLastBlock = oldBlocks[oldBlocks.length - 1];
887          if (oldLastBlock.getBlockId() != pBlock.getBlockId()
888              || oldLastBlock.getGenerationStamp() != pBlock.getGenerationStamp()) {
889            throw new IOException(
890                "Mismatched block IDs or generation stamps for the old last block of file "
891                    + op.getPath() + ", the old last block is " + oldLastBlock
892                    + ", and the block read from editlog is " + pBlock);
893          }
894          
895          oldLastBlock.setNumBytes(pBlock.getNumBytes());
896          if (oldLastBlock instanceof BlockInfoUnderConstruction) {
897            fsNamesys.getBlockManager().forceCompleteBlock(file,
898                (BlockInfoUnderConstruction) oldLastBlock);
899            fsNamesys.getBlockManager().processQueuedMessagesForBlock(pBlock);
900          }
901        } else { // the penultimate block is null
902          Preconditions.checkState(oldBlocks == null || oldBlocks.length == 0);
903        }
904        // add the new block
905        BlockInfo newBI = new BlockInfoUnderConstruction(
906              newBlock, file.getBlockReplication());
907        fsNamesys.getBlockManager().addBlockCollection(newBI, file);
908        file.addBlock(newBI);
909        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
910      }
911      
  /**
   * Brings the block list of the given file in line with the block list
   * carried by an edit log op.
   * <p>
   * Blocks present in both lists are updated in place (length and generation
   * stamp) and, where applicable, force-completed. If the op lists exactly
   * one block fewer than the file, the file's last block is removed; if it
   * lists more blocks, the extra ones are appended to the file.
   *
   * @param fsDir directory used to remove a trailing block from the file
   * @param op the op supplying the path, the new block list and whether the
   *          last block should be completed
   * @param file the file whose blocks are updated
   * @throws IOException if a block ID or generation stamp does not match, if
   *           a block is to be removed from a file that is not under
   *           construction, if more than one block would be removed, or if
   *           the removal fails for an op that is not an
   *           {@code UpdateBlocksOp}
   */
  private void updateBlocks(FSDirectory fsDir, BlockListUpdatingOp op,
      INodeFile file) throws IOException {
    // Update its block list
    BlockInfo[] oldBlocks = file.getBlocks();
    Block[] newBlocks = op.getBlocks();
    String path = op.getPath();
    
    // Are we only updating the last block's gen stamp? Equal list lengths
    // are taken as the indicator; only then may the last block's generation
    // stamp differ from the one in the op.
    boolean isGenStampUpdate = oldBlocks.length == newBlocks.length;
    
    // First, update blocks in common
    for (int i = 0; i < oldBlocks.length && i < newBlocks.length; i++) {
      BlockInfo oldBlock = oldBlocks[i];
      Block newBlock = newBlocks[i];
      
      boolean isLastBlock = i == newBlocks.length - 1;
      // IDs must always match. A generation stamp difference is tolerated
      // only on the last block of a same-length (gen stamp) update.
      if (oldBlock.getBlockId() != newBlock.getBlockId() ||
          (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() && 
              !(isGenStampUpdate && isLastBlock))) {
        throw new IOException("Mismatched block IDs or generation stamps, " +
            "attempting to replace block " + oldBlock + " with " + newBlock +
            " as block # " + i + "/" + newBlocks.length + " of " +
            path);
      }
      
      oldBlock.setNumBytes(newBlock.getNumBytes());
      // Must be evaluated before the generation stamp is overwritten below.
      boolean changeMade =
        oldBlock.getGenerationStamp() != newBlock.getGenerationStamp();
      oldBlock.setGenerationStamp(newBlock.getGenerationStamp());
      
      // Complete every under-construction block except the last one; the
      // last one is completed only if the op asks for it.
      if (oldBlock instanceof BlockInfoUnderConstruction &&
          (!isLastBlock || op.shouldCompleteLastBlock())) {
        changeMade = true;
        fsNamesys.getBlockManager().forceCompleteBlock(file,
            (BlockInfoUnderConstruction) oldBlock);
      }
      if (changeMade) {
        // The state or gen-stamp of the block has changed. So, we may be
        // able to process some messages from datanodes that we previously
        // were unable to process.
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }
    
    if (newBlocks.length < oldBlocks.length) {
      // We're removing a block from the file, e.g. abandonBlock(...)
      if (!file.isUnderConstruction()) {
        throw new IOException("Trying to remove a block from file " +
            path + " which is not under construction.");
      }
      // Only the single trailing block may be dropped.
      if (newBlocks.length != oldBlocks.length - 1) {
        throw new IOException("Trying to remove more than one block from file "
            + path);
      }
      Block oldBlock = oldBlocks[oldBlocks.length - 1];
      boolean removed = fsDir.unprotectedRemoveBlock(path, file, oldBlock);
      // A failed removal is tolerated only when the op is an UpdateBlocksOp.
      if (!removed && !(op instanceof UpdateBlocksOp)) {
        throw new IOException("Trying to delete non-existant block " + oldBlock);
      }
    } else if (newBlocks.length > oldBlocks.length) {
      // We're adding blocks
      for (int i = oldBlocks.length; i < newBlocks.length; i++) {
        Block newBlock = newBlocks[i];
        BlockInfo newBI;
        if (!op.shouldCompleteLastBlock()) {
          // TODO: shouldn't this only be true for the last block?
          // what about an old-version fsync() where fsync isn't called
          // until several blocks in?
          newBI = new BlockInfoUnderConstruction(
              newBlock, file.getBlockReplication());
        } else {
          // OP_CLOSE should add finalized blocks. This code path
          // is only executed when loading edits written by prior
          // versions of Hadoop. Current versions always log
          // OP_ADD operations as each block is allocated.
          newBI = new BlockInfo(newBlock, file.getBlockReplication());
        }
        // Register the block with the block manager, then attach it to the
        // file and replay any messages queued for it.
        fsNamesys.getBlockManager().addBlockCollection(newBI, file);
        file.addBlock(newBI);
        fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock);
      }
    }
  }
999    
1000      private static void dumpOpCounts(
1001          EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts) {
1002        StringBuilder sb = new StringBuilder();
1003        sb.append("Summary of operations loaded from edit log:\n  ");
1004        Joiner.on("\n  ").withKeyValueSeparator("=").appendTo(sb, opCounts);
1005        FSImage.LOG.debug(sb.toString());
1006      }
1007    
1008      private void incrOpCount(FSEditLogOpCodes opCode,
1009          EnumMap<FSEditLogOpCodes, Holder<Integer>> opCounts, Step step,
1010          Counter counter) {
1011        Holder<Integer> holder = opCounts.get(opCode);
1012        if (holder == null) {
1013          holder = new Holder<Integer>(1);
1014          opCounts.put(opCode, holder);
1015        } else {
1016          holder.held++;
1017        }
1018        counter.increment();
1019      }
1020    
1021      /**
1022       * Throw appropriate exception during upgrade from 203, when editlog loading
1023       * could fail due to opcode conflicts.
1024       */
1025      private void check203UpgradeFailure(int logVersion, Throwable e)
1026          throws IOException {
1027        // 0.20.203 version version has conflicting opcodes with the later releases.
1028        // The editlog must be emptied by restarting the namenode, before proceeding
1029        // with the upgrade.
1030        if (Storage.is203LayoutVersion(logVersion)
1031            && logVersion != HdfsConstants.NAMENODE_LAYOUT_VERSION) {
1032          String msg = "During upgrade failed to load the editlog version "
1033              + logVersion + " from release 0.20.203. Please go back to the old "
1034              + " release and restart the namenode. This empties the editlog "
1035              + " and saves the namespace. Resume the upgrade after this step.";
1036          throw new IOException(msg, e);
1037        }
1038      }
1039      
1040      /**
1041       * Find the last valid transaction ID in the stream.
1042       * If there are invalid or corrupt transactions in the middle of the stream,
1043       * validateEditLog will skip over them.
1044       * This reads through the stream but does not close it.
1045       */
1046      static EditLogValidation validateEditLog(EditLogInputStream in) {
1047        long lastPos = 0;
1048        long lastTxId = HdfsConstants.INVALID_TXID;
1049        long numValid = 0;
1050        FSEditLogOp op = null;
1051        while (true) {
1052          lastPos = in.getPosition();
1053          try {
1054            if ((op = in.readOp()) == null) {
1055              break;
1056            }
1057          } catch (Throwable t) {
1058            FSImage.LOG.warn("Caught exception after reading " + numValid +
1059                " ops from " + in + " while determining its valid length." +
1060                "Position was " + lastPos, t);
1061            in.resync();
1062            FSImage.LOG.warn("After resync, position is " + in.getPosition());
1063            continue;
1064          }
1065          if (lastTxId == HdfsConstants.INVALID_TXID
1066              || op.getTransactionId() > lastTxId) {
1067            lastTxId = op.getTransactionId();
1068          }
1069          numValid++;
1070        }
1071        return new EditLogValidation(lastPos, lastTxId, false);
1072      }
1073    
1074      static EditLogValidation scanEditLog(EditLogInputStream in) {
1075        long lastPos = 0;
1076        long lastTxId = HdfsConstants.INVALID_TXID;
1077        long numValid = 0;
1078        FSEditLogOp op = null;
1079        while (true) {
1080          lastPos = in.getPosition();
1081          try {
1082            if ((op = in.readOp()) == null) { // TODO
1083              break;
1084            }
1085          } catch (Throwable t) {
1086            FSImage.LOG.warn("Caught exception after reading " + numValid +
1087                " ops from " + in + " while determining its valid length." +
1088                "Position was " + lastPos, t);
1089            in.resync();
1090            FSImage.LOG.warn("After resync, position is " + in.getPosition());
1091            continue;
1092          }
1093          if (lastTxId == HdfsConstants.INVALID_TXID
1094              || op.getTransactionId() > lastTxId) {
1095            lastTxId = op.getTransactionId();
1096          }
1097          numValid++;
1098        }
1099        return new EditLogValidation(lastPos, lastTxId, false);
1100      }
1101    
1102      static class EditLogValidation {
1103        private final long validLength;
1104        private final long endTxId;
1105        private final boolean hasCorruptHeader;
1106    
1107        EditLogValidation(long validLength, long endTxId,
1108            boolean hasCorruptHeader) {
1109          this.validLength = validLength;
1110          this.endTxId = endTxId;
1111          this.hasCorruptHeader = hasCorruptHeader;
1112        }
1113    
1114        long getValidLength() { return validLength; }
1115    
1116        long getEndTxId() { return endTxId; }
1117    
1118        boolean hasCorruptHeader() { return hasCorruptHeader; }
1119      }
1120    
1121      /**
1122       * Stream wrapper that keeps track of the current stream position.
1123       * 
1124       * This stream also allows us to set a limit on how many bytes we can read
1125       * without getting an exception.
1126       */
1127      public static class PositionTrackingInputStream extends FilterInputStream
1128          implements StreamLimiter {
1129        private long curPos = 0;
1130        private long markPos = -1;
1131        private long limitPos = Long.MAX_VALUE;
1132    
1133        public PositionTrackingInputStream(InputStream is) {
1134          super(is);
1135        }
1136    
1137        private void checkLimit(long amt) throws IOException {
1138          long extra = (curPos + amt) - limitPos;
1139          if (extra > 0) {
1140            throw new IOException("Tried to read " + amt + " byte(s) past " +
1141                "the limit at offset " + limitPos);
1142          }
1143        }
1144        
1145        @Override
1146        public int read() throws IOException {
1147          checkLimit(1);
1148          int ret = super.read();
1149          if (ret != -1) curPos++;
1150          return ret;
1151        }
1152    
1153        @Override
1154        public int read(byte[] data) throws IOException {
1155          checkLimit(data.length);
1156          int ret = super.read(data);
1157          if (ret > 0) curPos += ret;
1158          return ret;
1159        }
1160    
1161        @Override
1162        public int read(byte[] data, int offset, int length) throws IOException {
1163          checkLimit(length);
1164          int ret = super.read(data, offset, length);
1165          if (ret > 0) curPos += ret;
1166          return ret;
1167        }
1168    
1169        @Override
1170        public void setLimit(long limit) {
1171          limitPos = curPos + limit;
1172        }
1173    
1174        @Override
1175        public void clearLimit() {
1176          limitPos = Long.MAX_VALUE;
1177        }
1178    
1179        @Override
1180        public void mark(int limit) {
1181          super.mark(limit);
1182          markPos = curPos;
1183        }
1184    
1185        @Override
1186        public void reset() throws IOException {
1187          if (markPos == -1) {
1188            throw new IOException("Not marked!");
1189          }
1190          super.reset();
1191          curPos = markPos;
1192          markPos = -1;
1193        }
1194    
1195        public long getPos() {
1196          return curPos;
1197        }
1198        
1199        @Override
1200        public long skip(long amt) throws IOException {
1201          long extra = (curPos + amt) - limitPos;
1202          if (extra > 0) {
1203            throw new IOException("Tried to skip " + extra + " bytes past " +
1204                "the limit at offset " + limitPos);
1205          }
1206          long ret = super.skip(amt);
1207          curPos += ret;
1208          return ret;
1209        }
1210      }
1211    
1212      public long getLastAppliedTxId() {
1213        return lastAppliedTxId;
1214      }
1215    
1216      /**
1217       * Creates a Step used for updating startup progress, populated with
1218       * information from the given edits.  The step always includes the log's name.
1219       * If the log has a known length, then the length is included in the step too.
1220       * 
1221       * @param edits EditLogInputStream to use for populating step
1222       * @return Step populated with information from edits
1223       * @throws IOException thrown if there is an I/O error
1224       */
1225      private static Step createStartupProgressStep(EditLogInputStream edits)
1226          throws IOException {
1227        long length = edits.length();
1228        String name = edits.getCurrentStreamName();
1229        return length != -1 ? new Step(name, length) : new Step(name);
1230      }
1231    }