001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.namenode;
019
020 import static org.apache.hadoop.util.Time.now;
021
022 import java.io.DataInput;
023 import java.io.DataInputStream;
024 import java.io.DataOutputStream;
025 import java.io.File;
026 import java.io.FileInputStream;
027 import java.io.FileNotFoundException;
028 import java.io.FileOutputStream;
029 import java.io.IOException;
030 import java.security.DigestInputStream;
031 import java.security.DigestOutputStream;
032 import java.security.MessageDigest;
033 import java.util.ArrayList;
034 import java.util.Arrays;
035 import java.util.Collection;
036 import java.util.HashMap;
037 import java.util.List;
038 import java.util.Map;
039 import java.util.TreeMap;
040
041 import org.apache.commons.logging.Log;
042 import org.apache.hadoop.classification.InterfaceAudience;
043 import org.apache.hadoop.classification.InterfaceStability;
044 import org.apache.hadoop.conf.Configuration;
045 import org.apache.hadoop.fs.FileSystem;
046 import org.apache.hadoop.fs.Path;
047 import org.apache.hadoop.fs.PathIsNotDirectoryException;
048 import org.apache.hadoop.fs.UnresolvedLinkException;
049 import org.apache.hadoop.fs.permission.PermissionStatus;
050 import org.apache.hadoop.hdfs.DFSUtil;
051 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
052 import org.apache.hadoop.hdfs.protocol.LayoutFlags;
053 import org.apache.hadoop.hdfs.protocol.LayoutVersion;
054 import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
055 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
056 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
057 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
058 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
059 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
060 import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature;
061 import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList;
062 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
063 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat;
064 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
065 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
066 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
067 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
068 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
069 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
070 import org.apache.hadoop.hdfs.util.ReadOnlyList;
071 import org.apache.hadoop.io.IOUtils;
072 import org.apache.hadoop.io.MD5Hash;
073 import org.apache.hadoop.io.Text;
074 import org.apache.hadoop.util.StringUtils;
075
076 import com.google.common.annotations.VisibleForTesting;
077 import com.google.common.base.Preconditions;
078
079 /**
080 * Contains inner classes for reading or writing the on-disk format for
081 * FSImages.
082 *
083 * In particular, the format of the FSImage looks like:
084 * <pre>
085 * FSImage {
086 * layoutVersion: int, namespaceID: int, numberItemsInFSDirectoryTree: long,
087 * namesystemGenerationStampV1: long, namesystemGenerationStampV2: long,
088 * generationStampAtBlockIdSwitch:long, lastAllocatedBlockId:
089 * long transactionID: long, snapshotCounter: int, numberOfSnapshots: int,
090 * numOfSnapshottableDirs: int,
091 * {FSDirectoryTree, FilesUnderConstruction, SecretManagerState} (can be compressed)
092 * }
093 *
094 * FSDirectoryTree (if {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is supported) {
095 * INodeInfo of root, numberOfChildren of root: int
096 * [list of INodeInfo of root's children],
097 * [list of INodeDirectoryInfo of root's directory children]
098 * }
099 *
100 * FSDirectoryTree (if {@link Feature#FSIMAGE_NAME_OPTIMIZATION} not supported){
101 * [list of INodeInfo of INodes in topological order]
102 * }
103 *
104 * INodeInfo {
105 * {
106 * localName: short + byte[]
107 * } when {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is supported
108 * or
109 * {
110 * fullPath: byte[]
111 * } when {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is not supported
112 * replicationFactor: short, modificationTime: long,
113 * accessTime: long, preferredBlockSize: long,
114 * numberOfBlocks: int (-1 for INodeDirectory, -2 for INodeSymLink),
115 * {
116 * nsQuota: long, dsQuota: long,
117 * {
118 * isINodeSnapshottable: byte,
119 * isINodeWithSnapshot: byte (if isINodeSnapshottable is false)
120 * } (when {@link Feature#SNAPSHOT} is supported),
121 * fsPermission: short, PermissionStatus
122 * } for INodeDirectory
123 * or
124 * {
125 * symlinkString, fsPermission: short, PermissionStatus
126 * } for INodeSymlink
127 * or
128 * {
129 * [list of BlockInfo]
130 * [list of FileDiff]
131 * {
132 * isINodeFileUnderConstructionSnapshot: byte,
133 * {clientName: short + byte[], clientMachine: short + byte[]} (when
134 * isINodeFileUnderConstructionSnapshot is true),
135 * } (when {@link Feature#SNAPSHOT} is supported and writing snapshotINode),
136 * fsPermission: short, PermissionStatus
137 * } for INodeFile
138 * }
139 *
140 * INodeDirectoryInfo {
141 * fullPath of the directory: short + byte[],
142 * numberOfChildren: int, [list of INodeInfo of children INode],
143 * {
144 * numberOfSnapshots: int,
145 * [list of Snapshot] (when NumberOfSnapshots is positive),
146 * numberOfDirectoryDiffs: int,
 * [list of DirectoryDiff] (when numberOfDirectoryDiffs is positive),
148 * number of children that are directories,
149 * [list of INodeDirectoryInfo of the directory children] (includes
150 * snapshot copies of deleted sub-directories)
151 * } (when {@link Feature#SNAPSHOT} is supported),
152 * }
153 *
154 * Snapshot {
155 * snapshotID: int, root of Snapshot: INodeDirectoryInfo (its local name is
156 * the name of the snapshot)
157 * }
158 *
159 * DirectoryDiff {
160 * full path of the root of the associated Snapshot: short + byte[],
161 * childrenSize: int,
162 * isSnapshotRoot: byte,
163 * snapshotINodeIsNotNull: byte (when isSnapshotRoot is false),
164 * snapshotINode: INodeDirectory (when SnapshotINodeIsNotNull is true), Diff
165 * }
166 *
167 * Diff {
168 * createdListSize: int, [Local name of INode in created list],
169 * deletedListSize: int, [INode in deleted list: INodeInfo]
170 * }
171 *
172 * FileDiff {
173 * full path of the root of the associated Snapshot: short + byte[],
174 * fileSize: long,
175 * snapshotINodeIsNotNull: byte,
176 * snapshotINode: INodeFile (when SnapshotINodeIsNotNull is true), Diff
177 * }
178 * </pre>
179 */
180 @InterfaceAudience.Private
181 @InterfaceStability.Evolving
182 public class FSImageFormat {
  /** Shared with {@link FSImage} so load/save log lines appear together. */
  private static final Log LOG = FSImage.LOG;

  // Static-only class: private constructor prevents instantiation.
  private FSImageFormat() {}
187
  /**
   * Read-side contract shared by the legacy and protobuf image loaders:
   * exposes the digest and last transaction id of the image that was loaded.
   */
  interface AbstractLoader {
    /** @return MD5 digest of the image file that was loaded. */
    MD5Hash getLoadedImageMd5();
    /** @return transaction id of the last edit represented by the image. */
    long getLoadedImageTxId();
  }
192
193 static class LoaderDelegator implements AbstractLoader {
194 private AbstractLoader impl;
195 private final Configuration conf;
196 private final FSNamesystem fsn;
197
198 LoaderDelegator(Configuration conf, FSNamesystem fsn) {
199 this.conf = conf;
200 this.fsn = fsn;
201 }
202
203 @Override
204 public MD5Hash getLoadedImageMd5() {
205 return impl.getLoadedImageMd5();
206 }
207
208 @Override
209 public long getLoadedImageTxId() {
210 return impl.getLoadedImageTxId();
211 }
212
213 public void load(File file, boolean requireSameLayoutVersion)
214 throws IOException {
215 Preconditions.checkState(impl == null, "Image already loaded!");
216
217 FileInputStream is = null;
218 try {
219 is = new FileInputStream(file);
220 byte[] magic = new byte[FSImageUtil.MAGIC_HEADER.length];
221 IOUtils.readFully(is, magic, 0, magic.length);
222 if (Arrays.equals(magic, FSImageUtil.MAGIC_HEADER)) {
223 FSImageFormatProtobuf.Loader loader = new FSImageFormatProtobuf.Loader(
224 conf, fsn, requireSameLayoutVersion);
225 impl = loader;
226 loader.load(file);
227 } else {
228 Loader loader = new Loader(conf, fsn);
229 impl = loader;
230 loader.load(file);
231 }
232 } finally {
233 IOUtils.cleanup(LOG, is);
234 }
235 }
236 }
237
  /**
   * Construct a loader class to load the image. It chooses the loader based on
   * the layout version.
   *
   * @param conf configuration consulted while reading the image
   * @param fsn the namesystem that will be populated from the image
   * @return a delegator that selects the concrete loader at load() time
   */
  public static LoaderDelegator newLoader(Configuration conf, FSNamesystem fsn) {
    return new LoaderDelegator(conf, fsn);
  }
245
246 /**
247 * A one-shot class responsible for loading an image. The load() function
248 * should be called once, after which the getter methods may be used to retrieve
249 * information about the image that was loaded, if loading was successful.
250 */
251 public static class Loader implements AbstractLoader {
    private final Configuration conf;
    /** which namesystem this loader is working for */
    private final FSNamesystem namesystem;

    /** Set to true once a file has been loaded using this loader. */
    private boolean loaded = false;

    /** The transaction ID of the last edit represented by the loaded file */
    private long imgTxId;
    /** The MD5 sum of the loaded file */
    private MD5Hash imgDigest;

    // Snapshot id -> Snapshot; populated only when the image layout
    // supports snapshots (see load()).
    private Map<Integer, Snapshot> snapshotMap = null;
    // Tracks INodeReference subtrees so shared subtrees are loaded once.
    private final ReferenceMap referenceMap = new ReferenceMap();

    /**
     * @param conf configuration for image loading (e.g. compression codecs)
     * @param namesystem the namesystem whose directory tree is populated
     */
    Loader(Configuration conf, FSNamesystem namesystem) {
      this.conf = conf;
      this.namesystem = namesystem;
    }
271
272 /**
273 * Return the MD5 checksum of the image that has been loaded.
274 * @throws IllegalStateException if load() has not yet been called.
275 */
276 @Override
277 public MD5Hash getLoadedImageMd5() {
278 checkLoaded();
279 return imgDigest;
280 }
281
282 @Override
283 public long getLoadedImageTxId() {
284 checkLoaded();
285 return imgTxId;
286 }
287
288 /**
289 * Throw IllegalStateException if load() has not yet been called.
290 */
291 private void checkLoaded() {
292 if (!loaded) {
293 throw new IllegalStateException("Image not yet loaded!");
294 }
295 }
296
297 /**
298 * Throw IllegalStateException if load() has already been called.
299 */
300 private void checkNotLoaded() {
301 if (loaded) {
302 throw new IllegalStateException("Image already loaded!");
303 }
304 }
305
    /**
     * Load a legacy-layout (pre-protobuf) fsimage file into the namesystem.
     * Fields are read in the exact order the legacy writer emitted them;
     * reordering any read below would corrupt the load. On success the MD5
     * digest of the file is recorded and this loader is marked loaded.
     *
     * @param curFile the fsimage file to read; must not be null
     * @throws IOException on read errors or an inconsistent image
     */
    public void load(File curFile) throws IOException {
      checkNotLoaded();
      assert curFile != null : "curFile is null";

      StartupProgress prog = NameNode.getStartupProgress();
      Step step = new Step(StepType.INODES);
      prog.beginStep(Phase.LOADING_FSIMAGE, step);
      long startTime = now();

      //
      // Load in bits
      //
      // Every byte read flows through the digester so imgDigest matches
      // the on-disk file exactly.
      MessageDigest digester = MD5Hash.getDigester();
      DigestInputStream fin = new DigestInputStream(
          new FileInputStream(curFile), digester);

      DataInputStream in = new DataInputStream(fin);
      try {
        // read image version: first appeared in version -1
        int imgVersion = in.readInt();
        if (getLayoutVersion() != imgVersion) {
          throw new InconsistentFSStateException(curFile,
              "imgVersion " + imgVersion +
              " expected to be " + getLayoutVersion());
        }
        boolean supportSnapshot = NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.SNAPSHOT, imgVersion);
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.ADD_LAYOUT_FLAGS, imgVersion)) {
          LayoutFlags.read(in);
        }

        // read namespaceID: first appeared in version -2
        in.readInt();

        long numFiles = in.readLong();

        // read in the last generation stamp for legacy blocks.
        long genstamp = in.readLong();
        namesystem.setGenerationStampV1(genstamp);

        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.SEQUENTIAL_BLOCK_ID, imgVersion)) {
          // read the starting generation stamp for sequential block IDs
          genstamp = in.readLong();
          namesystem.setGenerationStampV2(genstamp);

          // read the last generation stamp for blocks created after
          // the switch to sequential block IDs.
          long stampAtIdSwitch = in.readLong();
          namesystem.setGenerationStampV1Limit(stampAtIdSwitch);

          // read the max sequential block ID.
          long maxSequentialBlockId = in.readLong();
          namesystem.setLastAllocatedBlockId(maxSequentialBlockId);
        } else {
          long startingGenStamp = namesystem.upgradeGenerationStampToV2();
          // This is an upgrade.
          LOG.info("Upgrading to sequential block IDs. Generation stamp " +
                   "for new blocks set to " + startingGenStamp);
        }

        // read the transaction ID of the last edit represented by
        // this image
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.STORED_TXIDS, imgVersion)) {
          imgTxId = in.readLong();
        } else {
          imgTxId = 0;
        }

        // read the last allocated inode id in the fsimage
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.ADD_INODE_ID, imgVersion)) {
          long lastInodeId = in.readLong();
          namesystem.resetLastInodeId(lastInodeId);
          if (LOG.isDebugEnabled()) {
            LOG.debug("load last allocated InodeId from fsimage:" + lastInodeId);
          }
        } else {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Old layout version doesn't have inode id."
                + " Will assign new id for each inode.");
          }
        }

        if (supportSnapshot) {
          // Snapshot table precedes the directory tree in the stream.
          snapshotMap = namesystem.getSnapshotManager().read(in, this);
        }

        // read compression related info
        FSImageCompression compression;
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.FSIMAGE_COMPRESSION, imgVersion)) {
          compression = FSImageCompression.readCompressionHeader(conf, in);
        } else {
          compression = FSImageCompression.createNoopCompression();
        }
        // Everything after this point may be compressed; re-wrap the
        // digesting stream so the digest still covers the raw bytes.
        in = compression.unwrapInputStream(fin);

        LOG.info("Loading image file " + curFile + " using " + compression);

        // load all inodes
        LOG.info("Number of files = " + numFiles);
        prog.setTotal(Phase.LOADING_FSIMAGE, step, numFiles);
        Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step);
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.FSIMAGE_NAME_OPTIMIZATION, imgVersion)) {
          if (supportSnapshot) {
            loadLocalNameINodesWithSnapshot(numFiles, in, counter);
          } else {
            loadLocalNameINodes(numFiles, in, counter);
          }
        } else {
          loadFullNameINodes(numFiles, in, counter);
        }

        loadFilesUnderConstruction(in, supportSnapshot, counter);
        prog.endStep(Phase.LOADING_FSIMAGE, step);
        // Now that the step is finished, set counter equal to total to adjust
        // for possible under-counting due to reference inodes.
        prog.setCount(Phase.LOADING_FSIMAGE, step, numFiles);

        loadSecretManagerState(in);

        loadCacheManagerState(in);

        // make sure to read to the end of file
        boolean eof = (in.read() == -1);
        assert eof : "Should have reached the end of image file " + curFile;
      } finally {
        // Closing 'in' closes the whole wrapped stream chain.
        in.close();
      }

      imgDigest = new MD5Hash(digester.digest());
      loaded = true;

      LOG.info("Image file " + curFile + " of size " + curFile.length() +
          " bytes loaded in " + (now() - startTime)/1000 + " seconds.");
    }
446
447 /** Update the root node's attributes */
448 private void updateRootAttr(INodeWithAdditionalFields root) {
449 final Quota.Counts q = root.getQuotaCounts();
450 final long nsQuota = q.get(Quota.NAMESPACE);
451 final long dsQuota = q.get(Quota.DISKSPACE);
452 FSDirectory fsDir = namesystem.dir;
453 if (nsQuota != -1 || dsQuota != -1) {
454 fsDir.rootDir.getDirectoryWithQuotaFeature().setQuota(nsQuota, dsQuota);
455 }
456 fsDir.rootDir.cloneModificationTime(root);
457 fsDir.rootDir.clonePermissionStatus(root);
458 }
459
460 /**
461 * Load fsimage files when 1) only local names are stored,
462 * and 2) snapshot is supported.
463 *
464 * @param numFiles number of files expected to be read
465 * @param in Image input stream
466 * @param counter Counter to increment for namenode startup progress
467 */
468 private void loadLocalNameINodesWithSnapshot(long numFiles, DataInput in,
469 Counter counter) throws IOException {
470 assert NameNodeLayoutVersion.supports(
471 LayoutVersion.Feature.FSIMAGE_NAME_OPTIMIZATION, getLayoutVersion());
472 assert NameNodeLayoutVersion.supports(
473 LayoutVersion.Feature.SNAPSHOT, getLayoutVersion());
474
475 // load root
476 loadRoot(in, counter);
477 // load rest of the nodes recursively
478 loadDirectoryWithSnapshot(in, counter);
479 }
480
481 /**
482 * load fsimage files assuming only local names are stored. Used when
483 * snapshots are not supported by the layout version.
484 *
485 * @param numFiles number of files expected to be read
486 * @param in image input stream
487 * @param counter Counter to increment for namenode startup progress
488 * @throws IOException
489 */
490 private void loadLocalNameINodes(long numFiles, DataInput in, Counter counter)
491 throws IOException {
492 assert NameNodeLayoutVersion.supports(
493 LayoutVersion.Feature.FSIMAGE_NAME_OPTIMIZATION, getLayoutVersion());
494 assert numFiles > 0;
495
496 // load root
497 loadRoot(in, counter);
498 // have loaded the first file (the root)
499 numFiles--;
500
501 // load rest of the nodes directory by directory
502 while (numFiles > 0) {
503 numFiles -= loadDirectory(in, counter);
504 }
505 if (numFiles != 0) {
506 throw new IOException("Read unexpect number of files: " + -numFiles);
507 }
508 }
509
510 /**
511 * Load information about root, and use the information to update the root
512 * directory of NameSystem.
513 * @param in The {@link DataInput} instance to read.
514 * @param counter Counter to increment for namenode startup progress
515 */
516 private void loadRoot(DataInput in, Counter counter)
517 throws IOException {
518 // load root
519 if (in.readShort() != 0) {
520 throw new IOException("First node is not root");
521 }
522 final INodeDirectory root = loadINode(null, false, in, counter)
523 .asDirectory();
524 // update the root's attributes
525 updateRootAttr(root);
526 }
527
528 /** Load children nodes for the parent directory. */
529 private int loadChildren(INodeDirectory parent, DataInput in,
530 Counter counter) throws IOException {
531 int numChildren = in.readInt();
532 for (int i = 0; i < numChildren; i++) {
533 // load single inode
534 INode newNode = loadINodeWithLocalName(false, in, true, counter);
535 addToParent(parent, newNode);
536 }
537 return numChildren;
538 }
539
    /**
     * Load a directory when snapshot is supported. The stream encodes, in
     * order: the parent inode id, the snapshot list (if any), the children,
     * the directory diff list, and finally the sub-directory count followed
     * by each sub-directory in the same recursive format.
     *
     * @param in The {@link DataInput} instance to read.
     * @param counter Counter to increment for namenode startup progress
     */
    private void loadDirectoryWithSnapshot(DataInput in, Counter counter)
        throws IOException {
      // Step 1. Identify the parent INode
      long inodeId = in.readLong();
      final INodeDirectory parent = this.namesystem.dir.getInode(inodeId)
          .asDirectory();

      // Check if the whole subtree has been saved (for reference nodes)
      boolean toLoadSubtree = referenceMap.toProcessSubtree(parent.getId());
      if (!toLoadSubtree) {
        // Subtree already materialized via an INodeReference; skip it.
        return;
      }

      // Step 2. Load snapshots if parent is snapshottable
      // numSnapshots < 0 means the directory is not snapshottable.
      int numSnapshots = in.readInt();
      if (numSnapshots >= 0) {
        // load snapshots and snapshotQuota
        SnapshotFSImageFormat.loadSnapshotList(parent, numSnapshots, in, this);
        if (parent.getDirectorySnapshottableFeature().getSnapshotQuota() > 0) {
          // add the directory to the snapshottable directory list in
          // SnapshotManager. Note that we only add root when its snapshot quota
          // is positive.
          this.namesystem.getSnapshotManager().addSnapshottable(parent);
        }
      }

      // Step 3. Load children nodes under parent
      loadChildren(parent, in, counter);

      // Step 4. load Directory Diff List
      SnapshotFSImageFormat.loadDirectoryDiffList(parent, in, this);

      // Recursively load sub-directories, including snapshot copies of deleted
      // directories
      int numSubTree = in.readInt();
      for (int i = 0; i < numSubTree; i++) {
        loadDirectoryWithSnapshot(in, counter);
      }
    }
584
585 /**
586 * Load all children of a directory
587 *
588 * @param in input to load from
589 * @param counter Counter to increment for namenode startup progress
590 * @return number of child inodes read
591 * @throws IOException
592 */
593 private int loadDirectory(DataInput in, Counter counter) throws IOException {
594 String parentPath = FSImageSerialization.readString(in);
595 // Rename .snapshot paths if we're doing an upgrade
596 parentPath = renameReservedPathsOnUpgrade(parentPath, getLayoutVersion());
597 final INodeDirectory parent = INodeDirectory.valueOf(
598 namesystem.dir.getNode(parentPath, true), parentPath);
599 return loadChildren(parent, in, counter);
600 }
601
    /**
     * load fsimage files assuming full path names are stored. Inodes arrive
     * in topological order, so a child's parent is always already present;
     * the loop caches the most recent parent to avoid re-resolving it for
     * siblings.
     *
     * @param numFiles total number of files to load
     * @param in data input stream
     * @param counter Counter to increment for namenode startup progress
     * @throws IOException if any error occurs
     */
    private void loadFullNameINodes(long numFiles, DataInput in, Counter counter)
        throws IOException {
      byte[][] pathComponents;
      byte[][] parentPath = {{}};
      FSDirectory fsDir = namesystem.dir;
      INodeDirectory parentINode = fsDir.rootDir;
      for (long i = 0; i < numFiles; i++) {
        pathComponents = FSImageSerialization.readPathComponents(in);
        // Rename reserved components (e.g. ".snapshot") on upgrade.
        for (int j=0; j < pathComponents.length; j++) {
          byte[] newComponent = renameReservedComponentOnUpgrade
              (pathComponents[j], getLayoutVersion());
          if (!Arrays.equals(newComponent, pathComponents[j])) {
            String oldPath = DFSUtil.byteArray2PathString(pathComponents);
            pathComponents[j] = newComponent;
            String newPath = DFSUtil.byteArray2PathString(pathComponents);
            LOG.info("Renaming reserved path " + oldPath + " to " + newPath);
          }
        }
        // The inode body follows its path; its name is the last component.
        final INode newNode = loadINode(
            pathComponents[pathComponents.length-1], false, in, counter);

        if (isRoot(pathComponents)) { // it is the root
          // update the root's attributes
          updateRootAttr(newNode.asDirectory());
          continue;
        }

        namesystem.dir.addToInodeMap(newNode);
        // check if the new inode belongs to the same parent
        if(!isParent(pathComponents, parentPath)) {
          parentINode = getParentINodeDirectory(pathComponents);
          parentPath = getParent(pathComponents);
        }

        // add new inode
        addToParent(parentINode, newNode);
      }
    }
648
649 private INodeDirectory getParentINodeDirectory(byte[][] pathComponents
650 ) throws FileNotFoundException, PathIsNotDirectoryException,
651 UnresolvedLinkException {
652 if (pathComponents.length < 2) { // root
653 return null;
654 }
655 // Gets the parent INode
656 final INodesInPath inodes = namesystem.dir.getExistingPathINodes(
657 pathComponents);
658 return INodeDirectory.valueOf(inodes.getINode(-2), pathComponents);
659 }
660
661 /**
662 * Add the child node to parent and, if child is a file, update block map.
663 * This method is only used for image loading so that synchronization,
664 * modification time update and space count update are not needed.
665 */
666 private void addToParent(INodeDirectory parent, INode child) {
667 FSDirectory fsDir = namesystem.dir;
668 if (parent == fsDir.rootDir) {
669 child.setLocalName(renameReservedRootComponentOnUpgrade(
670 child.getLocalNameBytes(), getLayoutVersion()));
671 }
672 // NOTE: This does not update space counts for parents
673 if (!parent.addChild(child)) {
674 return;
675 }
676 namesystem.dir.cacheName(child);
677
678 if (child.isFile()) {
679 updateBlocksMap(child.asFile());
680 }
681 }
682
683 public void updateBlocksMap(INodeFile file) {
684 // Add file->block mapping
685 final BlockInfo[] blocks = file.getBlocks();
686 if (blocks != null) {
687 final BlockManager bm = namesystem.getBlockManager();
688 for (int i = 0; i < blocks.length; i++) {
689 file.setBlock(i, bm.addBlockCollection(blocks[i], file));
690 }
691 }
692 }
693
    /** @return The FSDirectory of the namesystem where the fsimage is loaded */
    public FSDirectory getFSDirectoryInLoading() {
      return namesystem.dir;
    }
698
699 public INode loadINodeWithLocalName(boolean isSnapshotINode, DataInput in,
700 boolean updateINodeMap) throws IOException {
701 return loadINodeWithLocalName(isSnapshotINode, in, updateINodeMap, null);
702 }
703
704 public INode loadINodeWithLocalName(boolean isSnapshotINode,
705 DataInput in, boolean updateINodeMap, Counter counter)
706 throws IOException {
707 byte[] localName = FSImageSerialization.readLocalName(in);
708 localName =
709 renameReservedComponentOnUpgrade(localName, getLayoutVersion());
710 INode inode = loadINode(localName, isSnapshotINode, in, counter);
711 if (updateINodeMap) {
712 namesystem.dir.addToInodeMap(inode);
713 }
714 return inode;
715 }
716
    /**
     * load an inode from fsimage except for its name. The numBlocks field
     * discriminates the inode type: >= 0 file, -1 directory, -2 symlink,
     * -3 reference. Field reads below follow the serialized order exactly.
     *
     * @param localName the inode's local name, already read by the caller
     *        (null for the root)
     * @param isSnapshotINode true when loading a snapshot copy of an inode
     * @param in data input stream from which image is read
     * @param counter Counter to increment for namenode startup progress
     * @return an inode
     */
    @SuppressWarnings("deprecation")
    INode loadINode(final byte[] localName, boolean isSnapshotINode,
        DataInput in, Counter counter) throws IOException {
      final int imgVersion = getLayoutVersion();
      if (NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.SNAPSHOT, imgVersion)) {
        // Reject names that collide with reserved snapshot path elements.
        namesystem.getFSDirectory().verifyINodeName(localName);
      }

      // Older layouts carry no inode id; allocate a fresh one.
      long inodeId = NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.ADD_INODE_ID, imgVersion) ? in.readLong()
          : namesystem.allocateNewInodeId();

      final short replication = namesystem.getBlockManager().adjustReplication(
          in.readShort());
      final long modificationTime = in.readLong();
      long atime = 0;
      if (NameNodeLayoutVersion.supports(
          LayoutVersion.Feature.FILE_ACCESS_TIME, imgVersion)) {
        atime = in.readLong();
      }
      final long blockSize = in.readLong();
      // Type discriminator: see javadoc above.
      final int numBlocks = in.readInt();

      if (numBlocks >= 0) {
        // file

        // read blocks
        BlockInfo[] blocks = new BlockInfo[numBlocks];
        for (int j = 0; j < numBlocks; j++) {
          blocks[j] = new BlockInfo(replication);
          blocks[j].readFields(in);
        }

        String clientName = "";
        String clientMachine = "";
        boolean underConstruction = false;
        FileDiffList fileDiffs = null;
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.SNAPSHOT, imgVersion)) {
          // read diffs
          fileDiffs = SnapshotFSImageFormat.loadFileDiffList(in, this);

          if (isSnapshotINode) {
            underConstruction = in.readBoolean();
            if (underConstruction) {
              clientName = FSImageSerialization.readString(in);
              clientMachine = FSImageSerialization.readString(in);
              // convert the last block to BlockUC
              if (blocks.length > 0) {
                BlockInfo lastBlk = blocks[blocks.length - 1];
                blocks[blocks.length - 1] = new BlockInfoUnderConstruction(
                    lastBlk, replication);
              }
            }
          }
        }

        final PermissionStatus permissions = PermissionStatus.read(in);

        // return
        if (counter != null) {
          counter.increment();
        }

        final INodeFile file = new INodeFile(inodeId, localName, permissions,
            modificationTime, atime, blocks, replication, blockSize, (byte)0);
        if (underConstruction) {
          file.toUnderConstruction(clientName, clientMachine);
        }
        // Wrap with a snapshot-feature copy only when diffs were present.
        return fileDiffs == null ? file : new INodeFile(file, fileDiffs);
      } else if (numBlocks == -1) {
        //directory

        //read quotas
        final long nsQuota = in.readLong();
        long dsQuota = -1L;
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.DISKSPACE_QUOTA, imgVersion)) {
          dsQuota = in.readLong();
        }

        //read snapshot info
        boolean snapshottable = false;
        boolean withSnapshot = false;
        if (NameNodeLayoutVersion.supports(
            LayoutVersion.Feature.SNAPSHOT, imgVersion)) {
          snapshottable = in.readBoolean();
          // withSnapshot is only serialized for non-snapshottable dirs.
          if (!snapshottable) {
            withSnapshot = in.readBoolean();
          }
        }

        final PermissionStatus permissions = PermissionStatus.read(in);

        //return
        if (counter != null) {
          counter.increment();
        }
        final INodeDirectory dir = new INodeDirectory(inodeId, localName,
            permissions, modificationTime);
        if (nsQuota >= 0 || dsQuota >= 0) {
          dir.addDirectoryWithQuotaFeature(nsQuota, dsQuota);
        }
        if (withSnapshot) {
          dir.addSnapshotFeature(null);
        }
        if (snapshottable) {
          dir.addSnapshottableFeature();
        }
        return dir;
      } else if (numBlocks == -2) {
        //symlink
        if (!FileSystem.areSymlinksEnabled()) {
          throw new IOException("Symlinks not supported - please remove symlink before upgrading to this version of HDFS");
        }

        final String symlink = Text.readString(in);
        final PermissionStatus permissions = PermissionStatus.read(in);
        if (counter != null) {
          counter.increment();
        }
        return new INodeSymlink(inodeId, localName, permissions,
            modificationTime, atime, symlink);
      } else if (numBlocks == -3) {
        //reference
        // Intentionally do not increment counter, because it is too difficult at
        // this point to assess whether or not this is a reference that counts
        // toward quota.

        final boolean isWithName = in.readBoolean();
        // lastSnapshotId for WithName node, dstSnapshotId for DstReference node
        int snapshotId = in.readInt();

        final INodeReference.WithCount withCount
            = referenceMap.loadINodeReferenceWithCount(isSnapshotINode, in, this);

        if (isWithName) {
          return new INodeReference.WithName(null, withCount, localName,
              snapshotId);
        } else {
          // Parent is unknown at this point; it is set when the reference is
          // attached to the tree.
          final INodeReference ref = new INodeReference.DstReference(null,
              withCount, snapshotId);
          return ref;
        }
      }

      throw new IOException("Unknown inode type: numBlocks=" + numBlocks);
    }
873
874 /** Load {@link INodeFileAttributes}. */
875 public INodeFileAttributes loadINodeFileAttributes(DataInput in)
876 throws IOException {
877 final int layoutVersion = getLayoutVersion();
878
879 if (!NameNodeLayoutVersion.supports(
880 LayoutVersion.Feature.OPTIMIZE_SNAPSHOT_INODES, layoutVersion)) {
881 return loadINodeWithLocalName(true, in, false).asFile();
882 }
883
884 final byte[] name = FSImageSerialization.readLocalName(in);
885 final PermissionStatus permissions = PermissionStatus.read(in);
886 final long modificationTime = in.readLong();
887 final long accessTime = in.readLong();
888
889 final short replication = namesystem.getBlockManager().adjustReplication(
890 in.readShort());
891 final long preferredBlockSize = in.readLong();
892
893 return new INodeFileAttributes.SnapshotCopy(name, permissions, null, modificationTime,
894 accessTime, replication, preferredBlockSize, (byte) 0, null);
895 }
896
897 public INodeDirectoryAttributes loadINodeDirectoryAttributes(DataInput in)
898 throws IOException {
899 final int layoutVersion = getLayoutVersion();
900
901 if (!NameNodeLayoutVersion.supports(
902 LayoutVersion.Feature.OPTIMIZE_SNAPSHOT_INODES, layoutVersion)) {
903 return loadINodeWithLocalName(true, in, false).asDirectory();
904 }
905
906 final byte[] name = FSImageSerialization.readLocalName(in);
907 final PermissionStatus permissions = PermissionStatus.read(in);
908 final long modificationTime = in.readLong();
909
910 //read quotas
911 final long nsQuota = in.readLong();
912 final long dsQuota = in.readLong();
913
914 return nsQuota == -1L && dsQuota == -1L ? new INodeDirectoryAttributes.SnapshotCopy(
915 name, permissions, null, modificationTime, null)
916 : new INodeDirectoryAttributes.CopyWithQuota(name, permissions,
917 null, modificationTime, nsQuota, dsQuota, null);
918 }
919
    /**
     * Load the "files under construction" section of the image, re-attach
     * the under-construction state to the already-loaded inodes, and rebuild
     * lease state for files that are open in the current namespace.
     *
     * @param in image input stream positioned at the section
     * @param supportSnapshot whether the layout supports snapshots
     *        (NOTE(review): not referenced in this body — confirm it is
     *        still needed by callers)
     * @param counter startup-progress counter, incremented once per loaded
     *        under-construction inode
     * @throws IOException if the stream cannot be read or a referenced file
     *         is missing from the namespace
     */
    private void loadFilesUnderConstruction(DataInput in,
        boolean supportSnapshot, Counter counter) throws IOException {
      FSDirectory fsDir = namesystem.dir;
      int size = in.readInt();

      LOG.info("Number of files under construction = " + size);

      for (int i = 0; i < size; i++) {
        INodeFile cons = FSImageSerialization.readINodeUnderConstruction(in,
            namesystem, getLayoutVersion());
        counter.increment();

        // verify that file exists in namespace
        String path = cons.getLocalName();
        INodeFile oldnode = null;
        boolean inSnapshot = false;
        if (path != null && FSDirectory.isReservedName(path) &&
            NameNodeLayoutVersion.supports(
                LayoutVersion.Feature.ADD_INODE_ID, getLayoutVersion())) {
          // TODO: for HDFS-5428, we use reserved path for those INodeFileUC in
          // snapshot. If we support INode ID in the layout version, we can use
          // the inode id to find the oldnode.
          oldnode = namesystem.dir.getInode(cons.getId()).asFile();
          inSnapshot = true;
        } else {
          // Normal case: resolve the (possibly upgrade-renamed) path to the
          // inode that was loaded earlier from the namespace section.
          path = renameReservedPathsOnUpgrade(path, getLayoutVersion());
          final INodesInPath iip = fsDir.getLastINodeInPath(path);
          oldnode = INodeFile.valueOf(iip.getINode(0), path);
        }

        // Transfer the under-construction feature (client name/machine) from
        // the freshly-read record onto the existing inode.
        FileUnderConstructionFeature uc = cons.getFileUnderConstructionFeature();
        oldnode.toUnderConstruction(uc.getClientName(), uc.getClientMachine());
        if (oldnode.numBlocks() > 0) {
          BlockInfo ucBlock = cons.getLastBlock();
          // we do not replace the inode, just replace the last block of oldnode
          BlockInfo info = namesystem.getBlockManager().addBlockCollection(
              ucBlock, oldnode);
          oldnode.setBlock(oldnode.numBlocks() - 1, info);
        }

        if (!inSnapshot) {
          // Only files visible in the current namespace get a lease; files
          // that exist solely in snapshots do not.
          namesystem.leaseManager.addLease(cons
              .getFileUnderConstructionFeature().getClientName(), path);
        }
      }
    }
966
967 private void loadSecretManagerState(DataInput in)
968 throws IOException {
969 int imgVersion = getLayoutVersion();
970
971 if (!NameNodeLayoutVersion.supports(
972 LayoutVersion.Feature.DELEGATION_TOKEN, imgVersion)) {
973 //SecretManagerState is not available.
974 //This must not happen if security is turned on.
975 return;
976 }
977 namesystem.loadSecretManagerStateCompat(in);
978 }
979
980 private void loadCacheManagerState(DataInput in) throws IOException {
981 int imgVersion = getLayoutVersion();
982 if (!NameNodeLayoutVersion.supports(
983 LayoutVersion.Feature.CACHING, imgVersion)) {
984 return;
985 }
986 namesystem.getCacheManager().loadStateCompat(in);
987 }
988
    /**
     * @return the layout version of the image being loaded, as recorded in
     *         the namesystem's storage directory.
     */
    private int getLayoutVersion() {
      return namesystem.getFSImage().getStorage().getLayoutVersion();
    }
992
993 private boolean isRoot(byte[][] path) {
994 return path.length == 1 &&
995 path[0] == null;
996 }
997
998 private boolean isParent(byte[][] path, byte[][] parent) {
999 if (path == null || parent == null)
1000 return false;
1001 if (parent.length == 0 || path.length != parent.length + 1)
1002 return false;
1003 boolean isParent = true;
1004 for (int i = 0; i < parent.length; i++) {
1005 isParent = isParent && Arrays.equals(path[i], parent[i]);
1006 }
1007 return isParent;
1008 }
1009
1010 /**
1011 * Return string representing the parent of the given path.
1012 */
1013 String getParent(String path) {
1014 return path.substring(0, path.lastIndexOf(Path.SEPARATOR));
1015 }
1016
1017 byte[][] getParent(byte[][] path) {
1018 byte[][] result = new byte[path.length - 1][];
1019 for (int i = 0; i < result.length; i++) {
1020 result[i] = new byte[path[i].length];
1021 System.arraycopy(path[i], 0, result[i], 0, path[i].length);
1022 }
1023 return result;
1024 }
1025
1026 public Snapshot getSnapshot(DataInput in) throws IOException {
1027 return snapshotMap.get(in.readInt());
1028 }
1029 }
1030
  /**
   * Mapping from a path component that is reserved in the current version
   * (e.g. ".snapshot") to the replacement name it is renamed to during an
   * upgrade. A TreeMap gives deterministic iteration order.
   */
  @VisibleForTesting
  public static final TreeMap<String, String> renameReservedMap =
      new TreeMap<String, String>();
1034
1035 /**
1036 * Use the default key-value pairs that will be used to determine how to
1037 * rename reserved paths on upgrade.
1038 */
1039 @VisibleForTesting
1040 public static void useDefaultRenameReservedPairs() {
1041 renameReservedMap.clear();
1042 for (String key: HdfsConstants.RESERVED_PATH_COMPONENTS) {
1043 renameReservedMap.put(
1044 key,
1045 key + "." + HdfsConstants.NAMENODE_LAYOUT_VERSION + "."
1046 + "UPGRADE_RENAMED");
1047 }
1048 }
1049
1050 /**
1051 * Set the key-value pairs that will be used to determine how to rename
1052 * reserved paths on upgrade.
1053 */
1054 @VisibleForTesting
1055 public static void setRenameReservedPairs(String renameReserved) {
1056 // Clear and set the default values
1057 useDefaultRenameReservedPairs();
1058 // Overwrite with provided values
1059 setRenameReservedMapInternal(renameReserved);
1060 }
1061
1062 private static void setRenameReservedMapInternal(String renameReserved) {
1063 Collection<String> pairs =
1064 StringUtils.getTrimmedStringCollection(renameReserved);
1065 for (String p : pairs) {
1066 String[] pair = StringUtils.split(p, '/', '=');
1067 Preconditions.checkArgument(pair.length == 2,
1068 "Could not parse key-value pair " + p);
1069 String key = pair[0];
1070 String value = pair[1];
1071 Preconditions.checkArgument(DFSUtil.isReservedPathComponent(key),
1072 "Unknown reserved path " + key);
1073 Preconditions.checkArgument(DFSUtil.isValidNameForComponent(value),
1074 "Invalid rename path for " + key + ": " + value);
1075 LOG.info("Will rename reserved path " + key + " to " + value);
1076 renameReservedMap.put(key, value);
1077 }
1078 }
1079
  /**
   * When upgrading from an old version, the filesystem could contain paths
   * that are now reserved in the new version (e.g. .snapshot). This renames
   * these new reserved paths to a user-specified value to avoid collisions
   * with the reserved name.
   *
   * @param path Old path potentially containing a reserved path
   * @param layoutVersion layout version of the image being upgraded; used to
   *        decide which reserved-name checks apply
   * @return New path with reserved path components renamed to user value
   */
  static String renameReservedPathsOnUpgrade(String path,
      final int layoutVersion) {
    final String oldPath = path;
    // If any known LVs aren't supported, we're doing an upgrade
    if (!NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, layoutVersion)) {
      String[] components = INode.getPathNames(path);
      // Only need to worry about the root directory
      if (components.length > 1) {
        // components[0] is the empty string before the leading "/"; the
        // first real component is components[1].
        components[1] = DFSUtil.bytes2String(
            renameReservedRootComponentOnUpgrade(
                DFSUtil.string2Bytes(components[1]),
                layoutVersion));
        path = DFSUtil.strings2PathString(components);
      }
    }
    if (!NameNodeLayoutVersion.supports(Feature.SNAPSHOT, layoutVersion)) {
      String[] components = INode.getPathNames(path);
      // Special case the root path
      if (components.length == 0) {
        return path;
      }
      // ".snapshot" may appear at any depth, so every component is checked.
      for (int i=0; i<components.length; i++) {
        components[i] = DFSUtil.bytes2String(
            renameReservedComponentOnUpgrade(
                DFSUtil.string2Bytes(components[i]),
                layoutVersion));
      }
      path = DFSUtil.strings2PathString(components);
    }

    if (!path.equals(oldPath)) {
      LOG.info("Upgrade process renamed reserved path " + oldPath + " to "
          + path);
    }
    return path;
  }
1125
  /**
   * Error shown when an upgrade encounters a now-reserved path but no rename
   * rule was configured for it (via the -renameReserved startup option).
   */
  private final static String RESERVED_ERROR_MSG =
      FSDirectory.DOT_RESERVED_PATH_PREFIX + " is a reserved path and "
      + HdfsConstants.DOT_SNAPSHOT_DIR + " is a reserved path component in"
      + " this version of HDFS. Please rollback and delete or rename"
      + " this path, or upgrade with the "
      + StartupOption.RENAMERESERVED.getName()
      + " [key-value pairs]"
      + " option to automatically rename these paths during upgrade.";
1134
  /**
   * Same as {@link #renameReservedPathsOnUpgrade(String, int)}, but for a
   * single byte array path component: renames a ".snapshot" component when
   * the image predates snapshot support.
   *
   * @param component path component to check
   * @param layoutVersion layout version of the image being upgraded
   * @return the (possibly renamed) component
   */
  private static byte[] renameReservedComponentOnUpgrade(byte[] component,
      final int layoutVersion) {
    // If the LV doesn't support snapshots, we're doing an upgrade
    if (!NameNodeLayoutVersion.supports(Feature.SNAPSHOT, layoutVersion)) {
      if (Arrays.equals(component, HdfsConstants.DOT_SNAPSHOT_DIR_BYTES)) {
        // A rename rule must have been configured; otherwise the upgrade
        // cannot proceed safely.
        Preconditions.checkArgument(
            renameReservedMap.containsKey(HdfsConstants.DOT_SNAPSHOT_DIR),
            RESERVED_ERROR_MSG);
        component =
            DFSUtil.string2Bytes(renameReservedMap
                .get(HdfsConstants.DOT_SNAPSHOT_DIR));
      }
    }
    return component;
  }
1154
  /**
   * Same as {@link #renameReservedPathsOnUpgrade(String, int)}, but for a
   * single byte array path component directly under the root: renames a
   * ".reserved" component when the image predates inode IDs.
   *
   * @param component root-level path component to check
   * @param layoutVersion layout version of the image being upgraded
   * @return the (possibly renamed) component
   */
  private static byte[] renameReservedRootComponentOnUpgrade(byte[] component,
      final int layoutVersion) {
    // If the LV doesn't support inode IDs, we're doing an upgrade
    if (!NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, layoutVersion)) {
      if (Arrays.equals(component, FSDirectory.DOT_RESERVED)) {
        // A rename rule must have been configured; otherwise the upgrade
        // cannot proceed safely.
        Preconditions.checkArgument(
            renameReservedMap.containsKey(FSDirectory.DOT_RESERVED_STRING),
            RESERVED_ERROR_MSG);
        final String renameString = renameReservedMap
            .get(FSDirectory.DOT_RESERVED_STRING);
        component =
            DFSUtil.string2Bytes(renameString);
        LOG.info("Renamed root path " + FSDirectory.DOT_RESERVED_STRING
            + " to " + renameString);
      }
    }
    return component;
  }
1177
  /**
   * A one-shot class responsible for writing an image file.
   * The write() function should be called once, after which the getter
   * functions may be used to retrieve information about the file that was written.
   *
   * This is replaced by the PB-based FSImage. The class is to maintain
   * compatibility for the external fsimage tool.
   */
  @Deprecated
  static class Saver {
    /** Layout version of the legacy (pre-protobuf) image format written here. */
    private static final int LAYOUT_VERSION = -51;
    /** Context carrying the source namesystem, txid, and cancellation flag. */
    private final SaveNamespaceContext context;
    /** Set to true once an image has been written */
    private boolean saved = false;

    /** The MD5 checksum of the file that was written */
    private MD5Hash savedDigest;
    /** Tracks reference inodes so each referenced subtree is saved only once. */
    private final ReferenceMap referenceMap = new ReferenceMap();

    /**
     * Under-construction files that exist only in snapshots, keyed by inode
     * id; written to the under-construction section with reserved paths
     * (see the HDFS-5428 note in save()).
     */
    private final Map<Long, INodeFile> snapshotUCMap =
        new HashMap<Long, INodeFile>();

    /** @throws IllegalStateException if the instance has not yet saved an image */
    private void checkSaved() {
      if (!saved) {
        throw new IllegalStateException("FSImageSaver has not saved an image");
      }
    }

    /** @throws IllegalStateException if the instance has already saved an image */
    private void checkNotSaved() {
      if (saved) {
        throw new IllegalStateException("FSImageSaver has already saved an image");
      }
    }


    Saver(SaveNamespaceContext context) {
      this.context = context;
    }

    /**
     * Return the MD5 checksum of the image file that was saved.
     */
    MD5Hash getSavedDigest() {
      checkSaved();
      return savedDigest;
    }

    /**
     * Write the entire namespace image to {@code newFile}: the fixed header,
     * the inode tree, under-construction files, secret manager state, and
     * cache manager state. May be called at most once per instance.
     *
     * @param newFile destination image file
     * @param compression codec used to wrap the stream after the header
     * @throws IOException on any write failure or if the save is cancelled
     */
    void save(File newFile, FSImageCompression compression) throws IOException {
      checkNotSaved();

      final FSNamesystem sourceNamesystem = context.getSourceNamesystem();
      final INodeDirectory rootDir = sourceNamesystem.dir.rootDir;
      final long numINodes = rootDir.getDirectoryWithQuotaFeature()
          .getSpaceConsumed().get(Quota.NAMESPACE);
      String sdPath = newFile.getParentFile().getParentFile().getAbsolutePath();
      Step step = new Step(StepType.INODES, sdPath);
      StartupProgress prog = NameNode.getStartupProgress();
      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
      prog.setTotal(Phase.SAVING_CHECKPOINT, step, numINodes);
      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
      long startTime = now();
      //
      // Write out data
      //
      // The digest stream computes the MD5 of everything written through it.
      MessageDigest digester = MD5Hash.getDigester();
      FileOutputStream fout = new FileOutputStream(newFile);
      DigestOutputStream fos = new DigestOutputStream(fout, digester);
      DataOutputStream out = new DataOutputStream(fos);
      try {
        // Header is written uncompressed, in this exact field order.
        out.writeInt(LAYOUT_VERSION);
        LayoutFlags.write(out);
        // We use the non-locked version of getNamespaceInfo here since
        // the coordinating thread of saveNamespace already has read-locked
        // the namespace for us. If we attempt to take another readlock
        // from the actual saver thread, there's a potential of a
        // fairness-related deadlock. See the comments on HDFS-2223.
        out.writeInt(sourceNamesystem.unprotectedGetNamespaceInfo()
            .getNamespaceID());
        out.writeLong(numINodes);
        out.writeLong(sourceNamesystem.getGenerationStampV1());
        out.writeLong(sourceNamesystem.getGenerationStampV2());
        out.writeLong(sourceNamesystem.getGenerationStampAtblockIdSwitch());
        out.writeLong(sourceNamesystem.getLastAllocatedBlockId());
        out.writeLong(context.getTxId());
        out.writeLong(sourceNamesystem.getLastInodeId());


        sourceNamesystem.getSnapshotManager().write(out);

        // write compression info and set up compressed stream; everything
        // after this point goes through the (possibly) compressed stream.
        out = compression.writeHeaderAndWrapStream(fos);
        LOG.info("Saving image file " + newFile +
                 " using " + compression);

        // save the root
        saveINode2Image(rootDir, out, false, referenceMap, counter);
        // save the rest of the nodes
        saveImage(rootDir, out, true, false, counter);
        prog.endStep(Phase.SAVING_CHECKPOINT, step);
        // Now that the step is finished, set counter equal to total to adjust
        // for possible under-counting due to reference inodes.
        prog.setCount(Phase.SAVING_CHECKPOINT, step, numINodes);
        // save files under construction
        // TODO: for HDFS-5428, since we cannot break the compatibility of
        // fsimage, we store part of the under-construction files that are only
        // in snapshots in this "under-construction-file" section. As a
        // temporary solution, we use "/.reserved/.inodes/<inodeid>" as their
        // paths, so that when loading fsimage we do not put them into the lease
        // map. In the future, we can remove this hack when we can bump the
        // layout version.
        sourceNamesystem.saveFilesUnderConstruction(out, snapshotUCMap);

        context.checkCancelled();
        sourceNamesystem.saveSecretManagerStateCompat(out, sdPath);
        context.checkCancelled();
        sourceNamesystem.getCacheManager().saveStateCompat(out, sdPath);
        context.checkCancelled();
        out.flush();
        context.checkCancelled();
        // Force file data to disk before declaring the image saved.
        fout.getChannel().force(true);
      } finally {
        // Closing the outermost stream cascades to fos and fout.
        out.close();
      }

      saved = true;
      // set md5 of the saved image
      savedDigest = new MD5Hash(digester.digest());

      LOG.info("Image file " + newFile + " of size " + newFile.length() +
          " bytes saved in " + (now() - startTime)/1000 + " seconds.");
    }

    /**
     * Save children INodes.
     * @param children The list of children INodes
     * @param out The DataOutputStream to write
     * @param inSnapshot Whether the parent directory or its ancestor is in
     *                   the deleted list of some snapshot (caused by rename or
     *                   deletion)
     * @param counter Counter to increment for namenode startup progress
     * @return Number of children that are directory
     */
    private int saveChildren(ReadOnlyList<INode> children,
        DataOutputStream out, boolean inSnapshot, Counter counter)
        throws IOException {
      // Write normal children INode.
      out.writeInt(children.size());
      int dirNum = 0;
      int i = 0;
      for(INode child : children) {
        // print all children first
        // TODO: for HDFS-5428, we cannot change the format/content of fsimage
        // here, thus even if the parent directory is in snapshot, we still
        // do not handle INodeUC as those stored in deleted list
        saveINode2Image(child, out, false, referenceMap, counter);
        if (child.isDirectory()) {
          dirNum++;
        } else if (inSnapshot && child.isFile()
            && child.asFile().isUnderConstruction()) {
          // Remember snapshot-only UC files for the separate UC section.
          this.snapshotUCMap.put(child.getId(), child.asFile());
        }
        // Poll for cancellation periodically rather than on every child.
        if (i++ % 50 == 0) {
          context.checkCancelled();
        }
      }
      return dirNum;
    }

    /**
     * Save file tree image starting from the given root.
     * This is a recursive procedure, which first saves all children and
     * snapshot diffs of a current directory and then moves inside the
     * sub-directories.
     *
     * @param current The current node
     * @param out The DataoutputStream to write the image
     * @param toSaveSubtree Whether or not to save the subtree to fsimage. For
     *                      reference node, its subtree may already have been
     *                      saved before.
     * @param inSnapshot Whether the current directory is in snapshot
     * @param counter Counter to increment for namenode startup progress
     */
    private void saveImage(INodeDirectory current, DataOutputStream out,
        boolean toSaveSubtree, boolean inSnapshot, Counter counter)
        throws IOException {
      // 1. Write the inode id of the directory
      out.writeLong(current.getId());

      if (!toSaveSubtree) {
        // Subtree already saved under a reference node elsewhere.
        return;
      }

      final ReadOnlyList<INode> children = current
          .getChildrenList(Snapshot.CURRENT_STATE_ID);
      int dirNum = 0;
      List<INodeDirectory> snapshotDirs = null;
      DirectoryWithSnapshotFeature sf = current.getDirectoryWithSnapshotFeature();
      if (sf != null) {
        // Directories deleted from the current tree but alive in snapshots
        // must still be written below.
        snapshotDirs = new ArrayList<INodeDirectory>();
        sf.getSnapshotDirectory(snapshotDirs);
        dirNum += snapshotDirs.size();
      }

      // 2. Write INodeDirectorySnapshottable#snapshotsByNames to record all
      // Snapshots
      if (current.isDirectory() && current.asDirectory().isSnapshottable()) {
        SnapshotFSImageFormat.saveSnapshots(current.asDirectory(), out);
      } else {
        out.writeInt(-1); // # of snapshots
      }

      // 3. Write children INode
      dirNum += saveChildren(children, out, inSnapshot, counter);

      // 4. Write DirectoryDiff lists, if there is any.
      SnapshotFSImageFormat.saveDirectoryDiffList(current, out, referenceMap);

      // Write sub-tree of sub-directories, including possible snapshots of
      // deleted sub-directories
      out.writeInt(dirNum); // the number of sub-directories
      for(INode child : children) {
        if(!child.isDirectory()) {
          continue;
        }
        // make sure we only save the subtree under a reference node once
        boolean toSave = child.isReference() ?
            referenceMap.toProcessSubtree(child.getId()) : true;
        saveImage(child.asDirectory(), out, toSave, inSnapshot, counter);
      }
      if (snapshotDirs != null) {
        for (INodeDirectory subDir : snapshotDirs) {
          // make sure we only save the subtree under a reference node once
          boolean toSave = subDir.getParentReference() != null ?
              referenceMap.toProcessSubtree(subDir.getId()) : true;
          saveImage(subDir, out, toSave, true, counter);
        }
      }
    }

    /**
     * Saves inode and increments progress counter.
     *
     * @param inode INode to save
     * @param out DataOutputStream to receive inode
     * @param writeUnderConstruction boolean true if this is under construction
     * @param referenceMap ReferenceMap containing reference inodes
     * @param counter Counter to increment for namenode startup progress
     * @throws IOException thrown if there is an I/O error
     */
    private void saveINode2Image(INode inode, DataOutputStream out,
        boolean writeUnderConstruction, ReferenceMap referenceMap,
        Counter counter) throws IOException {
      FSImageSerialization.saveINode2Image(inode, out, writeUnderConstruction,
          referenceMap);
      // Intentionally do not increment counter for reference inodes, because it
      // is too difficult at this point to assess whether or not this is a
      // reference that counts toward quota.
      if (!(inode instanceof INodeReference)) {
        counter.increment();
      }
    }
  }
1442 }