001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.namenode;
019
020 import java.io.Closeable;
021 import java.io.File;
022 import java.io.IOException;
023 import java.io.RandomAccessFile;
024 import java.net.URI;
025 import java.net.UnknownHostException;
026 import java.util.ArrayList;
027 import java.util.Collection;
028 import java.util.EnumSet;
029 import java.util.HashMap;
030 import java.util.Iterator;
031 import java.util.List;
032 import java.util.Properties;
033 import java.util.UUID;
034 import java.util.concurrent.CopyOnWriteArrayList;
035
036 import org.apache.hadoop.classification.InterfaceAudience;
037 import org.apache.hadoop.conf.Configuration;
038 import org.apache.hadoop.fs.FileUtil;
039 import org.apache.hadoop.hdfs.DFSUtil;
040 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
041 import org.apache.hadoop.hdfs.protocol.LayoutVersion;
042 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
043 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
044 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
045 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
046 import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
047 import org.apache.hadoop.hdfs.server.common.Storage;
048 import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
049 import org.apache.hadoop.hdfs.server.common.Util;
050 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
051 import org.apache.hadoop.hdfs.util.PersistentLongFile;
052 import org.apache.hadoop.io.IOUtils;
053 import org.apache.hadoop.net.DNS;
054 import org.apache.hadoop.util.Time;
055
056 import com.google.common.annotations.VisibleForTesting;
057 import com.google.common.base.Preconditions;
058 import com.google.common.collect.Lists;
059
060 /**
061 * NNStorage is responsible for management of the StorageDirectories used by
062 * the NameNode.
063 */
064 @InterfaceAudience.Private
065 public class NNStorage extends Storage implements Closeable,
066 StorageErrorReporter {
/** Name of the VERSION-file property holding an image MD5 digest in older layouts. */
static final String DEPRECATED_MESSAGE_DIGEST_PROPERTY = "imageMD5Digest";
/** URI scheme name ("file"); presumably denotes local file system directories. */
static final String LOCAL_URI_SCHEME = "file";
069
070 //
071 // The filenames used for storing the images
072 //
073 public enum NameNodeFile {
074 IMAGE ("fsimage"),
075 TIME ("fstime"), // from "old" pre-HDFS-1073 format
076 SEEN_TXID ("seen_txid"),
077 EDITS ("edits"),
078 IMAGE_NEW ("fsimage.ckpt"),
079 IMAGE_ROLLBACK("fsimage_rollback"),
080 EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format
081 EDITS_INPROGRESS ("edits_inprogress"),
082 EDITS_TMP ("edits_tmp"),
083 IMAGE_LEGACY_OIV ("fsimage_legacy_oiv"); // For pre-PB format
084
085 private String fileName = null;
086 private NameNodeFile(String name) { this.fileName = name; }
087 @VisibleForTesting
088 public String getName() { return fileName; }
089 }
090
091 /**
092 * Implementation of StorageDirType specific to namenode storage
093 * A Storage directory could be of type IMAGE which stores only fsimage,
094 * or of type EDITS which stores edits or of type IMAGE_AND_EDITS which
095 * stores both fsimage and edits.
096 */
097 @VisibleForTesting
098 public static enum NameNodeDirType implements StorageDirType {
099 UNDEFINED,
100 IMAGE,
101 EDITS,
102 IMAGE_AND_EDITS;
103
104 @Override
105 public StorageDirType getStorageDirType() {
106 return this;
107 }
108
109 @Override
110 public boolean isOfType(StorageDirType type) {
111 if ((this == IMAGE_AND_EDITS) && (type == IMAGE || type == EDITS))
112 return true;
113 return this == type;
114 }
115 }
116
117 protected String blockpoolID = ""; // id of the block pool
118
119 /**
120 * flag that controls if we try to restore failed storages
121 */
122 private boolean restoreFailedStorage = false;
123 private final Object restorationLock = new Object();
124 private boolean disablePreUpgradableLayoutCheck = false;
125
126
127 /**
128 * TxId of the last transaction that was included in the most
129 * recent fsimage file. This does not include any transactions
130 * that have since been written to the edit log.
131 */
132 protected volatile long mostRecentCheckpointTxId = HdfsConstants.INVALID_TXID;
133
134 /**
135 * Time of the last checkpoint, in milliseconds since the epoch.
136 */
137 private long mostRecentCheckpointTime = 0;
138
139 /**
140 * list of failed (and thus removed) storages
141 */
142 final protected List<StorageDirectory> removedStorageDirs
143 = new CopyOnWriteArrayList<StorageDirectory>();
144
145 /**
146 * Properties from old layout versions that may be needed
147 * during upgrade only.
148 */
149 private HashMap<String, String> deprecatedProperties;
150
151 /**
152 * Construct the NNStorage.
153 * @param conf Namenode configuration.
154 * @param imageDirs Directories the image can be stored in.
155 * @param editsDirs Directories the editlog can be stored in.
156 * @throws IOException if any directories are inaccessible.
157 */
158 public NNStorage(Configuration conf,
159 Collection<URI> imageDirs, Collection<URI> editsDirs)
160 throws IOException {
161 super(NodeType.NAME_NODE);
162
163 storageDirs = new CopyOnWriteArrayList<StorageDirectory>();
164
165 // this may modify the editsDirs, so copy before passing in
166 setStorageDirectories(imageDirs,
167 Lists.newArrayList(editsDirs),
168 FSNamesystem.getSharedEditsDirs(conf));
169 }
170
171 @Override // Storage
172 public boolean isPreUpgradableLayout(StorageDirectory sd) throws IOException {
173 if (disablePreUpgradableLayoutCheck) {
174 return false;
175 }
176
177 File oldImageDir = new File(sd.getRoot(), "image");
178 if (!oldImageDir.exists()) {
179 return false;
180 }
181 // check the layout version inside the image file
182 File oldF = new File(oldImageDir, "fsimage");
183 RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws");
184 try {
185 oldFile.seek(0);
186 int oldVersion = oldFile.readInt();
187 oldFile.close();
188 oldFile = null;
189 if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION)
190 return false;
191 } finally {
192 IOUtils.cleanup(LOG, oldFile);
193 }
194 return true;
195 }
196
197 @Override // Closeable
198 public void close() throws IOException {
199 unlockAll();
200 storageDirs.clear();
201 }
202
203 /**
204 * Set flag whether an attempt should be made to restore failed storage
205 * directories at the next available oppurtuinity.
206 *
207 * @param val Whether restoration attempt should be made.
208 */
209 void setRestoreFailedStorage(boolean val) {
210 LOG.warn("set restore failed storage to " + val);
211 restoreFailedStorage=val;
212 }
213
214 /**
215 * @return Whether failed storage directories are to be restored.
216 */
217 boolean getRestoreFailedStorage() {
218 return restoreFailedStorage;
219 }
220
221 /**
222 * See if any of removed storages is "writable" again, and can be returned
223 * into service.
224 */
225 void attemptRestoreRemovedStorage() {
226 // if directory is "alive" - copy the images there...
227 if(!restoreFailedStorage || removedStorageDirs.size() == 0)
228 return; //nothing to restore
229
230 /* We don't want more than one thread trying to restore at a time */
231 synchronized (this.restorationLock) {
232 LOG.info("NNStorage.attemptRestoreRemovedStorage: check removed(failed) "+
233 "storarge. removedStorages size = " + removedStorageDirs.size());
234 for(Iterator<StorageDirectory> it
235 = this.removedStorageDirs.iterator(); it.hasNext();) {
236 StorageDirectory sd = it.next();
237 File root = sd.getRoot();
238 LOG.info("currently disabled dir " + root.getAbsolutePath() +
239 "; type="+sd.getStorageDirType()
240 + ";canwrite="+FileUtil.canWrite(root));
241 if(root.exists() && FileUtil.canWrite(root)) {
242 LOG.info("restoring dir " + sd.getRoot().getAbsolutePath());
243 this.addStorageDir(sd); // restore
244 this.removedStorageDirs.remove(sd);
245 }
246 }
247 }
248 }
249
250 /**
251 * @return A list of storage directories which are in the errored state.
252 */
253 List<StorageDirectory> getRemovedStorageDirs() {
254 return this.removedStorageDirs;
255 }
256
257 /**
258 * See {@link NNStorage#setStorageDirectories(Collection, Collection, Collection)}
259 */
260 @VisibleForTesting
261 synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
262 Collection<URI> fsEditsDirs)
263 throws IOException {
264 setStorageDirectories(fsNameDirs, fsEditsDirs, new ArrayList<URI>());
265 }
266
267 /**
268 * Set the storage directories which will be used. This should only ever be
269 * called from inside NNStorage. However, it needs to remain package private
270 * for testing, as StorageDirectories need to be reinitialised after using
271 * Mockito.spy() on this class, as Mockito doesn't work well with inner
272 * classes, such as StorageDirectory in this case.
273 *
274 * Synchronized due to initialization of storageDirs and removedStorageDirs.
275 *
276 * @param fsNameDirs Locations to store images.
277 * @param fsEditsDirs Locations to store edit logs.
278 * @throws IOException
279 */
280 @VisibleForTesting
281 synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
282 Collection<URI> fsEditsDirs,
283 Collection<URI> sharedEditsDirs)
284 throws IOException {
285 this.storageDirs.clear();
286 this.removedStorageDirs.clear();
287
288 // Add all name dirs with appropriate NameNodeDirType
289 for (URI dirName : fsNameDirs) {
290 checkSchemeConsistency(dirName);
291 boolean isAlsoEdits = false;
292 for (URI editsDirName : fsEditsDirs) {
293 if (editsDirName.compareTo(dirName) == 0) {
294 isAlsoEdits = true;
295 fsEditsDirs.remove(editsDirName);
296 break;
297 }
298 }
299 NameNodeDirType dirType = (isAlsoEdits) ?
300 NameNodeDirType.IMAGE_AND_EDITS :
301 NameNodeDirType.IMAGE;
302 // Add to the list of storage directories, only if the
303 // URI is of type file://
304 if(dirName.getScheme().compareTo("file") == 0) {
305 this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
306 dirType,
307 sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared.
308 }
309 }
310
311 // Add edits dirs if they are different from name dirs
312 for (URI dirName : fsEditsDirs) {
313 checkSchemeConsistency(dirName);
314 // Add to the list of storage directories, only if the
315 // URI is of type file://
316 if(dirName.getScheme().compareTo("file") == 0)
317 this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
318 NameNodeDirType.EDITS, sharedEditsDirs.contains(dirName)));
319 }
320 }
321
322 /**
323 * Return the storage directory corresponding to the passed URI
324 * @param uri URI of a storage directory
325 * @return The matching storage directory or null if none found
326 */
327 StorageDirectory getStorageDirectory(URI uri) {
328 try {
329 uri = Util.fileAsURI(new File(uri));
330 Iterator<StorageDirectory> it = dirIterator();
331 for (; it.hasNext(); ) {
332 StorageDirectory sd = it.next();
333 if (Util.fileAsURI(sd.getRoot()).equals(uri)) {
334 return sd;
335 }
336 }
337 } catch (IOException ioe) {
338 LOG.warn("Error converting file to URI", ioe);
339 }
340 return null;
341 }
342
343 /**
344 * Checks the consistency of a URI, in particular if the scheme
345 * is specified
346 * @param u URI whose consistency is being checked.
347 */
348 private static void checkSchemeConsistency(URI u) throws IOException {
349 String scheme = u.getScheme();
350 // the URI should have a proper scheme
351 if(scheme == null) {
352 throw new IOException("Undefined scheme for " + u);
353 }
354 }
355
356 /**
357 * Retrieve current directories of type IMAGE
358 * @return Collection of URI representing image directories
359 * @throws IOException in case of URI processing error
360 */
361 Collection<URI> getImageDirectories() throws IOException {
362 return getDirectories(NameNodeDirType.IMAGE);
363 }
364
365 /**
366 * Retrieve current directories of type EDITS
367 * @return Collection of URI representing edits directories
368 * @throws IOException in case of URI processing error
369 */
370 Collection<URI> getEditsDirectories() throws IOException {
371 return getDirectories(NameNodeDirType.EDITS);
372 }
373
374 /**
375 * Return number of storage directories of the given type.
376 * @param dirType directory type
377 * @return number of storage directories of type dirType
378 */
379 int getNumStorageDirs(NameNodeDirType dirType) {
380 if(dirType == null)
381 return getNumStorageDirs();
382 Iterator<StorageDirectory> it = dirIterator(dirType);
383 int numDirs = 0;
384 for(; it.hasNext(); it.next())
385 numDirs++;
386 return numDirs;
387 }
388
389 /**
390 * Return the list of locations being used for a specific purpose.
391 * i.e. Image or edit log storage.
392 *
393 * @param dirType Purpose of locations requested.
394 * @throws IOException
395 */
396 Collection<URI> getDirectories(NameNodeDirType dirType)
397 throws IOException {
398 ArrayList<URI> list = new ArrayList<URI>();
399 Iterator<StorageDirectory> it = (dirType == null) ? dirIterator() :
400 dirIterator(dirType);
401 for ( ;it.hasNext(); ) {
402 StorageDirectory sd = it.next();
403 try {
404 list.add(Util.fileAsURI(sd.getRoot()));
405 } catch (IOException e) {
406 throw new IOException("Exception while processing " +
407 "StorageDirectory " + sd.getRoot(), e);
408 }
409 }
410 return list;
411 }
412
413 /**
414 * Determine the last transaction ID noted in this storage directory.
415 * This txid is stored in a special seen_txid file since it might not
416 * correspond to the latest image or edit log. For example, an image-only
417 * directory will have this txid incremented when edits logs roll, even
418 * though the edits logs are in a different directory.
419 *
420 * @param sd StorageDirectory to check
421 * @return If file exists and can be read, last recorded txid. If not, 0L.
422 * @throws IOException On errors processing file pointed to by sd
423 */
424 static long readTransactionIdFile(StorageDirectory sd) throws IOException {
425 File txidFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
426 return PersistentLongFile.readFile(txidFile, 0);
427 }
428
429 /**
430 * Write last checkpoint time into a separate file.
431 * @param sd storage directory
432 * @throws IOException
433 */
434 void writeTransactionIdFile(StorageDirectory sd, long txid) throws IOException {
435 Preconditions.checkArgument(txid >= 0, "bad txid: " + txid);
436
437 File txIdFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
438 PersistentLongFile.writeFile(txIdFile, txid);
439 }
440
441 /**
442 * Set the transaction ID and time of the last checkpoint
443 *
444 * @param txid transaction id of the last checkpoint
445 * @param time time of the last checkpoint, in millis since the epoch
446 */
447 void setMostRecentCheckpointInfo(long txid, long time) {
448 this.mostRecentCheckpointTxId = txid;
449 this.mostRecentCheckpointTime = time;
450 }
451
452 /**
453 * @return the transaction ID of the last checkpoint.
454 */
455 public long getMostRecentCheckpointTxId() {
456 return mostRecentCheckpointTxId;
457 }
458
459 /**
460 * @return the time of the most recent checkpoint in millis since the epoch.
461 */
462 long getMostRecentCheckpointTime() {
463 return mostRecentCheckpointTime;
464 }
465
466 /**
467 * Write a small file in all available storage directories that
468 * indicates that the namespace has reached some given transaction ID.
469 *
470 * This is used when the image is loaded to avoid accidental rollbacks
471 * in the case where an edit log is fully deleted but there is no
472 * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure()
473 * @param txid the txid that has been reached
474 */
475 public void writeTransactionIdFileToStorage(long txid) {
476 // Write txid marker in all storage directories
477 for (StorageDirectory sd : storageDirs) {
478 try {
479 writeTransactionIdFile(sd, txid);
480 } catch(IOException e) {
481 // Close any edits stream associated with this dir and remove directory
482 LOG.warn("writeTransactionIdToStorage failed on " + sd,
483 e);
484 reportErrorsOnDirectory(sd);
485 }
486 }
487 }
488
489 /**
490 * Return the name of the image file that is uploaded by periodic
491 * checkpointing
492 *
493 * @return List of filenames to save checkpoints to.
494 */
495 public File[] getFsImageNameCheckpoint(long txid) {
496 ArrayList<File> list = new ArrayList<File>();
497 for (Iterator<StorageDirectory> it =
498 dirIterator(NameNodeDirType.IMAGE); it.hasNext();) {
499 list.add(getStorageFile(it.next(), NameNodeFile.IMAGE_NEW, txid));
500 }
501 return list.toArray(new File[list.size()]);
502 }
503
504 /**
505 * @return The first image file with the given txid and image type.
506 */
507 public File getFsImageName(long txid, NameNodeFile nnf) {
508 for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
509 it.hasNext();) {
510 StorageDirectory sd = it.next();
511 File fsImage = getStorageFile(sd, nnf, txid);
512 if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
513 return fsImage;
514 }
515 }
516 return null;
517 }
518
519 /**
520 * @return The first image file whose txid is the same with the given txid and
521 * image type is one of the given types.
522 */
523 public File getFsImage(long txid, EnumSet<NameNodeFile> nnfs) {
524 for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
525 it.hasNext();) {
526 StorageDirectory sd = it.next();
527 for (NameNodeFile nnf : nnfs) {
528 File fsImage = getStorageFile(sd, nnf, txid);
529 if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
530 return fsImage;
531 }
532 }
533 }
534 return null;
535 }
536
537 public File getFsImageName(long txid) {
538 return getFsImageName(txid, NameNodeFile.IMAGE);
539 }
540
541 public File getHighestFsImageName() {
542 return getFsImageName(getMostRecentCheckpointTxId());
543 }
544
545 /** Create new dfs name directory. Caution: this destroys all files
546 * in this filesystem. */
547 private void format(StorageDirectory sd) throws IOException {
548 sd.clearDirectory(); // create currrent dir
549 writeProperties(sd);
550 writeTransactionIdFile(sd, 0);
551
552 LOG.info("Storage directory " + sd.getRoot()
553 + " has been successfully formatted.");
554 }
555
556 /**
557 * Format all available storage directories.
558 */
559 public void format(NamespaceInfo nsInfo) throws IOException {
560 Preconditions.checkArgument(nsInfo.getLayoutVersion() == 0 ||
561 nsInfo.getLayoutVersion() == HdfsConstants.NAMENODE_LAYOUT_VERSION,
562 "Bad layout version: %s", nsInfo.getLayoutVersion());
563
564 this.setStorageInfo(nsInfo);
565 this.blockpoolID = nsInfo.getBlockPoolID();
566 for (Iterator<StorageDirectory> it =
567 dirIterator(); it.hasNext();) {
568 StorageDirectory sd = it.next();
569 format(sd);
570 }
571 }
572
573 public static NamespaceInfo newNamespaceInfo()
574 throws UnknownHostException {
575 return new NamespaceInfo(newNamespaceID(), newClusterID(),
576 newBlockPoolID(), 0L);
577 }
578
579 public void format() throws IOException {
580 this.layoutVersion = HdfsConstants.NAMENODE_LAYOUT_VERSION;
581 for (Iterator<StorageDirectory> it =
582 dirIterator(); it.hasNext();) {
583 StorageDirectory sd = it.next();
584 format(sd);
585 }
586 }
587
588 /**
589 * Generate new namespaceID.
590 *
591 * namespaceID is a persistent attribute of the namespace.
592 * It is generated when the namenode is formatted and remains the same
593 * during the life cycle of the namenode.
594 * When a datanodes register they receive it as the registrationID,
595 * which is checked every time the datanode is communicating with the
596 * namenode. Datanodes that do not 'know' the namespaceID are rejected.
597 *
598 * @return new namespaceID
599 */
600 private static int newNamespaceID() {
601 int newID = 0;
602 while(newID == 0)
603 newID = DFSUtil.getRandom().nextInt(0x7FFFFFFF); // use 31 bits only
604 return newID;
605 }
606
607 @Override // Storage
608 protected void setFieldsFromProperties(
609 Properties props, StorageDirectory sd) throws IOException {
610 super.setFieldsFromProperties(props, sd);
611 if (layoutVersion == 0) {
612 throw new IOException("NameNode directory "
613 + sd.getRoot() + " is not formatted.");
614 }
615
616 // Set Block pool ID in version with federation support
617 if (NameNodeLayoutVersion.supports(
618 LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
619 String sbpid = props.getProperty("blockpoolID");
620 setBlockPoolID(sd.getRoot(), sbpid);
621 }
622 setDeprecatedPropertiesForUpgrade(props);
623 }
624
625 void readProperties(StorageDirectory sd, StartupOption startupOption)
626 throws IOException {
627 Properties props = readPropertiesFile(sd.getVersionFile());
628 if (HdfsServerConstants.RollingUpgradeStartupOption.ROLLBACK.matches
629 (startupOption)) {
630 int lv = Integer.parseInt(getProperty(props, sd, "layoutVersion"));
631 if (lv > getServiceLayoutVersion()) {
632 // we should not use a newer version for rollingUpgrade rollback
633 throw new IncorrectVersionException(getServiceLayoutVersion(), lv,
634 "storage directory " + sd.getRoot().getAbsolutePath());
635 }
636 props.setProperty("layoutVersion",
637 Integer.toString(HdfsConstants.NAMENODE_LAYOUT_VERSION));
638 }
639 setFieldsFromProperties(props, sd);
640 }
641
642 /**
643 * Pull any properties out of the VERSION file that are from older
644 * versions of HDFS and only necessary during upgrade.
645 */
646 private void setDeprecatedPropertiesForUpgrade(Properties props) {
647 deprecatedProperties = new HashMap<String, String>();
648 String md5 = props.getProperty(DEPRECATED_MESSAGE_DIGEST_PROPERTY);
649 if (md5 != null) {
650 deprecatedProperties.put(DEPRECATED_MESSAGE_DIGEST_PROPERTY, md5);
651 }
652 }
653
654 /**
655 * Return a property that was stored in an earlier version of HDFS.
656 *
657 * This should only be used during upgrades.
658 */
659 String getDeprecatedProperty(String prop) {
660 assert getLayoutVersion() > HdfsConstants.NAMENODE_LAYOUT_VERSION :
661 "getDeprecatedProperty should only be done when loading " +
662 "storage from past versions during upgrade.";
663 return deprecatedProperties.get(prop);
664 }
665
666 /**
667 * Write version file into the storage directory.
668 *
669 * The version file should always be written last.
670 * Missing or corrupted version file indicates that
671 * the checkpoint is not valid.
672 *
673 * @param sd storage directory
674 * @throws IOException
675 */
676 @Override // Storage
677 protected void setPropertiesFromFields(Properties props,
678 StorageDirectory sd
679 ) throws IOException {
680 super.setPropertiesFromFields(props, sd);
681 // Set blockpoolID in version with federation support
682 if (NameNodeLayoutVersion.supports(
683 LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
684 props.setProperty("blockpoolID", blockpoolID);
685 }
686 }
687
688 static File getStorageFile(StorageDirectory sd, NameNodeFile type, long imageTxId) {
689 return new File(sd.getCurrentDir(),
690 String.format("%s_%019d", type.getName(), imageTxId));
691 }
692
693 /**
694 * Get a storage file for one of the files that doesn't need a txid associated
695 * (e.g version, seen_txid)
696 */
697 static File getStorageFile(StorageDirectory sd, NameNodeFile type) {
698 return new File(sd.getCurrentDir(), type.getName());
699 }
700
701 @VisibleForTesting
702 public static String getCheckpointImageFileName(long txid) {
703 return getNameNodeFileName(NameNodeFile.IMAGE_NEW, txid);
704 }
705
706 @VisibleForTesting
707 public static String getImageFileName(long txid) {
708 return getNameNodeFileName(NameNodeFile.IMAGE, txid);
709 }
710
711 @VisibleForTesting
712 public static String getRollbackImageFileName(long txid) {
713 return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid);
714 }
715
716 public static String getLegacyOIVImageFileName(long txid) {
717 return getNameNodeFileName(NameNodeFile.IMAGE_LEGACY_OIV, txid);
718 }
719
720 private static String getNameNodeFileName(NameNodeFile nnf, long txid) {
721 return String.format("%s_%019d", nnf.getName(), txid);
722 }
723
724 @VisibleForTesting
725 public static String getInProgressEditsFileName(long startTxId) {
726 return getNameNodeFileName(NameNodeFile.EDITS_INPROGRESS, startTxId);
727 }
728
729 static File getInProgressEditsFile(StorageDirectory sd, long startTxId) {
730 return new File(sd.getCurrentDir(), getInProgressEditsFileName(startTxId));
731 }
732
733 static File getFinalizedEditsFile(StorageDirectory sd,
734 long startTxId, long endTxId) {
735 return new File(sd.getCurrentDir(),
736 getFinalizedEditsFileName(startTxId, endTxId));
737 }
738
739 static File getTemporaryEditsFile(StorageDirectory sd,
740 long startTxId, long endTxId, long timestamp) {
741 return new File(sd.getCurrentDir(),
742 getTemporaryEditsFileName(startTxId, endTxId, timestamp));
743 }
744
745 static File getImageFile(StorageDirectory sd, NameNodeFile nnf, long txid) {
746 return new File(sd.getCurrentDir(), getNameNodeFileName(nnf, txid));
747 }
748
749 @VisibleForTesting
750 public static String getFinalizedEditsFileName(long startTxId, long endTxId) {
751 return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(),
752 startTxId, endTxId);
753 }
754
755 public static String getTemporaryEditsFileName(long startTxId, long endTxId,
756 long timestamp) {
757 return String.format("%s_%019d-%019d_%019d", NameNodeFile.EDITS_TMP.getName(),
758 startTxId, endTxId, timestamp);
759 }
760
761 /**
762 * Return the first readable finalized edits file for the given txid.
763 */
764 File findFinalizedEditsFile(long startTxId, long endTxId)
765 throws IOException {
766 File ret = findFile(NameNodeDirType.EDITS,
767 getFinalizedEditsFileName(startTxId, endTxId));
768 if (ret == null) {
769 throw new IOException(
770 "No edits file for txid " + startTxId + "-" + endTxId + " exists!");
771 }
772 return ret;
773 }
774
775 /**
776 * Return the first readable image file for the given txid and image type, or
777 * null if no such image can be found
778 */
779 File findImageFile(NameNodeFile nnf, long txid) {
780 return findFile(NameNodeDirType.IMAGE,
781 getNameNodeFileName(nnf, txid));
782 }
783
784 /**
785 * Return the first readable storage file of the given name
786 * across any of the 'current' directories in SDs of the
787 * given type, or null if no such file exists.
788 */
789 private File findFile(NameNodeDirType dirType, String name) {
790 for (StorageDirectory sd : dirIterable(dirType)) {
791 File candidate = new File(sd.getCurrentDir(), name);
792 if (FileUtil.canRead(sd.getCurrentDir()) &&
793 candidate.exists()) {
794 return candidate;
795 }
796 }
797 return null;
798 }
799
800 /**
801 * Disable the check for pre-upgradable layouts. Needed for BackupImage.
802 * @param val Whether to disable the preupgradeable layout check.
803 */
804 void setDisablePreUpgradableLayoutCheck(boolean val) {
805 disablePreUpgradableLayoutCheck = val;
806 }
807
808 /**
809 * Marks a list of directories as having experienced an error.
810 *
811 * @param sds A list of storage directories to mark as errored.
812 */
813 void reportErrorsOnDirectories(List<StorageDirectory> sds) {
814 for (StorageDirectory sd : sds) {
815 reportErrorsOnDirectory(sd);
816 }
817 }
818
819 /**
820 * Reports that a directory has experienced an error.
821 * Notifies listeners that the directory is no longer
822 * available.
823 *
824 * @param sd A storage directory to mark as errored.
825 */
826 private void reportErrorsOnDirectory(StorageDirectory sd) {
827 LOG.error("Error reported on storage directory " + sd);
828
829 String lsd = listStorageDirectories();
830 LOG.debug("current list of storage dirs:" + lsd);
831
832 LOG.warn("About to remove corresponding storage: "
833 + sd.getRoot().getAbsolutePath());
834 try {
835 sd.unlock();
836 } catch (Exception e) {
837 LOG.warn("Unable to unlock bad storage directory: "
838 + sd.getRoot().getPath(), e);
839 }
840
841 if (this.storageDirs.remove(sd)) {
842 this.removedStorageDirs.add(sd);
843 }
844
845 lsd = listStorageDirectories();
846 LOG.debug("at the end current list of storage dirs:" + lsd);
847 }
848
849 /**
850 * Processes the startup options for the clusterid and blockpoolid
851 * for the upgrade.
852 * @param startOpt Startup options
853 * @param layoutVersion Layout version for the upgrade
854 * @throws IOException
855 */
856 void processStartupOptionsForUpgrade(StartupOption startOpt, int layoutVersion)
857 throws IOException {
858 if (startOpt == StartupOption.UPGRADE || startOpt == StartupOption.UPGRADEONLY) {
859 // If upgrade from a release that does not support federation,
860 // if clusterId is provided in the startupOptions use it.
861 // Else generate a new cluster ID
862 if (!NameNodeLayoutVersion.supports(
863 LayoutVersion.Feature.FEDERATION, layoutVersion)) {
864 if (startOpt.getClusterId() == null) {
865 startOpt.setClusterId(newClusterID());
866 }
867 setClusterID(startOpt.getClusterId());
868 setBlockPoolID(newBlockPoolID());
869 } else {
870 // Upgrade from one version of federation to another supported
871 // version of federation doesn't require clusterID.
872 // Warn the user if the current clusterid didn't match with the input
873 // clusterid.
874 if (startOpt.getClusterId() != null
875 && !startOpt.getClusterId().equals(getClusterID())) {
876 LOG.warn("Clusterid mismatch - current clusterid: " + getClusterID()
877 + ", Ignoring given clusterid: " + startOpt.getClusterId());
878 }
879 }
880 LOG.info("Using clusterid: " + getClusterID());
881 }
882 }
883
884 /**
885 * Report that an IOE has occurred on some file which may
886 * or may not be within one of the NN image storage directories.
887 */
888 @Override
889 public void reportErrorOnFile(File f) {
890 // We use getAbsolutePath here instead of getCanonicalPath since we know
891 // that there is some IO problem on that drive.
892 // getCanonicalPath may need to call stat() or readlink() and it's likely
893 // those calls would fail due to the same underlying IO problem.
894 String absPath = f.getAbsolutePath();
895 for (StorageDirectory sd : storageDirs) {
896 String dirPath = sd.getRoot().getAbsolutePath();
897 if (!dirPath.endsWith(File.separator)) {
898 dirPath += File.separator;
899 }
900 if (absPath.startsWith(dirPath)) {
901 reportErrorsOnDirectory(sd);
902 return;
903 }
904 }
905
906 }
907
908 /**
909 * Generate new clusterID.
910 *
911 * clusterID is a persistent attribute of the cluster.
912 * It is generated when the cluster is created and remains the same
913 * during the life cycle of the cluster. When a new name node is formated, if
914 * this is a new cluster, a new clusterID is geneated and stored. Subsequent
915 * name node must be given the same ClusterID during its format to be in the
916 * same cluster.
917 * When a datanode register it receive the clusterID and stick with it.
918 * If at any point, name node or data node tries to join another cluster, it
919 * will be rejected.
920 *
921 * @return new clusterID
922 */
923 public static String newClusterID() {
924 return "CID-" + UUID.randomUUID().toString();
925 }
926
927 void setClusterID(String cid) {
928 clusterID = cid;
929 }
930
931 /**
932 * try to find current cluster id in the VERSION files
933 * returns first cluster id found in any VERSION file
934 * null in case none found
935 * @return clusterId or null in case no cluster id found
936 */
937 public String determineClusterId() {
938 String cid = null;
939 Iterator<StorageDirectory> sdit = dirIterator(NameNodeDirType.IMAGE);
940 while(sdit.hasNext()) {
941 StorageDirectory sd = sdit.next();
942 try {
943 Properties props = readPropertiesFile(sd.getVersionFile());
944 cid = props.getProperty("clusterID");
945 LOG.info("current cluster id for sd="+sd.getCurrentDir() +
946 ";lv=" + layoutVersion + ";cid=" + cid);
947
948 if(cid != null && !cid.equals(""))
949 return cid;
950 } catch (Exception e) {
951 LOG.warn("this sd not available: " + e.getLocalizedMessage());
952 } //ignore
953 }
954 LOG.warn("couldn't find any VERSION file containing valid ClusterId");
955 return null;
956 }
957
958 /**
959 * Generate new blockpoolID.
960 *
961 * @return new blockpoolID
962 */
963 static String newBlockPoolID() throws UnknownHostException{
964 String ip = "unknownIP";
965 try {
966 ip = DNS.getDefaultIP("default");
967 } catch (UnknownHostException e) {
968 LOG.warn("Could not find ip address of \"default\" inteface.");
969 throw e;
970 }
971
972 int rand = DFSUtil.getSecureRandom().nextInt(Integer.MAX_VALUE);
973 String bpid = "BP-" + rand + "-"+ ip + "-" + Time.now();
974 return bpid;
975 }
976
977 /** Validate and set block pool ID */
978 void setBlockPoolID(String bpid) {
979 blockpoolID = bpid;
980 }
981
982 /** Validate and set block pool ID */
983 private void setBlockPoolID(File storage, String bpid)
984 throws InconsistentFSStateException {
985 if (bpid == null || bpid.equals("")) {
986 throw new InconsistentFSStateException(storage, "file "
987 + Storage.STORAGE_FILE_VERSION + " has no block pool Id.");
988 }
989
990 if (!blockpoolID.equals("") && !blockpoolID.equals(bpid)) {
991 throw new InconsistentFSStateException(storage,
992 "Unexepcted blockpoolID " + bpid + " . Expected " + blockpoolID);
993 }
994 setBlockPoolID(bpid);
995 }
996
997 public String getBlockPoolID() {
998 return blockpoolID;
999 }
1000
1001 /**
1002 * Iterate over all current storage directories, inspecting them
1003 * with the given inspector.
1004 */
1005 void inspectStorageDirs(FSImageStorageInspector inspector)
1006 throws IOException {
1007
1008 // Process each of the storage directories to find the pair of
1009 // newest image file and edit file
1010 for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
1011 StorageDirectory sd = it.next();
1012 inspector.inspectDirectory(sd);
1013 }
1014 }
1015
1016 /**
1017 * Iterate over all of the storage dirs, reading their contents to determine
1018 * their layout versions. Returns an FSImageStorageInspector which has
1019 * inspected each directory.
1020 *
1021 * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
1022 * @throws IOException if no valid storage dirs are found or no valid layout version
1023 */
1024 FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes,
1025 StartupOption startupOption) throws IOException {
1026 Integer layoutVersion = null;
1027 boolean multipleLV = false;
1028 StringBuilder layoutVersions = new StringBuilder();
1029
1030 // First determine what range of layout versions we're going to inspect
1031 for (Iterator<StorageDirectory> it = dirIterator(false);
1032 it.hasNext();) {
1033 StorageDirectory sd = it.next();
1034 if (!sd.getVersionFile().exists()) {
1035 FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping...");
1036 continue;
1037 }
1038 readProperties(sd, startupOption); // sets layoutVersion
1039 int lv = getLayoutVersion();
1040 if (layoutVersion == null) {
1041 layoutVersion = Integer.valueOf(lv);
1042 } else if (!layoutVersion.equals(lv)) {
1043 multipleLV = true;
1044 }
1045 layoutVersions.append("(").append(sd.getRoot()).append(", ").append(lv).append(") ");
1046 }
1047
1048 if (layoutVersion == null) {
1049 throw new IOException("No storage directories contained VERSION information");
1050 }
1051 if (multipleLV) {
1052 throw new IOException(
1053 "Storage directories contain multiple layout versions: "
1054 + layoutVersions);
1055 }
1056 // If the storage directories are with the new layout version
1057 // (ie edits_<txnid>) then use the new inspector, which will ignore
1058 // the old format dirs.
1059 FSImageStorageInspector inspector;
1060 if (NameNodeLayoutVersion.supports(
1061 LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
1062 inspector = new FSImageTransactionalStorageInspector(fileTypes);
1063 } else {
1064 inspector = new FSImagePreTransactionalStorageInspector();
1065 }
1066
1067 inspectStorageDirs(inspector);
1068 return inspector;
1069 }
1070
1071 public NamespaceInfo getNamespaceInfo() {
1072 return new NamespaceInfo(
1073 getNamespaceID(),
1074 getClusterID(),
1075 getBlockPoolID(),
1076 getCTime());
1077 }
1078 }