001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.hdfs.server.namenode;
019    
020    import java.io.Closeable;
021    import java.io.File;
022    import java.io.IOException;
023    import java.io.RandomAccessFile;
024    import java.net.URI;
025    import java.net.UnknownHostException;
026    import java.util.ArrayList;
027    import java.util.Collection;
028    import java.util.EnumSet;
029    import java.util.HashMap;
030    import java.util.Iterator;
031    import java.util.List;
032    import java.util.Properties;
033    import java.util.UUID;
034    import java.util.concurrent.CopyOnWriteArrayList;
035    
036    import org.apache.hadoop.classification.InterfaceAudience;
037    import org.apache.hadoop.conf.Configuration;
038    import org.apache.hadoop.fs.FileUtil;
039    import org.apache.hadoop.hdfs.DFSUtil;
040    import org.apache.hadoop.hdfs.protocol.HdfsConstants;
041    import org.apache.hadoop.hdfs.protocol.LayoutVersion;
042    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
043    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
044    import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
045    import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
046    import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
047    import org.apache.hadoop.hdfs.server.common.Storage;
048    import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
049    import org.apache.hadoop.hdfs.server.common.Util;
050    import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
051    import org.apache.hadoop.hdfs.util.PersistentLongFile;
052    import org.apache.hadoop.io.IOUtils;
053    import org.apache.hadoop.net.DNS;
054    import org.apache.hadoop.util.Time;
055    
056    import com.google.common.annotations.VisibleForTesting;
057    import com.google.common.base.Preconditions;
058    import com.google.common.collect.Lists;
059    
060    /**
061     * NNStorage is responsible for management of the StorageDirectories used by
062     * the NameNode.
063     */
064    @InterfaceAudience.Private
065    public class NNStorage extends Storage implements Closeable,
066        StorageErrorReporter {
067      static final String DEPRECATED_MESSAGE_DIGEST_PROPERTY = "imageMD5Digest";
068      static final String LOCAL_URI_SCHEME = "file";
069    
070      //
071      // The filenames used for storing the images
072      //
073      public enum NameNodeFile {
074        IMAGE     ("fsimage"),
075        TIME      ("fstime"), // from "old" pre-HDFS-1073 format
076        SEEN_TXID ("seen_txid"),
077        EDITS     ("edits"),
078        IMAGE_NEW ("fsimage.ckpt"),
079        IMAGE_ROLLBACK("fsimage_rollback"),
080        EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format
081        EDITS_INPROGRESS ("edits_inprogress"),
082        EDITS_TMP ("edits_tmp"),
083        IMAGE_LEGACY_OIV ("fsimage_legacy_oiv");  // For pre-PB format
084    
085        private String fileName = null;
086        private NameNodeFile(String name) { this.fileName = name; }
087        @VisibleForTesting
088        public String getName() { return fileName; }
089      }
090    
091      /**
092       * Implementation of StorageDirType specific to namenode storage
093       * A Storage directory could be of type IMAGE which stores only fsimage,
094       * or of type EDITS which stores edits or of type IMAGE_AND_EDITS which
095       * stores both fsimage and edits.
096       */
097      @VisibleForTesting
098      public static enum NameNodeDirType implements StorageDirType {
099        UNDEFINED,
100        IMAGE,
101        EDITS,
102        IMAGE_AND_EDITS;
103    
104        @Override
105        public StorageDirType getStorageDirType() {
106          return this;
107        }
108    
109        @Override
110        public boolean isOfType(StorageDirType type) {
111          if ((this == IMAGE_AND_EDITS) && (type == IMAGE || type == EDITS))
112            return true;
113          return this == type;
114        }
115      }
116    
117      protected String blockpoolID = ""; // id of the block pool
118      
119      /**
120       * flag that controls if we try to restore failed storages
121       */
122      private boolean restoreFailedStorage = false;
123      private final Object restorationLock = new Object();
124      private boolean disablePreUpgradableLayoutCheck = false;
125    
126    
127      /**
128       * TxId of the last transaction that was included in the most
129       * recent fsimage file. This does not include any transactions
130       * that have since been written to the edit log.
131       */
132      protected volatile long mostRecentCheckpointTxId = HdfsConstants.INVALID_TXID;
133      
134      /**
135       * Time of the last checkpoint, in milliseconds since the epoch.
136       */
137      private long mostRecentCheckpointTime = 0;
138    
139      /**
140       * list of failed (and thus removed) storages
141       */
142      final protected List<StorageDirectory> removedStorageDirs
143        = new CopyOnWriteArrayList<StorageDirectory>();
144    
145      /**
146       * Properties from old layout versions that may be needed
147       * during upgrade only.
148       */
149      private HashMap<String, String> deprecatedProperties;
150    
151      /**
152       * Construct the NNStorage.
153       * @param conf Namenode configuration.
154       * @param imageDirs Directories the image can be stored in.
155       * @param editsDirs Directories the editlog can be stored in.
156       * @throws IOException if any directories are inaccessible.
157       */
158      public NNStorage(Configuration conf, 
159                       Collection<URI> imageDirs, Collection<URI> editsDirs) 
160          throws IOException {
161        super(NodeType.NAME_NODE);
162    
163        storageDirs = new CopyOnWriteArrayList<StorageDirectory>();
164        
165        // this may modify the editsDirs, so copy before passing in
166        setStorageDirectories(imageDirs, 
167                              Lists.newArrayList(editsDirs),
168                              FSNamesystem.getSharedEditsDirs(conf));
169      }
170    
171      @Override // Storage
172      public boolean isPreUpgradableLayout(StorageDirectory sd) throws IOException {
173        if (disablePreUpgradableLayoutCheck) {
174          return false;
175        }
176    
177        File oldImageDir = new File(sd.getRoot(), "image");
178        if (!oldImageDir.exists()) {
179          return false;
180        }
181        // check the layout version inside the image file
182        File oldF = new File(oldImageDir, "fsimage");
183        RandomAccessFile oldFile = new RandomAccessFile(oldF, "rws");
184        try {
185          oldFile.seek(0);
186          int oldVersion = oldFile.readInt();
187          oldFile.close();
188          oldFile = null;
189          if (oldVersion < LAST_PRE_UPGRADE_LAYOUT_VERSION)
190            return false;
191        } finally {
192          IOUtils.cleanup(LOG, oldFile);
193        }
194        return true;
195      }
196    
197      @Override // Closeable
198      public void close() throws IOException {
199        unlockAll();
200        storageDirs.clear();
201      }
202    
203      /**
204       * Set flag whether an attempt should be made to restore failed storage
205       * directories at the next available oppurtuinity.
206       *
207       * @param val Whether restoration attempt should be made.
208       */
209      void setRestoreFailedStorage(boolean val) {
210        LOG.warn("set restore failed storage to " + val);
211        restoreFailedStorage=val;
212      }
213    
214      /**
215       * @return Whether failed storage directories are to be restored.
216       */
217      boolean getRestoreFailedStorage() {
218        return restoreFailedStorage;
219      }
220    
221      /**
222       * See if any of removed storages is "writable" again, and can be returned
223       * into service.
224       */
225      void attemptRestoreRemovedStorage() {
226        // if directory is "alive" - copy the images there...
227        if(!restoreFailedStorage || removedStorageDirs.size() == 0)
228          return; //nothing to restore
229    
230        /* We don't want more than one thread trying to restore at a time */
231        synchronized (this.restorationLock) {
232          LOG.info("NNStorage.attemptRestoreRemovedStorage: check removed(failed) "+
233                   "storarge. removedStorages size = " + removedStorageDirs.size());
234          for(Iterator<StorageDirectory> it
235                = this.removedStorageDirs.iterator(); it.hasNext();) {
236            StorageDirectory sd = it.next();
237            File root = sd.getRoot();
238            LOG.info("currently disabled dir " + root.getAbsolutePath() +
239                     "; type="+sd.getStorageDirType() 
240                     + ";canwrite="+FileUtil.canWrite(root));
241            if(root.exists() && FileUtil.canWrite(root)) {
242              LOG.info("restoring dir " + sd.getRoot().getAbsolutePath());
243              this.addStorageDir(sd); // restore
244              this.removedStorageDirs.remove(sd);
245            }
246          }
247        }
248      }
249    
250      /**
251       * @return A list of storage directories which are in the errored state.
252       */
253      List<StorageDirectory> getRemovedStorageDirs() {
254        return this.removedStorageDirs;
255      }
256      
257      /**
258       * See {@link NNStorage#setStorageDirectories(Collection, Collection, Collection)}
259       */
260      @VisibleForTesting
261      synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
262                                              Collection<URI> fsEditsDirs)
263          throws IOException {
264        setStorageDirectories(fsNameDirs, fsEditsDirs, new ArrayList<URI>());
265      }
266    
267      /**
268       * Set the storage directories which will be used. This should only ever be
269       * called from inside NNStorage. However, it needs to remain package private
270       * for testing, as StorageDirectories need to be reinitialised after using
271       * Mockito.spy() on this class, as Mockito doesn't work well with inner
272       * classes, such as StorageDirectory in this case.
273       *
274       * Synchronized due to initialization of storageDirs and removedStorageDirs.
275       *
276       * @param fsNameDirs Locations to store images.
277       * @param fsEditsDirs Locations to store edit logs.
278       * @throws IOException
279       */
280      @VisibleForTesting
281      synchronized void setStorageDirectories(Collection<URI> fsNameDirs,
282                                              Collection<URI> fsEditsDirs,
283                                              Collection<URI> sharedEditsDirs)
284          throws IOException {
285        this.storageDirs.clear();
286        this.removedStorageDirs.clear();
287    
288       // Add all name dirs with appropriate NameNodeDirType
289        for (URI dirName : fsNameDirs) {
290          checkSchemeConsistency(dirName);
291          boolean isAlsoEdits = false;
292          for (URI editsDirName : fsEditsDirs) {
293            if (editsDirName.compareTo(dirName) == 0) {
294              isAlsoEdits = true;
295              fsEditsDirs.remove(editsDirName);
296              break;
297            }
298          }
299          NameNodeDirType dirType = (isAlsoEdits) ?
300                              NameNodeDirType.IMAGE_AND_EDITS :
301                              NameNodeDirType.IMAGE;
302          // Add to the list of storage directories, only if the
303          // URI is of type file://
304          if(dirName.getScheme().compareTo("file") == 0) {
305            this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
306                dirType,
307                sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared.
308          }
309        }
310    
311        // Add edits dirs if they are different from name dirs
312        for (URI dirName : fsEditsDirs) {
313          checkSchemeConsistency(dirName);
314          // Add to the list of storage directories, only if the
315          // URI is of type file://
316          if(dirName.getScheme().compareTo("file") == 0)
317            this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
318                        NameNodeDirType.EDITS, sharedEditsDirs.contains(dirName)));
319        }
320      }
321    
322      /**
323       * Return the storage directory corresponding to the passed URI
324       * @param uri URI of a storage directory
325       * @return The matching storage directory or null if none found
326       */
327      StorageDirectory getStorageDirectory(URI uri) {
328        try {
329          uri = Util.fileAsURI(new File(uri));
330          Iterator<StorageDirectory> it = dirIterator();
331          for (; it.hasNext(); ) {
332            StorageDirectory sd = it.next();
333            if (Util.fileAsURI(sd.getRoot()).equals(uri)) {
334              return sd;
335            }
336          }
337        } catch (IOException ioe) {
338          LOG.warn("Error converting file to URI", ioe);
339        }
340        return null;
341      }
342    
343      /**
344       * Checks the consistency of a URI, in particular if the scheme
345       * is specified 
346       * @param u URI whose consistency is being checked.
347       */
348      private static void checkSchemeConsistency(URI u) throws IOException {
349        String scheme = u.getScheme();
350        // the URI should have a proper scheme
351        if(scheme == null) {
352          throw new IOException("Undefined scheme for " + u);
353        }
354      }
355    
356      /**
357       * Retrieve current directories of type IMAGE
358       * @return Collection of URI representing image directories
359       * @throws IOException in case of URI processing error
360       */
361      Collection<URI> getImageDirectories() throws IOException {
362        return getDirectories(NameNodeDirType.IMAGE);
363      }
364    
365      /**
366       * Retrieve current directories of type EDITS
367       * @return Collection of URI representing edits directories
368       * @throws IOException in case of URI processing error
369       */
370      Collection<URI> getEditsDirectories() throws IOException {
371        return getDirectories(NameNodeDirType.EDITS);
372      }
373    
374      /**
375       * Return number of storage directories of the given type.
376       * @param dirType directory type
377       * @return number of storage directories of type dirType
378       */
379      int getNumStorageDirs(NameNodeDirType dirType) {
380        if(dirType == null)
381          return getNumStorageDirs();
382        Iterator<StorageDirectory> it = dirIterator(dirType);
383        int numDirs = 0;
384        for(; it.hasNext(); it.next())
385          numDirs++;
386        return numDirs;
387      }
388    
389      /**
390       * Return the list of locations being used for a specific purpose.
391       * i.e. Image or edit log storage.
392       *
393       * @param dirType Purpose of locations requested.
394       * @throws IOException
395       */
396      Collection<URI> getDirectories(NameNodeDirType dirType)
397          throws IOException {
398        ArrayList<URI> list = new ArrayList<URI>();
399        Iterator<StorageDirectory> it = (dirType == null) ? dirIterator() :
400                                        dirIterator(dirType);
401        for ( ;it.hasNext(); ) {
402          StorageDirectory sd = it.next();
403          try {
404            list.add(Util.fileAsURI(sd.getRoot()));
405          } catch (IOException e) {
406            throw new IOException("Exception while processing " +
407                "StorageDirectory " + sd.getRoot(), e);
408          }
409        }
410        return list;
411      }
412      
413      /**
414       * Determine the last transaction ID noted in this storage directory.
415       * This txid is stored in a special seen_txid file since it might not
416       * correspond to the latest image or edit log. For example, an image-only
417       * directory will have this txid incremented when edits logs roll, even
418       * though the edits logs are in a different directory.
419       *
420       * @param sd StorageDirectory to check
421       * @return If file exists and can be read, last recorded txid. If not, 0L.
422       * @throws IOException On errors processing file pointed to by sd
423       */
424      static long readTransactionIdFile(StorageDirectory sd) throws IOException {
425        File txidFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
426        return PersistentLongFile.readFile(txidFile, 0);
427      }
428      
429      /**
430       * Write last checkpoint time into a separate file.
431       * @param sd storage directory
432       * @throws IOException
433       */
434      void writeTransactionIdFile(StorageDirectory sd, long txid) throws IOException {
435        Preconditions.checkArgument(txid >= 0, "bad txid: " + txid);
436        
437        File txIdFile = getStorageFile(sd, NameNodeFile.SEEN_TXID);
438        PersistentLongFile.writeFile(txIdFile, txid);
439      }
440    
441      /**
442       * Set the transaction ID and time of the last checkpoint
443       * 
444       * @param txid transaction id of the last checkpoint
445       * @param time time of the last checkpoint, in millis since the epoch
446       */
447      void setMostRecentCheckpointInfo(long txid, long time) {
448        this.mostRecentCheckpointTxId = txid;
449        this.mostRecentCheckpointTime = time;
450      }
451    
452      /**
453       * @return the transaction ID of the last checkpoint.
454       */
455      public long getMostRecentCheckpointTxId() {
456        return mostRecentCheckpointTxId;
457      }
458      
459      /**
460       * @return the time of the most recent checkpoint in millis since the epoch.
461       */
462      long getMostRecentCheckpointTime() {
463        return mostRecentCheckpointTime;
464      }
465    
466      /**
467       * Write a small file in all available storage directories that
468       * indicates that the namespace has reached some given transaction ID.
469       * 
470       * This is used when the image is loaded to avoid accidental rollbacks
471       * in the case where an edit log is fully deleted but there is no
472       * checkpoint. See TestNameEditsConfigs.testNameEditsConfigsFailure()
473       * @param txid the txid that has been reached
474       */
475      public void writeTransactionIdFileToStorage(long txid) {
476        // Write txid marker in all storage directories
477        for (StorageDirectory sd : storageDirs) {
478          try {
479            writeTransactionIdFile(sd, txid);
480          } catch(IOException e) {
481            // Close any edits stream associated with this dir and remove directory
482            LOG.warn("writeTransactionIdToStorage failed on " + sd,
483                e);
484            reportErrorsOnDirectory(sd);
485          }
486        }
487      }
488    
489      /**
490       * Return the name of the image file that is uploaded by periodic
491       * checkpointing
492       *
493       * @return List of filenames to save checkpoints to.
494       */
495      public File[] getFsImageNameCheckpoint(long txid) {
496        ArrayList<File> list = new ArrayList<File>();
497        for (Iterator<StorageDirectory> it =
498                     dirIterator(NameNodeDirType.IMAGE); it.hasNext();) {
499          list.add(getStorageFile(it.next(), NameNodeFile.IMAGE_NEW, txid));
500        }
501        return list.toArray(new File[list.size()]);
502      }
503    
504      /**
505       * @return The first image file with the given txid and image type.
506       */
507      public File getFsImageName(long txid, NameNodeFile nnf) {
508        for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
509            it.hasNext();) {
510          StorageDirectory sd = it.next();
511          File fsImage = getStorageFile(sd, nnf, txid);
512          if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
513            return fsImage;
514          }
515        }
516        return null;
517      }
518    
519      /**
520       * @return The first image file whose txid is the same with the given txid and
521       * image type is one of the given types.
522       */
523      public File getFsImage(long txid, EnumSet<NameNodeFile> nnfs) {
524        for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
525            it.hasNext();) {
526          StorageDirectory sd = it.next();
527          for (NameNodeFile nnf : nnfs) {
528            File fsImage = getStorageFile(sd, nnf, txid);
529            if (FileUtil.canRead(sd.getRoot()) && fsImage.exists()) {
530              return fsImage;
531            }
532          }
533        }
534        return null;
535      }
536    
537      public File getFsImageName(long txid) {
538        return getFsImageName(txid, NameNodeFile.IMAGE);
539      }
540    
541      public File getHighestFsImageName() {
542        return getFsImageName(getMostRecentCheckpointTxId());
543      }
544    
545      /** Create new dfs name directory.  Caution: this destroys all files
546       * in this filesystem. */
547      private void format(StorageDirectory sd) throws IOException {
548        sd.clearDirectory(); // create currrent dir
549        writeProperties(sd);
550        writeTransactionIdFile(sd, 0);
551    
552        LOG.info("Storage directory " + sd.getRoot()
553                 + " has been successfully formatted.");
554      }
555    
556      /**
557       * Format all available storage directories.
558       */
559      public void format(NamespaceInfo nsInfo) throws IOException {
560        Preconditions.checkArgument(nsInfo.getLayoutVersion() == 0 ||
561            nsInfo.getLayoutVersion() == HdfsConstants.NAMENODE_LAYOUT_VERSION,
562            "Bad layout version: %s", nsInfo.getLayoutVersion());
563        
564        this.setStorageInfo(nsInfo);
565        this.blockpoolID = nsInfo.getBlockPoolID();
566        for (Iterator<StorageDirectory> it =
567                               dirIterator(); it.hasNext();) {
568          StorageDirectory sd = it.next();
569          format(sd);
570        }
571      }
572      
573      public static NamespaceInfo newNamespaceInfo()
574          throws UnknownHostException {
575        return new NamespaceInfo(newNamespaceID(), newClusterID(),
576            newBlockPoolID(), 0L);
577      }
578      
579      public void format() throws IOException {
580        this.layoutVersion = HdfsConstants.NAMENODE_LAYOUT_VERSION;
581        for (Iterator<StorageDirectory> it =
582                               dirIterator(); it.hasNext();) {
583          StorageDirectory sd = it.next();
584          format(sd);
585        }
586      }
587    
588      /**
589       * Generate new namespaceID.
590       *
591       * namespaceID is a persistent attribute of the namespace.
592       * It is generated when the namenode is formatted and remains the same
593       * during the life cycle of the namenode.
594       * When a datanodes register they receive it as the registrationID,
595       * which is checked every time the datanode is communicating with the
596       * namenode. Datanodes that do not 'know' the namespaceID are rejected.
597       *
598       * @return new namespaceID
599       */
600      private static int newNamespaceID() {
601        int newID = 0;
602        while(newID == 0)
603          newID = DFSUtil.getRandom().nextInt(0x7FFFFFFF);  // use 31 bits only
604        return newID;
605      }
606    
607      @Override // Storage
608      protected void setFieldsFromProperties(
609          Properties props, StorageDirectory sd) throws IOException {
610        super.setFieldsFromProperties(props, sd);
611        if (layoutVersion == 0) {
612          throw new IOException("NameNode directory "
613                                + sd.getRoot() + " is not formatted.");
614        }
615    
616        // Set Block pool ID in version with federation support
617        if (NameNodeLayoutVersion.supports(
618            LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
619          String sbpid = props.getProperty("blockpoolID");
620          setBlockPoolID(sd.getRoot(), sbpid);
621        }
622        setDeprecatedPropertiesForUpgrade(props);
623      }
624    
625      void readProperties(StorageDirectory sd, StartupOption startupOption)
626          throws IOException {
627        Properties props = readPropertiesFile(sd.getVersionFile());
628        if (HdfsServerConstants.RollingUpgradeStartupOption.ROLLBACK.matches
629            (startupOption)) {
630          int lv = Integer.parseInt(getProperty(props, sd, "layoutVersion"));
631          if (lv > getServiceLayoutVersion()) {
632            // we should not use a newer version for rollingUpgrade rollback
633            throw new IncorrectVersionException(getServiceLayoutVersion(), lv,
634                "storage directory " + sd.getRoot().getAbsolutePath());
635          }
636          props.setProperty("layoutVersion",
637              Integer.toString(HdfsConstants.NAMENODE_LAYOUT_VERSION));
638        }
639        setFieldsFromProperties(props, sd);
640      }
641    
642      /**
643       * Pull any properties out of the VERSION file that are from older
644       * versions of HDFS and only necessary during upgrade.
645       */
646      private void setDeprecatedPropertiesForUpgrade(Properties props) {
647        deprecatedProperties = new HashMap<String, String>();
648        String md5 = props.getProperty(DEPRECATED_MESSAGE_DIGEST_PROPERTY);
649        if (md5 != null) {
650          deprecatedProperties.put(DEPRECATED_MESSAGE_DIGEST_PROPERTY, md5);
651        }
652      }
653      
654      /**
655       * Return a property that was stored in an earlier version of HDFS.
656       * 
657       * This should only be used during upgrades.
658       */
659      String getDeprecatedProperty(String prop) {
660        assert getLayoutVersion() > HdfsConstants.NAMENODE_LAYOUT_VERSION :
661          "getDeprecatedProperty should only be done when loading " +
662          "storage from past versions during upgrade.";
663        return deprecatedProperties.get(prop);
664      }
665    
666      /**
667       * Write version file into the storage directory.
668       *
669       * The version file should always be written last.
670       * Missing or corrupted version file indicates that
671       * the checkpoint is not valid.
672       *
673       * @param sd storage directory
674       * @throws IOException
675       */
676      @Override // Storage
677      protected void setPropertiesFromFields(Properties props,
678                               StorageDirectory sd
679                               ) throws IOException {
680        super.setPropertiesFromFields(props, sd);
681        // Set blockpoolID in version with federation support
682        if (NameNodeLayoutVersion.supports(
683            LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
684          props.setProperty("blockpoolID", blockpoolID);
685        }
686      }
687      
688      static File getStorageFile(StorageDirectory sd, NameNodeFile type, long imageTxId) {
689        return new File(sd.getCurrentDir(),
690                        String.format("%s_%019d", type.getName(), imageTxId));
691      }
692      
693      /**
694       * Get a storage file for one of the files that doesn't need a txid associated
695       * (e.g version, seen_txid)
696       */
697      static File getStorageFile(StorageDirectory sd, NameNodeFile type) {
698        return new File(sd.getCurrentDir(), type.getName());
699      }
700    
701      @VisibleForTesting
702      public static String getCheckpointImageFileName(long txid) {
703        return getNameNodeFileName(NameNodeFile.IMAGE_NEW, txid);
704      }
705    
706      @VisibleForTesting
707      public static String getImageFileName(long txid) {
708        return getNameNodeFileName(NameNodeFile.IMAGE, txid);
709      }
710    
711      @VisibleForTesting
712      public static String getRollbackImageFileName(long txid) {
713        return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid);
714      }
715    
716      public static String getLegacyOIVImageFileName(long txid) {
717        return getNameNodeFileName(NameNodeFile.IMAGE_LEGACY_OIV, txid);
718      }
719    
720      private static String getNameNodeFileName(NameNodeFile nnf, long txid) {
721        return String.format("%s_%019d", nnf.getName(), txid);
722      }
723    
724      @VisibleForTesting
725      public static String getInProgressEditsFileName(long startTxId) {
726        return getNameNodeFileName(NameNodeFile.EDITS_INPROGRESS, startTxId);
727      }
728      
729      static File getInProgressEditsFile(StorageDirectory sd, long startTxId) {
730        return new File(sd.getCurrentDir(), getInProgressEditsFileName(startTxId));
731      }
732      
733      static File getFinalizedEditsFile(StorageDirectory sd,
734          long startTxId, long endTxId) {
735        return new File(sd.getCurrentDir(),
736            getFinalizedEditsFileName(startTxId, endTxId));
737      }
738    
739      static File getTemporaryEditsFile(StorageDirectory sd,
740          long startTxId, long endTxId, long timestamp) {
741        return new File(sd.getCurrentDir(),
742            getTemporaryEditsFileName(startTxId, endTxId, timestamp));
743      }
744    
745      static File getImageFile(StorageDirectory sd, NameNodeFile nnf, long txid) {
746        return new File(sd.getCurrentDir(), getNameNodeFileName(nnf, txid));
747      }
748    
749      @VisibleForTesting
750      public static String getFinalizedEditsFileName(long startTxId, long endTxId) {
751        return String.format("%s_%019d-%019d", NameNodeFile.EDITS.getName(),
752                             startTxId, endTxId);
753      }
754    
755      public static String getTemporaryEditsFileName(long startTxId, long endTxId,
756          long timestamp) {
757        return String.format("%s_%019d-%019d_%019d", NameNodeFile.EDITS_TMP.getName(),
758                             startTxId, endTxId, timestamp);
759      }
760      
761      /**
762       * Return the first readable finalized edits file for the given txid.
763       */
764      File findFinalizedEditsFile(long startTxId, long endTxId)
765      throws IOException {
766        File ret = findFile(NameNodeDirType.EDITS,
767            getFinalizedEditsFileName(startTxId, endTxId));
768        if (ret == null) {
769          throw new IOException(
770              "No edits file for txid " + startTxId + "-" + endTxId + " exists!");
771        }
772        return ret;
773      }
774        
775      /**
776       * Return the first readable image file for the given txid and image type, or
777       * null if no such image can be found
778       */
779      File findImageFile(NameNodeFile nnf, long txid) {
780        return findFile(NameNodeDirType.IMAGE,
781            getNameNodeFileName(nnf, txid));
782      }
783    
784      /**
785       * Return the first readable storage file of the given name
786       * across any of the 'current' directories in SDs of the
787       * given type, or null if no such file exists.
788       */
789      private File findFile(NameNodeDirType dirType, String name) {
790        for (StorageDirectory sd : dirIterable(dirType)) {
791          File candidate = new File(sd.getCurrentDir(), name);
792          if (FileUtil.canRead(sd.getCurrentDir()) &&
793              candidate.exists()) {
794            return candidate;
795          }
796        }
797        return null;
798      }
799    
800      /**
801       * Disable the check for pre-upgradable layouts. Needed for BackupImage.
802       * @param val Whether to disable the preupgradeable layout check.
803       */
804      void setDisablePreUpgradableLayoutCheck(boolean val) {
805        disablePreUpgradableLayoutCheck = val;
806      }
807    
808      /**
809       * Marks a list of directories as having experienced an error.
810       *
811       * @param sds A list of storage directories to mark as errored.
812       */
813      void reportErrorsOnDirectories(List<StorageDirectory> sds) {
814        for (StorageDirectory sd : sds) {
815          reportErrorsOnDirectory(sd);
816        }
817      }
818    
819      /**
820       * Reports that a directory has experienced an error.
821       * Notifies listeners that the directory is no longer
822       * available.
823       *
824       * @param sd A storage directory to mark as errored.
825       */
826      private void reportErrorsOnDirectory(StorageDirectory sd) {
827        LOG.error("Error reported on storage directory " + sd);
828    
829        String lsd = listStorageDirectories();
830        LOG.debug("current list of storage dirs:" + lsd);
831    
832        LOG.warn("About to remove corresponding storage: "
833                 + sd.getRoot().getAbsolutePath());
834        try {
835          sd.unlock();
836        } catch (Exception e) {
837          LOG.warn("Unable to unlock bad storage directory: "
838                   +  sd.getRoot().getPath(), e);
839        }
840    
841        if (this.storageDirs.remove(sd)) {
842          this.removedStorageDirs.add(sd);
843        }
844        
845        lsd = listStorageDirectories();
846        LOG.debug("at the end current list of storage dirs:" + lsd);
847      }
848      
849      /** 
850       * Processes the startup options for the clusterid and blockpoolid 
851       * for the upgrade. 
852       * @param startOpt Startup options 
853       * @param layoutVersion Layout version for the upgrade 
854       * @throws IOException
855       */
856      void processStartupOptionsForUpgrade(StartupOption startOpt, int layoutVersion)
857          throws IOException {
858        if (startOpt == StartupOption.UPGRADE || startOpt == StartupOption.UPGRADEONLY) {
859          // If upgrade from a release that does not support federation,
860          // if clusterId is provided in the startupOptions use it.
861          // Else generate a new cluster ID      
862          if (!NameNodeLayoutVersion.supports(
863              LayoutVersion.Feature.FEDERATION, layoutVersion)) {
864            if (startOpt.getClusterId() == null) {
865              startOpt.setClusterId(newClusterID());
866            }
867            setClusterID(startOpt.getClusterId());
868            setBlockPoolID(newBlockPoolID());
869          } else {
870            // Upgrade from one version of federation to another supported
871            // version of federation doesn't require clusterID.
872            // Warn the user if the current clusterid didn't match with the input
873            // clusterid.
874            if (startOpt.getClusterId() != null
875                && !startOpt.getClusterId().equals(getClusterID())) {
876              LOG.warn("Clusterid mismatch - current clusterid: " + getClusterID()
877                  + ", Ignoring given clusterid: " + startOpt.getClusterId());
878            }
879          }
880          LOG.info("Using clusterid: " + getClusterID());
881        }
882      }
883      
884      /**
885       * Report that an IOE has occurred on some file which may
886       * or may not be within one of the NN image storage directories.
887       */
888      @Override
889      public void reportErrorOnFile(File f) {
890        // We use getAbsolutePath here instead of getCanonicalPath since we know
891        // that there is some IO problem on that drive.
892        // getCanonicalPath may need to call stat() or readlink() and it's likely
893        // those calls would fail due to the same underlying IO problem.
894        String absPath = f.getAbsolutePath();
895        for (StorageDirectory sd : storageDirs) {
896          String dirPath = sd.getRoot().getAbsolutePath();
897          if (!dirPath.endsWith(File.separator)) {
898            dirPath += File.separator;
899          }
900          if (absPath.startsWith(dirPath)) {
901            reportErrorsOnDirectory(sd);
902            return;
903          }
904        }
905        
906      }
907      
908      /**
909       * Generate new clusterID.
910       * 
911       * clusterID is a persistent attribute of the cluster.
912       * It is generated when the cluster is created and remains the same
913       * during the life cycle of the cluster.  When a new name node is formated, if 
914       * this is a new cluster, a new clusterID is geneated and stored.  Subsequent 
915       * name node must be given the same ClusterID during its format to be in the 
916       * same cluster.
917       * When a datanode register it receive the clusterID and stick with it.
918       * If at any point, name node or data node tries to join another cluster, it 
919       * will be rejected.
920       * 
921       * @return new clusterID
922       */ 
923      public static String newClusterID() {
924        return "CID-" + UUID.randomUUID().toString();
925      }
926    
927      void setClusterID(String cid) {
928        clusterID = cid;
929      }
930    
931      /**
932       * try to find current cluster id in the VERSION files
933       * returns first cluster id found in any VERSION file
934       * null in case none found
935       * @return clusterId or null in case no cluster id found
936       */
937      public String determineClusterId() {
938        String cid = null;
939        Iterator<StorageDirectory> sdit = dirIterator(NameNodeDirType.IMAGE);
940        while(sdit.hasNext()) {
941          StorageDirectory sd = sdit.next();
942          try {
943            Properties props = readPropertiesFile(sd.getVersionFile());
944            cid = props.getProperty("clusterID");
945            LOG.info("current cluster id for sd="+sd.getCurrentDir() + 
946                ";lv=" + layoutVersion + ";cid=" + cid);
947            
948            if(cid != null && !cid.equals(""))
949              return cid;
950          } catch (Exception e) {
951            LOG.warn("this sd not available: " + e.getLocalizedMessage());
952          } //ignore
953        }
954        LOG.warn("couldn't find any VERSION file containing valid ClusterId");
955        return null;
956      }
957    
958      /**
959       * Generate new blockpoolID.
960       * 
961       * @return new blockpoolID
962       */ 
963      static String newBlockPoolID() throws UnknownHostException{
964        String ip = "unknownIP";
965        try {
966          ip = DNS.getDefaultIP("default");
967        } catch (UnknownHostException e) {
968          LOG.warn("Could not find ip address of \"default\" inteface.");
969          throw e;
970        }
971        
972        int rand = DFSUtil.getSecureRandom().nextInt(Integer.MAX_VALUE);
973        String bpid = "BP-" + rand + "-"+ ip + "-" + Time.now();
974        return bpid;
975      }
976    
977      /** Validate and set block pool ID */
978      void setBlockPoolID(String bpid) {
979        blockpoolID = bpid;
980      }
981    
982      /** Validate and set block pool ID */
983      private void setBlockPoolID(File storage, String bpid)
984          throws InconsistentFSStateException {
985        if (bpid == null || bpid.equals("")) {
986          throw new InconsistentFSStateException(storage, "file "
987              + Storage.STORAGE_FILE_VERSION + " has no block pool Id.");
988        }
989        
990        if (!blockpoolID.equals("") && !blockpoolID.equals(bpid)) {
991          throw new InconsistentFSStateException(storage,
992              "Unexepcted blockpoolID " + bpid + " . Expected " + blockpoolID);
993        }
994        setBlockPoolID(bpid);
995      }
996      
997      public String getBlockPoolID() {
998        return blockpoolID;
999      }
1000    
1001      /**
1002       * Iterate over all current storage directories, inspecting them
1003       * with the given inspector.
1004       */
1005      void inspectStorageDirs(FSImageStorageInspector inspector)
1006          throws IOException {
1007    
1008        // Process each of the storage directories to find the pair of
1009        // newest image file and edit file
1010        for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
1011          StorageDirectory sd = it.next();
1012          inspector.inspectDirectory(sd);
1013        }
1014      }
1015    
1016      /**
1017       * Iterate over all of the storage dirs, reading their contents to determine
1018       * their layout versions. Returns an FSImageStorageInspector which has
1019       * inspected each directory.
1020       * 
1021       * <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
1022       * @throws IOException if no valid storage dirs are found or no valid layout version
1023       */
1024      FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes,
1025          StartupOption startupOption) throws IOException {
1026        Integer layoutVersion = null;
1027        boolean multipleLV = false;
1028        StringBuilder layoutVersions = new StringBuilder();
1029    
1030        // First determine what range of layout versions we're going to inspect
1031        for (Iterator<StorageDirectory> it = dirIterator(false);
1032             it.hasNext();) {
1033          StorageDirectory sd = it.next();
1034          if (!sd.getVersionFile().exists()) {
1035            FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping...");
1036            continue;
1037          }
1038          readProperties(sd, startupOption); // sets layoutVersion
1039          int lv = getLayoutVersion();
1040          if (layoutVersion == null) {
1041            layoutVersion = Integer.valueOf(lv);
1042          } else if (!layoutVersion.equals(lv)) {
1043            multipleLV = true;
1044          }
1045          layoutVersions.append("(").append(sd.getRoot()).append(", ").append(lv).append(") ");
1046        }
1047        
1048        if (layoutVersion == null) {
1049          throw new IOException("No storage directories contained VERSION information");
1050        }
1051        if (multipleLV) {            
1052          throw new IOException(
1053              "Storage directories contain multiple layout versions: "
1054                  + layoutVersions);
1055        }
1056        // If the storage directories are with the new layout version
1057        // (ie edits_<txnid>) then use the new inspector, which will ignore
1058        // the old format dirs.
1059        FSImageStorageInspector inspector;
1060        if (NameNodeLayoutVersion.supports(
1061            LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
1062          inspector = new FSImageTransactionalStorageInspector(fileTypes);
1063        } else {
1064          inspector = new FSImagePreTransactionalStorageInspector();
1065        }
1066        
1067        inspectStorageDirs(inspector);
1068        return inspector;
1069      }
1070    
1071      public NamespaceInfo getNamespaceInfo() {
1072        return new NamespaceInfo(
1073            getNamespaceID(),
1074            getClusterID(),
1075            getBlockPoolID(),
1076            getCTime());
1077      }
1078    }