001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.hdfs.server.common;
019
020 import java.io.File;
021 import java.io.FileOutputStream;
022 import java.io.FileNotFoundException;
023 import java.io.IOException;
024 import java.io.RandomAccessFile;
025 import java.lang.management.ManagementFactory;
026 import java.nio.channels.FileLock;
027 import java.nio.channels.OverlappingFileLockException;
028 import java.util.ArrayList;
029 import java.util.Iterator;
030 import java.util.List;
031 import java.util.Properties;
032
033 import org.apache.commons.logging.Log;
034 import org.apache.commons.logging.LogFactory;
035 import org.apache.hadoop.classification.InterfaceAudience;
036 import org.apache.hadoop.fs.FileUtil;
037 import org.apache.hadoop.fs.Path;
038 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
039 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
040 import org.apache.hadoop.io.nativeio.NativeIO;
041 import org.apache.hadoop.io.nativeio.NativeIOException;
042 import org.apache.hadoop.util.ToolRunner;
043 import org.apache.hadoop.util.VersionInfo;
044
045 import com.google.common.base.Charsets;
046 import com.google.common.base.Preconditions;
047
048
049
050 /**
051 * Storage information file.
052 * <p>
053 * Local storage information is stored in a separate file VERSION.
054 * It contains type of the node,
055 * the storage layout version, the namespace id, and
056 * the fs state creation time.
057 * <p>
058 * Local storage can reside in multiple directories.
059 * Each directory should contain the same VERSION file as the others.
060 * During startup Hadoop servers (name-node and data-nodes) read their local
061 * storage information from them.
062 * <p>
 * The servers hold a lock for each storage directory while they run so that
 * other nodes cannot start up sharing the same storage.
065 * The locks are released when the servers stop (normally or abnormally).
066 *
067 */
068 @InterfaceAudience.Private
069 public abstract class Storage extends StorageInfo {
  /** Log shared by the whole Storage hierarchy. */
  public static final Log LOG = LogFactory.getLog(Storage.class.getName());

  // last layout version that did not support upgrades
  public static final int LAST_PRE_UPGRADE_LAYOUT_VERSION = -3;

  // Oldest layout version that an upgrade to the current version is
  // supported from; this corresponds to Hadoop-0.18
  public static final int LAST_UPGRADABLE_LAYOUT_VERSION = -16;
  protected static final String LAST_UPGRADABLE_HADOOP_VERSION = "Hadoop-0.18";

  /** Layout versions of 0.20.203 release */
  public static final int[] LAYOUT_VERSIONS_203 = {-19, -31};

  // Well-known file and directory names used by the storage state machine
  // (see StorageDirectory#analyzeStorage and StorageDirectory#doRecover).
  public static final String STORAGE_FILE_LOCK = "in_use.lock";
  public static final String STORAGE_DIR_CURRENT = "current";
  public static final String STORAGE_DIR_PREVIOUS = "previous";
  public static final String STORAGE_TMP_REMOVED = "removed.tmp";
  public static final String STORAGE_TMP_PREVIOUS = "previous.tmp";
  public static final String STORAGE_TMP_FINALIZED = "finalized.tmp";
  public static final String STORAGE_TMP_LAST_CKPT = "lastcheckpoint.tmp";
  public static final String STORAGE_PREVIOUS_CKPT = "previous.checkpoint";

  /**
   * The blocksBeingWritten directory which was used in some 1.x and earlier
   * releases.
   */
  public static final String STORAGE_1_BBW = "blocksBeingWritten";
096
  /**
   * Possible states of a storage directory, as determined by
   * {@link StorageDirectory#analyzeStorage}. The COMPLETE_* and RECOVER_*
   * states indicate that a previous transition (upgrade, rollback, finalize,
   * checkpoint) was interrupted; the corresponding action is carried out by
   * {@link StorageDirectory#doRecover}.
   */
  public enum StorageState {
    NON_EXISTENT,        // directory does not exist or is inaccessible
    NOT_FORMATTED,       // directory exists but has not been formatted
    COMPLETE_UPGRADE,    // mv previous.tmp -> previous
    RECOVER_UPGRADE,     // mv previous.tmp -> current
    COMPLETE_FINALIZE,   // rm finalized.tmp
    COMPLETE_ROLLBACK,   // rm removed.tmp
    RECOVER_ROLLBACK,    // mv removed.tmp -> current
    COMPLETE_CHECKPOINT, // mv lastcheckpoint.tmp -> previous.checkpoint
    RECOVER_CHECKPOINT,  // mv lastcheckpoint.tmp -> current
    NORMAL;              // consistent; no recovery needed
  }
109
  /**
   * An interface to denote storage directory type.
   * Implementations can define a type for storage directory by implementing
   * this interface.
   */
  @InterfaceAudience.Private
  public interface StorageDirType {
    /** @return the type of this storage directory */
    public StorageDirType getStorageDirType();
    /** @return true if this directory is of (or compatible with) the given type */
    public boolean isOfType(StorageDirType type);
  }
120
121 protected List<StorageDirectory> storageDirs = new ArrayList<StorageDirectory>();
122
  /**
   * Iterator over {@link #storageDirs} that can filter by directory type
   * and/or exclude shared directories. Supports {@link #remove()}.
   */
  private class DirIterator implements Iterator<StorageDirectory> {
    final StorageDirType dirType; // if non-null, only dirs of this type are returned
    final boolean includeShared;  // if false, shared dirs are skipped
    int prevIndex; // for remove()
    int nextIndex; // for next()

    DirIterator(StorageDirType dirType, boolean includeShared) {
      this.dirType = dirType;
      this.nextIndex = 0;
      this.prevIndex = 0;
      this.includeShared = includeShared;
    }

    @Override
    public boolean hasNext() {
      if (storageDirs.isEmpty() || nextIndex >= storageDirs.size())
        return false;
      if (dirType != null || !includeShared) {
        // advance nextIndex past entries rejected by the filters
        while (nextIndex < storageDirs.size()) {
          if (shouldReturnNextDir())
            break;
          nextIndex++;
        }
        if (nextIndex >= storageDirs.size())
          return false;
      }
      return true;
    }

    @Override
    public StorageDirectory next() {
      StorageDirectory sd = getStorageDir(nextIndex);
      prevIndex = nextIndex;
      nextIndex++;
      if (dirType != null || !includeShared) {
        // pre-position nextIndex on the next entry accepted by the filters
        while (nextIndex < storageDirs.size()) {
          if (shouldReturnNextDir())
            break;
          nextIndex++;
        }
      }
      return sd;
    }

    @Override
    public void remove() {
      nextIndex = prevIndex; // restore previous state
      storageDirs.remove(prevIndex); // remove last returned element
      hasNext(); // reset nextIndex to correct place
    }

    /** @return true if the dir at nextIndex passes the type/shared filters. */
    private boolean shouldReturnNextDir() {
      StorageDirectory sd = getStorageDir(nextIndex);
      return (dirType == null || sd.getStorageDirType().isOfType(dirType)) &&
          (includeShared || !sd.isShared());
    }
  }
180
181 /**
182 * @return A list of the given File in every available storage directory,
183 * regardless of whether it might exist.
184 */
185 public List<File> getFiles(StorageDirType dirType, String fileName) {
186 ArrayList<File> list = new ArrayList<File>();
187 Iterator<StorageDirectory> it =
188 (dirType == null) ? dirIterator() : dirIterator(dirType);
189 for ( ;it.hasNext(); ) {
190 list.add(new File(it.next().getCurrentDir(), fileName));
191 }
192 return list;
193 }
194
195
  /**
   * Return default iterator.
   * This iterator returns all entries in storageDirs.
   */
  public Iterator<StorageDirectory> dirIterator() {
    return dirIterator(null);
  }

  /**
   * Return iterator based on Storage Directory Type.
   * This iterator selects entries in storageDirs of type dirType and returns
   * them via the Iterator. Shared directories are included.
   */
  public Iterator<StorageDirectory> dirIterator(StorageDirType dirType) {
    return dirIterator(dirType, true);
  }

  /**
   * Return all entries in storageDirs, potentially excluding shared dirs.
   * @param includeShared whether or not to include shared dirs.
   * @return an iterator over the configured storage dirs.
   */
  public Iterator<StorageDirectory> dirIterator(boolean includeShared) {
    return dirIterator(null, includeShared);
  }

  /**
   * @param dirType all entries will be of this type of dir; null for any type
   * @param includeShared true to include any shared directories,
   *          false otherwise
   * @return an iterator over the configured storage dirs.
   */
  public Iterator<StorageDirectory> dirIterator(StorageDirType dirType,
      boolean includeShared) {
    return new DirIterator(dirType, includeShared);
  }

  /** @return an Iterable wrapping {@link #dirIterator(StorageDirType)}. */
  public Iterable<StorageDirectory> dirIterable(final StorageDirType dirType) {
    return new Iterable<StorageDirectory>() {
      @Override
      public Iterator<StorageDirectory> iterator() {
        return dirIterator(dirType);
      }
    };
  }
241
242
243 /**
244 * generate storage list (debug line)
245 */
246 public String listStorageDirectories() {
247 StringBuilder buf = new StringBuilder();
248 for (StorageDirectory sd : storageDirs) {
249 buf.append(sd.getRoot() + "(" + sd.getStorageDirType() + ");");
250 }
251 return buf.toString();
252 }
253
254 /**
255 * One of the storage directories.
256 */
257 @InterfaceAudience.Private
258 public static class StorageDirectory implements FormatConfirmable {
    final File root; // root directory
    // whether or not this dir is shared between two separate NNs for HA, or
    // between multiple block pools in the case of federation.
    final boolean isShared;
    final StorageDirType dirType; // storage dir type
    FileLock lock; // storage lock; null while the directory is not locked

    private String storageUuid = null; // Storage directory identifier.
267
    /** Construct an untyped, non-shared storage directory. */
    public StorageDirectory(File dir) {
      // default dirType is null
      this(dir, null, false);
    }

    /** Construct a non-shared storage directory of the given type. */
    public StorageDirectory(File dir, StorageDirType dirType) {
      this(dir, dirType, false);
    }

    /** Set the identifier of this storage directory. */
    public void setStorageUuid(String storageUuid) {
      this.storageUuid = storageUuid;
    }

    /** @return the identifier of this storage directory, or null if unset. */
    public String getStorageUuid() {
      return storageUuid;
    }

    /**
     * Constructor
     * @param dir directory corresponding to the storage
     * @param dirType storage directory type
     * @param isShared whether or not this dir is shared between two NNs. true
     *          disables locking on the storage directory, false enables locking
     */
    public StorageDirectory(File dir, StorageDirType dirType, boolean isShared) {
      this.root = dir;
      this.lock = null;
      this.dirType = dirType;
      this.isShared = isShared;
    }
298
    /**
     * Get root directory of this storage
     */
    public File getRoot() {
      return root;
    }

    /**
     * Get storage directory type
     */
    public StorageDirType getStorageDirType() {
      return dirType;
    }

    /**
     * Read storage properties from the given file and populate the given
     * Storage's fields from them.
     *
     * @param from the properties (VERSION) file to read
     * @param storage the Storage whose fields should be set
     * @throws IOException if the file cannot be read or parsed
     */
    public void read(File from, Storage storage) throws IOException {
      Properties props = readPropertiesFile(from);
      storage.setFieldsFromProperties(props, this);
    }
317
318 /**
319 * Clear and re-create storage directory.
320 * <p>
321 * Removes contents of the current directory and creates an empty directory.
322 *
323 * This does not fully format storage directory.
324 * It cannot write the version file since it should be written last after
325 * all other storage type dependent files are written.
326 * Derived storage is responsible for setting specific storage values and
327 * writing the version file to disk.
328 *
329 * @throws IOException
330 */
331 public void clearDirectory() throws IOException {
332 File curDir = this.getCurrentDir();
333 if (curDir.exists())
334 if (!(FileUtil.fullyDelete(curDir)))
335 throw new IOException("Cannot remove current directory: " + curDir);
336 if (!curDir.mkdirs())
337 throw new IOException("Cannot create directory " + curDir);
338 }
339
    /**
     * Directory {@code current} contains latest files defining
     * the file system meta-data.
     *
     * @return the directory path
     */
    public File getCurrentDir() {
      return new File(root, STORAGE_DIR_CURRENT);
    }

    /**
     * File {@code VERSION} contains the following fields:
     * <ol>
     * <li>node type</li>
     * <li>layout version</li>
     * <li>namespaceID</li>
     * <li>fs state creation time</li>
     * <li>other fields specific for this node type</li>
     * </ol>
     * The version file is always written last during storage directory updates.
     * The existence of the version file indicates that all other files have
     * been successfully written in the storage directory, the storage is valid
     * and does not need to be recovered.
     *
     * @return the version file path
     */
    public File getVersionFile() {
      return new File(new File(root, STORAGE_DIR_CURRENT), STORAGE_FILE_VERSION);
    }

    /**
     * File {@code VERSION} from the {@code previous} directory.
     *
     * @return the previous version file path
     */
    public File getPreviousVersionFile() {
      return new File(new File(root, STORAGE_DIR_PREVIOUS), STORAGE_FILE_VERSION);
    }

    /**
     * Directory {@code previous} contains the previous file system state,
     * which the system can be rolled back to.
     *
     * @return the directory path
     */
    public File getPreviousDir() {
      return new File(root, STORAGE_DIR_PREVIOUS);
    }

    /**
     * {@code previous.tmp} is a transient directory, which holds
     * current file system state while the new state is saved into the new
     * {@code current} during upgrade.
     * If the saving succeeds {@code previous.tmp} will be moved to
     * {@code previous}, otherwise it will be renamed back to
     * {@code current} by the recovery procedure during startup.
     *
     * @return the directory path
     */
    public File getPreviousTmp() {
      return new File(root, STORAGE_TMP_PREVIOUS);
    }

    /**
     * {@code removed.tmp} is a transient directory, which holds
     * current file system state while the previous state is moved into
     * {@code current} during rollback.
     * If the moving succeeds {@code removed.tmp} will be removed,
     * otherwise it will be renamed back to
     * {@code current} by the recovery procedure during startup.
     *
     * @return the directory path
     */
    public File getRemovedTmp() {
      return new File(root, STORAGE_TMP_REMOVED);
    }

    /**
     * {@code finalized.tmp} is a transient directory, which holds
     * the {@code previous} file system state while it is being removed
     * in response to the finalize request.
     * Finalize operation will remove {@code finalized.tmp} when completed,
     * otherwise the removal will resume upon the system startup.
     *
     * @return the directory path
     */
    public File getFinalizedTmp() {
      return new File(root, STORAGE_TMP_FINALIZED);
    }

    /**
     * {@code lastcheckpoint.tmp} is a transient directory, which holds
     * current file system state while the new state is saved into the new
     * {@code current} during regular namespace updates.
     * If the saving succeeds {@code lastcheckpoint.tmp} will be moved to
     * {@code previous.checkpoint}, otherwise it will be renamed back to
     * {@code current} by the recovery procedure during startup.
     *
     * @return the directory path
     */
    public File getLastCheckpointTmp() {
      return new File(root, STORAGE_TMP_LAST_CKPT);
    }

    /**
     * {@code previous.checkpoint} is a directory, which holds the previous
     * (before the last save) state of the storage directory.
     * The directory is created as a reference only, it does not play a role
     * in state recovery procedures, and is recycled automatically,
     * but it may be useful for manual recovery of a stale state of the system.
     *
     * @return the directory path
     */
    public File getPreviousCheckpoint() {
      return new File(root, STORAGE_PREVIOUS_CKPT);
    }
456
457 /**
458 * Check consistency of the storage directory
459 *
460 * @param startOpt a startup option.
461 *
462 * @return state {@link StorageState} of the storage directory
463 * @throws InconsistentFSStateException if directory state is not
464 * consistent and cannot be recovered.
465 * @throws IOException
466 */
467 public StorageState analyzeStorage(StartupOption startOpt, Storage storage)
468 throws IOException {
469 assert root != null : "root is null";
470 boolean hadMkdirs = false;
471 String rootPath = root.getCanonicalPath();
472 try { // check that storage exists
473 if (!root.exists()) {
474 // storage directory does not exist
475 if (startOpt != StartupOption.FORMAT &&
476 startOpt != StartupOption.HOTSWAP) {
477 LOG.warn("Storage directory " + rootPath + " does not exist");
478 return StorageState.NON_EXISTENT;
479 }
480 LOG.info(rootPath + " does not exist. Creating ...");
481 if (!root.mkdirs())
482 throw new IOException("Cannot create directory " + rootPath);
483 hadMkdirs = true;
484 }
485 // or is inaccessible
486 if (!root.isDirectory()) {
487 LOG.warn(rootPath + "is not a directory");
488 return StorageState.NON_EXISTENT;
489 }
490 if (!FileUtil.canWrite(root)) {
491 LOG.warn("Cannot access storage directory " + rootPath);
492 return StorageState.NON_EXISTENT;
493 }
494 } catch(SecurityException ex) {
495 LOG.warn("Cannot access storage directory " + rootPath, ex);
496 return StorageState.NON_EXISTENT;
497 }
498
499 this.lock(); // lock storage if it exists
500
501 // If startOpt is HOTSWAP, it returns NOT_FORMATTED for empty directory,
502 // while it also checks the layout version.
503 if (startOpt == HdfsServerConstants.StartupOption.FORMAT ||
504 (startOpt == StartupOption.HOTSWAP && hadMkdirs))
505 return StorageState.NOT_FORMATTED;
506
507 if (startOpt != HdfsServerConstants.StartupOption.IMPORT) {
508 storage.checkOldLayoutStorage(this);
509 }
510
511 // check whether current directory is valid
512 File versionFile = getVersionFile();
513 boolean hasCurrent = versionFile.exists();
514
515 // check which directories exist
516 boolean hasPrevious = getPreviousDir().exists();
517 boolean hasPreviousTmp = getPreviousTmp().exists();
518 boolean hasRemovedTmp = getRemovedTmp().exists();
519 boolean hasFinalizedTmp = getFinalizedTmp().exists();
520 boolean hasCheckpointTmp = getLastCheckpointTmp().exists();
521
522 if (!(hasPreviousTmp || hasRemovedTmp
523 || hasFinalizedTmp || hasCheckpointTmp)) {
524 // no temp dirs - no recovery
525 if (hasCurrent)
526 return StorageState.NORMAL;
527 if (hasPrevious)
528 throw new InconsistentFSStateException(root,
529 "version file in current directory is missing.");
530 return StorageState.NOT_FORMATTED;
531 }
532
533 if ((hasPreviousTmp?1:0) + (hasRemovedTmp?1:0)
534 + (hasFinalizedTmp?1:0) + (hasCheckpointTmp?1:0) > 1)
535 // more than one temp dirs
536 throw new InconsistentFSStateException(root,
537 "too many temporary directories.");
538
539 // # of temp dirs == 1 should either recover or complete a transition
540 if (hasCheckpointTmp) {
541 return hasCurrent ? StorageState.COMPLETE_CHECKPOINT
542 : StorageState.RECOVER_CHECKPOINT;
543 }
544
545 if (hasFinalizedTmp) {
546 if (hasPrevious)
547 throw new InconsistentFSStateException(root,
548 STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_FINALIZED
549 + "cannot exist together.");
550 return StorageState.COMPLETE_FINALIZE;
551 }
552
553 if (hasPreviousTmp) {
554 if (hasPrevious)
555 throw new InconsistentFSStateException(root,
556 STORAGE_DIR_PREVIOUS + " and " + STORAGE_TMP_PREVIOUS
557 + " cannot exist together.");
558 if (hasCurrent)
559 return StorageState.COMPLETE_UPGRADE;
560 return StorageState.RECOVER_UPGRADE;
561 }
562
563 assert hasRemovedTmp : "hasRemovedTmp must be true";
564 if (!(hasCurrent ^ hasPrevious))
565 throw new InconsistentFSStateException(root,
566 "one and only one directory " + STORAGE_DIR_CURRENT
567 + " or " + STORAGE_DIR_PREVIOUS
568 + " must be present when " + STORAGE_TMP_REMOVED
569 + " exists.");
570 if (hasCurrent)
571 return StorageState.COMPLETE_ROLLBACK;
572 return StorageState.RECOVER_ROLLBACK;
573 }
574
    /**
     * Complete or recover storage state from previously failed transition.
     * Each case below corresponds to one of the non-NORMAL states returned
     * by {@link #analyzeStorage}.
     *
     * @param curState specifies what/how the state should be recovered
     * @throws IOException if a rename/delete fails or the state is unexpected
     */
    public void doRecover(StorageState curState) throws IOException {
      File curDir = getCurrentDir();
      String rootPath = root.getCanonicalPath();
      switch(curState) {
      case COMPLETE_UPGRADE: // mv previous.tmp -> previous
        LOG.info("Completing previous upgrade for storage directory "
            + rootPath);
        rename(getPreviousTmp(), getPreviousDir());
        return;
      case RECOVER_UPGRADE: // mv previous.tmp -> current
        LOG.info("Recovering storage directory " + rootPath
            + " from previous upgrade");
        if (curDir.exists())
          deleteDir(curDir);
        rename(getPreviousTmp(), curDir);
        return;
      case COMPLETE_ROLLBACK: // rm removed.tmp
        LOG.info("Completing previous rollback for storage directory "
            + rootPath);
        deleteDir(getRemovedTmp());
        return;
      case RECOVER_ROLLBACK: // mv removed.tmp -> current
        LOG.info("Recovering storage directory " + rootPath
            + " from previous rollback");
        rename(getRemovedTmp(), curDir);
        return;
      case COMPLETE_FINALIZE: // rm finalized.tmp
        LOG.info("Completing previous finalize for storage directory "
            + rootPath);
        deleteDir(getFinalizedTmp());
        return;
      case COMPLETE_CHECKPOINT: // mv lastcheckpoint.tmp -> previous.checkpoint
        LOG.info("Completing previous checkpoint for storage directory "
            + rootPath);
        File prevCkptDir = getPreviousCheckpoint();
        if (prevCkptDir.exists())
          deleteDir(prevCkptDir);
        rename(getLastCheckpointTmp(), prevCkptDir);
        return;
      case RECOVER_CHECKPOINT: // mv lastcheckpoint.tmp -> current
        LOG.info("Recovering storage directory " + rootPath
            + " from failed checkpoint");
        if (curDir.exists())
          deleteDir(curDir);
        rename(getLastCheckpointTmp(), curDir);
        return;
      default:
        throw new IOException("Unexpected FS state: " + curState);
      }
    }
631
632 /**
633 * @return true if the storage directory should prompt the user prior
634 * to formatting (i.e if the directory appears to contain some data)
635 * @throws IOException if the SD cannot be accessed due to an IO error
636 */
637 @Override
638 public boolean hasSomeData() throws IOException {
639 // Its alright for a dir not to exist, or to exist (properly accessible)
640 // and be completely empty.
641 if (!root.exists()) return false;
642
643 if (!root.isDirectory()) {
644 // a file where you expect a directory should not cause silent
645 // formatting
646 return true;
647 }
648
649 if (FileUtil.listFiles(root).length == 0) {
650 // Empty dir can format without prompt.
651 return false;
652 }
653
654 return true;
655 }
656
    /** @return true if this directory is shared (and therefore never locked). */
    public boolean isShared() {
      return isShared;
    }
660
661
662 /**
663 * Lock storage to provide exclusive access.
664 *
665 * <p> Locking is not supported by all file systems.
666 * E.g., NFS does not consistently support exclusive locks.
667 *
668 * <p> If locking is supported we guarantee exclusive access to the
669 * storage directory. Otherwise, no guarantee is given.
670 *
671 * @throws IOException if locking fails
672 */
673 public void lock() throws IOException {
674 if (isShared()) {
675 LOG.info("Locking is disabled");
676 return;
677 }
678 FileLock newLock = tryLock();
679 if (newLock == null) {
680 String msg = "Cannot lock storage " + this.root
681 + ". The directory is already locked";
682 LOG.info(msg);
683 throw new IOException(msg);
684 }
685 // Don't overwrite lock until success - this way if we accidentally
686 // call lock twice, the internal state won't be cleared by the second
687 // (failed) lock attempt
688 lock = newLock;
689 }
690
    /**
     * Attempts to acquire an exclusive lock on the storage.
     *
     * @return A lock object representing the newly-acquired lock or
     * <code>null</code> if storage is already locked.
     * @throws IOException if locking fails.
     */
    @SuppressWarnings("resource")
    FileLock tryLock() throws IOException {
      boolean deletionHookAdded = false;
      File lockF = new File(root, STORAGE_FILE_LOCK);
      if (!lockF.exists()) {
        lockF.deleteOnExit();
        deletionHookAdded = true;
      }
      RandomAccessFile file = new RandomAccessFile(lockF, "rws");
      String jvmName = ManagementFactory.getRuntimeMXBean().getName();
      FileLock res = null;
      try {
        res = file.getChannel().tryLock();
        if (null == res) {
          // Lock held by another process: route through the same handling
          // as an overlapping lock held within this JVM.
          throw new OverlappingFileLockException();
        }
        // Record which JVM holds the lock, for diagnostics in the error
        // branch below.
        file.write(jvmName.getBytes(Charsets.UTF_8));
        LOG.info("Lock on " + lockF + " acquired by nodename " + jvmName);
      } catch(OverlappingFileLockException oe) {
        // Cannot read from the locked file on Windows.
        String lockingJvmName = Path.WINDOWS ? "" : (" " + file.readLine());
        LOG.error("It appears that another namenode" + lockingJvmName
            + " has already locked the storage directory");
        file.close();
        return null;
      } catch(IOException e) {
        LOG.error("Failed to acquire lock on " + lockF + ". If this storage directory is mounted via NFS, "
            + "ensure that the appropriate nfs lock services are running.", e);
        file.close();
        throw e;
      }
      if (res != null && !deletionHookAdded) {
        // If the file existed prior to our startup, we didn't
        // call deleteOnExit above. But since we successfully locked
        // the dir, we can take care of cleaning it up.
        lockF.deleteOnExit();
      }
      // NOTE: on success the RandomAccessFile is intentionally left open
      // (hence the @SuppressWarnings above); closing it would release the
      // lock. It is closed via lock.channel().close() in unlock().
      return res;
    }
737
738 /**
739 * Unlock storage.
740 *
741 * @throws IOException
742 */
743 public void unlock() throws IOException {
744 if (this.lock == null)
745 return;
746 this.lock.release();
747 lock.channel().close();
748 lock = null;
749 }
750
    /** @return a human-readable description used in logs and format prompts. */
    @Override
    public String toString() {
      return "Storage Directory " + this.root;
    }
755
    /**
     * Check whether underlying file system supports file locking.
     *
     * <p>Strategy: attempt to hold two locks on the storage at once. On a
     * file system with working exclusive locks the second attempt is
     * refused, so this method returns true; if both attempts succeed
     * simultaneously, locking is not enforced and false is returned.
     *
     * @return <code>true</code> if exclusive locks are supported or
     * <code>false</code> otherwise.
     * @throws IOException
     * @see StorageDirectory#lock()
     */
    public boolean isLockSupported() throws IOException {
      FileLock firstLock = null;
      FileLock secondLock = null;
      try {
        firstLock = lock;
        if(firstLock == null) {
          firstLock = tryLock();
          // Another process already holds the lock, so locking clearly works.
          if(firstLock == null)
            return true;
        }
        secondLock = tryLock();
        // Second attempt refused while the first lock is held: locks work.
        if(secondLock == null)
          return true;
      } finally {
        // Release only locks acquired here; keep a pre-existing this.lock.
        if(firstLock != null && firstLock != lock) {
          firstLock.release();
          firstLock.channel().close();
        }
        if(secondLock != null) {
          secondLock.release();
          secondLock.channel().close();
        }
      }
      // Both locks were granted at once: locking is not enforced here.
      return false;
    }
789 }
790
  /**
   * Create empty storage info of the specified type
   */
  protected Storage(NodeType type) {
    super(type);
  }

  /** Create storage, copying fields from the given storage info. */
  protected Storage(StorageInfo storageInfo) {
    super(storageInfo);
  }
801
  /** @return the number of configured storage directories. */
  public int getNumStorageDirs() {
    return storageDirs.size();
  }

  /** @return the storage directory at the given index. */
  public StorageDirectory getStorageDir(int idx) {
    return storageDirs.get(idx);
  }

  /**
   * @return the storage directory, with the precondition that this storage
   * has exactly one storage directory
   */
  public StorageDirectory getSingularStorageDir() {
    Preconditions.checkState(storageDirs.size() == 1);
    return storageDirs.get(0);
  }

  /** Add a storage directory to the managed list. */
  protected void addStorageDir(StorageDirectory sd) {
    storageDirs.add(sd);
  }
822
  /**
   * Return true if the layout of the given storage directory is from a version
   * of Hadoop prior to the introduction of the "current" and "previous"
   * directories which allow upgrade and rollback.
   */
  public abstract boolean isPreUpgradableLayout(StorageDirectory sd)
  throws IOException;

  /**
   * Check if the given storage directory comes from a version of Hadoop
   * prior to when the directory layout changed (ie 0.13). If this is
   * the case, this method throws an IOException.
   */
  private void checkOldLayoutStorage(StorageDirectory sd) throws IOException {
    if (isPreUpgradableLayout(sd)) {
      // 0 signals "too old to determine" to checkVersionUpgradable
      checkVersionUpgradable(0);
    }
  }
841
842 /**
843 * Checks if the upgrade from {@code oldVersion} is supported.
844 * @param oldVersion the version of the metadata to check with the current
845 * version
846 * @throws IOException if upgrade is not supported
847 */
848 public static void checkVersionUpgradable(int oldVersion)
849 throws IOException {
850 if (oldVersion > LAST_UPGRADABLE_LAYOUT_VERSION) {
851 String msg = "*********** Upgrade is not supported from this " +
852 " older version " + oldVersion +
853 " of storage to the current version." +
854 " Please upgrade to " + LAST_UPGRADABLE_HADOOP_VERSION +
855 " or a later version and then upgrade to current" +
856 " version. Old layout version is " +
857 (oldVersion == 0 ? "'too old'" : (""+oldVersion)) +
858 " and latest layout version this software version can" +
859 " upgrade from is " + LAST_UPGRADABLE_LAYOUT_VERSION +
860 ". ************";
861 LOG.error(msg);
862 throw new IOException(msg);
863 }
864
865 }
866
  /**
   * Iterate over each of the {@link FormatConfirmable} objects,
   * potentially checking with the user whether it should be formatted.
   *
   * If running in interactive mode, will prompt the user for each
   * directory to allow them to format anyway. Otherwise, returns
   * false, unless 'force' is specified.
   *
   * @param items the storages/journals to confirm formatting for
   * @param force format regardless of whether dirs exist
   * @param interactive prompt the user when a dir exists
   * @return true if formatting should proceed
   * @throws IOException if some storage cannot be accessed
   */
  public static boolean confirmFormat(
      Iterable<? extends FormatConfirmable> items,
      boolean force, boolean interactive) throws IOException {
    for (FormatConfirmable item : items) {
      // Empty storage needs no confirmation at all.
      if (!item.hasSomeData())
        continue;
      if (force) { // Don't confirm, always format.
        System.err.println(
            "Data exists in " + item + ". Formatting anyway.");
        continue;
      }
      if (!interactive) { // Don't ask - always don't format
        System.err.println(
            "Running in non-interactive mode, and data appears to exist in " +
            item + ". Not formatting.");
        return false;
      }
      if (!ToolRunner.confirmPrompt("Re-format filesystem in " + item + " ?")) {
        System.err.println("Format aborted in " + item);
        return false;
      }
    }

    return true;
  }
905
  /**
   * Interface for classes which need to have the user confirm their
   * formatting during NameNode -format and other similar operations.
   *
   * This is currently a storage directory or journal manager.
   */
  @InterfaceAudience.Private
  public interface FormatConfirmable {
    /**
     * @return true if the storage seems to have some valid data in it,
     * and the user should be required to confirm the format. Otherwise,
     * false.
     * @throws IOException if the storage cannot be accessed at all.
     */
    public boolean hasSomeData() throws IOException;

    /**
     * @return a string representation of the formattable item, suitable
     * for display to the user inside a prompt
     * (e.g. via {@link Storage#confirmFormat})
     */
    public String toString();
  }
928
  /**
   * Set common storage fields into the given properties object.
   * Should be overloaded if additional fields need to be set.
   *
   * @param props the Properties object to write into
   * @param sd the storage directory the properties are being written for
   * @throws IOException if a field value cannot be produced
   */
  protected void setPropertiesFromFields(Properties props,
                                         StorageDirectory sd)
      throws IOException {
    props.setProperty("layoutVersion", String.valueOf(layoutVersion));
    props.setProperty("storageType", storageType.toString());
    props.setProperty("namespaceID", String.valueOf(namespaceID));
    // Set clusterID in version with federation support
    if (versionSupportsFederation(getServiceLayoutFeatureMap())) {
      props.setProperty("clusterID", clusterID);
    }
    props.setProperty("cTime", String.valueOf(cTime));
  }

  /**
   * Write properties to the VERSION file in the given storage directory.
   */
  public void writeProperties(StorageDirectory sd) throws IOException {
    writeProperties(sd.getVersionFile(), sd);
  }

  /** Collect this storage's fields and persist them to the given file. */
  public void writeProperties(File to, StorageDirectory sd) throws IOException {
    Properties props = new Properties();
    setPropertiesFromFields(props, sd);
    writeProperties(to, sd, props);
  }
960
  /**
   * Persist the given properties to {@code to}: the new contents are written
   * at the head of the file and the file is then truncated to the new length,
   * so an interruption leaves either the old contents or the new contents
   * plus a recoverable tail (see inline comments).
   *
   * @param to the VERSION file to overwrite
   * @param sd the storage directory owning the file (not used directly here;
   *           kept for signature symmetry with the instance overloads)
   * @param props the properties to store
   * @throws IOException on any write failure
   */
  public static void writeProperties(File to, StorageDirectory sd,
      Properties props) throws IOException {
    RandomAccessFile file = new RandomAccessFile(to, "rws");
    FileOutputStream out = null;
    try {
      file.seek(0);
      out = new FileOutputStream(file.getFD());
      /*
       * If server is interrupted before this line,
       * the version file will remain unchanged.
       */
      props.store(out, null);
      /*
       * Now the new fields are flushed to the head of the file, but file
       * length can still be larger then required and therefore the file can
       * contain whole or corrupted fields from its old contents in the end.
       * If server is interrupted here and restarted later these extra fields
       * either should not effect server behavior or should be handled
       * by the server correctly.
       */
      file.setLength(out.getChannel().position());
    } finally {
      if (out != null) {
        out.close();
      }
      file.close();
    }
  }
989
990 public static void rename(File from, File to) throws IOException {
991 try {
992 NativeIO.renameTo(from, to);
993 } catch (NativeIOException e) {
994 throw new IOException("Failed to rename " + from.getCanonicalPath()
995 + " to " + to.getCanonicalPath() + " due to failure in native rename. "
996 + e.toString());
997 }
998 }
999
1000 /**
1001 * Copies a file (usually large) to a new location using native unbuffered IO.
1002 * <p>
1003 * This method copies the contents of the specified source file
1004 * to the specified destination file using OS specific unbuffered IO.
1005 * The goal is to avoid churning the file system buffer cache when copying
1006 * large files.
1007 *
1008 * We can't use FileUtils#copyFile from apache-commons-io because it
1009 * is a buffered IO based on FileChannel#transferFrom, which uses MmapByteBuffer
1010 * internally.
1011 *
1012 * The directory holding the destination file is created if it does not exist.
1013 * If the destination file exists, then this method will delete it first.
1014 * <p>
1015 * <strong>Note:</strong> Setting <code>preserveFileDate</code> to
1016 * {@code true} tries to preserve the file's last modified
1017 * date/times using {@link File#setLastModified(long)}, however it is
1018 * not guaranteed that the operation will succeed.
1019 * If the modification operation fails, no indication is provided.
1020 *
1021 * @param srcFile an existing file to copy, must not be {@code null}
1022 * @param destFile the new file, must not be {@code null}
1023 * @param preserveFileDate true if the file date of the copy
1024 * should be the same as the original
1025 *
1026 * @throws NullPointerException if source or destination is {@code null}
1027 * @throws IOException if source or destination is invalid
1028 * @throws IOException if an IO error occurs during copying
1029 */
1030 public static void nativeCopyFileUnbuffered(File srcFile, File destFile,
1031 boolean preserveFileDate) throws IOException {
1032 if (srcFile == null) {
1033 throw new NullPointerException("Source must not be null");
1034 }
1035 if (destFile == null) {
1036 throw new NullPointerException("Destination must not be null");
1037 }
1038 if (srcFile.exists() == false) {
1039 throw new FileNotFoundException("Source '" + srcFile + "' does not exist");
1040 }
1041 if (srcFile.isDirectory()) {
1042 throw new IOException("Source '" + srcFile + "' exists but is a directory");
1043 }
1044 if (srcFile.getCanonicalPath().equals(destFile.getCanonicalPath())) {
1045 throw new IOException("Source '" + srcFile + "' and destination '" +
1046 destFile + "' are the same");
1047 }
1048 File parentFile = destFile.getParentFile();
1049 if (parentFile != null) {
1050 if (!parentFile.mkdirs() && !parentFile.isDirectory()) {
1051 throw new IOException("Destination '" + parentFile
1052 + "' directory cannot be created");
1053 }
1054 }
1055 if (destFile.exists()) {
1056 if (FileUtil.canWrite(destFile) == false) {
1057 throw new IOException("Destination '" + destFile
1058 + "' exists but is read-only");
1059 } else {
1060 if (destFile.delete() == false) {
1061 throw new IOException("Destination '" + destFile
1062 + "' exists but cannot be deleted");
1063 }
1064 }
1065 }
1066 try {
1067 NativeIO.copyFileUnbuffered(srcFile, destFile);
1068 } catch (NativeIOException e) {
1069 throw new IOException("Failed to copy " + srcFile.getCanonicalPath()
1070 + " to " + destFile.getCanonicalPath()
1071 + " due to failure in NativeIO#copyFileUnbuffered(). "
1072 + e.toString());
1073 }
1074 if (srcFile.length() != destFile.length()) {
1075 throw new IOException("Failed to copy full contents from '" + srcFile
1076 + "' to '" + destFile + "'");
1077 }
1078 if (preserveFileDate) {
1079 if (destFile.setLastModified(srcFile.lastModified()) == false) {
1080 if (LOG.isDebugEnabled()) {
1081 LOG.debug("Failed to preserve last modified date from'" + srcFile
1082 + "' to '" + destFile + "'");
1083 }
1084 }
1085 }
1086 }
1087
1088 /**
1089 * Recursively delete all the content of the directory first and then
1090 * the directory itself from the local filesystem.
1091 * @param dir The directory to delete
1092 * @throws IOException
1093 */
1094 public static void deleteDir(File dir) throws IOException {
1095 if (!FileUtil.fullyDelete(dir))
1096 throw new IOException("Failed to delete " + dir.getCanonicalPath());
1097 }
1098
1099 /**
1100 * Write all data storage files.
1101 * @throws IOException
1102 */
1103 public void writeAll() throws IOException {
1104 this.layoutVersion = getServiceLayoutVersion();
1105 for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) {
1106 writeProperties(it.next());
1107 }
1108 }
1109
1110 /**
1111 * Unlock all storage directories.
1112 * @throws IOException
1113 */
1114 public void unlockAll() throws IOException {
1115 for (Iterator<StorageDirectory> it = storageDirs.iterator(); it.hasNext();) {
1116 it.next().unlock();
1117 }
1118 }
1119
1120 public static String getBuildVersion() {
1121 return VersionInfo.getRevision();
1122 }
1123
1124 public static String getRegistrationID(StorageInfo storage) {
1125 return "NS-" + Integer.toString(storage.getNamespaceID())
1126 + "-" + storage.getClusterID()
1127 + "-" + Long.toString(storage.getCTime());
1128 }
1129
1130 public static boolean is203LayoutVersion(int layoutVersion) {
1131 for (int lv203 : LAYOUT_VERSIONS_203) {
1132 if (lv203 == layoutVersion) {
1133 return true;
1134 }
1135 }
1136 return false;
1137 }
1138 }