001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.hdfs;
020
import com.google.common.collect.Iterators;
import com.google.common.util.concurrent.UncheckedExecutionException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hdfs.inotify.Event;
import org.apache.hadoop.hdfs.inotify.EventsList;
import org.apache.hadoop.hdfs.inotify.MissingEventsException;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.util.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
043
044 /**
045 * Stream for reading inotify events. DFSInotifyEventInputStreams should not
046 * be shared among multiple threads.
047 */
048 @InterfaceAudience.Public
049 @InterfaceStability.Unstable
050 public class DFSInotifyEventInputStream {
051 public static Logger LOG = LoggerFactory.getLogger(DFSInotifyEventInputStream
052 .class);
053
054 private final ClientProtocol namenode;
055 private Iterator<Event> it;
056 private long lastReadTxid;
057 /**
058 * The most recent txid the NameNode told us it has sync'ed -- helps us
059 * determine how far behind we are in the edit stream.
060 */
061 private long syncTxid;
062 /**
063 * Used to generate wait times in {@link DFSInotifyEventInputStream#take()}.
064 */
065 private Random rng = new Random();
066
067 private static final int INITIAL_WAIT_MS = 10;
068
069 DFSInotifyEventInputStream(ClientProtocol namenode) throws IOException {
070 this(namenode, namenode.getCurrentEditLogTxid()); // only consider new txn's
071 }
072
073 DFSInotifyEventInputStream(ClientProtocol namenode, long lastReadTxid)
074 throws IOException {
075 this.namenode = namenode;
076 this.it = Iterators.emptyIterator();
077 this.lastReadTxid = lastReadTxid;
078 }
079
080 /**
081 * Returns the next event in the stream or null if no new events are currently
082 * available.
083 *
084 * @throws IOException because of network error or edit log
085 * corruption. Also possible if JournalNodes are unresponsive in the
086 * QJM setting (even one unresponsive JournalNode is enough in rare cases),
087 * so catching this exception and retrying at least a few times is
088 * recommended.
089 * @throws MissingEventsException if we cannot return the next event in the
090 * stream because the data for the event (and possibly some subsequent events)
091 * has been deleted (generally because this stream is a very large number of
092 * events behind the current state of the NameNode). It is safe to continue
093 * reading from the stream after this exception is thrown -- the next
094 * available event will be returned.
095 */
096 public Event poll() throws IOException, MissingEventsException {
097 // need to keep retrying until the NN sends us the latest committed txid
098 if (lastReadTxid == -1) {
099 LOG.debug("poll(): lastReadTxid is -1, reading current txid from NN");
100 lastReadTxid = namenode.getCurrentEditLogTxid();
101 return null;
102 }
103 if (!it.hasNext()) {
104 EventsList el = namenode.getEditsFromTxid(lastReadTxid + 1);
105 if (el.getLastTxid() != -1) {
106 // we only want to set syncTxid when we were actually able to read some
107 // edits on the NN -- otherwise it will seem like edits are being
108 // generated faster than we can read them when the problem is really
109 // that we are temporarily unable to read edits
110 syncTxid = el.getSyncTxid();
111 it = el.getEvents().iterator();
112 long formerLastReadTxid = lastReadTxid;
113 lastReadTxid = el.getLastTxid();
114 if (el.getFirstTxid() != formerLastReadTxid + 1) {
115 throw new MissingEventsException(formerLastReadTxid + 1,
116 el.getFirstTxid());
117 }
118 } else {
119 LOG.debug("poll(): read no edits from the NN when requesting edits " +
120 "after txid {}", lastReadTxid);
121 return null;
122 }
123 }
124
125 if (it.hasNext()) { // can be empty if el.getLastTxid != -1 but none of the
126 // newly seen edit log ops actually got converted to events
127 return it.next();
128 } else {
129 return null;
130 }
131 }
132
133 /**
134 * Return a estimate of how many events behind the NameNode's current state
135 * this stream is. Clients should periodically call this method and check if
136 * its result is steadily increasing, which indicates that they are falling
137 * behind (i.e. events are being generated faster than the client is reading
138 * them). If a client falls too far behind events may be deleted before the
139 * client can read them.
140 * <p/>
141 * A return value of -1 indicates that an estimate could not be produced, and
142 * should be ignored. The value returned by this method is really only useful
143 * when compared to previous or subsequent returned values.
144 */
145 public long getEventsBehindEstimate() {
146 if (syncTxid == 0) {
147 return -1;
148 } else {
149 assert syncTxid >= lastReadTxid;
150 // this gives the difference between the last txid we have fetched to the
151 // client and syncTxid at the time we last fetched events from the
152 // NameNode
153 return syncTxid - lastReadTxid;
154 }
155 }
156
157 /**
158 * Returns the next event in the stream, waiting up to the specified amount of
159 * time for a new event. Returns null if a new event is not available at the
160 * end of the specified amount of time. The time before the method returns may
161 * exceed the specified amount of time by up to the time required for an RPC
162 * to the NameNode.
163 *
164 * @param time number of units of the given TimeUnit to wait
165 * @param tu the desired TimeUnit
166 * @throws IOException see {@link DFSInotifyEventInputStream#poll()}
167 * @throws MissingEventsException
168 * see {@link DFSInotifyEventInputStream#poll()}
169 * @throws InterruptedException if the calling thread is interrupted
170 */
171 public Event poll(long time, TimeUnit tu) throws IOException,
172 InterruptedException, MissingEventsException {
173 long initialTime = Time.monotonicNow();
174 long totalWait = TimeUnit.MILLISECONDS.convert(time, tu);
175 long nextWait = INITIAL_WAIT_MS;
176 Event next = null;
177 while ((next = poll()) == null) {
178 long timeLeft = totalWait - (Time.monotonicNow() - initialTime);
179 if (timeLeft <= 0) {
180 LOG.debug("timed poll(): timed out");
181 break;
182 } else if (timeLeft < nextWait * 2) {
183 nextWait = timeLeft;
184 } else {
185 nextWait *= 2;
186 }
187 LOG.debug("timed poll(): poll() returned null, sleeping for {} ms",
188 nextWait);
189 Thread.sleep(nextWait);
190 }
191
192 return next;
193 }
194
195 /**
196 * Returns the next event in the stream, waiting indefinitely if a new event
197 * is not immediately available.
198 *
199 * @throws IOException see {@link DFSInotifyEventInputStream#poll()}
200 * @throws MissingEventsException see
201 * {@link DFSInotifyEventInputStream#poll()}
202 * @throws InterruptedException if the calling thread is interrupted
203 */
204 public Event take() throws IOException, InterruptedException,
205 MissingEventsException {
206 Event next = null;
207 int nextWaitMin = INITIAL_WAIT_MS;
208 while ((next = poll()) == null) {
209 // sleep for a random period between nextWaitMin and nextWaitMin * 2
210 // to avoid stampedes at the NN if there are multiple clients
211 int sleepTime = nextWaitMin + rng.nextInt(nextWaitMin);
212 LOG.debug("take(): poll() returned null, sleeping for {} ms", sleepTime);
213 Thread.sleep(sleepTime);
214 // the maximum sleep is 2 minutes
215 nextWaitMin = Math.min(60000, nextWaitMin * 2);
216 }
217
218 return next;
219 }
220 }