From 8d6f2e3fe8851b581309da25fc4c32f8be675932 Mon Sep 17 00:00:00 2001
From: Brian Goff <cpuguy83@gmail.com>
Date: Mon, 11 Jul 2016 16:31:42 -0400
Subject: [PATCH] Fix issues with tailing rotated jsonlog file

Fixes a race where the log reader would get events for both an actual
rotation as well as from fsnotify (`fsnotify.Rename`).
This issue becomes extremely apparent when rotations are fast, for
example:

```
$ docker run -d --name test --log-opt max-size=1 --log-opt max-file=2 \
    busybox sh -c 'while true; do echo hello; usleep 100000; done'
```

With this change the log reader for jsonlogs can handle rotations that
happen as above.

Instead of listening for both fs events AND rotation events
simultaneously, which due to channel buffering could make a single
rotation look like two, the reader now only listens for fs events
(like `Rename`) and then waits to be notified about the rotation by
the logger.
This makes sure that we don't see 2 rotations for 1, and that we don't
start trying to read until the logger is actually ready for us to.
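
For reference, the reworked `followLogs` hunk below boils down to the
following stripped-down sketch (illustrative only: it uses plain
`gopkg.in/fsnotify.v1` instead of docker's `filenotify` wrapper, omits
the JSON decoding and retry logic, and `tailer`/`followRotations` are
made-up names):

```
// Illustrative sketch of the wait-then-reopen flow; not the daemon code.
// Assumes a plain fsnotify watcher instead of docker's filenotify wrapper.
package tailer

import (
	"log"
	"os"

	"gopkg.in/fsnotify.v1"
)

// followRotations (hypothetical helper) reopens name only after the
// logger signals on notifyRotate that the rotation has completed,
// instead of acting on the raw Rename/Remove event alone.
func followRotations(name string, notifyRotate chan interface{}) error {
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		return err
	}
	defer watcher.Close()

	if err := watcher.Add(name); err != nil {
		return err
	}

	f, err := os.Open(name)
	if err != nil {
		return err
	}
	defer func() { f.Close() }()

	for {
		select {
		case e := <-watcher.Events:
			switch e.Op {
			case fsnotify.Write:
				// new data was appended; decode from f here
			case fsnotify.Rename, fsnotify.Remove:
				// the file was rotated away: wait for the logger to
				// finish the rotation before touching the new file
				<-notifyRotate
				f.Close()
				watcher.Remove(name)
				if f, err = os.Open(name); err != nil {
					return err
				}
				if err := watcher.Add(name); err != nil {
					return err
				}
			}
		case werr := <-watcher.Errors:
			log.Printf("logger got error watching file: %v", werr)
			return werr
		}
	}
}
```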

Signed-off-by: Brian Goff <cpuguy83@gmail.com>

This commit is a pending upstream fix for broken log tailing. The
original commit can be found in the upstream PR:

  - https://github.com/docker/docker/pull/24514

Signed-off-by: Christian Stewart <christian@paral.in>
---
 daemon/logger/jsonfilelog/read.go | 180 +++++++++++++++++++++++++-------------
 1 file changed, 119 insertions(+), 61 deletions(-)

diff --git a/daemon/logger/jsonfilelog/read.go b/daemon/logger/jsonfilelog/read.go
index bea83dd..0cb44af 100644
--- a/daemon/logger/jsonfilelog/read.go
+++ b/daemon/logger/jsonfilelog/read.go
@@ -3,11 +3,14 @@ package jsonfilelog
 import (
 	"bytes"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"os"
 	"time"
 
+	"gopkg.in/fsnotify.v1"
+
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/docker/daemon/logger"
 	"github.com/docker/docker/pkg/filenotify"
@@ -44,6 +47,10 @@ func (l *JSONFileLogger) ReadLogs(config logger.ReadConfig) *logger.LogWatcher {
 func (l *JSONFileLogger) readLogs(logWatcher *logger.LogWatcher, config logger.ReadConfig) {
 	defer close(logWatcher.Msg)
 
+	// lock so the read stream doesn't get corrupted due to rotations or other log data written while we read
+	// This will block writes!!!
+	l.mu.Lock()
+
 	pth := l.writer.LogPath()
 	var files []io.ReadSeeker
 	for i := l.writer.MaxFiles(); i > 1; i-- {
@@ -61,6 +68,7 @@ func (l *JSONFileLogger) readLogs(logWatcher *logger.LogWatcher, config logger.R
 	latestFile, err := os.Open(pth)
 	if err != nil {
 		logWatcher.Err <- err
+		l.mu.Unlock()
 		return
 	}
 
@@ -80,6 +88,7 @@ func (l *JSONFileLogger) readLogs(logWatcher *logger.LogWatcher, config logger.R
 		if err := latestFile.Close(); err != nil {
 			logrus.Errorf("Error closing file: %v", err)
 		}
+		l.mu.Unlock()
 		return
 	}
 
@@ -87,7 +96,6 @@ func (l *JSONFileLogger) readLogs(logWatcher *logger.LogWatcher, config logger.R
 		latestFile.Seek(0, os.SEEK_END)
 	}
 
-	l.mu.Lock()
 	l.readers[logWatcher] = struct{}{}
 	l.mu.Unlock()
 
@@ -128,92 +136,142 @@ func tailFile(f io.ReadSeeker, logWatcher *logger.LogWatcher, tail int, since ti
 	}
 }
 
+func watchFile(name string) (filenotify.FileWatcher, error) {
+	fileWatcher, err := filenotify.New()
+	if err != nil {
+		return nil, err
+	}
+
+	if err := fileWatcher.Add(name); err != nil {
+		logrus.WithField("logger", "json-file").Warnf("falling back to file poller due to error: %v", err)
+		fileWatcher.Close()
+		fileWatcher = filenotify.NewPollingWatcher()
+
+		if err := fileWatcher.Add(name); err != nil {
+			fileWatcher.Close()
+			logrus.Debugf("error watching log file for modifications: %v", err)
+			return nil, err
+		}
+	}
+	return fileWatcher, nil
+}
+
 func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan interface{}, since time.Time) {
 	dec := json.NewDecoder(f)
 	l := &jsonlog.JSONLog{}
 
-	fileWatcher, err := filenotify.New()
+	name := f.Name()
+	fileWatcher, err := watchFile(name)
 	if err != nil {
 		logWatcher.Err <- err
+		return
 	}
 	defer func() {
 		f.Close()
 		fileWatcher.Close()
 	}()
-	name := f.Name()
 
-	if err := fileWatcher.Add(name); err != nil {
-		logrus.WithField("logger", "json-file").Warnf("falling back to file poller due to error: %v", err)
-		fileWatcher.Close()
-		fileWatcher = filenotify.NewPollingWatcher()
+	var retries int
+	handleRotate := func() error {
+		f.Close()
+		fileWatcher.Remove(name)
 
+		// retry when the file doesn't exist
+		for retries := 0; retries <= 5; retries++ {
+			f, err = os.Open(name)
+			if err == nil || !os.IsNotExist(err) {
+				break
+			}
+		}
+		if err != nil {
+			return err
+		}
 		if err := fileWatcher.Add(name); err != nil {
-			logrus.Debugf("error watching log file for modifications: %v", err)
-			logWatcher.Err <- err
-			return
+			return err
 		}
+		dec = json.NewDecoder(f)
+		return nil
 	}
 
-	var retries int
-	for {
-		msg, err := decodeLogLine(dec, l)
-		if err != nil {
-			if err != io.EOF {
-				// try again because this shouldn't happen
-				if _, ok := err.(*json.SyntaxError); ok && retries <= maxJSONDecodeRetry {
-					dec = json.NewDecoder(f)
-					retries++
-					continue
+	errRetry := errors.New("retry")
+	errDone := errors.New("done")
+	waitRead := func() error {
+		select {
+		case e := <-fileWatcher.Events():
+			switch e.Op {
+			case fsnotify.Write:
+				dec = json.NewDecoder(f)
+				return nil
+			case fsnotify.Rename, fsnotify.Remove:
+				<-notifyRotate
+				if err := handleRotate(); err != nil {
+					return err
 				}
-
-				// io.ErrUnexpectedEOF is returned from json.Decoder when there is
-				// remaining data in the parser's buffer while an io.EOF occurs.
-				// If the json logger writes a partial json log entry to the disk
-				// while at the same time the decoder tries to decode it, the race condition happens.
-				if err == io.ErrUnexpectedEOF && retries <= maxJSONDecodeRetry {
-					reader := io.MultiReader(dec.Buffered(), f)
-					dec = json.NewDecoder(reader)
-					retries++
-					continue
+				return nil
+			}
+			return errRetry
+		case err := <-fileWatcher.Errors():
+			logrus.Debugf("logger got error watching file: %v", err)
+			// Something happened, let's try and stay alive and create a new watcher
+			if retries <= 5 {
+				fileWatcher, err = watchFile(name)
+				if err != nil {
+					return err
 				}
-
-				return
+				retries++
+				return errRetry
 			}
+			return err
+		case <-logWatcher.WatchClose():
+			fileWatcher.Remove(name)
+			return errDone
+		}
+	}
 
-			select {
-			case <-fileWatcher.Events():
-				dec = json.NewDecoder(f)
-				continue
-			case <-fileWatcher.Errors():
-				logWatcher.Err <- err
-				return
-			case <-logWatcher.WatchClose():
-				fileWatcher.Remove(name)
-				return
-			case <-notifyRotate:
-				f.Close()
-				fileWatcher.Remove(name)
-
-				// retry when the file doesn't exist
-				for retries := 0; retries <= 5; retries++ {
-					f, err = os.Open(name)
-					if err == nil || !os.IsNotExist(err) {
-						break
-					}
+	handleDecodeErr := func(err error) error {
+		if err == io.EOF {
+			for err := waitRead(); err != nil; err = waitRead() {
+				if err == errRetry {
+					// retry the waitRead
+					continue
 				}
+				return err
+			}
+			return nil
+		}
+		// try again because this shouldn't happen
+		if _, ok := err.(*json.SyntaxError); ok && retries <= maxJSONDecodeRetry {
+			dec = json.NewDecoder(f)
+			retries++
+			return nil
+		}
+		// io.ErrUnexpectedEOF is returned from json.Decoder when there is
+		// remaining data in the parser's buffer while an io.EOF occurs.
+		// If the json logger writes a partial json log entry to the disk
+		// while at the same time the decoder tries to decode it, the race condition happens.
+		if err == io.ErrUnexpectedEOF && retries <= maxJSONDecodeRetry {
+			reader := io.MultiReader(dec.Buffered(), f)
+			dec = json.NewDecoder(reader)
+			retries++
+			return nil
+		}
+		return err
+	}
 
-				if err = fileWatcher.Add(name); err != nil {
-					logWatcher.Err <- err
-					return
-				}
-				if err != nil {
-					logWatcher.Err <- err
+	// main loop
+	for {
+		msg, err := decodeLogLine(dec, l)
+		if err != nil {
+			if err := handleDecodeErr(err); err != nil {
+				if err == errDone {
 					return
 				}
-
-				dec = json.NewDecoder(f)
-				continue
+				// we got an unrecoverable error, so return
+				logWatcher.Err <- err
+				return
 			}
+			// ready to try again
+			continue
 		}
 
 		retries = 0 // reset retries since we've succeeded
-- 
2.7.3
