From c26fd66edd20baaca27eba07578bc629f4d015c1 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Mon, 24 Jul 2017 12:37:14 -0500 Subject: [PATCH 1/2] Clean up temporary files on write errors, and ignore any temporary service files on load with a warning. This fixes #3207 --- CHANGELOG.md | 1 + agent/agent.go | 28 ++++++++++++++++++++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d4555d1f5..adfbb3c1d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ FEATURES: IMPROVEMENTS: BUG FIXES: +* agent: Clean up temporary files during disk write errors when persisting services and checks. [GH-3207] ## 0.9.0 (July 20, 2017) diff --git a/agent/agent.go b/agent/agent.go index 8fc32dce27..5fa43c5abb 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1428,7 +1428,7 @@ func (a *Agent) persistService(service *structs.NodeService) error { return err } - return writeFileAtomic(svcPath, encoded) + return writeFileAtomic(svcPath, encoded, a.logger) } // purgeService removes a persisted service definition file from the data dir @@ -1456,7 +1456,7 @@ func (a *Agent) persistCheck(check *structs.HealthCheck, chkType *structs.CheckT return err } - return writeFileAtomic(checkPath, encoded) + return writeFileAtomic(checkPath, encoded, a.logger) } // purgeCheck removes a persisted check definition file from the data dir @@ -1470,7 +1470,7 @@ func (a *Agent) purgeCheck(checkID types.CheckID) error { // writeFileAtomic writes the given contents to a temporary file in the same // directory, does an fsync and then renames the file to its real path -func writeFileAtomic(path string, contents []byte) error { +func writeFileAtomic(path string, contents []byte, logger *log.Logger) error { uuid, err := uuid.GenerateUUID() if err != nil { return err @@ -1485,15 +1485,30 @@ func writeFileAtomic(path string, contents []byte) error { return err } if _, err := fh.Write(contents); err != nil { + logger.Printf("[INFO] Writing to temp file at %v failed, deleting...\n", tempPath) + fh.Close() + os.Remove(tempPath) return err } if err := fh.Sync(); err != nil { + logger.Printf("[INFO] Syncing temp file at %v failed, deleting...\n", tempPath) + fh.Close() + os.Remove(tempPath) return err } if err := fh.Close(); err != nil { + logger.Printf("[INFO] Closing file handle to temp file at %v failed, deleting...\n", tempPath) + fh.Close() + os.Remove(tempPath) return err } - return os.Rename(tempPath, path) + if err := os.Rename(tempPath, path); err != nil { + logger.Printf("[INFO] Renaming temp file at %v failed, deleting...\n", tempPath) + fh.Close() + os.Remove(tempPath) + return err + } + return nil } // AddService is used to add a service entry. @@ -2072,6 +2087,11 @@ func (a *Agent) loadServices(conf *Config) error { continue } + // Skip all partially written temporary files + if strings.HasSuffix(fi.Name(), "tmp") { + a.logger.Printf("[WARN] Ignoring temporary service file %v", fi.Name()) + continue + } // Open the file for reading file := filepath.Join(svcDir, fi.Name()) fh, err := os.Open(file) From f8b633c69e5945bf1d9ecfadaf8dff0e5ae2ceb0 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Mon, 24 Jul 2017 21:07:48 -0500 Subject: [PATCH 2/2] Removed redundant logging --- agent/agent.go | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index 5fa43c5abb..01c9ce6a38 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1428,7 +1428,7 @@ func (a *Agent) persistService(service *structs.NodeService) error { return err } - return writeFileAtomic(svcPath, encoded, a.logger) + return writeFileAtomic(svcPath, encoded) } // purgeService removes a persisted service definition file from the data dir @@ -1456,7 +1456,7 @@ func (a *Agent) persistCheck(check *structs.HealthCheck, chkType *structs.CheckT return err } - return writeFileAtomic(checkPath, encoded, a.logger) + return writeFileAtomic(checkPath, encoded) } // purgeCheck removes a persisted check definition file from the data dir @@ -1470,7 +1470,7 @@ func (a *Agent) purgeCheck(checkID types.CheckID) error { // writeFileAtomic writes the given contents to a temporary file in the same // directory, does an fsync and then renames the file to its real path -func writeFileAtomic(path string, contents []byte, logger *log.Logger) error { +func writeFileAtomic(path string, contents []byte) error { uuid, err := uuid.GenerateUUID() if err != nil { return err @@ -1485,26 +1485,21 @@ func writeFileAtomic(path string, contents []byte, logger *log.Logger) error { return err } if _, err := fh.Write(contents); err != nil { - logger.Printf("[INFO] Writing to temp file at %v failed, deleting...\n", tempPath) fh.Close() os.Remove(tempPath) return err } if err := fh.Sync(); err != nil { - logger.Printf("[INFO] Syncing temp file at %v failed, deleting...\n", tempPath) fh.Close() os.Remove(tempPath) return err } if err := fh.Close(); err != nil { - logger.Printf("[INFO] Closing file handle to temp file at %v failed, deleting...\n", tempPath) fh.Close() os.Remove(tempPath) return err } if err := os.Rename(tempPath, path); err != nil { - logger.Printf("[INFO] Renaming temp file at %v failed, deleting...\n", tempPath) - fh.Close() os.Remove(tempPath) return err } @@ -2092,6 +2087,7 @@ func (a *Agent) loadServices(conf *Config) error { a.logger.Printf("[WARN] Ignoring temporary service file %v", fi.Name()) continue } + // Open the file for reading file := filepath.Join(svcDir, fi.Name()) fh, err := os.Open(file)