// Copyright (c) HashiCorp, Inc. // SPDX-License-Identifier: BUSL-1.1 // The archive utilities manage the internal format of a snapshot, which is a // tar file with the following contents: // // meta.json - JSON-encoded snapshot metadata from Raft // state.bin - Encoded snapshot data from Raft // SHA256SUMS - SHA-256 sums of the above two files // // The integrity information is automatically created and checked, and a failure // there just looks like an error to the caller. package snapshot import ( "archive/tar" "bufio" "bytes" "crypto/sha256" "encoding/json" "fmt" "hash" "io" "time" "github.com/hashicorp/raft" ) // hashList manages a list of filenames and their hashes. type hashList struct { hashes map[string]hash.Hash } // newHashList returns a new hashList. func newHashList() *hashList { return &hashList{ hashes: make(map[string]hash.Hash), } } // Add creates a new hash for the given file. func (hl *hashList) Add(file string) hash.Hash { if existing, ok := hl.hashes[file]; ok { return existing } h := sha256.New() hl.hashes[file] = h return h } // Encode takes the current sum of all the hashes and saves the hash list as a // SHA256SUMS-style text file. func (hl *hashList) Encode(w io.Writer) error { for file, h := range hl.hashes { if _, err := fmt.Fprintf(w, "%x %s\n", h.Sum([]byte{}), file); err != nil { return err } } return nil } // DecodeAndVerify reads a SHA256SUMS-style text file and checks the results // against the current sums for all the hashes. func (hl *hashList) DecodeAndVerify(r io.Reader) error { // Read the file and make sure everything in there has a matching hash. seen := make(map[string]struct{}) s := bufio.NewScanner(r) for s.Scan() { sha := make([]byte, sha256.Size) var file string if _, err := fmt.Sscanf(s.Text(), "%x %s", &sha, &file); err != nil { return err } h, ok := hl.hashes[file] if !ok { return fmt.Errorf("list missing hash for %q", file) } if !bytes.Equal(sha, h.Sum([]byte{})) { return fmt.Errorf("hash check failed for %q", file) } seen[file] = struct{}{} } if err := s.Err(); err != nil { return err } // Make sure everything we had a hash for was seen. for file := range hl.hashes { if _, ok := seen[file]; !ok { return fmt.Errorf("file missing for %q", file) } } return nil } // write takes a writer and creates an archive with the snapshot metadata, // the snapshot itself, and adds some integrity checking information. func write(out io.Writer, metadata *raft.SnapshotMeta, snap io.Reader) error { // Start a new tarball. now := time.Now() archive := tar.NewWriter(out) // Create a hash list that we will use to write a SHA256SUMS file into // the archive. hl := newHashList() // Encode the snapshot metadata, which we need to feed back during a // restore. metaHash := hl.Add("meta.json") var metaBuffer bytes.Buffer enc := json.NewEncoder(&metaBuffer) if err := enc.Encode(metadata); err != nil { return fmt.Errorf("failed to encode snapshot metadata: %v", err) } if err := archive.WriteHeader(&tar.Header{ Name: "meta.json", Mode: 0600, Size: int64(metaBuffer.Len()), ModTime: now, }); err != nil { return fmt.Errorf("failed to write snapshot metadata header: %v", err) } if _, err := io.Copy(archive, io.TeeReader(&metaBuffer, metaHash)); err != nil { return fmt.Errorf("failed to write snapshot metadata: %v", err) } // Copy the snapshot data given the size from the metadata. snapHash := hl.Add("state.bin") if err := archive.WriteHeader(&tar.Header{ Name: "state.bin", Mode: 0600, Size: metadata.Size, ModTime: now, }); err != nil { return fmt.Errorf("failed to write snapshot data header: %v", err) } if _, err := io.CopyN(archive, io.TeeReader(snap, snapHash), metadata.Size); err != nil { return fmt.Errorf("failed to write snapshot metadata: %v", err) } // Create a SHA256SUMS file that we can use to verify on restore. var shaBuffer bytes.Buffer if err := hl.Encode(&shaBuffer); err != nil { return fmt.Errorf("failed to encode snapshot hashes: %v", err) } if err := archive.WriteHeader(&tar.Header{ Name: "SHA256SUMS", Mode: 0600, Size: int64(shaBuffer.Len()), ModTime: now, }); err != nil { return fmt.Errorf("failed to write snapshot hashes header: %v", err) } if _, err := io.Copy(archive, &shaBuffer); err != nil { return fmt.Errorf("failed to write snapshot metadata: %v", err) } // Finalize the archive. if err := archive.Close(); err != nil { return fmt.Errorf("failed to finalize snapshot: %v", err) } return nil } // read takes a reader and extracts the snapshot metadata and the snapshot // itself, and also checks the integrity of the data. You must arrange to call // Close() on the returned object or else you will leak a temporary file. func read(in io.Reader, metadata *raft.SnapshotMeta, snap io.Writer) error { // Start a new tar reader. archive := tar.NewReader(in) // Create a hash list that we will use to compare with the SHA256SUMS // file in the archive. hl := newHashList() // Populate the hashes for all the files we expect to see. The check at // the end will make sure these are all present in the SHA256SUMS file // and that the hashes match. metaHash := hl.Add("meta.json") snapHash := hl.Add("state.bin") // Look through the archive for the pieces we care about. var shaBuffer bytes.Buffer for { hdr, err := archive.Next() if err == io.EOF { break } if err != nil { return fmt.Errorf("failed reading snapshot: %v", err) } switch hdr.Name { case "meta.json": // Previously we used json.Decode to decode the archive stream. There are // edgecases in which it doesn't read all the bytes from the stream, even // though the json object is still being parsed properly. Since we // simultaneously feeded everything to metaHash, our hash ended up being // different than what we calculated when creating the snapshot. Which in // turn made the snapshot verification fail. By explicitly reading the // whole thing first we ensure that we calculate the correct hash // independent of how json.Decode works internally. buf, err := io.ReadAll(io.TeeReader(archive, metaHash)) if err != nil { return fmt.Errorf("failed to read snapshot metadata: %v", err) } if err := json.Unmarshal(buf, &metadata); err != nil { return fmt.Errorf("failed to decode snapshot metadata: %v", err) } case "state.bin": if _, err := io.Copy(io.MultiWriter(snap, snapHash), archive); err != nil { return fmt.Errorf("failed to read or write snapshot data: %v", err) } case "SHA256SUMS": if _, err := io.Copy(&shaBuffer, archive); err != nil { return fmt.Errorf("failed to read snapshot hashes: %v", err) } default: return fmt.Errorf("unexpected file %q in snapshot", hdr.Name) } } // Verify all the hashes. if err := hl.DecodeAndVerify(&shaBuffer); err != nil { return fmt.Errorf("failed checking integrity of snapshot: %v", err) } return nil }