2019-01-12 04:58:27 +00:00
|
|
|
// Copyright 2014 Google Inc. All Rights Reserved.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package oomparser
|
|
|
|
|
|
|
|
import (
|
|
|
|
"path"
|
|
|
|
"regexp"
|
|
|
|
"strconv"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/euank/go-kmsg-parser/kmsgparser"
|
|
|
|
|
2020-08-10 17:43:49 +00:00
|
|
|
"k8s.io/klog/v2"
|
2019-01-12 04:58:27 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Patterns used to pick OOM-related lines out of the kernel log. They are
// listed in the order in which the lines are consulted while parsing.
var (
	// firstLineRegexp recognizes the line that marks the start of a kernel
	// OOM log.
	firstLineRegexp = regexp.MustCompile(`invoked oom-killer:`)

	// containerRegexp matches the "oom-kill:" summary line. Starting in 5.0
	// linux kernels, the OOM message changed to this format. Its capture
	// groups are, in order: constraint, nodemask, cpuset, mems_allowed,
	// oom_memcg, task_memcg, task, pid and uid.
	containerRegexp = regexp.MustCompile(`oom-kill:constraint=(.*),nodemask=(.*),cpuset=(.*),mems_allowed=(.*),oom_memcg=(.*),task_memcg=(.*),task=(.*),pid=(.*),uid=(.*)`)

	// legacyContainerRegexp matches the older message format. Group 1 is
	// stored as the container name and group 2 as the victim container name.
	legacyContainerRegexp = regexp.MustCompile(`Task in (.*) killed as a result of limit of (.*)`)

	// lastLineRegexp matches the "Killed process" line. Group 1 is the pid
	// and group 2 is the process name.
	lastLineRegexp = regexp.MustCompile(`Killed process ([0-9]+) \((.+)\)`)
)
|
|
|
|
|
|
|
|
// OomParser wraps a kmsgparser in order to extract OOM events from the
|
|
|
|
// individual kernel ring buffer messages.
|
|
|
|
type OomParser struct {
|
|
|
|
parser kmsgparser.Parser
|
|
|
|
}
|
|
|
|
|
|
|
|
// OomInstance contains the information related to a single OOM kill.
type OomInstance struct {
	// Pid is the process id of the killed process.
	Pid int
	// ProcessName is the name of the killed process.
	ProcessName string
	// TimeOfDeath is the time that the process was reported to be killed,
	// accurate to the minute.
	TimeOfDeath time.Time
	// ContainerName is the absolute name of the container that OOMed.
	ContainerName string
	// VictimContainerName is the absolute name of the container that was
	// killed due to the OOM.
	VictimContainerName string
	// Constraint is the constraint that triggered the OOM. One of
	// CONSTRAINT_NONE, CONSTRAINT_CPUSET, CONSTRAINT_MEMORY_POLICY,
	// CONSTRAINT_MEMCG.
	Constraint string
}
|
|
|
|
|
|
|
|
// gets the container name from a line and adds it to the oomInstance.
|
2020-08-10 17:43:49 +00:00
|
|
|
func getLegacyContainerName(line string, currentOomInstance *OomInstance) error {
|
|
|
|
parsedLine := legacyContainerRegexp.FindStringSubmatch(line)
|
2019-01-12 04:58:27 +00:00
|
|
|
if parsedLine == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
currentOomInstance.ContainerName = path.Join("/", parsedLine[1])
|
|
|
|
currentOomInstance.VictimContainerName = path.Join("/", parsedLine[2])
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-08-10 17:43:49 +00:00
|
|
|
// gets the container name from a line and adds it to the oomInstance.
|
|
|
|
func getContainerName(line string, currentOomInstance *OomInstance) (bool, error) {
|
|
|
|
parsedLine := containerRegexp.FindStringSubmatch(line)
|
|
|
|
if parsedLine == nil {
|
|
|
|
// Fall back to the legacy format if it isn't found here.
|
|
|
|
return false, getLegacyContainerName(line, currentOomInstance)
|
|
|
|
}
|
2021-03-18 22:40:29 +00:00
|
|
|
currentOomInstance.ContainerName = parsedLine[6]
|
2020-08-10 17:43:49 +00:00
|
|
|
currentOomInstance.VictimContainerName = parsedLine[5]
|
|
|
|
currentOomInstance.Constraint = parsedLine[1]
|
2021-03-18 22:40:29 +00:00
|
|
|
pid, err := strconv.Atoi(parsedLine[8])
|
2020-08-10 17:43:49 +00:00
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
currentOomInstance.Pid = pid
|
2021-03-18 22:40:29 +00:00
|
|
|
currentOomInstance.ProcessName = parsedLine[7]
|
2020-08-10 17:43:49 +00:00
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
|
2019-01-12 04:58:27 +00:00
|
|
|
// gets the pid, name, and date from a line and adds it to oomInstance
|
|
|
|
func getProcessNamePid(line string, currentOomInstance *OomInstance) (bool, error) {
|
|
|
|
reList := lastLineRegexp.FindStringSubmatch(line)
|
|
|
|
|
|
|
|
if reList == nil {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
pid, err := strconv.Atoi(reList[1])
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
currentOomInstance.Pid = pid
|
|
|
|
currentOomInstance.ProcessName = reList[2]
|
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// uses regex to see if line is the start of a kernel oom log
|
|
|
|
func checkIfStartOfOomMessages(line string) bool {
|
2020-08-10 17:43:49 +00:00
|
|
|
potentialOomStart := firstLineRegexp.MatchString(line)
|
|
|
|
return potentialOomStart
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// StreamOoms writes to a provided a stream of OomInstance objects representing
|
|
|
|
// OOM events that are found in the logs.
|
|
|
|
// It will block and should be called from a goroutine.
|
2020-08-10 17:43:49 +00:00
|
|
|
func (p *OomParser) StreamOoms(outStream chan<- *OomInstance) {
|
|
|
|
kmsgEntries := p.parser.Parse()
|
|
|
|
defer p.parser.Close()
|
2019-01-12 04:58:27 +00:00
|
|
|
|
|
|
|
for msg := range kmsgEntries {
|
2020-08-10 17:43:49 +00:00
|
|
|
isOomMessage := checkIfStartOfOomMessages(msg.Message)
|
|
|
|
if isOomMessage {
|
2019-01-12 04:58:27 +00:00
|
|
|
oomCurrentInstance := &OomInstance{
|
2019-04-07 17:07:55 +00:00
|
|
|
ContainerName: "/",
|
|
|
|
VictimContainerName: "/",
|
|
|
|
TimeOfDeath: msg.Timestamp,
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
for msg := range kmsgEntries {
|
2020-08-10 17:43:49 +00:00
|
|
|
finished, err := getContainerName(msg.Message, oomCurrentInstance)
|
2019-01-12 04:58:27 +00:00
|
|
|
if err != nil {
|
|
|
|
klog.Errorf("%v", err)
|
|
|
|
}
|
2020-08-10 17:43:49 +00:00
|
|
|
if !finished {
|
|
|
|
finished, err = getProcessNamePid(msg.Message, oomCurrentInstance)
|
|
|
|
if err != nil {
|
|
|
|
klog.Errorf("%v", err)
|
|
|
|
}
|
2019-01-12 04:58:27 +00:00
|
|
|
}
|
|
|
|
if finished {
|
|
|
|
oomCurrentInstance.TimeOfDeath = msg.Timestamp
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
outStream <- oomCurrentInstance
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Should not happen
|
|
|
|
klog.Errorf("exiting analyzeLines. OOM events will not be reported.")
|
|
|
|
}
|
|
|
|
|
|
|
|
// initializes an OomParser object. Returns an OomParser object and an error.
|
|
|
|
func New() (*OomParser, error) {
|
|
|
|
parser, err := kmsgparser.NewParser()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
parser.SetLogger(glogAdapter{})
|
|
|
|
return &OomParser{parser: parser}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
type glogAdapter struct{}
|
|
|
|
|
|
|
|
var _ kmsgparser.Logger = glogAdapter{}
|
|
|
|
|
|
|
|
func (glogAdapter) Infof(format string, args ...interface{}) {
|
|
|
|
klog.V(4).Infof(format, args...)
|
|
|
|
}
|
|
|
|
func (glogAdapter) Warningf(format string, args ...interface{}) {
|
|
|
|
klog.V(2).Infof(format, args...)
|
|
|
|
}
|
|
|
|
func (glogAdapter) Errorf(format string, args ...interface{}) {
|
|
|
|
klog.Warningf(format, args...)
|
|
|
|
}
|