k3s/pkg/dns/dns.go

892 lines
31 KiB
Go

/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package dns
import (
"encoding/json"
"fmt"
"net"
"strings"
"sync"
"time"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/api/v1/endpoints"
metav1 "k8s.io/kubernetes/pkg/apis/meta/v1"
kcache "k8s.io/kubernetes/pkg/client/cache"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
"k8s.io/kubernetes/pkg/dns/config"
"k8s.io/kubernetes/pkg/dns/treecache"
"k8s.io/kubernetes/pkg/dns/util"
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/util/validation"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/pkg/watch"
etcd "github.com/coreos/etcd/client"
"github.com/golang/glog"
"github.com/miekg/dns"
skymsg "github.com/skynetservices/skydns/msg"
)
const (
// A subdomain added to the user specified domain for all services.
serviceSubdomain = "svc"
// A subdomain added to the user specified dmoain for all pods.
podSubdomain = "pod"
// Resync period for the kube controller loop.
resyncPeriod = 5 * time.Minute
// Duration for which the TTL cache should hold the node resource to retrieve the zone
// annotation from it so that it could be added to federation CNAMEs. There is ideally
// no need to expire this cache, but we don't want to assume that node annotations
// never change. So we expire the cache and retrieve a node once every 180 seconds.
// The value is chosen to be neither too long nor too short.
nodeCacheTTL = 180 * time.Second
)
type KubeDNS struct {
// kubeClient makes calls to API Server and registers calls with API Server
// to get Endpoints and Service objects.
kubeClient clientset.Interface
// domain for which this DNS Server is authoritative.
domain string
// configMap where kube-dns dynamic configuration is store. If this
// is empty then getting configuration from a configMap will be
// disabled.
configMap string
// endpointsStore that contains all the endpoints in the system.
endpointsStore kcache.Store
// servicesStore that contains all the services in the system.
servicesStore kcache.Store
// nodesStore contains some subset of nodes in the system so that we
// can retrieve the cluster zone annotation from the cached node
// instead of getting it from the API server every time.
nodesStore kcache.Store
// cache stores DNS records for the domain. A Records and SRV Records for
// (regular) services and headless Services. CNAME Records for
// ExternalName Services.
cache treecache.TreeCache
// TODO(nikhiljindal): Remove this. It can be recreated using
// clusterIPServiceMap.
reverseRecordMap map[string]*skymsg.Service
// clusterIPServiceMap to service object. Headless services are not
// part of this map. Used to get a service when given its cluster
// IP. Access to this is coordinated using cacheLock. We use the
// same lock for cache and this map to ensure that they don't get
// out of sync.
clusterIPServiceMap map[string]*v1.Service
// cacheLock protecting the cache. caller is responsible for using
// the cacheLock before invoking methods on cache the cache is not
// thread-safe, and the caller can guarantee thread safety by using
// the cacheLock
cacheLock sync.RWMutex
// The domain for which this DNS Server is authoritative, in array
// format and reversed. e.g. if domain is "cluster.local",
// domainPath is []string{"local", "cluster"}
domainPath []string
// endpointsController invokes registered callbacks when endpoints change.
endpointsController *kcache.Controller
// serviceController invokes registered callbacks when services change.
serviceController *kcache.Controller
// config set from the dynamic configuration source.
config *config.Config
// configLock protects the config below.
configLock sync.RWMutex
// configSync manages synchronization of the config map
configSync config.Sync
// Initial timeout for endpoints and services to be synced from APIServer
initialSyncTimeout time.Duration
}
func NewKubeDNS(client clientset.Interface, clusterDomain string, timeout time.Duration, configSync config.Sync) *KubeDNS {
kd := &KubeDNS{
kubeClient: client,
domain: clusterDomain,
cache: treecache.NewTreeCache(),
cacheLock: sync.RWMutex{},
nodesStore: kcache.NewStore(kcache.MetaNamespaceKeyFunc),
reverseRecordMap: make(map[string]*skymsg.Service),
clusterIPServiceMap: make(map[string]*v1.Service),
domainPath: util.ReverseArray(strings.Split(strings.TrimRight(clusterDomain, "."), ".")),
initialSyncTimeout: timeout,
configLock: sync.RWMutex{},
configSync: configSync,
}
kd.setEndpointsStore()
kd.setServicesStore()
return kd
}
func (kd *KubeDNS) Start() {
glog.V(2).Infof("Starting endpointsController")
go kd.endpointsController.Run(wait.NeverStop)
glog.V(2).Infof("Starting serviceController")
go kd.serviceController.Run(wait.NeverStop)
kd.startConfigMapSync()
// Wait synchronously for the initial list operations to be
// complete of endpoints and services from APIServer.
kd.waitForResourceSyncedOrDie()
}
func (kd *KubeDNS) waitForResourceSyncedOrDie() {
// Wait for both controllers have completed an initial resource listing
timeout := time.After(kd.initialSyncTimeout)
ticker := time.NewTicker(500 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-timeout:
glog.Fatalf("Timeout waiting for initialization")
case <-ticker.C:
if kd.endpointsController.HasSynced() && kd.serviceController.HasSynced() {
glog.V(0).Infof("Initialized services and endpoints from apiserver")
return
}
glog.V(0).Infof("DNS server not ready, retry in 500 milliseconds")
}
}
}
func (kd *KubeDNS) startConfigMapSync() {
initialConfig, err := kd.configSync.Once()
if err != nil {
glog.Errorf(
"Error getting initial ConfigMap: %v, starting with default values", err)
kd.config = config.NewDefaultConfig()
} else {
kd.config = initialConfig
}
go kd.syncConfigMap(kd.configSync.Periodic())
}
func (kd *KubeDNS) syncConfigMap(syncChan <-chan *config.Config) {
for {
nextConfig := <-syncChan
kd.configLock.Lock()
kd.config = nextConfig
glog.V(2).Infof("Configuration updated: %+v", *kd.config)
kd.configLock.Unlock()
}
}
func (kd *KubeDNS) GetCacheAsJSON() (string, error) {
kd.cacheLock.RLock()
defer kd.cacheLock.RUnlock()
json, err := kd.cache.Serialize()
return json, err
}
func (kd *KubeDNS) setServicesStore() {
// Returns a cache.ListWatch that gets all changes to services.
kd.servicesStore, kd.serviceController = kcache.NewInformer(
&kcache.ListWatch{
ListFunc: func(options v1.ListOptions) (runtime.Object, error) {
return kd.kubeClient.Core().Services(v1.NamespaceAll).List(options)
},
WatchFunc: func(options v1.ListOptions) (watch.Interface, error) {
return kd.kubeClient.Core().Services(v1.NamespaceAll).Watch(options)
},
},
&v1.Service{},
resyncPeriod,
kcache.ResourceEventHandlerFuncs{
AddFunc: kd.newService,
DeleteFunc: kd.removeService,
UpdateFunc: kd.updateService,
},
)
}
func (kd *KubeDNS) setEndpointsStore() {
// Returns a cache.ListWatch that gets all changes to endpoints.
kd.endpointsStore, kd.endpointsController = kcache.NewInformer(
&kcache.ListWatch{
ListFunc: func(options v1.ListOptions) (runtime.Object, error) {
return kd.kubeClient.Core().Endpoints(v1.NamespaceAll).List(options)
},
WatchFunc: func(options v1.ListOptions) (watch.Interface, error) {
return kd.kubeClient.Core().Endpoints(v1.NamespaceAll).Watch(options)
},
},
&v1.Endpoints{},
resyncPeriod,
kcache.ResourceEventHandlerFuncs{
AddFunc: kd.handleEndpointAdd,
UpdateFunc: func(oldObj, newObj interface{}) {
// TODO: Avoid unwanted updates.
kd.handleEndpointAdd(newObj)
},
// No DeleteFunc for EndpointsStore because endpoint object will be deleted
// when corresponding service is deleted.
},
)
}
func assertIsService(obj interface{}) (*v1.Service, bool) {
if service, ok := obj.(*v1.Service); ok {
return service, ok
} else {
glog.Errorf("Type assertion failed! Expected 'Service', got %T", service)
return nil, ok
}
}
func (kd *KubeDNS) newService(obj interface{}) {
if service, ok := assertIsService(obj); ok {
glog.V(2).Infof("New service: %v", service.Name)
glog.V(4).Infof("Service details: %v", service)
// ExternalName services are a special kind that return CNAME records
if service.Spec.Type == v1.ServiceTypeExternalName {
kd.newExternalNameService(service)
return
}
// if ClusterIP is not set, a DNS entry should not be created
if !v1.IsServiceIPSet(service) {
kd.newHeadlessService(service)
return
}
if len(service.Spec.Ports) == 0 {
glog.Warningf("Service with no ports, this should not have happened: %v",
service)
}
kd.newPortalService(service)
}
}
func (kd *KubeDNS) removeService(obj interface{}) {
if s, ok := assertIsService(obj); ok {
subCachePath := append(kd.domainPath, serviceSubdomain, s.Namespace, s.Name)
kd.cacheLock.Lock()
defer kd.cacheLock.Unlock()
success := kd.cache.DeletePath(subCachePath...)
glog.V(2).Infof("removeService %v at path %v. Success: %v",
s.Name, subCachePath, success)
// ExternalName services have no IP
if v1.IsServiceIPSet(s) {
delete(kd.reverseRecordMap, s.Spec.ClusterIP)
delete(kd.clusterIPServiceMap, s.Spec.ClusterIP)
}
}
}
func (kd *KubeDNS) updateService(oldObj, newObj interface{}) {
if new, ok := assertIsService(newObj); ok {
if old, ok := assertIsService(oldObj); ok {
// Remove old cache path only if changing type to/from ExternalName.
// In all other cases, we'll update records in place.
if (new.Spec.Type == v1.ServiceTypeExternalName) !=
(old.Spec.Type == v1.ServiceTypeExternalName) {
kd.removeService(oldObj)
}
kd.newService(newObj)
}
}
}
func (kd *KubeDNS) handleEndpointAdd(obj interface{}) {
if e, ok := obj.(*v1.Endpoints); ok {
kd.addDNSUsingEndpoints(e)
}
}
func (kd *KubeDNS) addDNSUsingEndpoints(e *v1.Endpoints) error {
svc, err := kd.getServiceFromEndpoints(e)
if err != nil {
return err
}
if svc == nil || v1.IsServiceIPSet(svc) {
// No headless service found corresponding to endpoints object.
return nil
}
return kd.generateRecordsForHeadlessService(e, svc)
}
func (kd *KubeDNS) getServiceFromEndpoints(e *v1.Endpoints) (*v1.Service, error) {
key, err := kcache.MetaNamespaceKeyFunc(e)
if err != nil {
return nil, err
}
obj, exists, err := kd.servicesStore.GetByKey(key)
if err != nil {
return nil, fmt.Errorf("failed to get service object from services store - %v", err)
}
if !exists {
glog.V(3).Infof("No service for endpoint %q in namespace %q",
e.Name, e.Namespace)
return nil, nil
}
if svc, ok := assertIsService(obj); ok {
return svc, nil
}
return nil, fmt.Errorf("got a non service object in services store %v", obj)
}
// fqdn constructs the fqdn for the given service. subpaths is a list of path
// elements rooted at the given service, ending at a service record.
func (kd *KubeDNS) fqdn(service *v1.Service, subpaths ...string) string {
domainLabels := append(append(kd.domainPath, serviceSubdomain, service.Namespace, service.Name), subpaths...)
return dns.Fqdn(strings.Join(util.ReverseArray(domainLabels), "."))
}
func (kd *KubeDNS) newPortalService(service *v1.Service) {
subCache := treecache.NewTreeCache()
recordValue, recordLabel := util.GetSkyMsg(service.Spec.ClusterIP, 0)
subCache.SetEntry(recordLabel, recordValue, kd.fqdn(service, recordLabel))
// Generate SRV Records
for i := range service.Spec.Ports {
port := &service.Spec.Ports[i]
if port.Name != "" && port.Protocol != "" {
srvValue := kd.generateSRVRecordValue(service, int(port.Port))
l := []string{"_" + strings.ToLower(string(port.Protocol)), "_" + port.Name}
glog.V(2).Infof("Added SRV record %+v", srvValue)
subCache.SetEntry(recordLabel, srvValue, kd.fqdn(service, append(l, recordLabel)...), l...)
}
}
subCachePath := append(kd.domainPath, serviceSubdomain, service.Namespace)
host := getServiceFQDN(kd.domain, service)
reverseRecord, _ := util.GetSkyMsg(host, 0)
kd.cacheLock.Lock()
defer kd.cacheLock.Unlock()
kd.cache.SetSubCache(service.Name, subCache, subCachePath...)
kd.reverseRecordMap[service.Spec.ClusterIP] = reverseRecord
kd.clusterIPServiceMap[service.Spec.ClusterIP] = service
}
func (kd *KubeDNS) generateRecordsForHeadlessService(e *v1.Endpoints, svc *v1.Service) error {
// TODO: remove this after v1.4 is released and the old annotations are EOL
podHostnames, err := getPodHostnamesFromAnnotation(e.Annotations)
if err != nil {
return err
}
subCache := treecache.NewTreeCache()
glog.V(4).Infof("Endpoints Annotations: %v", e.Annotations)
for idx := range e.Subsets {
for subIdx := range e.Subsets[idx].Addresses {
address := &e.Subsets[idx].Addresses[subIdx]
endpointIP := address.IP
recordValue, endpointName := util.GetSkyMsg(endpointIP, 0)
if hostLabel, exists := getHostname(address, podHostnames); exists {
endpointName = hostLabel
}
subCache.SetEntry(endpointName, recordValue, kd.fqdn(svc, endpointName))
for portIdx := range e.Subsets[idx].Ports {
endpointPort := &e.Subsets[idx].Ports[portIdx]
if endpointPort.Name != "" && endpointPort.Protocol != "" {
srvValue := kd.generateSRVRecordValue(svc, int(endpointPort.Port), endpointName)
glog.V(2).Infof("Added SRV record %+v", srvValue)
l := []string{"_" + strings.ToLower(string(endpointPort.Protocol)), "_" + endpointPort.Name}
subCache.SetEntry(endpointName, srvValue, kd.fqdn(svc, append(l, endpointName)...), l...)
}
}
}
}
subCachePath := append(kd.domainPath, serviceSubdomain, svc.Namespace)
kd.cacheLock.Lock()
defer kd.cacheLock.Unlock()
kd.cache.SetSubCache(svc.Name, subCache, subCachePath...)
return nil
}
func getHostname(address *v1.EndpointAddress, podHostnames map[string]endpoints.HostRecord) (string, bool) {
if len(address.Hostname) > 0 {
return address.Hostname, true
}
if hostRecord, exists := podHostnames[address.IP]; exists && len(validation.IsDNS1123Label(hostRecord.HostName)) == 0 {
return hostRecord.HostName, true
}
return "", false
}
func getPodHostnamesFromAnnotation(annotations map[string]string) (map[string]endpoints.HostRecord, error) {
hostnames := map[string]endpoints.HostRecord{}
if annotations != nil {
if serializedHostnames, exists := annotations[endpoints.PodHostnamesAnnotation]; exists && len(serializedHostnames) > 0 {
err := json.Unmarshal([]byte(serializedHostnames), &hostnames)
if err != nil {
return nil, err
}
}
}
return hostnames, nil
}
func (kd *KubeDNS) generateSRVRecordValue(svc *v1.Service, portNumber int, labels ...string) *skymsg.Service {
host := strings.Join([]string{svc.Name, svc.Namespace, serviceSubdomain, kd.domain}, ".")
for _, cNameLabel := range labels {
host = cNameLabel + "." + host
}
recordValue, _ := util.GetSkyMsg(host, portNumber)
return recordValue
}
// Generates skydns records for a headless service.
func (kd *KubeDNS) newHeadlessService(service *v1.Service) error {
// Create an A record for every pod in the service.
// This record must be periodically updated.
// Format is as follows:
// For a service x, with pods a and b create DNS records,
// a.x.ns.domain. and, b.x.ns.domain.
key, err := kcache.MetaNamespaceKeyFunc(service)
if err != nil {
return err
}
e, exists, err := kd.endpointsStore.GetByKey(key)
if err != nil {
return fmt.Errorf("failed to get endpoints object from endpoints store - %v", err)
}
if !exists {
glog.V(1).Infof("Could not find endpoints for service %q in namespace %q. DNS records will be created once endpoints show up.",
service.Name, service.Namespace)
return nil
}
if e, ok := e.(*v1.Endpoints); ok {
return kd.generateRecordsForHeadlessService(e, service)
}
return nil
}
// Generates skydns records for an ExternalName service.
func (kd *KubeDNS) newExternalNameService(service *v1.Service) {
// Create a CNAME record for the service's ExternalName.
// TODO: TTL?
recordValue, _ := util.GetSkyMsg(service.Spec.ExternalName, 0)
cachePath := append(kd.domainPath, serviceSubdomain, service.Namespace)
fqdn := kd.fqdn(service)
glog.V(2).Infof("newExternalNameService: storing key %s with value %v as %s under %v",
service.Name, recordValue, fqdn, cachePath)
kd.cacheLock.Lock()
defer kd.cacheLock.Unlock()
// Store the service name directly as the leaf key
kd.cache.SetEntry(service.Name, recordValue, fqdn, cachePath...)
}
// Records responds with DNS records that match the given name, in a format
// understood by the skydns server. If "exact" is true, a single record
// matching the given name is returned, otherwise all records stored under
// the subtree matching the name are returned.
func (kd *KubeDNS) Records(name string, exact bool) (retval []skymsg.Service, err error) {
glog.V(3).Infof("Query for %q, exact: %v", name, exact)
trimmed := strings.TrimRight(name, ".")
segments := strings.Split(trimmed, ".")
isFederationQuery := false
federationSegments := []string{}
if !exact && kd.isFederationQuery(segments) {
glog.V(3).Infof("Received federation query, trying local service first")
// Try querying the non-federation (local) service first. Will try
// the federation one later, if this fails.
isFederationQuery = true
federationSegments = append(federationSegments, segments...)
// To try local service, remove federation name from segments.
// Federation name is 3rd in the segment (after service name and
// namespace).
segments = append(segments[:2], segments[3:]...)
}
path := util.ReverseArray(segments)
records, err := kd.getRecordsForPath(path, exact)
if err != nil {
return nil, err
}
if isFederationQuery {
return kd.recordsForFederation(records, path, exact, federationSegments)
} else if len(records) > 0 {
glog.V(4).Infof("Records for %v: %v", name, records)
return records, nil
}
glog.V(3).Infof("No record found for %v", name)
return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound}
}
func (kd *KubeDNS) recordsForFederation(records []skymsg.Service, path []string, exact bool, federationSegments []string) (retval []skymsg.Service, err error) {
// For federation query, verify that the local service has endpoints.
validRecord := false
for _, val := range records {
// We know that a headless service has endpoints for sure if a
// record was returned for it. The record contains endpoint
// IPs. So nothing to check for headless services.
//
// TODO: this access to the cluster IP map does not seem to be
// threadsafe.
if !kd.isHeadlessServiceRecord(&val) {
ok, err := kd.serviceWithClusterIPHasEndpoints(&val)
if err != nil {
glog.V(2).Infof(
"Federation: error finding if service has endpoint: %v", err)
continue
}
if !ok {
glog.V(2).Infof("Federation: skipping record since service has no endpoint: %v", val)
continue
}
}
validRecord = true
break
}
if validRecord {
// There is a local service with valid endpoints, return its CNAME.
name := strings.Join(util.ReverseArray(path), ".")
// Ensure that this name that we are returning as a CNAME response
// is a fully qualified domain name so that the client's resolver
// library doesn't have to go through its search list all over
// again.
if !strings.HasSuffix(name, ".") {
name = name + "."
}
glog.V(3).Infof(
"Federation: Returning CNAME for local service: %v", name)
return []skymsg.Service{{Host: name}}, nil
}
// If the name query is not an exact query and does not match any
// records in the local store, attempt to send a federation redirect
// (CNAME) response.
if !exact {
glog.V(3).Infof(
"Federation: Did not find a local service. Trying federation redirect (CNAME)")
return kd.federationRecords(util.ReverseArray(federationSegments))
}
return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound}
}
func (kd *KubeDNS) getRecordsForPath(path []string, exact bool) ([]skymsg.Service, error) {
if kd.isPodRecord(path) {
ip, err := kd.getPodIP(path)
if err == nil {
skyMsg, _ := util.GetSkyMsg(ip, 0)
return []skymsg.Service{*skyMsg}, nil
}
return nil, err
}
if exact {
key := path[len(path)-1]
if key == "" {
return []skymsg.Service{}, nil
}
kd.cacheLock.RLock()
defer kd.cacheLock.RUnlock()
if record, ok := kd.cache.GetEntry(key, path[:len(path)-1]...); ok {
glog.V(3).Infof("Exact match %v for %v received from cache", record, path[:len(path)-1])
return []skymsg.Service{*(record.(*skymsg.Service))}, nil
}
glog.V(3).Infof("Exact match for %v not found in cache", path)
return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound}
}
kd.cacheLock.RLock()
defer kd.cacheLock.RUnlock()
records := kd.cache.GetValuesForPathWithWildcards(path...)
glog.V(3).Infof("Found %d records for %v in the cache", len(records), path)
retval := []skymsg.Service{}
for _, val := range records {
retval = append(retval, *val)
}
glog.V(4).Infof("getRecordsForPath retval=%+v, path=%v", retval, path)
return retval, nil
}
// Returns true if the given record corresponds to a headless service.
// Important: Assumes that we already have the cacheLock. Callers responsibility to acquire it.
// This is because the code will panic, if we try to acquire it again if we already have it.
func (kd *KubeDNS) isHeadlessServiceRecord(msg *skymsg.Service) bool {
// If it is not a headless service, then msg.Host will be the cluster IP.
// So we can check if msg.host exists in our clusterIPServiceMap.
_, ok := kd.clusterIPServiceMap[msg.Host]
// It is headless service if no record was found.
return !ok
}
// Returns true if the service corresponding to the given message has endpoints.
// Note: Works only for services with ClusterIP. Will return an error for headless service (service without a clusterIP).
// Important: Assumes that we already have the cacheLock. Callers responsibility to acquire it.
// This is because the code will panic, if we try to acquire it again if we already have it.
func (kd *KubeDNS) serviceWithClusterIPHasEndpoints(msg *skymsg.Service) (bool, error) {
svc, ok := kd.clusterIPServiceMap[msg.Host]
if !ok {
// It is a headless service.
return false, fmt.Errorf("method not expected to be called for headless service")
}
key, err := kcache.MetaNamespaceKeyFunc(svc)
if err != nil {
return false, err
}
e, exists, err := kd.endpointsStore.GetByKey(key)
if err != nil {
return false, fmt.Errorf("failed to get endpoints object from endpoints store - %v", err)
}
if !exists {
return false, nil
}
if e, ok := e.(*v1.Endpoints); ok {
return len(e.Subsets) > 0, nil
}
return false, fmt.Errorf("unexpected: found non-endpoint object in endpoint store: %v", e)
}
// ReverseRecords performs a reverse lookup for the given name.
func (kd *KubeDNS) ReverseRecord(name string) (*skymsg.Service, error) {
glog.V(3).Infof("Query for ReverseRecord %q", name)
// if portalIP is not a valid IP, the reverseRecordMap lookup will fail
portalIP, ok := util.ExtractIP(name)
if !ok {
return nil, fmt.Errorf("does not support reverse lookup for %s", name)
}
kd.cacheLock.RLock()
defer kd.cacheLock.RUnlock()
if reverseRecord, ok := kd.reverseRecordMap[portalIP]; ok {
return reverseRecord, nil
}
return nil, fmt.Errorf("must be exactly one service record")
}
// e.g {"local", "cluster", "pod", "default", "10-0-0-1"}
func (kd *KubeDNS) isPodRecord(path []string) bool {
if len(path) != len(kd.domainPath)+3 {
return false
}
if path[len(kd.domainPath)] != "pod" {
return false
}
for _, segment := range path {
if segment == "*" {
return false
}
}
return true
}
func (kd *KubeDNS) getPodIP(path []string) (string, error) {
ipStr := path[len(path)-1]
ip := strings.Replace(ipStr, "-", ".", -1)
if parsed := net.ParseIP(ip); parsed != nil {
return ip, nil
}
return "", fmt.Errorf("Invalid IP Address %v", ip)
}
// isFederationQuery checks if the given query `path` matches the federated service query pattern.
// The conjunction of the following conditions forms the test for the federated service query
// pattern:
// 1. `path` has exactly 4+len(domainPath) segments: mysvc.myns.myfederation.svc.domain.path.
// 2. Service name component must be a valid RFC 1035 name.
// 3. Namespace component must be a valid RFC 1123 name.
// 4. Federation component must also be a valid RFC 1123 name.
// 5. Fourth segment is exactly "svc"
// 6. The remaining segments match kd.domainPath.
// 7. And federation must be one of the listed federations in the config.
// Note: Because of the above conditions, this method will treat wildcard queries such as
// *.mysvc.myns.myfederation.svc.domain.path as non-federation queries.
// We can add support for wildcard queries later, if needed.
func (kd *KubeDNS) isFederationQuery(path []string) bool {
if len(path) != 4+len(kd.domainPath) {
glog.V(4).Infof("Not a federation query: len(%q) != 4+len(%q)", path, kd.domainPath)
return false
}
if errs := validation.IsDNS1035Label(path[0]); len(errs) != 0 {
glog.V(4).Infof("Not a federation query: %q is not an RFC 1035 label: %q",
path[0], errs)
return false
}
if errs := validation.IsDNS1123Label(path[1]); len(errs) != 0 {
glog.V(4).Infof("Not a federation query: %q is not an RFC 1123 label: %q",
path[1], errs)
return false
}
if errs := validation.IsDNS1123Label(path[2]); len(errs) != 0 {
glog.V(4).Infof("Not a federation query: %q is not an RFC 1123 label: %q",
path[2], errs)
return false
}
if path[3] != serviceSubdomain {
glog.V(4).Infof("Not a federation query: %q != %q (serviceSubdomain)",
path[3], serviceSubdomain)
return false
}
for i, domComp := range kd.domainPath {
// kd.domainPath is reversed, so we need to look in the `path` in the reverse order.
if domComp != path[len(path)-i-1] {
glog.V(4).Infof("Not a federation query: kd.domainPath[%d] != path[%d] (%q != %q)",
i, len(path)-i-1, domComp, path[len(path)-i-1])
return false
}
}
kd.configLock.RLock()
defer kd.configLock.RUnlock()
if _, ok := kd.config.Federations[path[2]]; !ok {
glog.V(4).Infof("Not a federation query: label %q not found", path[2])
return false
}
return true
}
// federationRecords checks if the given `queryPath` is for a federated service and if it is,
// it returns a CNAME response containing the cluster zone name and federation domain name
// suffix.
func (kd *KubeDNS) federationRecords(queryPath []string) ([]skymsg.Service, error) {
// `queryPath` is a reversed-array of the queried name, reverse it back to make it easy
// to follow through this code and reduce confusion. There is no reason for it to be
// reversed here.
path := util.ReverseArray(queryPath)
// Check if the name query matches the federation query pattern.
if !kd.isFederationQuery(path) {
return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound}
}
// Now that we have already established that the query is a federation query, remove the local
// domain path components, i.e. kd.domainPath, from the query.
path = path[:len(path)-len(kd.domainPath)]
// Append the zone name (zone in the cloud provider terminology, not a DNS
// zone) and the region name.
zone, region, err := kd.getClusterZoneAndRegion()
if err != nil {
return nil, fmt.Errorf("failed to obtain the cluster zone and region: %v", err)
}
path = append(path, zone, region)
// We have already established that the map entry exists for the given federation,
// we just need to retrieve the domain name, validate it and append it to the path.
kd.configLock.RLock()
domain := kd.config.Federations[path[2]]
kd.configLock.RUnlock()
// We accept valid subdomains as well, so just let all the valid subdomains.
if len(validation.IsDNS1123Subdomain(domain)) != 0 {
return nil, fmt.Errorf("%s is not a valid domain name for federation %s", domain, path[2])
}
name := strings.Join(append(path, domain), ".")
// Ensure that this name that we are returning as a CNAME response is a fully qualified
// domain name so that the client's resolver library doesn't have to go through its
// search list all over again.
if !strings.HasSuffix(name, ".") {
name = name + "."
}
return []skymsg.Service{{Host: name}}, nil
}
// getClusterZoneAndRegion returns the name of the zone and the region the
// cluster is running in. It arbitrarily selects a node and reads the failure
// domain label on the node. An alternative is to obtain this pod's
// (i.e. kube-dns pod's) name using the downward API, get the pod, get the
// node the pod is bound to and retrieve that node's labels. But even just by
// reading those steps, it looks complex and it is not entirely clear what
// that complexity is going to buy us. So taking a simpler approach here.
// Also note that zone here means the zone in cloud provider terminology, not
// the DNS zone.
func (kd *KubeDNS) getClusterZoneAndRegion() (string, string, error) {
var node *v1.Node
objs := kd.nodesStore.List()
if len(objs) > 0 {
var ok bool
if node, ok = objs[0].(*v1.Node); !ok {
return "", "", fmt.Errorf("expected node object, got: %T", objs[0])
}
} else {
// An alternative to listing nodes each time is to set a watch, but that is totally
// wasteful in case of non-federated independent Kubernetes clusters. So carefully
// proceeding here.
// TODO(madhusudancs): Move this to external/v1 API.
nodeList, err := kd.kubeClient.Core().Nodes().List(v1.ListOptions{})
if err != nil || len(nodeList.Items) == 0 {
return "", "", fmt.Errorf("failed to retrieve the cluster nodes: %v", err)
}
// Select a node (arbitrarily the first node) that has
// `LabelZoneFailureDomain` and `LabelZoneRegion` set.
for _, nodeItem := range nodeList.Items {
_, zfound := nodeItem.Labels[metav1.LabelZoneFailureDomain]
_, rfound := nodeItem.Labels[metav1.LabelZoneRegion]
if !zfound || !rfound {
continue
}
// Make a copy of the node, don't rely on the loop variable.
node = &(*(&nodeItem))
if err := kd.nodesStore.Add(node); err != nil {
return "", "", fmt.Errorf("couldn't add the retrieved node to the cache: %v", err)
}
// Node is found, break out of the loop.
break
}
}
if node == nil {
return "", "", fmt.Errorf("Could not find any nodes")
}
zone, ok := node.Labels[metav1.LabelZoneFailureDomain]
if !ok || zone == "" {
return "", "", fmt.Errorf("unknown cluster zone")
}
region, ok := node.Labels[metav1.LabelZoneRegion]
if !ok || region == "" {
return "", "", fmt.Errorf("unknown cluster region")
}
return zone, region, nil
}
func getServiceFQDN(domain string, service *v1.Service) string {
return strings.Join(
[]string{service.Name, service.Namespace, serviceSubdomain, domain}, ".")
}