Source code for google.cloud.forseti.scanner.audit.log_sink_rules_engine

# Copyright 2018 The Forseti Security Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Log Sinks/Exports rules engine.

Builds the RuleBook (LogSinkRuleBook) from the rule definitions (file either
stored locally or in GCS) and compares a resource's log sinks against the
RuleBook to determine whether there are violations. Log Sinks rules can be
defined on organization, folder, billing_account and project.
"""

from builtins import object
import collections
import itertools
import re
import threading

from google.cloud.forseti.common.gcp_type import resource_util
from google.cloud.forseti.common.util import logger
from google.cloud.forseti.common.util import relationship
from google.cloud.forseti.common.util.regular_exp import escape_and_globify
from google.cloud.forseti.scanner.audit import base_rules_engine as bre
from google.cloud.forseti.scanner.audit import errors as audit_errors
from google.cloud.forseti.services.utils import to_full_resource_name


LOGGER = logger.get_logger(__name__)

VIOLATION_TYPE = 'LOG_SINK_VIOLATION'

# Rule Modes.
_WHITELIST = 'whitelist'
_BLACKLIST = 'blacklist'
_REQUIRED = 'required'
_RULE_MODES = frozenset([_WHITELIST, _BLACKLIST, _REQUIRED])
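
# Mode semantics, as implemented by the helper functions further below: a
# whitelist rule flags every sink on the resource that does not match the
# rule's sink spec, a blacklist rule flags every sink that does match it, and
# a required rule flags the resource itself when none of its sinks match.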


class LogSinkRulesEngine(bre.BaseRulesEngine):
    """Rules engine for Log Sinks."""

    def __init__(self, rules_file_path, snapshot_timestamp=None):
        """Initialize.

        Args:
            rules_file_path (str): File location of rules.
            snapshot_timestamp (str): The snapshot to work with.
        """
        super(LogSinkRulesEngine, self).__init__(
            rules_file_path=rules_file_path,
            snapshot_timestamp=snapshot_timestamp)
        self.rule_book = None

    def build_rule_book(self, global_configs=None):
        """Build LogSinkRuleBook from the rules definition file.

        Args:
            global_configs (dict): Global configurations.
        """
        self.rule_book = LogSinkRuleBook(
            global_configs,
            self._load_rule_definitions(),
            snapshot_timestamp=self.snapshot_timestamp)

    def find_violations(self, resource, log_sinks, force_rebuild=False):
        """Determine whether a resource's log sink config violates rules.

        Args:
            resource (gcp_type): The resource that the log sinks belong to.
            log_sinks (list): list of LogSinks for resource.
            force_rebuild (bool): If True, rebuilds the rule book. This will
                reload the rules definition file and add the rules to the
                book.

        Returns:
            iterable: A generator of rule violations.
        """
        if self.rule_book is None or force_rebuild:
            self.build_rule_book()

        violations = self.rule_book.find_violations(resource, log_sinks)

        return set(violations)

    def add_rules(self, rules):
        """Add rules to the rule book.

        Args:
            rules (list): The list of rules to add to the book.
        """
        if self.rule_book is not None:
            self.rule_book.add_rules(rules)


def _parse_sink_rule_spec(sink_spec):
    """Validates and escapes a sink from a rule config.

    Args:
        sink_spec (dict): A sink definition from a LogSink rule definition.

    Returns:
        dict: A sink definition with fields escaped and globified, or None
            if sink_spec is invalid.
    """
    if not sink_spec:
        return None
    sink_destination = sink_spec.get('destination')
    sink_filter = sink_spec.get('filter')
    sink_include_children = sink_spec.get('include_children')
    # All fields are mandatory.
    if any(item is None for item in [
            sink_destination, sink_filter, sink_include_children]):
        return None
    # include_children will either match a boolean, or allow either.
    if sink_include_children.lower() not in ['*', 'true', 'false']:
        return None
    if sink_include_children != '*':
        sink_include_children = sink_include_children.lower() == 'true'
    return {
        'destination': escape_and_globify(sink_destination),
        'filter': escape_and_globify(sink_filter),
        'include_children': sink_include_children,
    }


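# For illustration of _parse_sink_rule_spec above (values hypothetical):
# a rule's sink spec such as
#
#     {'destination': 'bigquery.googleapis.com/*',
#      'filter': 'logName:"logs/cloudaudit.googleapis.com"',
#      'include_children': 'true'}
#
# comes back with 'destination' and 'filter' run through escape_and_globify()
# and 'include_children' coerced to the boolean True ('*' is kept to match
# either value); a spec missing any of the three keys yields None.

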
class LogSinkRuleBook(bre.BaseRuleBook):
    """The RuleBook for Log Sink configs.

    Rules from the rules definition file are parsed and placed into a map,
    which associates the applies_to value and GCP resource (project, folder,
    billing_account or organization) with the rules defined for it.

    Resources are evaluated against matching rules defined with
    applies_to = "self". Project resources are also evaluated against rules
    for ancestor resources defined with applies_to = "children".
    """

    supported_resource_types = frozenset([
        'project',
        'folder',
        'billing_account',
        'organization',
    ])

    supported_rule_applies_to = frozenset([
        'self',
        'children',
    ])

    def __init__(self,
                 global_configs,  # pylint: disable= unused-argument
                 rule_defs=None,
                 snapshot_timestamp=None):
        """Initialize.

        Args:
            global_configs (dict): Global configurations.
            rule_defs (dict): The parsed dictionary of rules from the YAML
                definition file.
            snapshot_timestamp (str): The snapshot to lookup data.
        """
        super(LogSinkRuleBook, self).__init__()
        self._rules_sema = threading.BoundedSemaphore(value=1)
        self.resource_rules_map = {
            applies_to: collections.defaultdict(set)
            for applies_to in self.supported_rule_applies_to}
        if not rule_defs:
            self.rule_defs = {}
        else:
            self.rule_defs = rule_defs
            self.add_rules(rule_defs)
        if snapshot_timestamp:
            self.snapshot_timestamp = snapshot_timestamp

    def __eq__(self, other):
        """Equals.

        Args:
            other (object): Object to compare.

        Returns:
            bool: True or False.
        """
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.resource_rules_map == other.resource_rules_map

    def __ne__(self, other):
        """Not Equals.

        Args:
            other (object): Object to compare.

        Returns:
            bool: True or False.
        """
        return not self == other

    def __repr__(self):
        """Object representation.

        Returns:
            str: The object representation.
        """
        return 'LogSinkRuleBook <{}>'.format(self.resource_rules_map)

    def add_rules(self, rule_defs):
        """Add rules to the rule book.

        Args:
            rule_defs (dict): Rules parsed from the rule definition file.
        """
        for (i, rule) in enumerate(rule_defs.get('rules', [])):
            self.add_rule(rule, i)

    def add_rule(self, rule_def, rule_index):
        """Add a rule to the rule book.

        The rule supplied to this method is the dictionary parsed from
        the rules definition file.

        For example, this rule...

            # rules yaml:
            rules:
              - name: a rule
                mode: required
                resource:
                  - type: organization
                    applies_to: children
                    resource_ids:
                      - 11223344
                sink:
                  destination: 'bigquery.googleapis.com/projects/my-proj/*'
                  filter: 'logName:"logs/cloudaudit.googleapis.com"'
                  include_children: '*'

        ... gets parsed into:

            {
                'name': 'a rule',
                'mode': 'required',
                'resource': [{
                    'type': 'organization',
                    'applies_to': 'children',
                    'resource_ids': ['11223344']
                }],
                'sink': {
                    'destination': 'bigquery.googleapis.com/projects/my-proj/*',
                    'filter': 'logName:"logs/cloudaudit.googleapis.com"',
                    'include_children': '*'
                }
            }

        Args:
            rule_def (dict): Contains rule definition properties.
            rule_index (int): The index of the rule from the rule definitions.
                Assigned automatically when the rule book is built.
        """
        self._rules_sema.acquire()

        try:
            resources = rule_def.get('resource')
            mode = rule_def.get('mode')
            sink = _parse_sink_rule_spec(rule_def.get('sink'))
            if not resources or sink is None or mode not in _RULE_MODES:
                raise audit_errors.InvalidRulesSchemaError(
                    'Faulty rule {}'.format(rule_index))

            for resource in resources:
                resource_type = resource.get('type')
                applies_to = resource.get('applies_to')
                resource_ids = resource.get('resource_ids')

                if resource_type not in self.supported_resource_types:
                    raise audit_errors.InvalidRulesSchemaError(
                        'Invalid resource type in rule {}'.format(rule_index))

                if applies_to not in self.supported_rule_applies_to:
                    raise audit_errors.InvalidRulesSchemaError(
                        'Invalid applies_to type in rule {}'.format(
                            rule_index))

                if applies_to == 'children' and resource_type in [
                        'project', 'billing_account']:
                    raise audit_errors.InvalidRulesSchemaError(
                        'Rule {} cannot apply to children of a {}'.format(
                            rule_index, resource_type))

                if not resource_ids:
                    raise audit_errors.InvalidRulesSchemaError(
                        'Missing resource ids in rule {}'.format(rule_index))

                # For each resource id associated with the rule, create a
                # mapping of applies_to => resource => rules.
                for resource_id in resource_ids:
                    gcp_resource = resource_util.create_resource(
                        resource_id=resource_id,
                        resource_type=resource_type)

                    rule_def_resource = {
                        'sink': sink,
                        'mode': mode,
                    }
                    rule = Rule(rule_name=rule_def.get('name'),
                                rule_index=rule_index,
                                rule=rule_def_resource)

                    # If no mapping exists, create it. If the rule isn't in
                    # the mapping, add it.
                    self.resource_rules_map[applies_to][gcp_resource].add(
                        rule)
        finally:
            self._rules_sema.release()

    def find_violations(self, resource, log_sinks):
        """Find Log Sink violations in the rule book.

        Args:
            resource (gcp_type): The resource that the log sinks belong to.
            log_sinks (list): list of LogSinks for resource.

        Returns:
            iterable: A generator of the rule violations.
        """
        violations = itertools.chain()

        # Check for rules that apply to this resource directly.
        resource_rules = self.resource_rules_map['self'].get(resource, [])
        for rule in resource_rules:
            violations = itertools.chain(
                violations, rule.find_violations(resource, log_sinks))

        # If resource is a project, check for ancestor rules that apply to
        # children.
        if resource.type == 'project':
            resource_ancestors = (
                relationship.find_ancestors(resource, resource.full_name))
            for curr_resource in resource_ancestors:
                resource_rules = self.resource_rules_map['children'].get(
                    curr_resource, [])
                for rule in resource_rules:
                    violations = itertools.chain(
                        violations,
                        rule.find_violations(resource, log_sinks))

        return violations


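# For example (names hypothetical): when the resource passed to
# LogSinkRuleBook.find_violations above is a project whose full_name places
# it under folder/56 inside organization/1234, the 'children' rules
# registered for that folder and organization are applied to the project's
# sinks in addition to the project's own 'self' rules.

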
def _sink_matches_rule(rule_def, sink):
    """Returns true if the log sink matches the rule's sink definition.

    Args:
        rule_def (dict): sink rule definition.
        sink (LogSink): sink being matched to the rule definition.

    Returns:
        bool: True if sink matches rule definition.
    """
    if (not re.match(rule_def['destination'], sink.destination) or
            not re.match(rule_def['filter'], sink.sink_filter)):
        return False
    return (rule_def['include_children'] == '*' or
            rule_def['include_children'] == sink.include_children)


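# A sketch of _sink_matches_rule above (sink values hypothetical): a rule
# whose destination glob was 'bigquery.googleapis.com/*' matches a sink
# exporting to 'bigquery.googleapis.com/projects/p1/datasets/d1', provided
# the sink's filter also matches the rule's globified filter; an
# include_children value of '*' accepts either True or False on the sink.

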
def _find_whitelist_violations(rule_def, sinks):
    """Returns log sinks that DON'T match the rule definition.

    Args:
        rule_def (dict): sink whitelist rule definition.
        sinks (list): list of LogSinks to be matched against whitelist.

    Returns:
        list: All LogSinks in `sinks` that violate the whitelist.
    """
    violating_sinks = []
    for sink in sinks:
        if not _sink_matches_rule(rule_def, sink):
            violating_sinks.append(sink)
    return violating_sinks


def _find_blacklist_violations(rule_def, sinks):
    """Returns log sinks that match the rule definition.

    Args:
        rule_def (dict): sink blacklist rule definition.
        sinks (list): list of LogSinks to be matched against blacklist.

    Returns:
        list: All LogSinks in `sinks` that violate the blacklist.
    """
    violating_sinks = []
    for sink in sinks:
        if _sink_matches_rule(rule_def, sink):
            violating_sinks.append(sink)
    return violating_sinks


def _required_sink_missing(rule_def, sinks):
    """Returns True if no sink matches the rule definition.

    Args:
        rule_def (dict): required sink rule definition.
        sinks (list): list of LogSinks to be matched against required sink.

    Returns:
        bool: True if no log sink matches the required sink definition.
    """
    for sink in sinks:
        if _sink_matches_rule(rule_def, sink):
            return False
    return True


class Rule(object):
    """Rule properties from the rule definition file. Also finds violations."""

    RuleViolation = collections.namedtuple(
        'RuleViolation',
        ['resource_type', 'resource_id', 'full_name', 'rule_name',
         'rule_index', 'violation_type', 'sink_destination', 'sink_filter',
         'sink_include_children', 'resource_data', 'resource_name'])

    def __init__(self, rule_name, rule_index, rule):
        """Initialize.

        Args:
            rule_name (str): Name of the loaded rule.
            rule_index (int): The index of the rule from the rule definitions.
            rule (dict): The rule definition from the file.
        """
        self.rule_name = rule_name
        self.rule_index = rule_index
        self.rule = rule

    def find_violations(self, resource, log_sinks):
        """Find Log Sink violations in the rule book.

        Args:
            resource (gcp_type): The resource that the log sinks belong to.
            log_sinks (list): list of log sinks for resource.

        Yields:
            namedtuple: Returns RuleViolation named tuple.
        """
        # Required-mode violations are violations on the parent resource,
        # other violations are on the sink resource.
        if self.rule['mode'] == _REQUIRED:
            if _required_sink_missing(self.rule['sink'], log_sinks):
                sink = self.rule['sink']
                yield self.RuleViolation(
                    resource_name=resource.id,
                    resource_type=resource.type,
                    resource_id=resource.id,
                    full_name=resource.full_name,
                    rule_name=self.rule_name,
                    rule_index=self.rule_index,
                    violation_type=VIOLATION_TYPE,
                    sink_destination=sink['destination'],
                    sink_filter=sink['filter'],
                    sink_include_children=sink['include_children'],
                    resource_data=''
                )
        else:
            if self.rule['mode'] == _WHITELIST:
                violating_sinks = _find_whitelist_violations(
                    self.rule['sink'], log_sinks)
            else:
                violating_sinks = _find_blacklist_violations(
                    self.rule['sink'], log_sinks)

            # Return a violation for each sink that violates the
            # black/whitelist.
            for sink in violating_sinks:
                yield self.RuleViolation(
                    resource_name=sink.name,
                    resource_type=sink.type,
                    resource_id=sink.id,
                    full_name=to_full_resource_name(
                        resource.full_name, sink.id),
                    rule_name=self.rule_name,
                    rule_index=self.rule_index,
                    violation_type=VIOLATION_TYPE,
                    sink_destination=sink.destination,
                    sink_filter=sink.sink_filter,
                    sink_include_children=sink.include_children,
                    resource_data=sink.raw_json
                )