# Source code for google.cloud.forseti.common.data_access.csv_writer

# Copyright 2017 The Forseti Security Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Writes the csv files for upload to Cloud SQL."""
from contextlib import contextmanager
import json
import os
import tempfile

import unicodecsv as csv

from google.cloud.forseti.common.data_access.errors import CSVFileError

# Ordered CSV column names for the 'appengine' resource; registered under
# that key in CSV_FIELDNAME_MAP and handed to the CSV writer as fieldnames.
APPENGINE_FIELDNAMES = [
    'project_id',
    'name',
    'app_id',
    'dispatch_rules',
    'auth_domain',
    'location_id',
    'code_bucket',
    'default_cookie_expiration',
    'serving_status',
    'default_hostname',
    'default_bucket',
    'iap',
    'gcr_domain',
    'raw_application'
]

# Ordered CSV column names for the 'appengine_services' resource
# (see CSV_FIELDNAME_MAP).
APPENGINE_SERVICES_FIELDNAMES = [
    'project_id',
    'app_id',
    'service_id',
    'service'
]

# Ordered CSV column names for the 'appengine_versions' resource
# (see CSV_FIELDNAME_MAP).
APPENGINE_VERSIONS_FIELDNAMES = [
    'project_id',
    'app_id',
    'service_id',
    'version_id',
    'version'
]

# Ordered CSV column names for the 'appengine_instances' resource
# (see CSV_FIELDNAME_MAP).
APPENGINE_INSTANCES_FIELDNAMES = [
    'project_id',
    'app_id',
    'service_id',
    'version_id',
    'instance_id',
    'instance'
]

# Ordered CSV column names for the 'backend_services' resource; registered
# under that key in CSV_FIELDNAME_MAP.
BACKEND_SERVICES_FIELDNAMES = [
    'id',
    'project_id',
    'affinity_cookie_ttl_sec',
    'backends',
    'cdn_policy',
    'connection_draining',
    'creation_timestamp',
    'description',
    'enable_cdn',
    'health_checks',
    'iap',
    'load_balancing_scheme',
    'name',
    'port_name',
    'port',
    'protocol',
    'region',
    'session_affinity',
    'timeout_sec',
    'raw_backend_service'
]

# Ordered CSV column names for the 'bigquery_datasets' resource; registered
# under that key in CSV_FIELDNAME_MAP.
BIGQUERY_DATASET_FIELDNAMES = [
    'project_id',
    'dataset_id',
    'access_domain',
    'access_user_by_email',
    'access_special_group',
    'access_group_by_email',
    'role',
    'access_view_project_id',
    'access_view_table_id',
    'access_view_dataset_id',
    'raw_access_map'
]

# Ordered CSV column names for the 'buckets_acl' resource; registered under
# that key in CSV_FIELDNAME_MAP.
BUCKETS_ACL_FIELDNAMES = [
    'bucket',
    'domain',
    'email',
    'entity',
    'entity_id',
    'acl_id',
    'kind',
    'project_team',  # TODO: flatten this
    'role',
    'bucket_acl_selflink',
    'raw_bucket_acl'
]

# Ordered CSV column names for the 'buckets' resource; registered under
# that key in CSV_FIELDNAME_MAP.
# TODO: Add pydoc to describe the mapping of the custom field naming
# to the field names in the resource objects.
# https://cloud.google.com/storage/docs/json_api/v1/buckets#resource
BUCKETS_FIELDNAMES = [
    'project_number',
    'bucket_id',
    'bucket_name',
    'bucket_kind',
    'bucket_storage_class',
    'bucket_location',
    'bucket_create_time',
    'bucket_update_time',
    'bucket_selflink',
    'bucket_lifecycle_raw',
    'raw_bucket'
]

# Ordered CSV column names for the 'cloudsql_instances' resource; registered
# under that key in CSV_FIELDNAME_MAP.
CLOUDSQL_INSTANCES_FIELDNAMES = [
    'project_number',
    'name',
    'project',
    'backend_type',
    'connection_name',
    'current_disk_size',
    'database_version',
    'failover_replica_available',
    'failover_replica_name',
    'instance_type',
    'ipv6_address',
    'kind',
    'master_instance_name',
    'max_disk_size',
    'on_premises_configuration_host_port',
    'on_premises_configuration_kind',
    'region',
    'replica_configuration',
    'replica_names',
    'self_link',
    'server_ca_cert',
    'service_account_email_address',
    'settings_activation_policy',
    'settings_authorized_gae_applications',
    'settings_availability_type',
    'settings_backup_configuration_binary_log_enabled',
    'settings_backup_configuration_enabled',
    'settings_backup_configuration_kind',
    'settings_backup_configuration_start_time',
    'settings_crash_safe_replication_enabled',
    'settings_data_disk_size_gb',
    'settings_data_disk_type',
    'settings_database_flags',
    'settings_database_replication_enabled',
    'settings_ip_configuration_ipv4_enabled',
    'settings_ip_configuration_require_ssl',
    'settings_kind',
    'settings_labels',
    'settings_location_preference_follow_gae_application',
    'settings_location_preference_kind',
    'settings_location_preference_zone',
    'settings_maintenance_window',
    'settings_pricing_plan',
    'settings_replication_type',
    'settings_settings_version',
    'settings_storage_auto_resize',
    'settings_storage_auto_resize_limit',
    'settings_tier',
    'state',
    'suspension_reason',
    'raw_cloudsql_instance',
]

# Ordered CSV column names for the 'cloudsql_ipaddresses' resource
# (see CSV_FIELDNAME_MAP).
CLOUDSQL_IPADDRESSES_FIELDNAMES = [
    'project_number',
    'instance_name',
    'type',
    'ip_address',
    'time_to_retire',
]

# Ordered CSV column names for the
# 'cloudsql_ipconfiguration_authorizednetworks' resource
# (see CSV_FIELDNAME_MAP).
CLOUDSQL_IPCONFIGURATION_AUTHORIZEDNETWORKS_FIELDNAMES = [
    'project_number',
    'instance_name',
    'kind',
    'name',
    'value',
    'expiration_time',
]

# Ordered CSV column names for the 'firewall_rules' resource; registered
# under that key in CSV_FIELDNAME_MAP.
FIREWALL_RULES_FIELDNAMES = [
    'firewall_rule_id',
    'project_id',
    'firewall_rule_name',
    'firewall_rule_description',
    'firewall_rule_kind',
    'firewall_rule_network',
    'firewall_rule_priority',
    'firewall_rule_direction',
    'firewall_rule_source_ranges',
    'firewall_rule_destination_ranges',
    'firewall_rule_source_tags',
    'firewall_rule_target_tags',
    'firewall_rule_source_service_accounts',
    'firewall_rule_target_service_accounts',
    'firewall_rule_allowed',
    'firewall_rule_denied',
    'firewall_rule_self_link',
    'firewall_rule_create_time',
    'raw_firewall_rule'
]

# Ordered CSV column names for the 'folder_iam_policies' resource
# (see CSV_FIELDNAME_MAP).
FOLDER_IAM_POLICIES_FIELDNAMES = [
    'folder_id',
    'role',
    'member_type',
    'member_name',
    'member_domain'
]

# Ordered CSV column names for the 'folders' resource
# (see CSV_FIELDNAME_MAP).
FOLDERS_FIELDNAMES = [
    'folder_id',
    'name',
    'display_name',
    'lifecycle_state',
    'parent_type',
    'parent_id',
    'raw_folder',
    'create_time',
]

# Ordered CSV column names for the 'raw_folder_iam_policies' resource
# (see CSV_FIELDNAME_MAP).
RAW_FOLDER_IAM_POLICIES_FIELDNAMES = [
    'folder_id',
    'iam_policy'
]

# Ordered CSV column names for the 'forwarding_rules' resource; registered
# under that key in CSV_FIELDNAME_MAP.
FORWARDING_RULES_FIELDNAMES = [
    'id',
    'project_id',
    'creation_timestamp',
    'name',
    'description',
    'region',
    'ip_address',
    'ip_protocol',
    'port_range',
    'ports',  # json list
    'target',
    'load_balancing_scheme',
    'subnetwork',
    'network',
    'backend_service',
    'raw_forwarding_rule',
]

# Ordered CSV column names for the 'group_members' resource
# (see CSV_FIELDNAME_MAP).
GROUP_MEMBERS_FIELDNAMES = [
    'group_id',
    'member_kind',
    'member_role',
    'member_type',
    'member_status',
    'member_id',
    'member_email',
    'raw_member'
]

# Ordered CSV column names for the 'groups' resource
# (see CSV_FIELDNAME_MAP).
GROUPS_FIELDNAMES = [
    'group_id',
    'group_email',
    'group_kind',
    'direct_member_count',
    'raw_group'
]

# Ordered CSV column names for the 'instances' resource
# (see CSV_FIELDNAME_MAP).
INSTANCES_FIELDNAMES = [
    'id',
    'project_id',
    'can_ip_forward',
    'cpu_platform',
    'creation_timestamp',
    'description',
    'disks',
    'machine_type',
    'metadata',
    'name',
    'network_interfaces',
    'scheduling',
    'service_accounts',
    'status',
    'status_message',
    'tags',
    'zone',
    'raw_instance',
]

# Ordered CSV column names for the 'instance_groups' resource
# (see CSV_FIELDNAME_MAP).
INSTANCE_GROUPS_FIELDNAMES = [
    'id',
    'project_id',
    'creation_timestamp',
    'description',
    'instance_urls',
    'name',
    'named_ports',
    'network',
    'region',
    'size',
    'subnetwork',
    'zone',
    'raw_instance_group',
]

# Ordered CSV column names for the 'instance_templates' resource
# (see CSV_FIELDNAME_MAP).
INSTANCE_TEMPLATES_FIELDNAMES = [
    'id',
    'project_id',
    'creation_timestamp',
    'description',
    'name',
    'properties',
    'raw_instance_template',
]

# Ordered CSV column names for the 'instance_group_managers' resource
# (see CSV_FIELDNAME_MAP).
INSTANCE_GROUP_MANAGERS_FIELDNAMES = [
    'id',
    'project_id',
    'base_instance_name',
    'creation_timestamp',
    'current_actions',
    'description',
    'instance_group',
    'instance_template',
    'name',
    'named_ports',
    'region',
    'target_pools',
    'target_size',
    'zone',
    'raw_instance_group_manager',
]

# Ordered CSV column names for the 'inv_summary' resource; registered under
# that key in CSV_FIELDNAME_MAP.
INV_SUMMARY_FIELDNAMES = [
    'resource_type',
    'count',
]

# Ordered CSV column names for the 'org_iam_policies' resource
# (see CSV_FIELDNAME_MAP).
ORG_IAM_POLICIES_FIELDNAMES = [
    'org_id',
    'role',
    'member_type',
    'member_name',
    'member_domain'
]

# Ordered CSV column names for the 'organizations' resource
# (see CSV_FIELDNAME_MAP).
ORGANIZATIONS_FIELDNAMES = [
    'org_id',
    'name',
    'display_name',
    'lifecycle_state',
    'raw_org',
    'creation_time',
]

# Ordered CSV column names for the 'violations' resource; registered under
# that key in CSV_FIELDNAME_MAP. Row keys not listed here are dropped on
# write because write_csv builds its writer with extrasaction='ignore'.
VIOLATION_FIELDNAMES = [
    'resource_id',
    'resource_type',
    'resource_name',
    'full_name',
    'rule_index',
    'rule_name',
    'violation_type',
    'violation_data',
]

# Ordered CSV column names for the 'project_iam_policies' resource
# (see CSV_FIELDNAME_MAP).
PROJECT_IAM_POLICIES_FIELDNAMES = [
    'project_number',
    'role',
    'member_type',
    'member_name',
    'member_domain'
]

# Ordered CSV column names for the 'projects' resource
# (see CSV_FIELDNAME_MAP).
PROJECTS_FIELDNAMES = [
    'project_number',
    'project_id',
    'project_name',
    'lifecycle_state',
    'parent_type',
    'parent_id',
    'raw_project',
    'create_time'
]

# Ordered CSV column names for the 'raw_buckets' resource
# (see CSV_FIELDNAME_MAP).
RAW_BUCKETS_FIELDNAMES = [
    'project_number',
    'buckets'
]

# Ordered CSV column names for the 'raw_org_iam_policies' resource
# (see CSV_FIELDNAME_MAP).
RAW_ORG_IAM_POLICIES_FIELDNAMES = [
    'org_id',
    'iam_policy'
]

# Ordered CSV column names for the 'raw_project_iam_policies' resource
# (see CSV_FIELDNAME_MAP).
RAW_PROJECT_IAM_POLICIES_FIELDNAMES = [
    'project_number',
    'iam_policy'
]

# Ordered CSV column names for the 'service_accounts' resource; registered
# under that key in CSV_FIELDNAME_MAP.
SERVICE_ACCOUNTS_FIELDNAMES = [
    'project_id',
    'name',
    'email',
    'oauth2_client_id',
    'account_keys',
    'raw_service_account'
]

# Maps a resource name (the ``resource_name`` argument of write_csv) to the
# ordered list of column names that write_csv passes to csv.DictWriter as
# ``fieldnames``. write_csv indexes this dict directly, so a resource name
# that is not registered here raises KeyError.
CSV_FIELDNAME_MAP = {
    'appengine': APPENGINE_FIELDNAMES,
    'appengine_services': APPENGINE_SERVICES_FIELDNAMES,
    'appengine_versions': APPENGINE_VERSIONS_FIELDNAMES,
    'appengine_instances': APPENGINE_INSTANCES_FIELDNAMES,

    'backend_services': BACKEND_SERVICES_FIELDNAMES,

    'bigquery_datasets': BIGQUERY_DATASET_FIELDNAMES,

    'buckets': BUCKETS_FIELDNAMES,
    'buckets_acl': BUCKETS_ACL_FIELDNAMES,
    'raw_buckets': RAW_BUCKETS_FIELDNAMES,

    'cloudsql_instances': CLOUDSQL_INSTANCES_FIELDNAMES,
    'cloudsql_ipaddresses': CLOUDSQL_IPADDRESSES_FIELDNAMES,
    'cloudsql_ipconfiguration_authorizednetworks': (
        CLOUDSQL_IPCONFIGURATION_AUTHORIZEDNETWORKS_FIELDNAMES
    ),

    'firewall_rules': FIREWALL_RULES_FIELDNAMES,

    'folder_iam_policies': FOLDER_IAM_POLICIES_FIELDNAMES,
    'folders': FOLDERS_FIELDNAMES,
    'raw_folder_iam_policies': RAW_FOLDER_IAM_POLICIES_FIELDNAMES,

    'forwarding_rules': FORWARDING_RULES_FIELDNAMES,

    'group_members': GROUP_MEMBERS_FIELDNAMES,
    'groups': GROUPS_FIELDNAMES,

    'instances': INSTANCES_FIELDNAMES,
    'instance_groups': INSTANCE_GROUPS_FIELDNAMES,
    'instance_templates': INSTANCE_TEMPLATES_FIELDNAMES,
    'instance_group_managers': INSTANCE_GROUP_MANAGERS_FIELDNAMES,
    'inv_summary': INV_SUMMARY_FIELDNAMES,

    'org_iam_policies': ORG_IAM_POLICIES_FIELDNAMES,
    'organizations': ORGANIZATIONS_FIELDNAMES,
    'raw_org_iam_policies': RAW_ORG_IAM_POLICIES_FIELDNAMES,

    'project_iam_policies': PROJECT_IAM_POLICIES_FIELDNAMES,
    'projects': PROJECTS_FIELDNAMES,
    'raw_project_iam_policies': RAW_PROJECT_IAM_POLICIES_FIELDNAMES,

    'service_accounts': SERVICE_ACCOUNTS_FIELDNAMES,

    'violations': VIOLATION_FIELDNAMES,
}


def normalize_nested_dicts(row):
    """Transform nested dicts into json strings with sorted keys.

    Only values that are themselves ``dict`` instances are serialised; every
    other value (lists included) is carried over unchanged. The input
    mapping is not modified; a new dict is returned.

    Args:
        row (dict): A dictionary to normalize.

    Returns:
        dict: A row with nested dicts transformed to json string.
    """
    # sort_keys makes the serialised form deterministic, so equal nested
    # dicts always produce the same CSV cell.
    return {
        key: (json.dumps(value, sort_keys=True)
              if isinstance(value, dict) else value)
        for key, value in row.items()
    }


@contextmanager
def write_csv(resource_name, data, write_header=False):
    """Start the csv writing flow.

    Writes ``data`` to a named temporary file, closes it, and yields it so
    the caller can load it by its ``name``. The temporary file is removed
    when the ``with`` block ends, whether it ends normally, the write failed,
    or the caller's ``with`` body raised.

    Args:
        resource_name (str): The resource name; must be a key of
            CSV_FIELDNAME_MAP.
        data (iterable): An iterable of data to be written to csv. The rows
            themselves are left unmodified.
        write_header (bool): If True, write the header in the csv file.

    Yields:
        object: The CSV temporary file pointer (already closed).

    Raises:
        CSVFileError: If there was an error writing the CSV file. The
            ``yield`` sits inside the guarded region, so an OSError or
            csv.Error raised by the caller's ``with`` body, or by removing
            the file, is wrapped as well.
        KeyError: If ``resource_name`` is not in CSV_FIELDNAME_MAP.
    """
    # delete=False: the file has to outlive close() so the caller can load
    # it by name; it is removed explicitly in the ``finally`` below.
    csv_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        try:
            writer = csv.DictWriter(
                csv_file,
                extrasaction='ignore',
                fieldnames=CSV_FIELDNAME_MAP[resource_name])
            if write_header:
                writer.writeheader()

            for row in data:
                # Not ready to send these data via CSV attachment as they
                # break across multiple columns. Drop the key from a shallow
                # copy so the caller's row is not mutated.
                csv_row = dict(row)
                csv_row.pop('inventory_data', None)
                writer.writerow(normalize_nested_dicts(csv_row))

            # This must be closed before returned for loading.
            csv_file.close()
            yield csv_file
        finally:
            # Always release the handle and remove the csv file, including
            # when writing failed or the caller's ``with`` body raised;
            # otherwise the delete=False file would be leaked. close() is a
            # no-op when the file was already closed above.
            try:
                csv_file.close()
            finally:
                os.remove(csv_file.name)
    except (OSError, csv.Error) as e:
        raise CSVFileError(resource_name, e)