#!/usr/bin/python3
# ----------------------------------------------------------------------
# File: eos-iam-mapfile.py
# Author: Manuel Reis - CERN
# ----------------------------------------------------------------------

# ************************************************************************
# * EOS - the CERN Disk Storage System                                   *
# * Copyright (C) 2021 CERN/Switzerland                                  *
# *                                                                      *
# * This program is free software: you can redistribute it and/or modify *
# * it under the terms of the GNU General Public License as published by *
# * the Free Software Foundation, either version 3 of the License, or    *
# * (at your option) any later version.                                  *
# *                                                                      *
# * This program is distributed in the hope that it will be useful,      *
# * but WITHOUT ANY WARRANTY; without even the implied warranty of       *
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        *
# * GNU General Public License for more details.                         *
# *                                                                      *
# * You should have received a copy of the GNU General Public License    *
# * along with this program.  If not, see <http://www.gnu.org/licenses/>.*
# ************************************************************************
"""Generate grid-map / name-map files from the users known to IAM servers."""

import re
import json
import pickle  # no longer used below; kept so the module namespace is unchanged
import logging
import argparse
from sys import exit
from os import getenv
from urllib import request, parse
from configparser import ConfigParser
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed

# SCIM extension under which the user certificates are published
_INDIGO_USER_SCHEMA = 'urn:indigo-dc:scim:schemas:IndigoUser'
# DN suffix identifying a certificate issued for a CERN account
_CERN_DN_SUFFIX = 'DC=cern,DC=ch'
# Splits a DN on the commas separating its components, leaving commas that
# belong to a value untouched (re: courtesy of Maarten Litmaath)
_DN_COMPONENT_SEPARATOR = re.compile(r',(?=\w+=)')

_EPILOG = '''examples:
    $ export EOS_IAM_SERVER=atlas-auth.web.cern.ch
    $ export EOS_IAM_CLIENT_ID=...
    $ export EOS_IAM_CLIENT_SECRET=...
    $ eos-iam-mapfile -a account4user2bmapped2

    $ echo -e '[myiamserver.cern.ch]\\nclient-id = 1234567890\\nclient-secret = *******' > iam.conf
    $ eos-iam-mapfile -a account4user2bmapped2 -c iam.conf'''


class IAM_Server:
    """
    Minimal client for the token and SCIM user endpoints of an IAM server.

    Two instances are equal (and hash alike) when they point to the same
    server, whatever credentials they hold, so a set keeps one per server.
    """

    TOKEN_ENDPOINT = '/token'
    USER_ENDPOINT = '/scim/Users'
    # Number of page requests issued concurrently by get_users()
    MAX_WORKERS = 8
    # The token is renewed this many seconds before its announced expiry
    TOKEN_RENEWAL_MARGIN = 10

    def __init__(self, server, client_id, client_secret, token_server=None):
        """
        server:        host name of the IAM server (no scheme)
        client_id:     client identifier used to request the token
        client_secret: client secret used to request the token
        token_server:  host issuing tokens, defaults to `server`
        """
        self.server = server
        self.client_id = client_id
        self.client_secret = client_secret
        # Assuming token server is the same as IAM's
        self.token_server = token_server or server
        self._token = None

    def __hash__(self):
        return hash(self.server)

    def __eq__(self, other):
        # Let Python fall back to its default comparison for foreign types
        # instead of failing on the missing `server` attribute
        if not isinstance(other, IAM_Server):
            return NotImplemented
        return self.server == other.server

    def __repr__(self):
        # The credentials are deliberately left out of the representation
        return f"{type(self).__name__}(server={self.server!r}, token_server={self.token_server!r})"

    def __get_token(self):
        """
        Authenticates with the token server and stores the reply, together
        with the time of the request, in `self._token`.

        Raises RuntimeError if the reply carries no access token.
        """
        request_data = {
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "grant_type": "client_credentials",
            "scope": "scim:read"
        }
        # Taken before the request so the computed expiry errs on the early side
        now = datetime.now()
        url = f'https://{self.token_server}{self.TOKEN_ENDPOINT}'
        with request.urlopen(url, data=parse.urlencode(request_data).encode('utf-8')) as reply:
            response = json.loads(reply.read())
        if 'access_token' not in response:
            raise RuntimeError("Authentication Failed")
        response['request_time'] = now
        self._token = response

    @property
    def token(self):
        """
        Bearer token, requested on first use and renewed shortly before
        it expires.
        """
        if self._token is None or self._token_expired():
            self.__get_token()
        return self._token['access_token']

    def _token_expired(self):
        """Tells whether the stored token is (about to be) expired."""
        # A reply without `expires_in` is treated as already expired
        lifetime = self._token.get('expires_in', 0) - self.TOKEN_RENEWAL_MARGIN
        return self._token['request_time'] + timedelta(seconds=lifetime) < datetime.now()

    @staticmethod
    def _page_starts(total, count):
        """
        Start index of every page needed to read `total` records, `count`
        at a time. SCIM indexes are 1-based, hence the range starts at 1.
        """
        return range(1, total + 1, count)

    @staticmethod
    def _fetch_json(req):
        """Performs the request and returns the decoded JSON reply."""
        with request.urlopen(req) as reply:
            return json.loads(reply.read())

    def _users_request(self, start_index, count):
        """Builds the authenticated request for one page of users."""
        params = {"startIndex": start_index, "count": count}
        url = f"https://{self.server}{self.USER_ENDPOINT}?{parse.urlencode(params)}"
        # Reading self.token gets a new token if the current one expired
        return request.Request(url, headers={"Authorization": f"Bearer {self.token}"})

    def get_users(self, start_index=0, count=1, filter_function=None, **kwargs):
        """
        Queries the server for all its users, `count` records per request,
        with the page requests issued in parallel.

        start_index:     accepted for backward compatibility; ignored, the
                         listing always starts at the first user
        count:           page size (must be at least 1)
        filter_function: called as filter_function(*resources, **kwargs) for
                         each page, it returns the hashable items to collect.
                         Without it each user record is collected as a JSON
                         string (the records themselves are not hashable)
        kwargs:          passed on to filter_function

        Returns the set of collected items. A page that cannot be fetched or
        filtered is logged and skipped, so the result may be partial.
        """
        if count < 1:
            raise ValueError(f"count must be at least 1, got {count}")

        # A single record is enough to learn how many users there are
        total = self._fetch_json(self._users_request(1, 1))['totalResults']

        users = set()
        # The with statement ensures threads are cleaned up promptly
        with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
            # Start the load operations and mark each future with its URL
            pending = {}
            for first in self._page_starts(total, count):
                req = self._users_request(first, count)
                # Only the URL is logged: the headers carry the bearer token
                logging.debug("Requesting %s", req.full_url)
                pending[executor.submit(self._fetch_json, req)] = req.full_url

            for future in as_completed(pending):
                try:
                    resources = future.result().get('Resources', [])
                    if filter_function is not None:
                        users.update(filter_function(*resources, **kwargs))
                    else:
                        users.update(json.dumps(resource, sort_keys=True) for resource in resources)
                except Exception as e:
                    logging.error('%s generated an exception: %s', pending[future], e)
        return users


def name_map_filter(*users, **kwargs):
    """
    Collect user's id to build 'Mapfile format' rules:
    https://github.com/xrootd/xrootd/tree/master/src/XrdSciTokens

    Returns the set of ids of the given user records; records without an
    id are skipped. Keyword arguments are accepted and ignored so that the
    function can be called like the other filters.
    """
    logging.debug(f"This request has {len(users)}")
    return {user['id'] for user in users if user.get('id') is not None}


def _is_cern_certificate(cert):
    """Tells whether the certificate subject (else issuer) DN is a CERN one."""
    dn = cert.get('subjectDn', cert.get('issuerDn')) or ''
    return dn.endswith(_CERN_DN_SUFFIX)


def dn_filter(*users, pattern=None, prefer_cern=False, **kwargs):
    """
    Collect users with DN certificates matching regex

    users:       user records as returned by the SCIM endpoint
    pattern:     compiled regex searched in the DN, None to accept every DN
    prefer_cern: for users holding CERN certificates use only those; users
                 without any keep all their certificates

    Returns the set of matching DNs, in the slash separated grid format.
    """
    logging.debug(f"This request has {len(users)}")
    matching_dn = set()
    for user in users:
        try:
            certs = user[_INDIGO_USER_SCHEMA]['certificates'] or ()
        except KeyError:
            logging.warning(f"User {user.get('id')} doesn't have certificate to extract info (skipping it)")
            continue

        if prefer_cern:
            # A preference, not a requirement: fall back to every
            # certificate when none of them is a CERN one
            cern_certs = [cert for cert in certs if _is_cern_certificate(cert)]
            if cern_certs:
                certs = cern_certs

        for cert in certs:
            subject = cert.get('subjectDn')
            if not subject:
                logging.warning(f"User {user.get('id')} has a certificate without subject DN (skipping it)")
                continue
            # Revert subjectDn and replace , with / (making sure commas on the values aren't replaced)
            grid_dn = '/'.join(_DN_COMPONENT_SEPARATOR.split(subject)[::-1])
            if pattern is None or pattern.search(grid_dn):
                matching_dn.add(f'/{grid_dn}')

    logging.info(f"{len(matching_dn)} matching certificates")
    return matching_dn


def _emit(content, ofile):
    """Writes content to ofile, or prints it when no file is given."""
    if not ofile:
        print(content)
        return
    try:
        with open(ofile, "w", encoding='utf-8') as output:
            output.write(content)
    except (OSError, UnicodeError) as e:
        logging.error(f'Unable to write to {ofile}, raised exception {e}')
        exit(4)


def build_namemap_file(users_id, account, ifile, ofile):
    """
    Build the JSON list of name-map rules mapping each user id to account.

    users_id: ids (token `sub`) of the users to map
    account:  account the users are mapped to
    ifile:    existing name-map file whose rules are kept (optional)
    ofile:    file to write, the result is printed when not given

    Rules of the input file come first, new ones follow sorted by id;
    identical rules are written once. Exits with code 4 when a file cannot
    be read or written.
    """
    # Rules indexed by their canonical JSON form, as dictionaries can't be
    # put in a set
    name_map = {}

    def add(rule):
        name_map.setdefault(json.dumps(rule, sort_keys=True), rule)

    if ifile:
        try:
            with open(ifile) as f:
                for entry in json.load(f):
                    add(entry)
        except (OSError, json.JSONDecodeError) as e:
            logging.error(f"Unable to read {ifile}: {e}")
            exit(4)

    for user_id in sorted(users_id, key=str):
        add({'sub': user_id, 'result': account})

    _emit(json.dumps(list(name_map.values())), ofile)


def build_gridmap_file(users_dn, account, ifile, ofile):
    """
    Build the grid-map (one `"DN" account` entry per line) mapping each
    DN to account.

    users_dn: DNs of the users to map
    account:  account the users are mapped to
    ifile:    existing grid-map file to update, entries for the same DN
              are overwritten (optional)
    ofile:    file to write, the result is printed when not given

    Exits with code 4 when a file cannot be read or written.
    """
    grid_map = {}
    if ifile:
        try:
            # As some entries may be encoded in latin let's escape it as unicode
            with open(ifile, "r", encoding='unicode_escape') as igridmap_file:
                for line in igridmap_file:
                    # The account is the last word, the DN may contain spaces
                    fields = line.rsplit(None, 1)
                    if len(fields) == 2:
                        grid_map[fields[0]] = fields[1]
                    elif fields:
                        logging.warning(f"Ignoring malformed entry in {ifile}: {line.strip()}")
        except OSError as e:
            logging.error(f"Unable to read {ifile}: {e}")
            exit(4)

    # Overwrite / append results
    for dn in users_dn:
        # Entries are stored quoted, the lookup must use the quoted form too
        entry = f'"{dn}"'
        if entry in grid_map:
            logging.debug(f'Overwritting {dn}')
        grid_map[entry] = account

    _emit('\n'.join(f'{dn} {acc}' for dn, acc in grid_map.items()), ofile)


def configure(credentials, servers, targets):
    """
    Work out which IAM servers to query, from the first source available:

      1. command line: `servers`, (server, client id, client secret) tuples
      2. environment: EOS_IAM_SERVER, EOS_IAM_CLIENT_ID and
         EOS_IAM_CLIENT_SECRET, one server per line
      3. configuration file `credentials`, one section per server:
            [<iam server>]
            client-id = <client id>
            client-secret = <client secret>
            token-server = <token server>   (optional)
         of which only the sections containing `targets` are used, if given

    Returns the set of IAM_Server to query, exits with code 3 if empty.
    """
    iam_servers = set()

    if servers:
        candidates = servers
    else:
        candidates = zip(getenv('EOS_IAM_SERVER', '').splitlines(),
                         getenv('EOS_IAM_CLIENT_ID', '').splitlines(),
                         getenv('EOS_IAM_CLIENT_SECRET', '').splitlines())
    for server, client_id, client_secret in candidates:
        iam_servers.add(IAM_Server(server, client_id, client_secret))

    if credentials and not iam_servers:
        config = ConfigParser()
        if config.read(credentials):
            # Credentials file should have IAM server on the section
            sections = config.sections()
            if targets is not None:
                sections = [section for section in sections if targets in section]
            for section in sections:
                client_id = config.get(section, 'client-id')
                client_secret = config.get(section, 'client-secret')
                # Assuming IAM server is token server if not defined
                token_server = config.get(section, 'token-server', fallback=section)
                iam_servers.add(IAM_Server(section, client_id, client_secret, token_server))
        else:
            logging.warning("Credentials couldn't be loaded from configuration file")

    if not iam_servers:
        logging.error('Configuration problem! Configuration file not loaded (correctly?), environment not set or arguments not passed.\n\tSet EOS_IAM_SERVER, EOS_IAM_CLIENT_ID, EOS_IAM_CLIENT_SECRET.')
        exit(3)
    return iam_servers


def main(server=None, credentials=None, targets=None, account=None, ifile=None, ofile=None, pattern=None,
         sensitive=re.IGNORECASE, debug_level=logging.WARNING, prefer_cern=False, type_of_format="GRIDMAP"):
    """
    Configure IAM servers to be queried, update/write gridmap file format

    server, credentials, targets: see configure()
    account:        account the users are mapped to
    ifile, ofile:   existing map file to update / file to write
    pattern:        regex the DNs must match (GRIDMAP only)
    sensitive:      flags used to compile pattern
    debug_level:    logging level
    prefer_cern:    see dn_filter() (GRIDMAP only)
    type_of_format: "GRIDMAP" (maps DNs) or "MAPFILE" (maps user ids)

    Exits with code 1 on an invalid pattern, 2 on an unknown format.
    """
    logging.basicConfig(level=debug_level)

    if type_of_format == "GRIDMAP":
        build_file = build_gridmap_file
    elif type_of_format == "MAPFILE":
        build_file = build_namemap_file
    else:
        logging.error(f'Unknown format: {type_of_format}')
        exit(2)

    if pattern is not None:
        try:
            pattern = re.compile(pattern, flags=sensitive)
        except (re.error, TypeError, ValueError):
            logging.critical(f'Pattern provided cannot be compiled: {pattern}')
            exit(1)

    iam_servers = configure(credentials, server, targets)

    # Query IAM server
    users = set()
    for iam in iam_servers:
        if type_of_format == "GRIDMAP":
            users.update(iam.get_users(count=100, filter_function=dn_filter, pattern=pattern, prefer_cern=prefer_cern))
        else:
            users.update(iam.get_users(count=100, filter_function=name_map_filter))

    build_file(users, account, ifile, ofile)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='GRID Map file generation from IAM Server',
                                     epilog=_EPILOG,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-v', '--verbose', type=str.upper, nargs='?', const="DEBUG", default="WARNING",
                        choices=("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"), dest='debug',
                        help='Control log verbosity')
    parser.add_argument('-s', '--server', dest='server', nargs=3, action='append',
                        help='IAM server to query with respective client key and secret (space separated)',
                        metavar=('SERVER', 'CLIENT_ID', 'CLIENT_KEY'))
    # --credentidals is the historical (misspelled) name, kept as an alias
    parser.add_argument('-c', '--credentials', '--credentidals', dest='credentials',
                        help=r'Client credentials file (for API access) in the following format: `[<iam-server>]\nclient-id = <client-id>\nclient-secret = <client-secret>`')
    parser.add_argument('-t', '--targets', dest='targets',
                        help='Target specific IAM servers defined in the configuration file (must be used together with -c)')
    parser.add_argument('-i', '--inputfile', dest='ifile', default=None,
                        help="Path to existing gridmapfile to be updated (matching DN's will be overwritten)")
    parser.add_argument('-o', '--outfile', dest='ofile', default=None,
                        help='Path to dump gridmapfile')
    parser.add_argument('-a', '--account', dest='account', required=True,
                        help='Account to which the result from the match should be mapped to')
    parser.add_argument('-p', '--pattern', type=str, dest='pattern', default=None,
                        help='Pattern to search on user certificates `subject DN` field')
    parser.add_argument('-C', '--case-sensitive', dest='sensitive', action='store_const', const=0,
                        default=re.IGNORECASE,
                        help='Makes the regex pattern (-p) to be case sensitive')
    parser.add_argument('-u', '--prefer-cern-certs', dest='prefer_cern', action='store_true',
                        help='Prefers CERN.CH certificates (if any) to map user (uniquely)')
    parser.add_argument('-f', '--format', type=str.upper, nargs='?', const="MAPFILE", default="GRIDMAP",
                        choices=("MAPFILE", "GRIDMAP"), dest='type_of_format',
                        help='Choose file format, using DN or ID (defaults to ID if used, else DN)')
    args = parser.parse_args()

    # args.debug is restricted by `choices` to names of logging levels
    log_level = getattr(logging, args.debug)
    logging.basicConfig(level=log_level)

    # Log the arguments without the client secrets passed with -s
    loggable_args = dict(vars(args))
    loggable_args['server'] = [srv[0] for srv in args.server or []]
    logging.debug(loggable_args)

    main(server=args.server, credentials=args.credentials, targets=args.targets,
         ifile=args.ifile, ofile=args.ofile, account=args.account,
         pattern=args.pattern, sensitive=args.sensitive, debug_level=log_level,
         prefer_cern=args.prefer_cern, type_of_format=args.type_of_format)