Source code for scidatalib.scidata

"""Python library for writing SciData JSON-LD files"""
import copy
from datetime import datetime

# noinspection PyTypeChecker
class SciData:
    """
    Build and populate a SciData object that is output as a SciData JSON-LD
    document.

    An instance is created with a unique id, i.e. ``SciData("<uid>")``.
    The ``meta`` attribute holds the backbone structure of the JSON-LD
    document; the public methods populate its keys and the ``output``
    property returns the finished, cleaned document.
    """

    def __init__(self, uid: str):
        """
        Initialize the instance using a unique id.

        :param uid: value stored as the ``uid`` of the ``@graph`` object
        """
        # NOTE(review): the context URL and the 'sdo' namespace URL are empty
        # strings in this copy of the file; they look like they were stripped
        # during extraction - confirm against the packaged source.
        self.meta = {
            "@context": [
                "",  # noqa
                {"sdo": ""},  # noqa
                {}
            ],  # def base
            "@id": "",  # def docid
            "generatedAt": "",  # autopopulated
            "version": "",  # def version
            "@graph": {
                "@id": "",  # autopopulated
                "@type": "sdo:scidataFramework",
                "uid": "",  # def graph_uid
                "title": "",  # def title
                "authors": [],  # def author
                "description": "",  # def description
                "publisher": "",  # def publisher
                "version": "",  # def graphversion
                "keywords": [],  # def keywords
                "starttime": "",  # def starttime
                "permalink": "",  # def permalink
                "related": [],  # def related
                "toc": [],  # autopopulated
                "ids": [],  # def ids
                "scidata": {
                    "@id": "scidata/",
                    "@type": "sdo:scientificData",
                    "discipline": "",  # def discipline
                    "subdiscipline": "",  # def subdiscipline
                    "methodology": {
                        "@id": "methodology/",
                        "@type": "sdo:methodology",
                        "evaluation": "",  # def evaluation
                        "aspects": []},  # def aspects OR def scidatapackage
                    "system": {
                        "@id": "system/",
                        "@type": "sdo:system",
                        "facets": []},  # def facets OR def scidatapackage
                    "dataset": {
                        "@id": "dataset/",
                        "@type": "sdo:dataset",
                        "datapoint": [],  # def datapoint OR scidatapackage
                        "scope": ""},  # def scope
                },
                "sources": [],  # def sources
                "rights": []  # def rights
            }
        }
        self.contexts = []
        self.nspaces = {}
        self.baseurl = {}
        self.meta['@graph']['uid'] = uid
        # every internal '@id' handed out so far (keeps them unique)
        self.uidindex = []

    # ------------------------------------------------------------------
    # public class methods
    # ------------------------------------------------------------------
    def context(self, context: [str, list], replace=False) -> list:
        """
        Add to or replace the list of external context files.

        :param context: context URL string or list of context URL strings
        :param replace: replace (True) or extend (False) the existing list
        :return: the sorted, de-duplicated list of context URLs

        Example:

        .. code-block:: python

            SciDataObject.context(['<context url>'])
        """
        if isinstance(context, str):
            supplied = [context]
        elif isinstance(context, list):
            supplied = list(context)  # copy: never alias the caller's list
        else:
            supplied = []
        if replace:
            # drop the URLs stored in the document, keep namespaces and base
            self.meta['@context'][:-2] = []
            self.contexts = supplied
        else:
            self.contexts = (
                self.contexts + self.meta['@context'][:-2] + supplied)
        self.contexts = sorted(set(self.contexts))
        self.__make_context()
        return self.contexts

    def namespaces(self, namespaces: dict, replace=False) -> dict:
        """
        Add to or replace the dictionary of namespaces within @context.

        Namespaces are needed for values that reference external resources
        (vocabulary/taxonomy/ontology entries).

        :param namespaces: dictionary of namespaces (key->ns, val->URI start)
        :param replace: replace (True) or update (False) the existing data
        :return: the namespaces currently set

        Example:

        .. code-block:: python

            SciDataObject.namespaces({"sdo": "<namespace url>"})
        """
        if isinstance(namespaces, dict):
            if replace:
                self.meta['@context'][-2] = {}
                # copy so that later updates never modify the caller's dict
                self.nspaces = dict(namespaces)
            else:
                self.nspaces.update(self.meta['@context'][-2])
                self.nspaces.update(namespaces)
            self.__make_context()
        return self.nspaces

    def base(self, base: str) -> dict:
        """
        Assign the JSON-LD @base URL.

        The same value is stored as the '@id' of '@graph' so that all
        internal identifiers are associated with the same graph.

        :param base: @base URL for the JSON-LD file
        :return: the ``{"@base": ...}`` dictionary (empty if never set)

        Example:

        .. code-block:: python

            SciDataObject.base("<base url>")
        """
        if isinstance(base, str):
            # NOTE(review): the original substitutes a default URL when an
            # empty string is given; that literal is empty in this copy of
            # the file, so an empty base stays empty - confirm the default.
            self.baseurl = {"@base": base}
            self.__make_context()
            self.__graphid(base)
        return self.baseurl

    def docid(self, docid: str) -> str:
        """
        Assign the document identifier (the root level '@id').

        :param docid: the root level @id value
        :return: the stored value (unchanged if ``docid`` is not a string)
        """
        return self.__assign_str(self.meta, '@id', docid)

    def version(self, version: str) -> str:
        """
        Assign the version of this file (not the version of the data).

        :param version: the top level 'version' value
        :return: the stored value (unchanged if ``version`` is not a string)
        """
        return self.__assign_str(self.meta, 'version', version)

    def graph_uid(self, guid: str) -> str:
        """
        Assign the uid value within the @graph JSON object.

        :param guid: the @graph uid value
        :return: the stored value (unchanged if ``guid`` is not a string)

        Example:

        .. code-block:: python

            SciDataObject.graph_uid("<uniqueidentifier>")
        """
        return self.__assign_str(self.meta['@graph'], 'uid', guid)

    def author(self, authors: list, replace=False) -> list:
        """
        Add to or replace the list of authors within @graph.

        :param authors: list of author names, or list of dictionaries that
            each contain at least a 'name' key
        :param replace: replace (True) or extend (False) the existing list
        :return: the stored list of author records

        Entries that are neither a string nor a dictionary with a 'name' key
        are skipped, so no empty author records are created.

        Example:

        .. code-block:: python

            SciDataObject.author([{'name': 'George Washington', 'ORCID': 1},
                                  {'name': 'John Adams', 'ORCID': 2}])
            SciDataObject.author(['George Washington', 'John Adams'])
        """
        if isinstance(authors, list):
            records = [] if replace else list(self.meta['@graph']['authors'])
            for entry in authors:
                if isinstance(entry, str):
                    details = {'name': entry}
                elif isinstance(entry, dict) and 'name' in entry:
                    details = entry
                else:
                    continue
                record = {
                    '@id': 'author/' + str(len(records) + 1) + '/',
                    '@type': 'dc:creator',
                }
                record.update(details)
                records.append(record)
            self.meta['@graph']['authors'] = records
        return self.meta['@graph']['authors']

    def title(self, title: str) -> str:
        """
        Create or replace the title key within @graph.

        :param title: descriptive title of the dataset
        :return: the stored value (unchanged if ``title`` is not a string)

        Example:

        .. code-block:: python

            SciDataObject.title("The Hitchhiker's Guide to the Galaxy")
        """
        return self.__assign_str(self.meta['@graph'], 'title', title)

    def description(self, description: str) -> str:
        """
        Assign the description field within @graph.

        :param description: textual description of the dataset
        :return: the stored value (unchanged if not given a string)

        Example:

        .. code-block:: python

            SciDataObject.description('a brief description')
        """
        return self.__assign_str(
            self.meta['@graph'], 'description', description)

    def publisher(self, publisher: str) -> str:
        """
        Assign the publisher field within @graph.

        :param publisher: the name or title of the publisher of the data
        :return: the stored value (unchanged if not given a string)

        Example:

        .. code-block:: python

            SciDataObject.publisher('The Daily Prophet')
        """
        return self.__assign_str(self.meta['@graph'], 'publisher', publisher)

    def graphversion(self, version: str) -> str:
        """
        Assign the data version (the 'version' key within @graph).

        :param version: the version assigned to the data
        :return: the stored value (unchanged if not given a string)

        Example:

        .. code-block:: python

            SciDataObject.graphversion('ChEMBL database v28')
        """
        return self.__assign_str(self.meta['@graph'], 'version', version)

    def keywords(self, keywords: [str, list], replace=False) -> list:
        """
        Add to or replace the keywords of the instance.

        :param keywords: keyword string or list of keyword strings
        :param replace: replace (True) or extend (False) the existing list
        :return: the stored, sorted list of keywords

        Example:

        .. code-block:: python

            SciDataObject.keywords('important')
        """
        words = [] if replace else self.meta['@graph']['keywords']
        if isinstance(keywords, str):
            words.append(keywords)
        elif isinstance(keywords, list):
            words.extend(keywords)
        words.sort()
        self.meta['@graph']['keywords'] = words
        return self.meta['@graph']['keywords']

    def starttime(self, stime: str) -> str:
        """
        Assign the start time.

        :param stime: datetime string, typically "%m-%d-%y %H:%M:%S"
        :return: the stored value (unchanged if not given a string)

        Example:

        .. code-block:: python

            SciDataObject.starttime('04-05-21 06:14:53')
        """
        return self.__assign_str(self.meta['@graph'], 'starttime', stime)

    def permalink(self, link: str) -> str:
        """
        Assign the permalink field within @graph.

        :param link: permanent URL string for the dataset
        :return: the stored value (unchanged if not given a string)
        """
        return self.__assign_str(self.meta['@graph'], 'permalink', link)

    def related(self, related: [str, list], replace=False) -> list:
        """
        Add to or replace the related URLs.

        :param related: URL string or list of URL strings
        :param replace: replace (True) or extend (False) the existing list
        :return: the stored list of related URLs

        Example:

        .. code-block:: python

            SciDataObject.related('<url>')
        """
        links = [] if replace else self.meta['@graph']['related']
        if isinstance(related, str):
            links.append(related)
        elif isinstance(related, list):
            links.extend(related)
        self.meta['@graph']['related'] = links
        return self.meta['@graph']['related']

    def ids(self, ids: [str, list]) -> list:
        """
        Add to the ids list.

        :param ids: string or list of strings that are external references
            to ontological concepts, formatted "<namespace>:<uniquevalue>"
        :return: the stored, sorted and de-duplicated list of ids

        Only strings containing ':' are added. A note is printed when the
        namespace prefix of an id is found neither in the namespaces set on
        the instance nor in the namespaces stored in @context.

        Example:

        .. code-block:: python

            SciDataObject.ids(['chebi:00001', 'qudt:GM'])
        """
        if isinstance(ids, str):
            candidates = [ids]
        elif isinstance(ids, list):
            candidates = ids
        else:
            candidates = []
        # namespaces declared in the document count as known as well
        known = set(self.nspaces) | set(self.meta['@context'][-2])
        current = self.meta['@graph']['ids']
        for candidate in candidates:
            if not isinstance(candidate, str) or ':' not in candidate:
                continue
            prefix = candidate.split(':')[0]
            if prefix not in known:
                print('Note: Namespace <' + prefix +
                      "> not set. A crosswalk url prefix is likely not "
                      "matched with it's linked namespace")
            current.append(candidate)
        self.meta['@graph']['ids'] = sorted(set(current))
        return self.meta['@graph']['ids']

    def discipline(self, disc: str) -> str:
        """
        Assign the discipline area of the data.

        :param disc: a discipline name or identifier (preferred)
        :return: the stored value (unchanged if not given a string)

        A value of the form "<namespace>:<value>" is also added to ids.

        Example:

        .. code-block:: python

            SciDataObject.discipline('w3i:Chemistry')
        """
        scidata = self.meta['@graph']['scidata']
        if isinstance(disc, str):
            self.__addid(disc)
            scidata['discipline'] = disc
        return scidata['discipline']

    def subdiscipline(self, subdisc: str) -> str:
        """
        Assign the subdiscipline area of the data.

        :param subdisc: a subdiscipline name or identifier (preferred)
        :return: the stored value (unchanged if not given a string)

        A value of the form "<namespace>:<value>" is also added to ids.

        Example:

        .. code-block:: python

            SciDataObject.subdiscipline('w3i:AnalyticalChemistry')
        """
        scidata = self.meta['@graph']['scidata']
        if isinstance(subdisc, str):
            self.__addid(subdisc)
            scidata['subdiscipline'] = subdisc
        return scidata['subdiscipline']

    def evaluation(self, evaln: str) -> str:
        """
        Assign the evaluation field of the methodology section.

        :param evaln: the method of evaluation of research data, e.g.
            experimental, theoretical, computational
        :return: the stored value (unchanged if not given a string)

        Example:

        .. code-block:: python

            SciDataObject.evaluation('experimental')
        """
        methodology = self.meta['@graph']['scidata']['methodology']
        if isinstance(evaln, str):
            self.__addid(evaln)
            methodology['evaluation'] = evaln
        return methodology['evaluation']

    def aspects(self, aspects: list) -> list:
        """
        Add aspects to the methodology section.

        :param aspects: list of aspect dictionaries; an entry may carry the
            keyword '#intlinks' with a list of dictionaries that are stored
            as separate aspects linked (via 'aspects#') to that entry
        :return: the stored item for each supplied entry and linked entry
            (an already stored identical item is returned instead of a copy)

        Example:

        .. code-block:: python

            SciDataObject.aspects(
                [{"@id": "assay",
                  "@type": "sdo:assay",
                  "description": "Inhibition of human ERG "
                                 "by MK499 binding assay",
                  "assay_organism": "Homo sapiens"}])
        """
        methodology = self.meta['@graph']['scidata']['methodology']
        return self.__add_linked_items(
            aspects, methodology['aspects'], 'aspects#')

    def facets(self, facets: list) -> list:
        """
        Add facets to the system section.

        :param facets: list of facet dictionaries; an entry may carry the
            keyword '#intlinks' with a list of dictionaries that are stored
            as separate facets linked (via 'facets#') to that entry
        :return: the stored item for each supplied entry and linked entry
            (an already stored identical item is returned instead of a copy)

        Example:

        .. code-block:: python

            SciDataObject.facets(
                [{"@id": "compound",
                  "@type": "sdo:compound",
                  "mw_freebase": "491.52",
                  "full_molformula": "C26H26FN5O4"}])
        """
        system = self.meta['@graph']['scidata']['system']
        return self.__add_linked_items(facets, system['facets'], 'facets#')

    def scope(self, scope: [str, list]) -> [str, list]:
        """
        Assign what thing(s) the dataset relates to.

        :param scope: string or list of internal unique ids of entities in
            the system section which the data describes
        :return: the stored value (unchanged if not given a str or list)

        Example:

        .. code-block:: python

            SciDataObject.scope('chemicalsystem/1/')
        """
        dataset = self.meta['@graph']['scidata']['dataset']
        if isinstance(scope, (str, list)):
            dataset['scope'] = scope
        return dataset['scope']

    def attribute(self, attributes: list) -> list:
        """
        Add one or more attributes to the dataset section.

        :param attributes: list of attribute dictionaries
        :return: the stored item for each entry (an already stored
            identical item is returned instead of adding a duplicate)
        """
        return self.__add_unique_items(attributes, 'attribute')

    def datapoint(self, points: list) -> list:
        """
        Add one or more datapoints to the dataset section.

        :param points: list of datapoint dictionaries
        :return: the processed datapoints that were added

        Datapoints are always appended; no de-duplication is done.

        Example:

        .. code-block:: python

            SciDataObject.datapoint(
                [{"@id": "datapoint",
                  "@type": "sdo:datapoint",
                  "data": [{"@id": "datum",
                            "@type": "sdo:exptdata",
                            "type": "IC50",
                            "value": "15.2",
                            "units": "uM"}]}])
        """
        stored = self.meta['@graph']['scidata']['dataset']['datapoint']
        added = [self.__iterate_function(entry) for entry in points]
        stored.extend(added)
        return added

    def dataseries(self, series: list) -> list:
        """
        Add one or more dataseries to the dataset section.

        :param series: list of dataseries dictionaries
        :return: the stored item for each entry (an already stored
            identical item is returned instead of adding a duplicate)
        """
        return self.__add_unique_items(series, 'dataseries')

    def datagroup(self, group: list) -> list:
        """
        Add one or more datagroups to the dataset section.

        :param group: list of datagroup dictionaries
        :return: the stored item for each entry (an already stored
            identical item is returned instead of adding a duplicate)
        """
        return self.__add_unique_items(group, 'datagroup')

    def scidatapackage(self, package):
        """
        Add a package of data where the datapoints are linked with the
        associated aspects and facets.

        :param package: list of 'packets'; each packet is a dictionary with
            the keys 'aspects', 'facets' and 'dataset' (the key 'datapoints'
            is accepted as an alias of 'dataset'); missing keys are treated
            as empty lists

        For every packet the 'facets' and 'aspects' values are replaced by
        the stored items, and each datapoint is stored with 'facets#' and
        'aspects#' keys listing the ids of those items.

        Example:

        .. code-block:: python

            SciDataObject.scidatapackage([{
                "aspects": [{
                    "@id": "assay/",
                    "@type": "sdo:assay",
                    "assay_organism": "Homo sapiens"
                }],
                "facets": [{
                    "@id": "compound/",
                    "@type": "sdo:compound",
                    "full_molformula": "C26H26FN5O4",
                    "#intlinks": [{
                        "@id": "identifier/",
                        "@type": "sdo:identifier",
                        "standard_inchi_key": "OINHUVBCKUJZAG-UHFFFAOYSA-N"
                    }]
                }],
                "dataset": [{
                    "@id": "datapoint/",
                    "@type": "sdo:datapoint",
                    "data": [{
                        "@id": "datum",
                        "@type": "sdo:exptdata",
                        "type": "IC50",
                        "value": "15.2",
                        "units": "uM"
                    }]
                }]
            }])
        """
        for packet in package:
            packet['facets'] = self.facets(packet.get('facets', []))
            packet['aspects'] = self.aspects(packet.get('aspects', []))
            facet_ids = [item['@id'] for item in packet['facets']]
            aspect_ids = [item['@id'] for item in packet['aspects']]
            supplied = packet.get('dataset', packet.get('datapoints', []))
            points = []
            for point in supplied:
                # work on copies so a datapoint dict reused by the caller
                # does not carry links over from another packet
                linked = dict(point)
                if facet_ids:
                    linked['facets#'] = list(facet_ids)
                if aspect_ids:
                    linked['aspects#'] = list(aspect_ids)
                points.append(linked)
            self.datapoint(points)

    def sources(self, sources: list, replace=False) -> list:
        """
        Add to or replace the source reference list.

        :param sources: list of dictionaries describing where the data came
            from (e.g. citation, reftype, url, doi)
        :param replace: replace (True) or extend (False) the existing list
        :return: the stored list of sources

        Example:

        .. code-block:: python

            SciDataObject.sources([
                {'citation': 'Chalk, S.J. SciData: a data model and '
                             'ontology for semantic representation of '
                             'scientific data. J Cheminform 8, 54 (2016)',
                 'doi': '<doi>'}])
        """
        stored = [] if replace else self.meta['@graph']['sources']
        for source in sources:
            record = {
                '@id': 'source/' + str(len(stored) + 1) + '/',
                '@type': 'dc:source'
            }
            record.update(source)
            stored.append(record)
        # store explicitly so that replace=True actually takes effect
        self.meta['@graph']['sources'] = stored
        return self.meta['@graph']['sources']

    def rights(self, holder: str, lic: str) -> list:
        """
        Set the rights section of the file (max: 1 entry).

        :param holder: the entity that holds the license to this data
        :param lic: the assigned license
        :return: the stored rights list; it is emptied when either
            argument is not a string
        """
        entries = []
        if isinstance(holder, str) and isinstance(lic, str):
            entries = [{
                '@id': 'rights/1/',
                '@type': 'dc:rights',
                'holder': holder,
                'license': lic,
            }]
        self.meta['@graph']['rights'] = entries
        return self.meta['@graph']['rights']

    # ------------------------------------------------------------------
    # private class functions
    # ------------------------------------------------------------------
    @staticmethod
    def __assign_str(target: dict, key: str, value) -> str:
        """Store value under key if it is a string; return what is stored."""
        if isinstance(value, str):
            target[key] = value
        return target[key]

    @staticmethod
    def __is_empty(value) -> bool:
        """True for None and empty strings/containers (0 and False are
        data, not emptiness)."""
        if value is None:
            return True
        return isinstance(value, (str, list, dict, tuple, set)) \
            and len(value) == 0

    def __strip_empty(self, section: dict) -> bool:
        """Delete empty keys of section; report whether anything other
        than '@id' and '@type' is left."""
        for key in list(section):
            if self.__is_empty(section[key]):
                del section[key]
        return any(key not in ('@id', '@type') for key in section)

    def __make_context(self) -> list:
        """
        Rebuild '@context' from the context URLs, namespaces and base.

        Called by the context, namespaces and base methods. The resulting
        list is the sorted context URLs followed by the namespaces
        dictionary and the base dictionary.
        """
        urls = self.contexts + self.meta['@context'][:-2]
        self.contexts = sorted(set(urls))
        self.nspaces.update(self.meta['@context'][-2])
        self.meta["@context"] = self.contexts + [self.nspaces, self.baseurl]
        return self.meta["@context"]

    def __addid(self, text) -> bool:
        """
        Add text to the ids list if it looks like "<namespace>:<value>".

        URLs (containing '://'), strings with more than one ':', strings
        with an empty namespace or value and strings containing whitespace
        are not treated as ids.

        :return: True if the text was passed on to ids, otherwise False
        """
        if not isinstance(text, str) or '://' in text:
            return False
        prefix, colon, local = text.partition(':')
        if not (colon and prefix and local):
            return False
        if ':' in local or any(char.isspace() for char in text):
            return False
        self.ids(text)
        return True

    def __graphid(self, gid: str) -> bool:
        """Assign the @id value within the @graph JSON object."""
        self.meta['@graph']['@id'] = gid
        return True

    @staticmethod
    def __build_toc(graph: dict) -> list:
        """Return the sorted, unique '@type' values found anywhere inside
        graph (nested dictionaries and lists are searched)."""
        found = set()

        def walk(node):
            if isinstance(node, dict):
                for key, value in node.items():
                    if key == '@type':
                        if isinstance(value, list):
                            found.update(value)
                        else:
                            found.add(value)
                    walk(value)
            elif isinstance(node, list):
                for member in node:
                    walk(member)

        walk(graph)
        return sorted(found)

    def __addtoc(self):
        """Store the table of contents (all '@type' values) in @graph."""
        self.meta['@graph']['toc'] = self.__build_toc(self.meta['@graph'])

    def __add_linked_items(self, entries: list, current: list,
                           linkkey: str) -> list:
        """
        Shared implementation of aspects and facets.

        Each entry (and each of its '#intlinks' entries) is processed into
        an item with a unique '@id'. An item that equals an already stored
        item (ignoring '@id' and linkkey) is not stored again; the stored
        item is returned in its place and the new id is released.

        :param entries: dictionaries supplied by the caller
        :param current: the stored list, extended in place
        :param linkkey: 'aspects#' or 'facets#'
        :return: the stored item for every entry and linked entry
        """
        ignored = ('@id', linkkey)

        def bare(item):
            return {k: v for k, v in item.items() if k not in ignored}

        added = []
        for entry in entries:
            entry = dict(entry)  # leave the caller's dictionary untouched
            links = entry.pop('#intlinks', None)
            root = self.__iterate_function(entry)
            rootid = root['@id']
            batch = [root]
            for linked in links or []:
                item = self.__iterate_function(linked)
                item[linkkey] = [rootid]
                batch.append(item)
            for position, item in enumerate(batch):
                content = bare(item)
                match = None
                for existing in current:
                    if bare(existing) != content:
                        continue
                    if position == 0:
                        # the root already exists: link to the stored one
                        rootid = existing['@id']
                    if existing.get(linkkey):
                        if rootid not in existing[linkkey]:
                            existing[linkkey].append(rootid)
                    match = existing
                if match is not None:
                    self.uidindex.remove(item['@id'])
                    added.append(match)
                else:
                    if position > 0:
                        # rootid may have changed to an existing item's id
                        # after this linked item was created
                        item[linkkey] = [rootid]
                    added.append(item)
                    current.append(item)
        return added

    def __add_unique_items(self, entries: list, key: str) -> list:
        """
        Shared implementation of attribute, dataseries and datagroup.

        Entries are processed into items with a unique '@id' and appended
        to the list stored under key in the dataset section, unless an
        identical item (ignoring '@id') is already stored.

        :return: the stored item for every entry
        """
        dataset = self.meta['@graph']['scidata']['dataset']
        current = dataset.get(key, [])

        def bare(item):
            return {k: v for k, v in item.items() if k != '@id'}

        added = []
        for entry in entries:
            item = self.__iterate_function(entry)
            content = bare(item)
            match = None
            for existing in current:
                if bare(existing) == content:
                    match = existing
            if match is not None:
                self.uidindex.remove(item['@id'])
                added.append(match)
            else:
                added.append(item)
                current.append(item)
        dataset[key] = current
        return added

    def __iterate_function(self, it, uid=False):
        """
        Process a user supplied structure into its stored form.

        Dictionaries get a unique internal '@id' of the form
        "<parent uid><category>/<n>/", where the category is the supplied
        '@id' ('undefined' if absent, trailing '/' ignored), and a default
        '@type' of "sdo:<category>" that a supplied '@type' overrides.
        Nested dictionaries and lists of dictionaries are processed
        recursively with the new uid as parent. Plain values that look
        like "<namespace>:<value>" are registered in ids. The input is
        not modified.

        :param it: string, list or dictionary to process
        :param uid: uid of the parent item (False for top level items)
        :return: the processed structure
        """
        if isinstance(it, str):
            self.__addid(it)
            return it
        if isinstance(it, list):
            if all(isinstance(member, dict) for member in it):
                return [self.__iterate_function(member, uid)
                        for member in it]
            return it
        if not isinstance(it, dict):
            return it

        category = it.get('@id', 'undefined')
        if isinstance(category, str):
            category = category.rstrip('/') or 'undefined'
        else:
            category = str(category)
        prefix = uid if uid else ''
        number = 1
        newuid = f"{prefix}{category}/{number}/"
        while newuid in self.uidindex:
            number += 1
            newuid = f"{prefix}{category}/{number}/"
        self.uidindex.append(newuid)

        node: dict = {'@id': newuid, '@type': 'sdo:' + category}
        for key, value in it.items():
            if key == '@id':
                continue
            if isinstance(value, list):
                if all(isinstance(member, dict) for member in value):
                    # build a new list: the caller's list is left as is
                    node[key] = [self.__iterate_function(member, newuid)
                                 for member in value]
                else:
                    node[key] = value
            elif isinstance(value, dict):
                node[key] = self.__iterate_function(value, newuid)
            else:
                node[key] = value
                self.__addid(value)
        return node

    @property
    def output(self) -> dict:
        """
        Complete and clean the SciData object for output.

        The 'generatedAt' timestamp is set, then a deep copy of ``meta`` is
        cleaned and returned, so the instance itself stays complete and can
        still be modified afterwards:

        * empty keys are removed from @graph and the scidata section
        * the methodology, system and dataset sections lose their empty
          keys and are dropped entirely when nothing but '@id' and '@type'
          is left
        * empty keys are removed from each dataseries, datagroup and
          datapoint entry
        * 'toc' is filled with the '@type' values of the cleaned document
          (and also stored on the instance)

        :return: the cleaned JSON-LD document as a dictionary
        """
        self.meta['generatedAt'] = \
            datetime.now().strftime("%m-%d-%y %H:%M:%S")
        document = copy.deepcopy(self.meta)
        graph = document['@graph']

        # clean @graph ('toc' is rebuilt below)
        for key in list(graph):
            if key != 'toc' and self.__is_empty(graph[key]):
                del graph[key]

        # clean scidata and its sections
        scidata = graph['scidata']
        for key in list(scidata):
            if self.__is_empty(scidata[key]):
                del scidata[key]
        for name in ('methodology', 'system', 'dataset'):
            section = scidata.get(name)
            if isinstance(section, dict) and \
                    not self.__strip_empty(section):
                del scidata[name]

        # clean the individual data entries
        dataset = scidata.get('dataset', {})
        for name in ('dataseries', 'datagroup', 'datapoint'):
            for entry in dataset.get(name, []):
                if isinstance(entry, dict):
                    self.__strip_empty(entry)

        # add the toc to the output
        toc = self.__build_toc(graph)
        graph['toc'] = toc
        self.meta['@graph']['toc'] = list(toc)
        return document