import json
import pandas as pd
import numpy as np
import requests
import datetime
import time
import os
import urllib3
from progress.bar import Bar
def load_file(file_path):
with open(file_path, 'rb') as inp:
file = inp.read()
return file
def write_file(file, file_path):
with open(file_path, 'wb') as out:
out.write(file)
def parsing_login_file(file):
with open(file, 'r') as f:
return {e.split('\t')[0].strip(): e.split('\t')[1].strip() for e in f.readlines()}
def check_type(var):
if var == type(object):
return "Text"
elif (var == type(float)) or var == np.dtype(float):
return "Float"
elif (var == type(bool)) or var == np.dtype(bool):
return "Boolean"
elif var == type(str):
return "Text"
elif (var == type(int)) or var == np.dtype(int):
return "Integer"
else:
raise Exception(f'Cannot put to BioUML column of type {type(var)}')
def dictionary_converter(dic_for_parsing):
converted_dictionary = {}
for k, v in dic_for_parsing.items():
if '[' in k:
for k1, v1 in v.items():
if not isinstance(v1, dict):
if k1 not in converted_dictionary.keys():
converted_dictionary[k1] = [v1]
else:
converted_dictionary[k1].append(v1)
else:
PyBiouml().as_name_value(v1)
return converted_dictionary
def adder(object, dictionary, value):
if len(object) != 1:
dictionary.setdefault(object[0], {})
dictionary[object[0]].update({})
adder(object[1:], dictionary[object[0]], value)
else:
dictionary[object[0]] = value
def list_to_dataframe():
pass
def deexpand(dic, final_list, key=None):
for k, v in dic.items():
for e in v:
if isinstance(dict(), type(e)):
deexpand(e, final_list, key=k)
else:
if key is None:
final_list.append(f'{k}/{e}')
else:
final_list.append(f'{key}/{k}/{e}')
def expand(e, prop):
if 'type' in e.keys() and e['type'] == 'composite':
result = []
for el in e['value']:
result.append(expand(el, prop))
return {e[prop]: result}
else:
return e[prop]
def column(params, prop):
result = []
for e in params:
try:
expand_result = expand(e, prop)
except:
expand_result = None
if isinstance(dict(), type(expand_result)):
deexpand(expand_result, result)
else:
result.append(expand_result)
return result
[docs]class PyBiouml:
"""
This library is a python API interface for using BioUML Web service
"""
def __init__(self):
self.options = {}
self.info = {}
self.stats = [] # List to store statistics for each request
[docs] def get(self, path):
"""
Method fetches table data from BioUML server.
:param path: Path to table in BioUML repository.
:type path: str
:return crafted_table: return DataFrame representation of BioUML table from path.
:rtype crafted_table: pandas.DataFrame
"""
TABLES_CONST = '/web/table/'
table_info = pd.DataFrame(self.query_json(server_path=''.join([TABLES_CONST, 'columns']), de=path)['values'])
table_data = self.query_json(server_path=''.join([TABLES_CONST, 'rawdata']), de=path)['values']
if 'hidden' in table_info.columns:
table_info = table_info[table_info['hidden'] != True]
crafted_table = pd.DataFrame(table_data, index=table_info['name'].to_list()).T
return crafted_table.loc[:, crafted_table.columns != 'ID']
[docs] def get_connection(self):
"""
Service method for creation a connection with BioUML Web Service, which checks if user was logged into BioUML
:return connection: connection link
"""
urllib3.disable_warnings()
connection = self.options
if len(connection.keys()) == 0:
LINK_TO_CONF_FILE = '/home/jovyan/work/.user.txt'
if os.path.exists(LINK_TO_CONF_FILE):
params = parsing_login_file(LINK_TO_CONF_FILE)
self.login(url=params['url'], user=params['user'], password=params['pass'])
return self.options
else:
raise Exception('Not logged into biouml, run PyBiouml().login() first')
return connection
[docs] def query(self, servers_path, data, file=None, binary=False):
"""
Service method for making post request with time and size logging
:return: result of post request
"""
connection = self.get_connection()
url = ''.join([connection['url'], servers_path])
if file:
response = requests.post(url, data=data, cookies=self.options['cookie'], verify=False, files=file)
else:
response = requests.post(url, data=data, cookies=self.options['cookie'], verify=False)
return response
[docs] def query_json(self, server_path, reconnect=True, parameters=None, **params):
"""
Service method for json return of connection with time and size logging
:param server_path: path to required server.
:type server_path: str
:param reconnect: try to reconnect or not.
:type reconnect: bool
:param parameters: a dictionary with parameters for connection. Default None, this method can take parameters through **params.
:type parameters: dict
:param params: This parameter collect any other parameters which are give to the method and return a dictionary with them.
:return json_content: return a result of the request in json format.
:rtype json_content: json
"""
if parameters is None:
parameters = {}
parameters.update(params)
connect = self.query(server_path, parameters)
json_content = connect.json()
response_type = json_content.get('type')
if response_type == 3 & reconnect:
self.reconnect()
return self.query_json(server_path=server_path, reconnect=False, parameters=parameters)
elif response_type != 0:
raise Exception(json_content.get('message'))
return json_content
[docs] def get_stats(self):
"""
Retrieve statistics for all requests
:return: List of statistics dictionaries
"""
return self.stats
[docs] def login(self, url='http://localhost:8080/biouml', user='', password=''):
"""
Method which LogintoBioUMLserver. Theconnectionwillbesavedinglobaloptionsundernamebiouml_connection for future reuse.
:param url: URLof running biouml server. Default it is http://localhost:8080/biouml, like an example of local biouml service.
:type url: str
:param user: BioUML user, empty string for anonymous login.
:type user: str
:param password: password.
:type password: str
"""
HTTP_CONST = 'http://'
HTTPS_CONST = 'https://'
BIOUML_CONST = '/biouml'
if not (url.startswith(HTTP_CONST) or url.startswith(HTTPS_CONST)):
url = ''.join([HTTP_CONST, url])
if not url.endswith(BIOUML_CONST):
url = ''.join([url, BIOUML_CONST])
self.options.update([('url', url), ('username', user), ('password', password)])
self.reconnect()
[docs] def reconnect(self):
"""
Service method for reconnection to BioUML web service
"""
options = self.options
url = options.pop('url')
new_url = ''.join([url, '/web/login'])
session = requests.Session()
req = session.post(new_url, data=options)
json_req = req.json()
if json_req['type'] != 0:
raise Exception(json_req.get('message'))
self.options.update([('url', url), ('cookie', req.cookies)])
self.options.update(options)
[docs] def logout(self):
"""
Method which Logouts from BioUML server.
"""
self.query_json('/web/logout')
[docs] def analysis(self, analysis_name, wait=True, verbose=True, parameters=None, **params):
"""
Method for run BioUML analysis, optionally tracking progress
:param analysis_name: name of BioUML analysis to run, use PyBiouml.analysis_list to get the list of possible values
:type analysis_name: str
:param wait: whether to wait for analysis completion or return immediately
:type wait: bool
:param verbose: print messages and progress from BioUML analysis, only meaningful if wait is TRUE
:type verbose: bool
:param parameters: a dictionary of parameters to BioUML analysis, use PyBiouml.analysis_parameters to get the table of parameters. Default None, this method can take parameters through **params.
:type parameters: dict
:param params: parameters to BioUML analysis, use PyBiouml.analysis_parameters to get the table of parameters. Collect any other parameters which are give to the method and return a dictionary with them.
:return: job_id: return a job id of started work, that can be passed to PyBiouml.job_info and PyBiouml.job_wait.
:rtype: str
"""
job_id = self.next_job_id()
if parameters is None:
parameters = self.as_tree(params)
else:
parameters = self.as_tree(parameters)
self.query_json('/web/analysis', jobID=job_id, de=analysis_name, json=parameters)
if wait:
self.job_wait(job_id=job_id, verbose=verbose)
return print(job_id)
[docs] def next_job_id(self):
"""
Service method for creation a new job id.
:return: job_id
:rtype: str
"""
last_job_id = self.info.setdefault('last_job_id', 0)
n_job_id = last_job_id + 1
self.info['last_job_id'] = n_job_id
return ''.join(['RJOB', str(datetime.datetime.now().strftime('%Y%m%d%H%M%S')), str(n_job_id)])
[docs] def job_info(self, job_id):
"""
Method info fetches info about BioUML job.
:param job_id: ID of job usually returned from PyBiouml.analysis
:type job_id: str
:return: info: json with information about requested process (status, results, values, percent)
"""
info = self.query_json('/web/jobcontrol', jobID=job_id)
info['status'] = ['CREATED',
'RUNNING',
'PAUSED',
'COMPLETED',
'TERMINATED_BY_REQUEST',
'TERMINATED_BY_ERROR'][info['status']]
return info
[docs] def job_wait(self, job_id, verbose=True):
"""
Waits for BioUML job completion
:param job_id: ID of job usually returned from biouml.analysis
:param verbose: print messages and progress from BioUML job
:return: json with information about requested process (status, results, values, percent)
"""
message_length = 0
STATUSES = ['COMPLETED', 'TERMINATED_BY_REQUEST', 'TERMINATED_BY_ERROR']
with Bar(message='Processing', max=100, suffix='%(percent)d%%') as bar:
while True:
info = self.job_info(job_id)
if verbose:
if info['percent'] is not None:
bar.index = int(info['percent'])
bar.next()
if info['values'] is not None:
print(info['values'][0][message_length:])
message_length = len(info['values'][0])
if info['status'] in STATUSES:
return info
time.sleep(1)
[docs] def as_tree(self, params_dictionary):
"""
Service method for parsing dictionary with parameters to json, which can be used by BioUML Web service.
:param params_dictionary: dictionary with parameters
:type params_dictionary: dict
:return: parameters in json format
"""
heirarhical_dictionary = {}
for e in params_dictionary.keys():
if '/' in e:
splited_name = e.split('/')
adder(splited_name, heirarhical_dictionary, params_dictionary[e])
else:
heirarhical_dictionary[e] = params_dictionary.get(e)
name_values = self.as_name_value(heirarhical_dictionary)
return json.dumps(name_values)
def as_name_value(self, dictionary):
local_list = []
c = 0
for k, v in dictionary.items():
if not isinstance(v, dict):
local_list.append({'name': k, 'value': v})
else:
if '[' in k:
if c == 0:
local_list.append(dictionary_converter(dictionary))
c += 1
else:
continue
else:
local_list.append({'name': k, 'value': self.as_name_value(v)})
return local_list
[docs] def put(self, path, value):
"""
Method to put usere's table on BioUML Web Service.
:param path: path to repository for table.
:type path: str
:param value: pandas DataFrame table to put
:type value: DataFrame
"""
columns = []
columns.append({'name': 'ID', 'type': 'Text'})
data = []
v_c = value.index.values.astype(str, copy=True)
data.append(v_c.tolist())
for i, v in enumerate(value.columns):
columns.append({'name': v, 'type': check_type(value.dtypes[v])})
str_column = value[v].astype(str, copy=True)
data.append(str_column.tolist())
self.query_json('/web/table/createTable', de=path, columns=json.dumps(columns), data=json.dumps(data))
[docs] def export(self, path, exporter_params=None, exporter='Tab-separated text (*.txt)', target_file='biouml.out'):
"""
Method exports data from BioUML server to local file in given format
:param path: path in BioUML repository.
:type path: str
:param exporter_params: dictionary of parameters to exporter. Default None.
:param exporter: character string specifying format, PyBiouml_exporters provides possible values.
:type exporter: str
:param target_file: a character string naming a file to export to. Default biouml.out
:type target_file: str
"""
if exporter_params is None:
exporter_params = []
else:
exporter_params = self.as_tree(exporter_params)
start_time = time.time()
data = {'exporter': exporter,
'type': 'de',
'detype': 'Element',
'de': path,
'parameters': exporter_params}
content = self.query('/web/export', data=data, binary=True)
end_time = time.time()
elapsed_time = round(end_time - start_time, 6)
self.stats.append({
'Type': 'Exporting',
'File name': os.path.basename(target_file),
'Upload time': f'{elapsed_time} seconds'
})
write_file(content.content, target_file)
[docs] def to_import(self, file, parentPath, importer, importer_params=None):
"""
Method imports file to BioUML repository.
:param file: The name of file to import.
:param parentPath: Path to folder in BioUML repository.
:type parentPath: str
:param importer: character string specifying format, PyBiouml.importers() provides list of posible values
:type importer: str
:param importer_params: dictionary of parameters to importer
:return: Resulting path in BioUML repository
:rtype: str
"""
if importer_params is None:
importer_params = []
else:
importer_params = self.as_tree(importer_params)
start_time = time.time()
file_id = self.next_job_id()
job_id = self.next_job_id()
data = {'fileID': file_id}
filename = os.path.basename(file)
start_time = time.time()
self.query('/web/upload', data=data, file={'file': (filename, load_file(file))})
params = {'type': 'import',
'fileID': file_id,
'de': parentPath,
'jobID': job_id,
'format': importer,
'json': importer_params
}
self.query_json('/web/import', parameters=params)
end_time = time.time()
elapsed_time = round(end_time - start_time, 6)
file_size = os.path.getsize(file)
self.stats.append({
'Type': 'Importing',
'File name': filename,
'jobID': job_id,
'fileID': file_id,
'Upload time': f'{elapsed_time} seconds',
'File size': f'{file_size} bytes'
})
return self.job_wait(job_id)['results'][0]
[docs] def ls(self, path, extended=False):
"""
Method lists children data elements by path in BioUML repository.
:param path: path to data collection in BioUML repository.
:type path: str
:param extended: whether to return additional attributes for each children.
:type extended: bool
:return: df: If extended is False a DataFrame with child names, otherwise a DataFrame wich contains row with names corresponding to child names and columns hasChildren and type.
:rtype: pandas.DataFrame
"""
resp = self.query_json('/web/data', service='access.service', command=29, dc=path)
content = resp.get('values')
d = json.loads(content)
if len(d.get('names')) == 0:
return pd.DataFrame()
df = pd.DataFrame(d['names'])
return df[['name', 'hasChildren', 'class']]
[docs] def analysis_list(self):
"""
Method that fetches list of available analyses from current BioUML server
:return: DataFrame table of analysis with two column ’Group’ and ’Name’.
:rtype: pandas.DataFrame
"""
resp = self.query_json("/web/analysis/list")['values']
r = {'Group': [], 'Name': []}
for e in resp:
splitted = e.split('/')
r['Group'].append(splitted[0])
r['Name'].append(splitted[1])
return pd.DataFrame(r)
[docs] def exporters(self):
"""
Method fetches the list of exporters from BioUML server, these exporters can be used in PyBiouml.export method
:return: list with expoters
"""
return self.query_json("/web/export/list")['values']
[docs] def importers(self):
"""
Method fetches the list of importers from BioUML server, these importers can be used in PyBiouml.to_import method
:return: list with expoters
"""
return self.query_json("/web/import/list")['values']
[docs] def workflow(self, path, parameters=None, wait=True, verbose=True):
"""
Method run BioUML workflow optionaly tracking progress.
:param path: path to BioUML workflow
:type path: str
:param parameters: list of parameters to BioUML workflow.
:param wait: whether to wait for workflow completion or return immediately
:type wait: bool
:param verbose: print messages and progress from BioUML workflow, only meaningful if wait is TRUE
:type verbose: bool
:return: job_id: return a job id of started work, that can be passed to PyBiouml.job_info and PyBiouml.job_wait.
:rtype: str
"""
if parameters is None:
parameters = []
else:
parameters = self.as_tree(parameters)
job_id = self.next_job_id()
self.query_json('/web/research',
jobID=job_id,
action='start_workflow',
de=path,
json=parameters)
if wait:
self.job_wait(job_id, verbose)
return job_id
[docs] def parameters(self, server_path, **params):
"""
Service method, which create a request to get parameters
:param server_path: path to target of the request for which need to take parameters
:type server_path: str
:param params: This parameter collect any other parameters which are give to the method and return a dictionary with them.
:return: DataFrame with all parameters
:rtype: pandas.DataFrame
"""
query_params = self.query_json(server_path=server_path,
parameters=params)['values']
name = column(query_params, 'name')
desc = column(query_params, 'description')
param_type = column(query_params, 'type')
return pd.DataFrame({'Name': name, 'Description': desc, 'Type': param_type})
[docs] def analysis_parameters(self, analysis_name):
"""
Get BioUML analysis parameters names and description
:param analysis_name: me of BioUML analysis, , use PyBiouml.analysis_list to get the list of possible values
:type analysis_name: str
:return: DataFrame which contains row with names corresponding to parameter names and one column ’description’ with parameter description
:rtype: pandas.DataFrame
"""
return self.parameters('/web/bean/get', de=''.join(['properties/method/parameters/', analysis_name]))
[docs] def export_parameters(self, path, exporter):
"""
Method get BioUML export parameters
:param path: path to data element in BioUML repository to export
:type path: str
:param exporter: name of BioUML exporter, use PyBiouml.exporters to get the table of possible values
:type exporter: str
:return: DataFrame which contains row with names corresponding to parameter names and one column ’description’ with parameter description
:rtype: pandas.DataFrame
"""
return self.parameters('/web/export',
de=path,
detype='Element',
type='deParams',
exporter=exporter
)
[docs] def import_parameters(self, path, importer):
"""
Method Get BioUML import parameters
:param path: path to data element in BioUML repository to import
:type path: str
:param importer: name of BioUML importer, use biouml.importers to get the list of possible values
:type importer: str
:return: DataFrame which contains row with names corresponding to parameter names and one column ’description’ with parameter description
:rtype: pandas.DataFrame
"""
return self.parameters('/web/import',
de=path,
detype='Element',
type='properties',
format=importer,
jobID=self.next_job_id()
)