python 总结来自人肝细胞的H3K4me3峰区域的肝组织中的DNA甲基化水平
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python 总结来自人肝细胞的H3K4me3峰区域的肝组织中的DNA甲基化水平相关的知识,希望对你有一定的参考价值。
import xmlrpclib
import time
import os.path
user_key = 'anonymous_key'
url = "http://deepblue.mpi-inf.mpg.de/xmlrpc"
server = xmlrpclib.Server(url, encoding='UTF-8', allow_none=True)
print server.echo(user_key)
## List the H1-hESC samples from ENCODE
(status, H1_hESC_samples) = server.list_samples("H1-hESC",
{"source":"ENCODE"}, user_key)
H1_hESC_samples_ids = server.extract_ids(H1_hESC_samples)[1]
## List the peaks experiments from H3K27ac with the H1-hESC samples
(status, experiments) = server.list_experiments("hg19", "peaks", "H3K4me3", "",
H1_hESC_samples_ids, None,
"ENCODE", user_key)
## Find the experiment that come from a bed file
experiments_id = server.extract_ids(experiments)[1]
(status, exps_infos) = server.info(experiments_id, user_key)
h1_hESC_H3K4me3_experiment_name = [e["name"] for e in
exps_infos if e["extra_metadata"]['original_file_url'].endswith("bed.gz")]
if len(h1_hESC_H3K4me3_experiment_name) != 1:
print "It was expected only one h1_hESC_H3K4me3 experiment"
h1_hESC_H3K4me3_exp = h1_hESC_H3K4me3_experiment_name[0]
## Obtain the regions from the H1-hESC H3k4me3 peaks
(status, h1_hESC_H3K4me3) = server.select_experiments(h1_hESC_H3K4me3_exp,
None, None, None,
user_key)
## List the liver and hepatocyte experiments
ss, liver_experiments = server.list_experiments("hg19", "",
"DNA Methylation",
'liver',
None, "RRBS", "ENCODE", user_key)
liver_experiments_names = server.extract_names(liver_experiments)[1]
print liver_experiments_names
## RETRIEVE THE SUMMARY DATA
requests = []
request_id_experiment = {}
## For each liver or hepatocyte experiment
for liver_experiment in liver_experiments_names:
print "Processing", liver_experiment
# Select the experiment regions
(status, q_liver_data) = server.select_experiments(liver_experiment,
None, None, None,
user_key)
# Perform aggregation
(status, agg_id) = server.aggregate(q_liver_data, h1_hESC_H3K4me3,
'SCORE', user_key)
# Filter the aggregate regions that summarized at least 1 region
(res, q_filter) = server.filter_regions(agg_id, "@AGG.COUNT", ">", "0",
"number", user_key)
# Request regions
status, req = server.get_regions(q_filter,
"CHROMOSOME,START,END,@AGG.MEAN,@AGG.COUNT,@AGG.MAX,@AGG.MIN", user_key)
print liver_experiment, status, req
request_id_experiment[req] = liver_experiment
requests.append(req)
print requests
print request_id_experiment
if not os.path.isdir("data/"):
os.mkdir("data/")
# Here we are waiting for the requests be completed
# We store the results in the files and print the request info in case of error
while len(requests) > 0:
for req in requests[:]:
(s, ss) = server.info(req, user_key)
if ss[0]["state"] == "done":
print ss[0]
print "getting data from " + ss[0]["_id"]
(s, data) = server.get_request_data(req, user_key)
with open("data/"+request_id_experiment[req]+".bed", 'wb') as f:
f.write(data)
requests.remove(req)
if ss[0]["state"] == "failed":
print ss
requests.remove(req)
time.sleep(1.0)
以上是关于python 总结来自人肝细胞的H3K4me3峰区域的肝组织中的DNA甲基化水平的主要内容,如果未能解决你的问题,请参考以下文章
python 列出来自Blueprint项目的生物资源“炎性巨噬细胞”和“巨噬细胞”的所有高峰实验
python 选择来自BLUEPRINT样本和骨髓细胞类型的所有实验。