text nim-lang中的去均聚物
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了text nim-lang中的去均聚物相关的知识,希望对你有一定的参考价值。
# Some proc are stolen from
# https://github.com/jhbadger/nimbioseq
import sequtils, strutils, math, tables, osproc, streams, os, typetraits
type Record* = object
## This type represents a genetic sequence with optional quality
id*: string
description*: string
quality*: string
sequence*: string
proc length*(self:Record): int =
## returns length of sequence
self.sequence.len()
proc check_app*(appname: string): bool =
## Check app existed?
var command = "type"
var process = startProcess(command, args=[appname], options={poUsePath})
var line = TaintedString("")
var exited = true
while process.outputStream.readLine(line):
if line.find("not found") > -1:
exited = false
process.close
return exited
iterator compressedLines*(filename: string): string =
## iterator to read lines of a (maybe) compressed text file transparently
var command = "none"
if filename.find(".gz") > -1:
command = "gzcat"
elif filename.find(".bz2") > -1:
command = "bzcat"
if command == "none":
for line in lines filename:
yield line
else:
var process = startProcess(command, args=[filename], options={poUsePath})
var line = TaintedString("")
while process.outputStream.readLine(line):
yield line
process.close
iterator readFasta*(filename: string): Record =
## iterator to iterate over the FASTA records in a file
var s = Record(id:nil, description:"", sequence:"")
var seqLines = @[""]
for line in compressedLines filename:
if line[0] == '>':
if s.id != nil:
s.sequence = seqLines.join
yield s
s.id = nil
s.description = ""
s.sequence = ""
seqLines = @[]
var fields = split(line[1..len(line)-1], ' ', 1)
if len(fields) > 1:
(s.id, s.description) = fields
else:
s.id = fields[0]
else:
seqLines.add(line)
if s.id != nil:
s.sequence = seqLines.join
yield s
iterator readFastq*(filename:string): Record =
## iterator to iterate over the FASTQ records in a file
var s = Record(id:nil, description:"", quality: "", sequence:"")
var lineNum = 0
for line in compressedLines filename:
if lineNum == 0:
if s.id != nil:
yield s
s.id = nil
s.description = ""
s.sequence = ""
s.quality = ""
var fields = split(line[1..len(line)-1], ' ', 1)
if len(fields) > 1:
(s.id, s.description) = fields
else:
s.id = fields[0]
elif lineNum == 1:
s.sequence = line
elif lineNum == 3:
s.quality = line
lineNum = (lineNum + 1) mod 4
if s.id != nil:
yield s
iterator readSeqs*(filename:string):Record =
for line in compressedLines filename:
if line[0] == '>':
for s in readFasta(filename):
yield s
break
elif line[0] == '@':
for s in readFastq(filename):
yield s
break
else:
echo "I don't know what type of file is " & filename
proc dehomo*(input:Record): Record =
var i = 0
var dehomo = Record(id:nil, description:"", quality: "", sequence:"")
dehomo.id = input.id
dehomo.description = input.description
while i < input.length:
if i == 0:
dehomo.sequence.add(input.sequence[i])
dehomo.quality.add(input.quality[i])
else:
if input.sequence[i] != input.sequence[i-1]:
dehomo.sequence.add(input.sequence[i])
dehomo.quality.add(input.quality[i])
i += 1
return dehomo
proc dehomopolymerate(input: string, output: string, summary = false) =
let progName = split(getAppFilename(), "/")[getAppFileName().count("/")]
if input != nil:
var o = system.open(output, fmWrite)
var s_sum = 0
var dehomo_sum = 0
var num_reads = 0
for s in readSeqs(input):
var o_dehomo: Record
o_dehomo = dehomo(s)
o.writeline("@",o_dehomo.id," ", o_dehomo.description)
o.writeline(o_dehomo.sequence)
o.writeline("+")
o.writeline(o_dehomo.quality)
s_sum += s.length
dehomo_sum += o_dehomo.length
num_reads += 1
o.close()
if summary:
echo "INPUT: ", s_sum, " bases"
echo "OUTPUT: ", dehomo_sum, " bases"
echo "Total reads: ", num_reads
else:
echo progName & ": needs file name"
# Need to have cligen installed, with nimble
when isMainModule: import cligen;dispatch(dehomopolymerate)
以上是关于text nim-lang中的去均聚物的主要内容,如果未能解决你的问题,请参考以下文章
Flow2000?v6.2?1CD(最好的高聚物挤出过程分析软件)