git: 886844b5ecf8 - main - biology/py-crossmap: Update to 0.7.3
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 04 Mar 2025 13:15:15 UTC
The branch main has been updated by jwb: URL: https://cgit.FreeBSD.org/ports/commit/?id=886844b5ecf827d919cf53ba0875b0ebd6e2a29a commit 886844b5ecf827d919cf53ba0875b0ebd6e2a29a Author: Jason W. Bacon <jwb@FreeBSD.org> AuthorDate: 2025-03-04 13:12:42 +0000 Commit: Jason W. Bacon <jwb@FreeBSD.org> CommitDate: 2025-03-04 13:12:42 +0000 biology/py-crossmap: Update to 0.7.3 Several fixes and enhancements since 0.5.4 Changes: https://github.com/liguowang/CrossMap/releases PR: 284923 Reported by: alster@vinterdalen.se --- biology/py-crossmap/Makefile | 8 +- biology/py-crossmap/distinfo | 6 +- biology/py-crossmap/files/patch-2to3 | 2955 -------------------- .../files/patch-lib_cmmodule_twoList.py | 18 - biology/py-crossmap/files/patch-setup.py | 10 - 5 files changed, 6 insertions(+), 2991 deletions(-) diff --git a/biology/py-crossmap/Makefile b/biology/py-crossmap/Makefile index a5a3746a31c4..ddee1a5a1fa7 100644 --- a/biology/py-crossmap/Makefile +++ b/biology/py-crossmap/Makefile @@ -1,16 +1,14 @@ PORTNAME= crossmap -DISTVERSION= 0.5.4 -PORTREVISION= 1 +DISTVERSION= 0.7.3 CATEGORIES= biology python MASTER_SITES= PYPI PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX} -DISTNAME= CrossMap-${DISTVERSION} MAINTAINER= jwb@FreeBSD.org COMMENT= Lift over genomics coordinates between assemblies -WWW= https://pypi.python.org/pypi/crossmap +WWW= https://github.com/liguowang/CrossMap -LICENSE= GPLv2 +LICENSE= GPLv3+ LICENSE_FILE= ${WRKSRC}/LICENSE.txt BUILD_DEPENDS= ${PYTHON_PKGNAMEPREFIX}nose>0.10.4:devel/py-nose@${PY_FLAVOR} diff --git a/biology/py-crossmap/distinfo b/biology/py-crossmap/distinfo index 1dabfc731aef..f35df2d53427 100644 --- a/biology/py-crossmap/distinfo +++ b/biology/py-crossmap/distinfo @@ -1,3 +1,3 @@ -TIMESTAMP = 1618936542 -SHA256 (CrossMap-0.5.4.tar.gz) = 3419a5bf422754c3acb1e59ec9bba1a6c9a19d3673e962ae832e429ef0f768e5 -SIZE (CrossMap-0.5.4.tar.gz) = 9511082 +TIMESTAMP = 1740057259 +SHA256 (crossmap-0.7.3.tar.gz) = c5793d1bbfea962b3f21d8cd9830a79a169b3a3ec6461555d819be125b391f6d +SIZE (crossmap-0.7.3.tar.gz) = 33325 diff --git a/biology/py-crossmap/files/patch-2to3 b/biology/py-crossmap/files/patch-2to3 deleted file mode 100644 index c3b680224b43..000000000000 --- a/biology/py-crossmap/files/patch-2to3 +++ /dev/null @@ -1,2955 +0,0 @@ ---- lib/cmmodule/SAM.py.orig 2018-12-17 16:05:26 UTC -+++ lib/cmmodule/SAM.py -@@ -150,52 +150,52 @@ class ParseSAM: - forward_SE +=1 - - if paired: -- print >>sys.stderr,"\n#==================================================" -- print >>sys.stderr,"#================Report (pair-end)=================" -- print >>sys.stderr, "%-25s%d" % ("Total Reads:",total_read) -- print >>sys.stderr, "%-25s%d" % ("Total Mapped Reads:", (mapped_read1 + mapped_read2)) -- print >>sys.stderr, "%-25s%d" % ("Total Unmapped Reads:",(unmapped_read1 + unmapped_read2)) -- print >>sys.stderr, "%-25s%d" % ("PCR duplicate:",pcr_duplicate) -- print >>sys.stderr, "%-25s%d" % ("QC-failed:",low_qual) -- print >>sys.stderr, "%-25s%d" % ("Not primary mapping:",secondary_hit) -- print >>sys.stderr, "\n", -- print >>sys.stderr, "%-25s%d" % ("Unmapped Read-1:",unmapped_read1) -- print >>sys.stderr, "%-25s%d" % ("Mapped Read-1:",mapped_read1) -- print >>sys.stderr, "%-25s%d" % (" Forward (+):",forward_read1) -- print >>sys.stderr, "%-25s%d" % (" Reverse (-):",reverse_read1) -+ print("\n#==================================================", file=sys.stderr) -+ print("#================Report (pair-end)=================", file=sys.stderr) -+ print("%-25s%d" % ("Total Reads:",total_read), file=sys.stderr) -+ print("%-25s%d" % ("Total Mapped Reads:", (mapped_read1 + mapped_read2)), file=sys.stderr) -+ print("%-25s%d" % ("Total Unmapped Reads:",(unmapped_read1 + unmapped_read2)), file=sys.stderr) -+ print("%-25s%d" % ("PCR duplicate:",pcr_duplicate), file=sys.stderr) -+ print("%-25s%d" % ("QC-failed:",low_qual), file=sys.stderr) -+ print("%-25s%d" % ("Not primary mapping:",secondary_hit), file=sys.stderr) -+ print("\n", end=' ', file=sys.stderr) -+ print("%-25s%d" % ("Unmapped Read-1:",unmapped_read1), file=sys.stderr) -+ print("%-25s%d" % ("Mapped Read-1:",mapped_read1), file=sys.stderr) -+ print("%-25s%d" % (" Forward (+):",forward_read1), file=sys.stderr) -+ print("%-25s%d" % (" Reverse (-):",reverse_read1), file=sys.stderr) - -- print >>sys.stderr, "\n", -- print >>sys.stderr, "%-25s%d" % ("Unmapped Read-2:",unmapped_read2) -- print >>sys.stderr, "%-25s%d" % ("Mapped Read-2:",mapped_read2) -- print >>sys.stderr, "%-25s%d" % (" Forward (+):",forward_read2) -- print >>sys.stderr, "%-25s%d" % (" Reverse (-):",reverse_read2) -+ print("\n", end=' ', file=sys.stderr) -+ print("%-25s%d" % ("Unmapped Read-2:",unmapped_read2), file=sys.stderr) -+ print("%-25s%d" % ("Mapped Read-2:",mapped_read2), file=sys.stderr) -+ print("%-25s%d" % (" Forward (+):",forward_read2), file=sys.stderr) -+ print("%-25s%d" % (" Reverse (-):",reverse_read2), file=sys.stderr) - -- print >>sys.stderr, "\n", -- print >>sys.stderr, "%-25s%d" % ("Mapped to (+/-):",plus_minus) -- print >>sys.stderr, "%-25s%d" % ("Mapped to (-/+):",minus_plus) -- print >>sys.stderr, "%-25s%d" % ("Mapped to (+/+):",plus_plus) -- print >>sys.stderr, "%-25s%d" % ("Mapped to (-/-):",minus_minus) -- print >>sys.stderr, "\n", -- print >>sys.stderr, "%-25s%d" % ("Spliced Hits:",_numSplitHit) -- print >>sys.stderr, "%-25s%d" % ("Non-spliced Hits:",_numMonoHit) -- print >>sys.stderr, "%-25s%d" % ("Reads have insertion:",_numInsertion) -- print >>sys.stderr, "%-25s%d" % ("Reads have deletion:",_numDeletion) -+ print("\n", end=' ', file=sys.stderr) -+ print("%-25s%d" % ("Mapped to (+/-):",plus_minus), file=sys.stderr) -+ print("%-25s%d" % ("Mapped to (-/+):",minus_plus), file=sys.stderr) -+ print("%-25s%d" % ("Mapped to (+/+):",plus_plus), file=sys.stderr) -+ print("%-25s%d" % ("Mapped to (-/-):",minus_minus), file=sys.stderr) -+ print("\n", end=' ', file=sys.stderr) -+ print("%-25s%d" % ("Spliced Hits:",_numSplitHit), file=sys.stderr) -+ print("%-25s%d" % ("Non-spliced Hits:",_numMonoHit), file=sys.stderr) -+ print("%-25s%d" % ("Reads have insertion:",_numInsertion), file=sys.stderr) -+ print("%-25s%d" % ("Reads have deletion:",_numDeletion), file=sys.stderr) - else: -- print >>sys.stderr,"\n#====================================================" -- print >>sys.stderr,"#================Report (single-end)=================" -- print >>sys.stderr, "%-25s%d" % ("Total Reads:",total_read) -- print >>sys.stderr, "%-25s%d" % ("Total Mapped Reads:", map_SE) -- print >>sys.stderr, "%-25s%d" % ("Total Unmapped Reads:",unmap_SE) -- print >>sys.stderr, "%-25s%d" % ("PCR duplicate:",pcr_duplicate) -- print >>sys.stderr, "%-25s%d" % ("QC-failed:",low_qual) -- print >>sys.stderr, "%-25s%d" % ("Not primary mapping:",secondary_hit) -- print >>sys.stderr, "%-25s%d" % ("froward (+):",forward_SE) -- print >>sys.stderr, "%-25s%d" % ("reverse (-):",reverse_SE) -- print >>sys.stderr, "\n", -- print >>sys.stderr, "%-25s%d" % ("Spliced Hits:",_numSplitHit) -- print >>sys.stderr, "%-25s%d" % ("Non-spliced Hits:",_numMonoHit) -- print >>sys.stderr, "%-25s%d" % ("Reads have insertion:",_numInsertion) -- print >>sys.stderr, "%-25s%d" % ("Reads have deletion:",_numDeletion) -+ print("\n#====================================================", file=sys.stderr) -+ print("#================Report (single-end)=================", file=sys.stderr) -+ print("%-25s%d" % ("Total Reads:",total_read), file=sys.stderr) -+ print("%-25s%d" % ("Total Mapped Reads:", map_SE), file=sys.stderr) -+ print("%-25s%d" % ("Total Unmapped Reads:",unmap_SE), file=sys.stderr) -+ print("%-25s%d" % ("PCR duplicate:",pcr_duplicate), file=sys.stderr) -+ print("%-25s%d" % ("QC-failed:",low_qual), file=sys.stderr) -+ print("%-25s%d" % ("Not primary mapping:",secondary_hit), file=sys.stderr) -+ print("%-25s%d" % ("froward (+):",forward_SE), file=sys.stderr) -+ print("%-25s%d" % ("reverse (-):",reverse_SE), file=sys.stderr) -+ print("\n", end=' ', file=sys.stderr) -+ print("%-25s%d" % ("Spliced Hits:",_numSplitHit), file=sys.stderr) -+ print("%-25s%d" % ("Non-spliced Hits:",_numMonoHit), file=sys.stderr) -+ print("%-25s%d" % ("Reads have insertion:",_numInsertion), file=sys.stderr) -+ print("%-25s%d" % ("Reads have deletion:",_numDeletion), file=sys.stderr) - - def samTobed(self,outfile=None,mergePE=False): - """Convert SAM file to BED file. BED file will be saved as xxx.sam.bed unless otherwise specified. -@@ -204,7 +204,7 @@ class ParseSAM: - if outfile is None: - outfile=self.fileName + ".bed" - -- print >>sys.stderr,"\tWriting bed entries to\"",outfile,"\"...", -+ print("\tWriting bed entries to\"",outfile,"\"...", end=' ', file=sys.stderr) - FO=open(outfile,'w') - for line in self.f: - if line.startswith(('@','track')):continue #skip head lines -@@ -240,14 +240,14 @@ class ParseSAM: - for i in range(0,len(comb),2): - blockStart.append(str(sum(comb[:i]))) - blockStarts = ','.join(blockStart) -- print >>FO, string.join((str(i) for i in [chrom,chromStart,chromEnd,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts]),sep="\t") -- print >>sys.stderr, "Done" -+ print(string.join((str(i) for i in [chrom,chromStart,chromEnd,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts]),sep="\t"), file=FO) -+ print("Done", file=sys.stderr) - FO.close() - self.f.seek(0) - - if mergePE: - #creat another bed file. pair-end reads will be merged into single bed entry -- print >>sys.stderr, "Writing consoidated bed file ...", -+ print("Writing consoidated bed file ...", end=' ', file=sys.stderr) - bedfile = open(outfile,'r') - outfile_2 = outfile + ".consolidate.bed" - outfile_3 = outfile + '.filter' -@@ -292,11 +292,11 @@ class ParseSAM: - if(blocks[key] ==1): #single end, single hit - st = [i - txSt[key] for i in starts[key]] - st = string.join([str(i) for i in st],',') -- print >>FO, chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key,"\t","11\t",strand[key][0],"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sizes[key],','),"\t",st -+ print(chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key,"\t","11\t",strand[key][0],"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sizes[key],','),"\t",st, file=FO) - else: - st = [i - txSt[key] for i in starts[key]] #single end, spliced hit - st = string.join([str(i) for i in st],',') -- print >>FO, chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key,"\t","12\t",strand[key][0],"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sizes[key],','),"\t",st -+ print(chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key,"\t","12\t",strand[key][0],"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sizes[key],','),"\t",st, file=FO) - - elif(count[key]==2): #pair-end read - direction = string.join(strand[key],'/') -@@ -306,17 +306,17 @@ class ParseSAM: - #st=[string.atoi(i) for i in st] - if(len(chr[key])==1): #pair-end reads mapped to same chromosome - if blocks[key] ==2: #pair end, single hits -- print >>FO, chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key + "|strand=" + direction + "|chrom=same","\t","21\t",'.',"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sz,','),"\t",string.join([str(i) for i in st],',') -+ print(chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key + "|strand=" + direction + "|chrom=same","\t","21\t",'.',"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sz,','),"\t",string.join([str(i) for i in st],','), file=FO) - elif blocks[key] >2: # -- print >>FO, chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key + "|strand=" + direction + "|chrom=same","\t","22\t",'.',"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sz,','),"\t",string.join([str(i) for i in st],',') -+ print(chr[key].pop(),"\t",txSt[key],"\t",txEnd[key],"\t",key + "|strand=" + direction + "|chrom=same","\t","22\t",'.',"\t",txSt[key],"\t",txEnd[key],"\t","0,255,0\t",blocks[key],"\t",string.join(sz,','),"\t",string.join([str(i) for i in st],','), file=FO) - else: -- print >>FOF,key,"\t","pair-end mapped, but two ends mapped to different chromosome" -+ print(key,"\t","pair-end mapped, but two ends mapped to different chromosome", file=FOF) - elif(count[key] >2): #reads occur more than 2 times -- print >>FOF,key,"\t","occurs more than 2 times in sam file" -+ print(key,"\t","occurs more than 2 times in sam file", file=FOF) - continue - FO.close() - FOF.close() -- print >>sys.stderr, "Done" -+ print("Done", file=sys.stderr) - - - def samTowig(self,outfile=None,log2scale=False,header=False,strandSpecific=False): -@@ -326,7 +326,7 @@ class ParseSAM: - if outfile is None: - outfile = self.fileName + ".wig" - FO=open(outfile,'w') -- print >>sys.stderr, "Writing wig file to\"",outfile,"\"..." -+ print("Writing wig file to\"",outfile,"\"...", file=sys.stderr) - - headline="track type=wiggle_0 name=" + outfile + " track_label description='' visibility=full color=255,0,0" - wig=collections.defaultdict(dict) -@@ -359,24 +359,24 @@ class ParseSAM: - - blocks = cigar.fetch_exon(chrom,txStart,field[5]) - for block in blocks: -- hits.extend(range(block[1]+1,block[2]+1)) -+ hits.extend(list(range(block[1]+1,block[2]+1))) - - if strandSpecific is not True: - for i in hits: -- if wig[chrom].has_key(i): -+ if i in wig[chrom]: - wig[chrom][i] +=1 - else: - wig[chrom][i]=1 - else: - if strand_rule[read_type + strand] == '-': - for i in hits: -- if Nwig[chrom].has_key(i): -+ if i in Nwig[chrom]: - Nwig[chrom][i] += 1 - else: - Nwig[chrom][i] = 1 - if strand_rule[read_type + strand] == '+': - for i in hits: -- if Pwig[chrom].has_key(i): -+ if i in Pwig[chrom]: - Pwig[chrom][i] +=1 - else: - Pwig[chrom][i]=1 -@@ -385,17 +385,17 @@ class ParseSAM: - - if strandSpecific is not True: - for chr in sorted(wig.keys()): -- print >>sys.stderr, "Writing ",chr, " ..." -+ print("Writing ",chr, " ...", file=sys.stderr) - FO.write('variableStep chrom='+chr+'\n') - for coord in sorted(wig[chr]): - if log2scale:FO.write("%d\t%5.3f\n" % (coord,math.log(wig[chr][coord],2))) - else:FO.write("%d\t%d\n" % (coord,wig[chr][coord])) - else: -- chroms=set(Pwig.keys() + Nwig.keys()) -+ chroms=set(list(Pwig.keys()) + list(Nwig.keys())) - for chr in sorted(chroms): -- print >>sys.stderr, "Writing ",chr, " ..." -+ print("Writing ",chr, " ...", file=sys.stderr) - FO.write('variableStep chrom='+chr+'\n') -- coords=sorted(set(Pwig[chr].keys() + Nwig[chr].keys())) -+ coords=sorted(set(list(Pwig[chr].keys()) + list(Nwig[chr].keys()))) - for coord in coords: - if ((coord in Pwig[chr]) and (coord not in Nwig[chr])): - FO.write("%d\t%d\n" % (coord,Pwig[chr][coord])) -@@ -418,7 +418,7 @@ class ParseSAM: - else: outfile = self.fileName + ".unmap.fa" - FO=open(outfile,'w') - unmapCount=0 -- print >>sys.stderr, "Writing unmapped reads to\"",outfile,"\"... ", -+ print("Writing unmapped reads to\"",outfile,"\"... ", end=' ', file=sys.stderr) - - for line in self.f: - hits=[] -@@ -438,7 +438,7 @@ class ParseSAM: - if fastq: FO.write('@' + seqID + '\n' + seq +'\n' + '+' +'\n' + qual+'\n') - else: FO.write('>' + seqID + '\n' + seq +'\n') - -- print >>sys.stderr, str(unmapCount) + " reads saved!\n" -+ print(str(unmapCount) + " reads saved!\n", file=sys.stderr) - FO.close() - self.f.seek(0) - -@@ -449,7 +449,7 @@ class ParseSAM: - outfile = self.fileName + ".PP.sam" - FO=open(outfile,'w') - PPcount=0 -- print >>sys.stderr, "Writing proper paired reads to\"",outfile,"\"... ", -+ print("Writing proper paired reads to\"",outfile,"\"... ", end=' ', file=sys.stderr) - for line in self.f: - hits=[] - if line[0] == '@':continue #skip head lines -@@ -460,7 +460,7 @@ class ParseSAM: - PPcount +=1 - FO.write(line) - FO.close() -- print >>sys.stderr, str(PPcount) + " reads were saved!\n", -+ print(str(PPcount) + " reads were saved!\n", end=' ', file=sys.stderr) - self.f.seek(0) - - def samNVC(self,outfile=None): -@@ -481,7 +481,7 @@ class ParseSAM: - c_count=[] - g_count=[] - t_count=[] -- print >>sys.stderr, "reading sam file ... " -+ print("reading sam file ... ", file=sys.stderr) - for line in self.f: - if line.startswith('@'):continue #skip head lines - if ParseSAM._reExpr2.match(line):continue #skip blank lines -@@ -492,44 +492,44 @@ class ParseSAM: - RNA_read = field[9].upper() - else: - RNA_read = field[9].upper().translate(transtab)[::-1] -- for i in xrange(len(RNA_read)): -+ for i in range(len(RNA_read)): - key = str(i) + RNA_read[i] - base_freq[key] += 1 - -- print >>sys.stderr, "generating data matrix ..." -- print >>FO, "Position\tA\tC\tG\tT\tN\tX" -- for i in xrange(len(RNA_read)): -- print >>FO, str(i) + '\t', -- print >>FO, str(base_freq[str(i) + "A"]) + '\t', -+ print("generating data matrix ...", file=sys.stderr) -+ print("Position\tA\tC\tG\tT\tN\tX", file=FO) -+ for i in range(len(RNA_read)): -+ print(str(i) + '\t', end=' ', file=FO) -+ print(str(base_freq[str(i) + "A"]) + '\t', end=' ', file=FO) - a_count.append(str(base_freq[str(i) + "A"])) -- print >>FO, str(base_freq[str(i) + "C"]) + '\t', -+ print(str(base_freq[str(i) + "C"]) + '\t', end=' ', file=FO) - c_count.append(str(base_freq[str(i) + "C"])) -- print >>FO, str(base_freq[str(i) + "G"]) + '\t', -+ print(str(base_freq[str(i) + "G"]) + '\t', end=' ', file=FO) - g_count.append(str(base_freq[str(i) + "G"])) -- print >>FO, str(base_freq[str(i) + "T"]) + '\t', -+ print(str(base_freq[str(i) + "T"]) + '\t', end=' ', file=FO) - t_count.append(str(base_freq[str(i) + "T"])) -- print >>FO, str(base_freq[str(i) + "N"]) + '\t', -- print >>FO, str(base_freq[str(i) + "X"]) + '\t' -+ print(str(base_freq[str(i) + "N"]) + '\t', end=' ', file=FO) -+ print(str(base_freq[str(i) + "X"]) + '\t', file=FO) - FO.close() - - #generating R scripts -- print >>sys.stderr, "generating R script ..." -- print >>RS, "position=c(" + ','.join([str(i) for i in xrange(len(RNA_read))]) + ')' -- print >>RS, "A_count=c(" + ','.join(a_count) + ')' -- print >>RS, "C_count=c(" + ','.join(c_count) + ')' -- print >>RS, "G_count=c(" + ','.join(g_count) + ')' -- print >>RS, "T_count=c(" + ','.join(t_count) + ')' -- print >>RS, "total= A_count + C_count + G_count + T_count" -- print >>RS, "ym=max(A_count/total,C_count/total,G_count/total,T_count/total) + 0.05" -- print >>RS, "yn=min(A_count/total,C_count/total,G_count/total,T_count/total)" -+ print("generating R script ...", file=sys.stderr) -+ print("position=c(" + ','.join([str(i) for i in range(len(RNA_read))]) + ')', file=RS) -+ print("A_count=c(" + ','.join(a_count) + ')', file=RS) -+ print("C_count=c(" + ','.join(c_count) + ')', file=RS) -+ print("G_count=c(" + ','.join(g_count) + ')', file=RS) -+ print("T_count=c(" + ','.join(t_count) + ')', file=RS) -+ print("total= A_count + C_count + G_count + T_count", file=RS) -+ print("ym=max(A_count/total,C_count/total,G_count/total,T_count/total) + 0.05", file=RS) -+ print("yn=min(A_count/total,C_count/total,G_count/total,T_count/total)", file=RS) - -- print >>RS, 'pdf("NVC_plot.pdf")' -- print >>RS, 'plot(position,A_count/total,type="o",pch=20,ylim=c(yn,ym),col="dark green",xlab="Position of Read",ylab="Nucleotide Frequency")' -- print >>RS, 'lines(position,T_count/total,type="o",pch=20,col="red")' -- print >>RS, 'lines(position,G_count/total,type="o",pch=20,col="blue")' -- print >>RS, 'lines(position,C_count/total,type="o",pch=20,col="cyan")' -- print >>RS, 'legend('+ str(len(RNA_read)-10) + ',ym,legend=c("A","T","G","C"),col=c("dark green","red","blue","cyan"),lwd=2,pch=20,text.col=c("dark green","red","blue","cyan"))' -- print >>RS, "dev.off()" -+ print('pdf("NVC_plot.pdf")', file=RS) -+ print('plot(position,A_count/total,type="o",pch=20,ylim=c(yn,ym),col="dark green",xlab="Position of Read",ylab="Nucleotide Frequency")', file=RS) -+ print('lines(position,T_count/total,type="o",pch=20,col="red")', file=RS) -+ print('lines(position,G_count/total,type="o",pch=20,col="blue")', file=RS) -+ print('lines(position,C_count/total,type="o",pch=20,col="cyan")', file=RS) -+ print('legend('+ str(len(RNA_read)-10) + ',ym,legend=c("A","T","G","C"),col=c("dark green","red","blue","cyan"),lwd=2,pch=20,text.col=c("dark green","red","blue","cyan"))', file=RS) -+ print("dev.off()", file=RS) - - RS.close() - #self.f.seek(0) -@@ -546,7 +546,7 @@ class ParseSAM: - RS=open(outfile2,'w') - - gc_hist=collections.defaultdict(int) #key is GC percent, value is count of reads -- print >>sys.stderr, "reading sam file ... " -+ print("reading sam file ... ", file=sys.stderr) - for line in self.f: - if line[0] == '@':continue #skip head lines - if ParseSAM._reExpr2.match(line):continue #skip blank lines -@@ -556,18 +556,18 @@ class ParseSAM: - #print gc_percent - gc_hist[gc_percent] += 1 - -- print >>sys.stderr, "writing GC content ..." -+ print("writing GC content ...", file=sys.stderr) - -- print >>FO, "GC%\tread_count" -- for i in gc_hist.keys(): -- print >>FO, i + '\t' + str(gc_hist[i]) -+ print("GC%\tread_count", file=FO) -+ for i in list(gc_hist.keys()): -+ print(i + '\t' + str(gc_hist[i]), file=FO) - -- print >>sys.stderr, "writing R script ..." -- print >>RS, "pdf('GC_content.pdf')" -- print >>RS, 'gc=rep(c(' + ','.join([i for i in gc_hist.keys()]) + '),' + 'times=c(' + ','.join([str(i) for i in gc_hist.values()]) + '))' -- print >>RS, 'hist(gc,probability=T,breaks=%d,xlab="GC content (%%)",ylab="Density of Reads",border="blue",main="")' % 100 -+ print("writing R script ...", file=sys.stderr) -+ print("pdf('GC_content.pdf')", file=RS) -+ print('gc=rep(c(' + ','.join([i for i in list(gc_hist.keys())]) + '),' + 'times=c(' + ','.join([str(i) for i in list(gc_hist.values())]) + '))', file=RS) -+ print('hist(gc,probability=T,breaks=%d,xlab="GC content (%%)",ylab="Density of Reads",border="blue",main="")' % 100, file=RS) - #print >>RS, "lines(density(gc),col='red')" -- print >>RS ,"dev.off()" -+ print("dev.off()", file=RS) - #self.f.seek(0) - - def samDupRate(self,outfile=None,up_bound=500): -@@ -589,7 +589,7 @@ class ParseSAM: - - seqDup_count=collections.defaultdict(int) - posDup_count=collections.defaultdict(int) -- print >>sys.stderr, "reading sam file ... " -+ print("reading sam file ... ", file=sys.stderr) - for line in self.f: - if line[0] == '@':continue #skip head lines - if ParseSAM._reExpr2.match(line):continue #skip blank lines -@@ -616,37 +616,37 @@ class ParseSAM: - coord = chrom + ":" + str(chromStart) + "-" + str(chromEnd) + ":" + blockSizes + ":" + blockStarts - posDup[coord] +=1 - -- print >>sys.stderr, "report duplicte rate based on sequence ..." -- print >>SEQ, "Occurrence\tUniqReadNumber" -- for i in seqDup.values(): #key is occurence, value is uniq reads number (based on seq) -+ print("report duplicte rate based on sequence ...", file=sys.stderr) -+ print("Occurrence\tUniqReadNumber", file=SEQ) -+ for i in list(seqDup.values()): #key is occurence, value is uniq reads number (based on seq) - seqDup_count[i] +=1 -- for k in sorted(seqDup_count.iterkeys()): -- print >>SEQ, str(k) +'\t'+ str(seqDup_count[k]) -+ for k in sorted(seqDup_count.keys()): -+ print(str(k) +'\t'+ str(seqDup_count[k]), file=SEQ) - SEQ.close() - -- print >>sys.stderr, "report duplicte rate based on mapping ..." -- print >>POS, "Occurrence\tUniqReadNumber" -- for i in posDup.values(): #key is occurence, value is uniq reads number (based on coord) -+ print("report duplicte rate based on mapping ...", file=sys.stderr) -+ print("Occurrence\tUniqReadNumber", file=POS) -+ for i in list(posDup.values()): #key is occurence, value is uniq reads number (based on coord) - posDup_count[i] +=1 -- for k in sorted(posDup_count.iterkeys()): -- print >>POS, str(k) +'\t'+ str(posDup_count[k]) -+ for k in sorted(posDup_count.keys()): -+ print(str(k) +'\t'+ str(posDup_count[k]), file=POS) - POS.close() - - -- print >>sys.stderr, "generate R script ..." -- print >>RS, "pdf('duplicateRead.pdf')" -- print >>RS, "par(mar=c(5,4,4,5),las=0)" -- print >>RS, "seq_occ=c(" + ','.join([str(i) for i in sorted(seqDup_count.iterkeys()) ]) + ')' -- print >>RS, "seq_uniqRead=c(" + ','.join([str(seqDup_count[i]) for i in sorted(seqDup_count.iterkeys()) ]) + ')' -- print >>RS, "pos_occ=c(" + ','.join([str(i) for i in sorted(posDup_count.iterkeys()) ]) + ')' -- print >>RS, "pos_uniqRead=c(" + ','.join([str(posDup_count[i]) for i in sorted(posDup_count.iterkeys()) ]) + ')' -- print >>RS, "plot(pos_occ,log10(pos_uniqRead),ylab='Number of Reads (log10)',xlab='Frequency',pch=4,cex=0.8,col='blue',xlim=c(1,%d),yaxt='n')" % up_bound -- print >>RS, "points(seq_occ,log10(seq_uniqRead),pch=20,cex=0.8,col='red')" -- print >>RS, 'ym=floor(max(log10(pos_uniqRead)))' -- print >>RS, "legend(%d,ym,legend=c('Sequence-base','Mapping-base'),col=c('red','blue'),pch=c(4,20))" % max(up_bound-200,1) -- print >>RS, 'axis(side=2,at=0:ym,labels=0:ym)' -- print >>RS, 'axis(side=4,at=c(log10(pos_uniqRead[1]),log10(pos_uniqRead[2]),log10(pos_uniqRead[3]),log10(pos_uniqRead[4])), labels=c(round(pos_uniqRead[1]*100/sum(pos_uniqRead)),round(pos_uniqRead[2]*100/sum(pos_uniqRead)),round(pos_uniqRead[3]*100/sum(pos_uniqRead)),round(pos_uniqRead[4]*100/sum(pos_uniqRead))))' -- print >>RS, 'mtext(4, text = "Reads %", line = 2)' -+ print("generate R script ...", file=sys.stderr) -+ print("pdf('duplicateRead.pdf')", file=RS) -+ print("par(mar=c(5,4,4,5),las=0)", file=RS) -+ print("seq_occ=c(" + ','.join([str(i) for i in sorted(seqDup_count.keys()) ]) + ')', file=RS) -+ print("seq_uniqRead=c(" + ','.join([str(seqDup_count[i]) for i in sorted(seqDup_count.keys()) ]) + ')', file=RS) -+ print("pos_occ=c(" + ','.join([str(i) for i in sorted(posDup_count.keys()) ]) + ')', file=RS) -+ print("pos_uniqRead=c(" + ','.join([str(posDup_count[i]) for i in sorted(posDup_count.keys()) ]) + ')', file=RS) -+ print("plot(pos_occ,log10(pos_uniqRead),ylab='Number of Reads (log10)',xlab='Frequency',pch=4,cex=0.8,col='blue',xlim=c(1,%d),yaxt='n')" % up_bound, file=RS) -+ print("points(seq_occ,log10(seq_uniqRead),pch=20,cex=0.8,col='red')", file=RS) -+ print('ym=floor(max(log10(pos_uniqRead)))', file=RS) -+ print("legend(%d,ym,legend=c('Sequence-base','Mapping-base'),col=c('red','blue'),pch=c(4,20))" % max(up_bound-200,1), file=RS) -+ print('axis(side=2,at=0:ym,labels=0:ym)', file=RS) -+ print('axis(side=4,at=c(log10(pos_uniqRead[1]),log10(pos_uniqRead[2]),log10(pos_uniqRead[3]),log10(pos_uniqRead[4])), labels=c(round(pos_uniqRead[1]*100/sum(pos_uniqRead)),round(pos_uniqRead[2]*100/sum(pos_uniqRead)),round(pos_uniqRead[3]*100/sum(pos_uniqRead)),round(pos_uniqRead[4]*100/sum(pos_uniqRead))))', file=RS) -+ print('mtext(4, text = "Reads %", line = 2)', file=RS) - #self.f.seek(0) - - def getUniqMapRead(self,outfile=None): -@@ -655,7 +655,7 @@ class ParseSAM: - outfile = self.fileName + ".uniq.sam" - FO=open(outfile,'w') - Uniqcount=0 -- print >>sys.stderr, "Writing uniquely mapped reads to\"",outfile,"\"... ", -+ print("Writing uniquely mapped reads to\"",outfile,"\"... ", end=' ', file=sys.stderr) - for line in self.f: - hits=[] - if line[0] == '@':continue #skip head lines -@@ -667,11 +667,11 @@ class ParseSAM: - #else: - #print >>sys.stderr,line, - if (ParseSAM._uniqueHit_pat.search(line)): -- print >>sys.stderr,line, -+ print(line, end=' ', file=sys.stderr) - Uniqcount +=1 - FO.write(line) - FO.close() -- print >>sys.stderr, str(Uniqcount) + " reads were saved!\n", -+ print(str(Uniqcount) + " reads were saved!\n", end=' ', file=sys.stderr) - self.f.seek(0) - - def getWrongStrand(self,outfile=None): -@@ -680,7 +680,7 @@ class ParseSAM: - outfile = self.fileName + ".wrongStrand.sam" - FO=open(outfile,'w') - wrongStrand=0 -- print >>sys.stderr, "Writing incorrectly stranded reads to\"",outfile,"\"... ", -+ print("Writing incorrectly stranded reads to\"",outfile,"\"... ", end=' ', file=sys.stderr) - for line in self.f: - hits=[] - if line.startswith('@'):continue #skip head lines -@@ -701,7 +701,7 @@ class ParseSAM: - wrongStrand+=1 - - FO.close() -- print >>sys.stderr, str(wrongStrand) + " reads were saved!\n", -+ print(str(wrongStrand) + " reads were saved!\n", end=' ', file=sys.stderr) - self.f.seek(0) - - def filterSpliceRead(self,outfile=None,min_overhang=8,min_gap=50,max_gap=1000000): -@@ -714,7 +714,7 @@ class ParseSAM: - outfile = self.fileName + ".SR.sam" - #outfile2 = self.fileName + ".SR.filter.sam" - splice_sites=collections.defaultdict(set) -- print >>sys.stderr, "\tDetermine splice sites with proper overhang, intron size ... ", -+ print("\tDetermine splice sites with proper overhang, intron size ... ", end=' ', file=sys.stderr) - for line in self.f: - if line[0] == '@':continue #skip head lines - if ParseSAM._reExpr2.match(line):continue #skip blank lines -@@ -741,12 +741,12 @@ class ParseSAM: - if (comb[2] >= min_overhang): - splice_sites[chrom].add(map_st + comb[0] + comb[1]) - self.f.seek(0) -- print >>sys.stderr, "Done" -+ print("Done", file=sys.stderr) - - - FO=open(outfile,'w') - #FO2=open(outfile2,'w') -- print >>sys.stderr, "\tExtracting splicing reads ... ", -+ print("\tExtracting splicing reads ... ", end=' ', file=sys.stderr) - total_SR =0 - extract_SR =0 - total_read =0 -@@ -778,10 +778,10 @@ class ParseSAM: - else: - #FO2.write(line) - continue -- print >>sys.stderr, "Done" -- print >>sys.stderr, "\tTotal mapped Read: " + str(total_read) -- print >>sys.stderr, "\tTotal Splicing Read: " + str(total_SR) -- print >>sys.stderr, "\Usable Splicing Read: " + str(extract_SR) -+ print("Done", file=sys.stderr) -+ print("\tTotal mapped Read: " + str(total_read), file=sys.stderr) -+ print("\tTotal Splicing Read: " + str(total_SR), file=sys.stderr) -+ print("\\Usable Splicing Read: " + str(extract_SR), file=sys.stderr) - FO.close() - #FO2.close() - self.f.seek(0) -@@ -792,7 +792,7 @@ class ParseSAM: - if outfile is None: - outfile = self.fileName + ".SR.sam" - FO=open(outfile,'w') -- print >>sys.stderr, "\tExtract splicing reads without any filter ...", -+ print("\tExtract splicing reads without any filter ...", end=' ', file=sys.stderr) - for line in self.f: - if line[0] == '@':continue #skip head lines - if ParseSAM._reExpr2.match(line):continue #skip blank lines -@@ -803,7 +803,7 @@ class ParseSAM: - if (len(comb)>=3): - FO.write(line) - -- print >>sys.stderr, "Done" -+ print("Done", file=sys.stderr) - self.f.seek(0) - FO.close() - -@@ -812,7 +812,7 @@ class ParseSAM: - The original SAM file must be sorted before hand. if not, using linux command like "sort -k3,3 -k4,4n myfile.sam >myfile.sorted.sam" ''' - if outfile is None: - outfile = self.fileName + ".collapsed.sam" -- print >>sys.stderr, "Writing collapsed SAM file to\"",outfile,"\"... " -+ print("Writing collapsed SAM file to\"",outfile,"\"... ", file=sys.stderr) - FO=open(outfile,'w') - flag="" - for line in self.f: -@@ -840,7 +840,7 @@ class ParseSAM: - else: - outfile = outfile + ".qual.plot.r" - FO=open(outfile,'w') -- print >>sys.stderr, "\tcalculating quality score ... " -+ print("\tcalculating quality score ... ", file=sys.stderr) - qual_min={} - qual_max={} - qual_sum={} -@@ -875,16 +875,16 @@ class ParseSAM: - max_qualities =[str(qual_max[i]) for i in range(0,read_len)] - avg_qualities = [str(qual_sum[i]/total_read) for i in range(0,read_len)] - nt_pos = [str(i) for i in range(0,read_len)] -- print >>FO, "nt_pos=c(" + ','.join(nt_pos) + ')' -- print >>FO, "max_qual=c(" + ','.join(max_qualities) + ')' -- print >>FO, "min_qual=c(" + ','.join(min_qualities) + ')' -- print >>FO, "avg_qual=c(" + ','.join(avg_qualities) + ')' -- print >>FO, "pdf('phred_qual.pdf')" -- print >>FO, "plot(nt_pos,avg_qual, xlab=\"Nucleotide Position (5'->3')\", ylab='Phred Quality',ylim=c(0,97),lwd=2,type='s')" -- print >>FO, 'lines(nt_pos,max_qual,type="s",lwd=2,col="red")' -- print >>FO, 'lines(nt_pos,min_qual,type="s",lwd=2,col="blue")' -- print >>FO, 'legend(0,100,legend=c("Max","Average","Min"),col=c("red","black","blue"),lwd=2)' -- print >>FO, 'dev.off()' -+ print("nt_pos=c(" + ','.join(nt_pos) + ')', file=FO) -+ print("max_qual=c(" + ','.join(max_qualities) + ')', file=FO) -+ print("min_qual=c(" + ','.join(min_qualities) + ')', file=FO) -+ print("avg_qual=c(" + ','.join(avg_qualities) + ')', file=FO) -+ print("pdf('phred_qual.pdf')", file=FO) -+ print("plot(nt_pos,avg_qual, xlab=\"Nucleotide Position (5'->3')\", ylab='Phred Quality',ylim=c(0,97),lwd=2,type='s')", file=FO) -+ print('lines(nt_pos,max_qual,type="s",lwd=2,col="red")', file=FO) -+ print('lines(nt_pos,min_qual,type="s",lwd=2,col="blue")', file=FO) -+ print('legend(0,100,legend=c("Max","Average","Min"),col=c("red","black","blue"),lwd=2)', file=FO) -+ print('dev.off()', file=FO) - #for i in range(0,read_len): - # print >>sys.stderr, str(i) + '\t' + str(qual_max[i]) + '\t' + str(qual_min[i]) + '\t' + str(qual_sum[i]/total_read) - #self.f.seek(0) -@@ -918,7 +918,7 @@ class ParseSAM: - scores[chrom][pos] =1 - else: - scores[chrom][pos] +=1 -- if lines % 10000 == 0: print >>sys.stderr, "%i lines loaded \r" % lines -+ if lines % 10000 == 0: print("%i lines loaded \r" % lines, file=sys.stderr) - return scores - self.f.seek(0) - -@@ -943,7 +943,7 @@ class QCSAM: - The 5th column is number of reads fallen into the region defined by the first 3 columns''' - - if refbed is None: -- print >>sys.stderr,"You must specify a bed file representing gene model\n" -+ print("You must specify a bed file representing gene model\n", file=sys.stderr) - exit(0) - if outfile is None: - exon_count = self.fileName + "_exon.count.bed" -@@ -968,7 +968,7 @@ class QCSAM: - splicedReads=0 - - #read SAM -- print >>sys.stderr, "reading "+ self.fileName + '...', -+ print("reading "+ self.fileName + '...', end=' ', file=sys.stderr) - for line in self.f: - if line.startswith("@"):continue - fields=line.rstrip('\n ').split() -@@ -990,10 +990,10 @@ class QCSAM: - ranges[chrom].add_interval( Interval( mid, mid ) ) - - self.f.seek(0) -- print >>sys.stderr, "Done" -+ print("Done", file=sys.stderr) - - #read refbed file -- print >>sys.stderr, "Assign reads to "+ refbed + '...', -+ print("Assign reads to "+ refbed + '...', end=' ', file=sys.stderr) - for line in open(refbed,'r'): - try: - if line.startswith('#'):continue -@@ -1007,14 +1007,14 @@ class QCSAM: - geneName = fields[3] - strand = fields[5].replace(" ","_") - -- exon_starts = map( int, fields[11].rstrip( ',\n' ).split( ',' ) ) -- exon_starts = map((lambda x: x + tx_start ), exon_starts) -- exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) ) -- exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends); -+ exon_starts = list(map( int, fields[11].rstrip( ',\n' ).split( ',' ) )) -+ exon_starts = list(map((lambda x: x + tx_start ), exon_starts)) -+ exon_ends = list(map( int, fields[10].rstrip( ',\n' ).split( ',' ) )) -+ exon_ends = list(map((lambda x, y: x + y ), exon_starts, exon_ends)); - intron_starts = exon_ends[:-1] - intron_ends=exon_starts[1:] - except: -- print >>sys.stderr,"[NOTE:input bed must be 12-column] skipped this line: " + line, -+ print("[NOTE:input bed must be 12-column] skipped this line: " + line, end=' ', file=sys.stderr) - continue - - # assign reads to intron -@@ -1050,28 +1050,28 @@ class QCSAM: - EXON_OUT.write(chrom + "\t" + str(st) + "\t" + str(end) + "\t" + geneName + "_exon_" + str(exonNum) + "\t" + str(hits) + "\t" + strand + '\n') - exonNum += 1 - intergenicReads=totalReads-exonReads-intronReads-splicedReads -- print >>sys.stderr, "Done." + '\n' -- print >>sys.stderr, "Total reads:\t" + str(totalReads) -- print >>sys.stderr, "Exonic reads:\t" + str(exonReads) -- print >>sys.stderr, "Intronic reads:\t" + str(intronReads) -- print >>sys.stderr, "Splicing reads:\t" + str(splicedReads) -- print >>sys.stderr, "Intergenic reads:\t" + str(intergenicReads) -+ print("Done." + '\n', file=sys.stderr) -+ print("Total reads:\t" + str(totalReads), file=sys.stderr) -+ print("Exonic reads:\t" + str(exonReads), file=sys.stderr) -+ print("Intronic reads:\t" + str(intronReads), file=sys.stderr) -+ print("Splicing reads:\t" + str(splicedReads), file=sys.stderr) -+ print("Intergenic reads:\t" + str(intergenicReads), file=sys.stderr) - -- print >>sys.stderr,"writing R script ...", -+ print("writing R script ...", end=' ', file=sys.stderr) - totalReads=float(totalReads) -- print >>R_OUT, "pdf('%s')" % rpdf -- print >>R_OUT, "dat=c(%d,%d,%d,%d)" % (exonReads,splicedReads,intronReads,intergenicReads) -- print >>R_OUT, "lb=c('exon(%.2f)','junction(%.2f)','intron(%.2f)','intergenic(%.2f)')" % (exonReads/totalReads,splicedReads/totalReads,intronReads/totalReads,intergenicReads/totalReads) -- print >>R_OUT, "pie(dat,labels=lb,col=rainbow(4),clockwise=TRUE,main='Total reads = %d')" % int(totalReads) -- print >>R_OUT, "dev.off()" -- print >>sys.stderr, "Done." -+ print("pdf('%s')" % rpdf, file=R_OUT) -+ print("dat=c(%d,%d,%d,%d)" % (exonReads,splicedReads,intronReads,intergenicReads), file=R_OUT) -+ print("lb=c('exon(%.2f)','junction(%.2f)','intron(%.2f)','intergenic(%.2f)')" % (exonReads/totalReads,splicedReads/totalReads,intronReads/totalReads,intergenicReads/totalReads), file=R_OUT) -+ print("pie(dat,labels=lb,col=rainbow(4),clockwise=TRUE,main='Total reads = %d')" % int(totalReads), file=R_OUT) -+ print("dev.off()", file=R_OUT) -+ print("Done.", file=sys.stderr) - - - def coverageGeneBody(self,refbed,outfile=None): - '''Calculate reads coverage over gene body, from 5'to 3'. each gene will be equally divied - into 100 regsions''' - if refbed is None: -- print >>sys.stderr,"You must specify a bed file representing gene model\n" -+ print("You must specify a bed file representing gene model\n", file=sys.stderr) - exit(0) - if outfile is None: - outfile1 = self.fileName + ".geneBodyCoverage_plot.r" -@@ -1088,7 +1088,7 @@ class QCSAM: - rpkm={} - - #read SAM -- print >>sys.stderr, "reading "+ self.fileName + '...', -+ print("reading "+ self.fileName + '...', end=' ', file=sys.stderr) - for line in self.f: - if line.startswith("@"):continue - fields=line.rstrip('\n ').split() -@@ -1114,9 +1114,9 @@ class QCSAM: - ranges[chrom] = Intersecter() - else: - ranges[chrom].add_interval( Interval( st, st+size ) ) -- print >>sys.stderr, "Done" -+ print("Done", file=sys.stderr) - -- print >>sys.stderr, "calculating coverage over gene body ..." -+ print("calculating coverage over gene body ...", file=sys.stderr) - coverage=collections.defaultdict(int) - flag=0 - for line in open(refbed,'r'): -@@ -1130,19 +1130,19 @@ class QCSAM: - geneName = fields[3] - strand = fields[5] - -- exon_starts = map( int, fields[11].rstrip( ',\n' ).split( ',' ) ) -- exon_starts = map((lambda x: x + tx_start ), exon_starts) -- exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) ) -- exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends); -+ exon_starts = list(map( int, fields[11].rstrip( ',\n' ).split( ',' ) )) -+ exon_starts = list(map((lambda x: x + tx_start ), exon_starts)) -+ exon_ends = list(map( int, fields[10].rstrip( ',\n' ).split( ',' ) )) -+ exon_ends = list(map((lambda x, y: x + y ), exon_starts, exon_ends)); - except: -- print >>sys.stderr,"[NOTE:input bed must be 12-column] skipped this line: " + line, -+ print("[NOTE:input bed must be 12-column] skipped this line: " + line, end=' ', file=sys.stderr) - continue - gene_all_base=[] - percentile_base=[] - mRNA_len =0 - flag=0 - for st,end in zip(exon_starts,exon_ends): -- gene_all_base.extend(range(st+1,end+1)) #0-based coordinates on genome -+ gene_all_base.extend(list(range(st+1,end+1))) #0-based coordinates on genome - mRNA_len = len(gene_all_base) - if mRNA_len <100: - flag=1 -@@ -1159,18 +1159,18 @@ class QCSAM: - coverage[i] += len(ranges[chrom].find(percentile_base[i], percentile_base[i]+1)) - x_coord=[] - y_coord=[] -- print >>OUT2, "Total reads: " + str(totalReads) -- print >>OUT2, "Fragment number: " + str(fragment_num) -- print >>OUT2, "percentile\tcount" -+ print("Total reads: " + str(totalReads), file=OUT2) -+ print("Fragment number: " + str(fragment_num), file=OUT2) -+ print("percentile\tcount", file=OUT2) - for i in coverage: - x_coord.append(str(i)) - y_coord.append(str(coverage[i])) -- print >>OUT2, str(i) + '\t' + str(coverage[i]) -- print >>OUT1, "pdf('geneBody_coverage.pdf')" -- print >>OUT1, "x=0:100" -- print >>OUT1, "y=c(" + ','.join(y_coord) + ')' -- print >>OUT1, "plot(x,y,xlab=\"percentile of gene body (5'->3')\",ylab='read number',type='s')" -- print >>OUT1, "dev.off()" -+ print(str(i) + '\t' + str(coverage[i]), file=OUT2) -+ print("pdf('geneBody_coverage.pdf')", file=OUT1) -+ print("x=0:100", file=OUT1) -+ print("y=c(" + ','.join(y_coord) + ')', file=OUT1) -+ print("plot(x,y,xlab=\"percentile of gene body (5'->3')\",ylab='read number',type='s')", file=OUT1) -+ print("dev.off()", file=OUT1) - - def calculateRPKM(self,refbed,outfile=None): - '''calculate RPKM values for each gene in refbed. Only uniquely aligned reads are used. -@@ -1178,7 +1178,7 @@ class QCSAM: - exon per Million mapped reads) for each exon, intron and mRNA''' - - if refbed is None: -- print >>sys.stderr,"You must specify a bed file representing gene model\n" -+ print("You must specify a bed file representing gene model\n", file=sys.stderr) - exit(0) - if outfile is None: - rpkm_file = self.fileName + ".rpkm.xls" -@@ -1194,7 +1194,7 @@ class QCSAM: - rpkm={} - - #read SAM -- print >>sys.stderr, "reading "+ self.fileName + '...', -+ print("reading "+ self.fileName + '...', end=' ', file=sys.stderr) - for line in self.f: - if line.startswith("@"):continue - fields=line.rstrip('\n ').split() -@@ -1228,17 +1228,17 @@ class QCSAM: - ranges[chrom].add_interval( Interval( mid, mid ) ) - - self.f.seek(0) -- print >>sys.stderr, "Done" -- print >>RPKM_OUT, "Total mapped reads (TR): " + str(totalReads) -- print >>RPKM_OUT, "Multiple mapped reads (MR): " + str(multiMapReads) -- print >>RPKM_OUT, "Uniquely mapped reads (UR): " + str(totalReads - multiMapReads) -- print >>RPKM_OUT, "Spliced mapped reads (SR): " + str(sR) -- print >>RPKM_OUT, "Corrected uniquely mapped reads (cUR): " + str(cUR) -+ print("Done", file=sys.stderr) -+ print("Total mapped reads (TR): " + str(totalReads), file=RPKM_OUT) -+ print("Multiple mapped reads (MR): " + str(multiMapReads), file=RPKM_OUT) -+ print("Uniquely mapped reads (UR): " + str(totalReads - multiMapReads), file=RPKM_OUT) -+ print("Spliced mapped reads (SR): " + str(sR), file=RPKM_OUT) -+ print("Corrected uniquely mapped reads (cUR): " + str(cUR), file=RPKM_OUT) - if totalReads ==0: - sys.exit(1) - - #read refbed file -- print >>sys.stderr, "Assign reads to "+ refbed + '...', -+ print("Assign reads to "+ refbed + '...', end=' ', file=sys.stderr) - for line in open(refbed,'r'): - try: - if line.startswith('#'):continue -@@ -1252,16 +1252,16 @@ class QCSAM: - geneName = fields[3] - strand = fields[5].replace(" ","_") - -- exon_starts = map( int, fields[11].rstrip( ',\n' ).split( ',' ) ) -- exon_starts = map((lambda x: x + tx_start ), exon_starts) -- exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) ) -- exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends) -- exon_sizes = map(int,fields[10].rstrip(',\n').split(',')) -+ exon_starts = list(map( int, fields[11].rstrip( ',\n' ).split( ',' ) )) -+ exon_starts = list(map((lambda x: x + tx_start ), exon_starts)) -+ exon_ends = list(map( int, fields[10].rstrip( ',\n' ).split( ',' ) )) -+ exon_ends = list(map((lambda x, y: x + y ), exon_starts, exon_ends)) -+ exon_sizes = list(map(int,fields[10].rstrip(',\n').split(','))) - intron_starts = exon_ends[:-1] - intron_ends=exon_starts[1:] - key='\t'.join((chrom.lower(),str(tx_start),str(tx_end),geneName,'0',strand)) - except: -- print >>sys.stderr,"[NOTE:input bed must be 12-column] skipped this line: " + line, -+ print("[NOTE:input bed must be 12-column] skipped this line: " + line, end=' ', file=sys.stderr) - continue - - # assign reads to intron -@@ -1309,7 +1309,7 @@ class QCSAM: - except: - RPKM_OUT.write(chrom.lower() + "\t" + str(tx_start) + "\t" + str(tx_end) + "\t" + geneName + "_mRNA" + "\t" + str(0) + "\t" + strand + '\t' + str(0) +'\n') - rpkm[key] = 0 -- print >>sys.stderr, "Done" -+ print("Done", file=sys.stderr) - return rpkm - self.f.seek(0) - -@@ -1320,7 +1320,7 @@ class QCSAM: - NOTE: intronic reads are not counted as part of total reads''' - - if refbed is None: -- print >>sys.stderr,"You must specify a bed file representing gene model\n" -+ print("You must specify a bed file representing gene model\n", file=sys.stderr) - exit(0) - if outfile is None: - rpkm_file = self.fileName + ".rpkm.xls" -@@ -1338,7 +1338,7 @@ class QCSAM: - rpkm={} - - #read gene model file, the purpose is to remove intronic reads -- print >>sys.stderr, "Reading reference gene model "+ refbed + '...' -+ print("Reading reference gene model "+ refbed + '...', file=sys.stderr) - for line in open(refbed,'r'): - try: - if line.startswith(('#','track','browser')):continue -@@ -1351,12 +1351,12 @@ class QCSAM: - geneName = fields[3] - strand = fields[5].replace(" ","_") - -- exon_starts = map( int, fields[11].rstrip( ',\n' ).split( ',' ) ) -- exon_starts = map((lambda x: x + tx_start ), exon_starts) -- exon_ends = map( int, fields[10].rstrip( ',\n' ).split( ',' ) ) -- exon_ends = map((lambda x, y: x + y ), exon_starts, exon_ends); -+ exon_starts = list(map( int, fields[11].rstrip( ',\n' ).split( ',' ) )) -+ exon_starts = list(map((lambda x: x + tx_start ), exon_starts)) -+ exon_ends = list(map( int, fields[10].rstrip( ',\n' ).split( ',' ) )) -+ exon_ends = list(map((lambda x, y: x + y ), exon_starts, exon_ends)); - except: -- print >>sys.stderr,"[NOTE:input bed must be 12-column] skipped this line: " + line, -+ print("[NOTE:input bed must be 12-column] skipped this line: " + line, end=' ', file=sys.stderr) - continue - - for st,end in zip(exon_starts,exon_ends): -@@ -1366,7 +1366,7 @@ class QCSAM: - exon_ranges[chrom].add_interval( Interval( st, end ) ) - - #read SAM -- print >>sys.stderr, "reading "+ self.fileName + '...', -+ print("reading "+ self.fileName + '...', end=' ', file=sys.stderr) - for line in self.f: - if line.startswith("@"):continue - fields=line.rstrip('\n ').split() -@@ -1401,22 +1401,22 @@ class QCSAM: - ranges[chrom] = Intersecter() - else: - ranges[chrom].add_interval( Interval( mid, mid ) ) -- else: #if this framgnet is intronic, skip it. -+ else: #if this framgnet is intronic, skip it. - #intronic +=1 -- continue -+ continue - self.f.seek(0) -- print >>sys.stderr, "Done" -- print >>RPKM_OUT, "Total mapped reads (TR): " + str(totalReads) -- print >>RPKM_OUT, "Multiple mapped reads (MR): " + str(multiMapReads) -- print >>RPKM_OUT, "Uniquely mapped reads (UR): " + str(totalReads - multiMapReads) -- print >>RPKM_OUT, "Spliced mapped reads (SR): " + str(sR) -- print >>RPKM_OUT, "Corrected uniquely mapped reads (cUR, non-intronic fragments): " + str(cUR) -+ print("Done", file=sys.stderr) -+ print("Total mapped reads (TR): " + str(totalReads), file=RPKM_OUT) -+ print("Multiple mapped reads (MR): " + str(multiMapReads), file=RPKM_OUT) -+ print("Uniquely mapped reads (UR): " + str(totalReads - multiMapReads), file=RPKM_OUT) -+ print("Spliced mapped reads (SR): " + str(sR), file=RPKM_OUT) -+ print("Corrected uniquely mapped reads (cUR, non-intronic fragments): " + str(cUR), file=RPKM_OUT) - #print >>RPKM_OUT, "Intronic Fragments (IF): " + str(intronic) - if totalReads ==0: - sys.exit(1) - - #read refbed file -- print >>sys.stderr, "Assign reads to "+ refbed + '...', -+ print("Assign reads to "+ refbed + '...', end=' ', file=sys.stderr) - for line in open(refbed,'r'): - try: - if line.startswith('#'):continue *** 2075 LINES SKIPPED ***