B-Form DNA generator for NIH-XPLOR - python script

From NMR Wiki

Jump to: navigation, search

This code will generate for any DNA sequence restraints for the NIH-XPLOR to produce perfect B-form DNA coordinates. At least that was an intention.

The script produced only restraints, to generate coordinates these restraints will need to be run in XPLOR simulated annealing procedure.

#
# generate XPLOR restraints for b-form DNA based on primary sequence of DNA
# modify sequence and base count on single strand below and run the script
#
# complimentary strand called dnaB will be added automatically
# in addition to dnaA strand
#
seqA = "CYT ADE ADE GUA ADE GUA THY CYT ADE ADE THY CYT"
selLen = 12 #number of bases
 
def dnaGetComplSeq(seq):
        seq = seq.split()
        seq.reverse()
        out = []
        for r in seq:
                if r == 'GUA':
                        r = 'CYT'
                elif r == 'CYT':
                        r = 'GUA'
                elif r == 'ADE':
                        r = 'THY'
                elif r == 'THY':
                        r = 'ADE'
                out.append(r)
        return ' '.join(out)
 
def dihe(res1,at1,res2,at2,res3,at3,res4,at4,segid='    ',ang=None,dev=None,pow=2,const=1):
        out = ''
        out += 'assign (segid "%s" and resid %d and name %s)\n' % (segid,res1,at1)
        out += '\t(segid "%s" and resid %d and name %s)\n' % (segid,res2,at2)
        out += '\t(segid "%s" and resid %d and name %s)\n' % (segid,res3,at3)
        out += '\t(segid "%s" and resid %d and name %s) %d %5.1f %5.1f %d\n' \
                                                % (segid,res4,at4,const,ang,dev,pow)
        return out
 
def unambig_noe(seg1,res1,at1,seg2,res2,at2,upl,plus,minus):
        out = 'assign (segid "%s" and resid %d and name %s)\n' % (seg1,res1,at1)
        out += '\t(segid "%s" and resid %d and name %s) %5.2f %5.2f %5.2f\n' % \
                        (seg2,res2,at2,upl,plus,minus)
        return out
 
def setBDnaDistances(seq1,seg1,start1,seg2,start2):
        seq2 = dnaGetComplSeq(seq1)
        seq1 = seq1.split()
        seq2 = seq2.split()
        nres = len(seq1)
        #sugar intranucleotide
        lines = []
        lines.append('noe\n')
        for k in range(nres):
                i = k + start1
                j = k + start2
                lines.append(unambig_noe(seg1,i,"h1'",seg1,i,"h2'",3.05,0.3,0.3))
                lines.append(unambig_noe(seg1,i,"h4'",seg1,i,"h2''",3.82,0.3,0.3))
 
 
                lines.append(unambig_noe(seg2,j,"h1'",seg2,j,"h2'",3.05,0.3,0.3))
                lines.append(unambig_noe(seg2,j,"h4'",seg2,j,"h2''",3.82,0.3,0.3))
        lines.append('end\n')
 
 
def setBasePairPlanes(seq1,seg1,start1,seg2,start2,weight=100):
        seq1 = seq1.split()
        nres = len(seq1)
 
        lines = []
        lines.append('restraint')
        lines.append('planar')
 
        for k in range(nres):
                i = start1 + k
                j = start2 + nres - 1 - k
                si = seg1
                sj = seg2
 
                if seq1[i-start1] in ['ADE','GUA']:
                        tmp = i
                        i = j
                        j = tmp
                        tmp = si
                        si = sj
                        sj = tmp
 
 
                lines.append('group\n')
                lines.append('selection = \n')
                lines.append('\t((segid "%s" and resid %d and (name n3 or name c2 or name c4))\n' % (si,i))
                lines.append('\t or\n')
                lines.append('\t(segid "%s" and resid %d and (name n1 or name c6 or name c2)))\n' % (sj,j))
                lines.append('\tweight = %6.2f\n' % weight)
                lines.append('end\n')
 
        lines.append('end')
        return '\n'.join(lines)
 
 
def setBDnaDihedrals(seq1,seg1,start1,seg2,start2):
 
        seq2 = dnaGetComplSeq(seq1)
        seq1 = seq1.split()
        seq2 = seq2.split()
        nres = len(seq1)
 
        lines = []
        lines.append('restraint')
        lines.append('dihedral')
        for k in range(nres-1):
                i = start1 + k
                j = start2 + k
                #backbone
                lines.append(dihe(i,"c4'",i,"c3'",i,"o3'",i+1,'p',segid=seg1,ang=180,dev=50))
                lines.append(dihe(i,"c3'",i,"o3'",i+1,"p",i+1,"o5'",segid=seg1,ang=-85,dev=50))
                lines.append(dihe(i,"o3'",i+1,"p",i+1,"o5'",i+1,"c5'",segid=seg1,ang=-70,dev=30))
                lines.append(dihe(i+1,"p",i+1,"o5'",i+1,"c5'",i+1,"c4'",segid=seg1,ang=180,dev=30))
                lines.append(dihe(i,"o5'",i,"c5'",i,"c4'",i,"c3'",segid=seg1,ang=60,dev=30))
                lines.append(dihe(i,"c5'",i,"c4'",i,"c3'",i,"o3'",segid=seg1,ang=145,dev=30))
 
                #chi
                at3 = ''
                at4 = ''
                if seq1[k] in ['ADE','GUA']:
                        at3 = 'N9'
                        at4 = 'C4'
                else:
                        at3 = 'N1'
                        at4 = 'C2'
                lines.append(dihe(i,"o4'",i,"c1'",i,at3,i,at4,segid=seg1,ang=-120,dev=20))
 
                lines.append(dihe(j,"c4'",j,"c3'",j,"o3'",j+1,'p',segid=seg2,ang=180,dev=50))
                lines.append(dihe(j,"c3'",j,"o3'",j+1,"p",j+1,"o5'",segid=seg2,ang=-85,dev=50))
                lines.append(dihe(j,"o3'",j+1,"p",j+1,"o5'",j+1,"c5'",segid=seg2,ang=-70,dev=30))
                lines.append(dihe(j+1,"p",j+1,"o5'",j+1,"c5'",j+1,"c4'",segid=seg2,ang=180,dev=30))
                lines.append(dihe(j,"o5'",j,"c5'",j,"c4'",j,"c3'",segid=seg2,ang=60,dev=30))
                lines.append(dihe(j,"c5'",j,"c4'",j,"c3'",j,"o3'",segid=seg2,ang=145,dev=30))
                if seq2[k] in ['ADE','GUA']:
                        at3 = 'N9'
                        at4 = 'C4'
                else:
                        at3 = 'N1'
                        at4 = 'C2'
                lines.append(dihe(j,"o4'",j,"c1'",j,at3,j,at4,segid=seg2,ang=-120,dev=20))
 
        lines.append('end')
        return '\n'.join(lines)
 
def setWatsonCreekHBonds(seq1,seg1,start1,seg2,start2):
        """set watson creek hydrogen bonds
                seq1 - sequence of first strand
                seg1 - name of first strand segment
                start1 - number of first residue in strand1
                seg2 - name of second strand segment
                start2 - number of first residue in strand2
        """
        seq1 = seq1.split()
        i = start1
        j = start2 + len(seq1) - 1
        si = seg1
        sj = seg2
        lines = []
        for res in seq1:
                if res == 'GUA':
                        lines.append('assign (segid "%s" and resid %d and name N1)' % (si,i))
                        lines.append('\t(segid "%s" and resid %d and name N3) 2.95 0.2 0.2' % (sj,j))
                        lines.append('assign (segid "%s" and resid %d and name H1)  \
                                (segid "%s" and resid %d and name N3)  1.95 0.2 0.2' % (si,i,sj,j))
                        lines.append('assign (segid "%s" and resid %d and name o6)  \
                                (segid "%s" and resid %d and name N4)  2.91 .2 .2' % (si,i,sj,j))
                        lines.append('assign (segid "%s" and resid %d and name o6)  \
                                (segid "%s" and resid %d and name hn*) 1.91 .2 .2' % (si,i,sj,j))
                        lines.append('assign (segid "%s" and resid %d and name n2)  \
                                (segid "%s" and resid %d and name o2)  2.86 .2 .2' % (si,i,sj,j))
                        lines.append('assign (segid "%s" and resid %d and name hn*) \
                                (segid "%s" and resid %d and name o2)  1.86 0.2 0.2' % (si,i,sj,j))
                elif res == 'CYT':
                        lines.append('assign (segid "%s" and resid %d and name N1) \
                                                (segid "%s" and resid %d and name N3) 2.95 0.2 0.2' % (sj,j,si,i))
                        lines.append('assign (segid "%s" and resid %d and name H1)  \
                                                (segid "%s" and resid %d and name N3)  1.95 0.2 0.2' % (sj,j,si,i))
                        lines.append('assign (segid "%s" and resid %d and name o6)  \
                                                (segid "%s" and resid %d and name n4)  2.91 .2 .2' % (sj,j,si,i))
                        lines.append('assign (segid "%s" and resid %d and name o6)  \
                                                (segid "%s" and resid %d and name hn*) 1.91 .2 .2' % (sj,j,si,i))
                        lines.append('assign (segid "%s" and resid %d and name n2)  \
                                                (segid "%s" and resid %d and name o2)  2.86 .2 .2' % (sj,j,si,i))
                        lines.append('assign (segid "%s" and resid %d and name hn*) \
                                                (segid "%s" and resid %d and name o2)  1.86 0.2 0.2' % (sj,j,si,i))
                elif res == 'ADE':
                        lines.append('assign (segid "%s" and resid %d and name N1) \
                                                (segid "%s" and resid %d and name N3) 2.82 .2 .2' % (si,i,sj,j))
                        lines.append('assign (segid "%s" and resid %d and name n1) \
                                                (segid "%s" and resid %d and name h3) 1.82 .2 .2' % (si,i,sj,j))
                        lines.append('assign (segid "%s" and resid %d and name n6) \
                                                (segid "%s" and resid %d and name o4) 2.95 .2 .2' % (si,i,sj,j))
                        lines.append('assign (segid "%s" and resid %d and name hn*) \
                                                (segid "%s" and resid %d and name o4) 1.95 0.2 0.2' % (si,i,sj,j))
                elif res == 'THY':
                        lines.append('assign (segid "%s" and resid %d and name N1) \
                                                (segid "%s" and resid %d and name N3) 2.82 .2 .2' % (sj,j,si,i))
                        lines.append('assign (segid "%s" and resid %d and name n1) \
                                                (segid "%s" and resid %d and name h3) 1.82 .2 .2' % (sj,j,si,i))
                        lines.append('assign (segid "%s" and resid %d and name n6) \
                                                (segid "%s" and resid %d and name o4) 2.95 .2 .2' % (sj,j,si,i))
                        lines.append('assign (segid "%s" and resid %d and name hn*) \
                                                (segid "%s" and resid %d and name o4) 1.95 0.2 0.2' % (sj,j,si,i))
                i+=1
                j-=1
        return 'noe\nclass all\n nres 75000\n' + '\n'.join(lines) + '\nend\n'
 
def setPPRepulsion(seq,segid1,start1,segid2,start2,depth=None,flex=7):
        """flex - minus and plus margin for restraints"""
 
        #distance offsets taken from 3D model of ideal B-form DNA
        intra = {
                1:6.5,2:12.5,3:17.6,4:21.7,5:24.6,
                6:26.4,7:27.7,8:29.0,9:30.9,10:33.8,11:37.6,
                12:41.9,13:46.24,14:50.26,15:53.7,16:56.67
                }
 
        inter = { 
                -16:50.08, -15:46.57, -14:43.73, -13:41.48,
                -12:39.49, -11:37.31, -10:34.55, -9:30.95,
                -8:26.52, -7:21.56, -6:16.6, -5:12.84,
                -4:11.7, -3:13.17, -2:15.53, -1:17.39,
                0:18.23, 1:18.03, 2:17.29, 3:16.96,
                4:18.12, 5:21.12, 6:25.43, 7:30.20,
                8:34.75, 9:38.70, 10:41.90, 11:44.47,
                12:46.69, 13:48.96, 14:51.63, 15:54.88,
                }
 
        seq = seq.split()
        lines = ['noe\n']
        #same strand restraints
        N = len(seq)
        for i in range(1,N-1):
                for j in range(i+1,N):
                        if j-i>len(intra.keys()): break
                        lines.append(unambig_noe(segid1,i+start1,"P",segid1,j+start1,"P",intra[j-i],flex,flex))
                        lines.append(unambig_noe(segid2,i+start2,"P",segid2,j+start2,"P",intra[j-i],flex,flex))
 
        lines.append('!end of intrastrand %d\n'%(N))
 
        #interstrand restraints
        for i in range(1,N):
                for j in range(1,N):
                        index = N-1-j-i
                        if not index in inter.keys(): break
                        lines.append(unambig_noe(segid1,i+start1,"P",segid2,j+start2,"P",inter[index],flex,flex))
 
        lines.append('end\n')
        return '\n'.join(lines)
 
print setWatsonCreekHBonds(seqA,'dnaA',1,'dnaB',seqLen+1)
print setBDnaDihedrals(seqA,'dnaA',1,'dnaB',seqLen+1)
print setBasePairPlanes(seqA,'dnaA',1,'dnaB',seqLen+1)
print setPPRepulsion(seqA,'dnaA',1,'dnaB',seqLen+1)
Personal tools