B-Form DNA generator for NIH-XPLOR - python script
From NMR Wiki
This code will generate for any DNA sequence restraints for the NIH-XPLOR to produce perfect B-form DNA coordinates. At least that was an intention.
The script produced only restraints, to generate coordinates these restraints will need to be run in XPLOR simulated annealing procedure.
# # generate XPLOR restraints for b-form DNA based on primary sequence of DNA # modify sequence and base count on single strand below and run the script # # complimentary strand called dnaB will be added automatically # in addition to dnaA strand # seqA = "CYT ADE ADE GUA ADE GUA THY CYT ADE ADE THY CYT" selLen = 12 #number of bases def dnaGetComplSeq(seq): seq = seq.split() seq.reverse() out = [] for r in seq: if r == 'GUA': r = 'CYT' elif r == 'CYT': r = 'GUA' elif r == 'ADE': r = 'THY' elif r == 'THY': r = 'ADE' out.append(r) return ' '.join(out) def dihe(res1,at1,res2,at2,res3,at3,res4,at4,segid=' ',ang=None,dev=None,pow=2,const=1): out = '' out += 'assign (segid "%s" and resid %d and name %s)\n' % (segid,res1,at1) out += '\t(segid "%s" and resid %d and name %s)\n' % (segid,res2,at2) out += '\t(segid "%s" and resid %d and name %s)\n' % (segid,res3,at3) out += '\t(segid "%s" and resid %d and name %s) %d %5.1f %5.1f %d\n' \ % (segid,res4,at4,const,ang,dev,pow) return out def unambig_noe(seg1,res1,at1,seg2,res2,at2,upl,plus,minus): out = 'assign (segid "%s" and resid %d and name %s)\n' % (seg1,res1,at1) out += '\t(segid "%s" and resid %d and name %s) %5.2f %5.2f %5.2f\n' % \ (seg2,res2,at2,upl,plus,minus) return out def setBDnaDistances(seq1,seg1,start1,seg2,start2): seq2 = dnaGetComplSeq(seq1) seq1 = seq1.split() seq2 = seq2.split() nres = len(seq1) #sugar intranucleotide lines = [] lines.append('noe\n') for k in range(nres): i = k + start1 j = k + start2 lines.append(unambig_noe(seg1,i,"h1'",seg1,i,"h2'",3.05,0.3,0.3)) lines.append(unambig_noe(seg1,i,"h4'",seg1,i,"h2''",3.82,0.3,0.3)) lines.append(unambig_noe(seg2,j,"h1'",seg2,j,"h2'",3.05,0.3,0.3)) lines.append(unambig_noe(seg2,j,"h4'",seg2,j,"h2''",3.82,0.3,0.3)) lines.append('end\n') def setBasePairPlanes(seq1,seg1,start1,seg2,start2,weight=100): seq1 = seq1.split() nres = len(seq1) lines = [] lines.append('restraint') lines.append('planar') for k in range(nres): i = start1 + k j = start2 + nres - 1 - k si = seg1 sj = seg2 if seq1[i-start1] in ['ADE','GUA']: tmp = i i = j j = tmp tmp = si si = sj sj = tmp lines.append('group\n') lines.append('selection = \n') lines.append('\t((segid "%s" and resid %d and (name n3 or name c2 or name c4))\n' % (si,i)) lines.append('\t or\n') lines.append('\t(segid "%s" and resid %d and (name n1 or name c6 or name c2)))\n' % (sj,j)) lines.append('\tweight = %6.2f\n' % weight) lines.append('end\n') lines.append('end') return '\n'.join(lines) def setBDnaDihedrals(seq1,seg1,start1,seg2,start2): seq2 = dnaGetComplSeq(seq1) seq1 = seq1.split() seq2 = seq2.split() nres = len(seq1) lines = [] lines.append('restraint') lines.append('dihedral') for k in range(nres-1): i = start1 + k j = start2 + k #backbone lines.append(dihe(i,"c4'",i,"c3'",i,"o3'",i+1,'p',segid=seg1,ang=180,dev=50)) lines.append(dihe(i,"c3'",i,"o3'",i+1,"p",i+1,"o5'",segid=seg1,ang=-85,dev=50)) lines.append(dihe(i,"o3'",i+1,"p",i+1,"o5'",i+1,"c5'",segid=seg1,ang=-70,dev=30)) lines.append(dihe(i+1,"p",i+1,"o5'",i+1,"c5'",i+1,"c4'",segid=seg1,ang=180,dev=30)) lines.append(dihe(i,"o5'",i,"c5'",i,"c4'",i,"c3'",segid=seg1,ang=60,dev=30)) lines.append(dihe(i,"c5'",i,"c4'",i,"c3'",i,"o3'",segid=seg1,ang=145,dev=30)) #chi at3 = '' at4 = '' if seq1[k] in ['ADE','GUA']: at3 = 'N9' at4 = 'C4' else: at3 = 'N1' at4 = 'C2' lines.append(dihe(i,"o4'",i,"c1'",i,at3,i,at4,segid=seg1,ang=-120,dev=20)) lines.append(dihe(j,"c4'",j,"c3'",j,"o3'",j+1,'p',segid=seg2,ang=180,dev=50)) lines.append(dihe(j,"c3'",j,"o3'",j+1,"p",j+1,"o5'",segid=seg2,ang=-85,dev=50)) lines.append(dihe(j,"o3'",j+1,"p",j+1,"o5'",j+1,"c5'",segid=seg2,ang=-70,dev=30)) lines.append(dihe(j+1,"p",j+1,"o5'",j+1,"c5'",j+1,"c4'",segid=seg2,ang=180,dev=30)) lines.append(dihe(j,"o5'",j,"c5'",j,"c4'",j,"c3'",segid=seg2,ang=60,dev=30)) lines.append(dihe(j,"c5'",j,"c4'",j,"c3'",j,"o3'",segid=seg2,ang=145,dev=30)) if seq2[k] in ['ADE','GUA']: at3 = 'N9' at4 = 'C4' else: at3 = 'N1' at4 = 'C2' lines.append(dihe(j,"o4'",j,"c1'",j,at3,j,at4,segid=seg2,ang=-120,dev=20)) lines.append('end') return '\n'.join(lines) def setWatsonCreekHBonds(seq1,seg1,start1,seg2,start2): """set watson creek hydrogen bonds seq1 - sequence of first strand seg1 - name of first strand segment start1 - number of first residue in strand1 seg2 - name of second strand segment start2 - number of first residue in strand2 """ seq1 = seq1.split() i = start1 j = start2 + len(seq1) - 1 si = seg1 sj = seg2 lines = [] for res in seq1: if res == 'GUA': lines.append('assign (segid "%s" and resid %d and name N1)' % (si,i)) lines.append('\t(segid "%s" and resid %d and name N3) 2.95 0.2 0.2' % (sj,j)) lines.append('assign (segid "%s" and resid %d and name H1) \ (segid "%s" and resid %d and name N3) 1.95 0.2 0.2' % (si,i,sj,j)) lines.append('assign (segid "%s" and resid %d and name o6) \ (segid "%s" and resid %d and name N4) 2.91 .2 .2' % (si,i,sj,j)) lines.append('assign (segid "%s" and resid %d and name o6) \ (segid "%s" and resid %d and name hn*) 1.91 .2 .2' % (si,i,sj,j)) lines.append('assign (segid "%s" and resid %d and name n2) \ (segid "%s" and resid %d and name o2) 2.86 .2 .2' % (si,i,sj,j)) lines.append('assign (segid "%s" and resid %d and name hn*) \ (segid "%s" and resid %d and name o2) 1.86 0.2 0.2' % (si,i,sj,j)) elif res == 'CYT': lines.append('assign (segid "%s" and resid %d and name N1) \ (segid "%s" and resid %d and name N3) 2.95 0.2 0.2' % (sj,j,si,i)) lines.append('assign (segid "%s" and resid %d and name H1) \ (segid "%s" and resid %d and name N3) 1.95 0.2 0.2' % (sj,j,si,i)) lines.append('assign (segid "%s" and resid %d and name o6) \ (segid "%s" and resid %d and name n4) 2.91 .2 .2' % (sj,j,si,i)) lines.append('assign (segid "%s" and resid %d and name o6) \ (segid "%s" and resid %d and name hn*) 1.91 .2 .2' % (sj,j,si,i)) lines.append('assign (segid "%s" and resid %d and name n2) \ (segid "%s" and resid %d and name o2) 2.86 .2 .2' % (sj,j,si,i)) lines.append('assign (segid "%s" and resid %d and name hn*) \ (segid "%s" and resid %d and name o2) 1.86 0.2 0.2' % (sj,j,si,i)) elif res == 'ADE': lines.append('assign (segid "%s" and resid %d and name N1) \ (segid "%s" and resid %d and name N3) 2.82 .2 .2' % (si,i,sj,j)) lines.append('assign (segid "%s" and resid %d and name n1) \ (segid "%s" and resid %d and name h3) 1.82 .2 .2' % (si,i,sj,j)) lines.append('assign (segid "%s" and resid %d and name n6) \ (segid "%s" and resid %d and name o4) 2.95 .2 .2' % (si,i,sj,j)) lines.append('assign (segid "%s" and resid %d and name hn*) \ (segid "%s" and resid %d and name o4) 1.95 0.2 0.2' % (si,i,sj,j)) elif res == 'THY': lines.append('assign (segid "%s" and resid %d and name N1) \ (segid "%s" and resid %d and name N3) 2.82 .2 .2' % (sj,j,si,i)) lines.append('assign (segid "%s" and resid %d and name n1) \ (segid "%s" and resid %d and name h3) 1.82 .2 .2' % (sj,j,si,i)) lines.append('assign (segid "%s" and resid %d and name n6) \ (segid "%s" and resid %d and name o4) 2.95 .2 .2' % (sj,j,si,i)) lines.append('assign (segid "%s" and resid %d and name hn*) \ (segid "%s" and resid %d and name o4) 1.95 0.2 0.2' % (sj,j,si,i)) i+=1 j-=1 return 'noe\nclass all\n nres 75000\n' + '\n'.join(lines) + '\nend\n' def setPPRepulsion(seq,segid1,start1,segid2,start2,depth=None,flex=7): """flex - minus and plus margin for restraints""" #distance offsets taken from 3D model of ideal B-form DNA intra = { 1:6.5,2:12.5,3:17.6,4:21.7,5:24.6, 6:26.4,7:27.7,8:29.0,9:30.9,10:33.8,11:37.6, 12:41.9,13:46.24,14:50.26,15:53.7,16:56.67 } inter = { -16:50.08, -15:46.57, -14:43.73, -13:41.48, -12:39.49, -11:37.31, -10:34.55, -9:30.95, -8:26.52, -7:21.56, -6:16.6, -5:12.84, -4:11.7, -3:13.17, -2:15.53, -1:17.39, 0:18.23, 1:18.03, 2:17.29, 3:16.96, 4:18.12, 5:21.12, 6:25.43, 7:30.20, 8:34.75, 9:38.70, 10:41.90, 11:44.47, 12:46.69, 13:48.96, 14:51.63, 15:54.88, } seq = seq.split() lines = ['noe\n'] #same strand restraints N = len(seq) for i in range(1,N-1): for j in range(i+1,N): if j-i>len(intra.keys()): break lines.append(unambig_noe(segid1,i+start1,"P",segid1,j+start1,"P",intra[j-i],flex,flex)) lines.append(unambig_noe(segid2,i+start2,"P",segid2,j+start2,"P",intra[j-i],flex,flex)) lines.append('!end of intrastrand %d\n'%(N)) #interstrand restraints for i in range(1,N): for j in range(1,N): index = N-1-j-i if not index in inter.keys(): break lines.append(unambig_noe(segid1,i+start1,"P",segid2,j+start2,"P",inter[index],flex,flex)) lines.append('end\n') return '\n'.join(lines) print setWatsonCreekHBonds(seqA,'dnaA',1,'dnaB',seqLen+1) print setBDnaDihedrals(seqA,'dnaA',1,'dnaB',seqLen+1) print setBasePairPlanes(seqA,'dnaA',1,'dnaB',seqLen+1) print setPPRepulsion(seqA,'dnaA',1,'dnaB',seqLen+1)
Categories: Python | Scripts | NIH-XPLOR