Script xplo2xyz.py - convert PDB files created by NIH-XPLOR to XYZ file format
From NMR Wiki
This script converts a PDB file saved by NIH-XPLOR into the XYZ file format.
It also works on other PDB files, but it processes only ATOM entries - HETATM entries are skipped. It can easily be extended to handle them, though.
Usage:
xplo2xyz.py [options] <file.pdb> [<file.xyz>]

options:
  --version        show program's version number and exit
  -h, --help       show this help message and exit
  -o, --overwrite  overwrite output file, if it exists
  -v, --verbose    print info about files being processed
Create a file named xplo2xyz.py, copy-paste the content below into it, and save.
#!/usr/bin/python
"""Convert a PDB file written by NIH-XPLOR into the XYZ file format.

Only ATOM records are converted; every other record type (including HETATM)
is skipped.  The first column of each XYZ atom line is the chemical element
symbol, taken from PDB columns 77-78 when present and otherwise guessed from
the atom name (a warning is recorded for every guess).
"""
from optparse import OptionParser
import re
import os.path
import sys

usage = 'usage: %prog [options] <file.pdb> [<file.xyz>]\n\n' \
    + '\tConvert protein data bank PDB file created by NIH-XPLOR to XYZ file\n' \
    + '\tReferences: http://www.wwpdb.org/docs.html, ' \
    + 'http://en.wikipedia.org/wiki/XYZ_file_format\n\n' \
    + '\tto get help type: python %prog -h'
version = '%prog 0.1 - convert pdb file to xyz'

# "<basename>.pdb", case insensitive.  The dot is escaped so that a name
# such as "model_pdb" is not mistaken for a file with a .pdb extension.
pdb_re = re.compile(r'^(.+)\.pdb$', re.IGNORECASE)


def _guess_element(name):
    """Return the first alphabetic character of atom name *name*.

    Leading digits (as in the atom name ``1HB``) are skipped.

    Raises:
        ValueError: if *name* contains no alphabetic character.
    """
    for char in name:
        if char.isalpha():
            return char
    raise ValueError('cannot guess chemical element from atom name %r' % name)


class PDBAtom(object):
    """A single atom parsed from one ATOM record of a PDB file.

    Attributes:
        name:     atom name, PDB columns 13-16, whitespace stripped
        x, y, z:  coordinates as floats, PDB columns 31-38, 39-46, 47-54
        element:  chemical element symbol, PDB columns 77-78, or a guess
                  derived from the atom name when those columns are
                  missing or blank
        warnings: list of human-readable messages about guesses made
                  while parsing this record
    """

    def __init__(self, string):
        """Parse *string*, one ATOM line of a PDB file.

        Raises:
            ValueError: if a coordinate field is not a number, or if the
                element has to be guessed and the atom name has no letters.
        """
        # this is what we need to parse
        # ATOM      1  CA  ORN     1       4.935   1.171   7.983  1.00  0.00      sega
        # XPLOR pdb files do not fully agree with the PDB conventions
        self.name = string[12:16].strip()
        self.x = float(string[30:38].strip())
        self.y = float(string[38:46].strip())
        self.z = float(string[46:54].strip())
        self.warnings = []

        # The line must reach column 78 to carry an element field at all;
        # even then the field may be blank, in which case we must guess.
        if len(string) >= 78:
            element = string[76:78].strip()
        else:
            element = ''
        if not element:
            element = _guess_element(self.name)
            self.warnings.append('Chemical element name guessed '
                                 + 'to be %s from atom name %s'
                                 % (element, self.name))
        self.element = element


def _build_parser():
    """Return the OptionParser describing the command line interface."""
    opt = OptionParser(usage=usage, version=version)
    opt.add_option('-o', '--overwrite', action='store_true', dest='overwrite',
                   default=False, help='overwrite output file, if it exists')
    opt.add_option('-v', '--verbose', action='store_true', dest='verbose',
                   default=False, help='print info about files being processed')
    return opt


def _resolve_paths(opt, args):
    """Validate positional *args* and return ``(infile, outfile)``.

    Problems are reported through ``opt.error``, which prints the message
    and exits with status 2.
    """
    narg = len(args)
    if narg == 0:
        opt.error('must provide name of pdb file')
    elif narg > 2:
        opt.error('too many no-option arguments should be either one or two (second - name of xyz file)')

    infile = args[0]
    m = pdb_re.search(infile)
    if not m:
        opt.error('input file (first argument) must have .pdb extension - case insensitive')

    if narg == 2:
        outfile = args[1]
        # compare in lower case so the check is case insensitive,
        # as the error message promises
        if not outfile.lower().endswith('.xyz'):
            opt.error('output file (second argument) must have .xyz extension - case insensitive')
    else:
        outfile = m.group(1) + '.xyz'
    return infile, outfile


def _read_atoms(infile):
    """Return a list of PDBAtom objects for the ATOM records in *infile*.

    If a record cannot be parsed, a diagnostic is written to stderr and the
    program exits with status 1.
    """
    atoms = []
    with open(infile, 'r') as pdb_file:
        for lineno, line in enumerate(pdb_file, 1):
            if not line.startswith('ATOM'):
                continue
            try:
                atoms.append(PDBAtom(line))
            except (ValueError, IndexError):
                # only parse failures are handled here, so that
                # KeyboardInterrupt and SystemExit are not swallowed
                sys.stderr.write('\nProblem parsing line %d in file %s\n' % (lineno, infile))
                sys.stderr.write(line)
                sys.stderr.write('Probably ATOM entry is formatted incorrectly?\n')
                sys.stderr.write('Please refer to - http://www.wwpdb.org/documentation/format32/sect9.html#ATOM\n\n')
                sys.exit(1)
    return atoms


def _write_xyz(outfile, infile, atoms, verbose):
    """Write *atoms* to *outfile* in XYZ format.

    Per-atom warnings are printed to stderr when *verbose* is true.

    Returns:
        The number of atoms whose warnings were not printed.
    """
    num_hidden_warnings = 0
    with open(outfile, 'w') as xyz_file:
        xyz_file.write('%d\n' % len(atoms))
        xyz_file.write('xyz file converted from %s\n' % infile)
        # the two header lines above occupy lines 1-2, atoms start on line 3
        for lineno, atom in enumerate(atoms, 3):
            # XYZ expects the element symbol here, not the PDB atom name
            # (an atom name such as "CA" would be read as calcium)
            xyz_file.write('%s\t%f\t%f\t%f\n' % (atom.element, atom.x, atom.y, atom.z))
            if not atom.warnings:
                continue
            if verbose:
                sys.stderr.write('Possible issue on line %d in %s\n' % (lineno, outfile))
                sys.stderr.write('\n'.join(atom.warnings))
                sys.stderr.write('\n')
            else:
                num_hidden_warnings += 1
    return num_hidden_warnings


def main(argv=None):
    """Run the converter.

    Args:
        argv: list of command line arguments without the program name;
            ``None`` (the default) means ``sys.argv[1:]``.
    """
    opt = _build_parser()
    (options, args) = opt.parse_args(argv)
    infile, outfile = _resolve_paths(opt, args)

    if os.path.exists(outfile) and not options.overwrite:
        opt.error('file %s exists, use -o or --overwrite option to overwrite the file' % outfile)
    if not os.path.isfile(infile):
        opt.error('file %s does not exist' % infile)

    if options.verbose:
        sys.stderr.write('converting %s --> %s\n' % (infile, outfile))

    atoms = _read_atoms(infile)
    num_hidden_warnings = _write_xyz(outfile, infile, atoms, options.verbose)

    if not options.verbose and num_hidden_warnings > 0:
        sys.stderr.write('file %s saved\n' % outfile)
        sys.stderr.write('%d warnings were not shown, ' % num_hidden_warnings)
        sys.stderr.write('please rerun with option -v to see them\n')


if __name__ == '__main__':
    main()
Categories: NIH-XPLOR | Python | Scripts