Script xplo2xyz.py - convert PDB files created by NIH-XPLOR to XYZ file format

From NMR Wiki

(Difference between revisions)

Current revision

This script converts a PDB file saved by NIH-XPLOR into the XYZ file format.

It also works on other PDB files, but it processes only ATOM entries - HETATM entries are ignored. The script can easily be extended to handle them, though.

Usage:

xplo2xyz.py [options] <file.pdb> [<file.xyz>]
options:
 --version        show program's version number and exit
 -h, --help       show this help message and exit
 -o, --overwrite  overwrite output file, if it exists
 -v, --verbose    print info about files being processed

Create a file named xplo2xyz.py, paste the content below into it, and save.

#!/usr/bin/python
from optparse import OptionParser
import re
import os.path
import sys
 
class PDBAtom(object):
    """One ATOM record of a PDB file, reduced to what an XYZ file needs.

    Attributes:
        name: atom name taken from columns 13-16 (e.g. ``CA``).
        x, y, z: coordinates parsed as floats from columns 31-54.
        element: chemical element symbol, read from columns 77-78 when
            they are present and non-blank, otherwise guessed from the
            atom name.
        warnings: list of human-readable notes about guesses made while
            parsing; empty when nothing had to be guessed.
    """

    def __init__(self, string):
        """Parse a single fixed-column ATOM line.

        Args:
            string: raw text of the record, with or without the trailing
                newline.

        Raises:
            ValueError: if a coordinate field is not a number, or if the
                element has to be guessed and the atom name contains no
                letter.
        """
        #this is what we need to parse
        #ATOM      1  CA  ORN     1       4.935   1.171   7.983  1.00  0.00      sega
        #XPLOR pdb files do not fully agree with the PDB conventions
        self.name = string[12:16].strip()
        self.x = float(string[30:38].strip())
        self.y = float(string[38:46].strip())
        self.z = float(string[46:54].strip())
        self.warnings = []

        # The element field is trusted only when the line is long enough to
        # contain it; a line that is merely padded with spaces out to that
        # width has a blank field and needs the same fallback as a short one.
        if len(string) >= 78:
            element = string[76:78].strip()
        else:
            element = ''

        if element:
            self.element = element
        else:
            self.element = self._guess_element(self.name)
            self.warnings.append('Chemical element name guessed ' +\
                                'to be %s from atom name %s' % (self.element, self.name))

    @staticmethod
    def _guess_element(name):
        """Return the first letter of an atom name as the element symbol.

        Leading digits (as in ``1HB1``) are skipped so that the guess is
        always an alphabetic character.
        """
        for char in name:
            if char.isalpha():
                return char
        raise ValueError('cannot guess chemical element from atom name %r' % name)
 
# Command-line interface: a usage banner, a version string and two boolean
# flags.  Adjacent string literals inside the parentheses are joined by the
# compiler, so no explicit '+' or line continuation is needed.
usage = (
    'usage: %prog [options] <file.pdb> [<file.xyz>]\n\n'
    '\tConvert protein data bank PDB file created by NIH-XPLOR to XYZ file\n'
    '\tReferences: http://www.wwpdb.org/docs.html, '
    'http://en.wikipedia.org/wiki/XYZ_file_format\n\n'
    '\tto get help type: python %prog -h'
)
version = '%prog 0.1 - convert pdb file to xyz'

opt = OptionParser(usage=usage, version=version)
opt.add_option(
    '-o', '--overwrite',
    action='store_true',
    dest='overwrite',
    default=False,
    help='overwrite output file, if it exists'
)
opt.add_option(
    '-v', '--verbose',
    action='store_true',
    dest='verbose',
    default=False,
    help='print info about files being processed'
)

# Parse sys.argv: "options" carries the flags, "args" the positional names.
options, args = opt.parse_args()
 
# Validate the positional arguments and derive the input and output names.
# opt.error() prints the message and terminates the program, so infile and
# outfile are always defined once this section has run to completion.
narg = len(args)
if narg == 0:
    opt.error('must provide name of pdb file')
elif narg > 2:
    opt.error('too many no-option arguments should be either one or two (second - name of xyz file)')

infile = args[0]
# The dot is escaped so that only a literal ".pdb" suffix is accepted; an
# unescaped dot would also let names such as "model_pdb" through.
pdb_re = re.compile(r'^(.+)\.pdb$', re.IGNORECASE)
m = pdb_re.search(infile)
if not m:
    opt.error('input file (first argument) must have .pdb extension - case insensitive')

basename = m.group(1)
if narg == 1:
    # No explicit output name: reuse the input name with the new extension.
    outfile = basename + '.xyz'
elif args[1].lower().endswith('.xyz'):
    # Compared in lower case so the check is case insensitive, as the error
    # message below promises.
    outfile = args[1]
else:
    opt.error('output file (second argument) must have .xyz extension - case insensitive')
 
# Refuse to clobber an existing output file unless overwriting was requested.
if not options.overwrite and os.path.exists(outfile):
    opt.error('file %s exists, use -o or --overwrite otion to overwrite the file' % outfile)

# The input has to be an existing regular file.  opt.error() terminates the
# program, so the handle below is only opened for a path that passed the check.
if not os.path.isfile(infile):
    opt.error('file %s does not exist' % infile)
pdb_file = open(infile, 'r')

# Progress goes to stderr.
if options.verbose:
    sys.stderr.write('converting %s --> %s\n' % (infile, outfile))
 
#read pdb file
# Only ATOM records are converted; every other record type is skipped.
# lineno is the 1-based line number in the input, used in error messages.
lineno = 0
atoms = []
try:
    for lineno, line in enumerate(pdb_file, 1):
        if not line.startswith('ATOM'):
            continue
        try:
            atoms.append(PDBAtom(line))
        except (ValueError, IndexError):
            # Only parsing failures are reported here: a coordinate field
            # that is not a number or an atom name that cannot be read.
            # Anything else (e.g. Ctrl-C) propagates instead of being
            # misreported as a formatting problem.
            sys.stderr.write('\nProblem parsing line %d in file %s\n' % (lineno,infile))
            sys.stderr.write(line)
            sys.stderr.write('Probably ATOM entry is formatted incorrectly?\n')
            sys.stderr.write('Please refer to - http://www.wwpdb.org/documentation/format32/sect9.html#ATOM\n\n')
            sys.exit(1)
finally:
    # Close the input on every path, including the sys.exit() above.
    pdb_file.close()
 
#save xyz file
# Layout: line 1 holds the atom count, line 2 a free-text comment, and every
# following line one atom as "<element>\t<x>\t<y>\t<z>".
xyz_file = open(outfile,'w')
num_hidden_warnings = 0
try:
    xyz_file.write('%d\n' % len(atoms))
    xyz_file.write('xyz file converted from %s\n' % infile)
    # Two header lines precede the atoms, so the first atom is on line 3.
    for lineno, atom in enumerate(atoms, 3):
        # The first column of an XYZ line is the chemical element, not the
        # PDB atom name ("CA" would be read as calcium).  The atom name is
        # used only as a last resort when no element is available.
        symbol = atom.element or atom.name
        xyz_file.write('%s\t%f\t%f\t%f\n' % (symbol, atom.x, atom.y, atom.z))
        if not atom.warnings:
            continue
        if options.verbose:
            sys.stderr.write('Possible issue on line %d in %s\n' % (lineno, outfile))
            sys.stderr.write('\n'.join(atom.warnings))
            sys.stderr.write('\n')
        else:
            num_hidden_warnings += 1
finally:
    # Close the output even if a write fails part-way through.
    xyz_file.close()

# In quiet mode the warnings were only counted; tell the user they exist.
if not options.verbose and num_hidden_warnings > 0:
    sys.stderr.write('file %s saved\n' % outfile)
    sys.stderr.write('%d warnings were not shown, ' % num_hidden_warnings)
    sys.stderr.write('please rerun with option -v to see them\n')